| 1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/**
|
|
|
4 |
* This file is part of FPDI
|
|
|
5 |
*
|
|
|
6 |
* @package setasign\Fpdi
|
|
|
7 |
* @copyright Copyright (c) 2023 Setasign GmbH & Co. KG (https://www.setasign.com)
|
|
|
8 |
* @license http://opensource.org/licenses/mit-license The MIT License
|
|
|
9 |
*/
|
|
|
10 |
|
|
|
11 |
namespace setasign\Fpdi\PdfReader;
|
|
|
12 |
|
|
|
13 |
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
|
|
|
14 |
use setasign\Fpdi\PdfParser\PdfParser;
|
|
|
15 |
use setasign\Fpdi\PdfParser\PdfParserException;
|
|
|
16 |
use setasign\Fpdi\PdfParser\Type\PdfArray;
|
|
|
17 |
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
|
|
|
18 |
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
|
|
|
19 |
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
|
|
|
20 |
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
|
|
|
21 |
use setasign\Fpdi\PdfParser\Type\PdfType;
|
|
|
22 |
use setasign\Fpdi\PdfParser\Type\PdfTypeException;
|
|
|
23 |
|
|
|
24 |
/**
 * A PDF reader class
 *
 * Wraps a PdfParser instance and offers access to the document's PDF version,
 * its page count and its individual pages. Page objects are looked up lazily
 * through the document's page tree and cached in {@see PdfReader::$pages}.
 */
class PdfReader
{
    /**
     * The parser instance all objects are read through.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Cached value of the root page tree node's "Count" entry (null until first requested).
     *
     * @var int
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * Zero-based list: entries are plain references until a page is requested for the first time.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     *
     * Asks the parser to clean up if one is still attached.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * The version parts returned by the parser are joined with a dot.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * The value is read once from the "Count" entry of the catalog's "Pages" dictionary and cached afterwards.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber One-based page number.
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws \InvalidArgumentException If the page number is not numeric or outside of 1..page count.
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        $this->readPages();

        // is_numeric() also accepts floats and numeric strings. Do the arithmetic first and cast the result, so
        // the array offset is always an integer (fractional float offsets are deprecated as of PHP 8.1) while
        // still addressing the same element as the implicit conversion did.
        $pageIndex = (int) ($pageNumber - 1);

        $page = $this->pages[$pageIndex];

        if ($page instanceof PdfIndirectObjectReference) {
            // Descends through the first kid of each "Pages" node until a non-"Pages" object is reached.
            $readPages = function ($kids) use (&$readPages) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        // "Kids" may itself be stored as an indirect reference - resolve it before descending.
                        return $readPages(
                            PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser)
                        );
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                // The cached reference points to an intermediate node instead of a page.
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$pageIndex] = $readPages($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    // @phpstan-ignore-next-line
                    $page = $this->pages[$pageIndex];
                }
            } else {
                // Cache the loaded object so the next request does not need to load it again.
                $this->pages[$pageIndex] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and resolve all indirect objects of all pages.
     *
     * Does nothing if the pages list is already filled. Unless $readAll is set, the kids of a node whose "Count"
     * equals the number of its kids are stored as plain references without loading them.
     *
     * @param bool $readAll Load and inspect every referenced object instead of trusting the "Count" entries.
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

        $expectedPageCount = $this->getPageCount();
        $readPages = function ($kids, $count) use (&$readPages, $readAll, $expectedPageCount) {
            $kids = PdfArray::ensure($kids);
            $isLeaf = ($count->value === \count($kids->value));

            foreach ($kids->value as $reference) {
                $reference = PdfIndirectObjectReference::ensure($reference);

                if (!$readAll && $isLeaf) {
                    $this->pages[] = $reference;
                    continue;
                }

                $object = $this->parser->getIndirectObject($reference->value);
                $type = PdfDictionary::get($object->value, 'Type');

                if ($type->value === 'Pages') {
                    // Resolve both entries the same way as it is done for the root node below: they may be
                    // stored as indirect references.
                    $readPages(
                        PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser),
                        PdfType::resolve(PdfDictionary::get($object->value, 'Count'), $this->parser)
                    );
                } else {
                    $this->pages[] = $object;
                }

                // stop if all pages are read - faulty documents exist with additional entries with invalid data.
                if (\count($this->pages) === $expectedPageCount) {
                    break;
                }
            }
        };

        $catalog = $this->parser->getCatalog();
        $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
        $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
        $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
        $readPages($kids, $count);
    }
}
|