Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1441 ariadna 1
<?php
2
 
3
/**
4
 * This file is part of FPDI
5
 *
6
 * @package   setasign\Fpdi
7
 * @copyright Copyright (c) 2024 Setasign GmbH & Co. KG (https://www.setasign.com)
8
 * @license   http://opensource.org/licenses/mit-license The MIT License
9
 */
10
 
11
namespace setasign\Fpdi\PdfParser;
12
 
13
use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
14
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
15
use setasign\Fpdi\PdfParser\Type\PdfArray;
16
use setasign\Fpdi\PdfParser\Type\PdfBoolean;
17
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
18
use setasign\Fpdi\PdfParser\Type\PdfHexString;
19
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
20
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
21
use setasign\Fpdi\PdfParser\Type\PdfName;
22
use setasign\Fpdi\PdfParser\Type\PdfNull;
23
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
24
use setasign\Fpdi\PdfParser\Type\PdfStream;
25
use setasign\Fpdi\PdfParser\Type\PdfString;
26
use setasign\Fpdi\PdfParser\Type\PdfToken;
27
use setasign\Fpdi\PdfParser\Type\PdfType;
28
use setasign\Fpdi\PdfParser\Type\PdfTypeException;
29
 
30
/**
 * A PDF parser class.
 *
 * Locates the PDF file header in the stream, lazily creates the cross-reference
 * instance and converts the tokens delivered by the tokenizer into PDF value objects.
 */
class PdfParser
{
    /**
     * The stream reader this parser (and its tokenizer) reads from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * The tokenizer created on top of the stream reader.
     *
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * The file header.
     *
     * The trimmed line that starts at the "%PDF-" marker. It is unset until
     * resolveFileHeader() ran successfully.
     *
     * @var string
     */
    protected $fileHeader;

    /**
     * The offset to the file header.
     *
     * Position of the "%PDF-" marker as reported by strpos() on the reader's buffer.
     *
     * @var int
     */
    protected $fileHeaderOffset;

    /**
     * The cross-reference instance; created on first use and dropped again by cleanUp().
     *
     * @var CrossReference|null
     */
    protected $xref;

    /**
     * All read objects.
     *
     * Indirect objects that were requested with caching enabled, keyed by object number.
     *
     * @var array
     */
    protected $objects = [];

    /**
     * PdfParser constructor.
     *
     * Stores the stream reader and creates a tokenizer that reads from it.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
        $this->tokenizer = new Tokenizer($streamReader);
    }

    /**
     * Removes cycled references.
     *
     * The cross-reference instance is constructed with this parser as an argument, so
     * releasing it here breaks the parser <-> cross-reference cycle.
     *
     * @internal
     */
    public function cleanUp()
    {
        $this->xref = null;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Get the tokenizer instance.
     *
     * @return Tokenizer
     */
    public function getTokenizer()
    {
        return $this->tokenizer;
    }

    /**
     * Resolves the file header.
     *
     * Searches the reader's buffer for the "%PDF-" marker, growing the buffer in steps
     * of 100 bytes until the marker is found. The search gives up when the buffer cannot
     * be increased anymore or after 1000 increments. The result is memoized in
     * $fileHeader / $fileHeaderOffset.
     *
     * @throws PdfParserException If no "%PDF-" marker could be found.
     * @return int The offset of the file header.
     */
    protected function resolveFileHeader()
    {
        if ($this->fileHeader) {
            // already resolved by an earlier call
            return $this->fileHeaderOffset;
        }

        $this->streamReader->reset(0);
        $maxIterations = 1000;
        while (true) {
            $buffer = $this->streamReader->getBuffer(false);
            $offset = \strpos($buffer, '%PDF-');
            if ($offset !== false) {
                break;
            }

            // marker not in the buffer yet: try to read more data or give up
            if (!$this->streamReader->increaseLength(100) || (--$maxIterations === 0)) {
                throw new PdfParserException(
                    'Unable to find PDF file header.',
                    PdfParserException::FILE_HEADER_NOT_FOUND
                );
            }
        }

        $this->fileHeaderOffset = $offset;
        $this->streamReader->setOffset($offset);

        $this->fileHeader = \trim($this->streamReader->readLine());
        return $this->fileHeaderOffset;
    }

    /**
     * Get the cross-reference instance.
     *
     * The instance is created on the first call (which also resolves the file header)
     * and reused afterwards until cleanUp() is called.
     *
     * @return CrossReference
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCrossReference()
    {
        if ($this->xref === null) {
            $this->xref = new CrossReference($this, $this->resolveFileHeader());
        }

        return $this->xref;
    }

    /**
     * Get the PDF version.
     *
     * The version is taken from the file header. If the catalog dictionary has a
     * "Version" entry that consists of exactly two dot-separated parts, this entry
     * takes precedence over the header.
     *
     * @return int[] An array of major and minor version.
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        $this->resolveFileHeader();

        // preg_match() returns 1 on a match, 0 on no match and false on failure. Only a
        // real match guarantees that $result carries both capture groups.
        if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) !== 1) {
            throw new PdfParserException(
                'Unable to extract PDF version from file header.',
                PdfParserException::PDF_VERSION_NOT_FOUND
            );
        }
        list(, $major, $minor) = $result;

        $catalog = $this->getCatalog();
        if (isset($catalog->value['Version'])) {
            $versionParts = \explode(
                '.',
                PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value)
            );
            if (\count($versionParts) === 2) {
                list($major, $minor) = $versionParts;
            }
        }

        return [(int) $major, (int) $minor];
    }

    /**
     * Get the catalog dictionary.
     *
     * Resolves the "Root" entry of the trailer and ensures that it is a dictionary.
     *
     * @return PdfDictionary
     * @throws Type\PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCatalog()
    {
        $trailer = $this->getCrossReference()->getTrailer();

        $catalog = PdfType::resolve(PdfDictionary::get($trailer, 'Root'), $this);

        return PdfDictionary::ensure($catalog);
    }

    /**
     * Get an indirect object by its object number.
     *
     * An object that is already in the local cache is always returned from there.
     * The $cache flag only controls whether a freshly read object is stored in the cache.
     *
     * @param int $objectNumber
     * @param bool $cache Whether to remember the object for subsequent calls.
     * @return PdfIndirectObject
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getIndirectObject($objectNumber, $cache = false)
    {
        $objectNumber = (int) $objectNumber;
        if (isset($this->objects[$objectNumber])) {
            return $this->objects[$objectNumber];
        }

        $object = $this->getCrossReference()->getIndirectObject($objectNumber);

        if ($cache) {
            $this->objects[$objectNumber] = $object;
        }

        return $object;
    }

    /**
     * Read a PDF value.
     *
     * If no token is passed, the next token is fetched from the tokenizer. The token
     * decides which kind of value is parsed. If $expectedType is given, it has to be the
     * class name of the value type the token leads to, otherwise an exception is thrown.
     *
     * @param null|bool|string $token The token to start with or null to read the next one.
     * @param null|string $expectedType Class name of the expected value type or null to accept any.
     * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken
     *         False if there is no token left and no specific type was expected.
     * @throws Type\PdfTypeException
     */
    public function readValue($token = null, $expectedType = null)
    {
        if ($token === null) {
            $token = $this->tokenizer->getNextToken();
        }

        if ($token === false) {
            // no token left: only an error if the caller expected a specific type
            if ($expectedType !== null) {
                throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
            }
            return false;
        }

        switch ($token) {
            case '(':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfString();

            case '<':
                // a directly following second "<" makes this a dictionary ("<<")
                if ($this->streamReader->getByte() === '<') {
                    $this->ensureExpectedType('<<', $expectedType);
                    $this->streamReader->addOffset(1);
                    return $this->parsePdfDictionary();
                }

                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfHexString();

            case '/':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfName();

            case '[':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfArray();

            default:
                if (\is_numeric($token)) {
                    // A number can be the start of "<number> <number> obj" or
                    // "<number> <number> R": look ahead up to two tokens.
                    $token2 = $this->tokenizer->getNextToken();
                    if ($token2 !== false) {
                        if (\is_numeric($token2)) {
                            $token3 = $this->tokenizer->getNextToken();
                            if ($token3 === 'obj') {
                                if ($expectedType !== null && $expectedType !== PdfIndirectObject::class) {
                                    throw new Type\PdfTypeException(
                                        'Got unexpected token type.',
                                        Type\PdfTypeException::INVALID_DATA_TYPE
                                    );
                                }

                                return $this->parsePdfIndirectObject((int) $token, (int) $token2);
                            } elseif ($token3 === 'R') {
                                if (
                                    $expectedType !== null &&
                                    $expectedType !== PdfIndirectObjectReference::class
                                ) {
                                    throw new Type\PdfTypeException(
                                        'Got unexpected token type.',
                                        Type\PdfTypeException::INVALID_DATA_TYPE
                                    );
                                }

                                return PdfIndirectObjectReference::create((int) $token, (int) $token2);
                            } elseif ($token3 !== false) {
                                // neither an object nor a reference: hand the token back
                                $this->tokenizer->pushStack($token3);
                            }
                        }

                        // The look-ahead tokens are handed back in reverse order
                        // (third before second) - presumably the tokenizer stack is LIFO.
                        $this->tokenizer->pushStack($token2);
                    }

                    if ($expectedType !== null && $expectedType !== PdfNumeric::class) {
                        throw new Type\PdfTypeException(
                            'Got unexpected token type.',
                            Type\PdfTypeException::INVALID_DATA_TYPE
                        );
                    }
                    // "+ 0" converts the numeric string into an int or float
                    return PdfNumeric::create($token + 0);
                }

                if ($token === 'true' || $token === 'false') {
                    $this->ensureExpectedType($token, $expectedType);
                    return PdfBoolean::create($token === 'true');
                }

                if ($token === 'null') {
                    $this->ensureExpectedType($token, $expectedType);
                    return new PdfNull();
                }

                // everything else is returned as a plain token
                if ($expectedType !== null && $expectedType !== PdfToken::class) {
                    throw new Type\PdfTypeException(
                        'Got unexpected token type.',
                        Type\PdfTypeException::INVALID_DATA_TYPE
                    );
                }

                $v = new PdfToken();
                $v->value = $token;

                return $v;
        }
    }

    /**
     * Parses a literal string from the stream reader.
     *
     * @return PdfString
     */
    protected function parsePdfString()
    {
        return PdfString::parse($this->streamReader);
    }

    /**
     * Parses a hexadecimal string from the stream reader.
     *
     * @return false|PdfHexString
     */
    protected function parsePdfHexString()
    {
        return PdfHexString::parse($this->streamReader);
    }

    /**
     * Parses a dictionary using the tokenizer, the stream reader and this parser.
     *
     * @return bool|PdfDictionary
     * @throws PdfTypeException
     */
    protected function parsePdfDictionary()
    {
        return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
    }

    /**
     * Parses a name using the tokenizer and the stream reader.
     *
     * @return PdfName
     */
    protected function parsePdfName()
    {
        return PdfName::parse($this->tokenizer, $this->streamReader);
    }

    /**
     * Parses an array using the tokenizer and this parser.
     *
     * @return false|PdfArray
     * @throws PdfTypeException
     */
    protected function parsePdfArray()
    {
        return PdfArray::parse($this->tokenizer, $this);
    }

    /**
     * Parses an indirect object with the given object and generation number.
     *
     * @param int $objectNumber
     * @param int $generationNumber
     * @return false|PdfIndirectObject
     * @throws Type\PdfTypeException
     */
    protected function parsePdfIndirectObject($objectNumber, $generationNumber)
    {
        return PdfIndirectObject::parse(
            $objectNumber,
            $generationNumber,
            $this,
            $this->tokenizer,
            $this->streamReader
        );
    }

    /**
     * Ensures that the token will evaluate to an expected object type (or not).
     *
     * Without an expected type every token is accepted. Otherwise the token has to be a
     * key of the internal token-to-class map and its class has to be identical to the
     * expected type. A token that is not part of the map never matches.
     *
     * @param string $token
     * @param string|null $expectedType Class name of the expected value type or null to accept any.
     * @return bool Always true; a mismatch is reported through the exception.
     * @throws Type\PdfTypeException If the token does not lead to the expected type.
     */
    protected function ensureExpectedType($token, $expectedType)
    {
        static $mapping = [
            '(' => PdfString::class,
            '<' => PdfHexString::class,
            '<<' => PdfDictionary::class,
            '/' => PdfName::class,
            '[' => PdfArray::class,
            'true' => PdfBoolean::class,
            'false' => PdfBoolean::class,
            'null' => PdfNull::class
        ];

        if ($expectedType === null) {
            return true;
        }

        // isset() guards against tokens that are not part of the map, which would
        // otherwise raise an "undefined index/array key" notice before the exception.
        if (isset($mapping[$token]) && $mapping[$token] === $expectedType) {
            return true;
        }

        throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
    }
}