Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
/**
4
 * This file is part of FPDI
5
 *
6
 * @package   setasign\Fpdi
7
 * @copyright Copyright (c) 2023 Setasign GmbH & Co. KG (https://www.setasign.com)
8
 * @license   http://opensource.org/licenses/mit-license The MIT License
9
 */
10
 
11
namespace setasign\Fpdi\PdfParser;
12
 
13
use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
14
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
15
use setasign\Fpdi\PdfParser\Type\PdfArray;
16
use setasign\Fpdi\PdfParser\Type\PdfBoolean;
17
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
18
use setasign\Fpdi\PdfParser\Type\PdfHexString;
19
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
20
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
21
use setasign\Fpdi\PdfParser\Type\PdfName;
22
use setasign\Fpdi\PdfParser\Type\PdfNull;
23
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
24
use setasign\Fpdi\PdfParser\Type\PdfStream;
25
use setasign\Fpdi\PdfParser\Type\PdfString;
26
use setasign\Fpdi\PdfParser\Type\PdfToken;
27
use setasign\Fpdi\PdfParser\Type\PdfType;
28
use setasign\Fpdi\PdfParser\Type\PdfTypeException;
29
 
30
/**
31
 * A PDF parser class
32
 */
33
class PdfParser
34
{
35
    /**
     * The stream reader this parser reads from (handed over in the constructor).
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * The tokenizer created in the constructor on top of the stream reader.
     *
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * The trimmed file header line, filled by resolveFileHeader() (unset until then).
     *
     * @var string
     */
    protected $fileHeader;

    /**
     * The offset at which the "%PDF-" marker was found, filled by resolveFileHeader().
     *
     * @var int
     */
    protected $fileHeaderOffset;

    /**
     * The cross-reference instance, created on demand by getCrossReference() and reset by cleanUp().
     *
     * @var CrossReference|null
     */
    protected $xref;

    /**
     * Indirect objects that were read with caching enabled, indexed by their object number.
     *
     * @var array
     */
    protected $objects = [];
70
 
71
    /**
     * Creates a parser that reads from the given stream reader.
     *
     * A tokenizer working on the same stream reader is created alongside.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->tokenizer = new Tokenizer($streamReader);
        $this->streamReader = $streamReader;
    }
81
 
82
    /**
     * Releases the cross-reference instance held by this parser.
     *
     * The cross-reference is constructed with a reference back to this parser
     * (see getCrossReference()), so dropping it here breaks that cycle.
     *
     * @internal
     */
    public function cleanUp()
    {
        $this->xref = null;
    }
91
 
92
    /**
     * Returns the stream reader that was passed to the constructor.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }
101
 
102
    /**
     * Returns the tokenizer that was created in the constructor.
     *
     * @return Tokenizer
     */
    public function getTokenizer()
    {
        return $this->tokenizer;
    }
111
 
112
    /**
     * Locates the "%PDF-" marker and reads the file header line.
     *
     * The header and its offset are stored in the instance, so the stream is only searched
     * as long as no header has been resolved yet. The search resets the stream reader to
     * position 0 and asks it to increase its buffer length by 100 per round; it gives up
     * when the reader cannot grow any further or the round limit is reached.
     *
     * @return int The offset at which the file header was found.
     * @throws PdfParserException If no file header could be found.
     */
    protected function resolveFileHeader()
    {
        if ($this->fileHeader) {
            return $this->fileHeaderOffset;
        }

        $reader = $this->streamReader;
        $reader->reset(0);

        $roundsLeft = 1000;
        while (($position = \strpos($reader->getBuffer(false), '%PDF-')) === false) {
            // No marker in the current buffer: try to enlarge it, or give up.
            if (!$reader->increaseLength(100) || (--$roundsLeft === 0)) {
                throw new PdfParserException(
                    'Unable to find PDF file header.',
                    PdfParserException::FILE_HEADER_NOT_FOUND
                );
            }
        }

        $this->fileHeaderOffset = $position;
        $reader->setOffset($position);
        $this->fileHeader = \trim($reader->readLine());

        return $position;
    }
147
 
148
    /**
     * Returns the cross-reference instance, creating it on first use.
     *
     * The instance is built from this parser and the resolved file header offset and is
     * kept until cleanUp() is called.
     *
     * @return CrossReference
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCrossReference()
    {
        if ($this->xref !== null) {
            return $this->xref;
        }

        $this->xref = new CrossReference($this, $this->resolveFileHeader());

        return $this->xref;
    }
163
 
164
    /**
     * Get the PDF version.
     *
     * The version is taken from the file header ("%PDF-<major>.<minor>"). If the catalog
     * dictionary has a "Version" entry that splits into exactly two parts at the dot, that
     * entry takes precedence over the header.
     *
     * @return int[] An array of major and minor version.
     * @throws CrossReferenceException
     * @throws PdfParserException If the header cannot be found or holds no version.
     * @throws Type\PdfTypeException
     */
    public function getPdfVersion()
    {
        $this->resolveFileHeader();

        // preg_match() returns 1 on a match, 0 on no match and false on failure. Everything
        // but 1 leaves $result without the two capture groups needed below.
        if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) !== 1) {
            throw new PdfParserException(
                'Unable to extract PDF version from file header.',
                PdfParserException::PDF_VERSION_NOT_FOUND
            );
        }
        list(, $major, $minor) = $result;

        $catalog = $this->getCatalog();
        if (isset($catalog->value['Version'])) {
            $versionParts = \explode(
                '.',
                PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value)
            );
            // Only a well-formed "<major>.<minor>" value overrides the header version.
            if (\count($versionParts) === 2) {
                list($major, $minor) = $versionParts;
            }
        }

        return [(int) $major, (int) $minor];
    }
195
 
196
    /**
     * Returns the catalog dictionary.
     *
     * The catalog is the resolved "Root" entry of the trailer delivered by the
     * cross-reference; the result is passed through PdfDictionary::ensure().
     *
     * @return PdfDictionary
     * @throws Type\PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCatalog()
    {
        $root = PdfDictionary::get($this->getCrossReference()->getTrailer(), 'Root');

        return PdfDictionary::ensure(PdfType::resolve($root, $this));
    }
212
 
213
    /**
     * Returns the indirect object with the given object number.
     *
     * An object that is already held in the internal cache is returned from there, no matter
     * what $cache says. Otherwise it is fetched through the cross-reference and only stored
     * in the cache if $cache is true.
     *
     * @param int $objectNumber
     * @param bool $cache Whether a freshly read object should be kept for later calls.
     * @return PdfIndirectObject
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getIndirectObject($objectNumber, $cache = false)
    {
        $number = (int) $objectNumber;

        if (!isset($this->objects[$number])) {
            $object = $this->getCrossReference()->getIndirectObject($number);
            if ($cache) {
                $this->objects[$number] = $object;
            }

            return $object;
        }

        return $this->objects[$number];
    }
237
 
238
    /**
     * Reads a PDF value.
     *
     * If no token is given, the next token is fetched from the tokenizer. The token decides
     * which kind of value is parsed. If $expectedType is given (a class name), a value of any
     * other type results in an exception.
     *
     * @param null|bool|string $token
     * @param null|string $expectedType
     * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken
     * @throws Type\PdfTypeException
     */
    public function readValue($token = null, $expectedType = null)
    {
        if ($token === null) {
            $token = $this->tokenizer->getNextToken();
        }

        if ($token === false) {
            // No token available: only acceptable if the caller did not ask for a specific type.
            if ($expectedType === null) {
                return false;
            }

            throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
        }

        switch ($token) {
            case '(':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfString();

            case '<':
                // A single "<" opens a hex string, a directly following second "<" a dictionary.
                if ($this->streamReader->getByte() !== '<') {
                    $this->ensureExpectedType($token, $expectedType);
                    return $this->parsePdfHexString();
                }

                $this->ensureExpectedType('<<', $expectedType);
                $this->streamReader->addOffset(1);
                return $this->parsePdfDictionary();

            case '/':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfName();

            case '[':
                $this->ensureExpectedType($token, $expectedType);
                return $this->parsePdfArray();
        }

        if (\is_numeric($token)) {
            // Look ahead: "<number> <number> obj" and "<number> <number> R" are an indirect
            // object and an indirect object reference respectively.
            $second = $this->tokenizer->getNextToken();
            if ($second !== false) {
                if (\is_numeric($second)) {
                    $third = $this->tokenizer->getNextToken();
                    if ($third !== false) {
                        switch ($third) {
                            case 'obj':
                                if ($expectedType === null || $expectedType === PdfIndirectObject::class) {
                                    return $this->parsePdfIndirectObject((int) $token, (int) $second);
                                }

                                throw new Type\PdfTypeException(
                                    'Got unexpected token type.',
                                    Type\PdfTypeException::INVALID_DATA_TYPE
                                );

                            case 'R':
                                if (
                                    $expectedType === null ||
                                    $expectedType === PdfIndirectObjectReference::class
                                ) {
                                    return PdfIndirectObjectReference::create((int) $token, (int) $second);
                                }

                                throw new Type\PdfTypeException(
                                    'Got unexpected token type.',
                                    Type\PdfTypeException::INVALID_DATA_TYPE
                                );
                        }

                        // Neither "obj" nor "R": push the third token back onto the tokenizer stack.
                        $this->tokenizer->pushStack($third);
                    }
                }

                // The second token does not belong to this value: push it back as well.
                $this->tokenizer->pushStack($second);
            }

            if ($expectedType === null || $expectedType === PdfNumeric::class) {
                return PdfNumeric::create($token + 0);
            }

            throw new Type\PdfTypeException(
                'Got unexpected token type.',
                Type\PdfTypeException::INVALID_DATA_TYPE
            );
        }

        if ($token === 'true' || $token === 'false') {
            $this->ensureExpectedType($token, $expectedType);
            return PdfBoolean::create($token === 'true');
        }

        if ($token === 'null') {
            $this->ensureExpectedType($token, $expectedType);
            return new PdfNull();
        }

        // Everything else is handed back as a plain token.
        if ($expectedType === null || $expectedType === PdfToken::class) {
            $plainToken = new PdfToken();
            $plainToken->value = $token;

            return $plainToken;
        }

        throw new Type\PdfTypeException(
            'Got unexpected token type.',
            Type\PdfTypeException::INVALID_DATA_TYPE
        );
    }
348
 
349
    /**
     * Parses a string value by delegating to PdfString::parse() with the stream reader.
     *
     * @return PdfString
     */
    protected function parsePdfString()
    {
        return PdfString::parse($this->streamReader);
    }
356
 
357
    /**
     * Parses a hexadecimal string value by delegating to PdfHexString::parse() with the stream reader.
     *
     * @return false|PdfHexString
     */
    protected function parsePdfHexString()
    {
        return PdfHexString::parse($this->streamReader);
    }
364
 
365
    /**
     * Parses a dictionary value by delegating to PdfDictionary::parse().
     *
     * The tokenizer, the stream reader and this parser are handed over to the type class.
     *
     * @return bool|PdfDictionary
     * @throws PdfTypeException
     */
    protected function parsePdfDictionary()
    {
        return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
    }
373
 
374
    /**
     * Parses a name value by delegating to PdfName::parse() with the tokenizer and stream reader.
     *
     * @return PdfName
     */
    protected function parsePdfName()
    {
        return PdfName::parse($this->tokenizer, $this->streamReader);
    }
381
 
382
    /**
     * Parses an array value by delegating to PdfArray::parse() with the tokenizer and this parser.
     *
     * @return false|PdfArray
     * @throws PdfTypeException
     */
    protected function parsePdfArray()
    {
        return PdfArray::parse($this->tokenizer, $this);
    }
390
 
391
    /**
     * Parses an indirect object by delegating to PdfIndirectObject::parse().
     *
     * Besides both numbers, this parser, its tokenizer and its stream reader are handed over.
     *
     * @param int $objectNumber
     * @param int $generationNumber
     * @return false|PdfIndirectObject
     * @throws Type\PdfTypeException
     */
    protected function parsePdfIndirectObject($objectNumber, $generationNumber)
    {
        return PdfIndirectObject::parse($objectNumber, $generationNumber, $this, $this->tokenizer, $this->streamReader);
    }
407
 
408
    /**
     * Ensures that the token will evaluate to an expected object type (or not).
     *
     * Without an expected type every token is accepted. Otherwise the token is looked up in
     * a fixed token-to-class map and has to map to exactly the expected class name. A token
     * that is not part of the map never matches.
     *
     * @param string $token
     * @param string|null $expectedType
     * @return bool Always true; a mismatch is reported through the exception.
     * @throws Type\PdfTypeException If the token does not map to the expected type.
     */
    protected function ensureExpectedType($token, $expectedType)
    {
        static $mapping = [
            '(' => PdfString::class,
            '<' => PdfHexString::class,
            '<<' => PdfDictionary::class,
            '/' => PdfName::class,
            '[' => PdfArray::class,
            'true' => PdfBoolean::class,
            'false' => PdfBoolean::class,
            'null' => PdfNull::class
        ];

        if ($expectedType === null) {
            return true;
        }

        // isset() keeps an unmapped token from raising an "undefined index/array key"
        // notice or warning before the exception below is thrown.
        if (isset($mapping[$token]) && $mapping[$token] === $expectedType) {
            return true;
        }

        throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
    }
435
}