| 1441 |
ariadna |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/**
|
|
|
4 |
* This file is part of FPDI
|
|
|
5 |
*
|
|
|
6 |
* @package setasign\Fpdi
|
|
|
7 |
* @copyright Copyright (c) 2024 Setasign GmbH & Co. KG (https://www.setasign.com)
|
|
|
8 |
* @license http://opensource.org/licenses/mit-license The MIT License
|
|
|
9 |
*/
|
|
|
10 |
|
|
|
11 |
namespace setasign\Fpdi\PdfParser\CrossReference;
|
|
|
12 |
|
|
|
13 |
use setasign\Fpdi\PdfParser\PdfParser;
|
|
|
14 |
use setasign\Fpdi\PdfParser\StreamReader;
|
|
|
15 |
|
|
|
16 |
/**
 * Class FixedReader
 *
 * This reader parses single entries of the cross-reference with very little overhead, because the entries
 * are only read when they are needed and not all in a single run.
 */
class FixedReader extends AbstractReader implements ReaderInterface
{
    /**
     * @var StreamReader
     */
    protected $reader;

    /**
     * Data of subsections.
     *
     * The array is keyed by the reader position (getPosition() + getOffset()) directly behind the subsection
     * header line. Each value is a pair of [first object number, number of entries].
     *
     * @var array
     */
    protected $subSections;

    /**
     * FixedReader constructor.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->reader = $parser->getStreamReader();
        // The subsections are read before the parent constructor runs.
        // NOTE(review): presumably the parent continues at the reader position left by read() - confirm.
        $this->read();
        parent::__construct($parser);
    }

    /**
     * Get all subsection data.
     *
     * @return array
     */
    public function getSubSections()
    {
        return $this->subSections;
    }

    /**
     * @inheritdoc
     * @return int|false The value of the first 10 bytes of the entry as an integer, or false if the object number
     *                   is not covered by any subsection, the entry is flagged with "f" or the entry could not be
     *                   read completely.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
            /**
             * @var int $startObject
             * @var int $objectCount
             */
            if ($objectNumber < $startObject || $objectNumber >= ($startObject + $objectCount)) {
                continue;
            }

            // entries have a fixed length of 20 bytes, so the requested one can be addressed directly
            $position = $offset + 20 * ($objectNumber - $startObject);
            $this->reader->ensure($position, 20);
            $line = $this->reader->readBytes(20);

            // A failed or short read (e.g. a truncated table) must not be indexed or converted into an offset.
            if (!\is_string($line) || \strlen($line) < 18) {
                return false;
            }

            // byte 17 carries the entry flag; entries flagged with "f" do not resolve to an offset
            if ($line[17] === 'f') {
                return false;
            }

            return (int) \substr($line, 0, 10);
        }

        return false;
    }

    /**
     * Read the cross-reference.
     *
     * This reader will only read the subsection headers in this method. The offsets are resolved individually
     * and on demand by this information (see getOffsetFor()).
     *
     * @throws CrossReferenceException If the entries are not exactly 20 bytes long or if no subsection was found.
     */
    protected function read()
    {
        $subSections = [];

        $startObject = $entryCount = $lastLineStart = null;
        $validityChecked = false;
        while (($line = $this->reader->readLine(20)) !== false) {
            if (\strpos($line, 'trailer') !== false) {
                // $lastLineStart is still null if no subsection was recorded before the trailer keyword
                $this->reader->reset($lastLineStart);
                break;
            }

            // jump over if line content doesn't match the expected string
            if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                continue;
            }

            // "%d" also accepts signed values: a negative count would move the reader backwards below and
            // could make this loop read the same lines again and again, so such a line is skipped, too.
            if ($entryCount < 0) {
                continue;
            }

            $oldPosition = $this->reader->getPosition();
            $position = $oldPosition + $this->reader->getOffset();

            // the entry length is only verified once, with the first entry that is available
            if (!$validityChecked && $entryCount > 0) {
                $nextLine = $this->reader->readBytes(21);
                /* Check the next line for maximum of 20 bytes and not longer
                 * By catching 21 bytes and trimming the length should be still 21.
                 */
                if (\strlen(\trim($nextLine)) !== 21) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are larger than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_LARGE
                    );
                }

                /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly 18 bytes.
                 * If it would have less bytes the substring would get the first bytes of the next line which would
                 * evaluate to a 20 bytes long string after trimming.
                 */
                if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are less than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_SHORT
                    );
                }

                $validityChecked = true;
            }

            $subSections[$position] = [$startObject, $entryCount];

            // skip all entries of this subsection: they are read lazily in getOffsetFor()
            $lastLineStart = $position + $entryCount * 20;
            $this->reader->reset($lastLineStart);
        }

        // reset after the last correct parsed line
        $this->reader->reset($lastLineStart);

        if (\count($subSections) === 0) {
            throw new CrossReferenceException(
                'No entries found in cross-reference.',
                CrossReferenceException::NO_ENTRIES
            );
        }

        $this->subSections = $subSections;
    }

    /**
     * Fixes an invalid object number shift.
     *
     * This method can be used to repair documents with an invalid subsection header:
     *
     * <code>
     * xref
     * 1 7
     * 0000000000 65535 f
     * 0000000009 00000 n
     * 0000412075 00000 n
     * 0000412172 00000 n
     * 0000412359 00000 n
     * 0000412417 00000 n
     * 0000412468 00000 n
     * </code>
     *
     * The header of the example starts at object number 1 while its first entry is flagged with "f". In this
     * situation the start object number of the subsection is decreased by one.
     *
     * It shall only be called on the first table.
     *
     * @return bool True if the subsection was shifted, false if nothing was changed.
     */
    public function fixFaultySubSectionShift()
    {
        $subSections = $this->getSubSections();
        // only a table with exactly one subsection can be repaired
        if (\count($subSections) !== 1) {
            return false;
        }

        list($startObject, $objectCount) = \current($subSections);
        if ($startObject != 1) {
            return false;
        }

        // object 1 resolves to an offset, so there is nothing to fix
        if ($this->getOffsetFor(1) !== false) {
            return false;
        }

        $this->subSections[\key($subSections)] = [$startObject - 1, $objectCount];

        return true;
    }
}
|