| 1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
declare(strict_types=1);
|
|
|
4 |
|
|
|
5 |
namespace MaxMind\Db\Reader;
|
|
|
6 |
|
|
|
7 |
// @codingStandardsIgnoreLine
|
|
|
8 |
use RuntimeException;
|
|
|
9 |
|
|
|
10 |
/**
 * Decodes values from the data section of a MaxMind DB file.
 *
 * Every value starts with a control byte: the top three bits carry the type
 * number and the low five bits carry the size (or, for pointers, the pointer
 * length plus the pointer's high bits). All reads go through Util::read() on
 * the stream handed to the constructor.
 */
class Decoder
{
    /**
     * Stream the encoded data is read from.
     *
     * @var resource
     */
    private $fileStream;

    /**
     * Offset added to every pointer value decoded from the stream.
     *
     * @var int
     */
    private $pointerBase;

    /**
     * log2($pointerBase) / 8, or 0.0 when the pointer base is zero.
     *
     * @var float
     */
    private $pointerBaseByteSize;

    /**
     * This is only used for unit testing.
     *
     * When true, decode() returns a decoded pointer value instead of
     * following it.
     *
     * @var bool
     */
    private $pointerTestHack;

    /**
     * Whether big-endian byte strings must be reversed before being unpacked
     * with a machine-byte-order format code (true on little-endian hosts).
     *
     * @var bool
     */
    private $switchByteOrder;

    // Type numbers. Values 1-7 come straight from the top three bits of the
    // control byte; 0 marks an extended type whose number is the following
    // byte plus 7 (so every extended type is >= 8).
    private const _EXTENDED = 0;
    private const _POINTER = 1;
    private const _UTF8_STRING = 2;
    private const _DOUBLE = 3;
    private const _BYTES = 4;
    private const _UINT16 = 5;
    private const _UINT32 = 6;
    private const _MAP = 7;
    private const _INT32 = 8;
    private const _UINT64 = 9;
    private const _UINT128 = 10;
    private const _ARRAY = 11;
    private const _CONTAINER = 12;
    private const _END_MARKER = 13;
    private const _BOOLEAN = 14;
    private const _FLOAT = 15;

    /**
     * @param resource $fileStream      stream to read encoded data from
     * @param int      $pointerBase     offset added to every decoded pointer
     * @param bool     $pointerTestHack when true, decode() returns pointer
     *                                  values instead of following them
     */
    public function __construct(
        $fileStream,
        int $pointerBase = 0,
        bool $pointerTestHack = false
    ) {
        $this->fileStream = $fileStream;
        $this->pointerBase = $pointerBase;

        // Use a float literal for the zero case so the property always holds
        // the float its docblock promises.
        $this->pointerBaseByteSize = $pointerBase > 0 ? log($pointerBase, 2) / 8 : 0.0;
        $this->pointerTestHack = $pointerTestHack;

        $this->switchByteOrder = $this->isPlatformLittleEndian();
    }

    /**
     * Decodes the value whose control byte sits at $offset.
     *
     * @param int $offset position of the control byte in the stream
     *
     * @throws InvalidDatabaseException if an extended type resolves to a type
     *                                  number below 8, or the type is unknown
     * @throws RuntimeException         if a pointer or integer cannot be
     *                                  represented on this platform
     *
     * @return array [decoded value, offset of the next field]; in pointer
     *               test mode a pointer yields the one-element [pointer]
     */
    public function decode(int $offset): array
    {
        $ctrlByte = \ord(Util::read($this->fileStream, $offset, 1));
        ++$offset;

        $type = $ctrlByte >> 5;

        // Pointers are a special case, we don't read the next $size bytes, we
        // use the size to determine the length of the pointer and then follow
        // it.
        if ($type === self::_POINTER) {
            [$pointer, $offset] = $this->decodePointer($ctrlByte, $offset);

            // for unit testing
            if ($this->pointerTestHack) {
                return [$pointer];
            }

            // Only the pointed-to value is kept; the caller continues right
            // after the pointer itself, not after the pointed-to data.
            [$result] = $this->decode($pointer);

            return [$result, $offset];
        }

        if ($type === self::_EXTENDED) {
            $nextByte = \ord(Util::read($this->fileStream, $offset, 1));

            $type = $nextByte + 7;

            if ($type < 8) {
                throw new InvalidDatabaseException(
                    'Something went horribly wrong in the decoder. An extended type '
                    . 'resolved to a type number < 8 ('
                    . $type
                    . ')'
                );
            }

            ++$offset;
        }

        [$size, $offset] = $this->sizeFromCtrlByte($ctrlByte, $offset);

        return $this->decodeByType($type, $offset, $size);
    }

    /**
     * Decodes the payload of a field whose type and size are already known.
     *
     * @param int $type   one of the type constants of this class
     * @param int $offset position of the first payload byte
     * @param int $size   size taken from the control byte(s)
     *
     * @throws InvalidDatabaseException on an unknown type or a size that does
     *                                  not match the type
     *
     * @return array [decoded value, offset of the next field]
     */
    private function decodeByType(int $type, int $offset, int $size): array
    {
        // These types do not carry $size payload bytes: for maps and arrays
        // the size is an entry count, for booleans it is the value itself.
        switch ($type) {
            case self::_MAP:
                return $this->decodeMap($size, $offset);

            case self::_ARRAY:
                return $this->decodeArray($size, $offset);

            case self::_BOOLEAN:
                return [$this->decodeBoolean($size), $offset];
        }

        $newOffset = $offset + $size;
        $bytes = Util::read($this->fileStream, $offset, $size);

        switch ($type) {
            case self::_BYTES:
            case self::_UTF8_STRING:
                return [$bytes, $newOffset];

            case self::_DOUBLE:
                $this->verifySize(8, $size);

                return [$this->decodeDouble($bytes), $newOffset];

            case self::_FLOAT:
                $this->verifySize(4, $size);

                return [$this->decodeFloat($bytes), $newOffset];

            case self::_INT32:
                return [$this->decodeInt32($bytes, $size), $newOffset];

            case self::_UINT16:
            case self::_UINT32:
            case self::_UINT64:
            case self::_UINT128:
                return [$this->decodeUint($bytes, $size), $newOffset];

            default:
                throw new InvalidDatabaseException(
                    'Unknown or unexpected type: ' . $type
                );
        }
    }

    /**
     * Ensures a fixed-width field has exactly the expected size.
     *
     * @throws InvalidDatabaseException if the sizes differ
     */
    private function verifySize(int $expected, int $actual): void
    {
        if ($expected !== $actual) {
            throw new InvalidDatabaseException(
                "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
            );
        }
    }

    /**
     * Decodes $size consecutive values starting at $offset into a list.
     *
     * @return array [list of values, offset of the next field]
     */
    private function decodeArray(int $size, int $offset): array
    {
        $array = [];

        for ($i = 0; $i < $size; ++$i) {
            [$value, $offset] = $this->decode($offset);
            $array[] = $value;
        }

        return [$array, $offset];
    }

    /**
     * A boolean has no payload; any non-zero size means true.
     */
    private function decodeBoolean(int $size): bool
    {
        return $size !== 0;
    }

    /**
     * Unpacks 8 bytes as a big-endian double.
     */
    private function decodeDouble(string $bytes): float
    {
        // This assumes IEEE 754 doubles, but most (all?) modern platforms
        // use them.
        [, $double] = unpack('E', $bytes);

        return $double;
    }

    /**
     * Unpacks 4 bytes as a big-endian float.
     */
    private function decodeFloat(string $bytes): float
    {
        // This assumes IEEE 754 floats, but most (all?) modern platforms
        // use them.
        [, $float] = unpack('G', $bytes);

        return $float;
    }

    /**
     * Decodes a big-endian signed 32-bit integer stored in 0 to 4 bytes.
     *
     * @throws InvalidDatabaseException if $size is larger than 4
     */
    private function decodeInt32(string $bytes, int $size): int
    {
        switch ($size) {
            case 0:
                return 0;

            case 1:
            case 2:
            case 3:
                // Left-pad with zero bytes to the 4 bytes 'l' expects.
                $bytes = str_pad($bytes, 4, "\x00", \STR_PAD_LEFT);

                break;

            case 4:
                break;

            default:
                throw new InvalidDatabaseException(
                    "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
                );
        }

        // 'l' unpacks in machine byte order, so the big-endian input is
        // reversed first on little-endian hosts.
        [, $int] = unpack('l', $this->maybeSwitchByteOrder($bytes));

        return $int;
    }

    /**
     * Decodes $size key/value pairs starting at $offset into an associative
     * array.
     *
     * @return array [map, offset of the next field]
     */
    private function decodeMap(int $size, int $offset): array
    {
        $map = [];

        for ($i = 0; $i < $size; ++$i) {
            [$key, $offset] = $this->decode($offset);
            [$value, $offset] = $this->decode($offset);
            $map[$key] = $value;
        }

        return [$map, $offset];
    }

    /**
     * Decodes a pointer whose control byte has already been read.
     *
     * Bits 3-4 of the control byte give the number of following bytes (1-4).
     * For 1-3 byte pointers the low three control-byte bits are the most
     * significant bits of the value.
     *
     * @param int $ctrlByte the pointer's control byte
     * @param int $offset   position of the first byte after the control byte
     *
     * @throws RuntimeException         if the resulting offset exceeds
     *                                  PHP_INT_MAX
     * @throws InvalidDatabaseException on an unexpected pointer size
     *
     * @return array [absolute pointer (pointer base included), offset of the
     *               byte following the pointer]
     */
    private function decodePointer(int $ctrlByte, int $offset): array
    {
        $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

        $buffer = Util::read($this->fileStream, $offset, $pointerSize);
        $offset = $offset + $pointerSize;

        switch ($pointerSize) {
            case 1:
                // 3 + 8 = 11 bits of pointer value.
                $packed = \chr($ctrlByte & 0x7) . $buffer;
                [, $pointer] = unpack('n', $packed);
                $pointer += $this->pointerBase;

                break;

            case 2:
                // 3 + 16 = 19 bits; 2048 = 2^11 skips the range a 1-byte
                // pointer already covers.
                $packed = "\x00" . \chr($ctrlByte & 0x7) . $buffer;
                [, $pointer] = unpack('N', $packed);
                $pointer += $this->pointerBase + 2048;

                break;

            case 3:
                // 3 + 24 = 27 bits; 526336 = 2048 + 2^19 skips the ranges of
                // the two shorter pointer forms.
                $packed = \chr($ctrlByte & 0x7) . $buffer;

                // It is safe to use 'N' here, even on 32 bit machines as the
                // first bit is 0.
                [, $pointer] = unpack('N', $packed);
                $pointer += $this->pointerBase + 526336;

                break;

            case 4:
                // We cannot use unpack here as we might overflow on 32 bit
                // machines
                $pointerOffset = $this->decodeUint($buffer, $pointerSize);

                $pointerBase = $this->pointerBase;

                if (\PHP_INT_MAX - $pointerBase >= $pointerOffset) {
                    $pointer = $pointerOffset + $pointerBase;
                } else {
                    throw new RuntimeException(
                        'The database offset is too large to be represented on your platform.'
                    );
                }

                break;

            default:
                throw new InvalidDatabaseException(
                    'Unexpected pointer size ' . $pointerSize
                );
        }

        return [$pointer, $offset];
    }

    /**
     * Decodes a big-endian unsigned integer of $byteLength bytes.
     *
     * Returns a native int when the value fits into a signed PHP integer and
     * a decimal string (computed with gmp or bcmath) otherwise. A zero-length
     * value decodes to int 0.
     *
     * @param string $bytes      the raw big-endian bytes
     * @param int    $byteLength number of bytes of $bytes to decode
     *
     * @throws RuntimeException if the value needs arbitrary precision and
     *                          neither gmp nor bcmath is loaded
     */
    // @phpstan-ignore-next-line
    private function decodeUint(string $bytes, int $byteLength)
    {
        if ($byteLength === 0) {
            return 0;
        }

        // PHP integers are signed. PHP_INT_SIZE - 1 is the number of
        // complete bytes that can be converted to an integer. However,
        // we can convert another byte if the leading bit is zero.
        $useRealInts = $byteLength <= \PHP_INT_SIZE - 1
            || ($byteLength === \PHP_INT_SIZE && (\ord($bytes[0]) & 0x80) === 0);

        if ($useRealInts) {
            $integer = 0;

            for ($i = 0; $i < $byteLength; ++$i) {
                $integer = ($integer << 8) + \ord($bytes[$i]);
            }

            return $integer;
        }

        // We only use gmp or bcmath if the final value is too big. Which
        // backend is available cannot change mid-loop, so decide it once.
        $useGmp = \extension_loaded('gmp');

        if (!$useGmp && !\extension_loaded('bcmath')) {
            throw new RuntimeException(
                'The gmp or bcmath extension must be installed to read this database.'
            );
        }

        $integer = '0';

        for ($i = 0; $i < $byteLength; ++$i) {
            $part = \ord($bytes[$i]);

            $integer = $useGmp
                ? gmp_strval(gmp_add(gmp_mul($integer, '256'), $part))
                : bcadd(bcmul($integer, '256'), (string) $part);
        }

        return $integer;
    }

    /**
     * Extracts the payload size from a control byte, reading extra size
     * bytes from the stream when the 5-bit size field is 29, 30 or 31.
     *
     * @param int $ctrlByte the field's control byte
     * @param int $offset   position of the first byte after the control (and
     *                      extended type) byte
     *
     * @return array [size, offset of the first payload byte]
     */
    private function sizeFromCtrlByte(int $ctrlByte, int $offset): array
    {
        $size = $ctrlByte & 0x1F;

        if ($size < 29) {
            return [$size, $offset];
        }

        // 29, 30 and 31 mean the size continues in 1, 2 or 3 extra bytes.
        $bytesToRead = $size - 28;
        $bytes = Util::read($this->fileStream, $offset, $bytesToRead);

        if ($size === 29) {
            $size = 29 + \ord($bytes);
        } elseif ($size === 30) {
            // 285 = 29 + 256: first value past the one-byte form.
            [, $adjust] = unpack('n', $bytes);
            $size = 285 + $adjust;
        } else {
            // 65821 = 285 + 65536: first value past the two-byte form. The
            // three bytes are zero-padded to the four that 'N' expects.
            [, $adjust] = unpack('N', "\x00" . $bytes);
            $size = $adjust + 65821;
        }

        return [$size, $offset + $bytesToRead];
    }

    /**
     * Reverses $bytes on little-endian hosts and returns it unchanged
     * otherwise.
     */
    private function maybeSwitchByteOrder(string $bytes): string
    {
        return $this->switchByteOrder ? strrev($bytes) : $bytes;
    }

    /**
     * Detects a little-endian host by packing a 16-bit value in machine byte
     * order ('S') and reading it back as explicit little-endian ('v').
     */
    private function isPlatformLittleEndian(): bool
    {
        $testint = 0x00FF;
        $packed = pack('S', $testint);

        return $testint === current(unpack('v', $packed));
    }
}
|