| 1441 |
ariadna |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
namespace PhpOffice\PhpSpreadsheet\Reader;
|
|
|
4 |
|
|
|
5 |
use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
|
|
|
6 |
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
|
|
|
7 |
use PhpOffice\PhpSpreadsheet\Shared\File;
|
|
|
8 |
use PhpOffice\PhpSpreadsheet\Shared\OLERead;
|
|
|
9 |
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
|
|
|
10 |
use PhpOffice\PhpSpreadsheet\Style\Border;
|
|
|
11 |
|
|
|
12 |
/**
 * Shared constants and low-level binary decoding helpers for reading BIFF
 * (Excel 5.0 - Excel 2003) workbook streams.
 *
 * The helpers operate on raw little-endian byte strings and perform no
 * bounds checking: callers must pass data that is at least as long as the
 * fields being read.
 */
class XlsBase extends BaseReader
{
    // 32-bit high-byte masks. Written as shifts rather than hex literals;
    // presumably so the value stays an integer on 32-bit builds - TODO confirm.
    final protected const HIGH_ORDER_BIT = 0x80 << 24;
    final protected const FC000000 = 0xFC << 24;
    final protected const FE000000 = 0xFE << 24;

    // ParseXL definitions
    final public const XLS_BIFF8 = 0x0600;
    final public const XLS_BIFF7 = 0x0500;
    final public const XLS_WORKBOOKGLOBALS = 0x0005;
    final public const XLS_WORKSHEET = 0x0010;

    // record identifiers
    final public const XLS_TYPE_FORMULA = 0x0006;
    final public const XLS_TYPE_EOF = 0x000A;
    final public const XLS_TYPE_PROTECT = 0x0012;
    final public const XLS_TYPE_OBJECTPROTECT = 0x0063;
    final public const XLS_TYPE_SCENPROTECT = 0x00DD;
    final public const XLS_TYPE_PASSWORD = 0x0013;
    final public const XLS_TYPE_HEADER = 0x0014;
    final public const XLS_TYPE_FOOTER = 0x0015;
    final public const XLS_TYPE_EXTERNSHEET = 0x0017;
    final public const XLS_TYPE_DEFINEDNAME = 0x0018;
    final public const XLS_TYPE_VERTICALPAGEBREAKS = 0x001A;
    final public const XLS_TYPE_HORIZONTALPAGEBREAKS = 0x001B;
    final public const XLS_TYPE_NOTE = 0x001C;
    final public const XLS_TYPE_SELECTION = 0x001D;
    final public const XLS_TYPE_DATEMODE = 0x0022;
    final public const XLS_TYPE_EXTERNNAME = 0x0023;
    final public const XLS_TYPE_LEFTMARGIN = 0x0026;
    final public const XLS_TYPE_RIGHTMARGIN = 0x0027;
    final public const XLS_TYPE_TOPMARGIN = 0x0028;
    final public const XLS_TYPE_BOTTOMMARGIN = 0x0029;
    final public const XLS_TYPE_PRINTGRIDLINES = 0x002B;
    final public const XLS_TYPE_FILEPASS = 0x002F;
    final public const XLS_TYPE_FONT = 0x0031;
    final public const XLS_TYPE_CONTINUE = 0x003C;
    final public const XLS_TYPE_PANE = 0x0041;
    final public const XLS_TYPE_CODEPAGE = 0x0042;
    final public const XLS_TYPE_DEFCOLWIDTH = 0x0055;
    final public const XLS_TYPE_OBJ = 0x005D;
    final public const XLS_TYPE_COLINFO = 0x007D;
    final public const XLS_TYPE_IMDATA = 0x007F;
    final public const XLS_TYPE_SHEETPR = 0x0081;
    final public const XLS_TYPE_HCENTER = 0x0083;
    final public const XLS_TYPE_VCENTER = 0x0084;
    final public const XLS_TYPE_SHEET = 0x0085;
    final public const XLS_TYPE_PALETTE = 0x0092;
    final public const XLS_TYPE_SCL = 0x00A0;
    final public const XLS_TYPE_PAGESETUP = 0x00A1;
    final public const XLS_TYPE_MULRK = 0x00BD;
    final public const XLS_TYPE_MULBLANK = 0x00BE;
    final public const XLS_TYPE_DBCELL = 0x00D7;
    final public const XLS_TYPE_XF = 0x00E0;
    final public const XLS_TYPE_MERGEDCELLS = 0x00E5;
    final public const XLS_TYPE_MSODRAWINGGROUP = 0x00EB;
    final public const XLS_TYPE_MSODRAWING = 0x00EC;
    final public const XLS_TYPE_SST = 0x00FC;
    final public const XLS_TYPE_LABELSST = 0x00FD;
    final public const XLS_TYPE_EXTSST = 0x00FF;
    final public const XLS_TYPE_EXTERNALBOOK = 0x01AE;
    final public const XLS_TYPE_DATAVALIDATIONS = 0x01B2;
    final public const XLS_TYPE_TXO = 0x01B6;
    final public const XLS_TYPE_HYPERLINK = 0x01B8;
    final public const XLS_TYPE_DATAVALIDATION = 0x01BE;
    final public const XLS_TYPE_DIMENSION = 0x0200;
    final public const XLS_TYPE_BLANK = 0x0201;
    final public const XLS_TYPE_NUMBER = 0x0203;
    final public const XLS_TYPE_LABEL = 0x0204;
    final public const XLS_TYPE_BOOLERR = 0x0205;
    final public const XLS_TYPE_STRING = 0x0207;
    final public const XLS_TYPE_ROW = 0x0208;
    final public const XLS_TYPE_INDEX = 0x020B;
    final public const XLS_TYPE_ARRAY = 0x0221;
    final public const XLS_TYPE_DEFAULTROWHEIGHT = 0x0225;
    final public const XLS_TYPE_WINDOW2 = 0x023E;
    final public const XLS_TYPE_RK = 0x027E;
    final public const XLS_TYPE_STYLE = 0x0293;
    final public const XLS_TYPE_FORMAT = 0x041E;
    final public const XLS_TYPE_SHAREDFMLA = 0x04BC;
    final public const XLS_TYPE_BOF = 0x0809;
    final public const XLS_TYPE_SHEETPROTECTION = 0x0867;
    final public const XLS_TYPE_RANGEPROTECTION = 0x0868;
    final public const XLS_TYPE_SHEETLAYOUT = 0x0862;
    final public const XLS_TYPE_XFEXT = 0x087D;
    final public const XLS_TYPE_PAGELAYOUTVIEW = 0x088B;
    final public const XLS_TYPE_CFHEADER = 0x01B0;
    final public const XLS_TYPE_CFRULE = 0x01B1;
    final public const XLS_TYPE_UNKNOWN = 0xFFFF;

    // Encryption type
    final public const MS_BIFF_CRYPTO_NONE = 0;
    final public const MS_BIFF_CRYPTO_XOR = 1;
    final public const MS_BIFF_CRYPTO_RC4 = 2;

    // Size of stream blocks when using RC4 encryption
    final public const REKEY_BLOCK = 0x400;

    // Border style lookup: the list index is the border style code noted
    // beside each entry.
    // should be consistent with Writer\Xls\Style\CellBorder
    final public const BORDER_STYLE_MAP = [
        Border::BORDER_NONE, // => 0x00,
        Border::BORDER_THIN, // => 0x01,
        Border::BORDER_MEDIUM, // => 0x02,
        Border::BORDER_DASHED, // => 0x03,
        Border::BORDER_DOTTED, // => 0x04,
        Border::BORDER_THICK, // => 0x05,
        Border::BORDER_DOUBLE, // => 0x06,
        Border::BORDER_HAIR, // => 0x07,
        Border::BORDER_MEDIUMDASHED, // => 0x08,
        Border::BORDER_DASHDOT, // => 0x09,
        Border::BORDER_MEDIUMDASHDOT, // => 0x0A,
        Border::BORDER_DASHDOTDOT, // => 0x0B,
        Border::BORDER_MEDIUMDASHDOTDOT, // => 0x0C,
        Border::BORDER_SLANTDASHDOT, // => 0x0D,
        Border::BORDER_OMIT, // => 0x0E,
        Border::BORDER_OMIT, // => 0x0F,
    ];

    /**
     * Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
     * For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
     */
    protected string $codepage = '';

    /**
     * Set the codepage used to decode BIFF5 strings.
     *
     * @param string $codepage Codepage name; must be accepted by CodePage::validate()
     *
     * @throws PhpSpreadsheetException when the codepage is not recognised
     */
    public function setCodepage(string $codepage): void
    {
        if (CodePage::validate($codepage) === false) {
            throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
        }

        $this->codepage = $codepage;
    }

    /**
     * Get the codepage currently used to decode BIFF5 strings.
     */
    public function getCodepage(): string
    {
        return $this->codepage;
    }

    /**
     * Can the current IReader read the file?
     *
     * Returns false when the file fails File::testFileNoThrow(), when reading
     * it as an OLE container raises a PhpSpreadsheetException, or (see the
     * note in the body) when the container exposes no workbook stream.
     */
    public function canRead(string $filename): bool
    {
        if (File::testFileNoThrow($filename) === false) {
            return false;
        }

        try {
            // Use ParseXL for the hard work.
            $ole = new OLERead();

            // get excel data
            $ole->read($filename);
            if ($ole->wrkbook === null) {
                // NOTE(review): the unqualified name resolves to the Exception class of
                // this file's namespace; presumably it extends PhpSpreadsheetException so
                // that the catch below converts it to `false` - confirm.
                throw new Exception('The filename ' . $filename . ' is not recognised as a Spreadsheet file');
            }

            return true;
        } catch (PhpSpreadsheetException) {
            return false;
        }
    }

    /**
     * Extract RGB color
     * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.4.
     *
     * @param string $rgb Encoded RGB value (4 bytes); only the first three bytes are read
     *
     * @return array{rgb: string} Upper-case hex notation, e.g. ['rgb' => 'FF00FC']
     */
    protected static function readRGB(string $rgb): array
    {
        // offset: 0; size: 1; Red component
        $red = ord($rgb[0]);

        // offset: 1; size: 1; Green component
        $green = ord($rgb[1]);

        // offset: 2; size: 1; Blue component
        $blue = ord($rgb[2]);

        return ['rgb' => sprintf('%02X%02X%02X', $red, $green, $blue)];
    }

    /**
     * Extracts an Excel Unicode short string (8-bit string length)
     * OpenOffice documentation: 2.5.3
     * function will automatically find out where the Unicode string ends.
     *
     * @param string $subData Bytes starting at the 1-byte length field
     *
     * @return array{value: string, size: int} Decoded value and total bytes consumed (length field included)
     */
    protected static function readUnicodeStringShort(string $subData): array
    {
        // offset: 0: size: 1; length of the string (character count)
        $characterCount = ord($subData[0]);

        $string = self::readUnicodeString(substr($subData, 1), $characterCount);

        // add 1 for the string length
        ++$string['size'];

        return $string;
    }

    /**
     * Extracts an Excel Unicode long string (16-bit string length)
     * OpenOffice documentation: 2.5.3
     * this function is under construction, needs to support rich text, and Asian phonetic settings.
     *
     * @param string $subData Bytes starting at the 2-byte length field
     *
     * @return array{value: string, size: int} Decoded value and total bytes consumed (length field included)
     */
    protected static function readUnicodeStringLong(string $subData): array
    {
        // offset: 0: size: 2; length of the string (character count)
        $characterCount = self::getUInt2d($subData, 0);

        $string = self::readUnicodeString(substr($subData, 2), $characterCount);

        // add 2 for the string length
        $string['size'] += 2;

        return $string;
    }

    /**
     * Read Unicode string with no string length field, but with known character count
     * this function is under construction, needs to support rich text, and Asian phonetic settings
     * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.3.
     *
     * @param string $subData Bytes starting at the option-flags byte
     * @param int $characterCount Number of characters (not bytes) in the string
     *
     * @return array{value: string, size: int} Decoded value and bytes consumed (option flags included)
     */
    protected static function readUnicodeString(string $subData, int $characterCount): array
    {
        // offset: 0: size: 1; option flags
        // bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
        $isCompressed = (ord($subData[0]) & 0x01) === 0;

        // bit: 2; mask: 0x04; Asian phonetic settings (not evaluated here)
        // bit: 3; mask: 0x08; Rich-Text settings (not evaluated here)

        // offset: 1: size: var; character array
        // this offset assumes richtext and Asian phonetic settings are off which is generally wrong
        // needs to be fixed
        $byteLength = $isCompressed ? $characterCount : 2 * $characterCount;
        $value = self::encodeUTF16(substr($subData, 1, $byteLength), $isCompressed);

        return [
            'value' => $value,
            'size' => 1 + $byteLength, // the size in bytes including the option flags
        ];
    }

    /**
     * Convert UTF-8 string to string surrounded by double quotes. Used for explicit string tokens in formulas.
     * Example: hello"world --> "hello""world".
     *
     * @param string $value UTF-8 encoded string
     */
    protected static function UTF8toExcelDoubleQuoted(string $value): string
    {
        return '"' . str_replace('"', '""', $value) . '"';
    }

    /**
     * Reads first 8 bytes of a string and return IEEE 754 float.
     *
     * The double is decoded arithmetically from its two little-endian 32-bit
     * halves. An int is returned instead of a float whenever the divisions
     * below happen to be exact on integer operands.
     *
     * @param string $data Binary string that is at least 8 bytes long
     */
    protected static function extractNumber(string $data): int|float
    {
        $highWord = self::getInt4d($data, 4);
        $lowWord = self::getInt4d($data, 0);

        $sign = ($highWord & self::HIGH_ORDER_BIT) >> 31;
        $exp = (($highWord & 0x7FF00000) >> 20) - 1023;

        // Top 20 mantissa bits, with the implicit leading 1 restored.
        $mantissa = (0x100000 | ($highWord & 0x000FFFFF));

        // The low word supplies the remaining 32 mantissa bits; its top bit is
        // handled separately from the other 31.
        $lowTopBit = ($lowWord & self::HIGH_ORDER_BIT) >> 31;
        $lowRest = ($lowWord & 0x7FFFFFFF);

        $value = $mantissa / (2 ** (20 - $exp));

        if ($lowTopBit !== 0) {
            $value += 1 / (2 ** (21 - $exp));
        }

        if ($lowRest !== 0) {
            $value += $lowRest / (2 ** (52 - $exp));
        }

        if ($sign !== 0) {
            $value *= -1;
        }

        return $value;
    }

    /**
     * Decode an RK-encoded number.
     *
     * Bit 1 (0x02) selects the payload type: set means the upper 30 bits are
     * a signed integer, clear means they are the most significant 30 bits of
     * a 64-bit IEEE 754 float. Bit 0 (0x01) set means the decoded value is
     * divided by 100.
     *
     * @param int $rknum Signed 32-bit RK value
     */
    protected static function getIEEE754(int $rknum): float|int
    {
        if (($rknum & 0x02) !== 0) {
            $value = $rknum >> 2;
        } else {
            // changes by mmp, info on IEEE754 encoding from
            // research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html
            // The RK format calls for using only the most significant 30 bits
            // of the 64 bit floating point value. The other 34 bits are assumed
            // to be 0 so we use the upper 30 bits of $rknum as follows...
            $sign = ($rknum & self::HIGH_ORDER_BIT) >> 31;
            $exp = ($rknum & 0x7FF00000) >> 20;
            $mantissa = (0x100000 | ($rknum & 0x000FFFFC));
            $value = $mantissa / (2 ** (20 - ($exp - 1023)));
            if ($sign !== 0) {
                $value = -1 * $value;
            }
            //end of changes by mmp
        }

        if (($rknum & 0x01) !== 0) {
            $value /= 100;
        }

        return $value;
    }

    /**
     * Get UTF-8 string from (compressed or uncompressed) UTF-16 string.
     *
     * @param string $string Raw character bytes
     * @param bool $compressed True when $string holds one byte per character and must be expanded first
     */
    protected static function encodeUTF16(string $string, bool $compressed = false): string
    {
        if ($compressed) {
            $string = self::uncompressByteString($string);
        }

        return StringHelper::convertEncoding($string, 'UTF-8', 'UTF-16LE');
    }

    /**
     * Convert UTF-16 string in compressed notation to uncompressed form. Only used for BIFF8.
     *
     * Every input byte is followed by a NUL byte, turning one-byte characters
     * into two-byte little-endian units.
     */
    protected static function uncompressByteString(string $string): string
    {
        // Guard: str_split('') does not yield an empty list on every PHP version.
        if ($string === '') {
            return '';
        }

        return implode("\0", str_split($string)) . "\0";
    }

    /**
     * Convert string to UTF-8. Only used for BIFF5.
     *
     * The source encoding is the codepage currently stored on this reader.
     */
    protected function decodeCodepage(string $string): string
    {
        return StringHelper::convertEncoding($string, 'UTF-8', $this->codepage);
    }

    /**
     * Read 16-bit unsigned integer.
     *
     * @param string $data Binary data; must contain at least $pos + 2 bytes
     * @param int $pos Offset of the low byte (little-endian)
     */
    public static function getUInt2d(string $data, int $pos): int
    {
        return ord($data[$pos]) | (ord($data[$pos + 1]) << 8);
    }

    /**
     * Read 16-bit signed integer.
     *
     * Uses unpack('s'), i.e. the machine's native byte order.
     *
     * @param string $data Binary data; must contain at least $pos + 2 bytes
     * @param int $pos Offset of the first byte
     */
    public static function getInt2d(string $data, int $pos): int
    {
        return unpack('s', $data[$pos] . $data[$pos + 1])[1]; // @phpstan-ignore-line
    }

    /**
     * Read 32-bit signed integer.
     *
     * @param string $data Binary data; must contain at least $pos + 4 bytes
     * @param int $pos Offset of the low byte (little-endian)
     */
    public static function getInt4d(string $data, int $pos): int
    {
        // FIX: represent numbers correctly on 64-bit system
        // http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
        // Changed by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
        $highByte = ord($data[$pos + 3]);
        if ($highByte >= 128) {
            // negative number: build the sign-extended top byte explicitly
            $highBits = -abs((256 - $highByte) << 24);
        } else {
            $highBits = ($highByte & 127) << 24;
        }

        return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $highBits;
    }
}
|