Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1441 ariadna 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
 
5
use DOMAttr;
6
use DOMDocument;
7
use DOMElement;
8
use DOMNode;
9
use DOMText;
10
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
11
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
12
use PhpOffice\PhpSpreadsheet\Cell\DataType;
13
use PhpOffice\PhpSpreadsheet\Comment;
14
use PhpOffice\PhpSpreadsheet\Document\Properties;
15
use PhpOffice\PhpSpreadsheet\Exception as SpreadsheetException;
16
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
17
use PhpOffice\PhpSpreadsheet\Helper\Html as HelperHtml;
18
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
19
use PhpOffice\PhpSpreadsheet\Spreadsheet;
20
use PhpOffice\PhpSpreadsheet\Style\Border;
21
use PhpOffice\PhpSpreadsheet\Style\Color;
22
use PhpOffice\PhpSpreadsheet\Style\Fill;
23
use PhpOffice\PhpSpreadsheet\Style\Font;
24
use PhpOffice\PhpSpreadsheet\Style\Style;
25
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
26
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
27
use Throwable;
28
 
29
class Html extends BaseReader
30
{
31
    /**
32
     * Sample size to read to determine if it's HTML or not.
33
     */
34
    const TEST_SAMPLE_SIZE = 2048;
35
 
36
    private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';
37
 
38
    private const DECLARES_CHARSET = '/\bcharset=/i';
39
 
40
    /**
41
     * Input encoding.
42
     */
43
    protected string $inputEncoding = 'ANSI';
44
 
45
    /**
46
     * Sheet index to read.
47
     */
48
    protected int $sheetIndex = 0;
49
 
50
    /**
     * Style arrays applied to cells for specific HTML tags, keyed by tag name.
     *
     * Each value is passed to Style::applyFromArray() by the element handlers
     * of this reader.
     */
    protected array $formats = [
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ], //    Bold, 24pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ], //    Bold, 18pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ], //    Bold, 13.5pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ], //    Bold, 12pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ], //    Bold, 10pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ], //    Bold, 7.5pt
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ], //    Blue underlined
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    // Keyed like the 'a' entry above; a positional element
                    // carries no colour key for the style array to read.
                    'color' => [
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ], //    Bottom border
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
    ];
129
 
130
    protected array $rowspan = [];
131
 
132
    /**
     * Build an HTML reader and give it the security scanner returned by
     * XmlScanner::getInstance() for this reader.
     */
    public function __construct()
    {
        parent::__construct();

        $scanner = XmlScanner::getInstance($this);
        $this->securityScanner = $scanner;
    }
140
 
141
    /**
     * Decide whether the given file looks like an HTML document.
     *
     * The file qualifies when its first sample (with any leading byte-order
     * mark removed) starts with '<' and contains at least one tag, and its
     * last sample ends with '>'. A file that cannot be opened never qualifies.
     */
    public function canRead(string $filename): bool
    {
        try {
            $this->openFile($filename);
        } catch (Exception) {
            // Unopenable (e.g. missing) file
            return false;
        }

        $head = preg_replace(self::STARTS_WITH_BOM, '', $this->readBeginning()) ?? '';
        $tail = $this->readEnding();

        // Both samples are in memory, the handle is no longer needed
        fclose($this->fileHandle);

        if (!self::startsWithTag($head)) {
            return false;
        }
        if (!self::containsTags($head)) {
            return false;
        }

        return self::endsWithTag($tail);
    }
163
 
164
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the start of the open file.
     */
    private function readBeginning(): string
    {
        $handle = $this->fileHandle;
        fseek($handle, 0);
        $sample = fread($handle, self::TEST_SAMPLE_SIZE);

        return (string) $sample;
    }
170
 
171
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the end of the open file.
     *
     * Returns an empty string when the file size is reported as zero.
     */
    private function readEnding(): string
    {
        $streamInfo = stream_get_meta_data($this->fileHandle);
        // Phpstan incorrectly flags following line for Php8.2-, corrected in 8.3
        $path = $streamInfo['uri']; //@phpstan-ignore-line

        // Drop any cached stat result so the size is current
        clearstatcache(true, $path);
        $fileSize = (int) filesize($path);
        if ($fileSize === 0) {
            return '';
        }

        // Never ask for more bytes than the file holds
        $length = ($fileSize < self::TEST_SAMPLE_SIZE) ? $fileSize : self::TEST_SAMPLE_SIZE;
        fseek($this->fileHandle, $fileSize - $length);
        $sample = fread($this->fileHandle, $length);

        return (string) $sample;
    }
192
 
193
    /**
     * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
     */
    private static function startsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_starts_with($trimmed, '<');
    }
197
 
198
    /**
     * Tell whether the data, ignoring surrounding whitespace, ends with '>'.
     */
    private static function endsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_ends_with($trimmed, '>');
    }
202
 
203
    /**
     * Tell whether strip_tags() removes anything from the data.
     */
    private static function containsTags(string $data): bool
    {
        $originalLength = strlen($data);
        $strippedLength = strlen(strip_tags($data));

        return $originalLength !== $strippedLength;
    }
207
 
208
    /**
     * Create a fresh Spreadsheet, hand it this reader's value binder and load
     * the given HTML file into it.
     */
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        $workbook = new Spreadsheet();
        $workbook->setValueBinder($this->valueBinder);

        return $this->loadIntoExisting($filename, $workbook);
    }
220
 
221
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
222
 
223
    protected array $dataArray = [];
224
 
225
    protected int $tableLevel = 0;
226
 
227
    protected array $nestedColumn = ['A'];
228
 
229
    /**
     * Enter a table: increase the nesting level and record its start column.
     *
     * An outermost table always starts in column 'A'; a nested table starts in
     * the column that was passed in.
     *
     * @return string the start column recorded for the new nesting level
     */
    protected function setTableStartColumn(string $column): string
    {
        $startColumn = ($this->tableLevel === 0) ? 'A' : $column;

        $level = ++$this->tableLevel;
        $this->nestedColumn[$level] = $startColumn;

        return $startColumn;
    }
239
 
240
    /**
     * Return the start column recorded for the current table nesting level.
     */
    protected function getTableStartColumn(): string
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
244
 
245
    /**
     * Leave a table: decrease the nesting level and remove the last recorded
     * start column.
     *
     * @return string the start column that was removed
     */
    protected function releaseTableStartColumn(): string
    {
        $this->tableLevel -= 1;
        $released = array_pop($this->nestedColumn);

        return $released;
    }
251
 
252
    /**
     * Write the pending cell content to the worksheet and reset it to ''.
     *
     * String content is written only when it is non-blank after trimming. A
     * 'data-type' attribute requests an explicit cell type; if the worksheet
     * rejects it with a SpreadsheetException the value is written the normal
     * way instead. Non-string content is only recorded in the data array.
     *
     * @param mixed $cellContent pending content, emptied on return
     * @param array $attributeArray attributes of the element being flushed
     */
    protected function flushCell(Worksheet $sheet, string $column, int|string $row, mixed &$cellContent, array $attributeArray): void
    {
        if (!is_string($cellContent)) {
            //    We have a Rich Text run
            //    TODO
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
            $cellContent = '';

            return;
        }

        //    Only actually write it if there's content in the string
        if (trim($cellContent) > '') {
            $coordinate = $column . $row;
            $requestedType = $attributeArray['data-type'] ?? null;

            if ($requestedType === null) {
                $sheet->setCellValue($coordinate, $cellContent);
            } else {
                // Keep Excel from treating a string that starts with '=' as a formula
                $stringTypes = [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE];
                if (in_array($requestedType, $stringTypes) && str_starts_with($cellContent, '=')) {
                    $sheet->getCell($coordinate)
                        ->getStyle()
                        ->setQuotePrefix(true);
                }

                $explicitType = $requestedType;
                if ($requestedType === DataType::TYPE_BOOL) {
                    $cellContent = self::convertBoolean($cellContent);
                    if (!is_bool($cellContent)) {
                        // Not recognisable as a boolean: store it as text
                        $explicitType = DataType::TYPE_STRING;
                    }
                }

                // Invalid data types are ignored by falling back to a normal write
                try {
                    $sheet->setCellValueExplicit($coordinate, $cellContent, $explicitType);
                } catch (SpreadsheetException) {
                    $sheet->setCellValue($coordinate, $cellContent);
                }
            }

            $this->dataArray[$row][$column] = $cellContent;
        }

        $cellContent = '';
    }
300
 
301
    /** @var array<int, array<int, string>> */
302
    private static array $falseTrueArray = [];
303
 
304
    /**
     * Interpret cell text as a boolean.
     *
     * '1' is true; '0', '' and null are false. Other text is upper-cased and
     * looked up in the true/false word lists supplied by the Calculation
     * engine (fetched once and cached). Text matching neither list is
     * returned unchanged.
     */
    private static function convertBoolean(?string $cellContent): bool|string
    {
        if ($cellContent === '1') {
            return true;
        }
        if ($cellContent === null || $cellContent === '' || $cellContent === '0') {
            return false;
        }

        if (self::$falseTrueArray === []) {
            self::$falseTrueArray = Calculation::getInstance()->getFalseTrueArray();
        }

        $upper = mb_strtoupper($cellContent);
        // Index 1 holds the "true" words, index 0 the "false" words
        if (in_array($upper, self::$falseTrueArray[1], true)) {
            return true;
        }
        if (in_array($upper, self::$falseTrueArray[0], true)) {
            return false;
        }

        return $cellContent;
    }
325
 
326
    /**
     * First link of the element-handler chain.
     *
     * Collects the element's attributes into a name => value array. A <body>
     * resets the cursor (row 1, column 'A', empty content, table level 0) and
     * its children are processed; any other element goes to the title handler.
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        $attributeArray = [];
        /** @var DOMAttr $attribute */
        foreach ($child->attributes as $attribute) {
            $attributeArray[$attribute->name] = $attribute->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $row = 1;
        $column = 'A';
        $cellContent = '';
        $this->tableLevel = 0;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
344
 
345
    /**
     * Handle <title>: use its text as the sheet title and the document title,
     * then clear the pending content. Other elements go to the span handler.
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        try {
            $sheet->setTitle($cellContent, true, true);
            $sheet->getParent()?->getProperties()?->setTitle($cellContent);
        } catch (SpreadsheetException) {
            // leave default title if too long or illegal chars
        }

        $cellContent = '';
    }
361
 
362
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
363
 
364
    /**
     * Handle inline containers listed in SPAN_ETC.
     *
     * An element with class="comment" becomes the text of the current cell's
     * comment (with optional rtl direction and text alignment taken from its
     * attributes); otherwise its children are processed as normal content.
     * A tag format, if one is defined, is then applied to the current cell.
     * Elements not in SPAN_ETC go to the hr handler.
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::SPAN_ETC, true)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $isComment = ($attributeArray['class'] ?? null) === 'comment';
        if (!$isComment) {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        } else {
            $sheet->getComment($column . $row)
                ->getText()
                ->createTextRun($child->textContent);

            if (($attributeArray['dir'] ?? null) === 'rtl') {
                $sheet->getComment($column . $row)->setTextboxDirection(Comment::TEXTBOX_DIRECTION_RTL);
            }

            if (isset($attributeArray['style'])) {
                $inlineStyle = $attributeArray['style'];
                $found = preg_match('/\btext-align:\s*(left|right|center|justify)\b/', $inlineStyle, $matches);
                if ($found === 1) {
                    $sheet->getComment($column . $row)->setAlignment($matches[1]);
                }
            }
        }

        $tagFormat = $this->formats[$child->nodeName] ?? null;
        if ($tagFormat !== null) {
            $sheet->getStyle($column . $row)->applyFromArray($tagFormat);
        }
    }
391
 
392
    /**
     * Handle <hr>: flush the pending content, move down one row, apply the
     * 'hr' format there if one is defined, and move down again.
     *
     * Every element, <hr> included, is then passed on to the br handler.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;

        if ($tag === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;

            $ruleFormat = $this->formats[$tag] ?? null;
            if ($ruleFormat !== null) {
                $sheet->getStyle($column . $row)->applyFromArray($ruleFormat);
            }
            ++$row;
        }

        // fall through to br
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
405
 
406
    /**
     * Handle line breaks (<br>, and <hr> arriving from the hr handler).
     *
     * Inside a table a newline is appended to the cell content and the cell is
     * set to wrap text; outside a table the content is flushed and the row
     * cursor advances. Other elements go to the anchor handler.
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array($child->nodeName, ['br', 'hr'], true)) {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Stay in the same cell: newline plus wrapped text
            $cellContent .= "\n";
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);

            return;
        }

        //    Not in a table: write what we have and start a new row
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        ++$row;
    }
422
 
423
    /**
     * Handle <a>: an href attribute becomes the current cell's hyperlink and
     * the 'a' format, if defined, is applied; the link's children are then
     * processed as content. Other elements go to the heading handler.
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // href is the only attribute with an effect; everything else,
        // including class="comment-indicator" (just a red square), is ignored.
        if (array_key_exists('href', $attributeArray)) {
            $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);

            $linkFormat = $this->formats[$child->nodeName] ?? null;
            if ($linkFormat !== null) {
                $sheet->getStyle($column . $row)->applyFromArray($linkFormat);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
448
 
449
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
450
 
451
    /**
     * Handle the block-level text elements listed in H1_ETC.
     *
     * Inside a table the element's text is appended to the current cell,
     * separated from earlier content by a newline, and the cell is set to wrap
     * text. Outside a table any pending content is flushed to its own row, the
     * element's text is written to the next row, the tag format (if defined)
     * is applied, and the cursor moves to column 'A' of the following row.
     * Elements not in H1_ETC go to the list-item handler.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::H1_ETC, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a newline.
            //    Compare against '' explicitly: the string '0' is falsy in PHP,
            //    so a truthiness test would drop the separator after a "0".
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        if (isset($this->formats[$child->nodeName])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
        }

        ++$row;
        $column = 'A';
    }
478
 
479
    /**
     * Handle <li>.
     *
     * Inside a table the item's text is appended to the current cell,
     * separated from earlier content by a newline. Outside a table any pending
     * content is flushed, the row cursor advances, the item's text is written
     * to that row and the column cursor returns to 'A'. Other elements go to
     * the image handler.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a newline.
            //    Compare against '' explicitly: the string '0' is falsy in PHP,
            //    so a truthiness test would merge an item "0" with the next one.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        }
        ++$row;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = 'A';
    }
499
 
500
    /**
     * Handle <img> by inserting the image at the current cell; other elements
     * go to the table handler.
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
508
 
509
    private string $currentColumn = 'A';
510
 
511
    /**
     * Handle <table>.
     *
     * A class attribute controls the sheet's gridline flags ('gridlines' for
     * display, 'gridlinesp' for print). Pending content is flushed, the table's
     * start column is recorded and the rows are processed. A nested table
     * starts one row up (when past row 1). After the table, the cursor moves
     * one column right if still nested deeper than one level, otherwise one
     * row down. Other elements go to the row handler.
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (isset($attributeArray['class'])) {
            $classList = explode(' ', $attributeArray['class']);
            $showGridlines = in_array('gridlines', $classList, true);
            $sheet->setShowGridlines($showGridlines);
            $printGridlines = in_array('gridlinesp', $classList, true);
            $sheet->setPrintGridlines($printGridlines);
        }

        $this->currentColumn = 'A';
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = $this->setTableStartColumn($column);

        if ($row > 1 && $this->tableLevel > 1) {
            $row -= 1;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $column = $this->releaseTableStartColumn();

        if ($this->tableLevel > 1) {
            ++$column; //* @phpstan-ignore-line
        } else {
            $row += 1;
        }
    }
536
 
537
    /**
     * Handle <col> and <tr>.
     *
     * <col> applies its inline style to the running column (row passed as -1)
     * and advances that column. <tr> restarts at the table's start column with
     * empty content, processes its cells, applies a height attribute to the
     * row if present, and advances the row cursor. Other elements go to the
     * th/td dispatcher.
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;

        if ($tag === 'col') {
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
            ++$this->currentColumn;

            return;
        }

        if ($tag !== 'tr') {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $column = $this->getTableStartColumn();
        $cellContent = '';
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        if (isset($attributeArray['height'])) {
            $rowDimension = $sheet->getRowDimension($row);
            $rowDimension->setRowHeight($attributeArray['height']);
        }

        ++$row;
    }
556
 
557
    /**
     * Last dispatcher of the chain: <td>/<th> go to the cell handler, any
     * other element simply has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isTableCell = in_array($child->nodeName, ['td', 'th'], true);

        if ($isTableCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
565
 
566
    /**
     * Apply a bgcolor attribute, if present, as a solid fill on the cell.
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['bgcolor'])) {
            return;
        }

        $cellStyle = $sheet->getStyle($column . $row);
        $fill = [
            'fillType' => Fill::FILL_SOLID,
            'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
        ];
        $cellStyle->applyFromArray(['fill' => $fill]);
    }
579
 
580
    /**
     * Apply a width attribute, if present, to the column, converting the value
     * through CssDimension::width().
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['width'])) {
            return;
        }

        $columnDimension = $sheet->getColumnDimension($column);
        $cssWidth = new CssDimension($attributeArray['width']);
        $columnDimension->setWidth($cssWidth->width());
    }
586
 
587
    /**
     * Apply a height attribute, if present, to the row, converting the value
     * through CssDimension::height().
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        if (!isset($attributeArray['height'])) {
            return;
        }

        $rowDimension = $sheet->getRowDimension($row);
        $cssHeight = new CssDimension($attributeArray['height']);
        $rowDimension->setRowHeight($cssHeight->height());
    }
593
 
594
    /**
     * Apply an align attribute, if present, as the cell's horizontal alignment.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['align'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setHorizontal($attributeArray['align']);
    }
600
 
601
    /**
     * Apply a valign attribute, if present, as the cell's vertical alignment.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['valign'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setVertical($attributeArray['valign']);
    }
607
 
608
    /**
     * Apply a data-format attribute, if present, as the cell's number format
     * code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['data-format'])) {
            return;
        }

        $numberFormat = $sheet->getStyle($column . $row)->getNumberFormat();
        $numberFormat->setFormatCode($attributeArray['data-format']);
    }
614
 
615
    /**
     * Handle a table cell (<td> or <th>).
     *
     * Columns already claimed by an earlier rowspan are skipped. The cell's
     * children are processed, its inline style applied, its content flushed
     * and the bgcolor/width/height/align/valign/data-format attributes
     * applied. A rowspan and/or colspan merges the covered range; every cell
     * of a range that includes a rowspan is recorded so later rows skip it.
     * The column cursor ends one past the last column the cell covers.
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        // Move past positions occupied by a rowspan from a previous row
        while (isset($this->rowspan[$column . $row])) {
            ++$column; //* @phpstan-ignore-line
        }
        //* @phpstan-ignore-next-line
        $this->processDomElement($child, $sheet, $row, $column, $cellContent); // ++$column above confuses Phpstan

        // apply inline style
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        // Last column covered by the cell (the cell's own column without colspan)
        $columnTo = $column;
        if ($hasColspan) {
            for ($extra = (int) $attributeArray['colspan'] - 1; $extra > 0; --$extra) {
                ++$columnTo;
            }
        }

        if ($hasRowspan) {
            // Merge over rows (and columns, when colspan is also given) and
            // remember every covered cell for the rows that follow
            $rowTo = $row + (int) $attributeArray['rowspan'] - 1;
            $range = $column . $row . ':' . $columnTo . $rowTo;
            foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
                $this->rowspan[$value] = true;
            }
            $sheet->mergeCells($range);
        } elseif ($hasColspan) {
            // Merge over columns within this row only
            $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
        }

        //* @phpstan-ignore-next-line
        $column = $columnTo; // ++$columnTo above confuses phpstan

        ++$column;
    }
668
 
669
    /**
     * Walk the child nodes of an element.
     *
     * Text nodes are trimmed, have whitespace runs collapsed to one space and
     * are appended to the pending cell content (a lone non-breaking space
     * counts as empty). Element nodes are sent through the handler chain.
     * Other node types are ignored.
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $child) {
            if ($child instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $child);

                continue;
            }

            if (!($child instanceof DOMText)) {
                continue;
            }

            $rawText = trim($child->nodeValue ?? '');
            $domText = (string) preg_replace('/\s+/', ' ', $rawText);
            //    simply append the text if the cell content is a plain text string
            //    but if we have a rich text run instead, we need to append it correctly
            //    TODO
            $cellContent .= ($domText === "\u{a0}") ? '' : $domText;
        }
    }
686
 
687
    /**
     * Load an HTML file into an existing Spreadsheet.
     *
     * The file must pass canRead(). Its contents are run through the security
     * scanner and replaceNonAsciiIfNeeded() before being parsed into a DOM
     * document; document properties and then the document body are loaded.
     *
     * @throws Exception if the file is not valid HTML or cannot be parsed;
     *     a caught Throwable is attached as the previous exception
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        if (!$this->canRead($filename)) {
            throw new Exception($filename . ' is an Invalid HTML file.');
        }

        $dom = new DOMDocument();
        $failure = null;

        try {
            $html = $this->getSecurityScannerOrThrow()->scanFile($filename);
            $html = self::replaceNonAsciiIfNeeded($html);
            if ($html === null) {
                $loaded = false;
            } else {
                $loaded = $dom->loadHTML($html);
            }
        } catch (Throwable $e) {
            $failure = $e;
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $failure);
        }

        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
715
 
716
    /**
     * Copy document metadata from the DOM into the spreadsheet properties.
     *
     * Every <meta> with a non-empty content attribute is examined: known
     * names map to the matching property setter, and names of the form
     * custom.<type>.<name> (type bool, date, float, int or string) become
     * custom properties. A non-empty document base URI is stored as the
     * hyperlink base.
     */
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
    {
        $properties = $spreadsheet->getProperties();

        foreach ($dom->getElementsByTagName('meta') as $meta) {
            $metaContent = (string) $meta->getAttribute('content');
            if ($metaContent === '') {
                continue;
            }

            $metaName = (string) $meta->getAttribute('name');
            match ($metaName) {
                'author' => $properties->setCreator($metaContent),
                'category' => $properties->setCategory($metaContent),
                'company' => $properties->setCompany($metaContent),
                'created' => $properties->setCreated($metaContent),
                'description' => $properties->setDescription($metaContent),
                'keywords' => $properties->setKeywords($metaContent),
                'lastModifiedBy' => $properties->setLastModifiedBy($metaContent),
                'manager' => $properties->setManager($metaContent),
                'modified' => $properties->setModified($metaContent),
                'subject' => $properties->setSubject($metaContent),
                'title' => $properties->setTitle($metaContent),
                'viewport' => $properties->setViewport($metaContent),
                // Anything else is only of interest as a typed custom property
                default => preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $metaName, $matches) === 1
                    ? match ($matches[1]) {
                        'bool' => $properties->setCustomProperty($matches[2], (bool) $metaContent, Properties::PROPERTY_TYPE_BOOLEAN),
                        'float' => $properties->setCustomProperty($matches[2], (float) $metaContent, Properties::PROPERTY_TYPE_FLOAT),
                        'int' => $properties->setCustomProperty($matches[2], (int) $metaContent, Properties::PROPERTY_TYPE_INTEGER),
                        'date' => $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_DATE),
                        // string
                        default => $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_STRING),
                    }
                    : null,
            };
        }

        $baseUri = $dom->baseURI;
        if (!empty($baseUri)) {
            $properties->setHyperlinkBase($baseUri);
        }
    }
790
 
791
    /**
     * Regex callback: rewrite the matched character as a decimal numeric character reference.
     *
     * @param array $matches match data; element 0 is the matched UTF-8 character
     */
    private static function replaceNonAscii(array $matches): string
    {
        $codePoint = mb_ord($matches[0], 'UTF-8');

        return "&#{$codePoint};";
    }
795
 
796
    /**
     * Replace every character from U+0080 upwards with a numeric character reference,
     * unless the text starts with a byte-order mark or declares a charset.
     *
     * @param string $convert markup to inspect
     *
     * @return null|string the (possibly rewritten) markup; null when the replacement itself fails
     */
    private static function replaceNonAsciiIfNeeded(string $convert): ?string
    {
        $startsWithBom = preg_match(self::STARTS_WITH_BOM, $convert) === 1;
        if ($startsWithBom || preg_match(self::DECLARES_CHARSET, $convert) === 1) {
            // The text already says how it is encoded: leave it untouched.
            return $convert;
        }

        return preg_replace_callback(
            "/[\u{80}-\u{10ffff}]/u",
            static fn (array $hit): string => self::replaceNonAscii($hit),
            $convert
        );
    }
809
 
810
    /**
     * Build a spreadsheet from a string of HTML.
     *
     * @param string $content HTML markup to parse
     * @param null|Spreadsheet $spreadsheet workbook to load into; a new one is created when omitted
     *
     * @throws Exception when the content cannot be scanned or parsed into a DOM document
     */
    public function loadFromString(string $content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        $dom = new DOMDocument();
        $failure = null;

        // Scan, normalise and parse the markup; any throwable counts as a failed load.
        try {
            $scanned = $this->getSecurityScannerOrThrow()->scan($content);
            $prepared = self::replaceNonAsciiIfNeeded($scanned);
            $parsed = $prepared !== null && $dom->loadHTML($prepared);
        } catch (Throwable $failure) {
            $parsed = false;
        }

        if ($parsed === false) {
            throw new Exception('Failed to load content as a DOM Document', 0, $failure);
        }

        $spreadsheet ??= new Spreadsheet();
        $spreadsheet->setValueBinder($this->valueBinder);
        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
835
 
836
    /**
     * Populate the configured sheet of a Spreadsheet from an already parsed DOM document.
     *
     * Sheets are created until the configured sheet index exists; that sheet is made
     * active and filled by walking the DOM tree from its root.
     */
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        $targetIndex = $this->sheetIndex;
        while ($spreadsheet->getSheetCount() <= $targetIndex) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($targetIndex);

        // Whitespace-only text is of no interest
        $document->preserveWhiteSpace = false;

        // Cursor state handed to the recursive DOM walker
        $currentRow = 0;
        $currentColumn = 'A';
        $pendingText = '';
        $this->rowspan = [];
        $this->processDomElement(
            $document,
            $spreadsheet->getActiveSheet(),
            $currentRow,
            $currentColumn,
            $pendingText
        );

        return $spreadsheet;
    }
858
 
859
    /**
     * Index of the worksheet that loaded content is written to.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }
866
 
867
    /**
     * Choose the worksheet that loaded content is written to.
     *
     * @param int $sheetIndex index of the target worksheet
     *
     * @return $this the reader itself, so calls can be chained
     */
    public function setSheetIndex(int $sheetIndex): static
    {
        $this->sheetIndex = $sheetIndex;

        return $this;
    }
880
 
881
    /**
     * Apply the declarations of an element's inline `style` attribute to the matching cell(s).
     *
     * When the element has a rowspan and/or colspan attribute the style is applied to the
     * whole spanned range. When no cell position is known ($row <= 0 or $column === '')
     * the declarations are applied to a detached Style object, so only the column-width
     * and row-height handling can still reach the sheet.
     *
     * Handled properties: background, background-color, color, border, border-top,
     * border-bottom, border-left, border-right, font-size, font-weight, font-style,
     * font-family, text-decoration, text-align, vertical-align, width, height,
     * word-wrap and text-indent. Any other property is ignored.
     *
     * @param Worksheet $sheet worksheet receiving the styles
     * @param int $row row number of the cell; values <= 0 mean "no cell"
     * @param string $column column letter(s) of the cell; '' means "no cell"
     * @param array $attributeArray element attributes (name => value), e.g. style, rowspan, colspan
     */
    private function applyInlineStyle(Worksheet &$sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['style'])) {
            return;
        }

        if ($row <= 0 || $column === '') {
            $cellStyle = new Style();
        } elseif (isset($attributeArray['rowspan']) || isset($attributeArray['colspan'])) {
            // A missing span attribute counts as a span of 1, i.e. no extension in that direction.
            $colspan = (int) ($attributeArray['colspan'] ?? 1);
            $rowspan = (int) ($attributeArray['rowspan'] ?? 1);
            $columnTo = $column;
            for ($i = 0; $i < $colspan - 1; ++$i) {
                ++$columnTo;
            }
            $range = $column . $row . ':' . $columnTo . ($row + $rowspan - 1);
            $cellStyle = $sheet->getStyle($range);
        } else {
            $cellStyle = $sheet->getStyle($column . $row);
        }

        // Walk the "name: value" declarations of the inline style
        $styles = explode(';', $attributeArray['style']);
        foreach ($styles as $st) {
            $value = explode(':', $st);
            $styleName = trim($value[0]);
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
            $styleValueString = (string) $styleValue;

            if (!$styleName) {
                continue;
            }

            switch ($styleName) {
                case 'background':
                case 'background-color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);

                    break;
                case 'color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);

                    break;

                case 'border':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');

                    break;

                case 'border-top':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');

                    break;

                case 'border-bottom':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');

                    break;

                case 'border-left':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');

                    break;

                case 'border-right':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');

                    break;

                case 'font-size':
                    $cellStyle->getFont()->setSize(
                        (float) $styleValue
                    );

                    break;

                case 'font-weight':
                    // Only numeric weights may be compared with 500: PHP 8 compares a
                    // non-numeric keyword such as 'normal' with an integer as strings,
                    // which would wrongly report it as >= 500 and turn the cell bold.
                    $isBoldKeyword = in_array($styleValue, ['bold', 'bolder'], true);
                    if ($isBoldKeyword || (is_numeric($styleValue) && (float) $styleValue >= 500)) {
                        $cellStyle->getFont()->setBold(true);
                    }

                    break;

                case 'font-style':
                    if ($styleValue === 'italic') {
                        $cellStyle->getFont()->setItalic(true);
                    }

                    break;

                case 'font-family':
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));

                    break;

                case 'text-decoration':
                    switch ($styleValue) {
                        case 'underline':
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);

                            break;
                        case 'line-through':
                            $cellStyle->getFont()->setStrikethrough(true);

                            break;
                    }

                    break;

                case 'text-align':
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);

                    break;

                case 'vertical-align':
                    $cellStyle->getAlignment()->setVertical($styleValueString);

                    break;

                case 'width':
                    // Column width only needs a column, not a valid row.
                    if ($column !== '') {
                        $sheet->getColumnDimension($column)->setWidth(
                            (new CssDimension($styleValue ?? ''))->width()
                        );
                    }

                    break;

                case 'height':
                    // Row height only needs a valid row, not a column.
                    if ($row > 0) {
                        $sheet->getRowDimension($row)->setRowHeight(
                            (new CssDimension($styleValue ?? ''))->height()
                        );
                    }

                    break;

                case 'word-wrap':
                    $cellStyle->getAlignment()->setWrapText(
                        $styleValue === 'break-word'
                    );

                    break;

                case 'text-indent':
                    $cellStyle->getAlignment()->setIndent(
                        (int) str_replace(['px'], '', $styleValueString)
                    );

                    break;
            }
        }
    }
1064
 
1065
    /**
     * Turn a CSS colour value into a bare hexadecimal RGB string.
     *
     * A value starting with '#' is returned without the '#'; the three-digit CSS
     * shorthand is expanded to six digits by doubling each digit ('#f0a' => 'ff00aa'),
     * since a three-character string is not a complete RGB value. Any other value is
     * resolved through HelperHtml::colourNameLookup().
     *
     * @param null|string $value CSS colour; null is treated as an empty string
     *
     * @return string hexadecimal colour without a leading '#', or the result of the name lookup
     */
    public function getStyleColor(?string $value): string
    {
        $value = (string) $value;
        if (!str_starts_with($value, '#')) {
            return HelperHtml::colourNameLookup($value);
        }

        $hex = substr($value, 1);
        if (strlen($hex) === 3 && ctype_xdigit($hex)) {
            // CSS shorthand: #rgb is the same colour as #rrggbb
            return $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
        }

        return $hex;
    }
1077
 
1078
    /**
     * Add the image described by an element's attributes to the sheet as a drawing
     * anchored at the given cell, then size the cell's column and row from the drawing.
     *
     * Nothing happens when there is no src attribute or when the drawing ends up
     * without a path after the source has been assigned.
     *
     * @param Worksheet $sheet worksheet receiving the drawing
     * @param string $column column letter(s) of the anchor cell
     * @param int $row row number of the anchor cell
     * @param array $attributes element attributes (src, width, height, alt, style, ...)
     */
    private function insertImage(Worksheet $sheet, string $column, int $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }
        $inlineStyles = self::getStyleArray($attributes);

        // Everything except data: URIs is URL-decoded before use
        $source = $attributes['src'];
        if (!str_starts_with($source, 'data:')) {
            $source = urldecode($source);
        }

        // Explicit width/height attributes win over the inline style values
        if (isset($attributes['width'])) {
            $imageWidth = (float) $attributes['width'];
        } else {
            $imageWidth = $inlineStyles['width'] ?? null;
        }
        if (isset($attributes['height'])) {
            $imageHeight = (float) $attributes['height'];
        } else {
            $imageHeight = $inlineStyles['height'] ?? null;
        }
        $altText = $attributes['alt'] ?? null;

        $picture = new Drawing();
        $picture->setPath($source, false);
        if ($picture->getPath() === '') {
            return;
        }
        $picture->setWorksheet($sheet);
        $picture->setCoordinates($column . $row);
        $picture->setOffsetX(0);
        $picture->setOffsetY(10);
        $picture->setResizeProportional(true);

        if ($altText) {
            $picture->setName($altText);
        }

        if ($imageWidth && $imageHeight) {
            $picture->setWidthAndHeight((int) $imageWidth, (int) $imageHeight);
        } elseif ($imageWidth) {
            $picture->setWidth((int) $imageWidth);
        } elseif ($imageHeight) {
            $picture->setHeight((int) $imageHeight);
        }

        // Derive the cell dimensions from the final drawing size
        $sheet->getColumnDimension($column)->setWidth($picture->getWidth() / 6);
        $sheet->getRowDimension($row)->setRowHeight($picture->getHeight() * 0.9);

        $opacity = $inlineStyles['opacity'] ?? null;
        if (is_numeric($opacity)) {
            $picture->setOpacity((int) ($opacity * 100000));
        }
    }
1133
 
1134
    /**
     * Parse an element's inline style attribute into a property => value map.
     *
     * Only declarations that split into exactly one name and one value around ':' are
     * kept. For width and height a trailing 'px' is stripped; any other width/height
     * value is converted through CssDimension. All other values are stored trimmed.
     *
     * @param array $attributes element attributes; only 'style' is read
     *
     * @return array property name => value; empty when there is no style attribute
     */
    private static function getStyleArray(array $attributes): array
    {
        if (!isset($attributes['style'])) {
            return [];
        }

        $parsed = [];
        foreach (explode(';', $attributes['style']) as $declaration) {
            $pair = explode(':', $declaration);
            if (count($pair) !== 2) {
                continue;
            }

            $property = trim($pair[0]);
            $setting = trim($pair[1]);
            $inPixels = str_ends_with($setting, 'px');

            if ($property === 'width') {
                $setting = $inPixels
                    ? (string) ((float) substr($setting, 0, -2))
                    : (new CssDimension($setting))->width();
            } elseif ($property === 'height') {
                $setting = $inPixels
                    ? substr($setting, 0, -2)
                    : (new CssDimension($setting))->height();
            }

            $parsed[$property] = $setting;
        }

        return $parsed;
    }
1164
 
1165
    /**
     * Border style keywords accepted in inline styles, mapped to the
     * corresponding Border style constants.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1181
 
1182
    /**
     * Expose the keyword => border style map used when reading inline border styles.
     */
    public static function getBorderMappings(): array
    {
        return self::BORDER_MAPPINGS;
    }
1186
 
1187
    /**
     * Look up the PhpSpreadsheet border style for an HTML border style keyword.
     *
     * @param string $style border style keyword, e.g. 'solid' or 'dashed'
     *
     * @return null|string the mapped border style, or null for an unknown keyword
     */
    public function getBorderStyle(string $style): ?string
    {
        if (!isset(self::BORDER_MAPPINGS[$style])) {
            return null;
        }

        return self::BORDER_MAPPINGS[$style];
    }
1194
 
1195
    /**
     * Apply a CSS border shorthand value to one border (or all borders) of a style.
     *
     * The value 'none' clears the border. Otherwise the value is split on whitespace:
     * with three or more tokens the second is taken as the border style and the third
     * as the colour (the first being the width); with fewer tokens the first is the
     * style and the second, if any, the colour.
     *
     * Tokens are split on any run of whitespace, so values such as "1px  solid  #000"
     * or values containing tabs or newlines are read the same way as their
     * single-space form instead of producing empty tokens that shift the positions.
     *
     * @param Style $cellStyle style object to modify
     * @param string $styleValue CSS border value, e.g. '1px solid #000000'
     * @param string $type border key understood by Style::applyFromArray, e.g. 'top' or 'allBorders'
     */
    private function setBorderStyle(Style $cellStyle, string $styleValue, string $type): void
    {
        $styleValue = trim($styleValue);
        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = null;
        } else {
            // preg_split() returns false on failure; treat that like an empty value.
            $borderArray = preg_split('/\s+/', $styleValue, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            if (count($borderArray) >= 3) {
                $borderStyle = $borderArray[1];
                $color = $borderArray[2];
            } else {
                $borderStyle = $borderArray[0] ?? '';
                $color = $borderArray[1] ?? null;
            }
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1221
 
1222
    /**
     * Describe every worksheet obtained by loading the given file.
     *
     * The file is loaded into a temporary Spreadsheet, which is disconnected again
     * before returning.
     *
     * @param string $filename file to inspect
     *
     * @return array one entry per worksheet with the keys worksheetName, lastColumnLetter,
     *               lastColumnIndex (zero-based), totalRows, totalColumns and sheetState
     */
    public function listWorksheetInfo(string $filename): array
    {
        $workbook = new Spreadsheet();
        $this->loadIntoExisting($filename, $workbook);

        $summaries = [];
        foreach ($workbook->getAllSheets() as $worksheet) {
            $title = $worksheet->getTitle();
            $lastColumn = $worksheet->getHighestDataColumn();
            $zeroBasedIndex = Coordinate::columnIndexFromString($lastColumn) - 1;

            $summaries[] = [
                'worksheetName' => $title,
                'lastColumnLetter' => $lastColumn,
                'lastColumnIndex' => $zeroBasedIndex,
                'totalRows' => $worksheet->getHighestDataRow(),
                'totalColumns' => $zeroBasedIndex + 1,
                'sheetState' => Worksheet::SHEETSTATE_VISIBLE,
            ];
        }
        $workbook->disconnectWorksheets();

        return $summaries;
    }
1243
}