Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
 
5
use DOMDocument;
6
use DOMElement;
7
use DOMNode;
8
use DOMText;
9
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10
use PhpOffice\PhpSpreadsheet\Cell\DataType;
11
use PhpOffice\PhpSpreadsheet\Document\Properties;
12
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
13
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
14
use PhpOffice\PhpSpreadsheet\Spreadsheet;
15
use PhpOffice\PhpSpreadsheet\Style\Border;
16
use PhpOffice\PhpSpreadsheet\Style\Color;
17
use PhpOffice\PhpSpreadsheet\Style\Fill;
18
use PhpOffice\PhpSpreadsheet\Style\Font;
19
use PhpOffice\PhpSpreadsheet\Style\Style;
20
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
21
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
22
use Throwable;
23
 
24
class Html extends BaseReader
25
{
26
    /**
27
     * Sample size to read to determine if it's HTML or not.
     *
     * Passed as the length argument to fread() when canRead() samples the
     * beginning and the end of the candidate file.
28
     */
29
    const TEST_SAMPLE_SIZE = 2048;
30
 
31
    /**
32
     * Input encoding.
33
     *
     * Only read and written through getInputEncoding()/setInputEncoding(),
     * both of which are deprecated because no use is made of this property.
     *
34
     * @var string
35
     */
36
    protected $inputEncoding = 'ANSI';
37
 
38
    /**
39
     * Sheet index to read.
40
     *
     * loadDocument() creates sheets until this index exists in the target
     * spreadsheet and then makes it the active sheet before importing.
     *
41
     * @var int
42
     */
43
    protected $sheetIndex = 0;
44
 
45
    /**
     * Formats.
     *
     * Style arrays keyed by HTML tag name. When an element with one of these
     * tag names is processed, the matching array is handed to
     * Style::applyFromArray() for the current cell.
     *
     * @var array
     */
    protected $formats = [
        //    Bold, 24pt
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ],
        //    Bold, 18pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ],
        //    Bold, 13.5pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ],
        //    Bold, 12pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ],
        //    Bold, 10pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ],
        //    Bold, 7.5pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ],
        //    Blue underlined
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ],
        //    Bottom border
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    // Keyed with 'argb' like the 'a' entry above; the colour was
                    // previously an un-keyed list element.
                    'color' => [
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ],
        //    Bold
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ],
        //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ],
        //    Italic
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ],
        //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ],
    ];
126
 
127
    /**
     * Cell coordinates covered by a rowspan merge, stored as coordinate => true.
     * processDomElementThTd() fills it and skips over any coordinate found here;
     * loadDocument() resets it before importing.
     *
     * @var array
     */
128
    protected $rowspan = [];
129
 
130
    /**
     * Create a new HTML Reader instance.
     *
     * Runs the parent constructor and then attaches the XmlScanner instance
     * bound to this reader as its security scanner.
     */
    public function __construct()
    {
        parent::__construct();

        $scanner = XmlScanner::getInstance($this);
        $this->securityScanner = $scanner;
    }
138
 
139
    /**
     * Validate that the current file is an HTML file.
     *
     * The file qualifies when its leading sample starts with a tag and
     * contains tags, and its trailing sample ends with a tag. A file that
     * cannot be opened is reported as unreadable rather than raising.
     */
    public function canRead(string $filename): bool
    {
        try {
            $this->openFile($filename);
        } catch (Exception $e) {
            // Opening failed, so this cannot be a readable HTML file
            return false;
        }

        $head = $this->readBeginning();
        $tail = $this->readEnding();

        fclose($this->fileHandle);

        $checks = [
            self::startsWithTag($head),
            self::containsTags($head),
            self::endsWithTag($tail),
        ];

        return !in_array(false, $checks, true);
    }
160
 
161
    /**
     * Rewinds the open file handle and returns up to TEST_SAMPLE_SIZE bytes
     * from the start of the file (an empty string if the read fails).
     */
    private function readBeginning(): string
    {
        $handle = $this->fileHandle;
        fseek($handle, 0);
        $sample = fread($handle, self::TEST_SAMPLE_SIZE);

        return (string) $sample;
    }
167
 
168
    /**
     * Returns up to TEST_SAMPLE_SIZE bytes from the end of the open file,
     * or an empty string when the file size is reported as zero.
     */
    private function readEnding(): string
    {
        // The path is recovered from the stream metadata of the open handle
        $streamInfo = stream_get_meta_data($this->fileHandle);
        $totalBytes = (int) filesize($streamInfo['uri']);
        if ($totalBytes === 0) {
            return '';
        }

        // Never ask for more bytes than the file holds
        $wanted = min(self::TEST_SAMPLE_SIZE, $totalBytes);
        fseek($this->fileHandle, $totalBytes - $wanted);
        $sample = fread($this->fileHandle, $wanted);

        return (string) $sample;
    }
187
 
188
    /**
     * Tells whether the data, ignoring surrounding whitespace, begins with '<'.
     */
    private static function startsWithTag(string $data): bool
    {
        $trimmed = trim($data);
        if ($trimmed === '') {
            return false;
        }

        return $trimmed[0] === '<';
    }
192
 
193
    /**
     * Tells whether the data, ignoring surrounding whitespace, ends with '>'.
     */
    private static function endsWithTag(string $data): bool
    {
        $trimmed = trim($data);
        $length = strlen($trimmed);
        if ($length === 0) {
            return false;
        }

        return $trimmed[$length - 1] === '>';
    }
197
 
198
    /**
     * Tells whether strip_tags() would remove anything from the data.
     */
    private static function containsTags(string $data): bool
    {
        // strip_tags() only ever removes characters, so the result has the
        // same length as the input exactly when nothing was stripped.
        $stripped = strip_tags($data);

        return $stripped !== $data;
    }
202
 
203
    /**
     * Loads Spreadsheet from file.
     *
     * Delegates to loadIntoExisting() with a freshly created Spreadsheet.
     */
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
214
 
215
    /**
     * Set input encoding.
     *
     * Stores the value and returns the reader for chaining.
     *
     * @param string $inputEncoding Input encoding, eg: 'ANSI'
     *
     * @return $this
     *
     * @codeCoverageIgnore
     *
     * @deprecated no use is made of this property
     */
    public function setInputEncoding($inputEncoding)
    {
        $reader = $this;
        $reader->inputEncoding = $inputEncoding;

        return $reader;
    }
232
 
233
    /**
     * Get input encoding.
     *
     * @return string the value last stored by setInputEncoding(), 'ANSI' by default
     *
     * @codeCoverageIgnore
     *
     * @deprecated no use is made of this property
     */
    public function getInputEncoding()
    {
        $encoding = $this->inputEncoding;

        return $encoding;
    }
246
 
247
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
248
 
249
    /**
     * Mirror of the values flushed to the worksheet, indexed as [row][column].
     * flushCell() writes the plain cell text here, or a 'RICH TEXT: ' marker
     * string when the content is not a plain string.
     *
     * @var array
     */
250
    protected $dataArray = [];
251
 
252
    /**
     * Current table nesting depth: 0 outside any table. Incremented by
     * setTableStartColumn(), decremented by releaseTableStartColumn() and
     * reset to 0 when a body element is entered.
     *
     * @var int
     */
253
    protected $tableLevel = 0;
254
 
255
    /**
     * Start column of each open table, indexed by table nesting level.
     * Entries are added by setTableStartColumn() and popped by
     * releaseTableStartColumn().
     *
     * @var array
     */
256
    protected $nestedColumn = ['A'];
257
 
258
    /**
     * Opens a new table nesting level and records the column it starts in.
     *
     * A table opened while no other table is open always starts in column
     * 'A'; a nested table starts in the column passed in.
     *
     * @return string the start column recorded for the new level
     */
    protected function setTableStartColumn(string $column): string
    {
        $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

        ++$this->tableLevel;
        $level = $this->tableLevel;
        $this->nestedColumn[$level] = $startColumn;

        return $this->nestedColumn[$level];
    }
268
 
269
    /**
     * Returns the start column recorded for the current table nesting level.
     */
    protected function getTableStartColumn(): string
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
273
 
274
    /**
     * Closes the current table nesting level.
     *
     * @return string the most recently recorded start column, which is removed from the list
     */
    protected function releaseTableStartColumn(): string
    {
        --$this->tableLevel;
        $released = array_pop($this->nestedColumn);

        return $released;
    }
280
 
281
    /**
     * Flush cell.
     *
     * Writes the accumulated cell content to the worksheet at $column$row,
     * mirrors it into $this->dataArray, and resets the buffer to an empty
     * string. Content that is empty after trimming is not written.
     *
     * When a data-type attribute is present the value is set explicitly with
     * that type; if the worksheet rejects the type, the value is written with
     * the default setCellValue() instead.
     *
     * @param string $column
     * @param int|string $row
     * @param mixed $cellContent buffer to flush; always reset to '' on return
     */
    protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent, array $attributeArray): void
    {
        if (!is_string($cellContent)) {
            //    We have a Rich Text run
            //    TODO
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
            $cellContent = '';

            return;
        }

        //    Only actually write it if there's content in the string
        if (trim($cellContent) !== '') {
            $coordinate = $column . $row;
            $dataType = $attributeArray['data-type'] ?? null;

            if ($dataType === null) {
                $sheet->setCellValue($coordinate, $cellContent);
            } else {
                $textTypes = [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE];
                // Stop Excel treating a string with a leading equal sign as a formula
                if (in_array($dataType, $textTypes, true) && substr($cellContent, 0, 1) === '=') {
                    $sheet->getCell($coordinate)
                        ->getStyle()
                        ->setQuotePrefix(true);
                }

                // An invalid data type is ignored: fall back to the default value binder
                try {
                    $sheet->setCellValueExplicit($coordinate, $cellContent, $dataType);
                } catch (\PhpOffice\PhpSpreadsheet\Exception $exception) {
                    $sheet->setCellValue($coordinate, $cellContent);
                }
            }

            $this->dataArray[$row][$column] = $cellContent;
        }

        $cellContent = '';
    }
326
 
327
    /**
     * Entry point of the per-element handler chain.
     *
     * Collects the element's attributes into a name => value array. A body
     * element resets the row/column cursor, the content buffer and the table
     * depth before its children are processed; any other element is handed
     * to the title handler together with the collected attributes.
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        $attributeArray = [];
        foreach ($child->attributes as $domAttribute) {
            $attributeArray[$domAttribute->name] = $domAttribute->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Start of the document body: reset all cursors
        $row = 1;
        $column = 'A';
        $cellContent = '';
        $this->tableLevel = 0;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
344
 
345
    /**
     * Uses the text gathered from a title element as the worksheet title and
     * clears the content buffer; other elements go on to the inline-tag handler.
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $sheet->setTitle($cellContent, true, true);
        $cellContent = '';
    }
355
 
356
    /**
     * Inline tag names handled by processDomElementSpanEtc(): their children
     * are folded into the current cell (or added as a cell comment when the
     * class attribute is 'comment'), and any matching entry in $formats is
     * applied to that cell.
     */
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
357
 
358
    /**
     * Handles the inline tags listed in SPAN_ETC.
     *
     * An element whose class is exactly 'comment' contributes its text as a
     * run in the comment of the current cell; otherwise its children are
     * processed into the current cell. Afterwards the tag's entry in $formats,
     * if any, is applied to the current cell. Tags outside SPAN_ETC are passed
     * to the hr handler.
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tagName = (string) $child->nodeName;
        if (!in_array($tagName, self::SPAN_ETC, true)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $isComment = isset($attributeArray['class']) && $attributeArray['class'] === 'comment';
        if ($isComment) {
            $commentText = $sheet->getComment($column . $row)->getText();
            $commentText->createTextRun($child->textContent);
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }

        // Styled after the children, so the cursor position they leave behind is used
        if (isset($this->formats[$tagName])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tagName]);
        }
    }
376
 
377
    /**
     * Handles an hr element: flushes the pending content, moves down one row,
     * applies the hr entry of $formats (if any) to that row's cell and moves
     * down again. Every element, hr included, then continues to the br handler.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tagName = $child->nodeName;
        if ($tagName === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            $row += 1;
            if (isset($this->formats[$tagName])) {
                $ruleStyle = $this->formats[$tagName];
                $sheet->getStyle($column . $row)->applyFromArray($ruleStyle);
            }
            $row += 1;
        }

        // fall through to br
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
390
 
391
    /**
     * Handles br and hr elements as line breaks; other elements go on to the
     * anchor handler.
     *
     * Inside a table the break becomes a newline in the current cell, which is
     * set to wrap text. Outside a table the pending content is flushed and the
     * row cursor advances.
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array($child->nodeName, ['br', 'hr'], true)) {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            $cellContent .= "\n";
            $alignment = $sheet->getStyle($column . $row)->getAlignment();
            $alignment->setWrapText(true);

            return;
        }

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        ++$row;
    }
407
 
408
    /**
     * Handles an anchor element; other elements go on to the
     * heading/list/paragraph handler.
     *
     * Only the href attribute has an effect: it becomes the hyperlink URL of
     * the current cell, and the 'a' entry of $formats (if any) is applied to
     * that cell. All other attributes, including the 'comment-indicator'
     * class, are ignored. The anchor's children are then processed into the
     * current cell.
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Direct lookup instead of looping over every attribute with a
        // loosely-compared switch whose only other case did nothing.
        if (isset($attributeArray['href'])) {
            $coordinate = $column . $row;
            $sheet->getCell($coordinate)->getHyperlink()->setUrl($attributeArray['href']);
            if (isset($this->formats[$child->nodeName])) {
                $sheet->getStyle($coordinate)->applyFromArray($this->formats[$child->nodeName]);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
433
 
434
    /**
     * Block-level tag names handled by processDomElementH1Etc(): inside a
     * table they start a new line within the current cell, outside a table
     * they are flushed to a row of their own.
     */
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
435
 
436
    /**
     * Handles the block-level tags listed in H1_ETC; other elements go on to
     * the list-item handler.
     *
     * Inside a table the element's text is appended to the current cell on a
     * new line and the cell is set to wrap. Outside a table any pending
     * content is flushed to its own row first, then the element's text is
     * flushed, the tag's entry in $formats (if any) is applied, and the cursor
     * moves to column 'A' of the next row.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::H1_ETC, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a \n.
            //    Compared against '' explicitly: a truthiness test would treat
            //    the content "0" as empty and drop the line break.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent !== '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        if (isset($this->formats[$child->nodeName])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
        }

        ++$row;
        $column = 'A';
    }
463
 
464
    /**
     * Handles a list item; other elements go on to the image handler.
     *
     * Inside a table the item's text is appended to the current cell on a new
     * line. Outside a table pending content is flushed, the row cursor
     * advances, the item's text is flushed to the new row and the column
     * cursor returns to 'A'.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a \n.
            //    Compared against '' explicitly: a truthiness test would treat
            //    the content "0" as empty and drop the line break.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent !== '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        }
        ++$row;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = 'A';
    }
484
 
485
    /**
     * Inserts an img element at the current cell via insertImage(); other
     * elements go on to the table handler.
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
493
 
494
    /**
     * Column that the next col element's inline style is applied to. Reset to
     * 'A' whenever a table element is entered and advanced after each col.
     */
    private string $currentColumn = 'A';
495
 
496
    /**
     * Handles a table element; other elements go on to the row handler.
     *
     * Pending content is flushed and a new table level is opened. A nested
     * table steps back one row (never above row 1) before its children are
     * processed. After the table closes, the cursor moves one column right if
     * more than one table level is still open, otherwise one row down.
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->currentColumn = 'A';
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = $this->setTableStartColumn($column);

        $openedNested = $this->tableLevel > 1;
        if ($openedNested && $row > 1) {
            --$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $column = $this->releaseTableStartColumn();

        // The level is read again here: it has just been decremented
        if ($this->tableLevel > 1) {
            ++$column;

            return;
        }

        ++$row;
    }
516
 
517
    /**
     * Handles col and tr elements; other elements go on to the cell handler.
     *
     * A col element applies its inline style to the tracked column (row given
     * as -1) and advances that column. A tr element restarts at the table's
     * start column with an empty buffer, processes its cells, applies a height
     * attribute to the row if present and advances the row cursor.
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tagName = $child->nodeName;

        if ($tagName === 'col') {
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
            ++$this->currentColumn;

            return;
        }

        if ($tagName !== 'tr') {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $column = $this->getTableStartColumn();
        $cellContent = '';
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // NOTE(review): the height is passed through unconverted here, whereas
        // processDomElementHeight() routes it through CssDimension - confirm intended.
        if (isset($attributeArray['height'])) {
            $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
        }

        ++$row;
    }
536
 
537
    /**
     * Last link of the handler chain: td and th elements go to the cell
     * handler, every other element simply has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isTableCell = in_array($child->nodeName, ['td', 'th'], true);
        if ($isTableCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
545
 
546
    /**
     * Applies a bgcolor attribute, if present, as a solid fill on the cell.
     * The attribute value is converted with getStyleColor().
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['bgcolor'])) {
            return;
        }

        $fillColor = ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])];
        $fillStyle = [
            'fill' => [
                'fillType' => Fill::FILL_SOLID,
                'color' => $fillColor,
            ],
        ];
        $sheet->getStyle("$column$row")->applyFromArray($fillStyle);
    }
559
 
560
    /**
     * Applies a width attribute, if present, to the column dimension after
     * converting it with CssDimension::width().
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['width'])) {
            return;
        }

        $dimension = new CssDimension($attributeArray['width']);
        $sheet->getColumnDimension($column)->setWidth($dimension->width());
    }
566
 
567
    /**
     * Applies a height attribute, if present, to the row dimension after
     * converting it with CssDimension::height().
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        if (!isset($attributeArray['height'])) {
            return;
        }

        $dimension = new CssDimension($attributeArray['height']);
        $sheet->getRowDimension($row)->setRowHeight($dimension->height());
    }
573
 
574
    /**
     * Applies an align attribute, if present, as the cell's horizontal alignment.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['align'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setHorizontal($attributeArray['align']);
    }
580
 
581
    /**
     * Applies a valign attribute, if present, as the cell's vertical alignment.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['valign'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setVertical($attributeArray['valign']);
    }
587
 
588
    /**
     * Applies a data-format attribute, if present, as the cell's number format code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['data-format'])) {
            return;
        }

        $numberFormat = $sheet->getStyle($column . $row)->getNumberFormat();
        $numberFormat->setFormatCode($attributeArray['data-format']);
    }
594
 
595
    /**
     * Handles a td or th element.
     *
     * Skips columns already claimed by a rowspan, processes the cell's
     * children, applies inline style and the presentational attributes, and
     * flushes the content. A rowspan and/or colspan attribute merges the
     * covered range; cells covered by a range that involves a rowspan are
     * remembered in $this->rowspan. The column cursor ends one past the last
     * column the cell occupies.
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        // Move past cells already occupied by a rowspan
        while (isset($this->rowspan[$column . $row])) {
            ++$column;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // apply inline style
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $spansRows = isset($attributeArray['rowspan']);
        $spansColumns = isset($attributeArray['colspan']);

        if ($spansRows || $spansColumns) {
            // Last column of the merge: the current column advanced colspan - 1 times
            $lastColumn = $column;
            if ($spansColumns) {
                $extraColumns = (int) $attributeArray['colspan'] - 1;
                for ($step = 0; $step < $extraColumns; ++$step) {
                    ++$lastColumn;
                }
            }

            $lastRow = $spansRows ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
            $range = $column . $row . ':' . $lastColumn . $lastRow;

            // Only merges that involve a rowspan are recorded for later rows
            if ($spansRows) {
                foreach (Coordinate::extractAllCellReferencesInRange($range) as $coordinate) {
                    $this->rowspan[$coordinate] = true;
                }
            }

            $sheet->mergeCells($range);

            if ($spansColumns) {
                $column = $lastColumn;
            }
        }

        ++$column;
    }
645
 
646
    /**
     * Walks the direct children of a DOM node.
     *
     * Text nodes are trimmed, have each run of whitespace collapsed to a
     * single space, and are appended to the content buffer. Element nodes are
     * dispatched to the handler chain starting at processDomElementBody().
     * Other node types are ignored.
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $node) {
            if ($node instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $node);

                continue;
            }

            if (!($node instanceof DOMText)) {
                continue;
            }

            $rawText = trim($node->nodeValue ?? '');
            $domText = (string) preg_replace('/\s+/u', ' ', $rawText);
            //    simply append the text if the cell content is a plain text string
            //    TODO: a rich text run would need to be appended differently
            if (is_string($cellContent)) {
                $cellContent .= $domText;
            }
        }
    }
662
 
663
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The file must pass canRead(). Its content is run through the security
     * scanner, every non-ASCII character is replaced by a numeric character
     * reference, and the result is parsed with DOMDocument::loadHTML().
     * Document properties and then the document itself are imported.
     *
     * @param string $filename
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
    {
        // Validate
        if ($this->canRead($filename) === false) {
            throw new Exception($filename . ' is an Invalid HTML file.');
        }

        $dom = new DOMDocument();
        $failure = null;

        try {
            $markup = $this->getSecurityScannerOrThrow()->scanFile($filename);
            // Matches every code point from U+0080 upwards
            $nonAsciiPattern = "/[\u{80}-\u{10ffff}]/u";
            /** @var callable */
            $encoder = [self::class, 'replaceNonAscii'];
            $markup = preg_replace_callback($nonAsciiPattern, $encoder, $markup);
            $loaded = ($markup !== null) && $dom->loadHTML($markup);
        } catch (Throwable $caught) {
            $failure = $caught;
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $failure);
        }

        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
699
 
700
    /**
     * Copies document properties from the meta elements of the DOM into the
     * spreadsheet's Properties object.
     *
     * Meta elements with an empty content attribute are skipped. Recognised
     * names map to the core property setters; names of the form
     * custom.<type>.<name>, with type one of bool, date, float, int or string,
     * become custom properties. A non-empty document base URI is stored as
     * the hyperlink base.
     */
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
    {
        $properties = $spreadsheet->getProperties();

        foreach ($dom->getElementsByTagName('meta') as $metaElement) {
            $value = (string) $metaElement->getAttribute('content');
            if ($value === '') {
                continue;
            }

            $name = (string) $metaElement->getAttribute('name');
            switch ($name) {
                case 'author':
                    $properties->setCreator($value);

                    break;
                case 'category':
                    $properties->setCategory($value);

                    break;
                case 'company':
                    $properties->setCompany($value);

                    break;
                case 'created':
                    $properties->setCreated($value);

                    break;
                case 'description':
                    $properties->setDescription($value);

                    break;
                case 'keywords':
                    $properties->setKeywords($value);

                    break;
                case 'lastModifiedBy':
                    $properties->setLastModifiedBy($value);

                    break;
                case 'manager':
                    $properties->setManager($value);

                    break;
                case 'modified':
                    $properties->setModified($value);

                    break;
                case 'subject':
                    $properties->setSubject($value);

                    break;
                case 'title':
                    $properties->setTitle($value);

                    break;
                default:
                    if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $name, $matches) === 1) {
                        [, $customType, $customName] = $matches;
                        if ($customType === 'bool') {
                            $properties->setCustomProperty($customName, (bool) $value, Properties::PROPERTY_TYPE_BOOLEAN);
                        } elseif ($customType === 'float') {
                            $properties->setCustomProperty($customName, (float) $value, Properties::PROPERTY_TYPE_FLOAT);
                        } elseif ($customType === 'int') {
                            $properties->setCustomProperty($customName, (int) $value, Properties::PROPERTY_TYPE_INTEGER);
                        } elseif ($customType === 'date') {
                            $properties->setCustomProperty($customName, $value, Properties::PROPERTY_TYPE_DATE);
                        } else {
                            // string
                            $properties->setCustomProperty($customName, $value, Properties::PROPERTY_TYPE_STRING);
                        }
                    }
            }
        }

        $baseUri = $dom->baseURI;
        if (!empty($baseUri)) {
            $properties->setHyperlinkBase($baseUri);
        }
    }
782
 
783
    /**
     * preg_replace_callback() callback: turns the matched character into a
     * decimal numeric character reference such as '&#233;'.
     *
     * @param array $matches match array; element 0 is the matched character
     */
    private static function replaceNonAscii(array $matches): string
    {
        $codePoint = mb_ord($matches[0], 'UTF-8');

        return '&#' . $codePoint . ';';
    }
787
 
788
    /**
     * Build a spreadsheet from an HTML string.
     *
     * The content is run through the security scanner, every character from
     * U+0080 upward is rewritten as a numeric character reference, and the
     * result is parsed with DOMDocument::loadHTML(). Any failure along the
     * way (including a thrown error) is reported as one reader Exception that
     * carries the original throwable, if there was one, as its previous.
     *
     * @param string $content HTML markup to import
     * @param null|Spreadsheet $spreadsheet workbook to fill; a new one is created when null
     */
    public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        $dom = new DOMDocument();
        $failure = null;

        try {
            $scanned = $this->getSecurityScannerOrThrow()->scan($content);
            $lowend = "\u{80}";
            $highend = "\u{10ffff}";
            $escaped = preg_replace_callback(
                "/[$lowend-$highend]/u",
                static function (array $matches): string {
                    return self::replaceNonAscii($matches);
                },
                $scanned
            );
            // A null result means the regex engine failed; treat it like a parse failure.
            $loaded = $escaped !== null && $dom->loadHTML($escaped);
        } catch (Throwable $failure) {
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load content as a DOM Document', 0, $failure);
        }

        if ($spreadsheet === null) {
            $spreadsheet = new Spreadsheet();
        }
        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
818
 
819
    /**
     * Transfer a parsed HTML document onto the configured sheet of a workbook.
     *
     * Sheets are appended until the configured sheet index exists; that sheet
     * is then made active and filled by walking the DOM from the document node.
     * The rowspan bookkeeping is reset before the walk starts.
     */
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        $targetIndex = $this->sheetIndex;
        while ($targetIndex >= $spreadsheet->getSheetCount()) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($targetIndex);

        // Discard white space
        $document->preserveWhiteSpace = false;

        // Start state for the DOM walk; these are passed as variables so the
        // walker can update them.
        $row = 0;
        $column = 'A';
        $content = '';
        $this->rowspan = [];

        $worksheet = $spreadsheet->getActiveSheet();
        $this->processDomElement($document, $worksheet, $row, $column, $content);

        return $spreadsheet;
    }
841
 
842
    /**
     * Index of the sheet this reader loads into.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        $index = $this->sheetIndex;

        return $index;
    }
851
 
852
    /**
     * Choose the sheet this reader loads into.
     *
     * @param int $sheetIndex Sheet index
     *
     * @return $this the reader itself, so calls can be chained
     */
    public function setSheetIndex($sheetIndex)
    {
        $reader = $this;
        $reader->sheetIndex = $sheetIndex;

        return $reader;
    }
865
 
866
    /**
     * Apply the declarations of an inline `style` attribute to a cell or range.
     *
     * The target is the cell at $column$row, widened to a range when the
     * attribute array carries `rowspan` and/or `colspan`. When there is no
     * usable cell position ($row <= 0 or $column === '') the declarations are
     * applied to a detached Style object, so only the `width` and `height`
     * handlers (which have their own guards) can still touch the sheet.
     *
     * Handled properties: background, background-color, color, border,
     * border-top, border-bottom, border-left, border-right, font-size,
     * font-weight, font-style, font-family, text-decoration, text-align,
     * vertical-align, width, height, word-wrap and text-indent. Any other
     * property is ignored.
     *
     * @param int $row
     * @param string $column
     * @param array $attributeArray element attributes; only 'style', 'rowspan' and 'colspan' are read
     */
    private function applyInlineStyle(Worksheet &$sheet, $row, $column, $attributeArray): void
    {
        if (!isset($attributeArray['style'])) {
            return;
        }

        if ($row <= 0 || $column === '') {
            $cellStyle = new Style();
        } else {
            $hasRowspan = isset($attributeArray['rowspan']);
            $hasColspan = isset($attributeArray['colspan']);
            if ($hasRowspan || $hasColspan) {
                $columnTo = $column;
                if ($hasColspan) {
                    // String increment walks the column letters (A -> B ... Z -> AA).
                    for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
                        ++$columnTo;
                    }
                }
                // Never let a zero or negative rowspan produce an end row above the start row.
                $rowTo = $hasRowspan ? max($row, $row + (int) $attributeArray['rowspan'] - 1) : $row;
                $cellStyle = $sheet->getStyle($column . $row . ':' . $columnTo . $rowTo);
            } else {
                $cellStyle = $sheet->getStyle($column . $row);
            }
        }

        // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
        foreach (explode(';', $attributeArray['style']) as $declaration) {
            // Split on the first colon only, so a value containing ':' is kept whole.
            $parts = explode(':', $declaration, 2);
            $styleName = trim($parts[0]);
            $styleValue = isset($parts[1]) ? trim($parts[1]) : null;
            $styleValueString = (string) $styleValue;

            if ($styleName === '') {
                continue;
            }

            switch ($styleName) {
                case 'background':
                case 'background-color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);

                    break;
                case 'color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);

                    break;

                case 'border':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');

                    break;

                case 'border-top':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');

                    break;

                case 'border-bottom':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');

                    break;

                case 'border-left':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');

                    break;

                case 'border-right':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');

                    break;

                case 'font-size':
                    $cellStyle->getFont()->setSize(
                        (float) $styleValue
                    );

                    break;

                case 'font-weight':
                    // Compare numerically. Under PHP 8 a keyword such as
                    // 'normal' compared directly with the int 500 falls back
                    // to a string comparison and would be treated as bold.
                    if ($styleValue === 'bold' || (float) $styleValueString >= 500) {
                        $cellStyle->getFont()->setBold(true);
                    }

                    break;

                case 'font-style':
                    if ($styleValue === 'italic') {
                        $cellStyle->getFont()->setItalic(true);
                    }

                    break;

                case 'font-family':
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));

                    break;

                case 'text-decoration':
                    switch ($styleValue) {
                        case 'underline':
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);

                            break;
                        case 'line-through':
                            $cellStyle->getFont()->setStrikethrough(true);

                            break;
                    }

                    break;

                case 'text-align':
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);

                    break;

                case 'vertical-align':
                    $cellStyle->getAlignment()->setVertical($styleValueString);

                    break;

                case 'width':
                    if ($column !== '') {
                        $sheet->getColumnDimension($column)->setWidth(
                            (new CssDimension($styleValueString))->width()
                        );
                    }

                    break;

                case 'height':
                    if ($row > 0) {
                        $sheet->getRowDimension($row)->setRowHeight(
                            (new CssDimension($styleValueString))->height()
                        );
                    }

                    break;

                case 'word-wrap':
                    $cellStyle->getAlignment()->setWrapText(
                        $styleValue === 'break-word'
                    );

                    break;

                case 'text-indent':
                    $cellStyle->getAlignment()->setIndent(
                        (int) str_replace(['px'], '', $styleValueString)
                    );

                    break;
            }
        }
    }
1053
 
1054
    /**
     * Convert a CSS colour value into a bare hexadecimal colour string.
     *
     * A value starting with '#' is returned without the marker. Three-digit
     * shorthand (#RGB) is expanded to six digits (RRGGBB) by doubling each
     * digit, as CSS defines it; other lengths are returned as written. Any
     * value that does not start with '#' is handed to the Helper\Html
     * colour-name lookup and that result is returned unchanged.
     *
     * @param mixed $value colour as written in the style; cast to string
     *
     * @return null|string
     */
    public function getStyleColor($value)
    {
        $value = (string) $value;
        if (strpos($value, '#') === 0) {
            // Cast keeps the result a string even where substr() yields false.
            $hex = (string) substr($value, 1);
            if (preg_match('/^[0-9a-fA-F]{3}$/', $hex) === 1) {
                return $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
            }

            return $hex;
        }

        return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup($value);
    }
1070
 
1071
    /**
     * Anchor the image described by an attribute array at a cell.
     *
     * Nothing happens without a 'src' attribute. The URL-decoded 'src' becomes
     * the drawing path; 'alt' (if non-empty) becomes its name; 'width' and
     * 'height' (if non-zero) are applied, in that order, with proportional
     * resizing switched on. The cell's column width and row height are then
     * derived from the drawing's final dimensions.
     *
     * NOTE(review): 'src' is used as a path as-is; if the HTML is untrusted
     * the caller should confirm that is acceptable.
     *
     * @param string    $column
     * @param int       $row
     */
    private function insertImage(Worksheet $sheet, $column, $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }

        $path = urldecode($attributes['src']);
        $requestedWidth = isset($attributes['width']) ? (float) $attributes['width'] : null;
        $requestedHeight = isset($attributes['height']) ? (float) $attributes['height'] : null;
        $altText = $attributes['alt'] ?? null;

        $image = new Drawing();
        $image->setPath($path);
        $image->setWorksheet($sheet);
        $image->setCoordinates($column . $row);
        $image->setOffsetX(0);
        $image->setOffsetY(10);
        $image->setResizeProportional(true);

        if ($altText) {
            $image->setName($altText);
        }
        if ($requestedWidth) {
            $image->setWidth((int) $requestedWidth);
        }
        if ($requestedHeight) {
            $image->setHeight((int) $requestedHeight);
        }

        // Size the anchor cell from the drawing's resulting dimensions.
        $columnWidth = $image->getWidth() / 6;
        $sheet->getColumnDimension($column)->setWidth($columnWidth);

        $rowHeight = $image->getHeight() * 0.9;
        $sheet->getRowDimension($row)->setRowHeight($rowHeight);
    }
1114
 
1115
    /**
     * Border style keywords accepted in inline styles, mapped to the Border
     * style constant each one selects. Note that 'solid' selects the thin
     * border style.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1131
 
1132
    /**
     * Expose the keyword-to-border-style table used by this reader.
     *
     * @return array keyword => Border style constant
     */
    public static function getBorderMappings(): array
    {
        $mappings = self::BORDER_MAPPINGS;

        return $mappings;
    }
1136
 
1137
    /**
     * Look up the PhpSpreadsheet border style for an HTML border keyword.
     *
     * @param  string $style keyword to look up
     *
     * @return null|string the mapped border style, or null when the keyword is not in the table
     */
    public function getBorderStyle($style)
    {
        if (isset(self::BORDER_MAPPINGS[$style])) {
            return self::BORDER_MAPPINGS[$style];
        }

        return null;
    }
1148
 
1149
    /**
     * Apply a CSS border declaration value to one border of a style.
     *
     * The value is split on whitespace. The single keyword 'none' clears the
     * border. With three or more tokens the second is taken as the border
     * style keyword and the third as the colour (the first token is not
     * used); with fewer, the first is the style keyword and the second, if
     * present, the colour. The style keyword is mapped through
     * getBorderStyle() and the colour through getStyleColor().
     *
     * @param string $styleValue border declaration value
     * @param string $type key in the 'borders' style array, e.g. 'top' or 'allBorders'
     */
    private function setBorderStyle(Style $cellStyle, $styleValue, $type): void
    {
        $styleValue = trim($styleValue);
        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = null;
        } else {
            // Split on runs of whitespace so repeated spaces, tabs or line
            // breaks do not yield empty tokens that shift the positions.
            $borderArray = preg_split('/\s+/', $styleValue) ?: [];
            if (count($borderArray) >= 3) {
                $borderStyle = $borderArray[1];
                $color = $borderArray[2];
            } else {
                $borderStyle = $borderArray[0] ?? '';
                $color = $borderArray[1] ?? null;
            }
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1179
}