Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
 
5
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
6
use PhpOffice\PhpSpreadsheet\Cell\Cell;
7
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
8
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
9
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
10
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
11
use PhpOffice\PhpSpreadsheet\Spreadsheet;
12
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
13
 
14
class Csv extends BaseReader
15
{
16
    /** Encoding used when guessing was requested but nothing could be identified. */
    public const DEFAULT_FALLBACK_ENCODING = 'CP1252';

    /** Sentinel input encoding: asks the reader to guess the real encoding when opening the file. */
    public const GUESS_ENCODING = 'guess';

    // Byte-order mark, its length in bytes and (for UTF-16/32) the encoded
    // line-feed sequence for each Unicode flavour the reader can recognise.
    public const UTF8_BOM = "\xEF\xBB\xBF";
    public const UTF8_BOM_LEN = 3;

    public const UTF16BE_BOM = "\xfe\xff";
    public const UTF16BE_BOM_LEN = 2;
    public const UTF16BE_LF = "\x00\x0a";

    public const UTF16LE_BOM = "\xff\xfe";
    public const UTF16LE_BOM_LEN = 2;
    public const UTF16LE_LF = "\x0a\x00";

    public const UTF32BE_BOM = "\x00\x00\xfe\xff";
    public const UTF32BE_BOM_LEN = 4;
    public const UTF32BE_LF = "\x00\x00\x00\x0a";

    public const UTF32LE_BOM = "\xff\xfe\x00\x00";
    public const UTF32LE_BOM_LEN = 4;
    public const UTF32LE_LF = "\x0a\x00\x00\x00";
32
 
33
    /**
     * Encoding of the input. 'UTF-8' is read as-is; any other value causes the
     * file to be transcoded to UTF-8 when it is opened, and
     * self::GUESS_ENCODING triggers detection first.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Encoding handed to guessEncoding() as the result to use when no
     * encoding can be detected.
     *
     * @var string
     */
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Field delimiter; null until it is set explicitly, read from a "sep="
     * line, or inferred from the file contents.
     *
     * @var ?string
     */
    private $delimiter;

    /**
     * Field enclosure character.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Index of the worksheet that receives the loaded data.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * When true, output rows are numbered consecutively, counting only input
     * rows that actually produce a cell; when false the input row number is used.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * Escape character passed to fgetcsv().
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Optional callback invoked with each new reader at the end of construction.
     *
     * @var ?callable
     */
    private static $constructorCallback;

    /**
     * Whether to switch on the auto_detect_line_endings ini setting while
     * loading (the setting is deprecated as of PHP 8.1).
     *
     * @var bool
     */
    private $testAutodetect = true;

    /**
     * Whether numeric-looking strings are cast to int/float on load.
     *
     * @var bool
     */
    protected $castFormattedNumberToNumeric = false;

    /**
     * Whether a number format mask is derived from the original text when a
     * value is cast to a number.
     *
     * @var bool
     */
    protected $preserveNumericFormatting = false;

    /**
     * Whether empty-string fields are written to the sheet instead of skipped.
     *
     * @var bool
     */
    private $preserveNullString = false;
108
 
109
    /**
     * Build a CSV reader.
     *
     * Once the parent constructor has run, the callback registered through
     * setConstructorCallback() (if there is one) is called with this reader
     * so that it can adjust the defaults.
     */
    public function __construct()
    {
        parent::__construct();

        if (self::$constructorCallback !== null) {
            (self::$constructorCallback)($this);
        }
    }
120
 
121
    /**
     * Register (or clear, with null) the callback run by the constructor.
     *
     * The callback receives the newly built Csv reader as its only argument;
     * its return value is not used.
     *
     * @param ?callable $callback callback to store, or null to remove it
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }
131
 
132
    /**
     * Return the callback currently registered for the constructor, or null if none is set.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }
136
 
137
    /**
     * Set the encoding of the input data.
     *
     * Pass self::GUESS_ENCODING to have the encoding detected when the file is opened.
     *
     * @return $this
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }
143
 
144
    /**
     * Return the configured input encoding.
     */
    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }
148
 
149
    /**
     * Set the encoding to use when encoding detection finds nothing.
     *
     * @return $this
     */
    public function setFallbackEncoding(string $fallbackEncoding): self
    {
        $this->fallbackEncoding = $fallbackEncoding;

        return $this;
    }
155
 
156
    /**
     * Return the encoding used when encoding detection finds nothing.
     */
    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }
160
 
161
    /**
     * Position the file pointer at the start of the data, just past a UTF-8
     * BOM when the file begins with one.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        // fgets() with length N reads at most N - 1 bytes, i.e. exactly the BOM length here.
        $leadingBytes = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
        if ($leadingBytes === self::UTF8_BOM) {
            return;
        }

        // No BOM: go back so the bytes just read are treated as data.
        rewind($this->fileHandle);
    }
172
 
173
    /**
     * Look for an explicit "sep=X" directive on the next line of the file.
     *
     * When the line is exactly "sep=" plus one character (ignoring the line
     * ending), that character becomes the delimiter and the pointer stays
     * after the directive line. Otherwise the pointer is put back at the
     * start of the data via skipBOM(). Nothing happens if no line can be read.
     */
    protected function checkSeparator(): void
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            return;
        }

        $isSepDirective = stripos($firstLine, 'sep=') === 0
            && strlen(trim($firstLine, "\r\n")) === 5;

        if (!$isSepDirective) {
            $this->skipBOM();

            return;
        }

        $this->delimiter = substr($firstLine, 4, 1);
    }
191
 
192
    /**
     * Work out the delimiter when neither the file nor the caller supplied one.
     *
     * Uses the Delimiter inference helper; when the helper counted no lines or
     * cannot decide, its default delimiter is used. Afterwards the pointer is
     * reset to the start of the data.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $engine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

        // With zero lines there is nothing to infer from, so skip straight to the default.
        $detected = ($engine->linesCounted() === 0) ? null : $engine->infer();

        $this->delimiter = $detected ?? $engine->getDefaultDelimiter();
        $this->skipBOM();
    }
220
 
221
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The result always contains a single entry, at index 0, describing the CSV data.
     */
    public function listWorksheetInfo(string $filename): array
    {
        $this->openFileOrMemory($filename);
        $handle = $this->fileHandle;

        // Position on the first data row and settle on a delimiter.
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        $rowCount = 0;
        $maxColumnIndex = 0;
        while (is_array($row = fgetcsv($handle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter))) {
            ++$rowCount;
            $maxColumnIndex = max($maxColumnIndex, count($row) - 1);
        }

        $info = [
            'worksheetName' => 'Worksheet',
            'lastColumnLetter' => Coordinate::stringFromColumnIndex($maxColumnIndex + 1),
            'lastColumnIndex' => $maxColumnIndex,
            'totalRows' => $rowCount,
            'totalColumns' => $maxColumnIndex + 1,
        ];

        fclose($handle);

        return [$info];
    }
258
 
259
    /**
     * Load the given CSV file into a freshly created Spreadsheet.
     */
    protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
270
 
271
    /**
     * Load CSV data held in a string into a freshly created Spreadsheet.
     *
     * The string is exposed to the loader as a data: URI.
     */
    public function loadSpreadsheetFromString(string $contents): Spreadsheet
    {
        $dataUri = 'data://text/plain,' . urlencode($contents);

        return $this->loadStringOrFile($dataUri, new Spreadsheet(), true);
    }
282
 
283
    /**
     * Open the file for reading, leaving $this->fileHandle on UTF-8 data.
     *
     * If the input encoding is self::GUESS_ENCODING it is resolved first.
     * UTF-8 input is read straight from the file; any other encoding is
     * converted to UTF-8 and served from a php://memory stream instead.
     *
     * @throws ReaderException if the file is not readable as CSV, cannot be
     *                         read, or the memory stream cannot be created
     */
    private function openFileOrMemory(string $filename): void
    {
        if (!$this->canRead($filename)) {
            throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
        }

        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }

        $this->openFile($filename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        // Non-UTF-8 input: drop the raw handle and replace it with an in-memory UTF-8 copy.
        fclose($this->fileHandle);

        $entireFile = file_get_contents($filename);
        if ($entireFile === false) {
            // Fail loudly rather than leave $this->fileHandle on a closed stream.
            throw new ReaderException('Could not read file ' . $filename . ' for encoding conversion.');
        }

        $memoryHandle = fopen('php://memory', 'r+b');
        if ($memoryHandle === false) {
            throw new ReaderException('Could not open a memory stream to convert ' . $filename . '.');
        }

        $this->fileHandle = $memoryHandle;
        fwrite($memoryHandle, StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding));
        $this->skipBOM();
    }
306
 
307
    /**
     * Choose whether loading should try to enable the
     * auto_detect_line_endings ini setting.
     *
     * @return $this
     */
    public function setTestAutoDetect(bool $value): self
    {
        $this->testAutodetect = $value;

        return $this;
    }
313
 
314
    /**
     * Set the auto_detect_line_endings ini value, when that is enabled for this reader.
     *
     * @param ?string $value new ini value; null means "leave the setting alone"
     *
     * @return ?string the previous ini value if ini_set() returned a string, otherwise null
     */
    private function setAutoDetect(?string $value): ?string
    {
        if ($value === null || !$this->testAutodetect) {
            return null;
        }

        // Errors are suppressed on purpose: the ini setting is deprecated as of PHP 8.1.
        $previous = @ini_set('auto_detect_line_endings', $value);

        return is_string($previous) ? $previous : null;
    }
326
 
327
    /**
     * Configure conversion of numeric-looking strings.
     *
     * @param bool $castFormattedNumberToNumeric cast such strings to int/float on load
     * @param bool $preserveNumericFormatting also derive a number format mask from the original text
     */
    public function castFormattedNumberToNumeric(
        bool $castFormattedNumberToNumeric,
        bool $preserveNumericFormatting = false
    ): void {
        $this->preserveNumericFormatting = $preserveNumericFormatting;
        $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
    }
334
 
335
    /**
     * Open a data: URI in binary read mode and store the handle on the reader.
     *
     * @throws ReaderException if the URI cannot be opened
     */
    private function openDataUri(string $filename): void
    {
        $handle = fopen($filename, 'rb');
        if ($handle !== false) {
            $this->fileHandle = $handle;

            return;
        }

        // @codeCoverageIgnoreStart
        throw new ReaderException('Could not open file ' . $filename . ' for reading.');
        // @codeCoverageIgnoreEnd
    }
349
 
350
    /**
     * Load a CSV file into the supplied Spreadsheet and return that Spreadsheet.
     *
     * The filename is treated as a regular file path, not as a data: URI.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        $isDataUri = false;

        return $this->loadStringOrFile($filename, $spreadsheet, $isDataUri);
    }
357
 
358
    /**
     * Load CSV data from a file or a data: URI into the given Spreadsheet.
     *
     * Rows are read with fgetcsv(). Each field goes through boolean and
     * (optional) number conversion, and is written to the sheet selected by
     * the sheet index if it is non-empty (or null strings are preserved) and
     * the read filter accepts the cell.
     *
     * The stream is closed and the auto_detect_line_endings ini value is
     * restored even if loading throws part-way through.
     *
     * @param string $filename file path, or data: URI when $dataUri is true
     * @param Spreadsheet $spreadsheet spreadsheet that receives the data
     * @param bool $dataUri whether $filename is a data: URI
     *
     * @return Spreadsheet the spreadsheet passed in
     */
    private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
    {
        // Deprecated in Php8.1
        $iniset = $this->setAutoDetect('1');

        try {
            // Open file
            if ($dataUri) {
                $this->openDataUri($filename);
            } else {
                $this->openFileOrMemory($filename);
            }
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Make sure the target sheet exists, then select it
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // $currentRow follows the input; $outRow is the sheet row written to
                $currentRow = 1;
                $outRow = 0;

                // Loop through each line of the file in turn
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                $valueBinder = Cell::getValueBinder();
                $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
                while (is_array($rowData)) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        $this->convertBoolean($rowDatum, $preserveBooleanString);
                        $numberFormatMask = $this->convertFormattedNumber($rowDatum);
                        if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                // Contiguous mode: advance the output row once per input row that writes anything
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }
                            // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                            $sheet->getCell($columnLetter . $outRow)->getStyle()
                                ->getNumberFormat()
                                ->setFormatCode($numberFormatMask);
                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        ++$columnLetter;
                    }
                    $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                    ++$currentRow;
                }
            } finally {
                // Close file (guarded: the handle may already be invalid after a failed open path)
                if (is_resource($fileHandle)) {
                    fclose($fileHandle);
                }
            }
        } finally {
            // Put the ini setting back whatever happened above
            $this->setAutoDetect($iniset);
        }

        // Return
        return $spreadsheet;
    }
429
 
430
    /**
     * Turn the strings true/false (plain or as returned by Calculation) into
     * booleans, and null into an empty string.
     *
     * String values are only converted when $preserveBooleanString is false;
     * comparison is case-insensitive.
     *
     * @param mixed $rowDatum value to convert in place
     */
    private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
    {
        if (!is_string($rowDatum) || $preserveBooleanString) {
            $rowDatum = $rowDatum ?? '';

            return;
        }

        $isTrue = strcasecmp(Calculation::getTRUE(), $rowDatum) === 0
            || strcasecmp('true', $rowDatum) === 0;
        if ($isTrue) {
            $rowDatum = true;

            return;
        }

        $isFalse = strcasecmp(Calculation::getFALSE(), $rowDatum) === 0
            || strcasecmp('false', $rowDatum) === 0;
        if ($isFalse) {
            $rowDatum = false;
        }
    }
447
 
448
    /**
     * Cast a numeric-looking string to int or float, when that option is enabled.
     *
     * The thousands separator is stripped and the decimal separator replaced
     * by "." before the numeric test. The value becomes a float if the
     * original text contained the decimal separator, otherwise an int.
     *
     * @param mixed $rowDatum value to convert in place
     *
     * @return string number format mask for the cell: General unless numeric
     *                formatting is preserved, in which case it mirrors the
     *                thousands grouping and (up to 6) decimals of the text
     */
    private function convertFormattedNumber(&$rowDatum): string
    {
        $formatMask = NumberFormat::FORMAT_GENERAL;
        if ($this->castFormattedNumberToNumeric !== true || !is_string($rowDatum)) {
            return $formatMask;
        }

        $thousandsSeparator = StringHelper::getThousandsSeparator();
        $decimalSeparator = StringHelper::getDecimalSeparator();

        $normalised = str_replace([$thousandsSeparator, $decimalSeparator], ['', '.'], $rowDatum);
        if (!is_numeric($normalised)) {
            return $formatMask;
        }

        $decimalPos = strpos($rowDatum, $decimalSeparator);
        $hasDecimals = $decimalPos !== false;

        if ($this->preserveNumericFormatting === true) {
            $formatMask = (strpos($rowDatum, $thousandsSeparator) === false) ? '0' : '#,##0';
            if ($hasDecimals) {
                $fractionDigits = strlen($rowDatum) - $decimalPos - 1;
                $formatMask .= '.' . str_repeat('0', min($fractionDigits, 6));
            }
        }

        $rowDatum = $hasDecimals ? (float) $normalised : (int) $normalised;

        return $formatMask;
    }
480
 
481
    /**
     * Return the current delimiter, or null if none has been set or detected yet.
     */
    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }
485
 
486
    /**
     * Set the delimiter; null leaves it to be inferred from the file.
     *
     * @return $this
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }
492
 
493
    /**
     * Return the enclosure character.
     */
    public function getEnclosure(): string
    {
        return $this->enclosure;
    }
497
 
498
    /**
     * Set the enclosure character; an empty string selects the default double quote.
     *
     * @return $this
     */
    public function setEnclosure(string $enclosure): self
    {
        $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

        return $this;
    }
507
 
508
    /**
     * Return the index of the worksheet that data is loaded into.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }
512
 
513
    /**
     * Set the index of the worksheet that data is loaded into.
     *
     * @return $this
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }
519
 
520
    /**
     * Enable or disable contiguous row loading.
     *
     * @return $this
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }
526
 
527
    /**
     * Report whether contiguous row loading is enabled.
     */
    public function getContiguous(): bool
    {
        return $this->contiguous;
    }
531
 
532
    /**
     * Set the escape character passed to fgetcsv().
     *
     * @return $this
     */
    public function setEscapeCharacter(string $escapeCharacter): self
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
538
 
539
    /**
     * Return the escape character passed to fgetcsv().
     */
    public function getEscapeCharacter(): string
    {
        return $this->escapeCharacter;
    }
543
 
544
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A .csv or .tsv extension (any letter case)
     * is then accepted outright; otherwise the detected MIME type must be one
     * of the supported text/CSV types.
     */
    public function canRead(string $filename): bool
    {
        // The file has to exist and be openable
        try {
            $this->openFile($filename);
        } catch (ReaderException $e) {
            return false;
        }

        fclose($this->fileHandle);

        // A known extension is trusted without looking at the contents
        $extension = strtolower(/** @scrutinizer ignore-type */ pathinfo($filename, PATHINFO_EXTENSION));
        if ($extension === 'csv' || $extension === 'tsv') {
            return true;
        }

        // Otherwise decide from the guessed MIME type
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array(mime_content_type($filename), $supportedTypes, true);
    }
575
 
576
    /**
     * Set $encoding to $setEncoding if the first occurrence of $compare in
     * $contents starts at a multiple of its own length.
     *
     * Does nothing when $encoding has already been decided (is non-empty).
     *
     * @param string $encoding encoding found so far; updated in place
     * @param string $contents data to inspect (not modified)
     * @param string $compare byte sequence to look for
     * @param string $setEncoding encoding name to assign on a match
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding !== '') {
            return;
        }

        $offset = strpos($contents, $compare);
        if ($offset === false) {
            return;
        }

        if ($offset % strlen($compare) === 0) {
            $encoding = $setEncoding;
        }
    }
585
 
586
    /**
     * Guess the encoding of a file that has no byte-order mark.
     *
     * Looks for an aligned encoded line-feed of UTF-32BE, UTF-32LE, UTF-16BE
     * and UTF-16LE, in that order; failing that, reports UTF-8 if the
     * contents are valid UTF-8.
     *
     * @return string encoding name, or '' when nothing matched
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        // file_get_contents() returns false on failure; normalise it to an empty
        // string explicitly instead of relying on implicit coercion at the
        // string-typed by-reference parameter below.
        $contents = file_get_contents($filename);
        if ($contents === false) {
            $contents = '';
        }

        $lineFeedSignatures = [
            [self::UTF32BE_LF, 'UTF-32BE'],
            [self::UTF32LE_LF, 'UTF-32LE'],
            [self::UTF16BE_LF, 'UTF-16BE'],
            [self::UTF16LE_LF, 'UTF-16LE'],
        ];

        $encoding = '';
        foreach ($lineFeedSignatures as [$lineFeed, $candidate]) {
            self::guessEncodingTestNoBom($encoding, $contents, $lineFeed, $candidate);
        }

        // An empty pattern with the /u modifier only matches valid UTF-8 subjects.
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }
600
 
601
    /**
     * Set $encoding to $setEncoding if $first4 begins with the BOM $compare.
     *
     * Does nothing when $encoding has already been decided (is non-empty).
     *
     * @param string $encoding encoding found so far; updated in place
     * @param string $first4 leading bytes of the file
     * @param string $compare byte-order mark to test for
     * @param string $setEncoding encoding name to assign on a match
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding !== '') {
            return;
        }

        if (strncmp($first4, $compare, strlen($compare)) === 0) {
            $encoding = $setEncoding;
        }
    }
609
 
610
    /**
     * Identify the encoding of a file from its byte-order mark.
     *
     * @return string encoding name, or '' when the file cannot be read or has no known BOM
     */
    private static function guessEncodingBom(string $filename): string
    {
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 === false) {
            return '';
        }

        // Order matters: the UTF-32LE mark starts with the UTF-16LE mark, so it is tried first.
        $byteOrderMarks = [
            [self::UTF8_BOM, 'UTF-8'],
            [self::UTF16BE_BOM, 'UTF-16BE'],
            [self::UTF32BE_BOM, 'UTF-32BE'],
            [self::UTF32LE_BOM, 'UTF-32LE'],
            [self::UTF16LE_BOM, 'UTF-16LE'],
        ];

        $encoding = '';
        foreach ($byteOrderMarks as [$bom, $candidate]) {
            self::guessEncodingTestBom($encoding, $first4, $bom, $candidate);
        }

        return $encoding;
    }
624
 
625
    /**
     * Guess the encoding of a file.
     *
     * A byte-order mark is checked first, then the file contents; if neither
     * identifies an encoding, $dflt is returned.
     *
     * @param string $filename file to inspect
     * @param string $dflt encoding to report when nothing is detected
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $guess = self::guessEncodingBom($filename);
        if ($guess !== '') {
            return $guess;
        }

        $guess = self::guessEncodingNoBom($filename);

        return ($guess !== '') ? $guess : $dflt;
    }
634
 
635
    /**
     * Choose whether empty-string fields are written to the sheet rather than skipped.
     *
     * @return $this
     */
    public function setPreserveNullString(bool $value): self
    {
        $this->preserveNullString = $value;

        return $this;
    }
641
 
642
    /**
     * Report whether empty-string fields are written to the sheet rather than skipped.
     */
    public function getPreserveNullString(): bool
    {
        return $this->preserveNullString;
    }
646
}