Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\CSV;
6
 
7
use OpenSpout\Common\Entity\Cell;
8
use OpenSpout\Common\Entity\Row;
9
use OpenSpout\Common\Exception\EncodingConversionException;
10
use OpenSpout\Common\Helper\EncodingHelper;
11
use OpenSpout\Reader\RowIteratorInterface;
12
 
13
/**
 * Iterate over CSV rows.
 *
 * Rows are read lazily with fgetcsv() from the file pointer handed to the
 * constructor. Every cell is passed through EncodingHelper::attemptConversionToUTF8()
 * and wrapped in a Cell; the most recently read row is kept in a buffer so that
 * current() can return it. Empty lines are skipped unless
 * Options::SHOULD_PRESERVE_EMPTY_ROWS is enabled.
 */
final class RowIterator implements RowIteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates for very long lines).
     */
    public const MAX_READ_BYTES_PER_LINE = 0;

    /** @var null|resource Pointer to the CSV file to read */
    private $filePointer;

    /** @var int Number of rows read since the last rewind (also used as the iterator key) */
    private int $numReadRows = 0;

    /** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
    private ?Row $rowBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    private bool $hasReachedEndOfFile = false;

    /**
     * @param resource       $filePointer    Pointer to the CSV file to read
     * @param Options        $options        Reader options (encoding, delimiter, enclosure, empty-row handling)
     * @param EncodingHelper $encodingHelper Helper to work with different encodings
     */
    public function __construct(
        $filePointer,
        private readonly Options $options,
        private readonly EncodingHelper $encodingHelper,
    ) {
        $this->filePointer = $filePointer;
    }

    /**
     * Rewind the Iterator to the first element.
     *
     * Moves the file cursor just past the BOM (if any), resets the row counter
     * and buffer, then loads the first row.
     *
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    public function rewind(): void
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowBuffer = null;

        $this->next();
    }

    /**
     * Checks if current position is valid.
     *
     * @see http://php.net/manual/en/iterator.valid.php
     */
    public function valid(): bool
    {
        return null !== $this->filePointer && !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     *
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    public function next(): void
    {
        $this->hasReachedEndOfFile = feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Return the current element from the buffer.
     *
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return null|Row The buffered row, or null if no row has been read since the last rewind
     */
    public function current(): ?Row
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element.
     *
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int Number of rows read so far (the first row has key 1)
     */
    public function key(): int
    {
        return $this->numReadRows;
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     */
    private function rewindAndSkipBom(): void
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM(
            $this->filePointer,
            $this->options->ENCODING
        );

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Reads lines until one should be returned, then stores it in the row
     * buffer; flags end of file when nothing more could be read.
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    private function readDataForNextRow(): void
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if (false === $rowData) {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;

            return;
        }

        // The string cast turns NULL cell values (blank lines) into empty strings.
        $cells = array_map(
            static fn ($cellValue) => Cell::fromValue((string) $cellValue),
            $rowData
        );

        $this->rowBuffer = new Row($cells, null);
        ++$this->numReadRows;
    }

    /**
     * @param array<int, null|string>|false $currentRowData
     *
     * @return bool Whether we need to keep reading (true) or the data for the current row can be returned (false)
     */
    private function shouldReadNextRow(array|false $currentRowData): bool
    {
        $hasSuccessfullyFetchedRowData = (false !== $currentRowData);
        $hasNowReachedEndOfFile = feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        // Keep reading after a failed read that is not yet at EOF, or when the
        // line is empty and empty rows are not to be preserved.
        return
            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile)
            || (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyLine);
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
     *
     * @return array<int, null|string>|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    private function getNextUTF8EncodedRow(): array|false
    {
        $encodedRowData = fgetcsv(
            $this->filePointer,
            self::MAX_READ_BYTES_PER_LINE,
            $this->options->FIELD_DELIMITER,
            $this->options->FIELD_ENCLOSURE,
            ''
        );
        if (false === $encodedRowData) {
            return false;
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            // fgetcsv() reports a blank line as a single NULL cell. ltrim()/rtrim()
            // reject NULL under strict_types, so only real strings are trimmed;
            // NULL is forwarded untouched so isEmptyLine() can still detect it.
            if (null !== $cellValue) {
                switch ($this->options->ENCODING) {
                    case EncodingHelper::ENCODING_UTF16_LE:
                    case EncodingHelper::ENCODING_UTF32_LE:
                        // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                        $cellValue = ltrim($cellValue);

                        break;

                    case EncodingHelper::ENCODING_UTF16_BE:
                    case EncodingHelper::ENCODING_UTF32_BE:
                        // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                        $cellValue = rtrim($cellValue);

                        break;
                }
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8(
                $cellValue,
                $this->options->ENCODING
            );
        }

        return $encodedRowData;
    }

    /**
     * @param array<int, null|string>|false $lineData Array containing the cells value for the line
     *
     * @return bool Whether the given line is empty (a single NULL cell, as fgetcsv() returns for a blank line)
     */
    private function isEmptyLine(array|false $lineData): bool
    {
        return \is_array($lineData) && 1 === \count($lineData) && null === $lineData[0];
    }
}