<?php

declare(strict_types=1);

namespace OpenSpout\Reader\CSV;

use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Exception\EncodingConversionException;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Reader\RowIteratorInterface;

/**
 * Iterate over CSV rows.
 *
 * Rows are read one at a time with fgetcsv(), each cell is converted to UTF-8
 * through the injected EncodingHelper, and the result is buffered as a Row.
 * Empty lines are skipped unless the SHOULD_PRESERVE_EMPTY_ROWS option is set.
 */
final class RowIterator implements RowIteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates for very long lines).
     */
    public const MAX_READ_BYTES_PER_LINE = 0;

    /** @var null|resource Pointer to the CSV file to read */
    private $filePointer;

    /** @var int Number of rows read so far; also used as the iterator key */
    private int $numReadRows = 0;

    /** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
    private ?Row $rowBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    private bool $hasReachedEndOfFile = false;

    /** @var Options Reader options (delimiter, enclosure, encoding, empty-row handling) */
    private readonly Options $options;

    /** @var EncodingHelper Helper to work with different encodings */
    private readonly EncodingHelper $encodingHelper;

    /**
     * @param resource       $filePointer    Pointer to the CSV file to read
     * @param Options        $options        Reader options
     * @param EncodingHelper $encodingHelper Helper used for BOM detection and UTF-8 conversion
     */
    public function __construct(
        $filePointer,
        Options $options,
        EncodingHelper $encodingHelper
    ) {
        $this->filePointer = $filePointer;
        $this->options = $options;
        $this->encodingHelper = $encodingHelper;
    }

    /**
     * Rewind the Iterator to the first element.
     *
     * Moves the file cursor just past the BOM (if any), resets the row counter
     * and the buffer, then reads the first row.
     *
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    public function rewind(): void
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowBuffer = null;

        $this->next();
    }

    /**
     * Checks if current position is valid.
     *
     * @see http://php.net/manual/en/iterator.valid.php
     */
    public function valid(): bool
    {
        return null !== $this->filePointer && !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     *
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    public function next(): void
    {
        $this->hasReachedEndOfFile = feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Return the current element from the buffer.
     *
     * The buffer is only replaced when a new row is successfully read, so once
     * the end of the file is reached it still holds the last row that was read.
     *
     * @see http://php.net/manual/en/iterator.current.php
     */
    public function current(): ?Row
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element.
     *
     * The key is the number of rows read so far, i.e. the first row has key 1.
     *
     * @see http://php.net/manual/en/iterator.key.php
     */
    public function key(): int
    {
        return $this->numReadRows;
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     */
    private function rewindAndSkipBom(): void
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM(
            $this->filePointer,
            $this->options->ENCODING
        );

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Reads lines until one can be returned, then stores it in the row buffer.
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    private function readDataForNextRow(): void
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if (false === $rowData) {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;

            return;
        }

        // The string cast replaces NULL values (empty lines) by empty strings.
        $cells = array_map(
            static fn ($cellValue) => Cell::fromValue((string) $cellValue),
            $rowData
        );

        $this->rowBuffer = new Row($cells, null);
        ++$this->numReadRows;
    }

    /**
     * @param array<int, null|string>|false $currentRowData
     *
     * @return bool Whether we need to keep reading (true) or the data for the current row can be returned (false)
     */
    private function shouldReadNextRow(array|false $currentRowData): bool
    {
        $hasSuccessfullyFetchedRowData = (false !== $currentRowData);
        $hasNowReachedEndOfFile = feof($this->filePointer);

        // A failed read that is not caused by the end of the file: try the next line.
        if (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) {
            return true;
        }

        // Empty lines are skipped unless the caller asked to preserve them.
        return !$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $this->isEmptyLine($currentRowData);
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
     *
     * @return array<int, null|string>|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     *
     * @throws EncodingConversionException If unable to convert data to UTF-8
     */
    private function getNextUTF8EncodedRow(): array|false
    {
        $encodedRowData = fgetcsv(
            $this->filePointer,
            self::MAX_READ_BYTES_PER_LINE,
            $this->options->FIELD_DELIMITER,
            $this->options->FIELD_ENCLOSURE,
            ''
        );
        if (false === $encodedRowData) {
            return false;
        }

        $encoding = $this->options->ENCODING;

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            // fgetcsv() reports a blank line as a single NULL field. Under strict_types,
            // ltrim()/rtrim() reject NULL, so only actual strings are trimmed.
            if (null !== $cellValue) {
                $cellValue = match ($encoding) {
                    // fgetcsv() adds extra leading whitespace when it tries to explode little-endian data
                    EncodingHelper::ENCODING_UTF16_LE,
                    EncodingHelper::ENCODING_UTF32_LE => ltrim($cellValue),
                    // fgetcsv() adds extra trailing whitespace when it tries to explode big-endian data
                    EncodingHelper::ENCODING_UTF16_BE,
                    EncodingHelper::ENCODING_UTF32_BE => rtrim($cellValue),
                    default => $cellValue,
                };
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $encoding);
        }

        return $encodedRowData;
    }

    /**
     * @param array<int, null|string>|false $lineData Array containing the cells value for the line
     *
     * @return bool Whether the given line is empty (a single NULL cell, which is how fgetcsv() reports a blank line)
     */
    private function isEmptyLine(array|false $lineData): bool
    {
        return \is_array($lineData) && 1 === \count($lineData) && null === $lineData[0];
    }
}