Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\ODS;
6
 
7
use DOMElement;
8
use OpenSpout\Common\Entity\Cell;
9
use OpenSpout\Common\Entity\Row;
10
use OpenSpout\Common\Exception\IOException;
11
use OpenSpout\Reader\Common\XMLProcessor;
12
use OpenSpout\Reader\Exception\InvalidValueException;
13
use OpenSpout\Reader\Exception\IteratorNotRewindableException;
14
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
15
use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
16
use OpenSpout\Reader\RowIteratorInterface;
17
use OpenSpout\Reader\Wrapper\XMLReader;
18
 
19
final class RowIterator implements RowIteratorInterface
{
    /**
     * Definition of XML nodes names used to parse data.
     */
    public const XML_NODE_TABLE = 'table:table';
    public const XML_NODE_ROW = 'table:table-row';
    public const XML_NODE_CELL = 'table:table-cell';
    public const MAX_COLUMNS_EXCEL = 16384;

    /**
     * Definition of XML attribute used to parse data.
     */
    public const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    public const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    private readonly Options $options;

    /** @var XMLProcessor Helper Object to process XML nodes */
    private readonly XMLProcessor $xmlProcessor;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    private readonly Helper\CellValueFormatter $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    private bool $hasAlreadyBeenRewound = false;

    /** @var Row The currently processed row */
    private Row $currentlyProcessedRow;

    /** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
    private ?Row $rowBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    private bool $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    private int $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    private int $nextRowIndexToBeProcessed = 1;

    /** @var null|Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
    private ?Cell $lastProcessedCell = null;

    /** @var int Number of times the last processed row should be repeated (always >= 1) */
    private int $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right (always >= 1) */
    private int $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    private bool $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * Stores the collaborators and registers, on the given XML processor, the callbacks
     * that handle the start/end of row nodes, the start of cell nodes and the end of the table node.
     */
    public function __construct(
        Options $options,
        CellValueFormatter $cellValueFormatter,
        XMLProcessor $xmlProcessor
    ) {
        $this->cellValueFormatter = $cellValueFormatter;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
        $this->options = $options;
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     *
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws IteratorNotRewindableException If the iterator is rewound more than once
     */
    public function rewind(): void
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that current()/valid() are meaningful right after rewinding
        $this->next();
    }

    /**
     * Checks if current position is valid.
     *
     * @see http://php.net/manual/en/iterator.valid.php
     */
    public function valid(): bool
    {
        return !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element. Empty rows will be skipped
     * (unless the options ask for empty rows to be preserved).
     *
     * New data is only read from the XML when the buffered row has been returned
     * as many times as its "number-rows-repeated" count requires.
     *
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws SharedStringNotFoundException If a shared string was not found
     * @throws IOException                   If unable to read the sheet data XML
     */
    public function next(): void
    {
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }

        ++$this->lastRowIndexProcessed;
    }

    /**
     * Return the current element, from the buffer.
     *
     * The buffer is null until rewind() has been called, so calling this method
     * before rewinding violates the declared return type.
     *
     * @see http://php.net/manual/en/iterator.current.php
     */
    public function current(): Row
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element (the one-based index of the last processed row).
     *
     * @see http://php.net/manual/en/iterator.key.php
     */
    public function key(): int
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row.
     *
     * @return bool whether we need data for the next row to be processed
     */
    private function doesNeedDataForNextRowToBeProcessed(): bool
    {
        $hasReadAtLeastOneRow = (0 !== $this->lastRowIndexProcessed);

        return
            !$hasReadAtLeastOneRow
            || $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1;
    }

    /**
     * Starts a fresh row, lets the XML processor run until one of the callbacks
     * stops it, then publishes the row that was built into the buffer.
     *
     * @throws SharedStringNotFoundException If a shared string was not found
     * @throws IOException                   If unable to read the sheet data XML
     */
    private function readDataForNextRow(): void
    {
        $this->currentlyProcessedRow = new Row([], null);

        $this->xmlProcessor->readUntilStopped();

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processRowStartingNode(XMLReader $xmlReader): int
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCell = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processCellStartingNode(XMLReader $xmlReader): int
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        /** @var DOMElement $node */
        $node = $xmlReader->expand();
        $currentCell = $this->getCell($node);

        // Cell N is only added to the row once cell N+1 has been read: the last cell of
        // a row needs special handling (see processRowEndingNode()), and we only know
        // a cell was not the last one when the next cell shows up.
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; ++$i) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCell = $currentCell;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    private function processRowEndingNode(): int
    {
        $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);

        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyRow) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
        $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the cells in "$this->currentlyProcessedRow".
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
        // always 16384 cells.
        if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; ++$i) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        // M is guaranteed to be >= 1, so the iterator always makes progress.
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    private function processTableEndingNode(): int
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     *
     * @return int The value of "table:number-rows-repeated" attribute of the current node (at least 1),
     *             or 1 if attribute missing
     */
    private function getNumRowsRepeatedForCurrentNode(XMLReader $xmlReader): int
    {
        return $this->getRepeatCountForCurrentNode($xmlReader, self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     *
     * @return int The value of "table:number-columns-repeated" attribute of the current node (at least 1),
     *             or 1 if attribute missing
     */
    private function getNumColumnsRepeatedForCurrentNode(XMLReader $xmlReader): int
    {
        return $this->getRepeatCountForCurrentNode($xmlReader, self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
    }

    /**
     * Reads a "number-*-repeated" attribute of the current node as a strictly positive integer.
     *
     * A missing attribute means "not repeated" (1). A value that casts to zero or to a
     * negative number (e.g. "0", "-3", "abc") is also treated as 1: a non-positive row
     * repeat count would stop "nextRowIndexToBeProcessed" from advancing, so the iterator
     * would never read another row nor reach the end of the table; a non-positive column
     * repeat count would silently drop the cell from the row.
     *
     * @param XMLReader $xmlReader     XMLReader object, positioned on the node holding the attribute
     * @param string    $attributeName Qualified name of the repeat attribute to read
     *
     * @return int The repeat count, always greater than or equal to 1
     */
    private function getRepeatCountForCurrentNode(XMLReader $xmlReader, string $attributeName): int
    {
        $rawRepeatCount = $xmlReader->getAttribute($attributeName);

        if (null === $rawRepeatCount) {
            return 1;
        }

        return max(1, (int) $rawRepeatCount);
    }

    /**
     * Builds the cell holding the formatted value of the given XML node.
     * If the value formatter reports an invalid value, an error cell wrapping
     * that invalid value is returned instead.
     *
     * @return Cell The cell associated with the given node
     */
    private function getCell(DOMElement $node): Cell
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = Cell::fromValue($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = new Cell\ErrorCell($exception->getInvalidValue(), null);
        }

        return $cell;
    }

    /**
     * When a row's ending node is reached, the row is considered empty if the row built
     * so far is empty and there is either no last read cell or the last read cell is an empty cell.
     * At that point, the last read cell is not part of the row data yet
     * (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param Row       $currentRow   The row built so far (without the last read cell)
     * @param null|Cell $lastReadCell The last read cell
     *
     * @return bool Whether the row is empty
     */
    private function isEmptyRow(Row $currentRow, ?Cell $lastReadCell): bool
    {
        return
            $currentRow->isEmpty()
            && (null === $lastReadCell || $lastReadCell instanceof Cell\EmptyCell);
    }
}