Proyectos de Subversion Moodle

Rev

Rev 1 | | Comparar con el anterior | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\XLSX\Manager;
6
 
7
use OpenSpout\Common\Helper\Escaper\XLSX;
8
use OpenSpout\Reader\Common\Manager\RowManager;
9
use OpenSpout\Reader\Common\XMLProcessor;
10
use OpenSpout\Reader\Wrapper\XMLReader;
11
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
12
use OpenSpout\Reader\XLSX\Options;
13
use OpenSpout\Reader\XLSX\RowIterator;
14
use OpenSpout\Reader\XLSX\Sheet;
15
use OpenSpout\Reader\XLSX\SheetHeaderReader;
1441 ariadna 16
use OpenSpout\Reader\XLSX\SheetMergeCellsReader;
1 efrain 17
 
18
/**
 * Builds the list of sheets of an XLSX file by parsing "xl/workbook.xml" and
 * resolving each sheet's data file through "xl/_rels/workbook.xml.rels".
 *
 * @internal
 */
final class SheetManager
{
    /**
     * Paths of XML files relative to the XLSX file root.
     */
    public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
    public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';

    /**
     * Definition of XML node names used to parse data.
     */
    public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
    public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
    public const XML_NODE_SHEET = 'sheet';
    public const XML_NODE_SHEETS = 'sheets';
    public const XML_NODE_RELATIONSHIP = 'Relationship';

    /**
     * Definition of XML attributes used to parse data.
     */
    public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
    public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
    public const XML_ATTRIBUTE_R_ID = 'r:id';
    public const XML_ATTRIBUTE_NAME = 'name';
    public const XML_ATTRIBUTE_STATE = 'state';
    public const XML_ATTRIBUTE_ID = 'Id';
    public const XML_ATTRIBUTE_TARGET = 'Target';

    /**
     * State values for which a sheet is reported as not visible.
     */
    public const SHEET_STATE_HIDDEN = 'hidden';
    public const SHEET_STATE_VERY_HIDDEN = 'veryHidden';

    /** @var string Path of the XLSX file being read */
    private readonly string $filePath;

    /** @var Options Reader options; SHOULD_USE_1904_DATES is overwritten while parsing "workbookPr" */
    private readonly Options $options;

    /** @var SharedStringsManager Manages shared strings */
    private readonly SharedStringsManager $sharedStringsManager;

    /** @var XLSX Used to unescape XML data */
    private readonly XLSX $escaper;

    /** @var Sheet[] List of sheets */
    private array $sheets;

    /** @var int Index of the sheet currently read */
    private int $currentSheetIndex;

    /** @var int Index of the active sheet (0 by default) */
    private int $activeSheetIndex;

    /**
     * @param string               $filePath             Path of the XLSX file being read
     * @param Options              $options              Reader options
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @param XLSX                 $escaper              Used to unescape XML data
     */
    public function __construct(
        string $filePath,
        Options $options,
        SharedStringsManager $sharedStringsManager,
        XLSX $escaper
    ) {
        $this->filePath = $filePath;
        $this->options = $options;
        $this->sharedStringsManager = $sharedStringsManager;
        $this->escaper = $escaper;
    }

    /**
     * Returns the sheets metadata of the file located at the previously given file path.
     * The sheets are listed in "xl/workbook.xml"; the path to each sheet's data is
     * resolved through "xl/_rels/workbook.xml.rels".
     *
     * Each call resets the internal state and parses the workbook again. If the
     * workbook file cannot be opened inside the archive, an empty array is returned.
     *
     * @return Sheet[] Sheets within the XLSX file
     */
    public function getSheets(): array
    {
        $this->sheets = [];
        $this->currentSheetIndex = 0;
        $this->activeSheetIndex = 0; // By default, the first sheet is active

        $xmlReader = new XMLReader();
        $xmlProcessor = new XMLProcessor($xmlReader);

        // Callbacks are registered as [object, method name] pairs pointing at the private methods below.
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);

        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
            $xmlProcessor->readUntilStopped();
            $xmlReader->close();
        }

        return $this->sheets;
    }

    /**
     * Stores the workbook's "date1904" flag into the reader options.
     *
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
    {
        // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
        // may be the string "false", that is not mapped to the boolean "false" by default...
        $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
        $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Records the index of the active sheet from the "activeTab" attribute.
     *
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
    {
        // The "workbookView" node is located before "sheet" nodes, ensuring that
        // the active sheet is known before parsing sheets data.
        $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Creates the Sheet for the current "<sheet>" node, appends it to the list
     * and advances the running sheet index.
     *
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processSheetStartingNode(XMLReader $xmlReader): int
    {
        $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
        $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
        ++$this->currentSheetIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Stops the processing once the closing "</sheets>" node is reached.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processSheetsEndingNode(): int
    {
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
     * We can find the XML file path describing the sheet inside "workbook.xml.rels", by mapping with the sheet ID
     * ("r:id" in "workbook.xml", "Id" in "workbook.xml.rels").
     *
     * @param XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
     * @param int       $sheetIndexZeroBased  Index of the sheet, based on order of appearance in the workbook (zero-based)
     * @param bool      $isSheetActive        Whether this sheet was defined as active
     *
     * @return Sheet Sheet instance
     */
    private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
    {
        $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
        \assert(null !== $sheetId);

        // A sheet is visible unless its state is "hidden" or "veryHidden" (a missing state counts as visible).
        $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
        $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState && self::SHEET_STATE_VERY_HIDDEN !== $sheetState);

        $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
        \assert(null !== $escapedSheetName);
        $sheetName = $this->escaper->unescape($escapedSheetName);

        $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);

        // Merge cells are only read when the corresponding option is enabled.
        $mergeCells = [];
        if ($this->options->SHOULD_LOAD_MERGE_CELLS) {
            $mergeCellsXMLReader = new XMLReader();
            $mergeCells = (new SheetMergeCellsReader(
                $this->filePath,
                $sheetDataXMLFilePath,
                $mergeCellsXMLReader,
                new XMLProcessor($mergeCellsXMLReader)
            ))->getMergeCells();
        }

        return new Sheet(
            $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
            $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
            $sheetIndexZeroBased,
            $sheetName,
            $isSheetActive,
            $isSheetVisible,
            $mergeCells
        );
    }

    /**
     * Looks up the relationship whose "Id" equals the given sheet ID and returns its
     * "Target", prefixed with "/xl/" when it does not already start with it.
     *
     * @param string $sheetId The sheet ID, as defined in "workbook.xml"
     *
     * @return string The XML file path describing the sheet inside "workbook.xml.rels", for the given sheet ID.
     *                Empty string when the relationships file cannot be opened or no relationship matches.
     */
    private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
    {
        $sheetDataXMLFilePath = '';

        // find the file path of the sheet, by looking at the "workbook.xml.rels" file
        $xmlReader = new XMLReader();
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
            while ($xmlReader->read()) {
                if (!$xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
                    continue;
                }

                $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
                if ($relationshipSheetId !== $sheetId) {
                    continue;
                }

                // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
                // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
                $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
                \assert(null !== $sheetDataXMLFilePath);

                // sometimes, the sheet data file path already contains "/xl/"...
                if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
                    $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;
                }

                // The matching relationship has been found: stop scanning, whether or not
                // the prefix had to be added.
                break;
            }

            $xmlReader->close();
        }

        return $sheetDataXMLFilePath;
    }

    /**
     * Builds the row iterator for a sheet, wiring up the style manager and the
     * cell value formatter it needs.
     *
     * @param string               $filePath             Path of the XLSX file being read
     * @param string               $sheetDataXMLFilePath Path of the sheet data XML file, as resolved from "workbook.xml.rels"
     * @param Options              $options              Reader options
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     */
    private function createRowIterator(
        string $filePath,
        string $sheetDataXMLFilePath,
        Options $options,
        SharedStringsManager $sharedStringsManager
    ): RowIterator {
        $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
        // The styles file path is only passed along when the workbook declares one.
        $styleManager = new StyleManager(
            $filePath,
            $workbookRelationshipsManager->hasStylesXMLFile()
                ? $workbookRelationshipsManager->getStylesXMLFilePath()
                : null
        );

        $cellValueFormatter = new CellValueFormatter(
            $sharedStringsManager,
            $styleManager,
            $options->SHOULD_FORMAT_DATES,
            $options->SHOULD_USE_1904_DATES,
            new XLSX()
        );

        // The same reader instance is shared by the iterator and its processor.
        $xmlReader = new XMLReader();

        return new RowIterator(
            $filePath,
            $sheetDataXMLFilePath,
            $options->SHOULD_PRESERVE_EMPTY_ROWS,
            $xmlReader,
            new XMLProcessor($xmlReader),
            $cellValueFormatter,
            new RowManager()
        );
    }

    /**
     * Builds the header reader for a sheet.
     *
     * @param string $filePath             Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file, as resolved from "workbook.xml.rels"
     */
    private function createSheetHeaderReader(
        string $filePath,
        string $sheetDataXMLFilePath
    ): SheetHeaderReader {
        // The same reader instance is shared by the header reader and its processor.
        $xmlReader = new XMLReader();

        return new SheetHeaderReader(
            $filePath,
            $sheetDataXMLFilePath,
            $xmlReader,
            new XMLProcessor($xmlReader)
        );
    }
}