Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\XLSX\Manager;
6
 
7
use OpenSpout\Common\Helper\Escaper\XLSX;
8
use OpenSpout\Reader\Common\Manager\RowManager;
9
use OpenSpout\Reader\Common\XMLProcessor;
10
use OpenSpout\Reader\Wrapper\XMLReader;
11
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
12
use OpenSpout\Reader\XLSX\Options;
13
use OpenSpout\Reader\XLSX\RowIterator;
14
use OpenSpout\Reader\XLSX\Sheet;
15
use OpenSpout\Reader\XLSX\SheetHeaderReader;
16
 
17
/**
18
 * @internal
19
 */
20
final class SheetManager
21
{
22
    /**
23
     * Paths of XML files relative to the XLSX file root.
24
     */
25
    public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
26
    public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
27
 
28
    /**
29
     * Definition of XML node names used to parse data.
30
     */
31
    public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
32
    public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
33
    public const XML_NODE_SHEET = 'sheet';
34
    public const XML_NODE_SHEETS = 'sheets';
35
    public const XML_NODE_RELATIONSHIP = 'Relationship';
36
 
37
    /**
38
     * Definition of XML attributes used to parse data.
39
     */
40
    public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
41
    public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
42
    public const XML_ATTRIBUTE_R_ID = 'r:id';
43
    public const XML_ATTRIBUTE_NAME = 'name';
44
    public const XML_ATTRIBUTE_STATE = 'state';
45
    public const XML_ATTRIBUTE_ID = 'Id';
46
    public const XML_ATTRIBUTE_TARGET = 'Target';
47
 
48
    /**
49
     * State value to represent a hidden sheet.
50
     */
51
    public const SHEET_STATE_HIDDEN = 'hidden';
52
 
53
    /** @var string Path of the XLSX file being read */
54
    private readonly string $filePath;
55
 
56
    private readonly Options $options;
57
 
58
    /** @var SharedStringsManager Manages shared strings */
59
    private readonly SharedStringsManager $sharedStringsManager;
60
 
61
    /** @var XLSX Used to unescape XML data */
62
    private readonly XLSX $escaper;
63
 
64
    /** @var Sheet[] List of sheets */
65
    private array $sheets;
66
 
67
    /** @var int Index of the sheet currently read */
68
    private int $currentSheetIndex;
69
 
70
    /** @var int Index of the active sheet (0 by default) */
71
    private int $activeSheetIndex;
72
 
73
    public function __construct(
74
        string $filePath,
75
        Options $options,
76
        SharedStringsManager $sharedStringsManager,
77
        XLSX $escaper
78
    ) {
79
        $this->filePath = $filePath;
80
        $this->options = $options;
81
        $this->sharedStringsManager = $sharedStringsManager;
82
        $this->escaper = $escaper;
83
    }
84
 
85
    /**
86
     * Returns the sheets metadata of the file located at the previously given file path.
87
     * The paths to the sheets' data are read from the [Content_Types].xml file.
88
     *
89
     * @return Sheet[] Sheets within the XLSX file
90
     */
91
    public function getSheets(): array
92
    {
93
        $this->sheets = [];
94
        $this->currentSheetIndex = 0;
95
        $this->activeSheetIndex = 0; // By default, the first sheet is active
96
 
97
        $xmlReader = new XMLReader();
98
        $xmlProcessor = new XMLProcessor($xmlReader);
99
 
100
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
101
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
102
        $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
103
        $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);
104
 
105
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
106
            $xmlProcessor->readUntilStopped();
107
            $xmlReader->close();
108
        }
109
 
110
        return $this->sheets;
111
    }
112
 
113
    /**
114
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
115
     *
116
     * @return int A return code that indicates what action should the processor take next
117
     */
118
    private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
119
    {
120
        // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
121
        // may be the string "false", that is not mapped to the boolean "false" by default...
122
        $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
123
        $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;
124
 
125
        return XMLProcessor::PROCESSING_CONTINUE;
126
    }
127
 
128
    /**
129
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
130
     *
131
     * @return int A return code that indicates what action should the processor take next
132
     */
133
    private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
134
    {
135
        // The "workbookView" node is located before "sheet" nodes, ensuring that
136
        // the active sheet is known before parsing sheets data.
137
        $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
138
 
139
        return XMLProcessor::PROCESSING_CONTINUE;
140
    }
141
 
142
    /**
143
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
144
     *
145
     * @return int A return code that indicates what action should the processor take next
146
     */
147
    private function processSheetStartingNode(XMLReader $xmlReader): int
148
    {
149
        $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
150
        $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
151
        ++$this->currentSheetIndex;
152
 
153
        return XMLProcessor::PROCESSING_CONTINUE;
154
    }
155
 
156
    /**
157
     * @return int A return code that indicates what action should the processor take next
158
     */
159
    private function processSheetsEndingNode(): int
160
    {
161
        return XMLProcessor::PROCESSING_STOP;
162
    }
163
 
164
    /**
165
     * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
166
     * We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
167
     * ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
168
     *
169
     * @param XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
170
     * @param int       $sheetIndexZeroBased  Index of the sheet, based on order of appearance in the workbook (zero-based)
171
     * @param bool      $isSheetActive        Whether this sheet was defined as active
172
     *
173
     * @return Sheet Sheet instance
174
     */
175
    private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
176
    {
177
        $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
178
        \assert(null !== $sheetId);
179
 
180
        $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
181
        $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState);
182
 
183
        $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
184
        \assert(null !== $escapedSheetName);
185
        $sheetName = $this->escaper->unescape($escapedSheetName);
186
 
187
        $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
188
 
189
        return new Sheet(
190
            $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
191
            $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
192
            $sheetIndexZeroBased,
193
            $sheetName,
194
            $isSheetActive,
195
            $isSheetVisible
196
        );
197
    }
198
 
199
    /**
200
     * @param string $sheetId The sheet ID, as defined in "workbook.xml"
201
     *
202
     * @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID
203
     */
204
    private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
205
    {
206
        $sheetDataXMLFilePath = '';
207
 
208
        // find the file path of the sheet, by looking at the "workbook.xml.res" file
209
        $xmlReader = new XMLReader();
210
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
211
            while ($xmlReader->read()) {
212
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
213
                    $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
214
 
215
                    if ($relationshipSheetId === $sheetId) {
216
                        // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
217
                        // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
218
                        $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
219
                        \assert(null !== $sheetDataXMLFilePath);
220
 
221
                        // sometimes, the sheet data file path already contains "/xl/"...
222
                        if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
223
                            $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;
224
 
225
                            break;
226
                        }
227
                    }
228
                }
229
            }
230
 
231
            $xmlReader->close();
232
        }
233
 
234
        return $sheetDataXMLFilePath;
235
    }
236
 
237
    private function createRowIterator(
238
        string $filePath,
239
        string $sheetDataXMLFilePath,
240
        Options $options,
241
        SharedStringsManager $sharedStringsManager
242
    ): RowIterator {
243
        $xmlReader = new XMLReader();
244
 
245
        $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
246
        $styleManager = new StyleManager(
247
            $filePath,
248
            $workbookRelationshipsManager->hasStylesXMLFile()
249
                ? $workbookRelationshipsManager->getStylesXMLFilePath()
250
                : null
251
        );
252
 
253
        $cellValueFormatter = new CellValueFormatter(
254
            $sharedStringsManager,
255
            $styleManager,
256
            $options->SHOULD_FORMAT_DATES,
257
            $options->SHOULD_USE_1904_DATES,
258
            new XLSX()
259
        );
260
 
261
        return new RowIterator(
262
            $filePath,
263
            $sheetDataXMLFilePath,
264
            $options->SHOULD_PRESERVE_EMPTY_ROWS,
265
            $xmlReader,
266
            new XMLProcessor($xmlReader),
267
            $cellValueFormatter,
268
            new RowManager()
269
        );
270
    }
271
 
272
    private function createSheetHeaderReader(
273
        string $filePath,
274
        string $sheetDataXMLFilePath
275
    ): SheetHeaderReader {
276
        $xmlReader = new XMLReader();
277
 
278
        return new SheetHeaderReader(
279
            $filePath,
280
            $sheetDataXMLFilePath,
281
            $xmlReader,
282
            new XMLProcessor($xmlReader)
283
        );
284
    }
285
}