| 1 | efrain | 1 | <?php
 | 
        
           |  |  | 2 |   | 
        
           |  |  | 3 | declare(strict_types=1);
 | 
        
           |  |  | 4 |   | 
        
           |  |  | 5 | namespace OpenSpout\Reader\XLSX\Manager;
 | 
        
           |  |  | 6 |   | 
        
           |  |  | 7 | use OpenSpout\Common\Helper\Escaper\XLSX;
 | 
        
           |  |  | 8 | use OpenSpout\Reader\Common\Manager\RowManager;
 | 
        
           |  |  | 9 | use OpenSpout\Reader\Common\XMLProcessor;
 | 
        
           |  |  | 10 | use OpenSpout\Reader\Wrapper\XMLReader;
 | 
        
           |  |  | 11 | use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
 | 
        
           |  |  | 12 | use OpenSpout\Reader\XLSX\Options;
 | 
        
           |  |  | 13 | use OpenSpout\Reader\XLSX\RowIterator;
 | 
        
           |  |  | 14 | use OpenSpout\Reader\XLSX\Sheet;
 | 
        
           |  |  | 15 | use OpenSpout\Reader\XLSX\SheetHeaderReader;
 | 
        
           |  |  | 16 |   | 
        
           |  |  | 17 | /**
 | 
        
           |  |  | 18 |  * @internal
 | 
        
           |  |  | 19 |  */
 | 
        
           |  |  | 20 | final class SheetManager
 | 
        
           |  |  | 21 | {
 | 
        
           |  |  | 22 |     /**
 | 
        
           |  |  | 23 |      * Paths of XML files relative to the XLSX file root.
 | 
        
           |  |  | 24 |      */
 | 
        
           |  |  | 25 |     public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
 | 
        
           |  |  | 26 |     public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
 | 
        
           |  |  | 27 |   | 
        
           |  |  | 28 |     /**
 | 
        
           |  |  | 29 |      * Definition of XML node names used to parse data.
 | 
        
           |  |  | 30 |      */
 | 
        
           |  |  | 31 |     public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
 | 
        
           |  |  | 32 |     public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
 | 
        
           |  |  | 33 |     public const XML_NODE_SHEET = 'sheet';
 | 
        
           |  |  | 34 |     public const XML_NODE_SHEETS = 'sheets';
 | 
        
           |  |  | 35 |     public const XML_NODE_RELATIONSHIP = 'Relationship';
 | 
        
           |  |  | 36 |   | 
        
           |  |  | 37 |     /**
 | 
        
           |  |  | 38 |      * Definition of XML attributes used to parse data.
 | 
        
           |  |  | 39 |      */
 | 
        
           |  |  | 40 |     public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
 | 
        
           |  |  | 41 |     public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
 | 
        
           |  |  | 42 |     public const XML_ATTRIBUTE_R_ID = 'r:id';
 | 
        
           |  |  | 43 |     public const XML_ATTRIBUTE_NAME = 'name';
 | 
        
           |  |  | 44 |     public const XML_ATTRIBUTE_STATE = 'state';
 | 
        
           |  |  | 45 |     public const XML_ATTRIBUTE_ID = 'Id';
 | 
        
           |  |  | 46 |     public const XML_ATTRIBUTE_TARGET = 'Target';
 | 
        
           |  |  | 47 |   | 
        
           |  |  | 48 |     /**
 | 
        
           |  |  | 49 |      * State value to represent a hidden sheet.
 | 
        
           |  |  | 50 |      */
 | 
        
           |  |  | 51 |     public const SHEET_STATE_HIDDEN = 'hidden';
 | 
        
           |  |  | 52 |   | 
        
           |  |  | 53 |     /** @var string Path of the XLSX file being read */
 | 
        
           |  |  | 54 |     private readonly string $filePath;
 | 
        
           |  |  | 55 |   | 
        
           |  |  | 56 |     private readonly Options $options;
 | 
        
           |  |  | 57 |   | 
        
           |  |  | 58 |     /** @var SharedStringsManager Manages shared strings */
 | 
        
           |  |  | 59 |     private readonly SharedStringsManager $sharedStringsManager;
 | 
        
           |  |  | 60 |   | 
        
           |  |  | 61 |     /** @var XLSX Used to unescape XML data */
 | 
        
           |  |  | 62 |     private readonly XLSX $escaper;
 | 
        
           |  |  | 63 |   | 
        
           |  |  | 64 |     /** @var Sheet[] List of sheets */
 | 
        
           |  |  | 65 |     private array $sheets;
 | 
        
           |  |  | 66 |   | 
        
           |  |  | 67 |     /** @var int Index of the sheet currently read */
 | 
        
           |  |  | 68 |     private int $currentSheetIndex;
 | 
        
           |  |  | 69 |   | 
        
           |  |  | 70 |     /** @var int Index of the active sheet (0 by default) */
 | 
        
           |  |  | 71 |     private int $activeSheetIndex;
 | 
        
           |  |  | 72 |   | 
        
           |  |  | 73 |     public function __construct(
 | 
        
           |  |  | 74 |         string $filePath,
 | 
        
           |  |  | 75 |         Options $options,
 | 
        
           |  |  | 76 |         SharedStringsManager $sharedStringsManager,
 | 
        
           |  |  | 77 |         XLSX $escaper
 | 
        
           |  |  | 78 |     ) {
 | 
        
           |  |  | 79 |         $this->filePath = $filePath;
 | 
        
           |  |  | 80 |         $this->options = $options;
 | 
        
           |  |  | 81 |         $this->sharedStringsManager = $sharedStringsManager;
 | 
        
           |  |  | 82 |         $this->escaper = $escaper;
 | 
        
           |  |  | 83 |     }
 | 
        
           |  |  | 84 |   | 
        
           |  |  | 85 |     /**
 | 
        
           |  |  | 86 |      * Returns the sheets metadata of the file located at the previously given file path.
 | 
        
           |  |  | 87 |      * The paths to the sheets' data are read from the [Content_Types].xml file.
 | 
        
           |  |  | 88 |      *
 | 
        
           |  |  | 89 |      * @return Sheet[] Sheets within the XLSX file
 | 
        
           |  |  | 90 |      */
 | 
        
           |  |  | 91 |     public function getSheets(): array
 | 
        
           |  |  | 92 |     {
 | 
        
           |  |  | 93 |         $this->sheets = [];
 | 
        
           |  |  | 94 |         $this->currentSheetIndex = 0;
 | 
        
           |  |  | 95 |         $this->activeSheetIndex = 0; // By default, the first sheet is active
 | 
        
           |  |  | 96 |   | 
        
           |  |  | 97 |         $xmlReader = new XMLReader();
 | 
        
           |  |  | 98 |         $xmlProcessor = new XMLProcessor($xmlReader);
 | 
        
           |  |  | 99 |   | 
        
           |  |  | 100 |         $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
 | 
        
           |  |  | 101 |         $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
 | 
        
           |  |  | 102 |         $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
 | 
        
           |  |  | 103 |         $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);
 | 
        
           |  |  | 104 |   | 
        
           |  |  | 105 |         if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
 | 
        
           |  |  | 106 |             $xmlProcessor->readUntilStopped();
 | 
        
           |  |  | 107 |             $xmlReader->close();
 | 
        
           |  |  | 108 |         }
 | 
        
           |  |  | 109 |   | 
        
           |  |  | 110 |         return $this->sheets;
 | 
        
           |  |  | 111 |     }
 | 
        
           |  |  | 112 |   | 
        
           |  |  | 113 |     /**
 | 
        
           |  |  | 114 |      * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
 | 
        
           |  |  | 115 |      *
 | 
        
           |  |  | 116 |      * @return int A return code that indicates what action should the processor take next
 | 
        
           |  |  | 117 |      */
 | 
        
           |  |  | 118 |     private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
 | 
        
           |  |  | 119 |     {
 | 
        
           |  |  | 120 |         // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
 | 
        
           |  |  | 121 |         // may be the string "false", that is not mapped to the boolean "false" by default...
 | 
        
           |  |  | 122 |         $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
 | 
        
           |  |  | 123 |         $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;
 | 
        
           |  |  | 124 |   | 
        
           |  |  | 125 |         return XMLProcessor::PROCESSING_CONTINUE;
 | 
        
           |  |  | 126 |     }
 | 
        
           |  |  | 127 |   | 
        
           |  |  | 128 |     /**
 | 
        
           |  |  | 129 |      * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
 | 
        
           |  |  | 130 |      *
 | 
        
           |  |  | 131 |      * @return int A return code that indicates what action should the processor take next
 | 
        
           |  |  | 132 |      */
 | 
        
           |  |  | 133 |     private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
 | 
        
           |  |  | 134 |     {
 | 
        
           |  |  | 135 |         // The "workbookView" node is located before "sheet" nodes, ensuring that
 | 
        
           |  |  | 136 |         // the active sheet is known before parsing sheets data.
 | 
        
           |  |  | 137 |         $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
 | 
        
           |  |  | 138 |   | 
        
           |  |  | 139 |         return XMLProcessor::PROCESSING_CONTINUE;
 | 
        
           |  |  | 140 |     }
 | 
        
           |  |  | 141 |   | 
        
           |  |  | 142 |     /**
 | 
        
           |  |  | 143 |      * @param XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
 | 
        
           |  |  | 144 |      *
 | 
        
           |  |  | 145 |      * @return int A return code that indicates what action should the processor take next
 | 
        
           |  |  | 146 |      */
 | 
        
           |  |  | 147 |     private function processSheetStartingNode(XMLReader $xmlReader): int
 | 
        
           |  |  | 148 |     {
 | 
        
           |  |  | 149 |         $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
 | 
        
           |  |  | 150 |         $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
 | 
        
           |  |  | 151 |         ++$this->currentSheetIndex;
 | 
        
           |  |  | 152 |   | 
        
           |  |  | 153 |         return XMLProcessor::PROCESSING_CONTINUE;
 | 
        
           |  |  | 154 |     }
 | 
        
           |  |  | 155 |   | 
        
           |  |  | 156 |     /**
 | 
        
           |  |  | 157 |      * @return int A return code that indicates what action should the processor take next
 | 
        
           |  |  | 158 |      */
 | 
        
           |  |  | 159 |     private function processSheetsEndingNode(): int
 | 
        
           |  |  | 160 |     {
 | 
        
           |  |  | 161 |         return XMLProcessor::PROCESSING_STOP;
 | 
        
           |  |  | 162 |     }
 | 
        
           |  |  | 163 |   | 
        
           |  |  | 164 |     /**
 | 
        
           |  |  | 165 |      * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
 | 
        
           |  |  | 166 |      * We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
 | 
        
           |  |  | 167 |      * ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
 | 
        
           |  |  | 168 |      *
 | 
        
           |  |  | 169 |      * @param XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
 | 
        
           |  |  | 170 |      * @param int       $sheetIndexZeroBased  Index of the sheet, based on order of appearance in the workbook (zero-based)
 | 
        
           |  |  | 171 |      * @param bool      $isSheetActive        Whether this sheet was defined as active
 | 
        
           |  |  | 172 |      *
 | 
        
           |  |  | 173 |      * @return Sheet Sheet instance
 | 
        
           |  |  | 174 |      */
 | 
        
           |  |  | 175 |     private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
 | 
        
           |  |  | 176 |     {
 | 
        
           |  |  | 177 |         $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
 | 
        
           |  |  | 178 |         \assert(null !== $sheetId);
 | 
        
           |  |  | 179 |   | 
        
           |  |  | 180 |         $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
 | 
        
           |  |  | 181 |         $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState);
 | 
        
           |  |  | 182 |   | 
        
           |  |  | 183 |         $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
 | 
        
           |  |  | 184 |         \assert(null !== $escapedSheetName);
 | 
        
           |  |  | 185 |         $sheetName = $this->escaper->unescape($escapedSheetName);
 | 
        
           |  |  | 186 |   | 
        
           |  |  | 187 |         $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
 | 
        
           |  |  | 188 |   | 
        
           |  |  | 189 |         return new Sheet(
 | 
        
           |  |  | 190 |             $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
 | 
        
           |  |  | 191 |             $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
 | 
        
           |  |  | 192 |             $sheetIndexZeroBased,
 | 
        
           |  |  | 193 |             $sheetName,
 | 
        
           |  |  | 194 |             $isSheetActive,
 | 
        
           |  |  | 195 |             $isSheetVisible
 | 
        
           |  |  | 196 |         );
 | 
        
           |  |  | 197 |     }
 | 
        
           |  |  | 198 |   | 
        
           |  |  | 199 |     /**
 | 
        
           |  |  | 200 |      * @param string $sheetId The sheet ID, as defined in "workbook.xml"
 | 
        
           |  |  | 201 |      *
 | 
        
           |  |  | 202 |      * @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID
 | 
        
           |  |  | 203 |      */
 | 
        
           |  |  | 204 |     private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
 | 
        
           |  |  | 205 |     {
 | 
        
           |  |  | 206 |         $sheetDataXMLFilePath = '';
 | 
        
           |  |  | 207 |   | 
        
           |  |  | 208 |         // find the file path of the sheet, by looking at the "workbook.xml.res" file
 | 
        
           |  |  | 209 |         $xmlReader = new XMLReader();
 | 
        
           |  |  | 210 |         if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
 | 
        
           |  |  | 211 |             while ($xmlReader->read()) {
 | 
        
           |  |  | 212 |                 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
 | 
        
           |  |  | 213 |                     $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
 | 
        
           |  |  | 214 |   | 
        
           |  |  | 215 |                     if ($relationshipSheetId === $sheetId) {
 | 
        
           |  |  | 216 |                         // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
 | 
        
           |  |  | 217 |                         // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
 | 
        
           |  |  | 218 |                         $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
 | 
        
           |  |  | 219 |                         \assert(null !== $sheetDataXMLFilePath);
 | 
        
           |  |  | 220 |   | 
        
           |  |  | 221 |                         // sometimes, the sheet data file path already contains "/xl/"...
 | 
        
           |  |  | 222 |                         if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
 | 
        
           |  |  | 223 |                             $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;
 | 
        
           |  |  | 224 |   | 
        
           |  |  | 225 |                             break;
 | 
        
           |  |  | 226 |                         }
 | 
        
           |  |  | 227 |                     }
 | 
        
           |  |  | 228 |                 }
 | 
        
           |  |  | 229 |             }
 | 
        
           |  |  | 230 |   | 
        
           |  |  | 231 |             $xmlReader->close();
 | 
        
           |  |  | 232 |         }
 | 
        
           |  |  | 233 |   | 
        
           |  |  | 234 |         return $sheetDataXMLFilePath;
 | 
        
           |  |  | 235 |     }
 | 
        
           |  |  | 236 |   | 
        
           |  |  | 237 |     private function createRowIterator(
 | 
        
           |  |  | 238 |         string $filePath,
 | 
        
           |  |  | 239 |         string $sheetDataXMLFilePath,
 | 
        
           |  |  | 240 |         Options $options,
 | 
        
           |  |  | 241 |         SharedStringsManager $sharedStringsManager
 | 
        
           |  |  | 242 |     ): RowIterator {
 | 
        
           |  |  | 243 |         $xmlReader = new XMLReader();
 | 
        
           |  |  | 244 |   | 
        
           |  |  | 245 |         $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
 | 
        
           |  |  | 246 |         $styleManager = new StyleManager(
 | 
        
           |  |  | 247 |             $filePath,
 | 
        
           |  |  | 248 |             $workbookRelationshipsManager->hasStylesXMLFile()
 | 
        
           |  |  | 249 |                 ? $workbookRelationshipsManager->getStylesXMLFilePath()
 | 
        
           |  |  | 250 |                 : null
 | 
        
           |  |  | 251 |         );
 | 
        
           |  |  | 252 |   | 
        
           |  |  | 253 |         $cellValueFormatter = new CellValueFormatter(
 | 
        
           |  |  | 254 |             $sharedStringsManager,
 | 
        
           |  |  | 255 |             $styleManager,
 | 
        
           |  |  | 256 |             $options->SHOULD_FORMAT_DATES,
 | 
        
           |  |  | 257 |             $options->SHOULD_USE_1904_DATES,
 | 
        
           |  |  | 258 |             new XLSX()
 | 
        
           |  |  | 259 |         );
 | 
        
           |  |  | 260 |   | 
        
           |  |  | 261 |         return new RowIterator(
 | 
        
           |  |  | 262 |             $filePath,
 | 
        
           |  |  | 263 |             $sheetDataXMLFilePath,
 | 
        
           |  |  | 264 |             $options->SHOULD_PRESERVE_EMPTY_ROWS,
 | 
        
           |  |  | 265 |             $xmlReader,
 | 
        
           |  |  | 266 |             new XMLProcessor($xmlReader),
 | 
        
           |  |  | 267 |             $cellValueFormatter,
 | 
        
           |  |  | 268 |             new RowManager()
 | 
        
           |  |  | 269 |         );
 | 
        
           |  |  | 270 |     }
 | 
        
           |  |  | 271 |   | 
        
           |  |  | 272 |     private function createSheetHeaderReader(
 | 
        
           |  |  | 273 |         string $filePath,
 | 
        
           |  |  | 274 |         string $sheetDataXMLFilePath
 | 
        
           |  |  | 275 |     ): SheetHeaderReader {
 | 
        
           |  |  | 276 |         $xmlReader = new XMLReader();
 | 
        
           |  |  | 277 |   | 
        
           |  |  | 278 |         return new SheetHeaderReader(
 | 
        
           |  |  | 279 |             $filePath,
 | 
        
           |  |  | 280 |             $sheetDataXMLFilePath,
 | 
        
           |  |  | 281 |             $xmlReader,
 | 
        
           |  |  | 282 |             new XMLProcessor($xmlReader)
 | 
        
           |  |  | 283 |         );
 | 
        
           |  |  | 284 |     }
 | 
        
           |  |  | 285 | }
 |