| 1 | efrain | 1 | <?php
 | 
        
           |  |  | 2 |   | 
        
           |  |  | 3 | declare(strict_types=1);
 | 
        
           |  |  | 4 |   | 
        
           |  |  | 5 | namespace OpenSpout\Reader\XLSX\Manager;
 | 
        
           |  |  | 6 |   | 
        
           |  |  | 7 | use OpenSpout\Reader\Wrapper\XMLReader;
 | 
        
           |  |  | 8 |   | 
        
           |  |  | 9 | class StyleManager implements StyleManagerInterface
 | 
        
           |  |  | 10 | {
 | 
        
           |  |  | 11 |     /**
 | 
        
           |  |  | 12 |      * Nodes used to find relevant information in the styles XML file.
 | 
        
           |  |  | 13 |      */
 | 
        
           |  |  | 14 |     final public const XML_NODE_NUM_FMTS = 'numFmts';
 | 
        
           |  |  | 15 |     final public const XML_NODE_NUM_FMT = 'numFmt';
 | 
        
           |  |  | 16 |     final public const XML_NODE_CELL_XFS = 'cellXfs';
 | 
        
           |  |  | 17 |     final public const XML_NODE_XF = 'xf';
 | 
        
           |  |  | 18 |   | 
        
           |  |  | 19 |     /**
 | 
        
           |  |  | 20 |      * Attributes used to find relevant information in the styles XML file.
 | 
        
           |  |  | 21 |      */
 | 
        
           |  |  | 22 |     final public const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
 | 
        
           |  |  | 23 |     final public const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
 | 
        
           |  |  | 24 |     final public const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
 | 
        
           |  |  | 25 |     final public const XML_ATTRIBUTE_COUNT = 'count';
 | 
        
           |  |  | 26 |   | 
        
           |  |  | 27 |     /**
 | 
        
           |  |  | 28 |      * By convention, default style ID is 0.
 | 
        
           |  |  | 29 |      */
 | 
        
           |  |  | 30 |     final public const DEFAULT_STYLE_ID = 0;
 | 
        
           |  |  | 31 |   | 
        
           |  |  | 32 |     final public const NUMBER_FORMAT_GENERAL = 'General';
 | 
        
           |  |  | 33 |   | 
        
           |  |  | 34 |     /**
 | 
        
           |  |  | 35 |      * Mapping between built-in numFmtId and the associated format - for dates only.
 | 
        
           |  |  | 36 |      *
 | 
        
           |  |  | 37 |      * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
 | 
        
           |  |  | 38 |      */
 | 
        
           |  |  | 39 |     private const builtinNumFmtIdToNumFormatMapping = [
 | 
        
           |  |  | 40 |         14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
 | 
        
           |  |  | 41 |         15 => 'd-mmm-yy',
 | 
        
           |  |  | 42 |         16 => 'd-mmm',
 | 
        
           |  |  | 43 |         17 => 'mmm-yy',
 | 
        
           |  |  | 44 |         18 => 'h:mm AM/PM',
 | 
        
           |  |  | 45 |         19 => 'h:mm:ss AM/PM',
 | 
        
           |  |  | 46 |         20 => 'h:mm',
 | 
        
           |  |  | 47 |         21 => 'h:mm:ss',
 | 
        
           |  |  | 48 |         22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
 | 
        
           |  |  | 49 |         45 => 'mm:ss',
 | 
        
           |  |  | 50 |         46 => '[h]:mm:ss',
 | 
        
           |  |  | 51 |         47 => 'mm:ss.0',  // @NOTE: ECMA spec is 'mmss.0',
 | 
        
           |  |  | 52 |     ];
 | 
        
           |  |  | 53 |   | 
        
           |  |  | 54 |     /** @var string Path of the XLSX file being read */
 | 
        
           |  |  | 55 |     private readonly string $filePath;
 | 
        
           |  |  | 56 |   | 
        
           |  |  | 57 |     /** @var null|string Path of the styles XML file */
 | 
        
           |  |  | 58 |     private readonly ?string $stylesXMLFilePath;
 | 
        
           |  |  | 59 |   | 
        
           |  |  | 60 |     /** @var array<int, string> Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
 | 
        
           |  |  | 61 |     private array $customNumberFormats;
 | 
        
           |  |  | 62 |   | 
        
           |  |  | 63 |     /** @var array<array-key, array<string, null|bool|int>> Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
 | 
        
           |  |  | 64 |     private array $stylesAttributes;
 | 
        
           |  |  | 65 |   | 
        
           |  |  | 66 |     /** @var array<int, bool> Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
 | 
        
           |  |  | 67 |     private array $numFmtIdToIsDateFormatCache = [];
 | 
        
           |  |  | 68 |   | 
        
           |  |  | 69 |     /**
 | 
        
           |  |  | 70 |      * @param string  $filePath          Path of the XLSX file being read
 | 
        
           |  |  | 71 |      * @param ?string $stylesXMLFilePath
 | 
        
           |  |  | 72 |      */
 | 
        
           |  |  | 73 |     public function __construct(string $filePath, ?string $stylesXMLFilePath)
 | 
        
           |  |  | 74 |     {
 | 
        
           |  |  | 75 |         $this->filePath = $filePath;
 | 
        
           |  |  | 76 |         $this->stylesXMLFilePath = $stylesXMLFilePath;
 | 
        
           |  |  | 77 |     }
 | 
        
           |  |  | 78 |   | 
        
           |  |  | 79 |     public function shouldFormatNumericValueAsDate(int $styleId): bool
 | 
        
           |  |  | 80 |     {
 | 
        
           |  |  | 81 |         if (null === $this->stylesXMLFilePath) {
 | 
        
           |  |  | 82 |             return false;
 | 
        
           |  |  | 83 |         }
 | 
        
           |  |  | 84 |   | 
        
           |  |  | 85 |         $stylesAttributes = $this->getStylesAttributes();
 | 
        
           |  |  | 86 |   | 
        
           |  |  | 87 |         // Default style (0) does not format numeric values as timestamps. Only custom styles do.
 | 
        
           |  |  | 88 |         // Also if the style ID does not exist in the styles.xml file, format as numeric value.
 | 
        
           |  |  | 89 |         // Using isset here because it is way faster than array_key_exists...
 | 
        
           |  |  | 90 |         if (self::DEFAULT_STYLE_ID === $styleId || !isset($stylesAttributes[$styleId])) {
 | 
        
           |  |  | 91 |             return false;
 | 
        
           |  |  | 92 |         }
 | 
        
           |  |  | 93 |   | 
        
           |  |  | 94 |         $styleAttributes = $stylesAttributes[$styleId];
 | 
        
           |  |  | 95 |   | 
        
           |  |  | 96 |         return $this->doesStyleIndicateDate($styleAttributes);
 | 
        
           |  |  | 97 |     }
 | 
        
           |  |  | 98 |   | 
        
           |  |  | 99 |     public function getNumberFormatCode(int $styleId): string
 | 
        
           |  |  | 100 |     {
 | 
        
           |  |  | 101 |         $stylesAttributes = $this->getStylesAttributes();
 | 
        
           |  |  | 102 |         $styleAttributes = $stylesAttributes[$styleId];
 | 
        
           |  |  | 103 |         $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
 | 
        
           |  |  | 104 |         \assert(\is_int($numFmtId));
 | 
        
           |  |  | 105 |   | 
        
           |  |  | 106 |         if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
 | 
        
           |  |  | 107 |             $numberFormatCode = self::builtinNumFmtIdToNumFormatMapping[$numFmtId];
 | 
        
           |  |  | 108 |         } else {
 | 
        
           |  |  | 109 |             $customNumberFormats = $this->getCustomNumberFormats();
 | 
        
           |  |  | 110 |             $numberFormatCode = $customNumberFormats[$numFmtId] ?? '';
 | 
        
           |  |  | 111 |         }
 | 
        
           |  |  | 112 |   | 
        
           |  |  | 113 |         return $numberFormatCode;
 | 
        
           |  |  | 114 |     }
 | 
        
           |  |  | 115 |   | 
        
           |  |  | 116 |     /**
 | 
        
           |  |  | 117 |      * @return array<int, string> The custom number formats
 | 
        
           |  |  | 118 |      */
 | 
        
           |  |  | 119 |     protected function getCustomNumberFormats(): array
 | 
        
           |  |  | 120 |     {
 | 
        
           |  |  | 121 |         if (!isset($this->customNumberFormats)) {
 | 
        
           |  |  | 122 |             $this->extractRelevantInfo();
 | 
        
           |  |  | 123 |         }
 | 
        
           |  |  | 124 |   | 
        
           |  |  | 125 |         return $this->customNumberFormats;
 | 
        
           |  |  | 126 |     }
 | 
        
           |  |  | 127 |   | 
        
           |  |  | 128 |     /**
 | 
        
           |  |  | 129 |      * @return array<array-key, array<string, null|bool|int>> The styles attributes
 | 
        
           |  |  | 130 |      */
 | 
        
           |  |  | 131 |     protected function getStylesAttributes(): array
 | 
        
           |  |  | 132 |     {
 | 
        
           |  |  | 133 |         if (!isset($this->stylesAttributes)) {
 | 
        
           |  |  | 134 |             $this->extractRelevantInfo();
 | 
        
           |  |  | 135 |         }
 | 
        
           |  |  | 136 |   | 
        
           |  |  | 137 |         return $this->stylesAttributes;
 | 
        
           |  |  | 138 |     }
 | 
        
           |  |  | 139 |   | 
        
           |  |  | 140 |     /**
 | 
        
           |  |  | 141 |      * Reads the styles.xml file and extract the relevant information from the file.
 | 
        
           |  |  | 142 |      */
 | 
        
           |  |  | 143 |     private function extractRelevantInfo(): void
 | 
        
           |  |  | 144 |     {
 | 
        
           |  |  | 145 |         $this->customNumberFormats = [];
 | 
        
           |  |  | 146 |         $this->stylesAttributes = [];
 | 
        
           |  |  | 147 |   | 
        
           |  |  | 148 |         $xmlReader = new XMLReader();
 | 
        
           |  |  | 149 |   | 
        
           |  |  | 150 |         if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
 | 
        
           |  |  | 151 |             while ($xmlReader->read()) {
 | 
        
           |  |  | 152 |                 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)
 | 
        
           |  |  | 153 |                     && '0' !== $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT)) {
 | 
        
           |  |  | 154 |                     $this->extractNumberFormats($xmlReader);
 | 
        
           |  |  | 155 |                 } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
 | 
        
           |  |  | 156 |                     $this->extractStyleAttributes($xmlReader);
 | 
        
           |  |  | 157 |                 }
 | 
        
           |  |  | 158 |             }
 | 
        
           |  |  | 159 |   | 
        
           |  |  | 160 |             $xmlReader->close();
 | 
        
           |  |  | 161 |         }
 | 
        
           |  |  | 162 |     }
 | 
        
           |  |  | 163 |   | 
        
           |  |  | 164 |     /**
 | 
        
           |  |  | 165 |      * Extracts number formats from the "numFmt" nodes.
 | 
        
           |  |  | 166 |      * For simplicity, the styles attributes are kept in memory. This is possible thanks
 | 
        
           |  |  | 167 |      * to the reuse of formats. So 1 million cells should not use 1 million formats.
 | 
        
           |  |  | 168 |      *
 | 
        
           |  |  | 169 |      * @param XMLReader $xmlReader XML Reader positioned on the "numFmts" node
 | 
        
           |  |  | 170 |      */
 | 
        
           |  |  | 171 |     private function extractNumberFormats(XMLReader $xmlReader): void
 | 
        
           |  |  | 172 |     {
 | 
        
           |  |  | 173 |         while ($xmlReader->read()) {
 | 
        
           |  |  | 174 |             if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
 | 
        
           |  |  | 175 |                 $numFmtId = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
 | 
        
           |  |  | 176 |                 $formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
 | 
        
           |  |  | 177 |                 \assert(null !== $formatCode);
 | 
        
           |  |  | 178 |                 $this->customNumberFormats[$numFmtId] = $formatCode;
 | 
        
           |  |  | 179 |             } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
 | 
        
           |  |  | 180 |                 // Once done reading "numFmts" node's children
 | 
        
           |  |  | 181 |                 break;
 | 
        
           |  |  | 182 |             }
 | 
        
           |  |  | 183 |         }
 | 
        
           |  |  | 184 |     }
 | 
        
           |  |  | 185 |   | 
        
           |  |  | 186 |     /**
 | 
        
           |  |  | 187 |      * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
 | 
        
           |  |  | 188 |      * For simplicity, the styles attributes are kept in memory. This is possible thanks
 | 
        
           |  |  | 189 |      * to the reuse of styles. So 1 million cells should not use 1 million styles.
 | 
        
           |  |  | 190 |      *
 | 
        
           |  |  | 191 |      * @param XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
 | 
        
           |  |  | 192 |      */
 | 
        
           |  |  | 193 |     private function extractStyleAttributes(XMLReader $xmlReader): void
 | 
        
           |  |  | 194 |     {
 | 
        
           |  |  | 195 |         while ($xmlReader->read()) {
 | 
        
           |  |  | 196 |             if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
 | 
        
           |  |  | 197 |                 $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
 | 
        
           |  |  | 198 |                 $normalizedNumFmtId = (null !== $numFmtId) ? (int) $numFmtId : null;
 | 
        
           |  |  | 199 |   | 
        
           |  |  | 200 |                 $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
 | 
        
           |  |  | 201 |                 $normalizedApplyNumberFormat = (null !== $applyNumberFormat) ? (bool) $applyNumberFormat : null;
 | 
        
           |  |  | 202 |   | 
        
           |  |  | 203 |                 $this->stylesAttributes[] = [
 | 
        
           |  |  | 204 |                     self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
 | 
        
           |  |  | 205 |                     self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
 | 
        
           |  |  | 206 |                 ];
 | 
        
           |  |  | 207 |             } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
 | 
        
           |  |  | 208 |                 // Once done reading "cellXfs" node's children
 | 
        
           |  |  | 209 |                 break;
 | 
        
           |  |  | 210 |             }
 | 
        
           |  |  | 211 |         }
 | 
        
           |  |  | 212 |     }
 | 
        
           |  |  | 213 |   | 
        
           |  |  | 214 |     /**
 | 
        
           |  |  | 215 |      * @param array<string, null|bool|int> $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
 | 
        
           |  |  | 216 |      *
 | 
        
           |  |  | 217 |      * @return bool Whether the style with the given attributes indicates that the number is a date
 | 
        
           |  |  | 218 |      */
 | 
        
           |  |  | 219 |     private function doesStyleIndicateDate(array $styleAttributes): bool
 | 
        
           |  |  | 220 |     {
 | 
        
           |  |  | 221 |         $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
 | 
        
           |  |  | 222 |         $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
 | 
        
           |  |  | 223 |   | 
        
           |  |  | 224 |         // A style may apply a date format if it has:
 | 
        
           |  |  | 225 |         //  - "applyNumberFormat" attribute not set to "false"
 | 
        
           |  |  | 226 |         //  - "numFmtId" attribute set
 | 
        
           |  |  | 227 |         // This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
 | 
        
           |  |  | 228 |         // but this is not necessarily a date.
 | 
        
           |  |  | 229 |         if (false === $applyNumberFormat || !\is_int($numFmtId)) {
 | 
        
           |  |  | 230 |             return false;
 | 
        
           |  |  | 231 |         }
 | 
        
           |  |  | 232 |   | 
        
           |  |  | 233 |         return $this->doesNumFmtIdIndicateDate($numFmtId);
 | 
        
           |  |  | 234 |     }
 | 
        
           |  |  | 235 |   | 
        
           |  |  | 236 |     /**
 | 
        
           |  |  | 237 |      * Returns whether the number format ID indicates that the number is a date.
 | 
        
           |  |  | 238 |      * The result is cached to avoid recomputing the same thing over and over, as
 | 
        
           |  |  | 239 |      * "numFmtId" attributes can be shared between multiple styles.
 | 
        
           |  |  | 240 |      *
 | 
        
           |  |  | 241 |      * @return bool Whether the number format ID indicates that the number is a date
 | 
        
           |  |  | 242 |      */
 | 
        
           |  |  | 243 |     private function doesNumFmtIdIndicateDate(int $numFmtId): bool
 | 
        
           |  |  | 244 |     {
 | 
        
           |  |  | 245 |         if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
 | 
        
           |  |  | 246 |             $formatCode = $this->getFormatCodeForNumFmtId($numFmtId);
 | 
        
           |  |  | 247 |   | 
        
           |  |  | 248 |             $this->numFmtIdToIsDateFormatCache[$numFmtId] = (
 | 
        
           |  |  | 249 |                 $this->isNumFmtIdBuiltInDateFormat($numFmtId)
 | 
        
           |  |  | 250 |                 || $this->isFormatCodeCustomDateFormat($formatCode)
 | 
        
           |  |  | 251 |             );
 | 
        
           |  |  | 252 |         }
 | 
        
           |  |  | 253 |   | 
        
           |  |  | 254 |         return $this->numFmtIdToIsDateFormatCache[$numFmtId];
 | 
        
           |  |  | 255 |     }
 | 
        
           |  |  | 256 |   | 
        
           |  |  | 257 |     /**
 | 
        
           |  |  | 258 |      * @return null|string The custom number format or NULL if none defined for the given numFmtId
 | 
        
           |  |  | 259 |      */
 | 
        
           |  |  | 260 |     private function getFormatCodeForNumFmtId(int $numFmtId): ?string
 | 
        
           |  |  | 261 |     {
 | 
        
           |  |  | 262 |         $customNumberFormats = $this->getCustomNumberFormats();
 | 
        
           |  |  | 263 |   | 
        
           |  |  | 264 |         // Using isset here because it is way faster than array_key_exists...
 | 
        
           |  |  | 265 |         return $customNumberFormats[$numFmtId] ?? null;
 | 
        
           |  |  | 266 |     }
 | 
        
           |  |  | 267 |   | 
        
           |  |  | 268 |     /**
 | 
        
           |  |  | 269 |      * @return bool Whether the number format ID indicates that the number is a date
 | 
        
           |  |  | 270 |      */
 | 
        
           |  |  | 271 |     private function isNumFmtIdBuiltInDateFormat(int $numFmtId): bool
 | 
        
           |  |  | 272 |     {
 | 
        
           |  |  | 273 |         return \array_key_exists($numFmtId, self::builtinNumFmtIdToNumFormatMapping);
 | 
        
           |  |  | 274 |     }
 | 
        
           |  |  | 275 |   | 
        
           |  |  | 276 |     /**
 | 
        
           |  |  | 277 |      * @return bool Whether the given format code indicates that the number is a date
 | 
        
           |  |  | 278 |      */
 | 
        
           |  |  | 279 |     private function isFormatCodeCustomDateFormat(?string $formatCode): bool
 | 
        
           |  |  | 280 |     {
 | 
        
           |  |  | 281 |         // if no associated format code or if using the default "General" format
 | 
        
           |  |  | 282 |         if (null === $formatCode || 0 === strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL)) {
 | 
        
           |  |  | 283 |             return false;
 | 
        
           |  |  | 284 |         }
 | 
        
           |  |  | 285 |   | 
        
           |  |  | 286 |         return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
 | 
        
           |  |  | 287 |     }
 | 
        
           |  |  | 288 |   | 
        
           |  |  | 289 |     /**
 | 
        
           |  |  | 290 |      * @return bool Whether the given format code matches a date format pattern
 | 
        
           |  |  | 291 |      */
 | 
        
           |  |  | 292 |     private function isFormatCodeMatchingDateFormatPattern(string $formatCode): bool
 | 
        
           |  |  | 293 |     {
 | 
        
           |  |  | 294 |         // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
 | 
        
           |  |  | 295 |         $pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
 | 
        
           |  |  | 296 |         $formatCode = preg_replace($pattern, '', $formatCode);
 | 
        
           |  |  | 297 |         \assert(null !== $formatCode);
 | 
        
           |  |  | 298 |   | 
        
           |  |  | 299 |         // Remove strings in double quotes, as they won't be interpreted as date format characters
 | 
        
           |  |  | 300 |         $formatCode = preg_replace('/"[^"]+"/', '', $formatCode);
 | 
        
           |  |  | 301 |         \assert(null !== $formatCode);
 | 
        
           |  |  | 302 |   | 
        
           |  |  | 303 |         // custom date formats contain specific characters to represent the date:
 | 
        
           |  |  | 304 |         // e - yy - m - d - h - s
 | 
        
           |  |  | 305 |         // and all of their variants (yyyy - mm - dd...)
 | 
        
           |  |  | 306 |         $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's'];
 | 
        
           |  |  | 307 |   | 
        
           |  |  | 308 |         $hasFoundDateFormatCharacter = false;
 | 
        
           |  |  | 309 |         foreach ($dateFormatCharacters as $dateFormatCharacter) {
 | 
        
           |  |  | 310 |             // character not preceded by "\" (case insensitive)
 | 
        
           |  |  | 311 |             $pattern = '/(?<!\\\)'.$dateFormatCharacter.'/i';
 | 
        
           |  |  | 312 |   | 
        
           |  |  | 313 |             if (1 === preg_match($pattern, $formatCode)) {
 | 
        
           |  |  | 314 |                 $hasFoundDateFormatCharacter = true;
 | 
        
           |  |  | 315 |   | 
        
           |  |  | 316 |                 break;
 | 
        
           |  |  | 317 |             }
 | 
        
           |  |  | 318 |         }
 | 
        
           |  |  | 319 |   | 
        
           |  |  | 320 |         return $hasFoundDateFormatCharacter;
 | 
        
           |  |  | 321 |     }
 | 
        
           |  |  | 322 | }
 |