Proyectos de Subversion Moodle

Rev

Rev 1 | | Comparar con el anterior | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\XLSX\Helper;
6
 
7
use DateInterval;
8
use DateTimeImmutable;
9
use DOMElement;
10
use Exception;
11
use OpenSpout\Common\Entity\Cell;
12
use OpenSpout\Common\Helper\Escaper\XLSX;
13
use OpenSpout\Reader\Exception\InvalidValueException;
14
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
15
use OpenSpout\Reader\XLSX\Manager\StyleManagerInterface;
16
 
17
/**
18
 * This class provides helper functions to format cell values.
19
 */
1441 ariadna 20
final readonly class CellValueFormatter
1 efrain 21
{
22
    /**
23
     * Definition of all possible cell types.
24
     */
25
    public const CELL_TYPE_INLINE_STRING = 'inlineStr';
26
    public const CELL_TYPE_STR = 'str';
27
    public const CELL_TYPE_SHARED_STRING = 's';
28
    public const CELL_TYPE_BOOLEAN = 'b';
29
    public const CELL_TYPE_NUMERIC = 'n';
30
    public const CELL_TYPE_DATE = 'd';
31
    public const CELL_TYPE_ERROR = 'e';
32
 
33
    /**
34
     * Definition of XML nodes names used to parse data.
35
     */
36
    public const XML_NODE_VALUE = 'v';
37
    public const XML_NODE_INLINE_STRING_VALUE = 't';
38
    public const XML_NODE_FORMULA = 'f';
39
 
40
    /**
41
     * Definition of XML attributes used to parse data.
42
     */
43
    public const XML_ATTRIBUTE_TYPE = 't';
44
    public const XML_ATTRIBUTE_STYLE_ID = 's';
45
 
46
    /**
47
     * Constants used for date formatting.
48
     */
49
    public const NUM_SECONDS_IN_ONE_DAY = 86400;
50
 
51
    /** @var SharedStringsManager Manages shared strings */
1441 ariadna 52
    private SharedStringsManager $sharedStringsManager;
1 efrain 53
 
54
    /** @var StyleManagerInterface Manages styles */
1441 ariadna 55
    private StyleManagerInterface $styleManager;
1 efrain 56
 
57
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
1441 ariadna 58
    private bool $shouldFormatDates;
1 efrain 59
 
60
    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
1441 ariadna 61
    private bool $shouldUse1904Dates;
1 efrain 62
 
63
    /** @var XLSX Used to unescape XML data */
1441 ariadna 64
    private XLSX $escaper;
1 efrain 65
 
66
    /**
67
     * @param SharedStringsManager  $sharedStringsManager Manages shared strings
68
     * @param StyleManagerInterface $styleManager         Manages styles
69
     * @param bool                  $shouldFormatDates    Whether date/time values should be returned as PHP objects or be formatted as strings
70
     * @param bool                  $shouldUse1904Dates   Whether date/time values should use a calendar starting in 1904 instead of 1900
71
     * @param XLSX                  $escaper              Used to unescape XML data
72
     */
73
    public function __construct(
74
        SharedStringsManager $sharedStringsManager,
75
        StyleManagerInterface $styleManager,
76
        bool $shouldFormatDates,
77
        bool $shouldUse1904Dates,
78
        XLSX $escaper
79
    ) {
80
        $this->sharedStringsManager = $sharedStringsManager;
81
        $this->styleManager = $styleManager;
82
        $this->shouldFormatDates = $shouldFormatDates;
83
        $this->shouldUse1904Dates = $shouldUse1904Dates;
84
        $this->escaper = $escaper;
85
    }
86
 
87
    /**
88
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
89
     */
90
    public function extractAndFormatNodeValue(DOMElement $node): Cell
91
    {
92
        // Default cell type is "n"
93
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
94
        if ('' === $cellType) {
95
            $cellType = self::CELL_TYPE_NUMERIC;
96
        }
97
        $vNodeValue = $this->getVNodeValue($node);
98
 
1441 ariadna 99
        $fNodeValue = $node->getElementsByTagName(self::XML_NODE_FORMULA)->item(0)?->nodeValue;
100
        if (null !== $fNodeValue) {
101
            $computedValue = $this->formatRawValueForCellType($cellType, $node, $vNodeValue);
1 efrain 102
 
1441 ariadna 103
            return new Cell\FormulaCell(
104
                '='.$fNodeValue,
105
                null,
106
                $computedValue instanceof Cell\ErrorCell ? null : $computedValue
107
            );
1 efrain 108
        }
109
 
110
        if ('' === $vNodeValue && self::CELL_TYPE_INLINE_STRING !== $cellType) {
111
            return Cell::fromValue($vNodeValue);
112
        }
113
 
1441 ariadna 114
        $rawValue = $this->formatRawValueForCellType($cellType, $node, $vNodeValue);
1 efrain 115
 
116
        if ($rawValue instanceof Cell) {
117
            return $rawValue;
118
        }
119
 
120
        return Cell::fromValue($rawValue);
121
    }
122
 
123
    /**
124
     * Returns the cell's string value from a node's nested value node.
125
     *
126
     * @return string The value associated with the cell
127
     */
128
    private function getVNodeValue(DOMElement $node): string
129
    {
130
        // for cell types having a "v" tag containing the value.
131
        // if not, the returned value should be empty string.
132
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
133
 
134
        return (string) $vNode?->nodeValue;
135
    }
136
 
137
    /**
138
     * Returns the cell String value where string is inline.
139
     *
140
     * @return string The value associated with the cell
141
     */
142
    private function formatInlineStringCellValue(DOMElement $node): string
143
    {
144
        // inline strings are formatted this way (they can contain any number of <t> nodes):
145
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
146
        $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
147
 
148
        $cellValue = '';
149
        for ($i = 0; $i < $tNodes->count(); ++$i) {
150
            $nodeValue = $tNodes->item($i)->nodeValue;
151
            \assert(null !== $nodeValue);
152
            $cellValue .= $this->escaper->unescape($nodeValue);
153
        }
154
 
155
        return $cellValue;
156
    }
157
 
158
    /**
159
     * Returns the cell String value from shared-strings file using nodeValue index.
160
     *
161
     * @return string The value associated with the cell
162
     */
163
    private function formatSharedStringCellValue(string $nodeValue): string
164
    {
165
        // shared strings are formatted this way:
166
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
167
        $sharedStringIndex = (int) $nodeValue;
168
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
169
 
170
        return $this->escaper->unescape($escapedCellValue);
171
    }
172
 
173
    /**
174
     * Returns the cell String value, where string is stored in value node.
175
     *
176
     * @return string The value associated with the cell
177
     */
178
    private function formatStrCellValue(string $nodeValue): string
179
    {
180
        $escapedCellValue = trim($nodeValue);
181
 
182
        return $this->escaper->unescape($escapedCellValue);
183
    }
184
 
185
    /**
186
     * Returns the cell Numeric value from string of nodeValue.
187
     * The value can also represent a timestamp and a DateTime will be returned.
188
     *
189
     * @param int $cellStyleId 0 being the default style
190
     */
191
    private function formatNumericCellValue(float|int|string $nodeValue, int $cellStyleId): DateInterval|DateTimeImmutable|float|int|string
192
    {
193
        // Numeric values can represent numbers as well as timestamps.
194
        // We need to look at the style of the cell to determine whether it is one or the other.
195
        $formatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
196
 
197
        if (DateIntervalFormatHelper::isDurationFormat($formatCode)) {
198
            $cellValue = $this->formatExcelDateIntervalValue((float) $nodeValue, $formatCode);
199
        } elseif ($this->styleManager->shouldFormatNumericValueAsDate($cellStyleId)) {
200
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
201
        } else {
202
            $nodeIntValue = (int) $nodeValue;
203
            $nodeFloatValue = (float) $nodeValue;
204
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
205
        }
206
 
207
        return $cellValue;
208
    }
209
 
210
    private function formatExcelDateIntervalValue(float $nodeValue, string $excelFormat): DateInterval|string
211
    {
212
        $dateInterval = DateIntervalFormatHelper::createDateIntervalFromHours($nodeValue);
213
        if ($this->shouldFormatDates) {
214
            return DateIntervalFormatHelper::formatDateInterval($dateInterval, $excelFormat);
215
        }
216
 
217
        return $dateInterval;
218
    }
219
 
220
    /**
221
     * Returns a cell's PHP Date value, associated to the given timestamp.
222
     * NOTE: The timestamp is a float representing the number of days since the base Excel date:
223
     *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
224
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
225
     *
226
     * @param int $cellStyleId 0 being the default style
227
     *
228
     * @throws InvalidValueException If the value is not a valid timestamp
229
     *
230
     * @see ECMA-376 Part 1 - §18.17.4
231
     */
232
    private function formatExcelTimestampValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
233
    {
234
        if (!$this->isValidTimestampValue($nodeValue)) {
235
            throw new InvalidValueException((string) $nodeValue);
236
        }
237
 
238
        return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
239
    }
240
 
241
    /**
242
     * Returns whether the given timestamp is supported by SpreadsheetML.
243
     *
244
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
245
     */
246
    private function isValidTimestampValue(float $timestampValue): bool
247
    {
248
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
249
        return
250
            $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884
251
            || !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884;
252
    }
253
 
254
    /**
255
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
256
     * Only the time value matters. The date part is set to the base Excel date:
257
     * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
258
     *
259
     * @param int $cellStyleId 0 being the default style
260
     */
261
    private function formatExcelTimestampValueAsDateTimeValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
262
    {
263
        $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
264
 
265
        $daysSinceBaseDate = (int) $nodeValue;
266
        $daysSign = '+';
267
        if ($daysSinceBaseDate < 0) {
268
            $daysSinceBaseDate = abs($daysSinceBaseDate);
269
            $daysSign = '-';
270
        }
271
        $timeRemainder = fmod($nodeValue, 1);
272
        $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
273
        $secondsSign = '+';
274
        if ($secondsRemainder < 0) {
275
            $secondsRemainder = abs($secondsRemainder);
276
            $secondsSign = '-';
277
        }
278
 
279
        $dateObj = DateTimeImmutable::createFromFormat('|Y-m-d', $baseDate);
280
        \assert(false !== $dateObj);
281
        $dateObj = $dateObj->modify($daysSign.$daysSinceBaseDate.'days');
282
        \assert(false !== $dateObj);
283
        $dateObj = $dateObj->modify($secondsSign.$secondsRemainder.'seconds');
284
        \assert(false !== $dateObj);
285
 
286
        if ($this->shouldFormatDates) {
287
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
288
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
289
            $cellValue = $dateObj->format($phpDateFormat);
290
        } else {
291
            $cellValue = $dateObj;
292
        }
293
 
294
        return $cellValue;
295
    }
296
 
297
    /**
298
     * Returns the cell Boolean value from a specific node's Value.
299
     *
300
     * @return bool The value associated with the cell
301
     */
302
    private function formatBooleanCellValue(string $nodeValue): bool
303
    {
304
        return (bool) $nodeValue;
305
    }
306
 
307
    /**
308
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
309
     *
310
     * @see ECMA-376 Part 1 - §18.17.4
311
     *
312
     * @param string $nodeValue ISO 8601 Date string
313
     */
314
    private function formatDateCellValue(string $nodeValue): Cell\ErrorCell|DateTimeImmutable|string
315
    {
316
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
317
        try {
318
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new DateTimeImmutable($nodeValue);
319
        } catch (Exception) {
320
            return new Cell\ErrorCell($nodeValue, null);
321
        }
322
 
323
        return $cellValue;
324
    }
1441 ariadna 325
 
326
    private function formatRawValueForCellType(
327
        string $cellType,
328
        DOMElement $node,
329
        string $vNodeValue
330
    ): bool|Cell\ErrorCell|DateInterval|DateTimeImmutable|float|int|string {
331
        return match ($cellType) {
332
            self::CELL_TYPE_INLINE_STRING => $this->formatInlineStringCellValue($node),
333
            self::CELL_TYPE_SHARED_STRING => $this->formatSharedStringCellValue($vNodeValue),
334
            self::CELL_TYPE_STR => $this->formatStrCellValue($vNodeValue),
335
            self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($vNodeValue),
336
            self::CELL_TYPE_NUMERIC => $this->formatNumericCellValue(
337
                $vNodeValue,
338
                (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID)
339
            ),
340
            self::CELL_TYPE_DATE => $this->formatDateCellValue($vNodeValue),
341
            default => new Cell\ErrorCell($vNodeValue, null),
342
        };
343
    }
1 efrain 344
}