Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\XLSX\Helper;
6
 
7
use DateInterval;
8
use DateTimeImmutable;
9
use DOMElement;
10
use Exception;
11
use OpenSpout\Common\Entity\Cell;
12
use OpenSpout\Common\Helper\Escaper\XLSX;
13
use OpenSpout\Reader\Exception\InvalidValueException;
14
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
15
use OpenSpout\Reader\XLSX\Manager\StyleManagerInterface;
16
 
17
/**
18
 * This class provides helper functions to format cell values.
19
 */
20
final class CellValueFormatter
21
{
22
    /**
23
     * Definition of all possible cell types.
24
     */
25
    public const CELL_TYPE_INLINE_STRING = 'inlineStr';
26
    public const CELL_TYPE_STR = 'str';
27
    public const CELL_TYPE_SHARED_STRING = 's';
28
    public const CELL_TYPE_BOOLEAN = 'b';
29
    public const CELL_TYPE_NUMERIC = 'n';
30
    public const CELL_TYPE_DATE = 'd';
31
    public const CELL_TYPE_ERROR = 'e';
32
 
33
    /**
34
     * Definition of XML nodes names used to parse data.
35
     */
36
    public const XML_NODE_VALUE = 'v';
37
    public const XML_NODE_INLINE_STRING_VALUE = 't';
38
    public const XML_NODE_FORMULA = 'f';
39
 
40
    /**
41
     * Definition of XML attributes used to parse data.
42
     */
43
    public const XML_ATTRIBUTE_TYPE = 't';
44
    public const XML_ATTRIBUTE_STYLE_ID = 's';
45
 
46
    /**
47
     * Constants used for date formatting.
48
     */
49
    public const NUM_SECONDS_IN_ONE_DAY = 86400;
50
 
51
    /** @var SharedStringsManager Manages shared strings */
52
    private readonly SharedStringsManager $sharedStringsManager;
53
 
54
    /** @var StyleManagerInterface Manages styles */
55
    private readonly StyleManagerInterface $styleManager;
56
 
57
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
58
    private readonly bool $shouldFormatDates;
59
 
60
    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
61
    private readonly bool $shouldUse1904Dates;
62
 
63
    /** @var XLSX Used to unescape XML data */
64
    private readonly XLSX $escaper;
65
 
66
    /**
67
     * @param SharedStringsManager  $sharedStringsManager Manages shared strings
68
     * @param StyleManagerInterface $styleManager         Manages styles
69
     * @param bool                  $shouldFormatDates    Whether date/time values should be returned as PHP objects or be formatted as strings
70
     * @param bool                  $shouldUse1904Dates   Whether date/time values should use a calendar starting in 1904 instead of 1900
71
     * @param XLSX                  $escaper              Used to unescape XML data
72
     */
73
    public function __construct(
74
        SharedStringsManager $sharedStringsManager,
75
        StyleManagerInterface $styleManager,
76
        bool $shouldFormatDates,
77
        bool $shouldUse1904Dates,
78
        XLSX $escaper
79
    ) {
80
        $this->sharedStringsManager = $sharedStringsManager;
81
        $this->styleManager = $styleManager;
82
        $this->shouldFormatDates = $shouldFormatDates;
83
        $this->shouldUse1904Dates = $shouldUse1904Dates;
84
        $this->escaper = $escaper;
85
    }
86
 
87
    /**
88
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
89
     */
90
    public function extractAndFormatNodeValue(DOMElement $node): Cell
91
    {
92
        // Default cell type is "n"
93
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
94
        if ('' === $cellType) {
95
            $cellType = self::CELL_TYPE_NUMERIC;
96
        }
97
        $vNodeValue = $this->getVNodeValue($node);
98
 
99
        if (self::CELL_TYPE_NUMERIC === $cellType) {
100
            $fNodeValue = $node->getElementsByTagName(self::XML_NODE_FORMULA)->item(0)?->nodeValue;
101
            if (null !== $fNodeValue) {
102
                $computedValue = $this->formatNumericCellValue($vNodeValue, (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID));
103
 
104
                return new Cell\FormulaCell('='.$fNodeValue, null, $computedValue);
105
            }
106
        }
107
 
108
        if ('' === $vNodeValue && self::CELL_TYPE_INLINE_STRING !== $cellType) {
109
            return Cell::fromValue($vNodeValue);
110
        }
111
 
112
        $rawValue = match ($cellType) {
113
            self::CELL_TYPE_INLINE_STRING => $this->formatInlineStringCellValue($node),
114
            self::CELL_TYPE_SHARED_STRING => $this->formatSharedStringCellValue($vNodeValue),
115
            self::CELL_TYPE_STR => $this->formatStrCellValue($vNodeValue),
116
            self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($vNodeValue),
117
            self::CELL_TYPE_NUMERIC => $this->formatNumericCellValue($vNodeValue, (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID)),
118
            self::CELL_TYPE_DATE => $this->formatDateCellValue($vNodeValue),
119
            default => new Cell\ErrorCell($vNodeValue, null),
120
        };
121
 
122
        if ($rawValue instanceof Cell) {
123
            return $rawValue;
124
        }
125
 
126
        return Cell::fromValue($rawValue);
127
    }
128
 
129
    /**
130
     * Returns the cell's string value from a node's nested value node.
131
     *
132
     * @return string The value associated with the cell
133
     */
134
    private function getVNodeValue(DOMElement $node): string
135
    {
136
        // for cell types having a "v" tag containing the value.
137
        // if not, the returned value should be empty string.
138
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
139
 
140
        return (string) $vNode?->nodeValue;
141
    }
142
 
143
    /**
144
     * Returns the cell String value where string is inline.
145
     *
146
     * @return string The value associated with the cell
147
     */
148
    private function formatInlineStringCellValue(DOMElement $node): string
149
    {
150
        // inline strings are formatted this way (they can contain any number of <t> nodes):
151
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
152
        $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
153
 
154
        $cellValue = '';
155
        for ($i = 0; $i < $tNodes->count(); ++$i) {
156
            $nodeValue = $tNodes->item($i)->nodeValue;
157
            \assert(null !== $nodeValue);
158
            $cellValue .= $this->escaper->unescape($nodeValue);
159
        }
160
 
161
        return $cellValue;
162
    }
163
 
164
    /**
165
     * Returns the cell String value from shared-strings file using nodeValue index.
166
     *
167
     * @return string The value associated with the cell
168
     */
169
    private function formatSharedStringCellValue(string $nodeValue): string
170
    {
171
        // shared strings are formatted this way:
172
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
173
        $sharedStringIndex = (int) $nodeValue;
174
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
175
 
176
        return $this->escaper->unescape($escapedCellValue);
177
    }
178
 
179
    /**
180
     * Returns the cell String value, where string is stored in value node.
181
     *
182
     * @return string The value associated with the cell
183
     */
184
    private function formatStrCellValue(string $nodeValue): string
185
    {
186
        $escapedCellValue = trim($nodeValue);
187
 
188
        return $this->escaper->unescape($escapedCellValue);
189
    }
190
 
191
    /**
192
     * Returns the cell Numeric value from string of nodeValue.
193
     * The value can also represent a timestamp and a DateTime will be returned.
194
     *
195
     * @param int $cellStyleId 0 being the default style
196
     */
197
    private function formatNumericCellValue(float|int|string $nodeValue, int $cellStyleId): DateInterval|DateTimeImmutable|float|int|string
198
    {
199
        // Numeric values can represent numbers as well as timestamps.
200
        // We need to look at the style of the cell to determine whether it is one or the other.
201
        $formatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
202
 
203
        if (DateIntervalFormatHelper::isDurationFormat($formatCode)) {
204
            $cellValue = $this->formatExcelDateIntervalValue((float) $nodeValue, $formatCode);
205
        } elseif ($this->styleManager->shouldFormatNumericValueAsDate($cellStyleId)) {
206
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
207
        } else {
208
            $nodeIntValue = (int) $nodeValue;
209
            $nodeFloatValue = (float) $nodeValue;
210
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
211
        }
212
 
213
        return $cellValue;
214
    }
215
 
216
    private function formatExcelDateIntervalValue(float $nodeValue, string $excelFormat): DateInterval|string
217
    {
218
        $dateInterval = DateIntervalFormatHelper::createDateIntervalFromHours($nodeValue);
219
        if ($this->shouldFormatDates) {
220
            return DateIntervalFormatHelper::formatDateInterval($dateInterval, $excelFormat);
221
        }
222
 
223
        return $dateInterval;
224
    }
225
 
226
    /**
227
     * Returns a cell's PHP Date value, associated to the given timestamp.
228
     * NOTE: The timestamp is a float representing the number of days since the base Excel date:
229
     *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
230
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
231
     *
232
     * @param int $cellStyleId 0 being the default style
233
     *
234
     * @throws InvalidValueException If the value is not a valid timestamp
235
     *
236
     * @see ECMA-376 Part 1 - §18.17.4
237
     */
238
    private function formatExcelTimestampValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
239
    {
240
        if (!$this->isValidTimestampValue($nodeValue)) {
241
            throw new InvalidValueException((string) $nodeValue);
242
        }
243
 
244
        return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
245
    }
246
 
247
    /**
248
     * Returns whether the given timestamp is supported by SpreadsheetML.
249
     *
250
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
251
     */
252
    private function isValidTimestampValue(float $timestampValue): bool
253
    {
254
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
255
        return
256
            $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884
257
            || !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884;
258
    }
259
 
260
    /**
261
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
262
     * Only the time value matters. The date part is set to the base Excel date:
263
     * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
264
     *
265
     * @param int $cellStyleId 0 being the default style
266
     */
267
    private function formatExcelTimestampValueAsDateTimeValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
268
    {
269
        $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
270
 
271
        $daysSinceBaseDate = (int) $nodeValue;
272
        $daysSign = '+';
273
        if ($daysSinceBaseDate < 0) {
274
            $daysSinceBaseDate = abs($daysSinceBaseDate);
275
            $daysSign = '-';
276
        }
277
        $timeRemainder = fmod($nodeValue, 1);
278
        $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
279
        $secondsSign = '+';
280
        if ($secondsRemainder < 0) {
281
            $secondsRemainder = abs($secondsRemainder);
282
            $secondsSign = '-';
283
        }
284
 
285
        $dateObj = DateTimeImmutable::createFromFormat('|Y-m-d', $baseDate);
286
        \assert(false !== $dateObj);
287
        $dateObj = $dateObj->modify($daysSign.$daysSinceBaseDate.'days');
288
        \assert(false !== $dateObj);
289
        $dateObj = $dateObj->modify($secondsSign.$secondsRemainder.'seconds');
290
        \assert(false !== $dateObj);
291
 
292
        if ($this->shouldFormatDates) {
293
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
294
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
295
            $cellValue = $dateObj->format($phpDateFormat);
296
        } else {
297
            $cellValue = $dateObj;
298
        }
299
 
300
        return $cellValue;
301
    }
302
 
303
    /**
304
     * Returns the cell Boolean value from a specific node's Value.
305
     *
306
     * @return bool The value associated with the cell
307
     */
308
    private function formatBooleanCellValue(string $nodeValue): bool
309
    {
310
        return (bool) $nodeValue;
311
    }
312
 
313
    /**
314
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
315
     *
316
     * @see ECMA-376 Part 1 - §18.17.4
317
     *
318
     * @param string $nodeValue ISO 8601 Date string
319
     */
320
    private function formatDateCellValue(string $nodeValue): Cell\ErrorCell|DateTimeImmutable|string
321
    {
322
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
323
        try {
324
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new DateTimeImmutable($nodeValue);
325
        } catch (Exception) {
326
            return new Cell\ErrorCell($nodeValue, null);
327
        }
328
 
329
        return $cellValue;
330
    }
331
}