Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\ODS\Helper;
6
 
7
use DateInterval;
8
use DateTimeImmutable;
9
use DOMElement;
10
use DOMNode;
11
use DOMText;
12
use Exception;
13
use OpenSpout\Common\Helper\Escaper\ODS;
14
use OpenSpout\Reader\Exception\InvalidValueException;
15
 
16
/**
 * Converts the XML node of an ODS table cell into the matching PHP value,
 * based on the cell's "office:value-type" attribute.
 *
 * @internal
 */
final class CellValueFormatter
{
    /**
     * Definition of all possible cell types.
     */
    public const CELL_TYPE_STRING = 'string';
    public const CELL_TYPE_FLOAT = 'float';
    public const CELL_TYPE_BOOLEAN = 'boolean';
    public const CELL_TYPE_DATE = 'date';
    public const CELL_TYPE_TIME = 'time';
    public const CELL_TYPE_CURRENCY = 'currency';
    public const CELL_TYPE_PERCENTAGE = 'percentage';
    public const CELL_TYPE_VOID = 'void';

    /**
     * Definition of XML nodes names used to parse data.
     */
    public const XML_NODE_P = 'p';
    public const XML_NODE_TEXT_A = 'text:a';
    public const XML_NODE_TEXT_SPAN = 'text:span';
    public const XML_NODE_TEXT_S = 'text:s';
    public const XML_NODE_TEXT_TAB = 'text:tab';
    public const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';

    /**
     * Definition of XML attributes used to parse data.
     */
    public const XML_ATTRIBUTE_TYPE = 'office:value-type';
    public const XML_ATTRIBUTE_VALUE = 'office:value';
    public const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
    public const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value';
    public const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value';
    public const XML_ATTRIBUTE_CURRENCY = 'office:currency';
    public const XML_ATTRIBUTE_C = 'text:c';

    /**
     * List of XML nodes representing whitespaces and their corresponding value.
     */
    private const WHITESPACE_XML_NODES = [
        self::XML_NODE_TEXT_S => ' ',
        self::XML_NODE_TEXT_TAB => "\t",
        self::XML_NODE_TEXT_LINE_BREAK => "\n",
    ];

    /**
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param ODS  $escaper           Used to unescape XML data
     */
    public function __construct(
        private readonly bool $shouldFormatDates,
        private readonly ODS $escaper,
    ) {}

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * The cell type is read from the "office:value-type" attribute; any type that is
     * not explicitly handled (including "void" and a missing attribute) yields an empty string.
     *
     * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
     *
     * @return bool|DateInterval|DateTimeImmutable|float|int|string The value associated with the cell, empty string if cell's type is void/undefined
     *
     * @throws InvalidValueException If the node value is not valid
     */
    public function extractAndFormatNodeValue(DOMElement $node): bool|DateInterval|DateTimeImmutable|float|int|string
    {
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);

        return match ($cellType) {
            self::CELL_TYPE_STRING => $this->formatStringCellValue($node),
            self::CELL_TYPE_FLOAT => $this->formatFloatCellValue($node),
            self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($node),
            self::CELL_TYPE_DATE => $this->formatDateCellValue($node),
            self::CELL_TYPE_TIME => $this->formatTimeCellValue($node),
            self::CELL_TYPE_CURRENCY => $this->formatCurrencyCellValue($node),
            self::CELL_TYPE_PERCENTAGE => $this->formatPercentageCellValue($node),
            default => '',
        };
    }

    /**
     * Returns the cell String value.
     *
     * Every "p" descendant of the cell contributes one line; the lines are
     * joined with "\n" before the result is unescaped.
     *
     * @return string The value associated with the cell
     */
    private function formatStringCellValue(DOMElement $node): string
    {
        $paragraphs = [];

        foreach ($node->getElementsByTagName(self::XML_NODE_P) as $paragraphNode) {
            $paragraphs[] = $this->extractTextValueFromNode($paragraphNode);
        }

        return $this->escaper->unescape(implode("\n", $paragraphs));
    }

    /**
     * Returns the cell Numeric value from the given node.
     *
     * The "office:value" attribute is returned as an int when its integer and
     * float casts compare equal, and as a float otherwise.
     *
     * @return float|int The value associated with the cell
     */
    private function formatFloatCellValue(DOMElement $node): float|int
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);

        $nodeIntValue = (int) $nodeValue;
        $nodeFloatValue = (float) $nodeValue;

        return ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
    }

    /**
     * Returns the cell Boolean value from the given node.
     *
     * The "office:boolean-value" attribute carries the literal text "true" or "false".
     * A plain (bool) cast cannot be used here: "false" is a non-empty string and
     * would therefore be converted to true.
     *
     * @return bool The value associated with the cell; false when the attribute is missing, empty, "0" or "false"
     */
    private function formatBooleanCellValue(DOMElement $node): bool
    {
        $rawValue = strtolower(trim($node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE)));

        return '' !== $rawValue && '0' !== $rawValue && 'false' !== $rawValue;
    }

    /**
     * Returns the cell Date value from the given node.
     *
     * @return DateTimeImmutable|string The text already formatted in the cell when dates should be formatted,
     *                                  otherwise a date object built from the "office:date-value" attribute
     *
     * @throws InvalidValueException If the value is not a valid date
     */
    private function formatDateCellValue(DOMElement $node): DateTimeImmutable|string
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
        //   <text:p>05/19/16 04:39 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The date is already formatted in the "p" tag
            return $this->extractPreformattedValue($node);
        }

        // otherwise, get it from the "date-value" attribute
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);

        try {
            return new DateTimeImmutable($nodeValue);
        } catch (Exception $previous) {
            throw new InvalidValueException($nodeValue, '', 0, $previous);
        }
    }

    /**
     * Returns the cell Time value from the given node.
     *
     * @return DateInterval|string The text already formatted in the cell when dates should be formatted,
     *                             otherwise an interval built from the "office:time-value" attribute
     *
     * @throws InvalidValueException If the value is not a valid time
     */
    private function formatTimeCellValue(DOMElement $node): DateInterval|string
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
        //   <text:p>01:24:00 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The time is already formatted in the "p" tag
            return $this->extractPreformattedValue($node);
        }

        // otherwise, get it from the "time-value" attribute
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);

        try {
            return new DateInterval($nodeValue);
        } catch (Exception $previous) {
            throw new InvalidValueException($nodeValue, '', 0, $previous);
        }
    }

    /**
     * Returns the text of the first "p" descendant of the given cell.
     *
     * A cell without any "p" element has no formatted representation, so an
     * empty string is returned instead of dereferencing a missing node.
     *
     * @return string The already formatted value, empty string if there is none
     */
    private function extractPreformattedValue(DOMElement $node): string
    {
        return $node->getElementsByTagName(self::XML_NODE_P)->item(0)?->nodeValue ?? '';
    }

    /**
     * Returns the cell Currency value from the given node.
     *
     * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
     */
    private function formatCurrencyCellValue(DOMElement $node): string
    {
        $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
        $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);

        return "{$value} {$currency}";
    }

    /**
     * Returns the cell Percentage value from the given node.
     *
     * @return float|int The value associated with the cell
     */
    private function formatPercentageCellValue(DOMElement $node): float|int
    {
        // percentages are formatted like floats
        return $this->formatFloatCellValue($node);
    }

    /**
     * Builds the text of the given node from its children: text nodes are
     * appended as is, whitespace elements are expanded to the characters they
     * stand for, and "text:a" / "text:span" elements are processed recursively.
     * Any other child is ignored.
     *
     * @return string The concatenated text value
     */
    private function extractTextValueFromNode(DOMNode $pNode): string
    {
        $textValue = '';

        foreach ($pNode->childNodes as $childNode) {
            if ($childNode instanceof DOMText) {
                $textValue .= $childNode->nodeValue;
            } elseif ($childNode instanceof DOMElement && $this->isWhitespaceNode($childNode->nodeName)) {
                $textValue .= $this->transformWhitespaceNode($childNode);
            } elseif (self::XML_NODE_TEXT_A === $childNode->nodeName || self::XML_NODE_TEXT_SPAN === $childNode->nodeName) {
                $textValue .= $this->extractTextValueFromNode($childNode);
            }
        }

        return $textValue;
    }

    /**
     * Returns whether the given node is a whitespace node. It must be one of these:
     *  - <text:s />
     *  - <text:tab />
     *  - <text:line-break />.
     */
    private function isWhitespaceNode(string $nodeName): bool
    {
        return isset(self::WHITESPACE_XML_NODES[$nodeName]);
    }

    /**
     * The "<text:p>" node can contain the string value directly
     * or contain child elements. In this case, whitespaces contained in
     * the child elements are represented by dedicated XML elements:
     *  - space => <text:s />
     *  - tab => <text:tab />
     *  - line break => <text:line-break />.
     *
     * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
     *
     * @param DOMElement $node The XML node representing a whitespace
     *
     * @return string The corresponding whitespace value
     */
    private function transformWhitespaceNode(DOMElement $node): string
    {
        $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"

        // str_repeat() throws a ValueError on a negative count, so a malformed
        // attribute is clamped to zero repetitions.
        $numWhitespaces = '' !== $countAttribute ? max(0, (int) $countAttribute) : 1;

        return str_repeat(self::WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
    }
}