Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Common\Helper\Escaper;
6
 
7
/**
 * Escapes and unescapes strings written to / read from XLSX (OpenXML) documents.
 *
 * Two layers of escaping are handled here, on top of the regular XML entities:
 *  - control characters (0x00-0x1F, except "\t", "\n" and "\r") are written as "_xHHHH_";
 *  - a literal underscore that would otherwise be read as the start of such a
 *    sequence is written as "_x005F_", so "_x0000_" becomes "_x005F_x0000_".
 *
 * @internal
 */
final class XLSX implements EscaperInterface
{
    /** Escaped form of the underscore, used to neutralize literal "_xHHHH_" sequences */
    private const ESCAPED_UNDERSCORE = '_x005F_';

    /** @var bool Whether the escaper has already been initialized */
    private bool $isAlreadyInitialized = false;

    /** @var string Regex character class matching the control characters that need to be escaped */
    private string $escapableControlCharactersPattern;

    /** @var array<string, string> Map of escaped values "_xHHHH_" (key) to the raw control character (value) */
    private array $controlCharactersEscapingMap;

    /** @var array<string, string> Map of raw control characters (key) to their escaped value "_xHHHH_" (value) */
    private array $controlCharactersEscapingReverseMap;

    /** @var string Regex matching, in one pass, every sequence that unescape() has to decode */
    private string $unescapePattern;

    /**
     * Escapes the given string to make it compatible with XLSX.
     *
     * @param string $string The string to escape
     *
     * @return string The escaped string
     */
    public function escape(string $string): string
    {
        $this->initIfNeeded();

        $escapedString = $this->escapeControlCharacters($string);

        // @NOTE: Using ENT_QUOTES as XML entities ('<', '>', '&') as well as
        //        single/double quotes (for XML attributes) need to be encoded.
        return htmlspecialchars($escapedString, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Unescapes the given string to make it compatible with XLSX.
     *
     * @param string $string The string to unescape
     *
     * @return string The unescaped string
     */
    public function unescape(string $string): string
    {
        $this->initIfNeeded();

        // ==============
        // =   WARNING  =
        // ==============
        // It is assumed that the given string has already had its XML entities decoded.
        // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation).
        // Therefore there is no need to call "htmlspecialchars_decode()".
        return $this->unescapeControlCharacters($string);
    }

    /**
     * Builds the patterns and lookup maps on first use.
     */
    private function initIfNeeded(): void
    {
        if ($this->isAlreadyInitialized) {
            return;
        }

        // The pattern must be set first: the map builder below relies on it.
        $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
        $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
        $this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap);

        $controlSequences = array_map(
            static function (string $sequence): string {
                return preg_quote($sequence, '/');
            },
            array_keys($this->controlCharactersEscapingMap)
        );

        // Either an escaped underscore that protects a following "xHHHH_",
        // or one of the escaped control characters.
        $this->unescapePattern = '/'.self::ESCAPED_UNDERSCORE.'(?=x[\dA-F]{4}_)|'.implode('|', $controlSequences).'/';

        $this->isAlreadyInitialized = true;
    }

    /**
     * @return string Regex character class containing all escapable control characters
     */
    private function getEscapableControlCharactersPattern(): string
    {
        // control characters values are from 0 to 1F (hex values) in the ASCII table
        // some characters should not be escaped though: "\t", "\r" and "\n".
        return '[\x00-\x08'.
                // skipping "\t" (0x9) and "\n" (0xA)
                '\x0B-\x0C'.
                // skipping "\r" (0xD)
                '\x0E-\x1F]';
    }

    /**
     * Builds the map of escaped values ("_xHHHH_", as keys) to the control
     * characters they stand for (as values).
     * "\t", "\r" and "\n" don't need to be escaped and are therefore absent.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     *
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @return array<string, string>
     */
    private function getControlCharactersEscapingMap(): array
    {
        $controlCharactersEscapingMap = [];

        // control characters values are from 0 to 1F (hex values) in the ASCII table
        for ($charValue = 0x00; $charValue <= 0x1F; ++$charValue) {
            $character = \chr($charValue);
            if (1 === preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
                // "%04X" => upper-case hexadecimal, zero-padded to 4 digits
                $controlCharactersEscapingMap[sprintf('_x%04X_', $charValue)] = $character;
            }
        }

        return $controlCharactersEscapingMap;
    }

    /**
     * Converts PHP control characters from the given string to OpenXML escaped control characters.
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     *
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to escape
     *
     * @return string The escaped string
     */
    private function escapeControlCharacters(string $string): string
    {
        // Literal underscores must be protected first, while the raw control
        // characters are still distinguishable from the sequences replacing them.
        $escapedString = $this->escapeEscapeCharacter($string);

        // Every key of the reverse map is a single byte, so a plain translation is enough.
        return strtr($escapedString, $this->controlCharactersEscapingReverseMap);
    }

    /**
     * Escapes the escape character: "_x0000_" -> "_x005F_x0000_".
     *
     * An underscore is escaped when it is followed by "xHHHH" and then by
     * something that will be an underscore in the final output: either a
     * literal underscore or a control character (which gets written as "_xHHHH_").
     * A lookahead is used so that the closing underscore is not consumed and
     * can itself be escaped: "_x0000_x0001_" -> "_x005F_x0000_x005F_x0001_".
     *
     * @param string $string String to escape
     *
     * @return string The escaped string
     */
    private function escapeEscapeCharacter(string $string): string
    {
        $pattern = '/_(?=x[\dA-F]{4}(?:_|'.$this->escapableControlCharactersPattern.'))/';

        return preg_replace($pattern, self::ESCAPED_UNDERSCORE, $string);
    }

    /**
     * Converts OpenXML escaped control characters from the given string to PHP control characters.
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
     *
     * The string is decoded in a single left-to-right pass, so the characters
     * produced by one sequence are never re-interpreted as part of another one.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     *
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to unescape
     *
     * @return string The unescaped string
     */
    private function unescapeControlCharacters(string $string): string
    {
        // Every sequence handled below starts with "_x": skip the regex when there is none.
        if (false === strpos($string, '_x')) {
            return $string;
        }

        return preg_replace_callback(
            $this->unescapePattern,
            function (array $matches): string {
                // A match is either an escaped control character (found in the map)
                // or the escaped underscore, which is the only other alternative of the pattern.
                return $this->controlCharactersEscapingMap[$matches[0]] ?? '_';
            },
            $string
        );
    }
}