Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Common\Helper;
6
 
7
use Error;
8
use OpenSpout\Common\Exception\EncodingConversionException;
9
 
10
/**
 * Detects byte order marks (BOM) at the start of a stream and converts strings
 * between encodings, using whichever of "iconv" or "mbstring" is available.
 *
 * @internal
 */
final class EncodingHelper
{
    /**
     * Definition of the encodings that can have a BOM.
     */
    public const ENCODING_UTF8 = 'UTF-8';
    public const ENCODING_UTF16_LE = 'UTF-16LE';
    public const ENCODING_UTF16_BE = 'UTF-16BE';
    public const ENCODING_UTF32_LE = 'UTF-32LE';
    public const ENCODING_UTF32_BE = 'UTF-32BE';

    /**
     * Definition of the BOMs for the different encodings.
     */
    public const BOM_UTF8 = "\xEF\xBB\xBF";
    public const BOM_UTF16_LE = "\xFF\xFE";
    public const BOM_UTF16_BE = "\xFE\xFF";
    public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var array<string, string> Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    private readonly array $supportedEncodingsWithBom;

    /**
     * @param bool $canUseIconv    Whether the "iconv" function may be used for conversions
     * @param bool $canUseMbString Whether the "mb_convert_encoding" function may be used for conversions
     */
    public function __construct(
        private readonly bool $canUseIconv,
        private readonly bool $canUseMbString,
    ) {
        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8 => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Builds a helper configured from the conversion functions available at runtime.
     */
    public static function factory(): self
    {
        return new self(
            \function_exists('iconv'),
            \function_exists('mb_convert_encoding'),
        );
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     *
     * Side effect: the file pointer is rewound and then advanced by the bytes
     * read while probing for the BOM, so callers must reposition it themselves.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, string $encoding): int
    {
        if (!$this->hasBOM($filePointer, $encoding)) {
            return 0;
        }

        // The BOM is present: skip exactly as many bytes as it occupies.
        return \strlen($this->supportedEncodingsWithBom[$encoding]);
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param null|string $string         Non UTF-8 string to be converted (NULL is passed through)
     * @param string      $sourceEncoding The encoding used to encode the source string
     *
     * @return null|string The converted, UTF-8 string, or NULL if NULL was given
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionToUTF8(?string $string, string $sourceEncoding): ?string
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param null|string $string         UTF-8 string to be converted (NULL is passed through)
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding, or NULL if NULL was given
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionFromUTF8(?string $string, string $targetEncoding): ?string
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     *
     * The pointer is rewound before reading; encodings without a known BOM
     * always yield FALSE.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    private function hasBOM($filePointer, string $encoding): bool
    {
        rewind($filePointer);

        if (!\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            return false;
        }

        $potentialBom = $this->supportedEncodingsWithBom[$encoding];

        // fgets() reads at most (length - 1) bytes, hence the "+ 1". None of the
        // known BOMs contains a line feed, so the read cannot stop early on a match.
        $firstBytes = fgets($filePointer, \strlen($potentialBom) + 1);

        return $firstBytes === $potentialBom;
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * Depending on what is installed on the server, we will try to iconv or mbstring.
     *
     * @param null|string $string         string to be converted (NULL is passed through)
     * @param string      $sourceEncoding The encoding used to encode the source string
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding, or NULL if NULL was given
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    private function attemptConversion(?string $string, string $sourceEncoding, string $targetEncoding): ?string
    {
        // if there is nothing to convert or source and target encodings are the same, it's a no-op
        if (null === $string || $sourceEncoding === $targetEncoding) {
            return $string;
        }

        if ($this->canUseIconv) {
            $convertedString = $this->convertWithIconv($string, $sourceEncoding, $targetEncoding);
        } elseif ($this->canUseMbString) {
            $convertedString = $this->convertWithMbString($string, $sourceEncoding, $targetEncoding);
        } else {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        if (false === $convertedString) {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} failed.");
        }

        return $convertedString;
    }

    /**
     * Converts the string with iconv, silencing the warnings/notices it emits on failure.
     *
     * @return false|string The converted string, or FALSE if iconv reported a failure
     */
    private function convertWithIconv(string $string, string $sourceEncoding, string $targetEncoding): string|false
    {
        set_error_handler(static fn (): bool => true);

        try {
            return iconv($sourceEncoding, $targetEncoding, $string);
        } finally {
            // Always give the previous handler back, even if something unexpected is thrown.
            restore_error_handler();
        }
    }

    /**
     * Converts the string with mbstring, treating any emitted error or thrown Error as a failure.
     *
     * @return false|string The converted string, or FALSE if mbstring reported a failure
     */
    private function convertWithMbString(string $string, string $sourceEncoding, string $targetEncoding): string|false
    {
        $errorMessage = null;
        set_error_handler(static function (int $nr, string $message) use (&$errorMessage): bool {
            $errorMessage = $message; // @codeCoverageIgnore

            return true; // @codeCoverageIgnore
        });

        $convertedString = false;

        try {
            $convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
        } catch (Error $error) {
            // e.g. an unknown encoding name raises an Error subclass instead of a warning
            $errorMessage = $error->getMessage();
        } finally {
            // Always give the previous handler back, even if something unexpected is thrown.
            restore_error_handler();
        }

        return null === $errorMessage ? $convertedString : false;
    }
}