1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
namespace PhpOffice\PhpSpreadsheet\Shared;
|
|
|
4 |
|
|
|
5 |
use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
|
|
|
6 |
|
|
|
7 |
/**
 * Translates Microsoft code page identifiers into encoding names that
 * iconv and mbstring understand, and validates such encoding names.
 */
class CodePage
{
    public const DEFAULT_CODE_PAGE = 'CP1252';

    /**
     * Microsoft code page identifier => encoding name.
     *
     * A value is either a single encoding name or a list of alternative
     * spellings of the same encoding. For a list, the first spelling that the
     * local iconv accepts is chosen by numberToName() and then cached back
     * into this table as a plain string.
     *
     * @var array<int, string|string[]>
     */
    private static $pageArray = [
        367 => 'ASCII', // ASCII
        437 => 'CP437', // OEM US
        //720 => 'notsupported', // OEM Arabic
        737 => 'CP737', // OEM Greek
        775 => 'CP775', // OEM Baltic
        850 => 'CP850', // OEM Latin I
        852 => 'CP852', // OEM Latin II (Central European)
        855 => 'CP855', // OEM Cyrillic
        857 => 'CP857', // OEM Turkish
        858 => 'CP858', // OEM Multilingual Latin I with Euro
        860 => 'CP860', // OEM Portuguese
        861 => 'CP861', // OEM Icelandic
        862 => 'CP862', // OEM Hebrew
        863 => 'CP863', // OEM Canadian (French)
        864 => 'CP864', // OEM Arabic
        865 => 'CP865', // OEM Nordic
        866 => 'CP866', // OEM Cyrillic (Russian)
        869 => 'CP869', // OEM Greek (Modern)
        874 => 'CP874', // ANSI Thai
        932 => 'CP932', // ANSI Japanese Shift-JIS
        936 => 'CP936', // ANSI Chinese Simplified GBK
        949 => 'CP949', // ANSI Korean (Wansung)
        950 => 'CP950', // ANSI Chinese Traditional BIG5
        1200 => 'UTF-16LE', // UTF-16 (BIFF8)
        1250 => 'CP1250', // ANSI Latin II (Central European)
        1251 => 'CP1251', // ANSI Cyrillic
        1252 => 'CP1252', // ANSI Latin I (BIFF4-BIFF7)
        1253 => 'CP1253', // ANSI Greek
        1254 => 'CP1254', // ANSI Turkish
        1255 => 'CP1255', // ANSI Hebrew
        1256 => 'CP1256', // ANSI Arabic
        1257 => 'CP1257', // ANSI Baltic
        1258 => 'CP1258', // ANSI Vietnamese
        1361 => 'CP1361', // ANSI Korean (Johab)
        10000 => 'MAC', // Apple Roman
        10001 => 'CP932', // Macintosh Japanese
        10002 => 'CP950', // Macintosh Chinese Traditional
        10003 => 'CP1361', // Macintosh Korean
        10004 => 'MACARABIC', // Apple Arabic
        10005 => 'MACHEBREW', // Apple Hebrew
        10006 => 'MACGREEK', // Macintosh Greek
        10007 => 'MACCYRILLIC', // Macintosh Cyrillic
        10008 => 'CP936', // Macintosh - Simplified Chinese (GB 2312)
        10010 => 'MACROMANIA', // Macintosh Romania
        10017 => 'MACUKRAINE', // Macintosh Ukraine
        10021 => 'MACTHAI', // Macintosh Thai
        10029 => ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'], // Macintosh Central Europe
        10079 => 'MACICELAND', // Macintosh Icelandic
        10081 => 'MACTURKISH', // Macintosh Turkish
        10082 => 'MACCROATIAN', // Macintosh Croatian
        21010 => 'UTF-16LE', // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
        32768 => 'MAC', // Apple Roman
        //32769 => 'unsupported', // ANSI Latin I (BIFF2-BIFF3)
        65000 => 'UTF-7', // Unicode (UTF-7)
        65001 => 'UTF-8', // Unicode (UTF-8)
        // NOTE(review): the only candidate here is not a real encoding name, so this
        // entry presumably exists to exercise the "not implemented on this system"
        // branch of numberToName() - confirm against the test suite.
        99999 => ['unsupported'],
    ];

    /**
     * Tell whether an encoding name is one this class can hand out.
     *
     * A name is accepted when it is a plain string value of the table, or when
     * it is one of the alternative spellings of an entry and the local iconv
     * accepts that spelling. The second rule keeps the answer independent of
     * whether numberToName() has already resolved (and cached) that entry.
     *
     * @param string $codePage Encoding name, e.g. 'CP1252'
     *
     * @return bool True when the name is known and usable
     */
    public static function validate(string $codePage): bool
    {
        foreach (self::$pageArray as $entry) {
            if ($entry === $codePage) {
                return true;
            }
            if (
                is_array($entry)
                && in_array($codePage, $entry, true)
                && self::isEncodingAvailable($codePage)
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Convert Microsoft Code Page Identifier to Code Page Name which iconv
     * and mbstring understands.
     *
     * @param int $codePage Microsoft Code Page Identifier
     *
     * @return string Code Page Name
     *
     * @throws PhpSpreadsheetException When the identifier is unknown, explicitly
     *                                 unsupported (720, 32769), or none of its
     *                                 alternative names is accepted by iconv
     */
    public static function numberToName(int $codePage): string
    {
        if (array_key_exists($codePage, self::$pageArray)) {
            $entry = self::$pageArray[$codePage];
            if (!is_array($entry)) {
                return $entry;
            }

            // Several spellings exist for this code page: pick the first one the
            // local iconv accepts and remember it so the probe runs only once.
            foreach ($entry as $encoding) {
                if (self::isEncodingAvailable($encoding)) {
                    self::$pageArray[$codePage] = $encoding;

                    return $encoding;
                }
            }

            throw new PhpSpreadsheetException("Code page $codePage not implemented on this system.");
        }

        // 720 (OEM Arabic) and 32769 (ANSI Latin I, BIFF2-BIFF3) are recognised
        // identifiers that are deliberately left out of the table.
        if ($codePage === 720 || $codePage === 32769) {
            throw new PhpSpreadsheetException("Code page $codePage not supported.");
        }

        throw new PhpSpreadsheetException('Unknown codepage: ' . $codePage);
    }

    /**
     * Return the code page table in its current state.
     *
     * Entries with alternative spellings are arrays until numberToName() has
     * resolved them, after which they are plain strings.
     *
     * @return array<int, string|string[]>
     */
    public static function getEncodings(): array
    {
        return self::$pageArray;
    }

    /**
     * Probe whether the local iconv accepts $encoding as a target charset.
     */
    private static function isEncodingAvailable(string $encoding): bool
    {
        // iconv() raises a notice/warning for an unknown charset; the probe is
        // expected to fail on some systems, so the diagnostic is suppressed.
        return @iconv('UTF-8', $encoding, ' ') !== false;
    }
}
|