Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1441 ariadna 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
4
 
5
use PhpOffice\PhpSpreadsheet\Reader;
6
 
7
/**
 * Pre-parse guard for XML handed to the spreadsheet readers.
 *
 * The scanner rejects input that contains a forbidden marker (by default
 * `<!DOCTYPE`, or `<!ENTITY` for the HTML reader), normalises the payload to
 * UTF-8, and rejects UTF-7 and EBCDIC encoded input outright.
 */
class XmlScanner
{
    /** Captures the value of an `encoding="..."` attribute (group 2). */
    private const ENCODING_PATTERN = '/encoding\s*=\s*(["\'])(.+?)\1/s';

    /** Matches an `encoding` attribute whose value is UTF-7 (case-insensitive). */
    private const ENCODING_UTF7 = '/encoding\s*=\s*(["\'])UTF-7\1/si';

    /** Message raised whenever the forbidden marker is found. */
    private const FORBIDDEN_MESSAGE = 'Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks';

    /** Literal marker whose presence causes scan() to reject the input. */
    private string $pattern;

    /** @var ?callable Optional post-processing hook applied to accepted XML. */
    private $callback = null;

    /**
     * @param string $pattern Literal text that must not appear in scanned XML
     */
    public function __construct(string $pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
    }

    /**
     * Build a scanner suited to the given reader.
     *
     * The HTML reader gets a scanner looking for `<!ENTITY`; every other
     * reader gets one looking for `<!DOCTYPE`.
     */
    public static function getInstance(Reader\IReader $reader): self
    {
        $pattern = ($reader instanceof Reader\Html) ? '<!ENTITY' : '<!DOCTYPE';

        return new self($pattern);
    }

    /**
     * Register a callable that scan() invokes on XML that passed all checks.
     *
     * The callable receives the UTF-8 XML string and its return value becomes
     * the return value of scan().
     */
    public function setAdditionalCallback(callable $callback): void
    {
        $this->callback = $callback;
    }

    /**
     * Return $arg unchanged when it is a string, otherwise an empty string.
     */
    private static function forceString(mixed $arg): string
    {
        return is_string($arg) ? $arg : '';
    }

    /**
     * Convert the XML to UTF-8 and reject disallowed encodings.
     *
     * @throws Reader\Exception when the input is UTF-7, EBCDIC, or appears to
     *                          have been double-encoded
     */
    private function toUtf8(string $xml): string
    {
        $charset = $this->findCharSet($xml);
        $foundUtf7 = $charset === 'UTF-7';
        if ($charset !== 'UTF-8') {
            // NOTE(review): `<?` here makes the `<` optional rather than
            // matching a literal `?`; kept as-is so detection behaviour is
            // unchanged.
            $testStart = '/^.{0,4}\s*<?xml/s';
            $startWithXml1 = preg_match($testStart, $xml);
            // NOTE(review): on PHP 8 mb_convert_encoding() raises a ValueError
            // for an unrecognised encoding name, which is not a
            // Reader\Exception - confirm callers cope with that.
            $xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset));
            // A prolog that was readable before conversion but is not
            // afterwards means the declared encoding did not match the bytes.
            if ($startWithXml1 === 1 && preg_match($testStart, $xml) !== 1) {
                throw new Reader\Exception('Double encoding not permitted');
            }
            $foundUtf7 = $foundUtf7 || (preg_match(self::ENCODING_UTF7, $xml) === 1);
            // The payload is UTF-8 now, so drop the stale encoding attribute.
            $xml = preg_replace(self::ENCODING_PATTERN, '', $xml) ?? $xml;
        } else {
            $foundUtf7 = $foundUtf7 || (preg_match(self::ENCODING_UTF7, $xml) === 1);
        }
        if ($foundUtf7) {
            throw new Reader\Exception('UTF-7 encoding not permitted');
        }
        // Strip a leading UTF-8 byte-order mark.
        if (substr($xml, 0, Reader\Csv::UTF8_BOM_LEN) === Reader\Csv::UTF8_BOM) {
            $xml = substr($xml, Reader\Csv::UTF8_BOM_LEN);
        }

        return $xml;
    }

    /**
     * Work out which character set the XML claims to use.
     *
     * Order of precedence: the value returned by Reader\Csv::guessEncodingBom()
     * when it is non-empty (presumably BOM-based - verify against that
     * helper), then the upper-cased `encoding` attribute, then 'UTF-8'.
     *
     * @throws Reader\Exception when the input starts with the byte sequence
     *                          this class treats as an EBCDIC marker
     */
    private function findCharSet(string $xml): string
    {
        if (substr($xml, 0, 4) === "\x4c\x6f\xa7\x94") {
            throw new Reader\Exception('EBCDIC encoding not permitted');
        }
        $encoding = Reader\Csv::guessEncodingBom('', $xml);
        if ($encoding !== '') {
            return $encoding;
        }
        // Remove NUL bytes so the attribute is still found in input that
        // interleaves them between characters.
        $xml = str_replace("\0", '', $xml);
        if (preg_match(self::ENCODING_PATTERN, $xml, $matches)) {
            return strtoupper($matches[2]);
        }

        return 'UTF-8';
    }

    /**
     * Build the regex that finds the forbidden marker even when NUL bytes are
     * interleaved between its characters.
     *
     * Each character is passed through preg_quote() so that a marker
     * containing regex metacharacters or the `/` delimiter is matched
     * literally instead of corrupting the expression.
     */
    private function buildForbiddenRegex(): string
    {
        $quotedChars = array_map(
            static fn (string $char): string => preg_quote($char, '/'),
            mb_str_split($this->pattern, 1, 'UTF-8'),
        );

        return '/\0*' . implode('\0*', $quotedChars) . '\0*/';
    }

    /**
     * Throw unless the regex ran successfully and found no match.
     *
     * @throws Reader\Exception when the marker is present, or when PCRE could
     *                          not complete the check
     */
    private function assertMarkerAbsent(string $regex, string $xml): void
    {
        $result = preg_match($regex, $xml);
        if ($result === false) {
            // Fail closed: an unchecked document must never be treated as safe.
            throw new Reader\Exception('Unable to scan XML for forbidden content: ' . preg_last_error_msg());
        }
        if ($result === 1) {
            throw new Reader\Exception(self::FORBIDDEN_MESSAGE);
        }
    }

    /**
     * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
     *
     * The marker is checked twice: on the raw bytes and again after conversion
     * to UTF-8. `false` is treated as an empty string.
     *
     * @param false|string $xml
     *
     * @return string the UTF-8 XML, after the optional callback has been applied
     *
     * @throws Reader\Exception when the marker is found, the check cannot be
     *                          completed, or the encoding is not permitted
     */
    public function scan($xml): string
    {
        // Don't rely purely on libxml_disable_entity_loader()
        $regex = $this->buildForbiddenRegex();

        $xml = (string) $xml;
        $this->assertMarkerAbsent($regex, $xml);

        $xml = $this->toUtf8($xml);
        $this->assertMarkerAbsent($regex, $xml);

        if ($this->callback !== null) {
            $xml = ($this->callback)($xml);
        }

        return $xml;
    }

    /**
     * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
     *
     * Reads the whole file with file_get_contents() and passes the result to
     * scan(); a failed read (`false`) is scanned as an empty string.
     *
     * @param string $filestream path or stream URL to read
     */
    public function scanFile(string $filestream): string
    {
        return $this->scan(file_get_contents($filestream));
    }
}