Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1441 ariadna 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader\Csv;
4
 
5
/**
 * Infers the most likely field delimiter of a CSV stream.
 *
 * On construction the stream is sampled line by line (with enclosed/quoted
 * sections removed) and, for every candidate delimiter, the number of
 * occurrences per line is recorded. {@see Delimiter::infer()} then picks the
 * candidate whose per-line count is the most consistent.
 */
class Delimiter
{
    /** Candidate delimiters, in order of preference when scores are tied. */
    protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];

    /** Upper bound on the number of logical lines sampled from the stream. */
    protected const MAX_SAMPLED_LINES = 1000;

    /** @var resource */
    protected $fileHandle;

    protected string $escapeCharacter;

    protected string $enclosure;

    /**
     * Per-delimiter list of occurrence counts, one entry per sampled line.
     *
     * @var array<string, list<int>>
     */
    protected array $counts = [];

    /** Number of logical lines that were sampled (and counted). */
    protected int $numberLines = 0;

    protected ?string $delimiter = null;

    /**
     * Store the stream settings and immediately sample the stream.
     *
     * @param resource $fileHandle Readable stream; it is consumed from its current position
     * @param string $escapeCharacter Character that escapes an enclosure ('' for none)
     * @param string $enclosure Character that wraps quoted fields ('' for none)
     */
    public function __construct($fileHandle, string $escapeCharacter, string $enclosure)
    {
        $this->fileHandle = $fileHandle;
        $this->escapeCharacter = $escapeCharacter;
        $this->enclosure = $enclosure;

        $this->countPotentialDelimiters();
    }

    /**
     * The delimiter to fall back on: the first entry of the candidate list.
     */
    public function getDefaultDelimiter(): string
    {
        return self::POTENTIAL_DELIMETERS[0];
    }

    /**
     * Number of logical lines that were sampled from the stream.
     */
    public function linesCounted(): int
    {
        return $this->numberLines;
    }

    /**
     * Sample up to MAX_SAMPLED_LINES logical lines and record, per line,
     * how often each candidate delimiter occurs.
     */
    protected function countPotentialDelimiters(): void
    {
        $this->counts = array_fill_keys(self::POTENTIAL_DELIMETERS, []);
        $delimiterKeys = array_flip(self::POTENTIAL_DELIMETERS);

        // The limit is checked BEFORE reading, and the counter is advanced only
        // after a line has actually been counted, so numberLines always equals
        // the length of every series in $this->counts.
        $this->numberLines = 0;
        while (
            $this->numberLines < self::MAX_SAMPLED_LINES
            && ($line = $this->getNextLine()) !== false
        ) {
            $this->countDelimiterValues($line, $delimiterKeys);
            ++$this->numberLines;
        }
    }

    /**
     * Append the occurrence count of every candidate delimiter in $line to its series.
     *
     * @param string $line Logical line with enclosed sections already removed
     * @param array<string, int> $delimiterKeys Candidates to count, as array keys;
     *                                          candidates missing here are recorded as 0
     */
    protected function countDelimiterValues(string $line, array $delimiterKeys): void
    {
        // All candidates are single ASCII bytes, so a byte-wise count is exact for
        // UTF-8 text and does not require splitting the line into characters.
        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $this->counts[$delimiter][] = isset($delimiterKeys[$delimiter])
                ? substr_count($line, $delimiter)
                : 0;
        }
    }

    /**
     * Pick the delimiter whose per-line occurrence count is the most consistent.
     *
     * For every candidate the median count is taken; candidates with a median
     * of zero (or with no samples at all) are ignored. Of the remaining ones the
     * candidate with the smallest mean square deviation from its median wins.
     *
     * @return null|string The inferred delimiter, or the previously stored value
     *                     (null by default) when no candidate qualifies
     */
    public function infer(): ?string
    {
        // Calculate the mean square deviations for each delimiter
        //     (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];

        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $series = $this->counts[$delimiter] ?? [];
            $sampleSize = count($series);
            if ($sampleSize === 0) {
                // Nothing was sampled (e.g. empty stream): there is no median to compute
                continue;
            }

            sort($series);

            // Median based on the real series length, using an integer index
            $middleIdx = intdiv($sampleSize - 1, 2);
            $median = ($sampleSize % 2 === 1)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            if ((float) $median === 0.0) {
                continue;
            }

            $squaredDeviations = array_reduce(
                $series,
                static fn ($sum, $value) => $sum + ($value - $median) ** 2,
                0
            );
            $meanSquareDeviations[$delimiter] = $squaredDeviations / $sampleSize;
        }

        // ... and pick the delimiter with the smallest mean square deviation
        //         (in case of ties, the order in POTENTIAL_DELIMETERS is respected)
        $min = INF;
        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        return $this->delimiter;
    }

    /**
     * Get the next full (logical) line from the file, with enclosed sections removed.
     *
     * Physical lines are joined for as long as an unmatched enclosure remains,
     * so a quoted field that contains line breaks is treated as one line.
     *
     * @return false|string The stripped line; false when the stream is exhausted
     *                      (including while an enclosure is still open) or when
     *                      nothing is left of the line after stripping
     */
    public function getNextLine()
    {
        // Without an enclosure character nothing can be quoted, so every physical
        // line is a logical line. The patterns below would otherwise match the
        // empty string and make the loop consume the whole stream.
        if ($this->enclosure === '') {
            $plainLine = fgets($this->fileHandle);

            return ($plainLine !== false && $plainLine !== '') ? $plainLine : false;
        }

        $line = '';
        // An enclosure only counts when it is not preceded by the escape character
        $enclosure = ($this->escapeCharacter === '' ? ''
                : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
            . preg_quote($this->enclosure, '/');
        $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/Us';
        $openEnclosurePattern = '/(' . $enclosure . ')/';

        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to what has been collected so far
            $line .= $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $line = (string) preg_replace($enclosedPattern, '', $line);

            // If an enclosure is still left in the line it has not been closed yet,
            // so the next physical line belongs to this logical line as well
        } while (preg_match($openEnclosurePattern, $line) > 0);

        return ($line !== '') ? $line : false;
    }
}