Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
namespace PhpOffice\PhpSpreadsheet\Reader\Csv;
4
 
5
/**
 * Infers the field delimiter of a CSV stream by sampling its leading lines.
 *
 * On construction, up to 1000 logical lines are read from the supplied handle
 * (starting at its current position; the handle is not rewound here). For each
 * line, the number of occurrences of every candidate delimiter found outside
 * enclosed fields is recorded. infer() then selects the candidate whose
 * per-line count is the most consistent.
 */
class Delimiter
{
    /** Candidate delimiters, in order of preference when scores tie. */
    protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];

    /** @var resource */
    protected $fileHandle;

    /** @var string */
    protected $escapeCharacter;

    /** @var string */
    protected $enclosure;

    /**
     * Per-delimiter list of occurrence counts, one entry per sampled line.
     *
     * @var array
     */
    protected $counts = [];

    /**
     * Number of lines that were sampled.
     *
     * @var int
     */
    protected $numberLines = 0;

    /** @var ?string */
    protected $delimiter;

    /**
     * Store the reader settings and immediately sample the file.
     *
     * @param resource $fileHandle open, readable stream positioned at the first line to sample
     * @param string $escapeCharacter character that escapes an enclosure ('' for none)
     * @param string $enclosure character that wraps fields ('' for none)
     */
    public function __construct($fileHandle, string $escapeCharacter, string $enclosure)
    {
        $this->fileHandle = $fileHandle;
        $this->escapeCharacter = $escapeCharacter;
        $this->enclosure = $enclosure;

        $this->countPotentialDelimiters();
    }

    /**
     * The delimiter to fall back on: the first entry of the candidate list.
     */
    public function getDefaultDelimiter(): string
    {
        return self::POTENTIAL_DELIMETERS[0];
    }

    /**
     * Number of lines that were sampled from the handle.
     */
    public function linesCounted(): int
    {
        return $this->numberLines;
    }

    /**
     * Read up to 1000 lines and record the delimiter counts of each of them.
     */
    protected function countPotentialDelimiters(): void
    {
        $this->counts = array_fill_keys(self::POTENTIAL_DELIMETERS, []);
        $delimiterKeys = array_flip(self::POTENTIAL_DELIMETERS);

        // Count how many times each of the potential delimiters appears in each line.
        // The limit is tested BEFORE reading so that every line consumed from the
        // handle is also counted; numberLines therefore always equals the sample size.
        $this->numberLines = 0;
        while ($this->numberLines < 1000 && ($line = $this->getNextLine()) !== false) {
            ++$this->numberLines;
            $this->countDelimiterValues($line, $delimiterKeys);
        }
    }

    /**
     * Append, for every candidate delimiter, its number of occurrences in $line.
     *
     * @param string $line line content with enclosed sections already removed
     * @param array $delimiterKeys candidate delimiters as array keys
     */
    protected function countDelimiterValues(string $line, array $delimiterKeys): void
    {
        $splitString = str_split($line, 1);
        if (is_array($splitString)) {
            $distribution = array_count_values($splitString);
            $countLine = array_intersect_key($distribution, $delimiterKeys);

            foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
                $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0;
            }
        }
    }

    /**
     * Pick the most plausible delimiter from the sampled counts.
     *
     * For each candidate, the mean square deviation of its per-line counts from
     * their median is computed; candidates whose median is zero are ignored.
     * The candidate with the smallest deviation wins, ties being resolved by
     * the order of POTENTIAL_DELIMETERS.
     *
     * @return ?string the inferred delimiter, or the previously stored value
     *                 (null initially) when no candidate qualifies
     */
    public function infer(): ?string
    {
        // Calculate the mean square deviations for each delimiter
        //     (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];

        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $series = $this->counts[$delimiter] ?? [];

            // Work from the real sample size so the median indices are always valid,
            // including when nothing at all could be read from the handle.
            $sampleSize = count($series);
            if ($sampleSize === 0) {
                continue;
            }

            sort($series);

            $middleIdx = intdiv($sampleSize - 1, 2);
            $median = ($sampleSize % 2)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // Integer division of two integers that divide evenly yields an int,
            // so a median of "no occurrences" is exactly int 0 in both branches.
            if ($median === 0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                static function ($sum, $value) use ($median) {
                    return $sum + ($value - $median) ** 2;
                },
                0
            ) / $sampleSize;
        }

        // ... and pick the delimiter with the smallest mean square deviation
        //         (in case of ties, the order in potentialDelimiters is respected:
        //          entries were inserted in that order and only a strictly
        //          smaller deviation replaces the current choice)
        $min = INF;
        foreach ($meanSquareDeviations as $delimiter => $deviation) {
            if ($deviation < $min) {
                $min = $deviation;
                $this->delimiter = $delimiter;
            }
        }

        return $this->delimiter;
    }

    /**
     * Get the next full line from the file.
     *
     * Enclosed sections are removed from the returned text so that delimiters
     * inside them are not counted. When an enclosure is still open at the end
     * of a physical line, further physical lines are appended until it closes.
     *
     * @return false|string the line with enclosed sections stripped, or false
     *                      at end of file (including when the file ends inside
     *                      an open enclosure) or when nothing is left of the line
     */
    public function getNextLine()
    {
        // With no enclosure character there is nothing to strip. The patterns
        // below would match the empty string, which would make the "enclosure
        // still open" test always true and swallow the whole file.
        if ($this->enclosure === '') {
            return fgets($this->fileHandle);
        }

        // An enclosure only counts when it is not preceded by the escape character
        $enclosure = ($this->escapeCharacter === '' ? ''
                : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
            . preg_quote($this->enclosure, '/');

        // Both patterns are loop-invariant, so build them once
        $enclosedSectionPattern = '/(' . $enclosure . '.*' . $enclosure . ')/Us';
        $openEnclosurePattern = '/(' . $enclosure . ')/';

        $line = '';

        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to what has been gathered so far
            $line .= $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $line = (string) preg_replace($enclosedSectionPattern, '', $line);

            // See if we have any enclosures left in the line
            // if we still have an enclosure then we need to read the next line as well
        } while (preg_match($openEnclosurePattern, $line) > 0);

        return ($line !== '') ? $line : false;
    }
}