1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
declare(strict_types=1);
|
|
|
4 |
|
|
|
5 |
namespace OpenSpout\Reader\Wrapper;
|
|
|
6 |
|
|
|
7 |
use OpenSpout\Reader\Exception\XMLProcessingException;
|
|
|
8 |
use ZipArchive;
|
|
|
9 |
|
|
|
10 |
/**
 * Thin wrapper around PHP's native \XMLReader that adds helpers for reading
 * XML entries stored inside ZIP archives and for locating nodes by name.
 *
 * @internal
 */
final class XMLReader extends \XMLReader
{
    // Supplies useXMLInternalErrors() and
    // resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured()
    // (presumably - the trait is declared outside this file).
    use XMLInternalErrorsHelper;

    public const ZIP_WRAPPER = 'zip://';

    /**
     * Points the reader at an XML entry contained in a ZIP archive.
     *
     * The entry is looked up in the archive before opening it, because PHP
     * emits a warning when asked to open a file that does not exist.
     *
     * @param string $zipFilePath       Path to the ZIP file
     * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
     *
     * @return bool TRUE on success or FALSE on failure (including a missing entry)
     */
    public function openFileInZip(string $zipFilePath, string $fileInsideZipPath): bool
    {
        $entryURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);

        // Nothing to open when the archive or the entry cannot be found.
        if (!$this->fileExistsWithinZip($entryURI)) {
            return false;
        }

        return $this->open($entryURI, null, LIBXML_NONET);
    }

    /**
     * Builds the "zip://<archive>#<entry>" stream URI for an entry of a ZIP archive.
     * The archive path is passed through realpath(), which helps to avoid
     * issues on some Windows setups.
     *
     * @param string $zipFilePath       Path to the ZIP file
     * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
     *
     * @return string The real path URI
     */
    public function getRealPathURIForFileInZip(string $zipFilePath, string $fileInsideZipPath): string
    {
        // A leading '/' on the entry path would prevent the entry from being found.
        $entryPath = ltrim($fileInsideZipPath, '/');
        $archiveRealPath = realpath($zipFilePath);

        return self::ZIP_WRAPPER.$archiveRealPath.'#'.$entryPath;
    }

    /**
     * Advances the reader to the next node of the document.
     *
     * @see \XMLReader::read
     *
     * @throws XMLProcessingException If an error/warning occurred
     */
    public function read(): bool
    {
        $this->useXMLInternalErrors();

        $hasMoved = parent::read();

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $hasMoved;
    }

    /**
     * Keeps reading until the reader sits on an opening element with the
     * given name, or until no more nodes can be read.
     *
     * @param string $nodeName Name of the node to find
     *
     * @return bool TRUE when the node was found, FALSE when reading stopped first
     *
     * @throws XMLProcessingException If an error/warning occurred
     */
    public function readUntilNodeFound(string $nodeName): bool
    {
        while ($this->read()) {
            if ($this->isPositionedOnStartingNode($nodeName)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Moves the cursor to the next node, skipping all subtrees.
     *
     * @see \XMLReader::next
     *
     * @param null|string $localName The name of the next node to move to
     *
     * @throws XMLProcessingException If an error/warning occurred
     */
    public function next($localName = null): bool
    {
        $this->useXMLInternalErrors();

        $hasMoved = parent::next($localName);

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $hasMoved;
    }

    /**
     * @return bool Whether the reader currently sits on an opening element with the given name
     */
    public function isPositionedOnStartingNode(string $nodeName): bool
    {
        return $this->isPositionedOnNode($nodeName, \XMLReader::ELEMENT);
    }

    /**
     * @return bool Whether the reader currently sits on a closing element with the given name
     */
    public function isPositionedOnEndingNode(string $nodeName): bool
    {
        return $this->isPositionedOnNode($nodeName, \XMLReader::END_ELEMENT);
    }

    /**
     * @return string The un-prefixed (local) name of the current node
     */
    public function getCurrentNodeName(): string
    {
        return $this->localName;
    }

    /**
     * Tells whether the entry designated by a zip stream URI exists in its archive.
     *
     * @param string $zipStreamURI URI of a zip stream, e.g. "zip://file.zip#path/inside.xml"
     *
     * @return bool TRUE if the file exists, FALSE otherwise (also when the URI
     *              is malformed or the archive cannot be opened)
     */
    private function fileExistsWithinZip(string $zipStreamURI): bool
    {
        // Split the URI into the archive path (before '#') and the entry path (after '#').
        if (1 !== preg_match('/zip:\/\/([^#]+)#(.*)/', $zipStreamURI, $uriParts)) {
            return false;
        }

        [, $archivePath, $entryPath] = $uriParts;

        $archive = new ZipArchive();
        // ZipArchive::open() yields an error code instead of TRUE on failure.
        if (true !== $archive->open($archivePath)) {
            return false;
        }

        $entryFound = false !== $archive->locateName($entryPath);
        $archive->close();

        return $entryFound;
    }

    /**
     * @return bool Whether the reader currently sits on a node with the given name and type
     */
    private function isPositionedOnNode(string $nodeName, int $nodeType): bool
    {
        if ($this->nodeType !== $nodeType) {
            return false;
        }

        /*
         * A node may carry a prefix (for instance, "<sheet>" can also be "<x:sheet>").
         * When the requested name has no prefix, it is compared with the un-prefixed
         * name ("localName"); otherwise it is compared with the full name.
         *
         * @see https://github.com/box/spout/issues/233
         */
        $nameToCompare = str_contains($nodeName, ':') ? $this->name : $this->localName;

        return $nameToCompare === $nodeName;
    }
}
|