1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
namespace PhpXmlRpc\Helper;
|
|
|
4 |
|
|
|
5 |
use PhpXmlRpc\PhpXmlRpc;
|
|
|
6 |
use PhpXmlRpc\Traits\DeprecationLogger;
|
|
|
7 |
use PhpXmlRpc\Value;
|
|
|
8 |
|
|
|
9 |
/**
|
|
|
10 |
* Deals with parsing the XML.
|
|
|
11 |
* @see http://xmlrpc.com/spec.md
|
|
|
12 |
*
|
|
|
13 |
* @todo implement an interface to allow for alternative implementations
|
|
|
14 |
* - make access to $_xh protected, return more high-level data structures
|
|
|
15 |
* - move the private parts of $_xh to the internal-use parsing-options config
|
|
|
16 |
* - add parseRequest, parseResponse, parseValue methods
|
|
|
17 |
* @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding
|
|
|
18 |
* while parsing, which is faster than doing it later by going over the rebuilt data structure
|
|
|
19 |
* @todo rename? This is an xml-rpc parser, not a generic xml parser...
|
|
|
20 |
*
|
|
|
21 |
* @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
|
|
|
22 |
* @property int $accept deprecated - (protected) access left in purely for BC
|
|
|
23 |
*/
|
|
|
24 |
class XMLParser
|
|
|
25 |
{
|
|
|
26 |
use DeprecationLogger;
|
|
|
27 |
|
|
|
28 |
// Supported values for the $returnType argument of parse()
const RETURN_XMLRPCVALS = 'xmlrpcvals';
const RETURN_EPIVALS = 'epivals';
const RETURN_PHP = 'phpvals';

// Bit flags for the $accept argument of parse(): which top-level xml elements are considered valid
const ACCEPT_REQUEST = 1;
const ACCEPT_RESPONSE = 2;
const ACCEPT_VALUE = 4;
const ACCEPT_FAULT = 8;

/**
 * @var int
 * The max length beyond which data will get truncated in error messages
 */
protected $maxLogValueLength = 100;

/**
 * @var array
 * Used to store state during parsing and to pass parsing results to callers.
 * Quick explanation of components:
 *   private:
 *     ac - used to accumulate values
 *     stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements
 *     valuestack - array used for parsing arrays and structs
 *     lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
 *          (values: 0=not looking, 1=looking, 3=found)
 *     vt - type of the value being parsed. Not part of the initial array: it is set by the element handlers
 *          ('value' = inside a VALUE with no type found yet, 'data' = inside an ARRAY waiting for DATA,
 *          null = inside a MEMBER/PARAM with no VALUE found yet)
 *     php_class - value of the php_class attribute found on a STRUCT/ARRAY element, if any. Not part of the
 *          initial array: it is set by the element handlers
 *   public:
 *     isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
 *     isf_reason - used for storing xml-rpc response fault string
 *     value - used to store the value in responses
 *     method - used to store method name in requests
 *     params - used to store parameters in requests
 *     pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
 *     rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
 */
protected $_xh = array(
    'ac' => '',
    'stack' => array(),
    'valuestack' => array(),
    'lv' => 0,
    'isf' => 0,
    'isf_reason' => '',
    'value' => null,
    'method' => false,
    'params' => array(),
    'pt' => array(),
    'rt' => '',
);

/**
 * @var array[]
 * For every known xml-rpc element (names are upper-case, as the parser is run with case folding on), the list
 * of elements which are allowed to be its direct parent
 */
protected $xmlrpc_valid_parents = array(
    'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
    'BOOLEAN' => array('VALUE'),
    'I4' => array('VALUE'),
    'I8' => array('VALUE'),
    'EX:I8' => array('VALUE'),
    'INT' => array('VALUE'),
    'STRING' => array('VALUE'),
    'DOUBLE' => array('VALUE'),
    'DATETIME.ISO8601' => array('VALUE'),
    'BASE64' => array('VALUE'),
    'MEMBER' => array('STRUCT'),
    'NAME' => array('MEMBER'),
    'DATA' => array('ARRAY'),
    'ARRAY' => array('VALUE'),
    'STRUCT' => array('VALUE'),
    'PARAM' => array('PARAMS'),
    'METHODNAME' => array('METHODCALL'),
    'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
    'FAULT' => array('METHODRESPONSE'),
    'NIL' => array('VALUE'), // only used when extension activated
    'EX:NIL' => array('VALUE'), // only used when extension activated
);

/** @var array $parsing_options the options received in the constructor; merged with per-call options in parse() */
protected $parsing_options = array();

/** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
//protected $accept = 3;

/** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
protected $maxChunkLength = 4194304;

/**
 * @var array
 * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
 * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
 * the element handler methods
 */
protected $current_parsing_options = array(
    'xmlrpc_null_extension' => false,
    'xmlrpc_return_datetimes' => false,
    'xmlrpc_reject_invalid_values' => false
);
|
|
|
121 |
|
|
|
122 |
/**
 * Stores the default parsing options. They are used as the base set for every call to `parse()`, where options
 * passed in for the single call take precedence over them.
 *
 * @param array $options integer keys: options passed to the inner xml parser
 *                       string keys:
 *                       - target_charset (string)
 *                       - methodname_callback (callable)
 *                       - xmlrpc_null_extension (bool)
 *                       - xmlrpc_return_datetimes (bool)
 *                       - xmlrpc_reject_invalid_values (bool)
 */
public function __construct(array $options = array())
{
    $this->parsing_options = $options;
}
|
|
|
135 |
|
|
|
136 |
/**
 * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
 * Logs to the error log any issues which do not cause the parsing to fail.
 *
 * @param string $data
 * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
 * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE,
 *                    self::ACCEPT_FAULT
 * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
 *                       These options are added to options received in the constructor.
 *                       Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
 *                       are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
 * @return array see the definition of $this->_xh for the meaning of the results
 * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
 *
 * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
 *       that parsing will be completely independent of global state. Note that it might incur a small perf hit...
 */
public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
{
    // start every parsing run from a clean state
    $this->_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'lv' => 0,
        'isf' => 0,
        'isf_reason' => '',
        'value' => null,
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    $len = strlen($data);

    // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
    if ($len === 0) {
        $this->_xh['isf'] = 3;
        $this->_xh['isf_reason'] = 'XML error 5: empty document';
        return $this->_xh;
    }

    $this->current_parsing_options = array('accept' => $accept);

    // per-call options take precedence over the ones received in the constructor
    $mergedOptions = $this->parsing_options;
    foreach ($options as $key => $val) {
        $mergedOptions[$key] = $val;
    }

    // string-key options are consumed here; what is left in $mergedOptions afterwards goes to the xml parser
    foreach ($mergedOptions as $key => $val) {
        // q: can php be built without ctype? should we use a regexp?
        if (is_string($key) && !ctype_digit($key)) {
            /// @todo on invalid options, throw/error-out instead of logging an error message?
            switch($key) {
                case 'target_charset':
                    if (function_exists('mb_convert_encoding')) {
                        $this->current_parsing_options['target_charset'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                    }
                    break;

                case 'methodname_callback':
                    if (is_callable($val)) {
                        $this->current_parsing_options['methodname_callback'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                    }
                    break;

                case 'xmlrpc_null_extension':
                case 'xmlrpc_return_datetimes':
                case 'xmlrpc_reject_invalid_values':
                    $this->current_parsing_options[$key] = $val;
                    break;

                default:
                    $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
            }
            unset($mergedOptions[$key]);
        }
    }

    if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
        $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
    }
    if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
        $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
    }
    if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
        $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
    }

    // NB: we use '' instead of null to force charset detection from the xml declaration
    $parser = xml_parser_create('');

    try {
        // Done inside the try block: on php 8 an invalid option makes xml_parser_set_option throw a ValueError,
        // and we want the cleanup code in the catch blocks below to run in that case too
        foreach ($mergedOptions as $key => $val) {
            xml_parser_set_option($parser, $key, $val);
        }

        // always set this, in case someone tries to disable it via options...
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

        // NB: handlers are passed as proper callables instead of relying on xml_set_object() plus method names:
        // the latter is deprecated as of php 8.4, while the former works the same with all php versions
        switch ($returnType) {
            case self::RETURN_PHP:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_fast'));
                break;
            case self::RETURN_EPIVALS:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_epi'));
                break;
            /// @todo log an error / throw / error-out on unsupported return type
            case self::RETURN_XMLRPCVALS:
            default:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee'));
        }

        xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd'));
        xml_set_default_handler($parser, array($this, 'xmlrpc_dh'));

        // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
        for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
            $chunk = substr($data, $offset, $this->maxChunkLength);
            // the 3rd argument tells the parser whether this is the last chunk
            // error handling: xml not well formed
            if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                $errCode = xml_get_error_code($parser);
                $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                    xml_get_current_line_number($parser), xml_get_current_column_number($parser));

                $this->_xh['isf'] = 3;
                $this->_xh['isf_reason'] = $errStr;
            }
            // no need to parse further if we already have a fatal error
            if ($this->_xh['isf'] >= 2) {
                break;
            }
        }
    /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
    } catch (\Exception $e) {
        xml_parser_free($parser);
        $this->current_parsing_options = array();
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        throw $e;
    } catch (\Error $e) {
        xml_parser_free($parser);
        $this->current_parsing_options = array();
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        throw $e;
    }

    xml_parser_free($parser);
    $this->current_parsing_options = array();

    return $this->_xh;
}
|
|
|
295 |
|
|
|
296 |
/**
 * xml parser handler function for opening element tags.
 * Checks that the element is a known xml-rpc element found in an allowed position, and updates the parsing
 * state in $this->_xh accordingly. On any violation it sets $this->_xh['isf'] to 2, stores an explanation in
 * $this->_xh['isf_reason'] and returns without pushing the element onto the stack.
 * @internal
 *
 * @param resource $parser
 * @param string $name the element name, in upper case
 * @param array $attrs the element attributes. Only PHP_CLASS is looked at, for STRUCT and ARRAY elements
 * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
 * @return void
 *
 * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
 *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
 */
public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
{
    // if invalid xml-rpc already detected, skip all processing
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // check for correct element nesting
    if (count($this->_xh['stack']) == 0) {
        // top level element can only be one of the types allowed by the `accept` bitmask
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway

        // BC
        if ($acceptSingleVals === false) {
            // `accept` is missing when this handler is driven by code which did not go through `parse()`
            // (f.e. a subclass which reimplemented it): fall back to the default of accepting requests and
            // responses instead of triggering a php warning and rejecting every document
            if (isset($this->current_parsing_options['accept'])) {
                $accept = $this->current_parsing_options['accept'];
            } else {
                $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE;
            }
        } else {
            $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
            $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
        }
        if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
            ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
            ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
            ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
            $this->_xh['rt'] = strtolower($name);
        } else {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

            return;
        }
    } else {
        // not top level element: see if parent is OK
        $parent = end($this->_xh['stack']);
        if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name], true)) {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

            return;
        }
    }

    switch ($name) {
        // optimize for speed switch cases: most common cases first
        case 'VALUE':
            /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
            $this->_xh['vt'] = 'value'; // indicator: no value found yet
            $this->_xh['ac'] = '';
            $this->_xh['lv'] = 1;
            $this->_xh['php_class'] = null;
            break;

        case 'I8':
        case 'EX:I8':
            if (PHP_INT_SIZE === 4) {
                // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                return;
            }
            // fall through voluntarily

        case 'I4':
        case 'INT':
        case 'STRING':
        case 'BOOLEAN':
        case 'DOUBLE':
        case 'DATETIME.ISO8601':
        case 'BASE64':
            if ($this->_xh['vt'] != 'value') {
                // two data elements inside a value: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            $this->_xh['ac'] = ''; // reset the accumulator
            break;

        case 'STRUCT':
        case 'ARRAY':
            if ($this->_xh['vt'] != 'value') {
                // two data elements inside a value: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            // create an empty array to hold child values, and push it onto appropriate stack
            $curVal = array(
                'values' => array(),
                'type' => $name,
            );
            // check for out-of-band information to rebuild php objs and, in case it is found, save it
            if (isset($attrs['PHP_CLASS'])) {
                $curVal['php_class'] = $attrs['PHP_CLASS'];
            }
            $this->_xh['valuestack'][] = $curVal;
            $this->_xh['vt'] = 'data'; // be prepared for a data element next
            break;

        case 'DATA':
            if ($this->_xh['vt'] != 'data') {
                // two data elements inside a value: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found two data elements inside an array element";

                return;
            }
            // fall through voluntarily: nothing else to do for a valid DATA element

        case 'METHODCALL':
        case 'METHODRESPONSE':
        case 'PARAMS':
            // valid elements that add little to processing
            break;

        case 'METHODNAME':
        case 'NAME':
            /// @todo we could check for 2 NAME elements inside a MEMBER element
            $this->_xh['ac'] = '';
            break;

        case 'FAULT':
            $this->_xh['isf'] = 1;
            break;

        case 'MEMBER':
            // set member name to null, in case we do not find in the xml later on
            $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;
            // Drop through intentionally

        case 'PARAM':
            // clear value type, so we can check later if no value has been passed for this param/member
            $this->_xh['vt'] = null;
            break;

        case 'NIL':
        case 'EX:NIL':
            // NB: !empty() is used instead of a plain truthiness check to avoid a php warning if the option is not set
            if (!empty($this->current_parsing_options['xmlrpc_null_extension'])) {
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
                $this->_xh['ac'] = '';
            } else {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                return;
            }
            break;

        default:
            // INVALID ELEMENT: RAISE ISF so that it is later recognized
            /// @todo feature creep = allow a callback instead
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

            return;
    }

    // Save current element name to stack, to validate nesting
    $this->_xh['stack'][] = $name;

    /// @todo optimization creep: move this inside the big switch() above
    if ($name != 'VALUE') {
        // any element other than VALUE means we are no longer looking for an untyped (string) value
        $this->_xh['lv'] = 0;
    }
}
|
|
|
485 |
|
|
|
486 |
/**
|
|
|
487 |
* xml parser handler function for close element tags.
|
|
|
488 |
* @internal
|
|
|
489 |
*
|
|
|
490 |
* @param resource $parser
|
|
|
491 |
* @param string $name
|
|
|
492 |
* @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility
|
|
|
493 |
* @return void
|
|
|
494 |
* @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
|
|
|
495 |
*
|
|
|
496 |
* @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
|
|
|
497 |
* and remove the checking for $this->_xh['isf'] >= 2 everywhere
|
|
|
498 |
*/
|
|
|
499 |
public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
|
|
|
500 |
{
|
|
|
501 |
if ($this->_xh['isf'] >= 2) {
|
|
|
502 |
return;
|
|
|
503 |
}
|
|
|
504 |
|
|
|
505 |
// push this element name from stack
|
|
|
506 |
// NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.
|
|
|
507 |
// we also checked for proper nesting at start of elements...
|
|
|
508 |
$currElem = array_pop($this->_xh['stack']);
|
|
|
509 |
|
|
|
510 |
switch ($name) {
|
|
|
511 |
case 'VALUE':
|
|
|
512 |
// If no scalar was inside <VALUE></VALUE>, it was a string value
|
|
|
513 |
if ($this->_xh['vt'] == 'value') {
|
|
|
514 |
$this->_xh['value'] = $this->_xh['ac'];
|
|
|
515 |
$this->_xh['vt'] = Value::$xmlrpcString;
|
|
|
516 |
}
|
|
|
517 |
|
|
|
518 |
// in case there is charset conversion required, do it here, to catch both cases of string values
|
|
|
519 |
if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
|
|
|
520 |
$this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
|
|
|
521 |
}
|
|
|
522 |
|
|
|
523 |
if ($rebuildXmlrpcvals > 0) {
|
|
|
524 |
// build the xml-rpc val out of the data received, and substitute it
|
|
|
525 |
$temp = new Value($this->_xh['value'], $this->_xh['vt']);
|
|
|
526 |
// in case we got info about underlying php class, save it in the object we're rebuilding
|
|
|
527 |
if (isset($this->_xh['php_class'])) {
|
|
|
528 |
$temp->_php_class = $this->_xh['php_class'];
|
|
|
529 |
}
|
|
|
530 |
$this->_xh['value'] = $temp;
|
|
|
531 |
} elseif ($rebuildXmlrpcvals < 0) {
|
|
|
532 |
if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
|
|
|
533 |
$this->_xh['value'] = (object)array(
|
|
|
534 |
'xmlrpc_type' => 'datetime',
|
|
|
535 |
'scalar' => $this->_xh['value'],
|
|
|
536 |
'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
|
|
|
537 |
);
|
|
|
538 |
} elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
|
|
|
539 |
$this->_xh['value'] = (object)array(
|
|
|
540 |
'xmlrpc_type' => 'base64',
|
|
|
541 |
'scalar' => $this->_xh['value']
|
|
|
542 |
);
|
|
|
543 |
}
|
|
|
544 |
} else {
|
|
|
545 |
/// @todo this should handle php-serialized objects, since std deserializing is done
|
|
|
546 |
/// by php_xmlrpc_decode, which we will not be calling...
|
|
|
547 |
//if (isset($this->_xh['php_class'])) {
|
|
|
548 |
//}
|
|
|
549 |
}
|
|
|
550 |
|
|
|
551 |
// check if we are inside an array or struct:
|
|
|
552 |
// if value just built is inside an array, let's move it into array on the stack
|
|
|
553 |
$vscount = count($this->_xh['valuestack']);
|
|
|
554 |
if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
|
|
|
555 |
$this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
|
|
|
556 |
}
|
|
|
557 |
break;
|
|
|
558 |
|
|
|
559 |
case 'STRING':
|
|
|
560 |
$this->_xh['vt'] = Value::$xmlrpcString;
|
|
|
561 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
562 |
$this->_xh['value'] = $this->_xh['ac'];
|
|
|
563 |
break;
|
|
|
564 |
|
|
|
565 |
case 'BOOLEAN':
|
|
|
566 |
$this->_xh['vt'] = Value::$xmlrpcBoolean;
|
|
|
567 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
568 |
// We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
|
|
|
569 |
// even though the spec never mentions them (see e.g. Blogger api docs)
|
|
|
570 |
// NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
|
|
|
571 |
// Note the non-strict type check: it will allow ' 1 '
|
|
|
572 |
/// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
|
|
|
573 |
/// Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
|
|
|
574 |
/// to changes in the way php compares strings (since 8.0, leading and trailing newlines are
|
|
|
575 |
/// accepted when deciding if a string numeric...)
|
|
|
576 |
if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
|
|
|
577 |
$this->_xh['value'] = true;
|
|
|
578 |
} else {
|
|
|
579 |
// log if receiving something strange, even though we set the value to false anyway
|
|
|
580 |
/// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
|
|
|
581 |
if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
|
|
|
582 |
if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
|
|
|
583 |
$this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
|
|
|
584 |
return;
|
|
|
585 |
}
|
|
|
586 |
}
|
|
|
587 |
$this->_xh['value'] = false;
|
|
|
588 |
}
|
|
|
589 |
break;
|
|
|
590 |
|
|
|
591 |
case 'EX:I8':
|
|
|
592 |
$name = 'i8';
|
|
|
593 |
// fall through voluntarily
|
|
|
594 |
case 'I4':
|
|
|
595 |
case 'I8':
|
|
|
596 |
case 'INT':
|
|
|
597 |
// NB: we build the Value object with the original xml element name found, except for ex:i8. The
|
|
|
598 |
// `Value::scalarTyp()` function will do some normalization of the data
|
|
|
599 |
$this->_xh['vt'] = strtolower($name);
|
|
|
600 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
601 |
if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
|
|
|
602 |
if (!$this->handleParsingError('non numeric data received in INT value: ' .
|
|
|
603 |
$this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
|
|
|
604 |
return;
|
|
|
605 |
}
|
|
|
606 |
/// @todo: find a better way of reporting an error value than this! Use NaN?
|
|
|
607 |
$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
|
|
|
608 |
} else {
|
|
|
609 |
// it's ok, add it on
|
|
|
610 |
$this->_xh['value'] = (int)$this->_xh['ac'];
|
|
|
611 |
}
|
|
|
612 |
break;
|
|
|
613 |
|
|
|
614 |
case 'DOUBLE':
|
|
|
615 |
$this->_xh['vt'] = Value::$xmlrpcDouble;
|
|
|
616 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
617 |
if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
|
|
|
618 |
if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
|
|
|
619 |
$this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
|
|
|
620 |
return;
|
|
|
621 |
}
|
|
|
622 |
|
|
|
623 |
$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
|
|
|
624 |
} else {
|
|
|
625 |
// it's ok, add it on
|
|
|
626 |
$this->_xh['value'] = (double)$this->_xh['ac'];
|
|
|
627 |
}
|
|
|
628 |
break;
|
|
|
629 |
|
|
|
630 |
case 'DATETIME.ISO8601':
|
|
|
631 |
$this->_xh['vt'] = Value::$xmlrpcDateTime;
|
|
|
632 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
633 |
if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
|
|
|
634 |
if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
|
|
|
635 |
$this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
|
|
|
636 |
return;
|
|
|
637 |
}
|
|
|
638 |
}
|
|
|
639 |
if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
|
|
|
640 |
try {
|
|
|
641 |
$this->_xh['value'] = new \DateTime($this->_xh['ac']);
|
|
|
642 |
|
|
|
643 |
// the default regex used to validate the date string a few lines above should make this case impossible,
|
|
|
644 |
// but one never knows...
|
|
|
645 |
} catch(\Exception $e) {
|
|
|
646 |
// what to do? We can not guarantee that a valid date can be created. We return null...
|
|
|
647 |
if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
|
|
|
648 |
$e->getMessage(), __METHOD__)) {
|
|
|
649 |
return;
|
|
|
650 |
}
|
|
|
651 |
}
|
|
|
652 |
} else {
|
|
|
653 |
$this->_xh['value'] = $this->_xh['ac'];
|
|
|
654 |
}
|
|
|
655 |
break;
|
|
|
656 |
|
|
|
657 |
case 'BASE64':
|
|
|
658 |
$this->_xh['vt'] = Value::$xmlrpcBase64;
|
|
|
659 |
$this->_xh['lv'] = 3; // indicate we've found a value
|
|
|
660 |
if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
|
|
|
661 |
$v = base64_decode($this->_xh['ac'], true);
|
|
|
662 |
if ($v === false) {
|
|
|
663 |
$this->_xh['isf'] = 2;
|
|
|
664 |
$this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
|
|
|
665 |
return;
|
|
|
666 |
}
|
|
|
667 |
} else {
|
|
|
668 |
$v = base64_decode($this->_xh['ac']);
|
|
|
669 |
if ($v === '' && $this->_xh['ac'] !== '') {
|
|
|
670 |
// only the empty string should decode to the empty string
|
|
|
671 |
$this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
|
|
|
672 |
$this->truncateValueForLog($this->_xh['ac']));
|
|
|
673 |
}
|
|
|
674 |
}
|
|
|
675 |
$this->_xh['value'] = $v;
|
|
|
676 |
break;
|
|
|
677 |
|
|
|
678 |
case 'NAME':
|
|
|
679 |
$this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
|
|
|
680 |
break;
|
|
|
681 |
|
|
|
682 |
case 'MEMBER':
|
|
|
683 |
// add to array in the stack the last element built, unless no VALUE or no NAME were found
|
|
|
684 |
if ($this->_xh['vt']) {
|
|
|
685 |
$vscount = count($this->_xh['valuestack']);
|
|
|
686 |
if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
|
|
|
687 |
if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
|
|
|
688 |
return;
|
|
|
689 |
}
|
|
|
690 |
$this->_xh['valuestack'][$vscount - 1]['name'] = '';
|
|
|
691 |
}
|
|
|
692 |
$this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
|
|
|
693 |
} else {
|
|
|
694 |
if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
|
|
|
695 |
return;
|
|
|
696 |
}
|
|
|
697 |
}
|
|
|
698 |
break;
|
|
|
699 |
|
|
|
700 |
case 'DATA':
|
|
|
701 |
$this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
|
|
|
702 |
break;
|
|
|
703 |
|
|
|
704 |
case 'STRUCT':
|
|
|
705 |
case 'ARRAY':
|
|
|
706 |
// fetch out of stack array of values, and promote it to current value
|
|
|
707 |
$currVal = array_pop($this->_xh['valuestack']);
|
|
|
708 |
$this->_xh['value'] = $currVal['values'];
|
|
|
709 |
$this->_xh['vt'] = strtolower($name);
|
|
|
710 |
if (isset($currVal['php_class'])) {
|
|
|
711 |
$this->_xh['php_class'] = $currVal['php_class'];
|
|
|
712 |
}
|
|
|
713 |
break;
|
|
|
714 |
|
|
|
715 |
case 'PARAM':
|
|
|
716 |
// add to array of params the current value, unless no VALUE was found
|
|
|
717 |
/// @todo should we also check if there were two VALUE inside the PARAM?
|
|
|
718 |
if ($this->_xh['vt']) {
|
|
|
719 |
$this->_xh['params'][] = $this->_xh['value'];
|
|
|
720 |
$this->_xh['pt'][] = $this->_xh['vt'];
|
|
|
721 |
} else {
|
|
|
722 |
if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
|
|
|
723 |
return;
|
|
|
724 |
}
|
|
|
725 |
}
|
|
|
726 |
break;
|
|
|
727 |
|
|
|
728 |
case 'METHODNAME':
|
|
|
729 |
if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
|
|
|
730 |
if (!$this->handleParsingError('invalid data received in METHODNAME: '.
|
|
|
731 |
$this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
|
|
|
732 |
return;
|
|
|
733 |
}
|
|
|
734 |
}
|
|
|
735 |
$methodName = trim($this->_xh['ac']);
|
|
|
736 |
$this->_xh['method'] = $methodName;
|
|
|
737 |
// we allow the callback to f.e. give us back a mangled method name by manipulating $this
|
|
|
738 |
if (isset($this->current_parsing_options['methodname_callback'])) {
|
|
|
739 |
call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
|
|
|
740 |
}
|
|
|
741 |
break;
|
|
|
742 |
|
|
|
743 |
case 'NIL':
|
|
|
744 |
case 'EX:NIL':
|
|
|
745 |
// NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
|
|
|
746 |
//if ($this->current_parsing_options['xmlrpc_null_extension']) {
|
|
|
747 |
$this->_xh['vt'] = 'null';
|
|
|
748 |
$this->_xh['value'] = null;
|
|
|
749 |
$this->_xh['lv'] = 3;
|
|
|
750 |
//}
|
|
|
751 |
break;
|
|
|
752 |
|
|
|
753 |
/// @todo add extra checking:
|
|
|
754 |
/// - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
|
|
|
755 |
/// - FAULT should contain a single struct with the 2 expected members (check their name and type)
|
|
|
756 |
/// - METHODCALL should contain a methodname
|
|
|
757 |
case 'PARAMS':
|
|
|
758 |
case 'FAULT':
|
|
|
759 |
case 'METHODCALL':
|
|
|
760 |
case 'METHODRESPONSE':
|
|
|
761 |
break;
|
|
|
762 |
|
|
|
763 |
default:
|
|
|
764 |
// End of INVALID ELEMENT
|
|
|
765 |
// Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
|
|
|
766 |
// $this->_xh['isf'] is set to 2...
|
|
|
767 |
break;
|
|
|
768 |
}
|
|
|
769 |
}
|
|
|
770 |
|
|
|
771 |
/**
|
|
|
772 |
* Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
|
|
|
773 |
* @internal
|
|
|
774 |
*
|
|
|
775 |
* @param resource $parser
|
|
|
776 |
* @param string $name
|
|
|
777 |
* @return void
|
|
|
778 |
*/
|
|
|
779 |
public function xmlrpc_ee_fast($parser, $name)
{
    // Delegates to the generic end-element handler. Per this method's docblock, passing 0 as the third
    // argument selects decoding without rebuilding xml-rpc Value objects.
    $valueRebuildMode = 0;

    $this->xmlrpc_ee($parser, $name, $valueRebuildMode);
}
|
|
|
783 |
|
|
|
784 |
/**
|
|
|
785 |
* Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
|
|
|
786 |
* @internal
|
|
|
787 |
*
|
|
|
788 |
* @param resource $parser
|
|
|
789 |
* @param string $name
|
|
|
790 |
* @return void
|
|
|
791 |
*/
|
|
|
792 |
public function xmlrpc_ee_epi($parser, $name)
{
    // Delegates to the generic end-element handler. Per this method's docblock, passing -1 as the third
    // argument selects building xmlrpc-extension style values (plain php for all but base64 and datetime).
    $valueRebuildMode = -1;

    $this->xmlrpc_ee($parser, $name, $valueRebuildMode);
}
|
|
|
796 |
|
|
|
797 |
/**
|
|
|
798 |
* xml parser handler function for character data.
|
|
|
799 |
* @internal
|
|
|
800 |
*
|
|
|
801 |
* @param resource $parser
|
|
|
802 |
* @param string $data
|
|
|
803 |
* @return void
|
|
|
804 |
*/
|
|
|
805 |
public function xmlrpc_cd($parser, $data)
{
    // Character data is ignored in two cases:
    // - an xml fault has already been detected (isf >= 2)
    // - "lookforvalue == 3", meaning that an entire value was already found and any further
    //   character data should be discarded
    if ($this->_xh['isf'] >= 2 || $this->_xh['lv'] == 3) {
        return;
    }

    // otherwise, keep accumulating the text of the current element
    $this->_xh['ac'] .= $data;
}
|
|
|
817 |
|
|
|
818 |
/**
|
|
|
819 |
* xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
|
|
|
820 |
* In fact, it only gets called on unknown entities...
|
|
|
821 |
* @internal
|
|
|
822 |
*
|
|
|
823 |
* @param resource $parser
|
|
|
824 |
* @param string $data
|
|
|
825 |
* @return void
|
|
|
826 |
*/
|
|
|
827 |
public function xmlrpc_dh($parser, $data)
{
    // skip processing if xml fault already detected
    $faultDetected = $this->_xh['isf'] >= 2;
    if ($faultDetected) {
        return;
    }

    $firstChar = substr($data, 0, 1);
    $lastChar = substr($data, -1, 1);

    // Only data shaped like an entity reference (starting with '&' and ending with ';') is appended,
    // verbatim, to the accumulator; anything else is dropped.
    if ($firstChar == '&' && $lastChar == ';') {
        $this->_xh['ac'] .= $data;
    }
}
|
|
|
838 |
|
|
|
839 |
/**
|
|
|
840 |
* xml charset encoding guessing helper function.
|
|
|
841 |
* Tries to determine the charset encoding of an XML chunk received over HTTP.
|
|
|
842 |
* NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset parameter,
|
|
|
843 |
* we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?) clients/servers,
|
|
|
844 |
* which will be most probably using UTF-8 anyway...
|
|
|
845 |
* In order of importance checks:
|
|
|
846 |
* 1. http headers
|
|
|
847 |
* 2. BOM
|
|
|
848 |
* 3. XML declaration
|
|
|
849 |
* 4. guesses using mb_detect_encoding()
|
|
|
850 |
*
|
|
|
851 |
* @param string $httpHeader the http Content-type header
|
|
|
852 |
* @param string $xmlChunk xml content buffer
|
|
|
853 |
* @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
|
|
|
854 |
* This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
|
|
|
855 |
* @return string|false the encoding determined. False (the value mb_detect_encoding() gives on failure) if it can't be determined and mbstring is enabled,
|
|
|
856 |
* PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled
|
|
|
857 |
*
|
|
|
858 |
* @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
|
|
|
859 |
* @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
|
|
|
860 |
* the method independent of global state
|
|
|
861 |
*/
|
|
|
862 |
public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
{
    // discussion: see http://www.yale.edu/pclt/encoding/

    // 1 - charset specified in the HTTP headers
    //
    // Grammar reminder:
    //   LWS: (\13\10)?( |\t)+
    //   token: (any char but excluded stuff)+
    //   quoted string: " (any char but double quotes and control chars)* "
    //   header: Content-type = ...; charset=value(; ...)*
    //     where value is of type token, no LWS allowed between 'charset' and value
    // Invalid chars in the value are not checked for. Surrounding spaces, tabs and double quotes are
    // stripped, even though they might be significant if the charset was sent as a quoted string - but
    // nobody uses such charset names...
    /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
    $found = array();
    if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $found)) {
        $charset = trim($found[1], " \t\"");

        return strtoupper($charset);
    }

    // 2 - BOM at the very start of the data (source: http://www.w3.org/TR/2000/REC-xml-20001006)
    // The patterns are tried in this exact order: the 4-byte signatures have to be tested before the
    // 2-byte ones, since FF FE 00 00 also starts with the 2-byte FF FE signature.
    // NOTE: according to the spec, even when a BOM is found we should check whether the xml declaration
    // specifies an encoding too, and verify that the two match.
    /// @todo implement check as described above?
    /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
    $bomSignatures = array(
        '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/' => 'UCS-4',
        '/^(?:\xFE\xFF|\xFF\xFE)/' => 'UTF-16',
        '/^(?:\xEF\xBB\xBF)/' => 'UTF-8',
    );
    foreach ($bomSignatures as $bomPattern => $bomCharset) {
        if (preg_match($bomPattern, $xmlChunk)) {
            return $bomCharset;
        }
    }

    // 3 - encoding specified in the xml declaration
    /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
    ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
    ///       For lower versions, we could attempt usage of mb_ereg...
    // Grammar reminder:
    //   SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
    //   EQ:    SPACE?=SPACE?             === [ \x9\xD\xA]*=[ \x9\xD\xA]*
    $xmlDeclarationPattern = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
        '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";
    if (preg_match($xmlDeclarationPattern, $xmlChunk, $found)) {
        // the 2nd capture group holds the encoding name including its quotes: strip them
        $declaredEncoding = substr($found[2], 1, -1);

        return strtoupper($declaredEncoding);
    }

    // 4 - no explicit indication found
    if (!function_exists('mb_detect_encoding')) {
        // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
        // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
        // this should be the standard. And we should be getting text/xml as request and response.
        // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
        return PhpXmlRpc::$xmlrpc_defencoding;
    }

    // mbstring is available: let it do the guesswork, using the globally configured list of candidate
    // encodings when the caller did not pass one
    if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
        $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
    }
    $detected = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

    // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
    // IANA also likes better US-ASCII, so go with it
    return $detected == 'ASCII' ? 'US-' . $detected : $detected;
}
|
|
|
937 |
|
|
|
938 |
/**
|
|
|
939 |
* Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration).
|
|
|
940 |
*
|
|
|
941 |
* @param string $xmlChunk
|
|
|
942 |
* @return bool
|
|
|
943 |
*
|
|
|
944 |
* @todo rename to hasEncodingDeclaration
|
|
|
945 |
*/
|
|
|
946 |
public static function hasEncoding($xmlChunk)
{
    // Any single match is enough to consider the charset as declared.
    // BOM patterns source: http://www.w3.org/TR/2000/REC-xml-20001006
    // Xml declaration grammar reminder:
    //   SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
    //   EQ:    SPACE?=SPACE?             === [ \x9\xD\xA]*=[ \x9\xD\xA]*
    $declarationPatterns = array(
        // 4-byte BOM
        '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        // 2-byte BOM
        '/^(?:\xFE\xFF|\xFF\xFE)/',
        // UTF-8 BOM
        '/^(?:\xEF\xBB\xBF)/',
        // encoding pseudo-attribute in the xml declaration
        '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
    );

    foreach ($declarationPatterns as $declarationPattern) {
        if (preg_match($declarationPattern, $xmlChunk)) {
            return true;
        }
    }

    return false;
}
|
|
|
970 |
|
|
|
971 |
/**
|
|
|
972 |
* @param string $message
|
|
|
973 |
* @param string $method method/file/line info
|
|
|
974 |
* @return bool false if the caller has to stop parsing
|
|
|
975 |
*/
|
|
|
976 |
protected function handleParsingError($message, $method = '')
{
    // Lenient mode: the problem is only logged, and the caller is told to carry on parsing
    if (!$this->current_parsing_options['xmlrpc_reject_invalid_values']) {
        $logLine = 'XML-RPC: ';
        if ($method != '') {
            $logLine .= $method . ': ';
        }
        $logLine .= $message;
        $this->getLogger()->error($logLine);

        return true;
    }

    // Strict mode: flag the parsing as failed (isf = 2), record why, and tell the caller to stop
    $this->_xh['isf'] = 2;
    $this->_xh['isf_reason'] = ucfirst($message);

    return false;
}
|
|
|
987 |
|
|
|
988 |
/**
|
|
|
989 |
* Truncates data which is longer than $this->maxLogValueLength, so that it can be used in log and error messages
|
|
|
990 |
* @param string $data
|
|
|
991 |
* @return string
|
|
|
992 |
*/
|
|
|
993 |
protected function truncateValueForLog($data)
{
    $maxLength = $this->maxLogValueLength;

    // short enough: nothing to do
    if (strlen($data) <= $maxLength) {
        return $data;
    }

    $ellipsis = '...';

    // A limit too small to even fit the ellipsis used to result in a negative length being passed to
    // substr() and the ellipsis being appended anyway, giving back more than the allowed number of bytes.
    // In that case just hard-cut the data (giving back an empty string for limits of zero or less).
    if ($maxLength < strlen($ellipsis)) {
        return substr($data, 0, max(0, $maxLength));
    }

    // NB: lengths are measured in bytes, not in characters
    return substr($data, 0, $maxLength - strlen($ellipsis)) . $ellipsis;
}
|
|
|
1001 |
|
|
|
1002 |
// *** BC layer ***
|
|
|
1003 |
|
|
|
1004 |
/**
|
|
|
1005 |
* xml parser handler function for opening element tags.
|
|
|
1006 |
* Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses.
|
|
|
1007 |
* @deprecated
|
|
|
1008 |
*
|
|
|
1009 |
* @param resource $parser
|
|
|
1010 |
* @param string $name
|
|
|
1011 |
* @param array $attrs
|
|
|
1012 |
* @return void
|
|
|
1013 |
*/
|
|
|
1014 |
public function xmlrpc_se_any($parser, $name, $attrs)
{
    // this will be spamming the log if this method is in use...
    $deprecationNotice = 'Method ' . __METHOD__ . ' is deprecated';
    $this->logDeprecation($deprecationNotice);

    // Forward to the generic start-element handler, passing true as its 4th argument. Per this method's
    // docblock, that is the mode in which single xml-rpc values are accepted besides requests and responses.
    $acceptSingleValues = true;
    $this->xmlrpc_se($parser, $name, $attrs, $acceptSingleValues);
}
|
|
|
1021 |
|
|
|
1022 |
public function &__get($name)
{
    // BC layer: gives back, by reference, the value of properties which used to be directly accessible,
    // logging a deprecation for each access. Any other property name triggers a warning and yields null.
    switch ($name) {
        // `accept` is emulated on top of the current parsing options, the same way it is done in
        // __set, __isset and __unset - without this case, isset($parser->accept) could be true while
        // reading the property would trigger an "Undefined property" warning
        case 'accept':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            if (isset($this->current_parsing_options['accept'])) {
                return $this->current_parsing_options['accept'];
            }
            // do not return a reference to the missing array element, as that would create it
            $result = null;
            return $result;
        case '_xh':
        case 'xmlrpc_valid_parents':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            return $this->$name;
        default:
            /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
            trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
            // a variable is needed, since the method returns by reference
            $result = null;
            return $result;
    }
}
|
|
|
1037 |
|
|
|
1038 |
public function __set($name, $value)
{
    // BC layer: emulates write access to properties which used to be directly accessible

    // this should only ever be called by subclasses which overtook `parse()`
    if ($name === 'accept') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->current_parsing_options['accept'] = $value;

        return;
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->$name = $value;

        return;
    }

    // anything else is reported as an undefined property, pointing at the code which attempted the write
    /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $caller = $trace[0];
    trigger_error('Undefined property via __set(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
}
|
|
|
1057 |
|
|
|
1058 |
public function __isset($name)
{
    // BC layer: emulates isset() checks on properties which used to be directly accessible

    // `accept` lives inside the current parsing options
    if ($name === 'accept') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->current_parsing_options['accept']);
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->$name);
    }

    // unknown properties are silently reported as not set
    return false;
}
|
|
|
1072 |
|
|
|
1073 |
public function __unset($name)
{
    // BC layer: emulates unset() on properties which used to be directly accessible

    // q: does this make sense at all?
    if ($name === 'accept') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->current_parsing_options['accept']);

        return;
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->$name);

        return;
    }

    // anything else is reported as an undefined property, pointing at the code which attempted the unset
    /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $caller = $trace[0];
    trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
}
|
|
|
1092 |
}
|