Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
namespace PhpXmlRpc\Helper;
4
 
5
use PhpXmlRpc\PhpXmlRpc;
6
use PhpXmlRpc\Traits\DeprecationLogger;
7
use PhpXmlRpc\Value;
8
 
9
/**
10
 * Deals with parsing the XML.
11
 * @see http://xmlrpc.com/spec.md
12
 *
13
 * @todo implement an interface to allow for alternative implementations
14
 *       - make access to $_xh protected, return more high-level data structures
15
 *       - move the private parts of $_xh to the internal-use parsing-options config
16
 *       - add parseRequest, parseResponse, parseValue methods
17
 * @todo if iconv() or the mbstring extension are available, we could allow converting the received xml to a custom charset encoding
18
 *       while parsing, which is faster than doing it later by going over the rebuilt data structure
19
 * @todo rename? This is an xml-rpc parser, not a generic xml parser...
20
 *
21
 * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
22
 * @property int $accept deprecated - (protected) access left in purely for BC
23
 */
24
class XMLParser
25
{
26
    use DeprecationLogger;
27
 
28
    const RETURN_XMLRPCVALS = 'xmlrpcvals';
29
    const RETURN_EPIVALS = 'epivals';
30
    const RETURN_PHP = 'phpvals';
31
 
32
    const ACCEPT_REQUEST = 1;
33
    const ACCEPT_RESPONSE = 2;
34
    const ACCEPT_VALUE = 4;
35
    const ACCEPT_FAULT = 8;
36
 
37
    /**
38
     * @var int
39
     * The max length beyond which data will get truncated in error messages
40
     */
41
    protected $maxLogValueLength = 100;
42
 
43
    /**
44
     * @var array
45
     * Used to store state during parsing and to pass parsing results to callers.
46
     * Quick explanation of components:
47
     *  private:
48
     *    ac - used to accumulate values
49
     *    stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements
50
     *    valuestack - array used for parsing arrays and structs
51
     *    lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
52
     *         (values: 0=not looking, 1=looking, 3=found)
53
     *  public:
54
     *    isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
55
     *    isf_reason - used for storing xml-rpc response fault string
56
     *    value - used to store the value in responses
57
     *    method - used to store method name in requests
58
     *    params - used to store parameters in requests
59
     *    pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
60
     *    rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
61
     */
62
    protected $_xh = array(
63
        'ac' => '',
64
        'stack' => array(),
65
        'valuestack' => array(),
66
        'lv' => 0,
67
        'isf' => 0,
68
        'isf_reason' => '',
69
        'value' => null,
70
        'method' => false,
71
        'params' => array(),
72
        'pt' => array(),
73
        'rt' => '',
74
    );
75
 
76
    /**
77
     * @var array[]
78
     */
79
    protected $xmlrpc_valid_parents = array(
80
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
81
        'BOOLEAN' => array('VALUE'),
82
        'I4' => array('VALUE'),
83
        'I8' => array('VALUE'),
84
        'EX:I8' => array('VALUE'),
85
        'INT' => array('VALUE'),
86
        'STRING' => array('VALUE'),
87
        'DOUBLE' => array('VALUE'),
88
        'DATETIME.ISO8601' => array('VALUE'),
89
        'BASE64' => array('VALUE'),
90
        'MEMBER' => array('STRUCT'),
91
        'NAME' => array('MEMBER'),
92
        'DATA' => array('ARRAY'),
93
        'ARRAY' => array('VALUE'),
94
        'STRUCT' => array('VALUE'),
95
        'PARAM' => array('PARAMS'),
96
        'METHODNAME' => array('METHODCALL'),
97
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
98
        'FAULT' => array('METHODRESPONSE'),
99
        'NIL' => array('VALUE'), // only used when extension activated
100
        'EX:NIL' => array('VALUE'), // only used when extension activated
101
    );
102
 
103
    /** @var array $parsing_options */
104
    protected $parsing_options = array();
105
 
106
    /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
107
    //protected $accept = 3;
108
 
109
    /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
110
    protected $maxChunkLength = 4194304;
111
    /** @var array
112
     * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
113
     * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
114
     * the element handler methods
115
     */
116
    protected $current_parsing_options = array(
117
        'xmlrpc_null_extension' => false,
118
        'xmlrpc_return_datetimes' => false,
119
        'xmlrpc_reject_invalid_values' => false
120
    );
121
 
122
    /**
     * Stores the baseline parsing options. The options passed to each `parse()` call are layered on top of these.
     *
     * @param array $options integer keys: options handed over to the inner xml parser
     *                       string keys:
     *                       - target_charset (string)
     *                       - methodname_callback (callable)
     *                       - xmlrpc_null_extension (bool)
     *                       - xmlrpc_return_datetimes (bool)
     *                       - xmlrpc_reject_invalid_values (bool)
     */
    public function __construct(array $options = array())
    {
        $this->parsing_options = $options;
    }
135
 
136
    /**
     * Parses an xml-rpc xml string. Results of the parsing are stored in $this->_xh, which is also returned.
     * Logs to the error log any issues which do not cause the parsing to fail.
     *
     * @param string $data
     * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
     * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE,
     *                    self::ACCEPT_FAULT
     * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
     *                       These options are added to options received in the constructor.
     *                       Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
     *                       are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
     * @return array see the definition of $this->_xh for the meaning of the results
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
     *
     * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
     *       that parsing will be completely independent of global state. Note that it might incur a small perf hit...
     */
    public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
    {
        // start every parsing run from a clean state
        $this->_xh = array(
            'ac' => '',
            'stack' => array(),
            'valuestack' => array(),
            'lv' => 0,
            'isf' => 0,
            'isf_reason' => '',
            'value' => null,
            'method' => false, // so we can check later if we got a methodname or not
            'params' => array(),
            'pt' => array(),
            'rt' => '',
        );

        $len = strlen($data);

        // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
        if ($len == 0) {
            $this->_xh['isf'] = 3;
            $this->_xh['isf_reason'] = 'XML error 5: empty document';
            return $this->_xh;
        }

        $this->current_parsing_options = array('accept' => $accept);

        // per-call options take precedence over the ones received in the constructor
        $mergedOptions = $this->parsing_options;
        foreach ($options as $key => $val) {
            $mergedOptions[$key] = $val;
        }

        // string-key options are consumed here; what is left in $mergedOptions afterwards goes to the xml parser
        foreach ($mergedOptions as $key => $val) {
            // q: can php be built without ctype? should we use a regexp?
            if (is_string($key) && !ctype_digit($key)) {
                /// @todo on invalid options, throw/error-out instead of logging an error message?
                switch ($key) {
                    case 'target_charset':
                        if (function_exists('mb_convert_encoding')) {
                            $this->current_parsing_options['target_charset'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                        }
                        break;

                    case 'methodname_callback':
                        if (is_callable($val)) {
                            $this->current_parsing_options['methodname_callback'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                        }
                        break;

                    case 'xmlrpc_null_extension':
                    case 'xmlrpc_return_datetimes':
                    case 'xmlrpc_reject_invalid_values':
                        $this->current_parsing_options[$key] = $val;
                        break;

                    default:
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
                }
                unset($mergedOptions[$key]);
            }
        }

        // options not set explicitly fall back to the library-wide defaults
        if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
            $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
        }
        if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
            $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
        }
        if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
            $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
        }

        // NB: we use '' instead of null to force charset detection from the xml declaration
        $parser = xml_parser_create('');

        foreach ($mergedOptions as $key => $val) {
            xml_parser_set_option($parser, $key, $val);
        }

        // always set this, in case someone tries to disable it via options...
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

        // the start-element handler is always the same; the end-element handler depends on the desired return type
        switch ($returnType) {
            case self::RETURN_PHP:
                $endElementHandler = 'xmlrpc_ee_fast';
                break;
            case self::RETURN_EPIVALS:
                $endElementHandler = 'xmlrpc_ee_epi';
                break;
            /// @todo log an error / throw / error-out on unsupported return type
            case self::RETURN_XMLRPCVALS:
            default:
                $endElementHandler = 'xmlrpc_ee';
        }

        // NB: handlers are registered as [object, method] callables. Using xml_set_object() together with bare
        // method-name strings is deprecated as of php 8.4, whereas callable arrays are accepted by all php versions
        xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, $endElementHandler));
        xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd'));
        xml_set_default_handler($parser, array($this, 'xmlrpc_dh'));

        try {
            // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
            for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
                $chunk = substr($data, $offset, $this->maxChunkLength);
                // error handling: xml not well formed
                // (the 3rd argument tells the parser whether this is the last chunk of the document)
                if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                    $errCode = xml_get_error_code($parser);
                    $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                        xml_get_current_line_number($parser), xml_get_current_column_number($parser));

                    $this->_xh['isf'] = 3;
                    $this->_xh['isf_reason'] = $errStr;
                }
                // no need to parse further if we already have a fatal error
                if ($this->_xh['isf'] >= 2) {
                    break;
                }
            }
        /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
        } catch (\Exception $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        } catch (\Error $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        }

        xml_parser_free($parser);
        $this->current_parsing_options = array();

        return $this->_xh;
    }
295
 
296
    /**
     * xml parser handler function for opening element tags.
     * Validates that the element is allowed in its current position, prepares the parsing state for the element
     * contents and pushes the element name onto the nesting stack. On invalid xml-rpc it sets $this->_xh['isf'] to 2
     * and stores an explanation in $this->_xh['isf_reason'].
     * @internal
     *
     * @param resource $parser
     * @param string $name element name. It is compared against upper-case names, as `parse()` forces case folding
     * @param array $attrs element attributes; only PHP_CLASS is looked at, for STRUCT and ARRAY elements
     * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
     * @return void
     *
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xml-rpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        if (count($this->_xh['stack']) == 0) {
            // top level element can only be of the types allowed by the 'accept' bitmask
            /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
            ///       there is only a single top level element in xml anyway

            // BC
            if ($acceptSingleVals === false) {
                // the 'accept' key might be missing if this handler is used without going through `parse()`, e.g. by
                // subclasses which reimplemented it: fall back to the default value of the `$accept` arg of `parse()`
                if (array_key_exists('accept', $this->current_parsing_options)) {
                    $accept = $this->current_parsing_options['accept'];
                } else {
                    $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE;
                }
            } else {
                $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
                $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
            }
            if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
                ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
                ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
                ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
                $this->_xh['rt'] = strtolower($name);
            } else {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

                return;
            }
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name], true)) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;

            case 'I8':
            case 'EX:I8':
                if (PHP_INT_SIZE === 4) {
                    // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                    return;
                }
                // fall through voluntarily

            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;

            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $curVal = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs and, in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $curVal['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $curVal;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;

            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found two data elements inside an array element";

                    return;
                }
                // fall through voluntarily: nothing else to do for a valid DATA element

            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;

            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;

            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;

            case 'MEMBER':
                // set member name to null, in case we do not find in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;
                // fall through voluntarily

            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;

            case 'NIL':
            case 'EX:NIL':
                // NB: a missing option is treated the same as a disabled one
                if (!empty($this->current_parsing_options['xmlrpc_null_extension'])) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
                    $this->_xh['ac'] = '';
                } else {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                    return;
                }
                break;

            default:
                // INVALID ELEMENT: RAISE ISF so that it is later recognized
                /// @todo feature creep = allow a callback instead
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

                return;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }
485
 
486
    /**
487
     * xml parser handler function for close element tags.
488
     * @internal
489
     *
490
     * @param resource $parser
491
     * @param string $name
492
     * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility
493
     * @return void
494
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
495
     *
496
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
497
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
498
     */
499
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
500
    {
501
        if ($this->_xh['isf'] >= 2) {
502
            return;
503
        }
504
 
505
        // push this element name from stack
506
        // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.
507
        // we also checked for proper nesting at start of elements...
508
        $currElem = array_pop($this->_xh['stack']);
509
 
510
        switch ($name) {
511
            case 'VALUE':
512
                // If no scalar was inside <VALUE></VALUE>, it was a string value
513
                if ($this->_xh['vt'] == 'value') {
514
                    $this->_xh['value'] = $this->_xh['ac'];
515
                    $this->_xh['vt'] = Value::$xmlrpcString;
516
                }
517
 
518
                // in case there is charset conversion required, do it here, to catch both cases of string values
519
                if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
520
                    $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
521
                }
522
 
523
                if ($rebuildXmlrpcvals > 0) {
524
                    // build the xml-rpc val out of the data received, and substitute it
525
                    $temp = new Value($this->_xh['value'], $this->_xh['vt']);
526
                    // in case we got info about underlying php class, save it in the object we're rebuilding
527
                    if (isset($this->_xh['php_class'])) {
528
                        $temp->_php_class = $this->_xh['php_class'];
529
                    }
530
                    $this->_xh['value'] = $temp;
531
                } elseif ($rebuildXmlrpcvals < 0) {
532
                    if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
533
                        $this->_xh['value'] = (object)array(
534
                            'xmlrpc_type' => 'datetime',
535
                            'scalar' => $this->_xh['value'],
536
                            'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
537
                        );
538
                    } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
539
                        $this->_xh['value'] = (object)array(
540
                            'xmlrpc_type' => 'base64',
541
                            'scalar' => $this->_xh['value']
542
                        );
543
                    }
544
                } else {
545
                    /// @todo this should handle php-serialized objects, since std deserializing is done
546
                    ///       by php_xmlrpc_decode, which we will not be calling...
547
                    //if (isset($this->_xh['php_class'])) {
548
                    //}
549
                }
550
 
551
                // check if we are inside an array or struct:
552
                // if value just built is inside an array, let's move it into array on the stack
553
                $vscount = count($this->_xh['valuestack']);
554
                if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
555
                    $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
556
                }
557
                break;
558
 
559
            case 'STRING':
560
                $this->_xh['vt'] = Value::$xmlrpcString;
561
                $this->_xh['lv'] = 3; // indicate we've found a value
562
                $this->_xh['value'] = $this->_xh['ac'];
563
                break;
564
 
565
            case 'BOOLEAN':
566
                $this->_xh['vt'] = Value::$xmlrpcBoolean;
567
                $this->_xh['lv'] = 3; // indicate we've found a value
568
                // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
569
                // even though the spec never mentions them (see e.g. Blogger api docs)
570
                // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
571
                // Note the non-strict type check: it will allow ' 1 '
572
                /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
573
                ///       Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
574
                ///       to changes in the way php compares strings (since 8.0, leading and trailing newlines are
575
                ///       accepted when deciding if a string numeric...)
576
                if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
577
                    $this->_xh['value'] = true;
578
                } else {
579
                    // log if receiving something strange, even though we set the value to false anyway
580
                    /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
581
                    if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
582
                        if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
583
                            $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
584
                            return;
585
                        }
586
                    }
587
                    $this->_xh['value'] = false;
588
                }
589
                break;
590
 
591
            case 'EX:I8':
592
                $name = 'i8';
593
                // fall through voluntarily
594
            case 'I4':
595
            case 'I8':
596
            case 'INT':
597
                // NB: we build the Value object with the original xml element name found, except for ex:i8. The
598
                // `Value::scalarTyp()` function will do some normalization of the data
599
                $this->_xh['vt'] = strtolower($name);
600
                $this->_xh['lv'] = 3; // indicate we've found a value
601
                if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
602
                    if (!$this->handleParsingError('non numeric data received in INT value: ' .
603
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
604
                        return;
605
                    }
606
                    /// @todo: find a better way of reporting an error value than this! Use NaN?
607
                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
608
                } else {
609
                    // it's ok, add it on
610
                    $this->_xh['value'] = (int)$this->_xh['ac'];
611
                }
612
                break;
613
 
614
            case 'DOUBLE':
615
                $this->_xh['vt'] = Value::$xmlrpcDouble;
616
                $this->_xh['lv'] = 3; // indicate we've found a value
617
                if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
618
                    if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
619
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
620
                        return;
621
                    }
622
 
623
                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
624
                } else {
625
                    // it's ok, add it on
626
                    $this->_xh['value'] = (double)$this->_xh['ac'];
627
                }
628
                break;
629
 
630
            case 'DATETIME.ISO8601':
631
                $this->_xh['vt'] = Value::$xmlrpcDateTime;
632
                $this->_xh['lv'] = 3; // indicate we've found a value
633
                if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
634
                    if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
635
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
636
                        return;
637
                    }
638
                }
639
                if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
640
                    try {
641
                        $this->_xh['value'] = new \DateTime($this->_xh['ac']);
642
 
643
                    // the default regex used to validate the date string a few lines above should make this case impossible,
644
                    // but one never knows...
645
                    } catch(\Exception $e) {
646
                        // what to do? We can not guarantee that a valid date can be created. We return null...
647
                        if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
648
                            $e->getMessage(), __METHOD__)) {
649
                            return;
650
                        }
651
                    }
652
                } else {
653
                    $this->_xh['value'] = $this->_xh['ac'];
654
                }
655
                break;
656
 
657
            case 'BASE64':
658
                $this->_xh['vt'] = Value::$xmlrpcBase64;
659
                $this->_xh['lv'] = 3; // indicate we've found a value
660
                if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
661
                    $v = base64_decode($this->_xh['ac'], true);
662
                    if ($v === false) {
663
                        $this->_xh['isf'] = 2;
664
                        $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
665
                        return;
666
                    }
667
                } else {
668
                    $v = base64_decode($this->_xh['ac']);
669
                    if ($v === '' && $this->_xh['ac'] !== '') {
670
                        // only the empty string should decode to the empty string
671
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
672
                            $this->truncateValueForLog($this->_xh['ac']));
673
                    }
674
                }
675
                $this->_xh['value'] = $v;
676
                break;
677
 
678
            case 'NAME':
679
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
680
                break;
681
 
682
            case 'MEMBER':
683
                // add to array in the stack the last element built, unless no VALUE or no NAME were found
684
                if ($this->_xh['vt']) {
685
                    $vscount = count($this->_xh['valuestack']);
686
                    if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
687
                        if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
688
                            return;
689
                        }
690
                        $this->_xh['valuestack'][$vscount - 1]['name'] = '';
691
                    }
692
                    $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
693
                } else {
694
                    if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
695
                        return;
696
                    }
697
                }
698
                break;
699
 
700
            case 'DATA':
701
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
702
                break;
703
 
704
            case 'STRUCT':
705
            case 'ARRAY':
706
                // fetch out of stack array of values, and promote it to current value
707
                $currVal = array_pop($this->_xh['valuestack']);
708
                $this->_xh['value'] = $currVal['values'];
709
                $this->_xh['vt'] = strtolower($name);
710
                if (isset($currVal['php_class'])) {
711
                    $this->_xh['php_class'] = $currVal['php_class'];
712
                }
713
                break;
714
 
715
            case 'PARAM':
716
                // add to array of params the current value, unless no VALUE was found
717
                /// @todo should we also check if there were two VALUE inside the PARAM?
718
                if ($this->_xh['vt']) {
719
                    $this->_xh['params'][] = $this->_xh['value'];
720
                    $this->_xh['pt'][] = $this->_xh['vt'];
721
                } else {
722
                    if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
723
                        return;
724
                    }
725
                }
726
                break;
727
 
728
            case 'METHODNAME':
729
                if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
730
                    if (!$this->handleParsingError('invalid data received in METHODNAME: '.
731
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
732
                        return;
733
                    }
734
                }
735
                $methodName = trim($this->_xh['ac']);
736
                $this->_xh['method'] = $methodName;
737
                // we allow the callback to f.e. give us back a mangled method name by manipulating $this
738
                if (isset($this->current_parsing_options['methodname_callback'])) {
739
                    call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
740
                }
741
                break;
742
 
743
            case 'NIL':
744
            case 'EX:NIL':
745
                // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
746
                //if ($this->current_parsing_options['xmlrpc_null_extension']) {
747
                    $this->_xh['vt'] = 'null';
748
                    $this->_xh['value'] = null;
749
                    $this->_xh['lv'] = 3;
750
                //}
751
                break;
752
 
753
            /// @todo add extra checking:
754
            ///       - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
755
            ///       - FAULT should contain a single struct with the 2 expected members (check their name and type)
756
            ///       - METHODCALL should contain a methodname
757
            case 'PARAMS':
758
            case 'FAULT':
759
            case 'METHODCALL':
760
            case 'METHODRESPONSE':
761
                break;
762
 
763
            default:
764
                // End of INVALID ELEMENT
765
                // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
766
                // $this->_xh['isf'] is set to 2...
767
                break;
768
        }
769
    }
770
 
771
    /**
     * Variant of xmlrpc_ee() used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
     * It simply forwards the call to xmlrpc_ee(), passing 0 as its third argument.
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $mode = 0;

        $this->xmlrpc_ee($parser, $name, $mode);
    }
783
 
784
    /**
     * Variant of xmlrpc_ee() used in decoding xml-rpc requests/responses while building xmlrpc-extension Values
     * (plain php for all but base64 and datetime).
     * It simply forwards the call to xmlrpc_ee(), passing -1 as its third argument.
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_epi($parser, $name)
    {
        $mode = -1;

        $this->xmlrpc_ee($parser, $name, $mode);
    }
796
 
797
    /**
     * xml parser handler function for character data: appends the received data to the `ac` accumulator.
     * @internal
     *
     * @param resource $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_cd($parser, $data)
    {
        // Character data is discarded in two cases:
        // - an xml fault was already detected (isf >= 2)
        // - "lookforvalue == 3", which means that we've found an entire value
        if ($this->_xh['isf'] >= 2 || $this->_xh['lv'] == 3) {
            return;
        }

        $this->_xh['ac'] .= $data;
    }
817
 
818
    /**
     * xml parser handler function for 'other stuff', i.e. anything which is neither char data nor an element
     * start/end tag. In fact, it only gets called on unknown entities...
     * Data which starts with '&' and ends with ';' is appended as-is to the `ac` accumulator; anything else is dropped.
     * @internal
     *
     * @param resource $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        $firstChar = substr($data, 0, 1);
        $lastChar = substr($data, -1, 1);
        if ($firstChar == '&' && $lastChar == ';') {
            $this->_xh['ac'] .= $data;
        }
    }
838
 
839
    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if a text/xml content-type is received over HTTP without a charset,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?)
     * clients/servers, which will be most probably using UTF-8 anyway...
     * The checks are carried out in this order, and the first one which succeeds wins:
     * 1. charset specified in the http headers
     * 2. BOM at the start of the xml
     * 3. encoding specified in the XML declaration
     * 4. guesses made by mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension
     *                              is enabled). This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string the encoding determined, uppercased when it comes from the http header or the xml declaration.
     *                When steps 1-3 fail: the value returned by mb_detect_encoding() if mbstring is enabled,
     *                PhpXmlRpc::$xmlrpc_defencoding if mbstring is not enabled.
     *                NOTE(review): the php manual documents mb_detect_encoding() as returning false (not null)
     *                when it can not detect the encoding - confirm what callers expect in that case
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
     *       the method independent of global state
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/

        // Step 1 - is the encoding specified in the HTTP HEADERS?
        // The header is expected to look like: Content-type = ...; charset=value(; ...)*
        // where value is a token or a quoted string, and no LWS is allowed between 'charset' and value.
        // Note: we do not check for invalid chars in the value.
        // Note 2: we might be removing whitespace/tabs that ought to be left in if the received charset is a
        //   quoted string. But nobody uses such charset names...
        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        $headerMatch = array();
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $headerMatch)) {
            return strtoupper(trim($headerMatch[1], " \t\""));
        }

        // Step 2 - scan the first bytes of the data for a BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        //     NOTE: actually, according to the spec, even if we find the BOM and determine an encoding, we should
        //     check if there is an encoding specified in the xml declaration, and verify if they match.
        // The order of the entries matters: the 4-byte patterns have to be tested before the 2-byte ones, as
        // \xFF\xFE is a prefix of \xFF\xFE\x00\x00
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        $bomPatterns = array(
            '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/' => 'UCS-4',
            '/^(?:\xFE\xFF|\xFF\xFE)/' => 'UTF-16',
            '/^(?:\xEF\xBB\xBF)/' => 'UTF-8',
        );
        foreach ($bomPatterns as $bomRegexp => $bomEncoding) {
            if (preg_match($bomRegexp, $xmlChunk)) {
                return $bomEncoding;
            }
        }

        // Step 3 - is the encoding specified in the xml declaration?
        /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
        ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
        ///       For lower versions, we could attempt usage of mb_ereg...
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        $declarationRegexp = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";
        $declarationMatch = array();
        if (preg_match($declarationRegexp, $xmlChunk, $declarationMatch)) {
            // the 2nd capture group holds the encoding name including its quotes: strip them
            return strtoupper(substr($declarationMatch[2], 1, -1));
        }

        // Step 4 - no explicit indication found
        if (!function_exists('mb_detect_encoding')) {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }

        // mbstring is available: let it do the guesswork
        if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
            $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
        }
        $detected = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

        // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
        // IANA also likes better US-ASCII, so go with it
        if ($detected == 'ASCII') {
            $detected = 'US-' . $detected;
        }

        return $detected;
    }
937
 
938
    /**
     * Helper function: checks if an xml chunk has a charset declaration, either as a BOM at its very beginning
     * or as an `encoding` attribute in the xml declaration.
     *
     * @param string $xmlChunk
     * @return bool
     *
     * @todo rename to hasEncodingDeclaration
     */
    public static function hasEncoding($xmlChunk)
    {
        // scan the first bytes of the data for a UCS-4, UTF-16 or UTF-8 BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        $startsWithBom = preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)
            || preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)
            || preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk);
        if ($startsWithBom) {
            return true;
        }

        // no BOM: the answer depends only on the encoding being specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        $declarationRegexp = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";

        return (bool)preg_match($declarationRegexp, $xmlChunk);
    }
970
 
971
    /**
     * Reacts to invalid data found while parsing, according to the `xmlrpc_reject_invalid_values` parsing option:
     * - when the option is on, the parsing state is flagged as faulty (isf = 2) and the message, with its first
     *   letter uppercased, is stored as fault reason
     * - when it is off, the message is only sent to the logger, at error level
     *
     * @param string $message
     * @param string $method method/file/line info, prepended to the logged message when not empty
     * @return bool false if the caller has to stop parsing
     */
    protected function handleParsingError($message, $method = '')
    {
        if (!$this->current_parsing_options['xmlrpc_reject_invalid_values']) {
            // lenient mode: log and carry on
            $origin = ($method != '') ? $method . ': ' : '';
            $this->getLogger()->error('XML-RPC: ' . $origin . $message);

            return true;
        }

        // strict mode: record the failure and tell the caller to bail out
        $this->_xh['isf'] = 2;
        $this->_xh['isf_reason'] = ucfirst($message);

        return false;
    }
987
 
988
    /**
     * Shortens data which is going to be embedded in log/error messages.
     * Strings longer than $this->maxLogValueLength bytes are cut to (maxLogValueLength - 3) bytes and have '...'
     * appended; shorter ones are returned unchanged. Lengths are measured in bytes, not characters.
     *
     * @param string $data
     * @return string
     */
    protected function truncateValueForLog($data)
    {
        $limit = $this->maxLogValueLength;

        return (strlen($data) > $limit) ? substr($data, 0, $limit - 3) . '...' : $data;
    }
1001
 
1002
    // *** BC layer ***
1003
 
1004
    /**
     * xml parser handler function for opening element tags.
     * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses.
     * It logs a deprecation notice and then forwards the call to xmlrpc_se(), passing true as its fourth argument.
     * @deprecated
     *
     * @param resource $parser
     * @param $name
     * @param $attrs
     * @return void
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        // NB: the notice is emitted once per opening tag - this will be spamming the log if this method is in use...
        $notice = 'Method ' . __METHOD__ . ' is deprecated';
        $this->logDeprecation($notice);

        $this->xmlrpc_se($parser, $name, $attrs, true);
    }
1021
 
1022
    /**
     * BC layer: magic getter giving deprecated read access to members which used to be publicly accessible.
     * Every supported access is logged as a deprecation; any other name triggers an E_USER_WARNING and yields null.
     *
     * @param string $name one of 'accept', '_xh', 'xmlrpc_valid_parents'
     * @return mixed the value, returned by reference
     */
    public function &__get($name)
    {
        switch ($name) {
            // 'accept' is a virtual property stored in the parsing options, the same way as __set(), __isset() and
            // __unset() handle it
            case 'accept':
                $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
                if (isset($this->current_parsing_options['accept'])) {
                    return $this->current_parsing_options['accept'];
                }
                // do not return the missing array element by reference, as that would create it as a side effect
                $result = null;
                return $result;
            case '_xh':
            case 'xmlrpc_valid_parents':
                $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
                return $this->$name;
            default:
                /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
                $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
                trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
                // a variable is needed, since only variables can be returned by reference
                $result = null;
                return $result;
        }
    }
1037
 
1038
    /**
     * BC layer: magic setter giving deprecated write access to 'accept', '_xh' and 'xmlrpc_valid_parents'.
     * Every supported access is logged as a deprecation; any other name triggers an E_USER_WARNING and is ignored.
     *
     * @param string $name
     * @param mixed $value
     * @return void
     */
    public function __set($name, $value)
    {
        if ($name === 'accept') {
            // this should only ever be called by subclasses which overtook `parse()`
            $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
            $this->current_parsing_options['accept'] = $value;
        } elseif ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
            $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
            $this->$name = $value;
        } else {
            /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
            $frames = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
            $caller = $frames[0];
            trigger_error('Undefined property via __set(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
        }
    }
1057
 
1058
    /**
     * BC layer: magic isset() support for 'accept', '_xh' and 'xmlrpc_valid_parents'.
     * Every supported check is logged as a deprecation; any other name is reported as not set, without warnings.
     *
     * @param string $name
     * @return bool
     */
    public function __isset($name)
    {
        if ($name === 'accept') {
            // 'accept' is stored in the parsing options
            $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

            return isset($this->current_parsing_options['accept']);
        }

        if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
            $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

            return isset($this->$name);
        }

        return false;
    }
1072
 
1073
    /**
     * BC layer: magic unset() support for 'accept', '_xh' and 'xmlrpc_valid_parents'.
     * Every supported access is logged as a deprecation; any other name triggers an E_USER_WARNING and is ignored.
     *
     * @param string $name
     * @return void
     */
    public function __unset($name)
    {
        if ($name === 'accept') {
            // q: does this make sense at all?
            $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
            unset($this->current_parsing_options['accept']);
        } elseif ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
            $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
            unset($this->$name);
        } else {
            /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
            $frames = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
            $caller = $frames[0];
            trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
        }
    }
1092
}