Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
namespace JmesPath;
3
 
4
use JmesPath\Lexer as T;
5
 
6
/**
 * JMESPath Pratt parser
 *
 * Turns the token stream produced by the lexer into an array based AST.
 * Each token type is dispatched dynamically to a "nud_<type>" method (token
 * appears at the start of an expression) or a "led_<type>" method (token
 * appears after a left-hand operand). Token types without such a method fall
 * through to __call(), which reports them as syntax errors.
 *
 * @link http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf
 */
class Parser
{
    /** @var Lexer */
    private $lexer;

    /** @var array Tokens returned by the lexer for the current expression */
    private $tokens;

    /** @var array Token currently being examined */
    private $token;

    /** @var int Index of the current token inside $tokens */
    private $tpos;

    /** @var string Expression being parsed (kept for error reporting) */
    private $expression;

    /** @var array Token substituted once the token stream is exhausted */
    private static $nullToken = ['type' => T::T_EOF];

    /** @var array AST node that refers to the current node */
    private static $currentNode = ['type' => T::T_CURRENT];

    /**
     * @var array Binding power of each token type. expr() keeps applying
     *            "led" handlers while the current token binds tighter than
     *            the right binding power it was called with.
     */
    private static $bp = [
        T::T_EOF               => 0,
        T::T_QUOTED_IDENTIFIER => 0,
        T::T_IDENTIFIER        => 0,
        T::T_RBRACKET          => 0,
        T::T_RPAREN            => 0,
        T::T_COMMA             => 0,
        T::T_RBRACE            => 0,
        T::T_NUMBER            => 0,
        T::T_CURRENT           => 0,
        T::T_EXPREF            => 0,
        T::T_COLON             => 0,
        T::T_PIPE              => 1,
        T::T_OR                => 2,
        T::T_AND               => 3,
        T::T_COMPARATOR        => 5,
        T::T_FLATTEN           => 9,
        T::T_STAR              => 20,
        T::T_FILTER            => 21,
        T::T_DOT               => 40,
        T::T_NOT               => 45,
        T::T_LBRACE            => 50,
        T::T_LBRACKET          => 55,
        T::T_LPAREN            => 60,
    ];

    /** @var array Acceptable tokens after a dot token */
    private static $afterDot = [
        T::T_IDENTIFIER        => true, // foo.bar
        T::T_QUOTED_IDENTIFIER => true, // foo."bar"
        T::T_STAR              => true, // foo.*
        T::T_LBRACE            => true, // foo.{a: 0}
        T::T_LBRACKET          => true, // foo.[a, b]
        T::T_FILTER            => true, // foo.[?bar==10]
    ];

    /**
     * @param Lexer|null $lexer Lexer used to tokenize expressions. A new
     *                          Lexer is created when none is given.
     */
    public function __construct(?Lexer $lexer = null)
    {
        $this->lexer = $lexer ?: new Lexer();
    }

    /**
     * Parses a JMESPath expression into an AST
     *
     * @param string $expression JMESPath expression to compile
     *
     * @return array Returns an array based AST
     * @throws SyntaxErrorException
     */
    public function parse($expression)
    {
        $this->expression = $expression;
        $this->tokens = $this->lexer->tokenize($expression);
        // Start before the first token so that next() loads token 0.
        $this->tpos = -1;
        $this->next();
        $result = $this->expr();

        if ($this->token['type'] === T::T_EOF) {
            return $result;
        }

        throw $this->syntax('Did not reach the end of the token stream');
    }

    /**
     * Parses an expression while rbp < lbp.
     *
     * @param int $rbp Right bound precedence
     *
     * @return array
     */
    private function expr($rbp = 0)
    {
        $left = $this->{"nud_{$this->token['type']}"}();

        // Token types missing from the binding power table (e.g. a literal
        // in operator position) end the expression, exactly as the previous
        // null comparison did, but without an "undefined array key" warning.
        while (isset(self::$bp[$this->token['type']])
            && $rbp < self::$bp[$this->token['type']]
        ) {
            $left = $this->{"led_{$this->token['type']}"}($left);
        }

        return $left;
    }

    /**
     * Parses an unquoted identifier into a field node.
     *
     * @return array
     */
    private function nud_identifier()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses a quoted identifier into a field node. A quoted identifier may
     * not be followed by an opening parenthesis.
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function nud_quoted_identifier()
    {
        $token = $this->token;
        $this->next();
        $this->assertNotToken(T::T_LPAREN);

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses the current-node token.
     *
     * @return array
     */
    private function nud_current()
    {
        $this->next();

        return self::$currentNode;
    }

    /**
     * Parses a literal token into a literal node.
     *
     * @return array
     */
    private function nud_literal()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'literal', 'value' => $token['value']];
    }

    /**
     * Parses an expression reference; the referenced expression becomes the
     * only child of the node.
     *
     * @return array
     */
    private function nud_expref()
    {
        $this->next();

        return [
            'type'     => T::T_EXPREF,
            'children' => [$this->expr(self::$bp[T::T_EXPREF])]
        ];
    }

    /**
     * Parses a negation; the negated expression becomes the only child.
     *
     * @return array
     */
    private function nud_not()
    {
        $this->next();

        return [
            'type'     => T::T_NOT,
            'children' => [$this->expr(self::$bp[T::T_NOT])]
        ];
    }

    /**
     * Parses a parenthesized expression and returns the inner expression.
     *
     * @return array
     * @throws SyntaxErrorException When the closing parenthesis is missing.
     */
    private function nud_lparen()
    {
        $this->next();
        $result = $this->expr(0);

        if ($this->token['type'] !== T::T_RPAREN) {
            throw $this->syntax('Unclosed `(`');
        }

        $this->next();

        return $result;
    }

    /**
     * Parses a multi-select hash, e.g. {a: foo, b: bar}.
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function nud_lbrace()
    {
        static $validKeys = [T::T_QUOTED_IDENTIFIER => true, T::T_IDENTIFIER => true];
        $this->next($validKeys);
        $pairs = [];

        do {
            $pairs[] = $this->parseKeyValuePair();
            if ($this->token['type'] === T::T_COMMA) {
                // A comma must be followed by another key.
                $this->next($validKeys);
            }
        } while ($this->token['type'] !== T::T_RBRACE);

        // Consume the closing brace
        $this->next();

        return ['type' => 'multi_select_hash', 'children' => $pairs];
    }

    /**
     * Parses a leading flatten by applying it to the current node.
     *
     * @return array
     */
    private function nud_flatten()
    {
        return $this->led_flatten(self::$currentNode);
    }

    /**
     * Parses a leading filter by applying it to the current node.
     *
     * @return array
     */
    private function nud_filter()
    {
        return $this->led_filter(self::$currentNode);
    }

    /**
     * Parses a leading wildcard as an object projection of the current node.
     *
     * @return array
     */
    private function nud_star()
    {
        return $this->parseWildcardObject(self::$currentNode);
    }

    /**
     * Parses a leading bracket: an index or slice, an array wildcard, or
     * otherwise a multi-select list.
     *
     * @return array
     */
    private function nud_lbracket()
    {
        $this->next();
        $type = $this->token['type'];

        if ($type === T::T_NUMBER || $type === T::T_COLON) {
            return $this->parseArrayIndexExpression();
        }

        // "[*]" is a wildcard only when the star is directly followed by the
        // closing bracket; anything else is parsed as a multi-select list.
        if ($type === T::T_STAR && $this->lookahead() === T::T_RBRACKET) {
            return $this->parseWildcardArray();
        }

        return $this->parseMultiSelectList();
    }

    /**
     * Parses a bracket that follows an expression: an index or slice applied
     * to $left, or an array wildcard projection of $left.
     *
     * @param array $left Node on the left of the bracket
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function led_lbracket(array $left)
    {
        static $nextTypes = [T::T_NUMBER => true, T::T_COLON => true, T::T_STAR => true];
        $this->next($nextTypes);

        switch ($this->token['type']) {
            case T::T_NUMBER:
            case T::T_COLON:
                return [
                    'type' => 'subexpression',
                    'children' => [$left, $this->parseArrayIndexExpression()]
                ];
            default:
                // Only a star can reach this branch because of $nextTypes.
                return $this->parseWildcardArray($left);
        }
    }

    /**
     * Parses a flatten operator into an array projection whose source is the
     * flattened $left node.
     *
     * @param array $left Node being flattened
     *
     * @return array
     */
    private function led_flatten(array $left)
    {
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => T::T_FLATTEN, 'children' => [$left]],
                $this->parseProjection(self::$bp[T::T_FLATTEN])
            ]
        ];
    }

    /**
     * Parses a dot: an object wildcard projection when followed by a star,
     * otherwise a subexpression of $left and whatever follows the dot.
     *
     * @param array $left Node on the left of the dot
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function led_dot(array $left)
    {
        $this->next(self::$afterDot);

        if ($this->token['type'] === T::T_STAR) {
            return $this->parseWildcardObject($left);
        }

        return [
            'type'     => 'subexpression',
            'children' => [$left, $this->parseDot(self::$bp[T::T_DOT])]
        ];
    }

    /**
     * Parses an "or" expression.
     *
     * @param array $left Left operand
     *
     * @return array
     */
    private function led_or(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_OR,
            'children' => [$left, $this->expr(self::$bp[T::T_OR])]
        ];
    }

    /**
     * Parses an "and" expression.
     *
     * @param array $left Left operand
     *
     * @return array
     */
    private function led_and(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_AND,
            'children' => [$left, $this->expr(self::$bp[T::T_AND])]
        ];
    }

    /**
     * Parses a pipe expression.
     *
     * @param array $left Left operand
     *
     * @return array
     */
    private function led_pipe(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_PIPE,
            'children' => [$left, $this->expr(self::$bp[T::T_PIPE])]
        ];
    }

    /**
     * Parses a function call; $left supplies the function name.
     *
     * @param array $left Field node naming the function
     *
     * @return array
     * @throws SyntaxErrorException When $left is not a field node.
     */
    private function led_lparen(array $left)
    {
        // Only field nodes carry a usable name. Other nodes (index, current
        // node, multi-select, ...) previously produced an undefined index
        // warning and a function node without a name.
        if ($left['type'] !== 'field') {
            throw $this->syntax('Function names must be unquoted identifiers');
        }

        $args = [];
        $this->next();

        while ($this->token['type'] !== T::T_RPAREN) {
            $args[] = $this->expr(0);
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
            }
        }

        // Consume the closing parenthesis
        $this->next();

        return [
            'type'     => 'function',
            'value'    => $left['value'],
            'children' => $args
        ];
    }

    /**
     * Parses a filter expression into an array projection of $left whose
     * right side is a condition node.
     *
     * @param array $left Node being filtered
     *
     * @return array
     * @throws SyntaxErrorException When the filter is not closed.
     */
    private function led_filter(array $left)
    {
        $this->next();
        $expression = $this->expr();

        if ($this->token['type'] !== T::T_RBRACKET) {
            throw $this->syntax('Expected a closing rbracket for the filter');
        }

        $this->next();
        $rhs = $this->parseProjection(self::$bp[T::T_FILTER]);

        return [
            'type'       => 'projection',
            'from'       => 'array',
            'children'   => [
                $left ?: self::$currentNode,
                [
                    'type' => 'condition',
                    'children' => [$expression, $rhs]
                ]
            ]
        ];
    }

    /**
     * Parses a comparison; the comparator itself is stored as the node value.
     *
     * @param array $left Left operand
     *
     * @return array
     */
    private function led_comparator(array $left)
    {
        $token = $this->token;
        $this->next();

        return [
            'type'     => T::T_COMPARATOR,
            'value'    => $token['value'],
            'children' => [$left, $this->expr(self::$bp[T::T_COMPARATOR])]
        ];
    }

    /**
     * Parses the right hand side of a projection.
     *
     * @param int $bp Binding power used when parsing the right hand side
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function parseProjection($bp)
    {
        $type = $this->token['type'];

        // Tokens that bind weaker than 10 end the projection, which then
        // projects onto the current node. Token types missing from the table
        // are treated the same way (as before, minus the PHP warning).
        if (!isset(self::$bp[$type]) || self::$bp[$type] < 10) {
            return self::$currentNode;
        }

        if ($type === T::T_DOT) {
            $this->next(self::$afterDot);
            return $this->parseDot($bp);
        }

        if ($type === T::T_LBRACKET || $type === T::T_FILTER) {
            return $this->expr($bp);
        }

        throw $this->syntax('Syntax error after projection');
    }

    /**
     * Parses what follows a dot: a multi-select list when the next token is
     * an opening bracket, otherwise a regular expression.
     *
     * @param int $bp Binding power used for a regular expression
     *
     * @return array
     */
    private function parseDot($bp)
    {
        if ($this->token['type'] === T::T_LBRACKET) {
            $this->next();
            return $this->parseMultiSelectList();
        }

        return $this->expr($bp);
    }

    /**
     * Parses one "key: expression" entry of a multi-select hash. The current
     * token is used as the key.
     *
     * @return array
     * @throws SyntaxErrorException When the key is not followed by a colon.
     */
    private function parseKeyValuePair()
    {
        static $validColon = [T::T_COLON => true];
        $key = $this->token['value'];
        $this->next($validColon);
        // Step over the colon onto the first token of the value.
        $this->next();

        return [
            'type'     => 'key_val_pair',
            'value'    => $key,
            'children' => [$this->expr()]
        ];
    }

    /**
     * Parses an object wildcard into an object projection.
     *
     * @param array|null $left Node being projected; the current node is used
     *                         when empty.
     *
     * @return array
     */
    private function parseWildcardObject(?array $left = null)
    {
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'object',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array wildcard into an array projection. Must be called while
     * the star is the current token.
     *
     * @param array|null $left Node being projected; the current node is used
     *                         when empty.
     *
     * @return array
     * @throws SyntaxErrorException When the star is not followed by "]".
     */
    private function parseWildcardArray(?array $left = null)
    {
        static $getRbracket = [T::T_RBRACKET => true];
        $this->next($getRbracket);
        // Consume the closing bracket
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array index expression (e.g., [0], [1:2:3])
     *
     * @return array An index node, or an array projection over a slice node
     *               when at least one colon was found.
     * @throws SyntaxErrorException
     */
    private function parseArrayIndexExpression()
    {
        static $matchNext = [
            T::T_NUMBER   => true,
            T::T_COLON    => true,
            T::T_RBRACKET => true
        ];

        $pos = 0;
        $parts = [null, null, null];
        $expected = $matchNext;

        do {
            if ($this->token['type'] === T::T_COLON) {
                $pos++;
                $expected = $matchNext;
            } elseif ($this->token['type'] === T::T_NUMBER) {
                $parts[$pos] = $this->token['value'];
                // Two numbers in a row are not allowed.
                $expected = [T::T_COLON => true, T::T_RBRACKET => true];
            }
            $this->next($expected);
        } while ($this->token['type'] !== T::T_RBRACKET);

        // Consume the closing bracket
        $this->next();

        if ($pos === 0) {
            // No colons were found so this is a simple index extraction
            return ['type' => 'index', 'value' => $parts[0]];
        }

        if ($pos > 2) {
            throw $this->syntax('Invalid array slice syntax: too many colons');
        }

        // Sliced array from start (e.g., [2:])
        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => 'slice', 'value' => $parts],
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses the elements of a multi-select list up to and including the
     * closing bracket.
     *
     * @return array
     * @throws SyntaxErrorException When a comma is directly followed by "]".
     */
    private function parseMultiSelectList()
    {
        $nodes = [];

        do {
            $nodes[] = $this->expr();
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
                $this->assertNotToken(T::T_RBRACKET);
            }
        } while ($this->token['type'] !== T::T_RBRACKET);

        // Consume the closing bracket
        $this->next();

        return ['type' => 'multi_select_list', 'children' => $nodes];
    }

    /**
     * Creates (but does not throw) a syntax error for the current token.
     *
     * @param string|array $msg Error message, or the set of expected token
     *                          types as passed on by next()
     *
     * @return SyntaxErrorException
     */
    private function syntax($msg)
    {
        return new SyntaxErrorException($msg, $this->token, $this->expression);
    }

    /**
     * Returns the type of the token after the current one without consuming
     * it, or T_EOF when there is none.
     *
     * @return string
     */
    private function lookahead()
    {
        return (!isset($this->tokens[$this->tpos + 1]))
            ? T::T_EOF
            : $this->tokens[$this->tpos + 1]['type'];
    }

    /**
     * Advances to the next token, or to the EOF token once the stream is
     * exhausted.
     *
     * @param array|null $match Optional set of acceptable token types, keyed
     *                          by type
     *
     * @throws SyntaxErrorException When $match is given and the new token's
     *                              type is not one of its keys.
     */
    private function next(?array $match = null)
    {
        if (!isset($this->tokens[$this->tpos + 1])) {
            $this->token = self::$nullToken;
        } else {
            $this->token = $this->tokens[++$this->tpos];
        }

        if ($match && !isset($match[$this->token['type']])) {
            // The expected types are handed to the exception as-is.
            throw $this->syntax($match);
        }
    }

    /**
     * Fails when the current token has the given type.
     *
     * @param string $type Token type that is not allowed here
     *
     * @throws SyntaxErrorException
     */
    private function assertNotToken($type)
    {
        if ($this->token['type'] === $type) {
            throw $this->syntax("Token {$this->tpos} not allowed to be $type");
        }
    }

    /**
     * @internal Handles undefined tokens without paying the cost of validation
     *
     * Reached when expr() dispatches to a nud_/led_ method that does not
     * exist; the error message lists the token names that do have a handler
     * with the same prefix.
     *
     * @throws SyntaxErrorException     For unknown nud_/led_ handlers.
     * @throws \BadMethodCallException For any other undefined method.
     */
    public function __call($method, $args)
    {
        $prefix = substr($method, 0, 4);
        if ($prefix == 'nud_' || $prefix == 'led_') {
            $token = substr($method, 4);
            $message = "Unexpected \"$token\" token ($method). Expected one of"
                . " the following tokens: "
                . implode(', ', array_map(function ($i) {
                    return '"' . substr($i, 4) . '"';
                }, array_filter(
                    get_class_methods($this),
                    function ($i) use ($prefix) {
                        return strpos($i, $prefix) === 0;
                    }
                )));
            throw $this->syntax($message);
        }

        throw new \BadMethodCallException("Call to undefined method $method");
    }
}