Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
namespace JmesPath;
3
 
4
/**
 * Tokenizes JMESPath expressions.
 *
 * The expression is split into single bytes and scanned left to right. The
 * first byte of every token selects a scanner state through
 * {@see Lexer::$transitionTable}; the state then decides how many further
 * bytes belong to the token. Input the lexer cannot make sense of is never
 * rejected here: it is emitted as a token of type T_UNKNOWN so the consumer
 * of the token stream can decide how to report it.
 *
 * Every token is an array with the keys 'type' (one of the T_* constants),
 * 'value' and 'pos' (byte offset of the first byte of the token).
 */
class Lexer
{
    const T_DOT = 'dot';
    const T_STAR = 'star';
    const T_COMMA = 'comma';
    const T_COLON = 'colon';
    const T_CURRENT = 'current';
    const T_EXPREF = 'expref';
    const T_LPAREN = 'lparen';
    const T_RPAREN = 'rparen';
    const T_LBRACE = 'lbrace';
    const T_RBRACE = 'rbrace';
    const T_LBRACKET = 'lbracket';
    const T_RBRACKET = 'rbracket';
    const T_FLATTEN = 'flatten';
    const T_IDENTIFIER = 'identifier';
    const T_NUMBER = 'number';
    const T_QUOTED_IDENTIFIER = 'quoted_identifier';
    const T_UNKNOWN = 'unknown';
    const T_PIPE = 'pipe';
    const T_OR = 'or';
    const T_AND = 'and';
    const T_NOT = 'not';
    const T_FILTER = 'filter';
    const T_LITERAL = 'literal';
    const T_EOF = 'eof';
    const T_COMPARATOR = 'comparator';

    const STATE_IDENTIFIER = 0;
    const STATE_NUMBER = 1;
    const STATE_SINGLE_CHAR = 2;
    const STATE_WHITESPACE = 3;
    const STATE_STRING_LITERAL = 4;
    const STATE_QUOTED_STRING = 5;
    const STATE_JSON_LITERAL = 6;
    const STATE_LBRACKET = 7;
    const STATE_PIPE = 8;
    const STATE_LT = 9;
    const STATE_GT = 10;
    const STATE_EQ = 11;
    const STATE_NOT = 12;
    const STATE_AND = 13;

    /** @var array Maps the first character of a token to the scanner state that consumes it */
    private static $transitionTable = [
        '<'  => self::STATE_LT,
        '>'  => self::STATE_GT,
        '='  => self::STATE_EQ,
        '!'  => self::STATE_NOT,
        '['  => self::STATE_LBRACKET,
        '|'  => self::STATE_PIPE,
        '&'  => self::STATE_AND,
        '`'  => self::STATE_JSON_LITERAL,
        '"'  => self::STATE_QUOTED_STRING,
        "'"  => self::STATE_STRING_LITERAL,
        '-'  => self::STATE_NUMBER,
        '0'  => self::STATE_NUMBER,
        '1'  => self::STATE_NUMBER,
        '2'  => self::STATE_NUMBER,
        '3'  => self::STATE_NUMBER,
        '4'  => self::STATE_NUMBER,
        '5'  => self::STATE_NUMBER,
        '6'  => self::STATE_NUMBER,
        '7'  => self::STATE_NUMBER,
        '8'  => self::STATE_NUMBER,
        '9'  => self::STATE_NUMBER,
        ' '  => self::STATE_WHITESPACE,
        "\t" => self::STATE_WHITESPACE,
        "\n" => self::STATE_WHITESPACE,
        "\r" => self::STATE_WHITESPACE,
        '.'  => self::STATE_SINGLE_CHAR,
        '*'  => self::STATE_SINGLE_CHAR,
        ']'  => self::STATE_SINGLE_CHAR,
        ','  => self::STATE_SINGLE_CHAR,
        ':'  => self::STATE_SINGLE_CHAR,
        '@'  => self::STATE_SINGLE_CHAR,
        '('  => self::STATE_SINGLE_CHAR,
        ')'  => self::STATE_SINGLE_CHAR,
        '{'  => self::STATE_SINGLE_CHAR,
        '}'  => self::STATE_SINGLE_CHAR,
        '_'  => self::STATE_IDENTIFIER,
        'A'  => self::STATE_IDENTIFIER,
        'B'  => self::STATE_IDENTIFIER,
        'C'  => self::STATE_IDENTIFIER,
        'D'  => self::STATE_IDENTIFIER,
        'E'  => self::STATE_IDENTIFIER,
        'F'  => self::STATE_IDENTIFIER,
        'G'  => self::STATE_IDENTIFIER,
        'H'  => self::STATE_IDENTIFIER,
        'I'  => self::STATE_IDENTIFIER,
        'J'  => self::STATE_IDENTIFIER,
        'K'  => self::STATE_IDENTIFIER,
        'L'  => self::STATE_IDENTIFIER,
        'M'  => self::STATE_IDENTIFIER,
        'N'  => self::STATE_IDENTIFIER,
        'O'  => self::STATE_IDENTIFIER,
        'P'  => self::STATE_IDENTIFIER,
        'Q'  => self::STATE_IDENTIFIER,
        'R'  => self::STATE_IDENTIFIER,
        'S'  => self::STATE_IDENTIFIER,
        'T'  => self::STATE_IDENTIFIER,
        'U'  => self::STATE_IDENTIFIER,
        'V'  => self::STATE_IDENTIFIER,
        'W'  => self::STATE_IDENTIFIER,
        'X'  => self::STATE_IDENTIFIER,
        'Y'  => self::STATE_IDENTIFIER,
        'Z'  => self::STATE_IDENTIFIER,
        'a'  => self::STATE_IDENTIFIER,
        'b'  => self::STATE_IDENTIFIER,
        'c'  => self::STATE_IDENTIFIER,
        'd'  => self::STATE_IDENTIFIER,
        'e'  => self::STATE_IDENTIFIER,
        'f'  => self::STATE_IDENTIFIER,
        'g'  => self::STATE_IDENTIFIER,
        'h'  => self::STATE_IDENTIFIER,
        'i'  => self::STATE_IDENTIFIER,
        'j'  => self::STATE_IDENTIFIER,
        'k'  => self::STATE_IDENTIFIER,
        'l'  => self::STATE_IDENTIFIER,
        'm'  => self::STATE_IDENTIFIER,
        'n'  => self::STATE_IDENTIFIER,
        'o'  => self::STATE_IDENTIFIER,
        'p'  => self::STATE_IDENTIFIER,
        'q'  => self::STATE_IDENTIFIER,
        'r'  => self::STATE_IDENTIFIER,
        's'  => self::STATE_IDENTIFIER,
        't'  => self::STATE_IDENTIFIER,
        'u'  => self::STATE_IDENTIFIER,
        'v'  => self::STATE_IDENTIFIER,
        'w'  => self::STATE_IDENTIFIER,
        'x'  => self::STATE_IDENTIFIER,
        'y'  => self::STATE_IDENTIFIER,
        'z'  => self::STATE_IDENTIFIER,
    ];

    /** @var array Characters allowed in an identifier after its first character (used as a set) */
    private $validIdentifier = [
        'A' => true, 'B' => true, 'C' => true, 'D' => true, 'E' => true,
        'F' => true, 'G' => true, 'H' => true, 'I' => true, 'J' => true,
        'K' => true, 'L' => true, 'M' => true, 'N' => true, 'O' => true,
        'P' => true, 'Q' => true, 'R' => true, 'S' => true, 'T' => true,
        'U' => true, 'V' => true, 'W' => true, 'X' => true, 'Y' => true,
        'Z' => true, 'a' => true, 'b' => true, 'c' => true, 'd' => true,
        'e' => true, 'f' => true, 'g' => true, 'h' => true, 'i' => true,
        'j' => true, 'k' => true, 'l' => true, 'm' => true, 'n' => true,
        'o' => true, 'p' => true, 'q' => true, 'r' => true, 's' => true,
        't' => true, 'u' => true, 'v' => true, 'w' => true, 'x' => true,
        'y' => true, 'z' => true, '_' => true, '0' => true, '1' => true,
        '2' => true, '3' => true, '4' => true, '5' => true, '6' => true,
        '7' => true, '8' => true, '9' => true,
    ];

    /** @var array Characters allowed in a number after its first character (used as a set) */
    private $numbers = [
        '0' => true, '1' => true, '2' => true, '3' => true, '4' => true,
        '5' => true, '6' => true, '7' => true, '8' => true, '9' => true
    ];

    /** @var array Maps each single-character token to its token type */
    private $simpleTokens = [
        '.' => self::T_DOT,
        '*' => self::T_STAR,
        ']' => self::T_RBRACKET,
        ',' => self::T_COMMA,
        ':' => self::T_COLON,
        '@' => self::T_CURRENT,
        '(' => self::T_LPAREN,
        ')' => self::T_RPAREN,
        '{' => self::T_LBRACE,
        '}' => self::T_RBRACE,
    ];

    /**
     * Tokenize the JMESPath expression into an array of token hashes that
     * each contain a 'type', a 'value', and a 'pos'.
     *
     * 'pos' is the byte offset of the first byte of the token. The returned
     * list always ends with a T_EOF token whose 'pos' is the byte length of
     * the input. Anything that cannot be tokenized (unexpected characters,
     * unterminated literals, malformed JSON, a lone "=" or "-") is returned
     * as a T_UNKNOWN token instead of raising an error.
     *
     * @param string $input JMESPath input
     *
     * @return array List of tokens, terminated by a T_EOF token.
     */
    public function tokenize($input)
    {
        $tokens = [];

        // str_split('') yields a one-element array on older PHP versions, so
        // an empty expression must not be split at all.
        $chars = $input === '' ? [] : str_split($input);

        while (false !== ($current = current($chars))) {

            // Every character must be in the transition character table.
            if (!isset(self::$transitionTable[$current])) {
                $tokens[] = [
                    'type'  => self::T_UNKNOWN,
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);
                continue;
            }

            $state = self::$transitionTable[$current];

            if ($state === self::STATE_SINGLE_CHAR) {

                // Consume simple tokens like ".", ",", "@", etc.
                $tokens[] = [
                    'type'  => $this->simpleTokens[$current],
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);

            } elseif ($state === self::STATE_IDENTIFIER) {

                // Consume identifiers
                $start = key($chars);
                $buffer = '';
                do {
                    $buffer .= $current;
                    $current = next($chars);
                } while ($current !== false && isset($this->validIdentifier[$current]));
                $tokens[] = [
                    'type'  => self::T_IDENTIFIER,
                    'value' => $buffer,
                    'pos'   => $start
                ];

            } elseif ($state === self::STATE_WHITESPACE) {

                // Skip whitespace
                next($chars);

            } elseif ($state === self::STATE_LBRACKET) {

                // Consume "[", "[?", and "[]"
                $position = key($chars);
                $actual = next($chars);
                if ($actual === ']') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FLATTEN,
                        'pos'   => $position,
                        'value' => '[]'
                    ];
                } elseif ($actual === '?') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FILTER,
                        'pos'   => $position,
                        'value' => '[?'
                    ];
                } else {
                    // Plain "[": the character after it was only peeked at
                    // and is handled by the next loop iteration.
                    $tokens[] = [
                        'type'  => self::T_LBRACKET,
                        'pos'   => $position,
                        'value' => '['
                    ];
                }

            } elseif ($state === self::STATE_STRING_LITERAL) {

                // Consume raw string literals; only \' is unescaped.
                $t = $this->inside($chars, "'", self::T_LITERAL);
                $t['value'] = str_replace("\\'", "'", $t['value']);
                $tokens[] = $t;

            } elseif ($state === self::STATE_PIPE) {

                // Consume pipe and OR
                $tokens[] = $this->matchOr($chars, '|', '|', self::T_OR, self::T_PIPE);

            } elseif ($state === self::STATE_JSON_LITERAL) {

                // Consume JSON literals. An unterminated literal comes back
                // as T_UNKNOWN and is passed through untouched.
                $token = $this->inside($chars, '`', self::T_LITERAL);
                if ($token['type'] === self::T_LITERAL) {
                    $token['value'] = str_replace('\\`', '`', $token['value']);
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;

            } elseif ($state === self::STATE_NUMBER) {

                // Consume numbers: an optional leading "-" followed by digits.
                $start = key($chars);
                $buffer = '';
                do {
                    $buffer .= $current;
                    $current = next($chars);
                } while ($current !== false && isset($this->numbers[$current]));

                if ($buffer === '-') {
                    // A sign without digits is not a number. Casting it
                    // would silently turn it into 0.
                    $tokens[] = [
                        'type'  => self::T_UNKNOWN,
                        'pos'   => $start,
                        'value' => $buffer
                    ];
                } else {
                    $tokens[] = [
                        'type'  => self::T_NUMBER,
                        'value' => (int) $buffer,
                        'pos'   => $start
                    ];
                }

            } elseif ($state === self::STATE_QUOTED_STRING) {

                // Consume quoted identifiers. The quotes are put back so the
                // value can be decoded as a JSON string.
                $token = $this->inside($chars, '"', self::T_QUOTED_IDENTIFIER);
                if ($token['type'] === self::T_QUOTED_IDENTIFIER) {
                    $token['value'] = '"' . $token['value'] . '"';
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;

            } elseif ($state === self::STATE_EQ) {

                // Consume equals; a single "=" is not a valid token.
                $tokens[] = $this->matchOr($chars, '=', '=', self::T_COMPARATOR, self::T_UNKNOWN);

            } elseif ($state === self::STATE_AND) {

                // Consume "&&" (and) or "&" (expression reference)
                $tokens[] = $this->matchOr($chars, '&', '&', self::T_AND, self::T_EXPREF);

            } elseif ($state === self::STATE_NOT) {

                // Consume "!=" (comparator) or "!" (not)
                $tokens[] = $this->matchOr($chars, '!', '=', self::T_COMPARATOR, self::T_NOT);

            } else {

                // either '<' or '>'
                // Consume "<", "<=", ">", and ">="
                $tokens[] = $this->matchOr($chars, $current, '=', self::T_COMPARATOR, self::T_COMPARATOR);

            }
        }

        // Token positions are byte offsets (the input was split with
        // str_split), so the end-of-input position is the byte length too.
        $tokens[] = [
            'type'  => self::T_EOF,
            'pos'   => strlen($input),
            'value' => null
        ];

        return $tokens;
    }

    /**
     * Returns a token based on whether or not the character following the
     * current one matches the expected value. If it does, both characters
     * are consumed and a token of "$type" is returned. Otherwise, only the
     * current character is consumed and a token of "$orElse" type is
     * returned.
     *
     * @param array  $chars    Array of characters by reference; its internal
     *                         pointer must be on the current character.
     * @param string $current  The current character.
     * @param string $expected Expected character.
     * @param string $type     Expected result type.
     * @param string $orElse   Otherwise return a token of this type.
     *
     * @return array Returns a conditional token positioned at $current.
     */
    private function matchOr(array &$chars, $current, $expected, $type, $orElse)
    {
        // Record the offset before advancing: once the pointer moves past
        // the last element key() returns null and the position is lost.
        $position = key($chars);

        if (next($chars) === $expected) {
            next($chars);
            return [
                'type'  => $type,
                'pos'   => $position,
                'value' => $current . $expected
            ];
        }

        return [
            'type'  => $orElse,
            'pos'   => $position,
            'value' => $current
        ];
    }

    /**
     * Returns a token that is the result of consuming everything between a
     * pair of delimiter characters. A backslash and the character after it
     * are copied to the value verbatim, so an escaped delimiter does not end
     * the token; removing the backslash is left to the caller. If the
     * closing delimiter is never found, an "unknown" token holding the text
     * read so far is returned.
     *
     * @param array  $chars Array of characters by reference; its internal
     *                      pointer must be on the opening delimiter.
     * @param string $delim The delimiter character.
     * @param string $type  Token type.
     *
     * @return array Returns the consumed token.
     */
    private function inside(array &$chars, $delim, $type)
    {
        $position = key($chars);
        $current = next($chars);
        $buffer = '';

        while ($current !== $delim) {
            if ($current === '\\') {
                // Keep the backslash and take the escaped character as-is.
                $buffer .= '\\';
                $current = next($chars);
            }
            if ($current === false) {
                // Unclosed delimiter
                return [
                    'type'  => self::T_UNKNOWN,
                    'value' => $buffer,
                    'pos'   => $position
                ];
            }
            $buffer .= $current;
            $current = next($chars);
        }

        // Step over the closing delimiter.
        next($chars);

        return ['type' => $type, 'value' => $buffer, 'pos' => $position];
    }

    /**
     * Parses a JSON token or sets the token type to "unknown" on error.
     *
     * @param array $token Token that needs parsing.
     *
     * @return array Returns a token with a parsed value, or the token with
     *               its original value and type "unknown" if it is not JSON.
     */
    private function parseJson(array $token)
    {
        $value = json_decode($token['value'], true);

        if (json_last_error() !== JSON_ERROR_NONE) {
            // Legacy support for elided quotes. Try to parse again by adding
            // quotes around the bad input value.
            $value = json_decode('"' . $token['value'] . '"', true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                $token['type'] = self::T_UNKNOWN;
                return $token;
            }
        }

        $token['value'] = $value;
        return $token;
    }
}