Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
/**
4
 * PHP lexer code snarfed from the CVS tree for the lamplib project at
5
 * http://sourceforge.net/projects/lamplib
6
 * This project is administered by Markus Baker, Harry Fuecks and Matt
7
 * Mitchell, and the project code is in the public domain.
8
 *
9
 * Thanks, guys!
10
 *
11
 * @package   moodlecore
12
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
13
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
14
 */
15
 
16
    /** Token state passed to the handler when the matched text enters a new mode. */
    define('LEXER_ENTER', 1);
    /** Token state passed to the handler for a matched token that does not change the mode. */
    define('LEXER_MATCHED', 2);
    /** Token state passed to the handler for text that no pattern matched. */
    define('LEXER_UNMATCHED', 3);
    /** Token state passed to the handler when the matched text leaves the current mode. */
    define('LEXER_EXIT', 4);
    /** Token state passed to the handler for a token handled in a one-off special mode. */
    define('LEXER_SPECIAL', 5);
26
 
27
    /**
28
     * Compounded regular expression. Any of
29
     * the contained patterns could match and
30
     * when one does its label is returned.
31
     * @package   moodlecore
32
     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
33
     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
34
     */
35
    class ParallelRegex {
        /** @var array Patterns exactly as passed to addPattern(), in insertion order. */
        public $_patterns;
        /** @var array Labels, index-aligned with $_patterns. */
        public $_labels;
        /** @var string|null Cached compound regex; null means it has to be rebuilt. */
        public $_regex;
        /** @var bool True for case sensitive matching, false for insensitive. */
        public $_case;

        /**
         * Constructor. Starts with no patterns.
         *
         * @param bool $case True for case sensitive, false for insensitive.
         */
        public function __construct($case) {
            $this->_case = $case;
            $this->_patterns = array();
            $this->_labels = array();
            $this->_regex = null;
        }

        /**
         * Old syntax of class constructor. Deprecated in PHP7.
         *
         * @deprecated since Moodle 3.1
         */
        public function ParallelRegex($case) {
            debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
            self::__construct($case);
        }

        /**
         * Adds a pattern with an optional label and invalidates the cached regex.
         *
         * @param string $pattern Perl style regex, but ( and ) lose the usual
         *                        meaning (they are escaped when compiled).
         * @param mixed $label    Value returned by match() when this pattern
         *                        is the one that matched; defaults to true.
         */
        public function addPattern($pattern, $label = true) {
            $this->_patterns[] = $pattern;
            $this->_labels[] = $label;
            $this->_regex = null;
        }

        /**
         * Attempts to match all patterns at once against a string.
         *
         * @param string $subject String to match against.
         * @param string $match   Set to the first matched portion of the
         *                        subject, or to "" when nothing matched.
         * @return mixed          False when nothing matched (or no patterns
         *                        are registered), otherwise the label of the
         *                        pattern whose group captured text, or true
         *                        when no group captured any text.
         */
        public function match($subject, &$match) {
            $match = "";
            if (count($this->_patterns) == 0) {
                return false;
            }
            if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
                return false;
            }
            $match = $matches[0];
            $groups = count($matches);
            for ($i = 1; $i < $groups; $i++) {
                // Compare against "" rather than testing truthiness: a token
                // such as "0" is a real match but evaluates to false.
                if ($matches[$i] !== "") {
                    return $this->_labels[$i - 1];
                }
            }
            return true;
        }

        /**
         * Compounds the patterns into a single regular expression separated
         * with the "or" operator and caches the result. Each pattern becomes
         * one capture group; /, ( and ) inside a pattern are escaped.
         *
         * The escaped copies are built in a local array so the stored patterns
         * stay untouched and a rebuild after addPattern() does not escape the
         * earlier patterns a second time.
         *
         * @return string Complete regex including delimiters and flags.
         */
        public function _getCompoundedRegex() {
            if ($this->_regex == null) {
                $groups = array();
                foreach ($this->_patterns as $pattern) {
                    $groups[] = '(' . str_replace(
                            array('/', '(', ')'),
                            array('\/', '\(', '\)'),
                            $pattern) . ')';
                }
                $this->_regex = "/" . implode("|", $groups) . "/" . $this->_getPerlMatchingFlags();
            }
            return $this->_regex;
        }

        /**
         * Accessor for perl regex mode flags to use.
         *
         * @return string Flags as string; "i" is appended for case
         *                insensitive matching.
         */
        public function _getPerlMatchingFlags() {
            return ($this->_case ? "msS" : "msSi");
        }
    }
134
 
135
    /**
136
     * States for a stack machine.
137
     *
138
     * @package   moodlecore
139
     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
140
     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
141
     */
142
    class StateStack {
        /** @var array State names, oldest first; the last element is the current state. */
        public $_stack;

        /**
         * Constructor. Starts in named state.
         *
         * @param string $start Starting state name.
         */
        public function __construct($start) {
            $this->_stack = array($start);
        }

        /**
         * Old syntax of class constructor. Deprecated in PHP7.
         *
         * @deprecated since Moodle 3.1
         */
        public function StateStack($start) {
            debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
            self::__construct($start);
        }

        /**
         * Accessor for current state.
         *
         * @return string The state on top of the stack.
         */
        public function getCurrent() {
            $top = count($this->_stack) - 1;
            return $this->_stack[$top];
        }

        /**
         * Adds a state to the stack and sets it to be the current state.
         *
         * @param string $state New state.
         */
        public function enter($state) {
            $this->_stack[] = $state;
        }

        /**
         * Leaves the current state and reverts to the previous one.
         *
         * @return bool False if only one state is left on the stack (nothing
         *              is removed in that case), true otherwise.
         */
        public function leave() {
            $canLeave = count($this->_stack) != 1;
            if ($canLeave) {
                array_pop($this->_stack);
            }
            return $canLeave;
        }
    }
198
 
199
    /**
200
     * Accepts text and breaks it into tokens.
201
     * Some optimisation to make sure the
202
     * content is only scanned by the PHP regex
203
     * parser once. Lexer modes must not start
204
     * with leading underscores.
205
     *
206
     * @package   moodlecore
207
     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
208
     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
209
     */
210
    class Lexer {
        /** @var array ParallelRegex objects keyed by mode name. */
        public $_regexes;
        /** @var object Parser whose methods are called with each token. */
        public $_parser;
        /** @var StateStack Stack of modes; the top entry is the current mode. */
        public $_mode;
        /** @var array Optional mode name => handler method name overrides. */
        public $_mode_handlers;
        /** @var bool True for case sensitive matching. */
        public $_case;

        /**
         * Sets up the lexer in case insensitive matching by default.
         *
         * @param object $parser Handling strategy by reference.
         * @param string $start  Starting mode (also the default handler name).
         * @param bool $case     True for case sensitive.
         */
        public function __construct(&$parser, $start = "accept", $case = false) {
            $this->_case = $case;
            $this->_regexes = array();
            $this->_parser = &$parser;
            $this->_mode = new StateStack($start);
            $this->_mode_handlers = array();
        }

        /**
         * Old syntax of class constructor. Deprecated in PHP7.
         *
         * @deprecated since Moodle 3.1
         */
        public function Lexer(&$parser, $start = "accept", $case = false) {
            debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
            self::__construct($parser, $start, $case);
        }

        /**
         * Adds a token search pattern for a particular parsing mode. The
         * pattern does not change the current mode.
         *
         * @param string $pattern Perl style regex, but ( and ) lose the
         *                        usual meaning.
         * @param string $mode    Should only apply this pattern when dealing
         *                        with this type of input.
         */
        public function addPattern($pattern, $mode = "accept") {
            $this->_getRegexForMode($mode)->addPattern($pattern);
        }

        /**
         * Adds a pattern that will enter a new parsing mode. Useful for
         * entering parenthesis, strings, tags, etc.
         *
         * @param string $pattern  Perl style regex, but ( and ) lose the
         *                         usual meaning.
         * @param string $mode     Should only apply this pattern when dealing
         *                         with this type of input.
         * @param string $new_mode Change parsing to this new nested mode.
         */
        public function addEntryPattern($pattern, $mode, $new_mode) {
            $this->_getRegexForMode($mode)->addPattern($pattern, $new_mode);
        }

        /**
         * Adds a pattern that will exit the current mode and re-enter the
         * previous one.
         *
         * @param string $pattern Perl style regex, but ( and ) lose the
         *                        usual meaning.
         * @param string $mode    Mode to leave.
         */
        public function addExitPattern($pattern, $mode) {
            $this->_getRegexForMode($mode)->addPattern($pattern, "__exit");
        }

        /**
         * Adds a pattern that has a special mode. Acts as an entry and exit
         * pattern in one go.
         *
         * @param string $pattern Perl style regex, but ( and ) lose the
         *                        usual meaning.
         * @param string $mode    Should only apply this pattern when dealing
         *                        with this type of input.
         * @param string $special Use this mode for this one token.
         */
        public function addSpecialPattern($pattern, $mode, $special) {
            // The leading underscore marks the label as a special mode; it is
            // stripped again in _dispatchTokens().
            $this->_getRegexForMode($mode)->addPattern($pattern, "_$special");
        }

        /**
         * Adds a mapping from a mode to another handler.
         *
         * @param string $mode    Mode to be remapped.
         * @param string $handler New target handler.
         */
        public function mapHandler($mode, $handler) {
            $this->_mode_handlers[$mode] = $handler;
        }

        /**
         * Splits the text into tokens. Will fail if the handlers report an
         * error or if no content is consumed. If successful then each
         * unparsed and parsed token invokes a call to the held listener.
         *
         * @param string $raw Raw text.
         * @return bool       True on success, else false.
         */
        public function parse($raw) {
            if (!isset($this->_parser)) {
                return false;
            }
            $length = strlen($raw);
            while (is_array($parsed = $this->_reduce($raw))) {
                list($unmatched, $matched, $mode) = $parsed;
                if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
                    return false;
                }
                // A step that consumed nothing would loop forever.
                $remaining = strlen($raw);
                if ($remaining == $length) {
                    return false;
                }
                $length = $remaining;
            }
            if (!$parsed) {
                return false;
            }
            // Whatever is left after the last match is unmatched trailing text.
            return $this->_invokeParser($raw, LEXER_UNMATCHED);
        }

        /**
         * Sends the matched token and any leading unmatched text to the
         * parser, changing the lexer to a new mode if one is listed.
         *
         * @param string $unmatched Unmatched leading portion.
         * @param string $matched   Actual token match.
         * @param mixed $mode       Label of the matching pattern. "__exit"
         *                          causes a stack pop, a label starting with
         *                          "_" is a special mode used for this token
         *                          only, any other string is a mode to enter
         *                          and a non-string causes no change.
         * @return bool             False if there was any error from the
         *                          parser, or if the mode could not be left.
         */
        public function _dispatchTokens($unmatched, $matched, $mode = false) {
            if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
                return false;
            }
            if (!is_string($mode)) {
                // Boolean labels come from plain patterns: no mode change.
                return $this->_invokeParser($matched, LEXER_MATCHED);
            }
            if ($mode === "__exit") {
                if (!$this->_invokeParser($matched, LEXER_EXIT)) {
                    return false;
                }
                return $this->_mode->leave();
            }
            if (strncmp($mode, "_", 1) == 0) {
                // Special mode: enter, handle the single token, leave again.
                $this->_mode->enter(substr($mode, 1));
                if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
                    return false;
                }
                return $this->_mode->leave();
            }
            $this->_mode->enter($mode);
            return $this->_invokeParser($matched, LEXER_ENTER);
        }

        /**
         * Calls the parser method named after the current mode (or the
         * handler mapped to it). Empty content will be ignored.
         *
         * @param string $content Text parsed.
         * @param int $is_match   One of the LEXER_* constants describing how
         *                        the text was recognised.
         * @return mixed          True for ignored empty content, otherwise
         *                        whatever the handler returns.
         */
        public function _invokeParser($content, $is_match) {
            if (($content === "") || ($content === false)) {
                return true;
            }
            $handler = $this->_mode->getCurrent();
            if (isset($this->_mode_handlers[$handler])) {
                $handler = $this->_mode_handlers[$handler];
            }
            return $this->_parser->$handler($content, $is_match);
        }

        /**
         * Tries to match a chunk of text and if successful removes the
         * recognised chunk and any leading unparsed data from $raw.
         *
         * @param string $raw The subject to parse. This is the content that
         *                    will be eaten.
         * @return bool|array Three item list of unparsed content followed by
         *                    the recognised token and finally the action the
         *                    parser is to take. True if no match, false if
         *                    the current mode has no patterns.
         */
        public function _reduce(&$raw) {
            $mode = $this->_mode->getCurrent();
            if (!isset($this->_regexes[$mode])) {
                return false;
            }
            if ($raw === "") {
                return true;
            }
            $action = $this->_regexes[$mode]->match($raw, $match);
            if (!$action) {
                return true;
            }
            // strpos() rejects an empty needle before PHP 8, and an empty
            // match sits at offset 0 anyway.
            // NOTE(review): strpos() finds the first occurrence of the matched
            // text, which can be earlier than the regex match position when a
            // pattern relies on context such as \b or ^ - confirm no pattern
            // in use depends on that.
            $offset = ($match === "") ? 0 : strpos($raw, $match);
            $unparsed = substr($raw, 0, $offset);
            $raw = substr($raw, $offset + strlen($match));
            return array($unparsed, $match, $action);
        }

        /**
         * Returns the compound regex for a mode, creating it on first use.
         *
         * @param string $mode Mode name.
         * @return ParallelRegex
         */
        private function _getRegexForMode($mode) {
            if (!isset($this->_regexes[$mode])) {
                $this->_regexes[$mode] = new ParallelRegex($this->_case);
            }
            return $this->_regexes[$mode];
        }
    }
443
?>