Proyectos de Subversion Moodle

Rev

Rev 1 | | Comparar con el anterior | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
// This file is part of Moodle - http://moodle.org/
4
//
5
// Moodle is free software: you can redistribute it and/or modify
6
// it under the terms of the GNU General Public License as published by
7
// the Free Software Foundation, either version 3 of the License, or
8
// (at your option) any later version.
9
//
10
// Moodle is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
// GNU General Public License for more details.
14
//
15
// You should have received a copy of the GNU General Public License
16
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17
 
18
/**
19
 * @package moodlecore
20
 * @subpackage backup-xml
21
 * @copyright 2010 onwards Eloy Lafuente (stronk7) {@link http://stronk7.com}
22
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
/**
 * Class implementing one SAX progressive push parser.
 *
 * SAX parser able to process XML content from files/variables. It supports
 * attributes and case folding and works only with UTF-8 content. It's one
 * progressive push parser because, instead of loading big chunks of information
 * in memory, it "publishes" (pushes) small pieces of information in a
 * "proprietary array format" through the corresponding progressive_parser_processor,
 * which is responsible for returning the information in handy formats to higher levels.
 *
 * Note that, while this progressive parser is able to process any XML file, it is
 * 100% progressive so it publishes the information in the original order it's parsed (that's
 * the expected behaviour) so information belonging to the same path can be returned in
 * different chunks if there are inner levels/paths in the middle. Be warned!
 *
 * The "proprietary array format" that the parser publishes to the progressive_parser_processor
 * is this:
 *    array (
 *        'path' => path where the tags belong to,
 *        'level'=> level (1-based) of the tags,
 *        'tags' => array (
 *            name of the tag => array (
 *                'name' => name of the tag,
 *                'attrs'=> array( name of the attr => value of the attr), only if the tag has attributes,
 *                'cdata'=> cdata of the tag, only if the tag was closed without opening inner tags
 *            )
 *        )
 *    )
 *
 * One parser instance can run process() successfully only once: the underlying
 * XML parser is released at the end of a successful run.
 */
class progressive_parser {

    /** @var mixed PHP's low level XML SAX parser, null once process() has completed */
    protected $xml_parser;

    /** @var string|null full path to the file being progressively parsed (mutually exclusive with $contents) */
    protected $file;

    /** @var string|null contents being progressively parsed (mutually exclusive with $file) */
    protected $contents;

    /**
     * @var progressive_parser_processor to be used to publish processed information
     */
    protected $processor;

    /** @var int level of the current tag */
    protected $level;

    /** @var string path of the current tag */
    protected $path;

    /** @var string accumulated char data of the current tag */
    protected $accum;

    /** @var array attributes of the current tag */
    protected $attrs;

    /** @var array current level information being parsed, waiting to be "pushed" */
    protected $topush;

    /** @var int level of the previous tag processed - used to detect pushing places */
    protected $prevlevel;

    /** @var array name/cdata/attributes of the tag being processed */
    protected $currtag;

    /**
     * @var \core\progress\base Progress tracker called for each action
     */
    protected $progress;

    /**
     * Creates the low level UTF-8 XML parser, wires its handlers to this
     * object and resets all the parsing state.
     *
     * @param bool $case_folding value handed to the XML_OPTION_CASE_FOLDING parser option
     */
    public function __construct($case_folding = false) {
        $this->xml_parser = xml_parser_create('UTF-8');
        xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, $case_folding);
        xml_set_element_handler($this->xml_parser, array($this, 'start_tag'), array($this, 'end_tag'));
        xml_set_character_data_handler($this->xml_parser, array($this, 'char_data'));

        $this->file      = null;
        $this->contents  = null;
        $this->level     = 0;
        $this->path      = '';
        $this->accum     = '';
        $this->attrs     = array();
        $this->topush    = array();
        $this->prevlevel = 0;
        $this->currtag   = array();
    }

    /**
     * Sets the XML file to be processed by the parser.
     *
     * Any contents previously set with set_contents() are discarded.
     *
     * @param string $file full path to the XML file
     * @throws progressive_parser_exception if the file does not exist or is not readable
     */
    public function set_file($file) {
        if (!file_exists($file) || (!is_readable($file))) {
            throw new progressive_parser_exception('invalid_file_to_parse');
        }
        $this->file = $file;
        $this->contents = null;
    }

    /**
     * Sets the XML contents to be processed by the parser.
     *
     * Any file previously set with set_file() is discarded.
     *
     * @param string $contents XML contents
     * @throws progressive_parser_exception if the contents are empty
     */
    public function set_contents($contents) {
        if (empty($contents)) {
            throw new progressive_parser_exception('invalid_contents_to_parse');
        }
        $this->contents = $contents;
        $this->file = null;
    }

    /**
     * Defines the progressive_parser_processor in charge of processing the parsed chunks.
     *
     * @param progressive_parser_processor $processor
     * @throws progressive_parser_exception if $processor is not a progressive_parser_processor
     */
    public function set_processor($processor) {
        if (!$processor instanceof progressive_parser_processor) {
            throw new progressive_parser_exception('invalid_parser_processor');
        }
        $this->processor = $processor;
    }

    /**
     * Sets the progress tracker for the parser. If set, the tracker will be
     * called to report indeterminate progress for each chunk of XML.
     *
     * The caller should have already called start_progress on the progress tracker.
     *
     * @param \core\progress\base $progress Progress tracker
     */
    public function set_progress(\core\progress\base $progress) {
        $this->progress = $progress;
    }

    /**
     * Processes the XML, delegating found chunks to the progressive_parser_processor.
     *
     * @throws progressive_parser_exception if no processor or no XML source has been set,
     *         if the parser has already been used, if the file cannot be opened or if
     *         the XML is not well formed
     */
    public function process() {
        if (empty($this->processor)) {
            throw new progressive_parser_exception('undefined_parser_processor');
        }
        if (empty($this->file) && empty($this->contents)) {
            throw new progressive_parser_exception('undefined_xml_to_parse');
        }
        if (is_null($this->xml_parser)) {
            throw new progressive_parser_exception('progressive_parser_already_used');
        }
        if ($this->file) {
            $this->parse_file();
        } else {
            $this->parse($this->contents, true);
        }
        // xml_parser_free() does nothing since PHP 8.0 (parsers are objects there),
        // so only call it on the older versions where it really releases the resource.
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($this->xml_parser);
        }
        $this->xml_parser = null;
    }

    /**
     * Provides one cross-platform dirname function for
     * handling parser paths, see MDL-24381
     *
     * @param string $path parser path
     * @return string parent path, always using forward slashes
     */
    public static function dirname($path) {
        return str_replace('\\', '/', dirname($path));
    }

    // Protected API starts here.

    /**
     * Feeds one piece of XML to the low level parser.
     *
     * @param string $data XML data to parse
     * @param bool $eof true if this is the last piece of the document
     * @throws progressive_parser_exception if the low level parser reports an error
     */
    protected function parse($data, $eof) {
        if (!xml_parse($this->xml_parser, $data, $eof)) {
            throw new progressive_parser_exception(
                'xml_parsing_error', null,
                sprintf('XML error: %s at line %d, column %d',
                        xml_error_string(xml_get_error_code($this->xml_parser)),
                        xml_get_current_line_number($this->xml_parser),
                        xml_get_current_column_number($this->xml_parser)));
        }
    }

    /**
     * Sends one chunk to the processor and, if a progress tracker
     * has been set, reports indeterminate progress to it.
     *
     * @param array $data chunk in the format described in the class documentation
     */
    protected function publish($data) {
        $this->processor->receive_chunk($data);
        if (!empty($this->progress)) {
            // Report indeterminate progress.
            $this->progress->progress();
        }
    }

    /**
     * Inform to the processor that we have started parsing one path
     *
     * @param string $path path being started
     */
    protected function inform_start($path) {
        $this->processor->before_path($path);
    }

    /**
     * Inform to the processor that we have finished parsing one path
     *
     * @param string $path path being finished
     */
    protected function inform_end($path) {
        $this->processor->after_path($path);
    }

    /**
     * Lets the processor post-process the char data accumulated for one tag.
     *
     * @param string $data accumulated char data
     * @return mixed whatever the processor's process_cdata() returns
     */
    protected function postprocess_cdata($data) {
        return $this->processor->process_cdata($data);
    }

    /**
     * Start element handler registered in the low level parser.
     *
     * @param mixed $parser low level parser invoking the handler
     * @param string $tag name of the tag being opened
     * @param array $attributes attributes of the tag
     */
    protected function start_tag($parser, $tag, $attributes) {

        // Normal update of parser internals.
        $this->level++;
        $this->path .= '/' . $tag;
        $this->accum = '';
        $this->attrs = !empty($attributes) ? $attributes : array();

        // Inform processor we are about to start one tag.
        $this->inform_start($this->path);

        // Entering a new inner level, publish all the information available.
        if ($this->level > $this->prevlevel) {
            if (!empty($this->currtag) && (!empty($this->currtag['attrs']) || !empty($this->currtag['cdata']))) {
                $this->keep_current_tag();
            }
            if (!empty($this->topush['tags'])) {
                $this->publish($this->topush);
            }
            $this->currtag = array();
            $this->topush = array();
        }

        // If not set, build to push common header.
        if (empty($this->topush)) {
            $this->topush['path']  = self::dirname($this->path);
            $this->topush['level'] = $this->level;
            $this->topush['tags']  = array();
        }

        // Handling a new tag, create it.
        $this->currtag['name'] = $tag;
        // And add attributes if present.
        if ($this->attrs) {
            $this->currtag['attrs'] = $this->attrs;
        }

        // For the records.
        $this->prevlevel = $this->level;
    }

    /**
     * End element handler registered in the low level parser.
     *
     * @param mixed $parser low level parser invoking the handler
     * @param string $tag name of the tag being closed
     */
    protected function end_tag($parser, $tag) {

        // Ending recently started tag, add value to current tag.
        if ($this->level == $this->prevlevel) {
            $this->currtag['cdata'] = $this->postprocess_cdata($this->accum);
            $this->keep_current_tag();
            $this->currtag = array();
        }

        // Leaving one level, publish all the information available.
        if ($this->level < $this->prevlevel) {
            if (!empty($this->topush['tags'])) {
                $this->publish($this->topush);
            }
            $this->currtag = array();
            $this->topush = array();
        }

        // For the records.
        $this->prevlevel = $this->level;

        // Inform processor we have finished one tag.
        $this->inform_end($this->path);

        // Normal update of parser internals.
        $this->level--;
        $this->path = self::dirname($this->path);
    }

    /**
     * Character data handler registered in the low level parser. Char data
     * can arrive in several calls, so it is accumulated until the tag ends.
     *
     * @param mixed $parser low level parser invoking the handler
     * @param string $data piece of char data
     */
    protected function char_data($parser, $data) {
        $this->accum .= $data;
    }

    /**
     * Reads the file set with set_file() in 8192 byte pieces and parses them.
     *
     * The low level parser is always told when the document is over, even if the
     * end of the file was not detected while reading the last piece (that happens,
     * for example, when the size of the file is an exact multiple of the piece
     * size), so incomplete documents are reported instead of silently accepted.
     * One completely empty file is not parsed at all.
     *
     * @throws progressive_parser_exception if the file cannot be opened or the XML is not well formed
     */
    private function parse_file() {
        $fh = fopen($this->file, 'r');
        if ($fh === false) {
            throw new progressive_parser_exception('invalid_file_to_parse');
        }
        try {
            $parsed = false;    // Has any piece been sent to the parser?
            $finalised = false; // Was the last piece sent flagged as the final one?
            // Compare explicitly: one piece like "0" is falsy but still has to be parsed.
            while (($buffer = fread($fh, 8192)) !== false && $buffer !== '') {
                $finalised = feof($fh);
                $this->parse($buffer, $finalised);
                $parsed = true;
            }
            if ($parsed && !$finalised) {
                $this->parse('', true);
            }
        } finally {
            // Never leak the handle, no matter if the parsing failed.
            fclose($fh);
        }
    }

    /**
     * Stores the current tag in the information to push, indexed by tag name.
     *
     * We always keep the last not-empty repetition of one tag: if the tag is
     * already stored and the cdata of the current repetition is empty (or not
     * available yet, because the tag has not been closed), nothing is stored.
     */
    private function keep_current_tag() {
        $name = $this->currtag['name'];
        // The cdata key only exists once the tag has been closed.
        $cdata = isset($this->currtag['cdata']) ? $this->currtag['cdata'] : '';
        if (isset($this->topush['tags'][$name]) && trim($cdata) === '') {
            // Do nothing, the tag already exists and the repetition is empty.
            return;
        }
        $this->topush['tags'][$name] = $this->currtag;
    }
}
297
 
298
/**
 * Exception thrown by the progressive_parser machinery.
 *
 * It only narrows the moodle_exception constructor: the second and third
 * arguments handed to the parent are fixed to 'error' and '' respectively.
 */
class progressive_parser_exception extends moodle_exception {

    /**
     * @param string $errorcode code identifying the error
     * @param mixed $a extra information forwarded untouched to the parent constructor
     * @param string|null $debuginfo debugging details forwarded untouched to the parent constructor
     */
    public function __construct($errorcode, $a = null, $debuginfo = null) {
        // NOTE(review): presumably these are the language component and the
        // continue link expected by moodle_exception - confirm against its definition.
        $component = 'error';
        $link = '';
        parent::__construct($errorcode, $component, $link, $a, $debuginfo);
    }
}