Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
// This file is part of Moodle - http://moodle.org/
4
//
5
// Moodle is free software: you can redistribute it and/or modify
6
// it under the terms of the GNU General Public License as published by
7
// the Free Software Foundation, either version 3 of the License, or
8
// (at your option) any later version.
9
//
10
// Moodle is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
// GNU General Public License for more details.
14
//
15
// You should have received a copy of the GNU General Public License
16
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17
 
18
/**
19
 * @package moodlecore
20
 * @subpackage backup-xml
21
 * @copyright 2010 onwards Eloy Lafuente (stronk7) {@link http://stronk7.com}
22
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
/**
 * Class implementing one SAX progressive push parser.
 *
 * SAX parser able to process XML content from files/variables. It supports
 * attributes and case folding and works only with UTF-8 content. It is a
 * progressive push parser because, instead of loading big chunks of information
 * in memory, it "publishes" (pushes) small pieces of information in a
 * "proprietary array format" through the corresponding @progressive_parser_processor,
 * which is responsible for returning the information in handy formats to higher levels.
 *
 * Note that, while this progressive parser is able to process any XML file, it is
 * 100% progressive so it publishes the information in the original order it's parsed (that's
 * the expected behaviour) so information belonging to the same path can be returned in
 * different chunks if there are inner levels/paths in the middle. Be warned!
 *
 * The "proprietary array format" that the parser publishes to the @progressive_parser_processor
 * is this:
 *    array (
 *        'path' => path where the tags belong to,
 *        'level'=> level (1-based) of the tags
 *        'tags' => array (
 *            name of the tag => array (
 *                'name' => name of the tag,
 *                'attrs'=> array( name of the attr => value of the attr), only when the tag has attributes
 *                'cdata'=> cdata of the tag, as returned by the processor's process_cdata()
 *            )
 *        )
 *    )
 *
 * When the same tag name is repeated within one chunk, the last non-empty repetition wins.
 */
class progressive_parser {

    /** @var mixed PHP's low level XML SAX parser; set to null once process() has finished */
    protected $xml_parser;

    /** @var string|null full path to the file being progressively parsed (mutually exclusive with $contents) */
    protected $file;

    /** @var string|null contents being progressively parsed (mutually exclusive with $file) */
    protected $contents;

    /**
     * @var progressive_parser_processor to be used to publish processed information
     */
    protected $processor;

    /** @var int level of the current tag */
    protected $level;

    /** @var string path of the current tag */
    protected $path;

    /** @var string accumulated char data of the current tag */
    protected $accum;

    /** @var array attributes of the current tag */
    protected $attrs;

    /** @var array current level information being parsed, waiting to be "pushed" */
    protected $topush;

    /** @var int level of the previous tag processed - used to detect pushing places */
    protected $prevlevel;

    /** @var array name/value/attributes of the tag being processed */
    protected $currtag;

    /**
     * @var \core\progress\base Progress tracker called for each action
     */
    protected $progress;

    /**
     * Creates the underlying UTF-8 SAX parser, wires its handlers and resets the parsing state.
     *
     * @param bool $case_folding value given to the XML_OPTION_CASE_FOLDING parser option
     */
    public function __construct($case_folding = false) {
        $this->xml_parser = xml_parser_create('UTF-8');
        xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, $case_folding);
        xml_set_object($this->xml_parser, $this);
        xml_set_element_handler($this->xml_parser, array($this, 'start_tag'), array($this, 'end_tag'));
        xml_set_character_data_handler($this->xml_parser, array($this, 'char_data'));

        $this->file      = null;
        $this->contents  = null;
        $this->level     = 0;
        $this->path      = '';
        $this->accum     = '';
        $this->attrs     = array();
        $this->topush    = array();
        $this->prevlevel = 0;
        $this->currtag   = array();
    }

    /**
     * Sets the XML file to be processed by the parser (discarding any contents previously set).
     *
     * @param string $file full path to the XML file
     * @throws progressive_parser_exception if the file does not exist or is not readable
     */
    public function set_file($file) {
        if (!file_exists($file) || (!is_readable($file))) {
            throw new progressive_parser_exception('invalid_file_to_parse');
        }
        $this->file = $file;
        $this->contents = null;
    }

    /**
     * Sets the XML contents to be processed by the parser (discarding any file previously set).
     *
     * @param string $contents XML to parse
     * @throws progressive_parser_exception if the contents are empty
     */
    public function set_contents($contents) {
        if (empty($contents)) {
            throw new progressive_parser_exception('invalid_contents_to_parse');
        }
        $this->contents = $contents;
        $this->file = null;
    }

    /**
     * Define the @progressive_parser_processor in charge of processing the parsed chunks.
     *
     * @param progressive_parser_processor $processor
     * @throws progressive_parser_exception if $processor is not a progressive_parser_processor
     */
    public function set_processor($processor) {
        if (!$processor instanceof progressive_parser_processor) {
            throw new progressive_parser_exception('invalid_parser_processor');
        }
        $this->processor = $processor;
    }

    /**
     * Sets the progress tracker for the parser. If set, the tracker will be
     * called to report indeterminate progress for each chunk of XML.
     *
     * The caller should have already called start_progress on the progress tracker.
     *
     * @param \core\progress\base $progress Progress tracker
     */
    public function set_progress(\core\progress\base $progress) {
        $this->progress = $progress;
    }

    /**
     * Process the XML, delegating found chunks to the @progressive_parser_processor.
     *
     * The parser is single use: once processing completes the low level parser is
     * released and any further call throws 'progressive_parser_already_used'.
     *
     * @throws progressive_parser_exception if there is no processor, nothing to parse,
     *         the parser was already used, the file cannot be opened or the XML is invalid
     */
    public function process() {
        if (empty($this->processor)) {
            throw new progressive_parser_exception('undefined_parser_processor');
        }
        if (empty($this->file) && empty($this->contents)) {
            throw new progressive_parser_exception('undefined_xml_to_parse');
        }
        if (is_null($this->xml_parser)) {
            throw new progressive_parser_exception('progressive_parser_already_used');
        }
        if ($this->file) {
            $this->parse_file($this->file);
        } else {
            $this->parse($this->contents, true);
        }
        xml_parser_free($this->xml_parser);
        $this->xml_parser = null;
    }

    /**
     * Provides one cross-platform dirname function for
     * handling parser paths, see MDL-24381
     *
     * @param string $path parser path
     * @return string parent path, always using forward slashes
     */
    public static function dirname($path) {
        return str_replace('\\', '/', dirname($path));
    }

// Protected API starts here

    /**
     * Feeds one piece of XML to the low level parser.
     *
     * @param string $data XML data to parse
     * @param bool $eof true if this is the last piece of the document
     * @throws progressive_parser_exception with the error text, line and column when parsing fails
     */
    protected function parse($data, $eof) {
        if (!xml_parse($this->xml_parser, $data, $eof)) {
            throw new progressive_parser_exception(
                'xml_parsing_error', null,
                sprintf('XML error: %s at line %d, column %d',
                        xml_error_string(xml_get_error_code($this->xml_parser)),
                        xml_get_current_line_number($this->xml_parser),
                        xml_get_current_column_number($this->xml_parser)));
        }
    }

    /**
     * Sends one chunk to the processor and, if a tracker is set, reports progress.
     *
     * @param array $data chunk in the array format described in the class documentation
     */
    protected function publish($data) {
        $this->processor->receive_chunk($data);
        if (!empty($this->progress)) {
            // Report indeterminate progress.
            $this->progress->progress();
        }
    }

    /**
     * Inform to the processor that we have started parsing one path
     *
     * @param string $path path being started
     */
    protected function inform_start($path) {
        $this->processor->before_path($path);
    }

    /**
     * Inform to the processor that we have finished parsing one path
     *
     * @param string $path path being finished
     */
    protected function inform_end($path) {
        $this->processor->after_path($path);
    }

    /**
     * Lets the processor post-process the char data accumulated for one tag.
     *
     * @param string $data accumulated char data
     * @return mixed whatever the processor's process_cdata() returns
     */
    protected function postprocess_cdata($data) {
        return $this->processor->process_cdata($data);
    }

    /**
     * SAX handler called when an element is opened.
     *
     * @param mixed $parser low level parser invoking the handler (unused)
     * @param string $tag name of the element
     * @param array $attributes attributes of the element
     */
    protected function start_tag($parser, $tag, $attributes) {

        // Normal update of parser internals
        $this->level++;
        $this->path .= '/' . $tag;
        $this->accum = '';
        $this->attrs = !empty($attributes) ? $attributes : array();

        // Inform processor we are about to start one tag
        $this->inform_start($this->path);

        // Entering a new inner level, publish all the information available
        if ($this->level > $this->prevlevel) {
            if (!empty($this->currtag) && (!empty($this->currtag['attrs']) || !empty($this->currtag['cdata']))) {
                $this->store_current_tag();
            }
            $this->flush_pending();
        }

        // If not set, build to push common header
        if (empty($this->topush)) {
            $this->topush['path']  = progressive_parser::dirname($this->path);
            $this->topush['level'] = $this->level;
            $this->topush['tags']  = array();
        }

        // Handling a new tag, create it
        $this->currtag['name'] = $tag;
        // And add attributes if present
        if ($this->attrs) {
            $this->currtag['attrs'] = $this->attrs;
        }

        // For the records
        $this->prevlevel = $this->level;
    }

    /**
     * SAX handler called when an element is closed.
     *
     * @param mixed $parser low level parser invoking the handler (unused)
     * @param string $tag name of the element
     */
    protected function end_tag($parser, $tag) {

        // Ending recently started tag, add value to current tag
        if ($this->level == $this->prevlevel) {
            $this->currtag['cdata'] = $this->postprocess_cdata($this->accum);
            $this->store_current_tag();
            $this->currtag = array();
        }

        // Leaving one level, publish all the information available
        if ($this->level < $this->prevlevel) {
            $this->flush_pending();
        }

        // For the records
        $this->prevlevel = $this->level;

        // Inform processor we have finished one tag
        $this->inform_end($this->path);

        // Normal update of parser internals
        $this->level--;
        $this->path = progressive_parser::dirname($this->path);
    }

    /**
     * SAX handler called with (a piece of) the char data of the current element.
     *
     * @param mixed $parser low level parser invoking the handler (unused)
     * @param string $data char data to accumulate
     */
    protected function char_data($parser, $data) {
        $this->accum .= $data;
    }

    /**
     * Reads the file in 8192 byte pieces and feeds them to the parser.
     *
     * @param string $file full path to the XML file
     * @throws progressive_parser_exception if the file cannot be opened or the XML is invalid
     */
    private function parse_file($file) {
        $fh = fopen($file, 'r');
        if ($fh === false) {
            throw new progressive_parser_exception('invalid_file_to_parse');
        }
        $started = false;
        $finished = false;
        try {
            while (!$finished) {
                $buffer = fread($fh, 8192);
                // Compare strictly: a piece consisting of just "0" is valid data, not the end.
                if ($buffer === false || $buffer === '') {
                    break;
                }
                $started = true;
                $finished = feof($fh);
                $this->parse($buffer, $finished);
            }
            // feof() only turns true once a read hits the end of the file, so when the size is an
            // exact multiple of the piece size the last piece is not flagged as final. Tell the
            // parser explicitly, or a truncated document would be accepted silently.
            if ($started && !$finished) {
                $this->parse('', true);
            }
        } finally {
            // Always release the handle, also when parsing or the processor throws.
            fclose($fh);
        }
    }

    /**
     * Adds the current tag to the information to be pushed.
     *
     * We always add the last not-empty repetition. Empty ones are ignored.
     */
    private function store_current_tag() {
        $name = $this->currtag['name'];
        // The cdata is only known once the tag is closed, so it can be missing here.
        $cdata = isset($this->currtag['cdata']) ? $this->currtag['cdata'] : '';
        if (isset($this->topush['tags'][$name]) && trim($cdata) === '') {
            // Do nothing, the tag already exists and the repetition is empty
            return;
        }
        $this->topush['tags'][$name] = $this->currtag;
    }

    /**
     * Publishes the pending information (if it has any tag) and resets the pending state.
     */
    private function flush_pending() {
        if (!empty($this->topush['tags'])) {
            $this->publish($this->topush);
        }
        $this->currtag = array();
        $this->topush = array();
    }
}
298
 
299
/**
 * Exception class used by all the @progressive_parser stuff
 */
class progressive_parser_exception extends moodle_exception {

    /**
     * Builds the exception by forwarding to the moodle_exception constructor,
     * always passing 'error' and '' as its second and third arguments.
     *
     * @param string $errorcode code identifying the error
     * @param mixed $a extra information forwarded untouched to the parent
     * @param string|null $debuginfo optional debugging information forwarded to the parent
     */
    public function __construct($errorcode, $a = null, $debuginfo = null) {
        parent::__construct(
            $errorcode,
            'error',
            '',
            $a,
            $debuginfo
        );
    }
}