Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
/**
 * HTML Purifier's internal representation of a URI.
 *
 * The object stores the URI split into its components (scheme, userinfo,
 * host, port, path, query, fragment). A component that is absent is
 * represented by null, which is distinct from an empty string: for
 * example a null host yields no authority at all, while an empty-string
 * host yields an empty authority ("//") when the URI is rendered.
 *
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component, lowercased by the constructor; null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component (the part before "@" in the authority); null
     * when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component; null when there is no authority, '' for an empty
     * authority.
     * @type string|null
     */
    public $host;

    /**
     * Port component, cast to int by the constructor; null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component; the empty string when there is no path.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"); null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading "#"); null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string|null $scheme
     * @param string|null $userinfo
     * @param string|null $host
     * @param int|string|null $port
     * @param string $path
     * @param string|null $query
     * @param string|null $fragment
     * @note Automatically normalizes scheme and port: a non-null scheme is
     *       lowercased and a non-null port is cast to int.
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lowercase.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme, or the
     * configured default scheme when this URI has none.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if no scheme object could be
     *         obtained.
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                if ($def->defaultScheme !== null) {
                    // A default scheme is configured but no object could be
                    // produced for it: something funky happened, so warn.
                    trigger_error(
                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
                        E_USER_WARNING
                    );
                } // stay silent when no default scheme is configured at all
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     *
     * Invalid hosts and out-of-range ports are reset to null; userinfo,
     * path, query and fragment are passed through a percent-encoder
     * configured with the characters allowed in that component.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this generic implementation always returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        // (gen-delims, for reference only: ':/?#[]@')
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if ($this->host !== null) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The grouping below is exactly what operator precedence already
        // implied (&& binds tighter than ||); it is spelled out so the
        // condition cannot be misread.
        if (($this->scheme !== null && $this->host === null) || $this->host === '') {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if ($this->userinfo !== null) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if ($this->port !== null) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if ($this->host !== null) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // There is no host here, yet the path starts with
                    // "//". This can happen when the host was stripped
                    // out above:
                    // http://my/path
                    // //my/path
                    // Emitting such a path would make its first segment
                    // read as an authority, so drop the path entirely.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif ($this->scheme !== null) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment gets a stricter encoder that does not
                // allow ':', so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if ($this->query !== null) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if ($this->fragment !== null) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if ($this->host !== null) {
            $authority = '';
            if ($this->userinfo !== null) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if ($this->port !== null) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if ($this->scheme !== null) {
            $result .= $this->scheme . ':';
        }
        if ($authority !== null) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if ($this->query !== null) {
            $result .= '?' . $this->query;
        }
        if ($this->fragment !== null) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            // No host at all: nothing to compare, config is not consulted.
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has an equal or better level of security than the
     *        default scheme
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach: unknown scheme is never benign
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may yield no object (getSchemeObj() above
        // handles the same case); guard the property read so that a
        // missing default scheme does not raise a warning. With no
        // default scheme there is no security level to be worse than.
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }
}
315
 
316
// vim: et sw=4 sts=4