1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the percent encoder used to normalize URIs before parsing.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first run through the percent encoder's normalize(), then
     * split into scheme, authority, path, query and fragment. If an
     * authority is present it is further split into userinfo, host and port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI, or false if the
     *         top-level pattern did not match. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'.        // 4. Authority
            '([^?#"<>]*)'.              // 5. Path
            '(\?([^#"<>]*))?'.          // 7. Query
            '(#([^"<>]*))?'.            // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. Each optional component is detected through
        // its outer group, which includes the delimiter (":", "//", "?",
        // "#"), so a component that is present but empty yields '' rather
        // than null.
        $scheme    = !empty($matches[1]) ? $matches[2] : null;
        $authority = !empty($matches[3]) ? $matches[4] : null;
        $path      = $matches[5]; // always present, can be empty
        $query     = !empty($matches[6]) ? $matches[7] : null;
        $fragment  = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo = !empty($matches[1]) ? $matches[2] : null;
            // isset() rather than !empty(): empty() treats the string "0"
            // as empty, which would silently drop a host of "0".
            $host     = isset($matches[3]) ? $matches[3] : '';
            // A bare ":" with no digits casts to port 0.
            $port     = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
|
|
|
70 |
|
|
|
71 |
// vim: et sw=4 sts=4
|