<?php

/**
 * Class that handles operations involving percent-encoding in URIs.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. Both encode()
 *      and normalize() consult the same table of preserved characters, so
 *      the object you use for normalize() SHOULD NOT be used for encode(),
 *      or vice-versa.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of characters that are left unescaped by encode() and
     * decoded by normalize(). Keys are integer byte values (as returned by
     * ord()); every stored value is true.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved characters: digits, ASCII letters,
     * "-", ".", "_" and "~", plus every byte of $preserve.
     * @param string|bool $preserve String of additional characters to
     *      preserve, or false (the default) for none. Values that are not
     *      strings are ignored.
     */
    public function __construct($preserve = false)
    {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) { // digits
            $this->preserve[$i] = true;
        }
        for ($i = 65; $i <= 90; $i++) { // upper-case
            $this->preserve[$i] = true;
        }
        for ($i = 97; $i <= 122; $i++) { // lower-case
            $this->preserve[$i] = true;
        }
        $this->preserve[45] = true; // Dash         -
        $this->preserve[46] = true; // Period       .
        $this->preserve[95] = true; // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // extra letters not to escape
        // Only a real string can be walked byte by byte. Indexing into a
        // bool or number raises a warning and yields null, which ord()
        // turns into 0 and would wrongly mark the NUL byte as preserved.
        if (is_string($preserve)) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: it percent-encodes every byte of the
     * string except the characters in the preserve table and the percent
     * sign itself.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            $int = ord($char);
            if ($char !== '%' && !isset($this->preserve[$int])) {
                // two upper-case hex digits, e.g. a space becomes %20
                $ret .= '%' . sprintf('%02X', $int);
            } else {
                $ret .= $char;
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding preserved characters, upper-casing
     * the hex digits of the remaining escapes, and escaping any percent sign
     * that does not start a valid two-hex-digit escape as %25.
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        // loose comparison is deliberate: null is treated like ''
        if ($string == '') {
            return '';
        }
        // everything before the first percent sign is copied through
        // untouched; each later part begins right after a percent sign
        $parts = explode('%', $string);
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // too short to hold two hex digits: escape the percent
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // not a hex pair: escape the percent
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // preserved characters are stored decoded
                $ret .= chr($int) . $text;
                continue;
            }
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }
}

// vim: et sw=4 sts=4