<?php

/**
 * Strategy that strips tokens the active HTML definition does not recognize.
 *
 * Tokens are examined one at a time, in order. A tag that has an entry in
 * the definition's info_tag_transform table is transformed first; a tag
 * that is still unknown afterwards is either discarded or, when
 * Core.EscapeInvalidTags is enabled, replaced by a text token built from
 * the generator's output for that tag. Comments are kept, converted to
 * text, or discarded depending on configuration and on the element they
 * appear in.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters the token list and returns only the tokens that survive.
     *
     * While running, the token being processed is exposed to the context
     * under the name 'CurrentToken'; the registration is destroyed before
     * returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $markup_generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_foreign = $config->get('Core.EscapeInvalidTags');
        $validate_img = $config->get('Core.RemoveInvalidImg');

        // Comment policy: trusted mode keeps every comment, otherwise only
        // comments matching the lookup table or the regexp are kept.
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_re = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_re !== null;

        $script_flag = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents, when explicitly true or false,
        // overrides the 'script' entry of Core.HiddenElements.
        if ($script_flag === true) {
            $hidden['script'] = true;
        } elseif ($script_flag === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Name of the hidden element whose contents are currently being
        // skipped, or false when nothing is being skipped.
        $skip_until = false;

        // Name of the recognized hidden element we are inside of, or false;
        // while set, comments are emitted as text tokens.
        $comments_as_text = false;

        // Registered by reference, so the context always sees the token
        // currently held in $current (including replacements made below).
        $current = false;
        $context->register('CurrentToken', $current);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $current) {
            // While skipping, only a tag carrying the awaited name gets through.
            if ($skip_until && (empty($current->is_tag) || $current->name !== $skip_until)) {
                continue;
            }

            if (empty($current->is_tag)) {
                if ($current instanceof HTMLPurifier_Token_Comment) {
                    if ($comments_as_text !== false) {
                        // inside a recognized hidden element: comment becomes text
                        $current = new HTMLPurifier_Token_Text($current->data);
                    } else {
                        $keep_comment = false;
                        $had_trailing_hyphen = false;
                        $collapsed_hyphens = false;

                        if ($trusted || $filter_comments) {
                            // The comment body is always cleaned up before
                            // the allow-list check: trailing hyphens go, and
                            // runs of hyphens shrink to a single one.
                            $had_trailing_hyphen = $errors && substr($current->data, -1) == '-';
                            $current->data = rtrim($current->data, '-');
                            while (strpos($current->data, '--') !== false) {
                                $collapsed_hyphens = true;
                                $current->data = str_replace('--', '-', $current->data);
                            }
                            $comment_text = trim($current->data);
                            $keep_comment = $trusted
                                || !empty($allowed_comments[$comment_text])
                                || ($allowed_comments_re !== null
                                    && preg_match($allowed_comments_re, $comment_text));
                        }

                        if (!$keep_comment) {
                            if ($errors) {
                                $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                            }
                            continue;
                        }

                        if ($errors) {
                            if ($had_trailing_hyphen) {
                                $errors->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($collapsed_hyphens) {
                                $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    }
                } elseif (!($current instanceof HTMLPurifier_Token_Text)) {
                    // neither tag, comment nor text: drop it
                    continue;
                }
            } else {
                // DEFINITION CALL
                // Give a registered tag transform the first shot at the token.
                if (isset($html_def->info_tag_transform[$current->name])) {
                    $name_before = $current->name;
                    $transform = $html_def->info_tag_transform[$name_before];
                    $current = $transform->transform($current, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (!isset($html_def->info[$current->name])) {
                    if (!$escape_foreign) {
                        // Foreign element is removed; if it is a hidden
                        // element, its contents are removed as well.
                        $is_hidden = isset($hidden[$current->name]);
                        if ($is_hidden) {
                            if ($current instanceof HTMLPurifier_Token_Start) {
                                $skip_until = $current->name;
                            } elseif (!($current instanceof HTMLPurifier_Token_Empty)) {
                                $skip_until = false;
                            }
                            // an empty tag leaves the skipping state untouched
                        }
                        if ($errors) {
                            $errors->send(
                                E_ERROR,
                                $is_hidden
                                    ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                    : 'Strategy_RemoveForeignElements: Foreign element removed'
                            );
                        }
                        continue;
                    }

                    // Foreign element is kept as text holding its markup.
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $current = new HTMLPurifier_Token_Text(
                        $markup_generator->generateFromToken($current)
                    );
                } else {
                    // Recognized element: verify required attributes on
                    // start and empty tags. 'img' is only checked when
                    // Core.RemoveInvalidImg is on, so that option still works.
                    $opens_element = $current instanceof HTMLPurifier_Token_Start
                        || $current instanceof HTMLPurifier_Token_Empty;
                    if ($opens_element &&
                        $html_def->info[$current->name]->required_attr &&
                        ($current->name != 'img' || $validate_img)
                    ) {
                        $validator->validateToken($current, $config, $context);
                        $complete = true;
                        foreach ($html_def->info[$current->name]->required_attr as $required) {
                            if (!isset($current->attr[$required])) {
                                $complete = false;
                                break;
                            }
                        }
                        if (!$complete) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $required
                                );
                            }
                            continue;
                        }
                        // attributes were validated here; mark the token accordingly
                        $current->armor['ValidateAttributes'] = true;
                    }

                    // Track entering and leaving a recognized hidden element.
                    if ($current instanceof HTMLPurifier_Token_Start && isset($hidden[$current->name])) {
                        $comments_as_text = $current->name;
                    } elseif ($current instanceof HTMLPurifier_Token_End && $current->name === $comments_as_text) {
                        $comments_as_text = false;
                    }
                }
            }

            $kept[] = $current;
        }

        if ($skip_until && $errors) {
            // the awaited closing tag never arrived: everything to the end was dropped
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }
        $context->destroy('CurrentToken');
        return $kept;
    }
}

// vim: et sw=4 sts=4