1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/**
|
|
|
4 |
* Represents an XHTML 1.1 module, with information on elements, tags
|
|
|
5 |
* and attributes.
|
|
|
6 |
* @note Even though this is technically XHTML 1.1, it is also used for
|
|
|
7 |
* regular HTML parsing. We are using modularization as a convenient
|
|
|
8 |
* way to represent the internals of HTMLDefinition, and our
|
|
|
9 |
* implementation is by no means conforming and does not directly
|
|
|
10 |
* use the normative DTDs or XML schemas.
|
|
|
11 |
* @note The public variables in a module should almost directly
|
|
|
12 |
* correspond to the variables in HTMLPurifier_HTMLDefinition.
|
|
|
13 |
* However, the prefix info carries no special meaning in these
|
|
|
14 |
* objects (include it anyway if that's the correspondence though).
|
|
|
15 |
* @todo Consider making some member functions protected
|
|
|
16 |
*/
|
|
|
17 |
|
|
|
18 |
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation always returns false; modules that provide
     * their own child definitions override it (and should also set
     * $defines_child_def to true).
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool ChildDef subclass, or false if this
     *         module does not supply one
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is triggered and the existing definition is
     * returned unchanged.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated as a ' | '-separated string per content set.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @return array Two-element array: (content model type, content model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first colon separates the type from the model; a
     *       string without any colon yields an empty content model.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Split on the first colon only so that a content model which itself
        // contains a colon is kept intact, and pad to two parts so that a
        // string without a colon does not read an undefined offset.
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = trim($parts[1]);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored at index 0 of $attr,
     * replacing whatever was previously stored at that index.
     * @param array $attr Reference to attr array to modify
     * @param array|string $attr_includes Array of includes / string include to merge in;
     *        an empty non-array value is treated as no includes
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string|array $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @note Null values are skipped.
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        $args = func_get_args();
        // a string first argument means the values were passed variadically
        if (is_string($list)) {
            $list = $args;
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
|
|
|
284 |
|
|
|
285 |
// vim: et sw=4 sts=4
|