WebSVN – Moodle – Autoría – /lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php

Rev	Autor	Línea Nro.	Línea
1	efrain	1	`<?php`
		2
		/**
		 * Injector that automatically wraps content in paragraph tags, using
		 * double newlines ("\n\n") as the paragraph separator.
		 *
		 * Content at the root (empty nesting stack) is always considered for
		 * wrapping; content inside an element that allows <p> is wrapped only
		 * when a double newline is found. The "State x.y" notes in handleText()
		 * and handleElement() label each situation; the underlined part of the
		 * original diagrams is the token currently being handled.
		 *
		 * @todo Ensure all states are unit tested, including variations as well.
		 * @todo Make a graph of the flow control for this Injector.
		 */
		class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
		{
		    /**
		     * Name of this injector.
		     * @type string
		     */
		    public $name = 'AutoParagraph';

		    /**
		     * Element names this injector depends on (only 'p'). How the list is
		     * enforced is decided by the parent class, which is not visible here.
		     * @type array
		     */
		    public $needed = array('p');

		    /**
		     * Creates a <p> start token whose 'MakeWellFormed_TagClosedError'
		     * armor flag is switched on.
		     * @return HTMLPurifier_Token_Start
		     */
		    private function _pStart()
		    {
		        $start = new HTMLPurifier_Token_Start('p');
		        $start->armor['MakeWellFormed_TagClosedError'] = true;
		        return $start;
		    }

		    /**
		     * Handles a text token. When paragraph tags are required the token is
		     * replaced, by reference, with an array of tokens.
		     * @param HTMLPurifier_Token_Text $token
		     */
		    public function handleText(&$token)
		    {
		        $data = $token->data;

		        // Branch A: the current parent does not allow <p>.
		        if (!$this->allowsElement('p')) {
		            if (empty($this->currentNesting)) {
		                return;
		            }
		            $parent = $this->currentNesting[count($this->currentNesting) - 1];
		            if ($parent->name == 'p') {
		                // State 3.1: ...<p>PAR1
		                // State 3.2: ...<p>PAR1\n\nPAR2
		                // Already inside a paragraph: start from an empty result so
		                // _splitText() knows no <p> start token was injected here.
		                $token = array();
		                $this->_splitText($data, $token);
		            }
		            // Otherwise:
		            // State 4.1: ...<b>PAR1
		            // State 4.2: ...<b>PAR1\n\nPAR2
		            // Nothing to do.
		            return;
		        }

		        // Branch B: inside an element that allows <p>, and this text holds
		        // no double newline, so a paragraph may or may not be needed.
		        if (!empty($this->currentNesting) && strpos($data, "\n\n") === false) {
		            // State 2: <div>PAR1... (similar to 1.4)
		            if ($this->_pLookAhead()) {
		                // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
		                // This text is expected to be the first child: an earlier
		                // inline element would have run the same look-ahead and
		                // found the double newline (a comment could be an exception).
		                $token = array($this->_pStart(), $token);
		            }
		            // Otherwise:
		            // State 2.2.1: <div>PAR1<div>
		            // State 2.2.2: <div>PAR1<b>PAR1</b></div>
		            return;
		        }

		        // Branch C: text in the anonymous root node, or text containing a
		        // double newline (both are treated the same way).
		        $position = null;
		        $depth = null;
		        $hasFollowing = $this->forwardUntilEndToken($position, $following, $depth);

		        if ($token->is_whitespace) {
		            if (!$hasFollowing) {
		                // State 1.1: ... ^ (whitespace, then document end)
		                // Degenerate case, leave the token alone.
		                return;
		            }
		            if (!$this->_isInline($following)) {
		                // State 1.5: \n<hr />
		                return;
		            }
		        }

		        // State 1.2: PAR1
		        // State 1.3: PAR1\n\nPAR2
		        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
		        $token = array($this->_pStart());
		        $this->_splitText($data, $token);
		    }

		    /**
		     * Handles an element token. When needed, the token is replaced, by
		     * reference, with an array that puts a <p> start token and/or a "\n\n"
		     * text token in front of it.
		     * @param HTMLPurifier_Token $token
		     */
		    public function handleElement(&$token)
		    {
		        // No check for "already inside <p>" is made for block tokens: the
		        // paragraph would have been auto-closed by MakeWellFormed.
		        if (!$this->allowsElement('p')) {
		            // State 2.2: <ul><li>
		            // State 2.4: <p><b>
		            return;
		        }

		        if (empty($this->currentNesting)) {
		            // Root level.
		            if ($this->_isInline($token)) {
		                // State 3.1: <b>
		                // The {p} tag is inserted here; it is not yet reflected in
		                // the input tokens.
		                $token = array($this->_pStart(), $token);
		            }
		            // Otherwise State 3.2: <div>

		            $position = null;
		            if (!$this->backward($position, $previous)) {
		                return;
		            }
		            if ($previous instanceof HTMLPurifier_Token_Text) {
		                // State 3.1.2: ...</p>\n\n{p}<b>
		                // State 3.2.2: ...</p>\n\n<div>
		                // PAR<ELEM> cannot occur here because PAR would already have
		                // been wrapped in <p> tags.
		                return;
		            }
		            // State 3.1.1: ...</p>{p}<b>
		            // State 3.2.1: ...</p><div>
		            // Separate this element from what precedes it with "\n\n".
		            if (!is_array($token)) {
		                $token = array($token);
		            }
		            array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
		            return;
		        }

		        if (!$this->_isInline($token)) {
		            // State 2.3: ...<div>
		            return;
		        }

		        // State 1: <div>...<b>
		        // Find out whether this token is adjacent to the parent start token
		        // (backward() seeks back until the token is not whitespace).
		        $position = null;
		        $this->backward($position, $previous);

		        if ($previous instanceof HTMLPurifier_Token_Start) {
		            // State 1.2.1: <div><b>
		            // Look ahead to decide whether a <p> is needed.
		            if ($this->_pLookAhead()) {
		                // State 1.3.1: <div><b>PAR1\n\nPAR2
		                $token = array($this->_pStart(), $token);
		            }
		            // Otherwise:
		            // State 1.3.2: <div><b>PAR1</b></div>
		            // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
		            return;
		        }

		        // The token was not adjacent to the parent start token.
		        if ($previous instanceof HTMLPurifier_Token_Text
		            && substr($previous->data, -2) === "\n\n"
		        ) {
		            // State 1.1.4: <div><p>PAR1</p>\n\n<b>
		            // (Arguably a job for _splitText().)
		            $token = array($this->_pStart(), $token);
		        }
		        // Otherwise:
		        // State 1.1.1: <div><p>PAR1</p><b>
		        // State 1.1.2: <div><br /><b>
		        // State 1.1.3: <div>PAR<b>
		    }

		    /**
		     * Splits text on double newlines and appends the resulting paragraph
		     * tokens to the token array that will replace the original token.
		     * @param string $data Text to be broken into paragraphs
		     * @param HTMLPurifier_Token[] $result Token array, passed by reference,
		     *        that the generated tokens are added to
		     */
		    private function _splitText($data, &$result)
		    {
		        $chunks = explode("\n\n", $data);
		        $total = count($chunks);

		        if ($total === 1) {
		            // No double newline at all: hand the text back as one token.
		            $result[] = new HTMLPurifier_Token_Text($data);
		            return;
		        }

		        $kept = array();    // chunks that are not blank
		        $openFirst = false; // a <p> start must precede the first paragraph
		        $closeLast = false; // the last paragraph keeps its closing </p>

		        foreach ($chunks as $index => $chunk) {
		            if (trim($chunk) !== '') {
		                $kept[] = $chunk;
		                continue;
		            }
		            if ($index === 0) {
		                // The text begins with a double newline.
		                if (empty($result)) {
		                    // An empty result means the caller injected no <p>
		                    // start token, i.e. a paragraph is already open; the
		                    // double newline closes it. A new start token is only
		                    // added further down, and only if real paragraphs
		                    // follow; otherwise a later injector call deals with it.
		                    $result[] = new HTMLPurifier_Token_End('p');
		                    $result[] = new HTMLPurifier_Token_Text("\n\n");
		                    $openFirst = true;
		                } else {
		                    // A paragraph was just opened by the caller: put the
		                    // double newline from the source back in front of it.
		                    array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
		                }
		            } elseif ($index === $total - 1) {
		                // The text ends with a double newline, so the final
		                // paragraph must be closed explicitly.
		                $closeLast = true;
		            }
		        }

		        // Nothing but whitespace between the separators.
		        if (empty($kept)) {
		            return;
		        }

		        if ($openFirst) {
		            $result[] = $this->_pStart();
		        }

		        // Every paragraph contributes: text, </p>, "\n\n", <p>.
		        foreach ($kept as $chunk) {
		            $result[] = new HTMLPurifier_Token_Text($chunk);
		            $result[] = new HTMLPurifier_Token_End('p');
		            $result[] = new HTMLPurifier_Token_Text("\n\n");
		            $result[] = $this->_pStart();
		        }

		        // The trailing <p> is always dropped (the injector adds it later via
		        // a look-behind if it turns out to be needed). Without a trailing
		        // double newline the final "\n\n" and </p> are dropped as well, and
		        // MakeWellFormed closes the paragraph later.
		        $surplus = $closeLast ? 1 : 3;
		        for ($n = 0; $n < $surplus; $n++) {
		            array_pop($result);
		        }
		    }

		    /**
		     * Tells whether the token is inline, i.e. whether its name is listed
		     * among the child elements allowed inside <p>.
		     * @param HTMLPurifier_Token $token
		     * @return bool
		     */
		    private function _isInline($token)
		    {
		        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
		    }

		    /**
		     * Scans forward through the token list to decide whether a <p> tag
		     * has to be inserted.
		     * @return bool True only when _checkNeedsP() answers true for a token
		     *         before the scan stops or a block start token is reached
		     */
		    private function _pLookAhead()
		    {
		        // Start one level deep when the current token is itself a start tag.
		        $depth = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;
		        $position = null;

		        while ($this->forwardUntilEndToken($position, $following, $depth)) {
		            $verdict = $this->_checkNeedsP($following);
		            if ($verdict !== null) {
		                return $verdict;
		            }
		        }
		        return false;
		    }

		    /**
		     * Decides whether the given token means that an earlier inline token
		     * needs a paragraph. Used by _pLookAhead() on each token produced by
		     * forwardUntilEndToken().
		     * @param HTMLPurifier_Token $current
		     * @return bool|null False for a non-inline (block) start token, true
		     *         for a text token containing a double newline, null when
		     *         this token does not settle the question
		     */
		    private function _checkNeedsP($current)
		    {
		        if ($current instanceof HTMLPurifier_Token_Start) {
		            // <div>PAR1<div>: a block element ends the search early.
		            return $this->_isInline($current) ? null : false;
		        }
		        if ($current instanceof HTMLPurifier_Token_Text
		            && strpos($current->data, "\n\n") !== false
		        ) {
		            // <div>PAR1<b>PAR1\n\nPAR2
		            return true;
		        }
		        // <div>PAR1<b>PAR1... : undecided, keep looking.
		        return null;
		    }
		}
		355
		356	`// vim: et sw=4 sts=4`

Proyectos de Subversion Moodle

(root)/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php – Rev 1