Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
/**
3
 * Markdown Extra - A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <michel.fortin@michelf.com>
7
 * @copyright 2004-2022 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
 
11
namespace Michelf;
12
 
13
/**
14
 * Markdown Extra Parser Class
15
 */
16
class MarkdownExtra extends \Michelf\Markdown {
17
	/**
18
	 * Configuration variables
19
	 */
20
	/**
21
	 * Prefix for footnote ids.
22
	 */
23
	public string $fn_id_prefix = "";
24
 
25
	/**
26
	 * Optional title attribute for footnote links.
27
	 */
28
	public string $fn_link_title = "";
29
 
30
	/**
31
	 * Optional class attribute for footnote links and backlinks.
32
	 */
33
	public string $fn_link_class     = "footnote-ref";
34
	public string $fn_backlink_class = "footnote-backref";
35
 
36
	/**
37
	 * Content to be displayed within footnote backlinks. The default is '↩';
38
	 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
39
	 * from displaying the arrow character as an emoji.
40
	 * Optionally use '^^' and '%%' to refer to the footnote number and
41
	 * reference number respectively. {@see parseFootnotePlaceholders()}
42
	 */
43
	public string $fn_backlink_html = '&#8617;&#xFE0E;';
44
 
45
	/**
46
	 * Optional title and aria-label attributes for footnote backlinks for
47
	 * added accessibility (to ensure backlink uniqueness).
48
	 * Use '^^' and '%%' to refer to the footnote number and reference number
49
	 * respectively. {@see parseFootnotePlaceholders()}
50
	 */
51
	public string $fn_backlink_title = "";
52
	public string $fn_backlink_label = "";
53
 
54
	/**
55
	 * Class name for table cell alignment (%% replaced left/center/right)
56
	 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
57
	 * If empty, the align attribute is used instead of a class name.
58
	 */
59
	public string $table_align_class_tmpl = '';
60
 
61
	/**
62
	 * Optional class prefix for fenced code block.
63
	 */
64
	public string $code_class_prefix = "";
65
 
66
	/**
67
	 * Class attribute for code blocks goes on the `code` tag;
68
	 * setting this to true will put attributes on the `pre` tag instead.
69
	 */
70
	public bool $code_attr_on_pre = false;
71
 
72
	/**
73
	 * Predefined abbreviations.
74
	 */
75
	public array $predef_abbr = array();
76
 
77
	/**
78
	 * Only convert atx-style headers if there's a space between the header and #
79
	 */
80
	public bool $hashtag_protection = false;
81
 
82
	/**
83
	 * Determines whether footnotes should be appended to the end of the document.
84
	 * If true, footnote html can be retrieved from $this->footnotes_assembled.
85
	 */
86
	public bool $omit_footnotes = false;
87
 
88
 
89
	/**
90
	 * After parsing, the HTML for the list of footnotes appears here.
91
	 * This is available only if $omit_footnotes == true.
92
	 *
93
	 * Note: when placing the content of `footnotes_assembled` on the page,
94
	 * consider adding the attribute `role="doc-endnotes"` to the `div` or
95
	 * `section` that will enclose the list of footnotes so they are
96
	 * reachable to accessibility tools the same way they would be with the
97
	 * default HTML output.
98
	 */
99
	public ?string $footnotes_assembled = null;
100
 
101
	/**
102
	 * Parser implementation
103
	 */
104
 
105
	/**
106
	 * Constructor function. Initialize the parser object.
107
	 * @return void
108
	 */
109
	public function __construct() {
		// The two extra escapable characters have to be appended before the
		// parent constructor runs, because the parent builds its escape
		// table from this string.
		$this->escape_chars .= ':|';

		// Extra transformations, keyed by method name with their priority.
		// They are merged with `+=`, so an entry that already exists in a
		// gamut keeps its current priority. The parent constructor is
		// expected to sort the gamuts afterwards.
		$extra_document_steps = [
			"doFencedCodeBlocks" => 5,
			"stripFootnotes"     => 15,
			"stripAbbreviations" => 25,
			"appendFootnotes"    => 50,
		];
		$extra_block_steps = [
			"doFencedCodeBlocks" => 5,
			"doTables"           => 15,
			"doDefLists"         => 45,
		];
		$extra_span_steps = [
			"doFootnotes"        => 5,
			"doAbbreviations"    => 70,
		];

		$this->document_gamut += $extra_document_steps;
		$this->block_gamut    += $extra_block_steps;
		$this->span_gamut     += $extra_span_steps;

		// Flag is switched on before handing control to the parent.
		$this->enhanced_ordered_list = true;

		parent::__construct();
	}
135
 
136
 
137
	/**
138
	 * Extra variables used during extra transformations.
139
	 */
140
	protected array $footnotes = array();
141
	protected array $footnotes_ordered = array();
142
	protected array $footnotes_ref_count = array();
143
	protected array $footnotes_numbers = array();
144
	protected array $abbr_desciptions = array();
145
	protected string $abbr_word_re = '';
146
 
147
	/**
148
	 * Give the current footnote number.
149
	 */
150
	protected int $footnote_counter = 1;
151
 
152
    /**
153
	 * Ref attribute for links
154
	 */
155
	protected array $ref_attr = array();
156
 
157
	/**
158
	 * Setting up Extra-specific variables.
159
	 */
160
	protected function setup() {
		parent::setup();

		// Reset every piece of per-document state owned by this class.
		$this->footnotes = array();
		$this->footnotes_ordered = array();
		$this->footnotes_ref_count = array();
		$this->footnotes_numbers = array();
		$this->abbr_desciptions = array();
		$this->abbr_word_re = '';
		$this->footnote_counter = 1;
		$this->footnotes_assembled = null;

		// Seed the abbreviation table and its alternation pattern from the
		// predefined abbreviations.
		foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
			// Numeric-looking array keys arrive as integers; normalise so
			// preg_quote() always receives a string.
			$abbr_word = (string) $abbr_word;

			// Compare against '' explicitly: a plain truthiness test treats
			// the pattern "0" as empty and would skip the separator, fusing
			// two abbreviations into one alternative.
			if ($this->abbr_word_re !== '') {
				$this->abbr_word_re .= '|';
			}
			$this->abbr_word_re .= preg_quote($abbr_word);
			$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
		}
	}
179
 
180
	/**
181
	 * Clearing Extra-specific variables.
182
	 */
183
	protected function teardown() {
		// Drop the footnote bookkeeping collected while parsing.
		$this->footnotes           = [];
		$this->footnotes_ordered   = [];
		$this->footnotes_ref_count = [];
		$this->footnotes_numbers   = [];

		// Drop the abbreviation table and its matching pattern.
		$this->abbr_desciptions = [];
		$this->abbr_word_re     = '';

		// The assembled footnote HTML is kept only when footnotes are
		// omitted from the output, so the caller can still read it.
		if ($this->omit_footnotes === false) {
			$this->footnotes_assembled = null;
		}

		parent::teardown();
	}
196
 
197
 
198
	/**
199
	 * Extra attribute parser
200
	 */
201
	/**
202
	 * Expression to use to catch attributes (includes the braces)
203
	 */
204
	protected string $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
205
 
206
	/**
207
	 * Expression to use when parsing in a context when no capture is desired
208
	 */
209
	protected string $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
210
 
211
	/**
212
	 * Parse attributes caught by the $this->id_class_attr_catch_re expression
213
	 * and return the HTML-formatted list of attributes.
214
	 *
215
	 * Currently supported attributes are .class and #id.
216
	 *
217
	 * In addition, this method also supports supplying a default Id value,
218
	 * which will be used to populate the id attribute in case it was not
219
	 * overridden.
220
	 * @param  string $tag_name
221
	 * @param  string $attr
222
	 * @param  mixed  $defaultIdValue
223
	 * @param  array  $classes
224
	 * @return string
225
	 */
226
	protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
		// Callers hand over null when the optional attribute group did not
		// take part in the match. Normalise to a string so preg_match_all()
		// below never receives null (deprecated since PHP 8.1).
		$attr = (string) $attr;

		// A default id counts as "given" unless it is null, false or ''.
		// Explicit comparisons are used so that the valid id "0" survives.
		$has_default_id = $defaultIdValue !== null
			&& $defaultIdValue !== false
			&& $defaultIdValue !== '';

		if ($attr === '' && !$has_default_id && empty($classes)) {
			return "";
		}

		// Split on components
		preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
		$elements = $matches[0];

		// Handle classes and IDs (only first ID taken into account)
		$attributes = array();
		$id = false;
		foreach ($elements as $element) {
			if ($element[0] === '.') {
				// ".name" adds a class.
				$classes[] = substr($element, 1);
			} else if ($element[0] === '#') {
				// "#name" sets the id; later ids are ignored.
				if ($id === false) $id = substr($element, 1);
			} else if (strpos($element, '=') > 0) {
				// "key=value" becomes a generic attribute.
				$parts = explode('=', $element, 2);
				$attributes[] = $parts[0] . '="' . $parts[1] . '"';
			}
		}

		// Fall back to the default id when none was supplied explicitly.
		if ($id === false || $id === '') {
			$id = $has_default_id ? $defaultIdValue : false;
		}

		// Compose attributes as string
		$attr_str = "";
		if ($id !== false) {
			$attr_str .= ' id="'.$this->encodeAttribute((string) $id) .'"';
		}
		if (!empty($classes)) {
			$attr_str .= ' class="'. implode(" ", $classes) . '"';
		}
		// Generic key=value attributes are emitted only when markup is allowed.
		if (!$this->no_markup && !empty($attributes)) {
			$attr_str .= ' '.implode(" ", $attributes);
		}
		return $attr_str;
	}
266
 
267
	/**
268
	 * Strips link definitions from text, stores the URLs and titles in
269
	 * hash references.
270
	 * @param  string $text
271
	 * @return string
272
	 */
273
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by at most (tab width - 1) spaces.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		// followed by an optional {#id .class} attribute block.
		$link_def_re = '{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
					(?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
							(?:\n+|\Z)
			}xm';

		// Each matched definition is recorded by the callback and removed
		// from the text.
		return preg_replace_callback(
			$link_def_re,
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
304
 
305
	/**
306
	 * Strip link definition callback
307
	 * @param  array $matches
308
	 * @return string
309
	 */
310
	protected function _stripLinkDefinitions_callback($matches) {
		// Link ids are stored lower-cased.
		$key = strtolower($matches[1]);

		// Group 2 holds an <angle-bracketed> URL, group 3 a bare one.
		if ($matches[2] == '') {
			$target = $matches[3];
		} else {
			$target = $matches[2];
		}
		$this->urls[$key] = $target;

		// Binding by reference yields null, without a notice, when the
		// optional title or attribute group is missing from the match.
		$this->titles[$key] =& $matches[4];
		$this->ref_attr[$key] = $this->doExtraAttributes("", $attr_source =& $matches[5]);

		// The definition itself is removed from the text.
		return '';
	}
318
 
319
 
320
	/**
321
	 * HTML block parser
322
	 */
323
	/**
324
	 * Tags that are always treated as block tags
325
	 */
326
	protected string $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure|details|summary';
327
 
328
	/**
329
	 * Tags treated as block tags only if the opening tag is alone on its line
330
	 */
331
	protected string $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
332
 
333
	/**
334
	 * Tags where markdown="1" default to span mode:
335
	 */
336
	protected string $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
337
 
338
	/**
339
	 * Tags which must not have their contents modified, no matter where
340
	 * they appear
341
	 */
342
	protected string $clean_tags_re = 'script|style|math|svg';
343
 
344
	/**
345
	 * Tags that do not need to be closed.
346
	 */
347
	protected string $auto_close_tags_re = 'hr|img|param|source|track';
348
 
349
	/**
350
	 * Hashify HTML Blocks and "clean tags".
351
	 *
352
	 * We only want to do this for block-level HTML tags, such as headers,
353
	 * lists, and tables. That's because we still want to wrap <p>s around
354
	 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
355
	 * phrase emphasis, and spans. The list of tags we're looking for is
356
	 * hard-coded.
357
	 *
358
	 * This works by calling _HashHTMLBlocks_InMarkdown, which then calls
359
	 * _HashHTMLBlocks_InHTML when it encounters block tags. When the markdown="1"
360
	 * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
361
	 *  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
362
	 * These two functions are calling each other. It's recursive!
363
	 * @param  string $text
364
	 * @return string
365
	 */
366
	protected function hashHTMLBlocks($text) {
		// With markup disabled the text is handed back untouched.
		if (!$this->no_markup) {
			// The HTML-in-Markdown hasher returns (processed, remaining);
			// only the processed part is kept here.
			$result = $this->_hashHTMLBlocks_inMarkdown($text);
			$text = $result[0];
		}

		return $text;
	}
376
 
377
	/**
378
	 * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
379
	 *
380
	 * *   $indent is the number of spaces to be ignored when checking for code
381
	 *     blocks. This is important because if we don't take the indent into
382
	 *     account, something like this (which looks right) won't work as expected:
383
	 *
384
	 *     <div>
385
	 *         <div markdown="1">
386
	 *         Hello World.  <-- Is this a Markdown code block or text?
387
	 *         </div>  <-- Is this a Markdown code block or a real tag?
388
	 *     </div>
389
	 *
390
	 *     If you don't like this, just don't indent the tag on which
391
	 *     you apply the markdown="1" attribute.
392
	 *
393
	 * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
394
	 *     tag with that name. Nested tags supported.
395
	 *
396
	 * *   If $span is true, text inside must be treated as span. So any double
397
	 *     newline will be replaced by a single newline so that it does not create
398
	 *     paragraphs.
399
	 *
400
	 * Returns an array of that form: ( processed text , remaining text )
401
	 *
402
	 * @param  string  $text
403
	 * @param  integer $indent
404
	 * @param  string  $enclosing_tag_re
405
	 * @param  boolean $span
406
	 * @return array
407
	 */
408
	protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
										$enclosing_tag_re = '', $span = false)
	{
		// Nothing to scan: both the processed and the remaining text are empty.
		if ($text === '') return array('', '');

		// Regex to check for the presence of newlines around a block tag.
		$newline_before_re = '/(?:^\n?|\n\n)*$/';
		$newline_after_re =
			'{
				^						# Start of text following the tag.
				(?>[ ]*<!--.*?-->)?		# Optional comment.
				[ ]*\n					# Must be followed by newline.
			}xs';

		// Regex to match any tag.
		$block_tag_re =
			'{
				(					# $2: Capture whole tag.
					</?					# Any opening or closing tag.
						(?>				# Tag name.
							' . $this->block_tags_re . '			|
							' . $this->context_block_tags_re . '	|
							' . $this->clean_tags_re . '        	|
							(?!\s)'.$enclosing_tag_re . '
						)
						(?:
							(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
							(?>
								".*?"		|	# Double quotes (can contain `>`)
								\'.*?\'   	|	# Single quotes (can contain `>`)
								.+?				# Anything but quotes and `>`.
							)*?
						)?
					>					# End of tag.
				|
					<!--    .*?     -->	# HTML Comment
				|
					<\?.*?\?> | <%.*?%>	# Processing instruction
				|
					<!\[CDATA\[.*?\]\]>	# CData Block
				' . ( !$span ? ' # If not in span.
				|
					# Indented code block
					(?: ^[ ]*\n | ^ | \n[ ]*\n )
					[ ]{' . ($indent + 4) . '}[^\n]* \n
					(?>
						(?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
					)*
				|
					# Fenced code block marker
					(?<= ^ | \n )
					[ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
					[ ]*
					(?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
					[ ]*
					(?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
					[ ]*
					(?= \n )
				' : '' ) . ' # End (if not is span).
				|
					# Code span marker
					# Note, this regex needs to go after backtick fenced
					# code blocks but it should also be kept outside of the
					# "if not in span" condition adding backticks to the parser
					`+
				)
			}xs';


		$depth = 0;		// Current depth inside the tag tree.
		$parsed = "";	// Parsed text that will be returned.

		// Loop through every tag until we find the closing tag of the parent
		// or loop until reaching the end of text if no parent tag specified.
		do {
			// Split the text using the first $tag_match pattern found.
			// Text before  pattern will be first in the array, text after
			// pattern will be at the end, and between will be any catches made
			// by the pattern.
			$parts = preg_split($block_tag_re, $text, 2,
								PREG_SPLIT_DELIM_CAPTURE);

			// preg_split() returns false when the regex engine fails. Treat
			// the remaining text as tag-free instead of passing a boolean to
			// count() below, which throws a TypeError on PHP 8. This matches
			// the false check done in _hashHTMLBlocks_inHTML().
			if ($parts === false) {
				$parts = array($text);
			}

			// If in Markdown span mode, add a empty-string span-level hash
			// after each newline to prevent triggering any block element.
			if ($span) {
				$void = $this->hashPart("", ':');
				$newline = "\n$void";
				$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
			}

			$parsed .= $parts[0]; // Text before current tag.

			// If end of $text has been reached. Stop loop.
			if (count($parts) < 3) {
				$text = "";
				break;
			}

			$tag  = $parts[1]; // Tag to handle.
			$text = $parts[2]; // Remaining text after current tag.

			// Check for: Fenced code block marker.
			// Note: need to recheck the whole tag to disambiguate backtick
			// fences from code spans
			if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
				// Fenced code block marker: find matching end marker.
				$fence_indent = strlen($capture[1]); // use captured indent in re
				$fence_re = $capture[2]; // use captured fence in re
				if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
					$matches))
				{
					// End marker found: pass text unchanged until marker.
					$parsed .= $tag . $matches[0];
					$text = substr($text, strlen($matches[0]));
				}
				else {
					// No end marker: just skip it.
					$parsed .= $tag;
				}
			}
			// Check for: Indented code block.
			else if ($tag[0] === "\n" || $tag[0] === " ") {
				// Indented code block: pass it unchanged, will be handled
				// later.
				$parsed .= $tag;
			}
			// Check for: Code span marker
			// Note: need to check this after backtick fenced code blocks
			else if ($tag[0] === "`") {
				// Find corresponding end marker.
				$tag_re = preg_quote($tag);
				if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
					$text, $matches))
				{
					// End marker found: pass text unchanged until marker.
					$parsed .= $tag . $matches[0];
					$text = substr($text, strlen($matches[0]));
				}
				else {
					// Unmatched marker: just skip it.
					$parsed .= $tag;
				}
			}
			// Check for: Opening Block level tag or
			//            Opening Context Block tag (like ins and del)
			//               used as a block tag (tag is alone on its line).
			else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
				(	preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
					preg_match($newline_before_re, $parsed) &&
					preg_match($newline_after_re, $text)	)
				)
			{
				// Need to parse tag and following text using the HTML parser.
				list($block_text, $text) =
					$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

				// Make sure it stays outside of any paragraph by adding newlines.
				$parsed .= "\n\n$block_text\n\n";
			}
			// Check for: Clean tag (like script, math)
			//            HTML Comments, processing instructions.
			else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
				$tag[1] === '!' || $tag[1] === '?')
			{
				// Need to parse tag and following text using the HTML parser.
				// (don't check for markdown attribute)
				list($block_text, $text) =
					$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

				$parsed .= $block_text;
			}
			// Check for: Tag with same name as enclosing tag.
			else if ($enclosing_tag_re !== '' &&
				// Same name as enclosing tag.
				preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
			{
				// Increase/decrease nested tag count. A self-closing tag
				// (ending in "/>") leaves the depth unchanged.
				if ($tag[1] === '/') {
					$depth--;
				} else if ($tag[strlen($tag)-2] !== '/') {
					$depth++;
				}

				if ($depth < 0) {
					// Going out of parent element. Clean up and break so we
					// return to the calling function.
					$text = $tag . $text;
					break;
				}

				$parsed .= $tag;
			}
			else {
				// Any other match is copied through unchanged.
				$parsed .= $tag;
			}
			// @phpstan-ignore-next-line
		} while ($depth >= 0);

		return array($parsed, $text);
	}
609
 
610
	/**
611
	 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
612
	 *
613
	 * *   Calls $hash_method to convert any blocks.
614
	 * *   Stops when the first opening tag closes.
615
	 * *   $md_attr indicates whether the use of the `markdown="1"` attribute is allowed.
616
	 *     (it is not inside clean tags)
617
	 *
618
	 * Returns an array of that form: ( processed text , remaining text )
619
	 * @param  string $text
620
	 * @param  string $hash_method
621
	 * @param  bool $md_attr Handle `markdown="1"` attribute
622
	 * @return array
623
	 */
624
	protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
		// Nothing to scan: both the processed and the remaining text are empty.
		if ($text === '') return array('', '');

		// Regex to match `markdown` attribute inside of a tag.
		$markdown_attr_re = '
			{
				\s*			# Eat whitespace before the `markdown` attribute
				markdown
				\s*=\s*
				(?>
					(["\'])		# $1: quote delimiter
					(.*?)		# $2: attribute value
					\1			# matching delimiter
				|
					([^\s>]*)	# $3: unquoted attribute value
				)
				()				# $4: make $3 always defined (avoid warnings)
			}xs';

		// Regex to match any tag.
		$tag_re = '{
				(					# $2: Capture whole tag.
					</?					# Any opening or closing tag.
						[\w:$]+			# Tag name.
						(?:
							(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
							(?>
								".*?"		|	# Double quotes (can contain `>`)
								\'.*?\'   	|	# Single quotes (can contain `>`)
								.+?				# Anything but quotes and `>`.
							)*?
						)?
					>					# End of tag.
				|
					<!--    .*?     -->	# HTML Comment
				|
					<\?.*?\?> | <%.*?%>	# Processing instruction
				|
					<!\[CDATA\[.*?\]\]>	# CData Block
				)
			}xs';

		$original_text = $text;		// Save original text in case of failure.

		$depth		= 0;	// Current depth inside the tag tree.
		$block_text	= "";	// Temporary text holder for current text.
		$parsed		= "";	// Parsed text that will be returned.
		$base_tag_name_re = '';

		// Get the name of the starting tag.
		// (This pattern makes $base_tag_name_re safe without quoting.)
		if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
			$base_tag_name_re = $matches[1];

		// Loop through every tag until we find the corresponding closing tag.
		do {
			// Split the text using the first $tag_match pattern found.
			// Text before  pattern will be first in the array, text after
			// pattern will be at the end, and between will be any catches made
			// by the pattern.
			$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

			if ($parts === false || count($parts) < 3) {
				// End of $text reached with unbalanced tag(s).
				// In that case, we return original text unchanged and pass the
				// first character as filtered to prevent an infinite loop in the
				// parent function.
				return array($original_text[0], substr($original_text, 1));
			}

			$block_text .= $parts[0]; // Text before current tag.
			$tag         = $parts[1]; // Tag to handle.
			$text        = $parts[2]; // Remaining text after current tag.

			// Check for: Auto-close tag (like <hr/>)
			//			 Comments and Processing Instructions.
			if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
				$tag[1] === '!' || $tag[1] === '?')
			{
				// Just add the tag to the block as if it was text.
				$block_text .= $tag;
			}
			else {
				// Increase/decrease nested tag count. Only do so if
				// the tag's name match base tag's. A self-closing tag
				// (ending in "/>") leaves the depth unchanged.
				if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
					if ($tag[1] === '/') {
						$depth--;
					} else if ($tag[strlen($tag)-2] !== '/') {
						$depth++;
					}
				}

				// Check for `markdown="1"` attribute and handle it.
				// The alternation is grouped so that the anchors apply to all
				// three accepted values; without the group the pattern reads
				// as `^1`, or `block` anywhere, or `span$`, which also accepts
				// values such as "10" or "unblocked".
				if ($md_attr &&
					preg_match($markdown_attr_re, $tag, $attr_m) &&
					preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
				{
					// Remove `markdown` attribute from opening tag.
					$tag = preg_replace($markdown_attr_re, '', $tag);

					// Check if text inside this tag must be parsed in span mode.
					$mode = $attr_m[2] . $attr_m[3];
					$span_mode = $mode === 'span' || ($mode !== 'block' &&
						preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

					// Calculate indent before tag.
					if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
						$strlen = $this->utf8_strlen;
						$indent = $strlen($matches[1], 'UTF-8');
					} else {
						$indent = 0;
					}

					// End preceding block with this tag.
					$block_text .= $tag;
					$parsed .= $this->$hash_method($block_text);

					// Get enclosing tag name for the ParseMarkdown function.
					// (This pattern makes $tag_name_re safe without quoting.)
					preg_match('/^<([\w:$]*)\b/', $tag, $matches);
					$tag_name_re = $matches[1];

					// Parse the content using the HTML-in-Markdown parser.
					list ($block_text, $text)
						= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
							$tag_name_re, $span_mode);

					// Outdent markdown text.
					if ($indent > 0) {
						$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
													$block_text);
					}

					// Append tag content to parsed text.
					if (!$span_mode) {
						$parsed .= "\n\n$block_text\n\n";
					} else {
						$parsed .= (string) $block_text;
					}

					// Start over with a new block.
					$block_text = "";
				}
				else $block_text .= $tag;
			}

		} while ($depth > 0);

		// Hash last block text that wasn't processed inside the loop.
		$parsed .= $this->$hash_method($block_text);

		return array($parsed, $text);
	}
778
 
779
	/**
780
	 * Called whenever a tag must be hashed when a function inserts a "clean" tag
781
	 * in $text, it passes through this function and is automatically escaped,
782
	 * blocking invalid nested overlap.
783
	 * @param  string $text
784
	 * @return string
785
	 */
786
	protected function hashClean($text) {
		// Clean tags are hashed with the 'C' boundary marker.
		$boundary = 'C';
		$hashed = $this->hashPart($text, $boundary);
		return $hashed;
	}
789
 
790
	/**
791
	 * Turn Markdown link shortcuts into XHTML <a> tags.
792
	 * @param  string $text
793
	 * @return string
794
	 */
795
	protected function doAnchors($text) {
		// The in_anchor flag makes nested calls return the text untouched
		// while an outer call is still running.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		// Reference-style links: [link text] [id]
		$reference_re = '{
			(					# wrap whole match in $1
			  \[
				(' . $this->nested_brackets_re . ')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs';

		// Inline-style links: [link text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  \[
				(' . $this->nested_brackets_re . ')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					(' . $this->nested_url_parenthesis_re . ')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			  (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?	 # $8 = id/class attributes
			)
			}xs';

		// Reference-style shortcuts: [link text]
		$shortcut_re = '{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs';

		// The order matters: shortcuts must run last, otherwise they would
		// swallow the bracketed text of [link text][1] and [link text](/foo).
		$passes = [
			[$reference_re, '_doAnchors_reference_callback'],
			[$inline_re,    '_doAnchors_inline_callback'],
			[$shortcut_re,  '_doAnchors_reference_callback'],
		];
		foreach ($passes as $pass) {
			$text = preg_replace_callback($pass[0], array($this, $pass[1]), $text);
		}

		$this->in_anchor = false;
		return $text;
	}
859
 
860
	/**
861
	 * Callback for reference anchors
862
	 * @param  array $matches
863
	 * @return string
864
	 */
865
	protected function _doAnchors_reference_callback($matches) {
		$label = $matches[2];

		// Group 3 is missing for shortcut links like [this], and empty for
		// [this][]; in both cases the link text doubles as the id.
		$ref_id = $matches[3] ?? '';
		if ($ref_id == "") {
			$ref_id = $label;
		}

		// lower-case and turn embedded newlines into spaces
		$ref_id = preg_replace('{[ ]?\n}', ' ', strtolower($ref_id));

		// Unknown reference: leave the source text exactly as it was.
		if (!isset($this->urls[$ref_id])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$ref_id]);
		$html = "<a href=\"$href\"";

		if (isset($this->titles[$ref_id])) {
			$encoded_title = $this->encodeAttribute($this->titles[$ref_id]);
			$html .= " title=\"$encoded_title\"";
		}

		// Extra id/class attributes recorded with the link definition.
		if (isset($this->ref_attr[$ref_id])) {
			$html .= $this->ref_attr[$ref_id];
		}

		$label = $this->runSpanGamut($label);
		$html .= ">$label</a>";

		return $this->hashPart($html);
	}
901
 
902
	/**
903
	 * Callback for inline anchors
904
	 * @param  array $matches
905
	 * @return string
906
	 */
907
	protected function _doAnchors_inline_callback($matches) {
		$label = $this->runSpanGamut($matches[2]);

		// Group 3 holds an <angle-bracketed> href, group 4 a bare one.
		if ($matches[3] === '') {
			$href = $matches[4];
		} else {
			$href = $matches[3];
		}

		// Optional groups are absent from $matches when they did not match.
		$quote_char = $matches[6] ?? null;
		$title_text = $matches[7] ?? null;
		$extra_attr = $this->doExtraAttributes("a", $matches[8] ?? null);

		// if the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using the URL.
		$plain = $this->unhash($href);
		if ($plain !== $href) {
			$href = preg_replace('/^<(.*)>$/', '\1', $plain);
		}

		$href = $this->encodeURLAttribute($href);

		$html = "<a href=\"$href\"";
		if ($title_text !== null && $quote_char) {
			$title_text = $this->encodeAttribute($title_text);
			$html .= " title=\"$title_text\"";
		}
		$html .= $extra_attr;

		// NOTE(review): the link text goes through runSpanGamut() a second
		// time here, as in the original code. Confirm whether the second
		// pass is intentional before removing it.
		$label = $this->runSpanGamut($label);
		$html .= ">$label</a>";

		return $this->hashPart($html);
	}
934
 
935
	/**
	 * Turn Markdown image shortcuts into <img> tags.
	 *
	 * Two passes are made over the text: reference-style images first,
	 * then inline images.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doImages($text) {
		// Reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				(' . $this->nested_brackets_re . ')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';
		$text = preg_replace_callback(
			$reference_re, [$this, '_doImages_reference_callback'], $text);

		// Inline images: ![alt text](url "optional title")
		// Don't forget: encode * and _
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				(' . $this->nested_brackets_re . ')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					(' . $this->nested_url_parenthesis_re . ')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			  (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?	 # $8 = id/class attributes
			)
			}xs';

		return preg_replace_callback(
			$inline_re, [$this, '_doImages_inline_callback'], $text);
	}
989
 
990
	/**
	 * Callback for referenced images.
	 *
	 * Looks the lower-cased id ($matches[3], or the alt text $matches[2]
	 * when the id is empty) up in $this->urls. When no URL is registered
	 * under that id, the matched text is returned untouched.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doImages_reference_callback($matches) {
		$ref = strtolower($matches[3]);
		if ($ref === "") {
			// Shortcut form such as ![this][].
			$ref = strtolower($matches[2]);
		}

		$alt = $this->encodeAttribute($matches[2]);

		if (!isset($this->urls[$ref])) {
			// If there's no such link ID, leave intact.
			return $matches[1];
		}

		$src = $this->encodeURLAttribute($this->urls[$ref]);
		$tag = "<img src=\"$src\" alt=\"$alt\"";

		if (isset($this->titles[$ref])) {
			$tag .= " title=\"" . $this->encodeAttribute($this->titles[$ref]) . "\"";
		}
		if (isset($this->ref_attr[$ref])) {
			$tag .= $this->ref_attr[$ref];
		}

		return $this->hashPart($tag . $this->empty_element_suffix);
	}
1026
 
1027
	/**
	 * Callback for inline images.
	 *
	 * Uses $matches[2] as alt text, $matches[3] (or $matches[4] when
	 * $matches[3] is empty) as source URL, $matches[6] / $matches[7] as the
	 * optional title quote and title, and $matches[8] as the optional
	 * id/class attribute string.
	 *
	 * @param  array $matches
	 * @return string Hashed <img> element.
	 */
	protected function _doImages_inline_callback($matches) {
		$src        = ($matches[3] === '') ? $matches[4] : $matches[3];
		$quote_char = $matches[6] ?? null;
		$title_text = $matches[7] ?? null;
		$extra_attr = $this->doExtraAttributes("img", $matches[8] ?? null);

		$alt = $this->encodeAttribute($matches[2]);
		$src = $this->encodeURLAttribute($src);

		$tag = "<img src=\"$src\" alt=\"$alt\"";
		if ($title_text !== null && $quote_char) {
			$title_text = $this->encodeAttribute($title_text);
			$tag .= " title=\"$title_text\"";
		}

		return $this->hashPart($tag . $extra_attr . $this->empty_element_suffix);
	}
1051
 
1052
	/**
	 * Process markdown headers. Redefined to add ID and class attribute support.
	 *
	 * Setext-style headers are converted first, then atx-style headers.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doHeaders($text) {
		// Setext-style headers:
		//  Header 1  {#header1}
		//	  ========
		//
		//	  Header 2  {#header2 .class1 .class2}
		//	  --------
		//
		// Group numbering matters here: _doHeaders_callback_setext reads the
		// attributes from $matches[2] and the underline from $matches[3].
		$text = preg_replace_callback(
			'{
				(^.+?)								# $1: Header text
				(?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?	 # $2 = id/class attributes
				[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
			}mx',
			array($this, '_doHeaders_callback_setext'), $text);

		// atx-style headers:
		//	# Header 1        {#header1}
		//	## Header 2       {#header2}
		//	## Header 2 with closing hashes ##  {#header3.class1.class2}
		//	...
		//	###### Header 6   {.class2}
		//
		// With hashtag_protection enabled at least one space is required
		// between the leading #'s and the header text.
		$text = preg_replace_callback('{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]'.($this->hashtag_protection ? '+' : '*').'
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				(?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?	 # $3 = id/class attributes
				[ ]*
				\n+
			}xm',
			array($this, '_doHeaders_callback_atx'), $text);

		return $text;
	}
1094
 
1095
	/**
	 * Callback for setext headers.
	 *
	 * $matches[1] is the header text, $matches[2] the id/class attribute
	 * string and $matches[3] the underline. A text line starting with "- "
	 * above a single "-" is returned unchanged instead of becoming a header.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_setext($matches) {
		$heading   = $matches[1];
		$underline = $matches[3];

		if ($underline === '-' && preg_match('{^- }', $heading)) {
			return $matches[0];
		}

		// "=" underline gives <h1>, "-" underline gives <h2>.
		$level = 2;
		if ($underline[0] === '=') {
			$level = 1;
		}

		$default_id = null;
		if (is_callable($this->header_id_func)) {
			$default_id = call_user_func($this->header_id_func, $heading);
		}

		$attr = $this->doExtraAttributes("h$level", $matches[2] ?? null, $default_id);
		$html = "<h$level$attr>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
1113
 
1114
	/**
	 * Callback for atx headers.
	 *
	 * The header level is the number of characters in $matches[1];
	 * $matches[2] is the header text and $matches[3] (possibly absent) the
	 * id/class attribute string.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_atx($matches) {
		$level   = strlen($matches[1]);
		$heading = $matches[2];

		$default_id = null;
		if (is_callable($this->header_id_func)) {
			$default_id = call_user_func($this->header_id_func, $heading);
		}

		$attr = $this->doExtraAttributes("h$level", $matches[3] ?? null, $default_id);
		$html = "<h$level$attr>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
1127
 
1128
	/**
	 * Form HTML tables.
	 *
	 * Tables written with a leading pipe on every row are handled first
	 * (the leading pipe is stripped by _doTable_leadingPipe_callback), then
	 * tables without a leading pipe.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doTables($text) {
		$less_than_tab = $this->tab_width - 1;
		// Find tables with leading pipe.
		//
		//	| Header 1 | Header 2
		//	| -------- | --------
		//	| Cell 1   | Cell 2
		//	| Cell 3   | Cell 4
		$text = preg_replace_callback('
			{
				^							# Start of a line
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				[|]							# Optional leading pipe (present)
				(.+) \n						# $1: Header row (at least one pipe)

				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline

				(							# $3: Cells
					(?>
						[ ]*				# Allowed whitespace.
						[|] .* \n			# Row content.
					)*
				)
				(?=\n|\Z)					# Stop at final double newline.
			}xm',
			array($this, '_doTable_leadingPipe_callback'), $text);

		// Find tables without leading pipe.
		//
		//	Header 1 | Header 2
		//	-------- | --------
		//	Cell 1   | Cell 2
		//	Cell 3   | Cell 4
		//
		// The callback name is spelled exactly as the method is declared
		// rather than relying on case-insensitive method lookup.
		$text = preg_replace_callback('
			{
				^							# Start of a line
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				(\S.*[|].*) \n				# $1: Header row (at least one pipe)

				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline

				(							# $3: Cells
					(?>
						.* [|] .* \n		# Row content
					)*
				)
				(?=\n|\Z)					# Stop at final double newline.
			}xm',
			array($this, '_doTable_callback'), $text);

		return $text;
	}
1187
 
1188
	/**
	 * Callback for tables whose rows start with a pipe: strips the leading
	 * pipe (and any spaces before it) from each content row, then delegates
	 * to _doTable_callback.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doTable_leadingPipe_callback($matches) {
		$rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);

		return $this->_doTable_callback([
			$matches[0],
			$matches[1],
			$matches[2],
			$rows_without_pipe,
		]);
	}
1202
 
1203
	/**
	 * Make the alignment attribute for a table cell.
	 *
	 * When $this->table_align_class_tmpl is set, a class attribute is
	 * produced with '%%' in the template replaced by the alignment name;
	 * otherwise a plain align attribute is produced.
	 *
	 * @param  string $alignname
	 * @return string
	 */
	protected function _doTable_makeAlignAttr($alignname) {
		if (!empty($this->table_align_class_tmpl)) {
			$class = str_replace('%%', $alignname, $this->table_align_class_tmpl);
			return " class=\"$class\"";
		}

		return " align=\"$alignname\"";
	}
1216
 
1217
	/**
	 * Callback for processing tables.
	 *
	 * $matches[1] is the header row, $matches[2] the header underline and
	 * $matches[3] the content rows.
	 *
	 * @param  array $matches
	 * @return string Hashed <table> block followed by a newline.
	 */
	protected function _doTable_callback($matches) {
		// Remove any trailing pipe from each line.
		$trailing_pipe_re = '/[|] *$/m';
		$head      = preg_replace($trailing_pipe_re, '', $matches[1]);
		$underline = preg_replace($trailing_pipe_re, '', $matches[2]);
		$content   = preg_replace($trailing_pipe_re, '', $matches[3]);

		// Read the alignment of each column from the header underline.
		$align_patterns = [
			'right'  => '/^ *-+: *$/',
			'center' => '/^ *:-+: *$/',
			'left'   => '/^ *:-+ *$/',
		];
		$attr = [];
		foreach (preg_split('/ *[|] */', $underline) as $n => $separator) {
			$attr[$n] = '';
			foreach ($align_patterns as $align => $align_re) {
				if (preg_match($align_re, $separator)) {
					$attr[$n] = $this->_doTable_makeAlignAttr($align);
					break;
				}
			}
		}

		// Parsing span elements, including code spans, character escapes,
		// and inline HTML tags, so that pipes inside those get ignored.
		$head      = $this->parseSpan($head);
		$headers   = preg_split('/ *[|] */', $head);
		$col_count = count($headers);
		$attr      = array_pad($attr, $col_count, '');

		// Column headers.
		$html = "<table>\n" . "<thead>\n" . "<tr>\n";
		foreach ($headers as $n => $header) {
			$html .= "  <th" . $attr[$n] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
		}
		$html .= "</tr>\n" . "</thead>\n";

		// Body: one <tr> per content line.
		$html .= "<tbody>\n";
		foreach (explode("\n", trim($content, "\n")) as $line) {
			// Same span parsing as for the header row.
			$line = $this->parseSpan($line);

			// Split into at most $col_count cells and pad short rows.
			$cells = preg_split('/ *[|] */', $line, $col_count);
			$cells = array_pad($cells, $col_count, '');

			$html .= "<tr>\n";
			foreach ($cells as $n => $cell) {
				$html .= "  <td" . $attr[$n] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
			}
			$html .= "</tr>\n";
		}
		$html .= "</tbody>\n";
		$html .= "</table>";

		return $this->hashBlock($html) . "\n";
	}
1287
 
1288
	/**
	 * Form HTML definition lists.
	 *
	 * Every whole definition list found at the start of the text or right
	 * after a blank line is handed to _doDefLists_callback.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doDefLists($text) {
		$less_than_tab = $this->tab_width - 1;

		// Re-usable pattern to match any entire dl list:
		$whole_list_re = '(?>
			(								# $1 = whole list
			  (								# $2
				[ ]{0,' . $less_than_tab . '}
				((?>.*\S.*\n)+)				# $3 = defined term
				\n?
				[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
			  )
			  (?s:.+?)
			  (								# $4
				  \z
				|
				  \n{2,}
				  (?=\S)
				  (?!						# Negative lookahead for another term
					[ ]{0,' . $less_than_tab . '}
					(?: \S.*\n )+?			# defined term
					\n?
					[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
				  )
				  (?!						# Negative lookahead for another definition
					[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
				  )
			  )
			)
		)'; // mx

		$anchored_list_re = '{
				(?>\A\n?|(?<=\n\n))
				' . $whole_list_re . '
			}mx';

		return preg_replace_callback(
			$anchored_list_re, [$this, '_doDefLists_callback'], $text);
	}
1332
 
1333
	/**
	 * Callback for processing definition lists.
	 *
	 * Converts the whole list text ($matches[1]) into its term and
	 * definition items, trims the result and wraps it in a hashed <dl>
	 * block.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doDefLists_callback($matches) {
		$items = trim($this->processDefListItems($matches[1]));

		return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
	}
1348
 
1349
	/**
	 * Process the contents of a single definition list, splitting it
	 * into individual term and definition list items.
	 *
	 * Terms are converted first (to <dt> elements), then definitions (to
	 * <dd> elements); the definition pattern relies on the "<dt>" markers
	 * produced by the first pass to know where a definition ends.
	 *
	 * @param  string $list_str
	 * @return string
	 */
	protected function processDefListItems($list_str) {

		$less_than_tab = $this->tab_width - 1;

		// Trim trailing blank lines:
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		// Process definition terms.
		// The lookahead for the definition mark uses $less_than_tab, like
		// every other indentation check here, so that terms are still
		// recognised when tab_width is larger than 4.
		$list_str = preg_replace_callback('{
			(?>\A\n?|\n\n+)						# leading line
			(									# definition terms = $1
				[ ]{0,' . $less_than_tab . '}	# leading whitespace
				(?!\:[ ]|[ ])					# negative lookahead for a definition
												#   mark (colon) or more whitespace.
				(?> \S.* \n)+?					# actual term (not whitespace).
			)
			(?=\n?[ ]{0,' . $less_than_tab . '}:[ ])	# lookahead for following line feed
												#   with a definition mark.
			}xm',
			array($this, '_processDefListItems_callback_dt'), $list_str);

		// Process actual definitions.
		$list_str = preg_replace_callback('{
			\n(\n+)?							# leading line = $1
			(									# marker space = $2
				[ ]{0,' . $less_than_tab . '}	# whitespace before colon
				\:[ ]+							# definition mark (colon)
			)
			((?s:.+?))							# definition text = $3
			(?= \n+ 							# stop at next definition mark,
				(?:								# next term or end of text
					[ ]{0,' . $less_than_tab . '} \:[ ]	|
					<dt> | \z
				)
			)
			}xm',
			array($this, '_processDefListItems_callback_dd'), $list_str);

		return $list_str;
	}
1395
 
1396
	/**
	 * Callback for <dt> elements in definition lists.
	 *
	 * Each line of the trimmed term text ($matches[1]) becomes its own
	 * <dt> element.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _processDefListItems_callback_dt($matches) {
		$lines = explode("\n", trim($matches[1]));

		$items = array_map(function ($line) {
			return "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
		}, $lines);

		return implode('', $items) . "\n";
	}
1410
 
1411
	/**
	 * Callback for <dd> elements in definition lists.
	 *
	 * A definition preceded by a blank line ($matches[1]) or containing a
	 * blank line itself is rendered through the block gamut; any other
	 * definition is rendered through the span gamut only.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _processDefListItems_callback_dd($matches) {
		$marker = $matches[2];
		$body   = $matches[3];

		$as_block = $matches[1] || preg_match('/\n{2,}/', $body);

		if (!$as_block) {
			$body = $this->runSpanGamut($this->outdent(rtrim($body)));
			return "\n<dd>" . $body . "</dd>\n";
		}

		// Replace marker with the appropriate whitespace indentation
		$body = str_repeat(' ', strlen($marker)) . $body;
		$body = $this->runBlockGamut($this->outdent($body . "\n\n"));

		return "\n<dd>" . "\n" . $body . "\n" . "</dd>\n";
	}
1434
 
1435
	/**
	 * Adding the fenced code block syntax to regular Markdown:
	 *
	 * ~~~
	 * Code block
	 * ~~~
	 *
	 * The opening marker is three or more tildes or backticks; the block
	 * ends at a line repeating exactly that marker.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doFencedCodeBlocks($text) {
		$fence_re = '{
				(?:\n|\A)
				# 1: Opening marker
				(
					(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
				)
				[ ]*
				(?:
					\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
				)?
				[ ]*
				(?:
					' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
				)?
				[ ]* \n # Whitespace and newline following marker.

				# 4: Content
				(
					(?>
						(?!\1 [ ]* \n)	# Not a closing marker.
						.*\n+
					)+
				)

				# Closing marker.
				\1 [ ]* (?= \n )
			}xm';

		return preg_replace_callback(
			$fence_re, [$this, '_doFencedCodeBlocks_callback'], $text);
	}
1478
 
1479
	/**
	 * Callback to process fenced code blocks.
	 *
	 * $matches[2] is the standalone class name, $matches[3] the extra
	 * attribute string and $matches[4] the code itself. The attributes end
	 * up on <pre> when $this->code_attr_on_pre is set, on <code> otherwise.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doFencedCodeBlocks_callback($matches) {
		$class_name  = $matches[2] ?? null;
		$extra_attrs = $matches[3] ?? null;
		$code        = $matches[4];

		// Escape the code, or let the user-supplied function do it.
		if ($this->code_block_content_func) {
			$code = call_user_func($this->code_block_content_func, $code, $class_name);
		} else {
			$code = htmlspecialchars($code, ENT_NOQUOTES);
		}

		// Leading newlines are turned into explicit line breaks.
		$code = preg_replace_callback('/^\n+/',
			[$this, '_doFencedCodeBlocks_newlines'], $code);

		$classes = [];
		if ($class_name !== "") {
			if ($class_name[0] === '.') {
				$class_name = substr($class_name, 1);
			}
			$classes[] = $this->code_class_prefix . $class_name;
		}

		$attr_str = $this->doExtraAttributes(
			$this->code_attr_on_pre ? "pre" : "code", $extra_attrs, null, $classes);

		if ($this->code_attr_on_pre) {
			$html = "<pre$attr_str><code>$code</code></pre>";
		} else {
			$html = "<pre><code$attr_str>$code</code></pre>";
		}

		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1512
 
1513
	/**
	 * Replace new lines in fenced code blocks: returns one <br> tag per
	 * character of the matched text.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doFencedCodeBlocks_newlines($matches) {
		$line_break = "<br" . $this->empty_element_suffix;
		$count      = strlen($matches[0]);

		return str_repeat($line_break, $count);
	}
1522
 
1523
	/**
	 * Redefining emphasis markers so that emphasis by underscore does not
	 * work in the middle of a word.
	 *
	 * In each list the '' key holds the pattern for an opening marker and
	 * the other keys hold the pattern closing that particular marker.
	 * NOTE(review): key semantics inferred from the lookarounds; the code
	 * consuming these lists is not in view -- confirm against the parent
	 * class.
	 *
	 * @var array
	 */
	protected array $em_relist = [
		''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
		'*' => '(?<![\s*])\*(?!\*)',
		'_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
	];
	protected array $strong_relist = [
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
		'**' => '(?<![\s*])\*\*(?!\*)',
		'__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
	];
	protected array $em_strong_relist = [
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
		'***' => '(?<![\s*])\*\*\*(?!\*)',
		'___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
	];
1543
 
1544
	/**
	 * Parse text into paragraphs.
	 *
	 * The text is split on blank lines; each chunk goes through the span
	 * gamut and, unless it is a lone block/clean-tag hash or wrapping is
	 * disabled, is wrapped in <p> tags. Remaining hashes are expanded at
	 * the end.
	 *
	 * @param  string $text String to process in paragraphs
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string       HTML output
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

		$chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

		$paragraphs = [];
		foreach ($chunks as $chunk) {
			$chunk = trim($this->runSpanGamut($chunk));

			// Clean tag hashes & block tag hashes are left alone.
			if ($wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk)) {
				$chunk = "<p>$chunk</p>";
			}
			$paragraphs[] = $chunk;
		}

		// Join everything back, then remove any tag hashes still present.
		return $this->unhash(implode("\n\n", $paragraphs));
	}
1578
 
1579
 
1580
	/**
	 * Footnotes - Strips footnote definitions from text; the callback
	 * stores their content in $this->footnotes.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function stripFootnotes($text) {
		$less_than_tab = $this->tab_width - 1;

		// Footnote definitions are in the form:
		//   [^id]: footnote text, possibly continuing on following lines
		$definition_re = '{
			^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?:	# note_id = $1
			  [ ]*
			  \n?					# maybe *one* newline
			(						# text = $2 (no blank lines allowed)
				(?:
					.+				# actual text
				|
					\n				# newlines but
					(?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
					(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
									# by non-indented content
				)*
			)
			}xm';

		return preg_replace_callback(
			$definition_re, [$this, '_stripFootnotes_callback'], $text);
	}
1609
 
1610
	/**
	 * Callback for stripping footnotes: stores the outdented footnote text
	 * ($matches[2]) under the prefixed note id ($matches[1]) and removes
	 * the definition from the text.
	 *
	 * @param  array $matches
	 * @return string Always the empty string.
	 */
	protected function _stripFootnotes_callback($matches) {
		$key = $this->fn_id_prefix . $matches[1];
		$this->footnotes[$key] = $this->outdent($matches[2]);

		// The whole definition block is replaced by nothing.
		return '';
	}
1620
 
1621
	/**
	 * Replace footnote references in $text [^id] with a special text-token
	 * which will be replaced by the actual footnote marker in appendFootnotes.
	 * Nothing is replaced while $this->in_anchor is set.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doFootnotes($text) {
		if ($this->in_anchor) {
			return $text;
		}

		return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
	}
1633
 
1634
	/**
	 * Append footnote list to text.
	 *
	 * Footnote tokens are first turned into markers. When at least one
	 * footnote was referenced, the footnote HTML is assembled; it is only
	 * appended to the text when $this->omit_footnotes is not set.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function appendFootnotes($text) {
		$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
			[$this, '_appendFootnotes_callback'], $text);

		if (empty($this->footnotes_ordered)) {
			return $text;
		}

		$this->_doFootnotes();

		if ($this->omit_footnotes) {
			return $text;
		}

		return $text
			. "\n\n"
			. "<div class=\"footnotes\" role=\"doc-endnotes\">\n"
			. "<hr" . $this->empty_element_suffix . "\n"
			. $this->footnotes_assembled
			. "</div>";
	}
1655
 
1656
 
1657
	/**
	 * Generates the HTML for footnotes and stores it in
	 * $this->footnotes_assembled. Called by appendFootnotes, even if
	 * footnotes are not being appended.
	 *
	 * Entries are taken off the front of $this->footnotes_ordered one at a
	 * time until it is empty, because rendering a footnote can register
	 * further footnotes through _appendFootnotes_callback.
	 *
	 * @return void
	 */
	protected function _doFootnotes() {
		// Only the empty string means "no backlink"; a literal "0" is
		// valid content, so empty() must not be used on these settings.
		$with_backlinks = $this->fn_backlink_html !== "";

		// The backlink attribute template does not depend on the footnote,
		// so it is built once here. Placeholders are filled in per link.
		$attr = array();
		if ($this->fn_backlink_class !== "") {
			$class = $this->encodeAttribute($this->fn_backlink_class);
			$attr['class'] = " class=\"$class\"";
		}
		$attr['role'] = " role=\"doc-backlink\"";
		if ($with_backlinks) {
			if ($this->fn_backlink_title !== "") {
				$attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"';
			}
			if ($this->fn_backlink_label !== "") {
				$attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"';
			}
		}
		$attr_template = implode('', $attr);

		$num = 0;

		$text = "<ol>\n\n";
		while (!empty($this->footnotes_ordered)) {
			$footnote = reset($this->footnotes_ordered);
			$note_id = key($this->footnotes_ordered);
			unset($this->footnotes_ordered[$note_id]);
			$ref_count = $this->footnotes_ref_count[$note_id];
			unset($this->footnotes_ref_count[$note_id]);
			unset($this->footnotes[$note_id]);

			$footnote .= "\n"; // Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");
			$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
				array($this, '_appendFootnotes_callback'), $footnote);

			$num++;
			$note_id = $this->encodeAttribute($note_id);

			// Prepare backlink, multiple backlinks if multiple references.
			// Do not create empty backlinks if the html is blank.
			$backlink = "";
			if ($with_backlinks) {
				for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) {
					$parsed_attr = $this->parseFootnotePlaceholders(
						$attr_template,
						$num,
						$ref_num
					);
					$backlink_text = $this->parseFootnotePlaceholders(
						$this->fn_backlink_html,
						$num,
						$ref_num
					);
					// The first reference has no number in its anchor id.
					$ref_count_mark = $ref_num > 1 ? $ref_num : '';
					$backlink .= " <a href=\"#fnref$ref_count_mark:$note_id\"$parsed_attr>$backlink_text</a>";
				}
				$backlink = trim($backlink);
			}

			// Add backlink to last paragraph; create new paragraph if needed.
			if ($backlink !== "") {
				if (preg_match('{</p>$}', $footnote)) {
					$footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
				} else {
					$footnote .= "\n\n<p>$backlink</p>";
				}
			}

			$text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}
		$text .= "</ol>\n";

		$this->footnotes_assembled = $text;
	}
1733
 
1734
	/**
	 * Callback for appending footnotes: turns one footnote token into its
	 * superscript marker.
	 *
	 * When no footnote is stored under the prefixed id, the original
	 * "[^id]" text is restored instead.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _appendFootnotes_callback($matches) {
		$id = $this->fn_id_prefix . $matches[1];

		if (!isset($this->footnotes[$id])) {
			return "[^" . $matches[1] . "]";
		}

		if (!isset($this->footnotes_numbers[$id])) {
			// First reference: transfer the footnote content to the
			// ordered list and give it its number.
			$this->footnotes_ordered[$id]   = $this->footnotes[$id];
			$this->footnotes_ref_count[$id] = 1;
			$this->footnotes_numbers[$id]   = $this->footnote_counter++;
			$mark = '';
		} else {
			// Repeated reference: the anchor id carries the reference count.
			$mark = $this->footnotes_ref_count[$id] += 1;
		}
		$number = $this->footnotes_numbers[$id];

		$link_attr = "";
		if ($this->fn_link_class !== "") {
			$link_attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
		}
		if ($this->fn_link_title !== "") {
			$link_attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
		}
		$link_attr .= " role=\"doc-noteref\"";

		$link_attr = str_replace("%%", $number, $link_attr);
		$id = $this->encodeAttribute($id);

		return "<sup id=\"fnref$mark:$id\">"
			. "<a href=\"#fn:$id\"$link_attr>$number</a>"
			. "</sup>";
	}
1781
 
1782
	/**
	 * Build footnote label by evaluating any placeholders.
	 * - ^^  footnote number
	 * - %%  footnote reference number (Nth reference to footnote number)
	 *
	 * @param  string $label
	 * @param  int    $footnote_number
	 * @param  int    $reference_number
	 * @return string
	 */
	protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) {
		$substitutions = [
			'^^' => $footnote_number,
			'%%' => $reference_number,
		];

		return str_replace(
			array_keys($substitutions),
			array_values($substitutions),
			$label
		);
	}
1798
 
1799
 
1800
	/**
	 * Abbreviations - strips abbreviation definitions from text; the
	 * callback stores their descriptions for later use.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function stripAbbreviations($text) {
		$less_than_tab = $this->tab_width - 1;

		// Abbreviation definitions are in the form:
		//   *[abbr]: description (rest of the line)
		$definition_re = '{
			^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?:	# abbr_id = $1
			(.*)					# text = $2 (no blank lines allowed)
			}xm';

		return preg_replace_callback(
			$definition_re, [$this, '_stripAbbreviations_callback'], $text);
	}
1818
 
1819
	/**
	 * Callback for stripping abbreviations.
	 *
	 * Adds the abbreviation ($matches[1]), regex-quoted, to the alternation
	 * in $this->abbr_word_re and records its trimmed description
	 * ($matches[2]). The definition itself is removed from the text.
	 *
	 * @param  array $matches
	 * @return string Always the empty string.
	 */
	protected function _stripAbbreviations_callback($matches) {
		$abbr_word = $matches[1];
		$abbr_desc = $matches[2];

		// Compare against the empty string instead of testing truthiness:
		// a pattern consisting of just "0" is falsy in PHP, and the "|"
		// separator would be dropped, gluing two abbreviations together.
		if ((string) $this->abbr_word_re !== '') {
			$this->abbr_word_re .= '|';
		}
		$this->abbr_word_re .= preg_quote($abbr_word);
		$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);

		return ''; // String that will replace the block
	}
1834
 
1835
	/**
	 * Find defined abbreviations in text and wrap them in <abbr> elements.
	 *
	 * An abbreviation only matches when it is not directly preceded or
	 * followed by a word character or the \x1A hash marker.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doAbbreviations($text) {
		// Compare against the empty string instead of testing truthiness,
		// so that a pattern consisting of just "0" is still applied.
		if ((string) $this->abbr_word_re === '') {
			return $text;
		}

		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		return preg_replace_callback('{' .
			'(?<![\w\x1A])' .
			'(?:' . $this->abbr_word_re . ')' .
			'(?![\w\x1A])' .
			'}',
			array($this, '_doAbbreviations_callback'), $text);
	}
1853
 
1854
	/**
	 * Callback for processing abbreviations.
	 *
	 * Wraps the matched abbreviation in a hashed <abbr> element, with a
	 * title attribute when a non-empty description is recorded for it.
	 * Text with no recorded description entry is returned unchanged.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAbbreviations_callback($matches) {
		$abbr = $matches[0];

		if (!isset($this->abbr_desciptions[$abbr])) {
			return $abbr;
		}

		$desc = $this->abbr_desciptions[$abbr];

		// Only a truly empty description omits the title; empty() would
		// also discard a description of "0".
		if ((string) $desc === '') {
			return $this->hashPart("<abbr>$abbr</abbr>");
		}

		$desc = $this->encodeAttribute($desc);
		return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
	}
1871
}