Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
/**
3
 * Markdown  -  A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <michel.fortin@michelf.com>
7
 * @copyright 2004-2022 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
 
11
namespace Michelf;
12
 
13
/**
14
 * Markdown Parser Class
15
 */
16
class Markdown implements MarkdownInterface {
17
	/**
	 * Version string of this Markdown library.
	 * @var string
	 */
	public const MARKDOWNLIB_VERSION = "2.0.0";
22
 
23
	/**
	 * Convenience entry point: transform $text with a parser instance that is
	 * created on first use and then cached, one instance per calling class.
	 *
	 * The class is resolved with late static binding, so invoking this on a
	 * derived class builds and caches an instance of that derived class.
	 *
	 * @api
	 *
	 * @param  string $text
	 * @return string
	 */
	public static function defaultTransform(string $text): string {
		// Cache of parser instances, indexed by class name.
		static $parser_list;

		// The class this static method was actually called on.
		$class_name = static::class;

		// Build the parser lazily the first time this class is seen.
		if (empty($parser_list[$class_name])) {
			$parser_list[$class_name] = new $class_name;
		}

		return $parser_list[$class_name]->transform($text);
	}
48
 
49
	/**
	 * Configuration variables
	 */

	/**
	 * Suffix appended to empty elements such as <hr> and <br>.
	 * Change to ">" for HTML output.
	 */
	public string $empty_element_suffix = " />";

	/**
	 * Tab width in columns. Block-level patterns accept up to
	 * ($tab_width - 1) leading spaces before a construct.
	 */
	public int $tab_width = 4;

	/**
	 * Set to `true` to disallow markup or entities.
	 */
	public bool $no_markup   = false;
	public bool $no_entities = false;


	/**
	 * Set to `true` to turn every newline into a line break, without
	 * requiring two trailing spaces.
	 */
	public bool $hard_wrap = false;

	/**
	 * Predefined URLs and titles for reference links and images. They are
	 * copied into the working tables at the start of every transform.
	 */
	public array $predef_urls   = [];
	public array $predef_titles = [];

	/**
	 * Optional filter function for URLs.
	 * @var callable|null
	 */
	public $url_filter_func = null;

	/**
	 * Optional callback generating the id="" value of a header.
	 * @var callable|null
	 */
	public $header_id_func = null;

	/**
	 * Optional function converting code block content to HTML.
	 * @var callable|null
	 */
	public $code_block_content_func = null;

	/**
	 * Optional function converting code span content to HTML.
	 * @var callable|null
	 */
	public $code_span_content_func = null;

	/**
	 * Toggle for the "enhanced ordered list" behaviour: when `true`, an
	 * ordered list starts from the number of its first item.
	 *
	 * For example:
	 * 2. List item two
	 * 3. List item three
	 *
	 * Becomes:
	 * <ol start="2">
	 * <li>List item two</li>
	 * <li>List item three</li>
	 * </ol>
	 */
	public bool $enhanced_ordered_list = false;
121
 
122
	/**
	 * Parser implementation
	 */

	/**
	 * Maximum nesting depth handled when matching balanced [brackets], and
	 * the regular expression the constructor builds from that depth.
	 */
	protected int $nested_brackets_depth = 6;
	protected string $nested_brackets_re;

	/**
	 * Same pair for balanced parentheses inside URLs.
	 */
	protected int $nested_url_parenthesis_depth = 4;
	protected string $nested_url_parenthesis_re;

	/**
	 * Characters that can be backslash-escaped, and the character-class
	 * regular expression the constructor builds from them.
	 */
	protected string $escape_chars = '\`*_{}[]()>#+-.!';
	protected string $escape_chars_re;
140
 
141
	/**
	 * Constructor. Prepares the helper state and regular expressions that
	 * depend on configurable properties, then orders the gamuts.
	 * @return void
	 */
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced-bracket matcher, unrolled to the configured depth.
		$bracket_depth = $this->nested_brackets_depth;
		$this->nested_brackets_re =
			str_repeat('(?>[^\[\]]+|\[', $bracket_depth) .
			str_repeat('\])*', $bracket_depth);

		// Balanced-parenthesis matcher for URLs, unrolled the same way.
		$paren_depth = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re =
			str_repeat('(?>[^()\s]+|\(', $paren_depth) .
			str_repeat('(?>\)))*', $paren_depth);

		// Character class matching any escapable character.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Order each gamut by ascending priority (method name => priority).
		foreach (['document_gamut', 'block_gamut', 'span_gamut'] as $gamut) {
			asort($this->$gamut);
		}
	}
164
 
165
 
166
	/**
	 * Working tables used during a transformation: reference URLs and
	 * titles indexed by link id, and hashed HTML fragments indexed by key.
	 */
	protected array $urls        = [];
	protected array $titles      = [];
	protected array $html_hashes = [];

	/**
	 * Status flag set while anchors are being processed, to avoid
	 * invalid nesting.
	 */
	protected bool $in_anchor = false;

	/**
	 * Status flag to avoid invalid nesting during emphasis processing.
	 */
	protected bool $in_emphasis_processing = false;
182
 
183
	/**
	 * Reset the parser state before a transformation starts.
	 * @return void
	 */
	protected function setup() {
		// Reference tables start from the predefined entries.
		$this->urls   = $this->predef_urls;
		$this->titles = $this->predef_titles;

		// No hashed fragments yet.
		$this->html_hashes = [];

		// Clear both nesting guards.
		$this->in_anchor = $this->in_emphasis_processing = false;
	}
195
 
196
	/**
	 * Release the working tables once a transformation is finished so they
	 * do not keep memory alive unnecessarily.
	 * @return void
	 */
	protected function teardown() {
		$this->urls = $this->titles = $this->html_hashes = [];
	}
206
 
207
	/**
	 * Main function. Normalises the input text, then passes it through
	 * every method of the document gamut in priority order.
	 *
	 * @api
	 *
	 * @param  string $text
	 * @return string
	 */
	public function transform(string $text): string {
		$this->setup();

		// Drop a leading UTF-8 BOM and every \x1A character; \x1A is the
		// marker hashPart() embeds in its keys, so it must not occur in input.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		// Normalise DOS (\r\n) and old Mac (\r) line endings to \n.
		$text = preg_replace('{\r\n?}', "\n", $text);

		// Guarantee the text ends with a couple of newlines.
		$text .= "\n\n";

		// Tabs become spaces.
		$text = $this->detab($text);

		// Block-level HTML is swapped for hash keys.
		$text = $this->hashHTMLBlocks($text);

		// Empty out lines holding only spaces, so that later patterns can
		// match consecutive blank lines with a plain /\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Apply the document gamut; keys are method names.
		foreach (array_keys($this->document_gamut) as $method) {
			$text = $this->$method($text);
		}

		$this->teardown();

		return $text . "\n";
	}
250
 
251
	/**
	 * Document gamut: method name => priority. The constructor sorts it so
	 * that lower priorities run first.
	 */
	protected array $document_gamut = [
		// Strip link definitions, store in hashes.
		"stripLinkDefinitions" => 20,
		"runBasicBlockGamut"   => 30,
	];
259
 
260
	/**
	 * Remove link definitions from the text; the callback records their
	 * URLs and titles in the reference tables.
	 * @param  string $text
	 * @return string
	 */
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by fewer spaces than one tab stop.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		return preg_replace_callback('{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm',
			[$this, '_stripLinkDefinitions_callback'],
			$text
		);
	}
298
 
299
	/**
	 * Callback for stripLinkDefinitions(): record one definition and remove
	 * it from the text.
	 * @param  array $matches
	 * @return string
	 */
	protected function _stripLinkDefinitions_callback($matches) {
		// Link ids are stored lower-cased.
		$id = strtolower($matches[1]);

		// The URL is in group 2 (<bracketed> form) or group 3 (bare form).
		$this->urls[$id] = ($matches[2] == '') ? $matches[3] : $matches[2];

		// Bound by reference: when the optional title group is absent this
		// stores null instead of reading an undefined index.
		$this->titles[$id] =& $matches[4];

		// The whole definition is replaced by nothing.
		return '';
	}
311
 
312
	/**
	 * Replace block-level HTML in the text with hash keys. Returns the text
	 * untouched when markup is disallowed ($no_markup).
	 * @param  string $text
	 * @return string
	 */
	protected function hashHTMLBlocks($text) {
		if ($this->no_markup) {
			return $text;
		}

		// A block may be indented by fewer spaces than one tab stop.
		$max_indent = $this->tab_width - 1;

		/**
		 * Only block-level HTML tags (headers, lists, tables, ...) are
		 * hashed here, because "paragraphs" wrapped in non-block-level tags
		 * (anchors, phrase emphasis, spans) must still get <p>s around them.
		 * The tag lists are hard-coded:
		 *
		 * *  List "a" holds tags that can be inline or block-level. They
		 *    count as block-level when the start tag is alone on its line;
		 *    otherwise they are not matched here and are handled as inline
		 *    later.
		 * *  List "b" holds tags that are always block-level.
		 */
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = implode('|', [
			'p', 'div', 'h[1-6]', 'blockquote', 'pre', 'table', 'dl', 'ol', 'ul', 'address',
			'script', 'noscript', 'style', 'form', 'fieldset', 'iframe', 'math', 'svg',
			'article', 'section', 'nav', 'aside', 'hgroup', 'header', 'footer',
			'figure', 'details', 'summary',
		]);

		// Pattern for the (optional) attributes of a tag.
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';

		// Pattern for the content of a block tag, unrolled so that tags of
		// the same name may nest up to $depth levels deep.
		$depth = 4;
		$nested_open = '
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >';
		$nested_close = '
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*';
		$content =
			str_repeat($nested_open, $depth) .	// up to the end of each opening tag
			'.*?' .								// last level nested tag content
			str_repeat($nested_close, $depth);

		// Same pattern with the tag-name back-reference moved to group 3.
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * Nested blocks are matched here, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to
		 * match, and the inner nested divs must be indented.
		 */
		return preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$max_indent.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$max_indent.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$max_indent.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$max_indent.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$max_indent.'}
					(?s:
						<([?%])			# $2
						.*?
						\2>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			[$this, '_hashHTMLBlocks_callback'],
			$text
		);
	}
464
 
465
	/**
	 * Callback for hashHTMLBlocks(): swap one matched HTML block for its
	 * hash key, isolated by blank lines.
	 * @param  array $matches
	 * @return string
	 */
	protected function _hashHTMLBlocks_callback($matches) {
		// Group 1 holds the complete HTML block.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
475
 
476
	/**
	 * Store $text in the hash table and return the unique token standing in
	 * for it in the text stream; unhash() turns tokens back into text.
	 *
	 * $boundary is the character placed on both sides of the token. By
	 * convention "B" marks block elements that must not be wrapped in
	 * paragraph tags at the end, ":" marks elements acting as word
	 * separators, and "X" is used in the general case.
	 *
	 * @param  string $text
	 * @param  string $boundary
	 * @return string
	 */
	protected function hashPart($text, $boundary = 'X') {
		// Counter shared by all calls, making every token distinct.
		static $counter = 0;

		// Expand tokens already inside $text now, so that a single unhash
		// pass at the end is enough.
		$text = $this->unhash($text);

		// Token layout: boundary, \x1A marker, sequence number, boundary.
		$key = $boundary . "\x1A" . (++$counter) . $boundary;
		$this->html_hashes[$key] = $text;

		return $key;
	}
501
 
502
	/**
	 * Shortcut for hashPart() using the block-level boundary "B".
	 * @param  string $text
	 * @return string
	 */
	protected function hashBlock($text) {
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
510
 
511
	/**
	 * Block gamut: the transformations that form block-level tags such as
	 * headers and list items, as method name => priority.
	 */
	protected array $block_gamut = [
		"doHeaders"         => 10,
		"doHorizontalRules" => 20,
		"doLists"           => 40,
		"doCodeBlocks"      => 50,
		"doBlockQuotes"     => 60,
	];
522
 
523
	/**
	 * Run the block gamut transformations, hashing raw HTML blocks first.
	 *
	 * Raw HTML has to be hashed for each block and not only once for the
	 * whole document: blocks nested in list items may have been indented,
	 * and would have looked like code blocks in an earlier hashHTMLBlocks
	 * pass.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBlockGamut($text) {
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
539
 
540
	/**
	 * Run the block gamut transformations without hashing HTML blocks
	 * first. Useful when HTML blocks are known to be hashed already, as in
	 * the first whole-document pass.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBasicBlockGamut($text) {
		// Keys of the gamut are method names, already in priority order.
		foreach (array_keys($this->block_gamut) as $method) {
			$text = $this->$method($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
559
 
560
	/**
	 * Convert horizontal rules (three or more "-", "*" or "_" markers on a
	 * line of their own) into hashed <hr> elements.
	 * @param  string $text
	 * @return string
	 */
	protected function doHorizontalRules($text) {
		// The rule is hashed once, up front; every match reuses its key.
		$rule_key    = $this->hashBlock("<hr" . $this->empty_element_suffix);
		$replacement = "\n" . $rule_key . "\n";

		return preg_replace(
			'{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx',
			$replacement,
			$text
		);
	}
581
 
582
	/**
	 * Span gamut: the transformations that occur *within* block-level tags
	 * such as paragraphs, headers and list items, as method name => priority.
	 */
	protected array $span_gamut = [
		// Process character escapes, code spans, and inline HTML
		// in one shot.
		"parseSpan"           => -30,
		// Process anchor and image tags. Images must come first,
		// because ![foo][f] looks like an anchor.
		"doImages"            =>  10,
		"doAnchors"           =>  20,
		// Make links out of things like `<https://example.com/>`
		// Must come after doAnchors, because you can use < and >
		// delimiters in inline links like [this](<url>).
		"doAutoLinks"         =>  30,
		"encodeAmpsAndAngles" =>  40,
		"doItalicsAndBold"    =>  50,
		"doHardBreaks"        =>  60,
	];
602
 
603
	/**
	 * Run every span gamut transformation over the text, in gamut order.
	 * @param  string $text
	 * @return string
	 */
	protected function runSpanGamut($text) {
		// Keys of the gamut are method names.
		foreach (array_keys($this->span_gamut) as $method) {
			$text = $this->$method($text);
		}
		return $text;
	}
615
 
616
	/**
	 * Convert hard line breaks. With $hard_wrap every newline breaks the
	 * line; otherwise only a newline preceded by two or more spaces does.
	 * @param  string $text
	 * @return string
	 */
	protected function doHardBreaks($text) {
		$pattern = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$pattern,
			[$this, '_doHardBreaks_callback'],
			$text
		);
	}
630
 
631
	/**
	 * Callback for doHardBreaks(): produce the hashed <br> element that
	 * replaces one matched line break.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHardBreaks_callback($matches) {
		$line_break = "<br" . $this->empty_element_suffix . "\n";
		return $this->hashPart($line_break);
	}
639
 
640
	/**
	 * Turn Markdown link shortcuts into XHTML <a> tags. Does nothing when
	 * called while an anchor is already being processed.
	 * @param  string $text
	 * @return string
	 */
	protected function doAnchors($text) {
		// Links cannot nest: bail out when re-entered from a link's text.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$brackets_re = $this->nested_brackets_re;
		$url_re      = $this->nested_url_parenthesis_re;
		$on_reference = [$this, '_doAnchors_reference_callback'];
		$on_inline    = [$this, '_doAnchors_inline_callback'];

		// 1. Reference-style links: [link text] [id]
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				('.$brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs',
			$on_reference, $text);

		// 2. Inline-style links: [link text](url "optional title")
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  \[
				('.$brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$url_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs',
			$on_inline, $text);

		// 3. Reference-style shortcuts: [link text]
		// These come last in case the text also holds [link text][1]
		// or [link text](/foo).
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs',
			$on_reference, $text);

		$this->in_anchor = false;
		return $text;
	}
708
 
709
	/**
	 * Callback for reference-style anchors: build the hashed <a> element
	 * when the link id is known, otherwise leave the source text intact.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAnchors_reference_callback($matches) {
		$source = $matches[1];
		$label  = $matches[2];
		// Group 3 is missing for the shortcut form [this].
		$id = $matches[3] ?? '';

		// Shortcut links like [this][] or [this] use their text as id.
		if ($id == "") {
			$id = $label;
		}

		// Lower-case the id and turn embedded newlines into spaces.
		$id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

		// Unknown id: nothing to link to, keep the original text.
		if (!isset($this->urls[$id])) {
			return $source;
		}

		$url  = $this->encodeURLAttribute($this->urls[$id]);
		$html = "<a href=\"$url\"";

		if (isset($this->titles[$id])) {
			$title = $this->encodeAttribute($this->titles[$id]);
			$html .=  " title=\"$title\"";
		}

		$label = $this->runSpanGamut($label);
		$html .= ">$label</a>";

		return $this->hashPart($html);
	}
747
 
748
	/**
	 * Callback for inline anchors: build the hashed <a> element for one
	 * [link text](url "optional title") match.
	 *
	 * The link text goes through the span gamut exactly once, as in the
	 * reference-style callback. A title is emitted whenever it is a
	 * non-empty string, including the string "0".
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAnchors_inline_callback($matches) {
		$link_text = $this->runSpanGamut($matches[2]);

		// The URL is in group 3 (<bracketed> form) or group 4 (bare form).
		$url = $matches[3] === '' ? $matches[4] : $matches[3];

		// Group 7 is absent when the optional title did not match.
		$title = $matches[7] ?? '';

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Reverse the process before using the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed !== $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url = $this->encodeURLAttribute($url);

		$result = "<a href=\"$url\"";
		// Strict comparison: a plain truthiness test would drop the title "0".
		if ($title !== '') {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}

		$result .= ">$link_text</a>";

		return $this->hashPart($result);
	}
778
 
779
	/**
	 * Turn Markdown image shortcuts into <img> tags.
	 * @param  string $text
	 * @return string
	 */
	protected function doImages($text) {
		$brackets_re = $this->nested_brackets_re;
		$url_re      = $this->nested_url_parenthesis_re;

		// 1. Reference-style labeled images: ![alt text][id]
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs',
			[$this, '_doImages_reference_callback'], $text);

		// 2. Inline images: ![alt text](url "optional title")
		return preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$url_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs',
			[$this, '_doImages_inline_callback'], $text);
	}
832
 
833
	/**
	 * Callback for reference-style images: build the hashed <img> element
	 * when the link id is known, otherwise leave the source text intact.
	 *
	 * The link id is normalised the same way as for reference-style
	 * anchors (lower-cased, embedded newlines turned into spaces), so an
	 * image reference wrapped across two lines resolves like a link does.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doImages_reference_callback($matches) {
		$whole_match = $matches[1];
		$alt_text    = $matches[2];
		$link_id     = $matches[3];

		if ($link_id == "") {
			$link_id = $alt_text; // for shortcut links like ![this][].
		}

		// Lower-case and turn embedded newlines into spaces. Link
		// definitions are single-line, so an id still containing a newline
		// could never match one.
		$link_id = strtolower($link_id);
		$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

		$alt_text = $this->encodeAttribute($alt_text);
		if (isset($this->urls[$link_id])) {
			$url = $this->encodeURLAttribute($this->urls[$link_id]);
			$result = "<img src=\"$url\" alt=\"$alt_text\"";
			if (isset($this->titles[$link_id])) {
				$title = $this->titles[$link_id];
				$title = $this->encodeAttribute($title);
				$result .=  " title=\"$title\"";
			}
			$result .= $this->empty_element_suffix;
			$result = $this->hashPart($result);
		} else {
			// If there's no such link ID, leave intact:
			$result = $whole_match;
		}

		return $result;
	}
865
 
866
	/**
	 * Callback for inline images: build the hashed <img> element for one
	 * ![alt text](url "optional title") match.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doImages_inline_callback($matches) {
		// The URL is in group 3 (<bracketed> form) or group 4 (bare form).
		$src = ($matches[3] == '') ? $matches[4] : $matches[3];
		$src = $this->encodeURLAttribute($src);
		$alt = $this->encodeAttribute($matches[2]);

		$html = "<img src=\"$src\" alt=\"$alt\"";

		// Group 7 only exists when the optional title matched.
		$title = $matches[7] ?? null;
		if ($title !== null) {
			$html .= " title=\"" . $this->encodeAttribute($title) . "\"";
		}

		$html .= $this->empty_element_suffix;

		return $this->hashPart($html);
	}
888
 
889
	/**
	 * Convert Markdown headings, Setext-style first and then atx-style,
	 * into hashed HTML heading elements.
	 * @param  string $text
	 * @return string
	 */
	protected function doHeaders($text) {
		/**
		 * Setext-style headers:
		 *	  Header 1
		 *	  ========
		 *
		 *	  Header 2
		 *	  --------
		 */
		$text = preg_replace_callback(
			'{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
			[$this, '_doHeaders_callback_setext'],
			$text
		);

		/**
		 * atx-style headers:
		 *   # Header 1
		 *   ## Header 2
		 *   ## Header 2 with closing hashes ##
		 *   ...
		 *   ###### Header 6
		 */
		return preg_replace_callback('{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm',
			[$this, '_doHeaders_callback_atx'],
			$text
		);
	}
926
 
927
	/**
	 * Callback for Setext-style headers: "=" underlines give <h1>, "-"
	 * underlines give <h2>.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_setext($matches) {
		$heading   = $matches[1];
		$underline = $matches[2];

		// Terrible hack: a "-" line followed by a single "-" is an empty
		// list item, not a header, so the match is returned unchanged.
		if ($underline == '-' && preg_match('{^-(?: |$)}', $heading)) {
			return $matches[0];
		}

		$level = ($underline[0] == '=') ? 1 : 2;

		// Optional id attribute, generated from the raw header text.
		$idAtt = $this->_generateIdFromHeaderValue($heading);

		$html = "<h$level$idAtt>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
946
 
947
	/**
	 * Callback for atx-style headers: the number of leading "#" characters
	 * gives the heading level.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_atx($matches) {
		$hashes  = $matches[1];
		$heading = $matches[2];

		// Optional id attribute, generated from the raw header text.
		$idAtt = $this->_generateIdFromHeaderValue($heading);

		$level = strlen($hashes);
		$html  = "<h$level$idAtt>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
960
 
961
	/**
	 * Generate the id attribute of a header through the header_id_func
	 * callback, when one is set.
	 *
	 * Returns a string of the form ` id="foo"` (with a leading space), or
	 * an empty string when no callable is configured or when the callback
	 * yields no id. The string "0" counts as a real id.
	 *
	 * @param  string $headerValue
	 * @return string
	 */
	protected function _generateIdFromHeaderValue($headerValue) {
		if (!is_callable($this->header_id_func)) {
			return "";
		}

		$idValue = call_user_func($this->header_id_func, $headerValue);

		// "0" is falsy in PHP but is a legitimate id, so it is let through
		// explicitly; every other falsy result still means "no id".
		if (!$idValue && $idValue !== '0') {
			return "";
		}

		return ' id="' . $this->encodeAttribute($idValue) . '"';
	}
982
 
983
	/**
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
	 * @param  string $text
	 * @return string
	 */
	protected function doLists($text) {
		// A list may be indented by fewer spaces than one tab stop.
		$max_indent = $this->tab_width - 1;

		// Patterns matching list item bullets and number markers.
		$marker_ul_re = '[*+-]';
		$marker_ol_re = '\d+[\.]';

		// Each pass handles one kind of list; the value is the marker of
		// the other kind, which ends the list when seen at the same indent.
		$passes = [
			$marker_ul_re => $marker_ol_re,
			$marker_ol_re => $marker_ul_re,
		];

		foreach ($passes as $marker_re => $other_marker_re) {
			// Pattern matching one entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$max_indent.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx

			// Nested lists and top-level lists use a different prefix:
			// inside a list any line start may open a sub-list, while at
			// top level a list must follow a blank line or start the text.
			if ($this->list_level) {
				$pattern = '{
						^
						'.$whole_list_re.'
					}mx';
			} else {
				$pattern = '{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx';
			}

			$text = preg_replace_callback($pattern, [$this, '_doLists_callback'], $text);
		}

		return $text;
	}
1049
 
1050
	/**
	 * Callback for doLists(): turn one matched list into a hashed <ul> or
	 * <ol> element.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doLists_callback($matches) {
		// Patterns matching list item bullets and number markers.
		$marker_ul_re = '[*+-]';
		$marker_ol_re = '\d+[\.]';

		// The first item's marker (group 4) decides the kind of list.
		$is_bulleted = preg_match("/$marker_ul_re/", $matches[4]);
		$list_type   = $is_bulleted ? "ul" : "ol";
		$marker_re   = $is_bulleted ? $marker_ul_re : $marker_ol_re;

		$items = $this->processListItems($matches[1] . "\n", $marker_re);

		// With enhanced ordered lists, the digits of the first marker give
		// the start number of an ordered list.
		$ol_start = 1;
		if ($this->enhanced_ordered_list
			&& $list_type == 'ol'
			&& preg_match('/[0-9]+/', $matches[4], $digits)
		) {
			$ol_start = $digits[0];
		}

		// The start attribute is only written when it is greater than 1.
		$open_tag = ($ol_start > 1 && $list_type == 'ol')
			? "<$list_type start=\"$ol_start\">"
			: "<$list_type>";

		$key = $this->hashBlock($open_tag . "\n" . $items . "</$list_type>");

		return "\n" . $key . "\n\n";
	}
1089
 
1090
	/**
	 * Nesting tracker for list levels: incremented while processListItems()
	 * is running and decremented when it finishes, so it is zero whenever
	 * no list is being processed.
	 */
	protected int $list_level = 0;

	/**
	 * Process the contents of a single ordered or unordered list, splitting it
	 * into individual list items.
	 *
	 * $this->list_level keeps track of whether we are inside a list. We do
	 * this because outside of a list we want to treat something like this:
	 *
	 *		I recommend upgrading to version
	 *		8. Oops, now this line is treated
	 *		as a sub-list.
	 *
	 * as a single paragraph, despite the fact that the second line starts
	 * with a digit-period-space sequence. Inside a list (or sub-list), that
	 * same line is treated as the start of a sub-list. This is an aspect of
	 * Markdown's syntax that is hard to parse perfectly without resorting to
	 * mind-reading.
	 *
	 * The counter is restored in a `finally` clause so that an exception
	 * thrown while rendering an item cannot leave the parser believing it is
	 * still inside a list.
	 *
	 * @param  string $list_str
	 * @param  string $marker_any_re Pattern matching the markers that start an item
	 * @return string
	 */
	protected function processListItems($list_str, $marker_any_re) {
		$item_re = '{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm';

		$this->list_level++;
		try {
			// Trim trailing blank lines:
			$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

			return preg_replace_callback(
				$item_re,
				array($this, '_processListItems_callback'),
				$list_str
			);
		} finally {
			$this->list_level--;
		}
	}
1145
 
1146
	/**
	 * List item parsing callback.
	 *
	 * Receives one match of the item pattern from processListItems():
	 * $1 optional leading newline, $2 leading whitespace, $3 marker plus
	 * following space, $4 item text, $5 optional trailing blank line.
	 *
	 * An item with a blank line before it, after it, or inside it goes
	 * through the full block gamut; any other item only gets sub-list
	 * processing and unwrapped paragraph formatting.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _processListItems_callback($matches) {
		$item_text = $matches[4];
		$marker    = $matches[3];

		// Groups 1 and 5 may be empty or missing from the match array.
		$has_blank_line = !empty($matches[1])
			|| !empty($matches[5])
			|| preg_match('/\n{2,}/', $item_text);

		if ($has_blank_line) {
			// Replace marker with the appropriate whitespace indentation
			$padded = $matches[2] . str_repeat(' ', strlen($marker)) . $item_text;
			$body   = $this->runBlockGamut($this->outdent($padded) . "\n");
		} else {
			// Recursion for sub-lists:
			$body = $this->doLists($this->outdent($item_text));
			$body = $this->formParagraphs($body, false);
		}

		return "<li>" . $body . "</li>\n";
	}
1172
 
1173
	/**
	 * Process Markdown `<pre><code>` blocks.
	 *
	 * Finds runs of lines indented by tab_width spaces and hands each run to
	 * _doCodeBlocks_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doCodeBlocks($text) {
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback(
			$code_block_re,
			array($this, '_doCodeBlocks_callback'),
			$text
		);
	}
1193
 
1194
	/**
	 * Code block parsing callback.
	 *
	 * Outdents the matched block, escapes it (through the
	 * code_block_content_func hook when one is callable, otherwise with
	 * htmlspecialchars), trims surrounding newlines and wraps the result in
	 * a hashed <pre><code> element.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doCodeBlocks_callback($matches) {
		$code = $this->outdent($matches[1]);

		$code = is_callable($this->code_block_content_func)
			? call_user_func($this->code_block_content_func, $code, "")
			: htmlspecialchars($code, ENT_NOQUOTES);

		// Drop newlines at both ends of the block.
		$code = preg_replace('/\A\n+|\n+\z/', '', $code);

		$html = "<pre><code>$code\n</code></pre>";
		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1215
 
1216
	/**
	 * Create a code span markup for $code. Called from handleSpanToken.
	 *
	 * The content is escaped by the code_span_content_func hook when one is
	 * callable; otherwise it is trimmed and passed through htmlspecialchars.
	 *
	 * @param  string $code
	 * @return string
	 */
	protected function makeCodeSpan($code) {
		$escaped = is_callable($this->code_span_content_func)
			? call_user_func($this->code_span_content_func, $code)
			: htmlspecialchars(trim($code), ENT_NOQUOTES);

		return $this->hashPart("<code>$escaped</code>");
	}
1229
 
1230
	/**
	 * Emphasis token patterns. The key is the emphasis token currently open
	 * ('' when none is); the value matches the emphasis token to look for
	 * in that context.
	 * @var array
	 */
	protected array $em_relist = [
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
		'*' => '(?<![\s*])\*(?!\*)',
		'_' => '(?<![\s_])_(?!_)',
	];

	/**
	 * Strong token patterns, keyed the same way by the strong token
	 * currently open.
	 * @var array
	 */
	protected array $strong_relist = [
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
		'**' => '(?<![\s*])\*\*(?!\*)',
		'__' => '(?<![\s_])__(?!_)',
	];

	/**
	 * Combined emphasis + strong (three-character) token patterns, keyed
	 * by the three-character token currently open.
	 * @var array
	 */
	protected array $em_strong_relist = [
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
		'***' => '(?<![\s*])\*\*\*(?!\*)',
		'___' => '(?<![\s_])___(?!_)',
	];

	/**
	 * Container for the regular expressions assembled by
	 * prepareItalicsAndBold(), keyed by the concatenation of the open
	 * emphasis and strong tokens.
	 */
	protected ?array $em_strong_prepared_relist = null;
1264
 
1265
	/**
	 * Prepare regular expressions for searching emphasis tokens in any
	 * context.
	 *
	 * For every combination of open emphasis token and open strong token,
	 * stores one delimiter-capturing pattern in em_strong_prepared_relist
	 * under the key "$em$strong". Alternatives are ordered three-character
	 * token (when one is defined for that key), then emphasis, then strong.
	 *
	 * @return void
	 */
	protected function prepareItalicsAndBold() {
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				$context = "$em$strong";

				// Allowed token expressions for this context.
				$alternatives = array($em_re, $strong_re);
				if (isset($this->em_strong_relist[$context])) {
					array_unshift($alternatives, $this->em_strong_relist[$context]);
				}

				// Master expression for this context.
				$this->em_strong_prepared_relist[$context] =
					'{(' . implode('|', $alternatives) . ')}';
			}
		}
	}
1287
 
1288
	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML.
	 *
	 * The text is scanned token by token using the patterns prepared by
	 * prepareItalicsAndBold(). Two parallel stacks are kept: $token_stack
	 * holds the currently open markers and $text_stack the text collected
	 * inside each of them, with index 0 being the innermost level. When a
	 * closing marker is found, the innermost text is run through the span
	 * gamut, wrapped in the matching tag, hashed and appended to the level
	 * below.
	 *
	 * The in_emphasis_processing flag makes nested calls return their input
	 * untouched. It is cleared in a `finally` clause so that an exception
	 * raised during span processing cannot leave it set.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		if ($this->in_emphasis_processing) {
			return $text; // avoid reentrancy
		}
		$this->in_emphasis_processing = true;

		try {
			$token_stack = array('');
			$text_stack = array('');
			$em = '';
			$strong = '';
			$three_char_em = false;

			while (1) {
				// Get prepared regular expression for searching emphasis
				// tokens in the current context.
				$token_re = $this->em_strong_prepared_relist["$em$strong"];

				// Each loop iteration searches for the next emphasis token:
				// text before it, the token itself, and the remaining text.
				$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
				$text_stack[0] .= $parts[0];
				$token = $parts[1] ?? '';
				$text = $parts[2] ?? '';

				if (empty($token)) {
					// Reached end of text span: empty the stack without
					// emitting any more emphasis; unmatched markers are
					// put back as literal text.
					while ($token_stack[0]) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
					}
					break;
				}

				$token_len = strlen($token);
				if ($three_char_em) {
					// Reached closing marker while inside a three-char emphasis.
					if ($token_len == 3) {
						// Three-char closing marker, close em and strong.
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<strong><em>$span</em></strong>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
						$strong = '';
					} else {
						// Other closing marker: close one em or strong and
						// change current token state to match the other
						$token_stack[0] = str_repeat($token[0], 3-$token_len);
						$tag = $token_len == 2 ? "strong" : "em";
						$span = $text_stack[0];
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] = $this->hashPart($span);
						$$tag = ''; // $$tag stands for $em or $strong
					}
					$three_char_em = false;
				} else if ($token_len == 3) {
					if ($em) {
						// Reached closing marker for both em and strong.
						// Close whichever two levels are on top of the stack:
						for ($i = 0; $i < 2; ++$i) {
							$shifted_token = array_shift($token_stack);
							$tag = strlen($shifted_token) == 2 ? "strong" : "em";
							$span = array_shift($text_stack);
							$span = $this->runSpanGamut($span);
							$span = "<$tag>$span</$tag>";
							$text_stack[0] .= $this->hashPart($span);
							$$tag = ''; // $$tag stands for $em or $strong
						}
					} else {
						// Reached opening three-char emphasis marker. Push on token
						// stack; will be handled by the special condition above.
						$em = $token[0];
						$strong = "$em$em";
						array_unshift($token_stack, $token);
						array_unshift($text_stack, '');
						$three_char_em = true;
					}
				} else if ($token_len == 2) {
					if ($strong) {
						// Unwind any dangling emphasis marker:
						if (strlen($token_stack[0]) == 1) {
							$text_stack[1] .= array_shift($token_stack);
							$text_stack[0] .= array_shift($text_stack);
							$em = '';
						}
						// Closing strong marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<strong>$span</strong>";
						$text_stack[0] .= $this->hashPart($span);
						$strong = '';
					} else {
						// Opening strong marker.
						array_unshift($token_stack, $token);
						array_unshift($text_stack, '');
						$strong = $token;
					}
				} else {
					// Here $token_len == 1
					if ($em) {
						if (strlen($token_stack[0]) == 1) {
							// Closing emphasis marker:
							array_shift($token_stack);
							$span = array_shift($text_stack);
							$span = $this->runSpanGamut($span);
							$span = "<em>$span</em>";
							$text_stack[0] .= $this->hashPart($span);
							$em = '';
						} else {
							// Innermost open marker is not an emphasis one:
							// keep this token as literal text.
							$text_stack[0] .= $token;
						}
					} else {
						// Opening emphasis marker.
						array_unshift($token_stack, $token);
						array_unshift($text_stack, '');
						$em = $token;
					}
				}
			}

			return $text_stack[0];
		} finally {
			$this->in_emphasis_processing = false;
		}
	}
1417
 
1418
	/**
	 * Parse Markdown blockquotes to HTML.
	 *
	 * Matches each run of ">"-prefixed paragraphs (including their lazy
	 * continuation lines and trailing blank lines) and hands it to
	 * _doBlockQuotes_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doBlockQuotes($text) {
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback(
			$blockquote_re,
			array($this, '_doBlockQuotes_callback'),
			$text
		);
	}
1438
 
1439
	/**
	 * Blockquote parsing callback.
	 *
	 * Strips one level of ">" quoting, runs the block gamut on what is
	 * left, indents the result by two spaces and wraps it in a hashed
	 * <blockquote> element.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback($matches) {
		// Trim one level of quoting and empty out whitespace-only lines.
		$inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
		$inner = $this->runBlockGamut($inner); // recurse

		$inner = preg_replace('/^/m', "  ", $inner);
		// The added leading spaces would end up inside <pre> content, so
		// they are taken out again there.
		$inner = preg_replace_callback(
			'{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'),
			$inner
		);

		$html = "<blockquote>\n$inner\n</blockquote>";
		return "\n" . $this->hashBlock($html) . "\n\n";
	}
1458
 
1459
	/**
	 * Blockquote <pre> callback: removes the two-space indentation from the
	 * start of every line of the matched <pre> section.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback2($matches) {
		return preg_replace('/^  /m', '', $matches[1]);
	}
1469
 
1470
	/**
	 * Parse paragraphs.
	 *
	 * Splits the text on blank lines. A chunk consisting solely of a block
	 * hash token is replaced by the HTML stored for it in html_hashes; every
	 * other chunk is run through the span gamut, optionally wrapped in
	 * <p> tags, and unhashed.
	 *
	 * @param  string $text String to process in paragraphs
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$trimmed = preg_replace('/\A\n+|\n+\z/', '', $text);

		$grafs = preg_split('/\n{2,}/', $trimmed, -1, PREG_SPLIT_NO_EMPTY);

		foreach ($grafs as $key => $value) {
			if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
				// Is a hashed block: swap the stored HTML back in.
				$grafs[$key] = $this->html_hashes[$value];
				continue;
			}

			// Is a paragraph.
			$html = $this->runSpanGamut($value);
			if ($wrap_in_p) {
				// The opening tag replaces any leading spaces.
				$html = preg_replace('/^([ ]*)/', "<p>", $html);
				$html .= "</p>";
			}
			$grafs[$key] = $this->unhash($html);
		}

		return implode("\n\n", $grafs);
	}
1540
 
1541
	/**
	 * Encode text for a double-quoted HTML attribute. This function
	 * is *not* suitable for attributes enclosed in single quotes.
	 *
	 * Ampersands and "<" are handled by encodeAmpsAndAngles(); double
	 * quotes are then replaced by &quot;.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAttribute($text) {
		return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
	}
1552
 
1553
	/**
	 * Encode text for a double-quoted HTML attribute containing a URL,
	 * applying the URL filter if set. Also generates the textual
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
	 * This function is *not* suitable for attributes enclosed in single quotes.
	 *
	 * mailto: URLs are entity-obfuscated; all other URLs are encoded with
	 * encodeAttribute().
	 *
	 * @param  string $url
	 * @param  string $text Passed by reference
	 * @return string        URL
	 */
	protected function encodeURLAttribute($url, &$text = null) {
		if (is_callable($this->url_filter_func)) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		if (preg_match('{^mailto:}i', $url)) {
			// 7 = length of the "mailto:" prefix left out of $text.
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		$is_tel  = preg_match('{^tel:}i', $url);
		$encoded = $this->encodeAttribute($url);

		// For tel: links the visible text omits the 4-character scheme.
		$text = $is_tel ? substr($encoded, 4) : $encoded;

		return $encoded;
	}
1580
 
1581
	/**
	 * Smart processing for ampersands and angle brackets that need to
	 * be encoded. Valid character entities are left alone unless the
	 * no-entities mode is set.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAmpsAndAngles($text) {
		// Ampersand-encoding based entirely on Nat Irons's Amputator
		// MT plugin: <http://bumppo.net/projects/amputator/>
		$text = $this->no_entities
			? str_replace('&', '&amp;', $text)
			: preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

		// Encode remaining <'s
		return str_replace('<', '&lt;', $text);
	}
1602
 
1603
	/**
	 * Parse Markdown automatic links to anchor HTML tags.
	 *
	 * Handles <scheme:...> URLs for the http, https, ftp, dict and tel
	 * schemes first, then <address@domain> email addresses (with an
	 * optional mailto: prefix).
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doAutoLinks($text) {
		$url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

		// Email addresses: <address@domain.foo>
		$email_re = '{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi';

		$text = preg_replace_callback(
			$url_re, array($this, '_doAutoLinks_url_callback'), $text);

		return preg_replace_callback(
			$email_re, array($this, '_doAutoLinks_email_callback'), $text);
	}
1635
 
1636
	/**
	 * Parse URL callback: turns the matched URL into a hashed <a> element
	 * whose text is the display form produced by encodeURLAttribute().
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAutoLinks_url_callback($matches) {
		$label = null;
		$href  = $this->encodeURLAttribute($matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1646
 
1647
	/**
	 * Parse email address callback: turns the matched address into a hashed
	 * mailto: <a> element whose text is the display form produced by
	 * encodeURLAttribute().
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAutoLinks_email_callback($matches) {
		$label = null;
		$href  = $this->encodeURLAttribute("mailto:" . $matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1658
 
1659
	/**
	 * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
	 *
	 * Output: the same text but with most characters encoded as either a
	 *         decimal or hex entity, in the hopes of foiling most address
	 *         harvesting spam bots. E.g.:
	 *
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
	 *        &#x6d;
	 *
	 * Note: the additional output $tail is assigned the same value as the
	 * output, minus the number of characters specified by $head_length.
	 *
	 * The text is processed byte by byte; bytes of 128 and above are left
	 * as they are. The choice of encoding for each byte is derived from a
	 * checksum of the text, so the same input always gives the same output.
	 *
	 * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
	 * attribute special characters by Allan Odgaard.
	 *
	 * @param  string  $text
	 * @param  string  $tail Passed by reference
	 * @param  integer $head_length
	 * @return string
	 */
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		// One element per byte. A regex split on "not start, not end" would
		// keep a final "\n" glued to the byte before it, because "$" also
		// matches just before a trailing newline.
		$chars = str_split($text);
		$seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.

		foreach ($chars as $key => $char) {
			$ord = ord($char);
			// Ignore non-ascii chars.
			if ($ord >= 128) {
				continue;
			}

			$r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
			// roughly 10% raw, 45% hex, 45% dec
			// '@' *must* be encoded. I insist.
			// '"', '&' and '>' have to be encoded inside the attribute
			if ($r > 90 && strpos('@"&>', $char) === false) {
				continue; // leave this character raw
			}

			$chars[$key] = $r < 45
				? '&#x' . dechex($ord) . ';'
				: '&#' . $ord . ';';
		}

		$text = implode('', $chars);
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}
1713
 
1714
	/**
	 * Take the string $str and parse it into tokens, hashing embedded HTML,
	 * escaped characters and handling code spans.
	 *
	 * Text between tokens is copied to the output unchanged; each token is
	 * replaced by whatever handleSpanToken() returns for it. HTML tags,
	 * comments and processing instructions are only recognised as tokens
	 * when the no_markup option is off.
	 *
	 * @param  string $str
	 * @return string
	 */
	protected function parseSpan($str) {
		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.( $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			').'
				)
				}xs';

		$output    = '';
		$remaining = $str;

		while (true) {
			// Look for the next tag, opening code span marker or escaped
			// character: [text before, token, text after].
			$parts = preg_split($span_re, $remaining, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Text preceding the token goes out unchanged.
			$output .= $parts[0];

			// No token left: the end has been reached.
			if (!isset($parts[1])) {
				break;
			}

			// The handler may consume part of the text after the token
			// (it receives it by reference).
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$remaining = $parts[2];
		}

		return $output;
	}
1771
 
1772
	/**
	 * Handle $token provided by parseSpan by determining its nature and
	 * returning the corresponding value that should replace it.
	 *
	 * - Backslash escape: the escaped character as a hashed numeric entity.
	 * - Backtick run: if a matching closing run is found in $str, the text
	 *   up to it becomes a hashed code span and $str is advanced past the
	 *   closing run; otherwise the backticks are returned as plain text.
	 * - Anything else: the token itself, hashed.
	 *
	 * @param  string $token
	 * @param  string $str Passed by reference
	 * @return string
	 */
	protected function handleSpanToken($token, &$str) {
		$first_char = $token[0];

		if ($first_char === "\\") {
			return $this->hashPart("&#". ord($token[1]). ";");
		}

		if ($first_char !== "`") {
			return $this->hashPart($token);
		}

		// Search for end marker in remaining text.
		$closing_re = '/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm';
		if (!preg_match($closing_re, $str, $matches)) {
			return $token; // Return as text since no ending marker found.
		}

		$str = $matches[2];
		$codespan = $this->makeCodeSpan($matches[1]);
		return $this->hashPart($codespan);
	}
1797
 
1798
	/**
	 * Remove one level of line-leading tabs or spaces: a single tab, or
	 * between one and tab_width spaces, at the start of every line.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function outdent($text) {
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';

		return preg_replace($indent_re, '', $text);
	}
1806
 
1807
 
1808
	/**
	 * String length function used when expanding tabs. `_initDetab` swaps
	 * in a UTF-8 aware replacement when the default function does not
	 * exist. Can be a string (function name) or a function.
	 */
	protected $utf8_strlen = 'mb_strlen';

	/**
	 * Replace tabs with the appropriate amount of spaces.
	 *
	 * Every line containing a tab is split into the blocks delimited by
	 * tab characters, then rebuilt with the appropriate number of spaces
	 * between consecutive blocks (see _detab_callback).
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function detab($text) {
		return preg_replace_callback(
			'/^.*\t.*$/m',
			array($this, '_detab_callback'),
			$text
		);
	}
1831
 
1832
	/**
	 * Replace tabs callback: expands the tabs of one matched line.
	 *
	 * Each tab is replaced by as many spaces as are needed to reach the
	 * next multiple of tab_width, measured with the utf8_strlen function.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _detab_callback($matches) {
		$char_count = $this->utf8_strlen; // strlen function for UTF-8.

		// Split in blocks; the first one starts the rebuilt line.
		$segments = explode("\t", $matches[0]);
		$expanded = array_shift($segments);

		foreach ($segments as $segment) {
			// Pad up to the next tab stop, then append the block.
			$padding = $this->tab_width
				- $char_count($expanded, 'UTF-8') % $this->tab_width;
			$expanded .= str_repeat(" ", $padding) . $segment;
		}

		return $expanded;
	}
1854
 
1855
	/**
	 * Check that the `utf8_strlen` property (initially `mb_strlen`) holds
	 * something that can be called. If it does not, replace it with a
	 * function that loosely counts the number of UTF-8 characters with a
	 * regular expression.
	 *
	 * `is_callable` is used rather than `function_exists` because the
	 * property may hold a function object as well as a function name, and
	 * `function_exists` only accepts a name.
	 *
	 * @return void
	 */
	protected function _initDetab() {
		if (is_callable($this->utf8_strlen)) {
			return;
		}

		// Count every byte that starts a character (ASCII or lead byte),
		// together with any continuation bytes that follow it.
		$this->utf8_strlen = fn($text) => preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
	}
1870
 
1871
	/**
	 * Swap back in all the tags hashed by _HashHTMLBlocks.
	 *
	 * A hash token is a boundary character, the \x1A control character, a
	 * run of digits and the same boundary character again.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function unhash($text) {
		$hash_token_re = '/(.)\x1A[0-9]+\1/';

		return preg_replace_callback(
			$hash_token_re,
			array($this, '_unhash_callback'),
			$text
		);
	}
1880
 
1881
	/**
	 * Unhashing callback: returns the content stored in html_hashes for the
	 * matched token.
	 *
	 * A sequence that merely looks like a hash token but has no entry in
	 * the table is returned unchanged instead of being replaced by nothing.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _unhash_callback($matches) {
		return $this->html_hashes[$matches[0]] ?? $matches[0];
	}
1889
}