1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
// This file is part of Moodle - http://moodle.org/
|
|
|
4 |
//
|
|
|
5 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
6 |
// it under the terms of the GNU General Public License as published by
|
|
|
7 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
8 |
// (at your option) any later version.
|
|
|
9 |
//
|
|
|
10 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
11 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
12 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
13 |
// GNU General Public License for more details.
|
|
|
14 |
//
|
|
|
15 |
// You should have received a copy of the GNU General Public License
|
|
|
16 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
17 |
|
|
|
18 |
/**
|
|
|
19 |
* Utility function to convert wiki-like to Markdown format
|
|
|
20 |
*
|
|
|
21 |
* @package core
|
|
|
22 |
* @subpackage lib
|
|
|
23 |
* @copyright Howard Miller, 2005
|
|
|
24 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
25 |
*/
|
|
|
26 |
|
|
|
27 |
// Abort immediately unless MOODLE_INTERNAL has already been defined by the including script.
defined('MOODLE_INTERNAL') || die();
|
|
|
28 |
|
|
|
29 |
// Block states tracked by WikiToMarkdown::convert() while walking the input line by line.
define('STATE_NONE', 1);        // A blank line was seen; the next line starts a new block.
define('STATE_PARAGRAPH', 2);   // Inside an ordinary paragraph.
define('STATE_BLOCKQUOTE', 3);  // Inside a blockquote section.
define('STATE_PREFORM', 4);     // Inside preformatted text.
define('STATE_NOTIKI', 5);      // Inside preformatted text that receives no inline formatting.

// List states tracked by WikiToMarkdown::do_list().
define('LIST_NONE', 1);         // No list is active.
define('LIST_UNORDERED', 2);    // An unordered list is active.
define('LIST_ORDERED', 3);      // An ordered list is active.
define('LIST_DEFINITION', 4);   // A definition list is active.
|
|
|
46 |
|
|
|
47 |
/**
 * Converts text written in the wiki-like format into Markdown.
 *
 * {@see WikiToMarkdown::convert()} walks the input line by line. Blank lines
 * end the current block; the first line of a new block selects its type
 * (blockquote, preformatted, unprocessed code, or ordinary paragraph). Lines of
 * processed blocks are run through {@see WikiToMarkdown::line_replace()}.
 *
 * The class relies on the STATE_* and LIST_* constants defined in this file.
 *
 * @package moodlecore
 * @copyright Howard Miller, 2005
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class WikiToMarkdown {

    /** @var int Current block state, one of the STATE_* constants. */
    public $block_state;

    /** @var int Current list state, one of the LIST_* constants. */
    public $list_state;

    /** @var int Nesting depth of the list currently open (0 when none). */
    public $list_depth;

    /** @var array Stack of closing tags for the list levels currently open. */
    public $list_backtrack;

    /** @var string Output buffer; reset by convert(). */
    public $output;

    /** @var mixed Course id passed to convert(), used when building file links. */
    public $courseid;

    /**
     * Returns the text needed to close the current block.
     *
     * Any open list is unwound first, then the closure for the block state is appended.
     *
     * @param int $state One of the STATE_* constants.
     * @return string Closing text (may be empty).
     */
    public function close_block($state) {
        // An open list has to be closed before the block that contains it.
        $lclose = '';
        if ($this->list_state != LIST_NONE) {
            $lclose = $this->do_list(' ', true);
        }

        switch ($state) {
            case STATE_PARAGRAPH:
            case STATE_BLOCKQUOTE:
            case STATE_NOTIKI:
                $sclose = "\n";
                break;
            case STATE_PREFORM:
                $sclose = "</pre>\n";
                break;
            default:
                $sclose = '';
        }

        return $lclose . $sclose;
    }

    /**
     * Makes a mark usable inside a "/"-delimited regular expression.
     *
     * Callers pass marks as regex fragments (e.g. "\*"). A slash, escaped or not,
     * is normalised to an escaped slash so it cannot terminate the pattern.
     *
     * @param string $mark Regex fragment matching the mark character.
     * @return string The fragment with every slash escaped exactly once.
     */
    private static function delimiter_safe($mark) {
        return str_replace('/', '\\/', str_replace('\\/', '/', $mark));
    }

    /**
     * Shared implementation of do_replace() and do_replace_markdown().
     *
     * @param string $line Text to process.
     * @param string $mark Regex fragment matching the mark character.
     * @param string $open Text substituted for the opening mark.
     * @param string $close Text substituted for the closing mark.
     * @return string Processed text.
     */
    private function replace_marked($line, $mark, $open, $close) {
        $markre = self::delimiter_safe($mark);

        // BODGE: hide marks that sit between two alphanumerics (e.g. "and/or") behind a
        // placeholder so the main substitution ignores them; they are put back afterwards.
        $bodge = chr(1);
        $line = preg_replace('/([[:alnum:]])' . $markre . '([[:alnum:]])/i', '\\1' . $bodge . '\\2', $line);

        // A marked span must start at line start or after a space, "(", "." or ",",
        // and must be followed by a non-alphanumeric character or the end of the line.
        $regex = '/(^| |[(.,])' . $markre . '([^' . $markre . ']*)' . $markre . '([^[:alnum:]]|$)/i';
        $line = preg_replace($regex, '\\1' . $open . '\\2' . $close . '\\3', $line);

        // BODGE: restore the hidden marks exactly as $mark was passed in.
        return str_replace($bodge, $mark, $line);
    }

    /**
     * Replaces a pair of mark characters around a span with an HTML tag pair.
     *
     * @param string $line Text to process.
     * @param string $mark Regex fragment matching the mark character.
     * @param string $tag HTML tag name to wrap the span in.
     * @return string Processed text.
     */
    public function do_replace($line, $mark, $tag) {
        return $this->replace_marked($line, $mark, '<' . $tag . '>', '</' . $tag . '>');
    }

    /**
     * Replaces a pair of mark characters around a span with a Markdown marker.
     *
     * Unlike do_replace() no HTML is generated: $tag is inserted verbatim on both sides.
     *
     * @param string $line Text to process.
     * @param string $mark Regex fragment matching the mark character.
     * @param string $tag Markdown marker placed before and after the span.
     * @return string Processed text.
     */
    public function do_replace_markdown($line, $mark, $tag) {
        return $this->replace_marked($line, $mark, $tag, $tag);
    }

    /**
     * Replaces a pair of mark characters with an HTML tag pair, anywhere in the line.
     *
     * Used for subscript and superscript, which (unlike do_replace()) may appear
     * in the middle of a word.
     *
     * @param string $line Text to process.
     * @param string $mark Regex fragment matching the mark character.
     * @param string $tag HTML tag name to wrap the span in.
     * @return string Processed text.
     */
    public function do_replace_sub($line, $mark, $tag) {
        $markre = self::delimiter_safe($mark);
        $regex = '/' . $markre . '([^' . $markre . ']*)' . $markre . '/i';

        return preg_replace($regex, '<' . $tag . '>\\1</' . $tag . '>', $line);
    }

    /**
     * Handles a line that starts with a list character, or closes all open lists.
     *
     * The number of leading list characters gives the nesting depth. Open and
     * close tags are emitted to move from the previous depth to the new one.
     *
     * @param string $line Line starting with one or more of * # ; : followed by a space.
     * @param bool $blank True to drop back to depth 0 and only emit pending closing tags.
     * @return string The converted list item, or only closing tags when $blank is true.
     */
    public function do_list($line, $blank = false) {
        // An empty line carries no list character, so it is handled like a blank one
        // rather than reading $line[0] from an empty string.
        if ($blank || $line === '') {
            $blank = true;
            $listchar = '';
            $count = 0;
        } else {
            $listchar = $line[0];
            $count = strspn($line, $listchar);
            $line = preg_replace('/^[' . $listchar . ']+ /i', '', $line);
        }

        // Columns: list open tag, list close tag, item prefix, item suffix, list style.
        // NOTE(review): ';' maps to <dd> and ':' to <dt>; confirm this pairing is intended.
        $styles = array(
            '*' => array('', '', '*', '', LIST_UNORDERED),
            '#' => array('', '', '1.', '', LIST_ORDERED),
            ';' => array('<dl>', '</dl>', '<dd>', '</dd>', LIST_DEFINITION),
            ':' => array('<dl>', '</dl>', '<dt>', '</dt>', LIST_DEFINITION),
        );
        if (isset($styles[$listchar])) {
            list($list_tag, $list_close_tag, $item_tag, $item_close_tag, $list_style) = $styles[$listchar];
        } else {
            $list_tag = $list_close_tag = $item_tag = $item_close_tag = '';
            $list_style = LIST_NONE;
        }

        $tags = '';

        // Depth has reduced: emit one stored closing tag per level left.
        for ($i = $this->list_depth; $i > $count; $i--) {
            $tags .= array_pop($this->list_backtrack);
        }

        // Depth has increased: open one list per new level and remember how to close it.
        for ($i = $this->list_depth; $i < $count; $i++) {
            array_push($this->list_backtrack, $list_close_tag);
            $tags .= $list_tag;
        }

        // List state now matches the style, and depth matches the count.
        $this->list_state = $list_style;
        $this->list_depth = $count;

        if ($blank) {
            return $tags;
        }

        // One space of indentation per nesting level below the first.
        $indent = str_repeat(' ', max(0, $count - 1));

        return $tags . $indent . $item_tag . ' ' . $line . $item_close_tag;
    }

    /**
     * Applies all inline conversions to a single line.
     *
     * The order of the replacements matters: later steps must not match text
     * produced by earlier ones.
     *
     * @param string $line One line of wiki-like text.
     * @return string The line with Markdown / HTML substitutions applied.
     */
    public function line_replace($line) {
        global $CFG;

        // "----" (a horizontal rule) is the same in Markdown, so it is left untouched.

        // Is this a list line (starts with * # ; :)?
        if (preg_match('/^([*]+|[#]+|[;]+|[:]+) /i', $line)) {
            $line = $this->do_list($line);
        }

        // Typographic conventions: Markdown has no equivalent, so the character is substituted
        // directly. The pairs are applied in the order listed. The dash conversions
        // ("--" and " - ") are disabled.
        $typography = array(
            '...' => ' … ',
            '(R)' => '®',
            '(r)' => '®',
            '(TM)' => '™',
            '(tm)' => '™',
            '(C)' => '©',
            '1/4' => '¼',
            '1/2' => '½',
            '3/4' => '¾',
        );
        $line = str_replace(array_keys($typography), array_values($typography), $line);

        // (digits) x (digits) becomes a multiplication sign.
        $line = preg_replace('/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i', '\\1×\\2', $line);

        // Formatting marks. Only bold and italic become Markdown; the rest stay HTML.
        // NOTE: the "/" replacement has to run before the ones that emit HTML tags,
        // or it would act on the slashes of their closing tags. The "-" (del) mark is disabled.
        $line = $this->do_replace_markdown($line, "\\*", '**');
        $line = $this->do_replace_markdown($line, '/', '*');
        $line = $this->do_replace($line, "\\+", 'ins');
        $line = $this->do_replace_sub($line, '~', 'sub');
        $line = $this->do_replace_sub($line, "\\^", 'sup');
        $line = $this->do_replace($line, '%', 'code');
        $line = $this->do_replace($line, '@', 'cite');

        // URL(text) becomes a Markdown link. "~" is used as the regex delimiter because
        // the patterns themselves contain "/" and "#".
        $line = preg_replace('~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
            '\\1[\\5](\\2://\\3\\4)', $line);
        // The www form has only four groups, so the link text is group 4.
        $line = preg_replace('~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
            '\\1[\\4](http://www.\\2\\3)', $line);

        // Bare URLs (with and without a scheme) become Markdown autolinks.
        $line = preg_replace('~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i',
            '\\1<\\2://\\3\\4>', $line);
        $line = preg_replace('~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])~i',
            '\\1<http://www.\\2\\3>', $line);

        // address(text) becomes a mailto link; emitted as HTML.
        $line = preg_replace('/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i',
            '\\1<a href="mailto:\\2">\\3</a>', $line);

        // "!N " at the start of a line is a heading; N (1-6) becomes the number of hashes.
        if (preg_match('/^!([1-6]) (.*)$/i', $line, $regs)) {
            $hashes = str_repeat('#', (int) $regs[1]);
            $line = preg_replace('/^!([1-6]) (.*)$/i', $hashes . ' \\2', $line);
        }

        // Acronym handling, e.g. HTML(Hypertext Markup Language); no Markdown equivalent, so HTML.
        $line = preg_replace('/([A-Z]+)\(([^)]+)\)/', '<acronym title="\\2">\\1</acronym>', $line);

        // Resource link " module:id(Description Text)" becomes a Markdown link to the module view page.
        $line = preg_replace('/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i',
            ' [\\3](' . $CFG->wwwroot . '/mod/\\1/view.php?id=\\2) ', $line);

        // Base URL for course files. It must be a string because it is concatenated below.
        // NOTE(review): assumes the value returned by make_legacyfile_url() is
        // string-convertible and has no trailing slash — confirm against moodle_url.
        $coursefileurl = (string) moodle_url::make_legacyfile_url($this->courseid, null);

        // Picture resource link "/path/name.ext(Description)" becomes a Markdown image.
        $line = preg_replace('#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i',
            '![\\3](' . $coursefileurl . '/\\1\\2)', $line);

        // File resource link "file:/path(Description)" becomes a Markdown link.
        $line = preg_replace('#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i',
            '[\\2](' . $coursefileurl . '/\\1)', $line);

        return $line;
    }

    /**
     * Starts a new block from its first line and records the new block state.
     *
     * @param string $line First (non-blank) line of the block.
     * @param string $codeindent Prefix used for lines of an unprocessed code block.
     * @return string Text to append to the output for this line.
     */
    private function open_block($line, $codeindent) {
        if (preg_match('/^> /i', $line)) {
            // Blockquote.
            $this->block_state = STATE_BLOCKQUOTE;
            return $this->line_replace($line) . "\n";
        }

        if (preg_match('/^ /i', $line)) {
            // Preformatted text; no real Markdown equivalent, so <pre> is used.
            $this->block_state = STATE_PREFORM;
            return "<pre>\n" . $this->line_replace($line) . "\n";
        }

        if (preg_match('/^% /i', $line)) {
            // Preformatted text without processing: emitted as a Markdown code block.
            $this->block_state = STATE_NOTIKI;
            return $codeindent . preg_replace('/^%/i', '', $line) . "\n";
        }

        // Ordinary paragraph.
        $this->block_state = STATE_PARAGRAPH;
        return $this->line_replace($line) . "\n";
    }

    /**
     * Main entry point: converts wiki-like text to Markdown.
     *
     * @param string $content Text with wiki-like formatting.
     * @param mixed $courseid Course id used when building links to course files.
     * @return string Text with Markdown formatting.
     */
    public function convert($content, $courseid) {
        // Initialise the parser state.
        $this->output = '';
        $this->block_state = STATE_NONE;
        $this->list_state = LIST_NONE;
        $this->list_depth = 0;
        $this->list_backtrack = array();
        $this->courseid = $courseid;

        // Markdown only treats a line as code when it is indented by at least four spaces.
        $codeindent = '    ';

        $buffer = '';
        foreach (explode("\n", $content) as $line) {
            // A blank line ends the current block.
            if (preg_match("/^[[:blank:]\r]*$/i", $line)) {
                $buffer .= $this->close_block($this->block_state);
                $this->block_state = STATE_NONE;
                continue;
            }

            switch ($this->block_state) {
                case STATE_NONE:
                    // The start of the line defines the block type.
                    $buffer .= $this->open_block($line, $codeindent);
                    break;
                case STATE_PARAGRAPH:
                case STATE_BLOCKQUOTE:
                case STATE_PREFORM:
                    $buffer .= $this->line_replace($line) . "\n";
                    break;
                case STATE_NOTIKI:
                    // Code block lines are copied through without inline processing.
                    $buffer .= $codeindent . $line . "\n";
                    break;
            }
        }

        // Close off whatever block is still open at the end of the input.
        $buffer .= $this->close_block($this->block_state);

        return $buffer;
    }
}
|