1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
namespace core;
|
|
|
18 |
|
|
|
19 |
/**
 * Content formatting methods for Moodle.
 *
 * Provides the implementation behind short-string formatting ({@see formatting::format_string()})
 * and long-text formatting ({@see formatting::format_text()}), together with per-instance
 * overrides for the forceclean, formatstringstriptags and filterall site settings.
 *
 * @package core
 * @copyright 2023 Andrew Lyons <andrew@nicols.co.uk>
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class formatting {
    /** @var bool|null Whether to apply forceclean. When not set, $CFG->forceclean is used. */
    protected ?bool $forceclean;

    /** @var bool|null Whether to apply striptags. When not set, $CFG->formatstringstriptags is used. */
    protected ?bool $striptags;

    /** @var bool|null Whether to apply filters. When not set, $CFG->filterall is used. */
    protected ?bool $filterall;

    /** @var array A string cache for format_string, keyed by an md5 of the call arguments */
    protected $formatstringcache = [];

    /**
     * Given a simple string, this function returns the string
     * processed by enabled string filters if $CFG->filterall is enabled
     *
     * This function should be used to print short strings (non html) that
     * need filter processing e.g. activity titles, post subjects,
     * glossary concepts.
     *
     * Results are memoised per instance; the cache is emptied once it holds more than 2000 entries.
     *
     * @param null|string $string The string to be filtered. Should be plain text, except
     *                            possibly for multilang tags.
     * @param bool $striplinks To strip any link in the result text.
     * @param null|context $context The context used for formatting. Falls back to $PAGE->context when null.
     * @param bool $filter Whether to apply filters
     * @param bool $escape Whether to escape ampersands (and, when tags are stripped, angle brackets)
     * @return string
     * @throws \coding_exception If no context was given and none can be found on $PAGE.
     */
    public function format_string(
        ?string $string,
        bool $striplinks = true,
        ?context $context = null,
        bool $filter = true,
        bool $escape = true,
    ): string {
        global $PAGE;

        if ($string === '' || is_null($string)) {
            // No need to do any filters and cleaning.
            return '';
        }

        if (!$this->should_filter_string()) {
            // Installing or upgrading: only remove tags, nothing else is available yet.
            return strip_tags($string);
        }

        if (count($this->formatstringcache) > 2000) {
            // This number might need some tuning to limit memory usage in cron.
            $this->formatstringcache = [];
        }

        if ($context === null) {
            // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages :-(.
            // In future we may want to add debugging here.
            $context = $PAGE->context;
            if (!$context) {
                // We did not find any context? weird.
                throw new \coding_exception(
                    'Unable to identify context for format_string()',
                );
            }
        }

        // Calculate md5 of everything that influences the result.
        $cachekeys = [
            $string,
            $striplinks,
            $context->id,
            $escape,
            current_language(),
            $filter,
        ];
        $md5 = md5(implode('<+>', $cachekeys));

        // Fetch from cache if possible.
        if (array_key_exists($md5, $this->formatstringcache)) {
            return $this->formatstringcache[$md5];
        }

        // First replace all ampersands not followed by html entity code
        // Regular expression moved to its own method for easier unit testing.
        if ($escape) {
            $string = replace_ampersands_not_followed_by_entity($string);
        }

        if ($filter && $this->get_filterall()) {
            $filtermanager = \filter_manager::instance();
            $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
            $string = $filtermanager->filter_string($string, $context);
        }

        if ($this->get_striptags()) {
            // The site requires it: strip ALL tags from this string.
            $string = strip_tags($string);
            if ($escape) {
                // Any angle bracket that survives strip_tags() is literal text, so encode it.
                $string = str_replace(['<', '>'], ['&lt;', '&gt;'], $string);
            }
        } else {
            // Otherwise strip just links if that is required (default).
            if ($striplinks) {
                $string = strip_links($string);
            }
            $string = clean_text($string);
        }

        // Store to cache.
        $this->formatstringcache[$md5] = $string;

        return $string;
    }

    /**
     * Given text in a variety of format codings, this function returns the text as safe HTML.
     *
     * This function should mainly be used for long strings like posts,
     * answers, glossary items etc. For short strings {@link format_string()}.
     *
     * @param null|string $text The text to be formatted. This is raw text originally from user input.
     * @param string $format Identifier of the text format to be used
     *                       [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_MARKDOWN]
     * @param null|context $context The context used for filtering
     * @param bool $trusted If true the string won't be cleaned.
     *                      Note: FORMAT_MARKDOWN does not support trusted text.
     * @param null|bool $clean If true the string will be cleaned.
     *                         Note: This parameter is overridden if the text is trusted
     * @param bool $filter If true the string will be run through applicable filters as well.
     * @param bool $para If true then the returned string will be wrapped in div tags.
     * @param bool $newlines If true then lines newline breaks will be converted to HTML newline breaks.
     * @param bool $overflowdiv If set to true the formatted text will be encased in a div
     * @param bool $blanktarget If true all <a> tags will have target="_blank" added unless target is explicitly specified.
     * @param bool $allowid If true then id attributes will not be removed, even when using htmlpurifier.
     * @return string
     * @throws \coding_exception If $format is not one of the supported formats.
     */
    public function format_text(
        ?string $text,
        string $format = FORMAT_MOODLE,
        ?context $context = null,
        bool $trusted = false,
        ?bool $clean = null,
        bool $filter = true,
        bool $para = true,
        bool $newlines = true,
        bool $overflowdiv = false,
        bool $blanktarget = false,
        bool $allowid = false,
    ): string {
        global $CFG, $PAGE;

        if ($text === '' || is_null($text)) {
            // No need to do any filters and cleaning.
            return '';
        }

        if ($format == FORMAT_MARKDOWN) {
            // Markdown format cannot be trusted in trusttext areas,
            // because we do not know how to sanitise it before editing.
            $trusted = false;
        }

        if ($clean === null) {
            // No cleaning if text trusted and clean not specified.
            $clean = !($trusted && trusttext_active());
        }

        if ($this->get_forceclean()) {
            // Whatever the caller claims, the admin wants all content cleaned anyway.
            $clean = true;
        }

        // Calculate best context.
        if (!$this->should_filter_string()) {
            // Do not filter anything during installation or before upgrade completes.
            $context = null;
        } else if ($context === null) {
            // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages.
            // In future we may want to add debugging here.
            $context = $PAGE->context;
        }

        if (!$context) {
            // Either install/upgrade or something has gone really wrong because context does not exist (yet?).
            $filter = false;
        }

        if ($filter) {
            $filtermanager = \filter_manager::instance();
            $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
            $filteroptions = [
                'originalformat' => $format,
                'noclean' => !$clean,
            ];
        } else {
            $filtermanager = new \null_filter_manager();
            $filteroptions = [];
        }

        switch ($format) {
            case FORMAT_HTML:
                $filteroptions['stage'] = 'pre_format';
                $text = $filtermanager->filter_text($text, $context, $filteroptions);
                // Text is already in HTML format, so just continue to the next filtering stage.
                $text = $this->clean_and_filter_html($text, $filtermanager, $context, $filteroptions, $clean, $allowid);
                break;

            case FORMAT_PLAIN:
                $text = s($text); // Cleans dangerous JS.
                $text = rebuildnolinktag($text);
                // Keep runs of spaces visible once rendered as HTML.
                $text = str_replace('  ', '&nbsp; ', $text);
                $text = nl2br($text);
                break;

            case FORMAT_MARKDOWN:
                $filteroptions['stage'] = 'pre_format';
                $text = $filtermanager->filter_text($text, $context, $filteroptions);
                $text = markdown_to_html($text);
                $text = $this->clean_and_filter_html($text, $filtermanager, $context, $filteroptions, $clean, $allowid);
                break;

            case FORMAT_MOODLE:
                $filteroptions['stage'] = 'pre_format';
                $text = $filtermanager->filter_text($text, $context, $filteroptions);
                $text = text_to_html($text, null, $para, $newlines);
                $text = $this->clean_and_filter_html($text, $filtermanager, $context, $filteroptions, $clean, $allowid);
                break;

            default:
                // Any other format identifier is a developer error.
                throw new \coding_exception("Unkown format passed to format_text: {$format}");
        }

        if ($filter) {
            // At this point there should not be any draftfile links any more,
            // this happens when developers forget to post process the text.
            // The only potential problem is that somebody might try to format
            // the text before storing into database which would be itself big bug.
            $text = str_replace("\"$CFG->wwwroot/draftfile.php", "\"$CFG->wwwroot/brokenfile.php#", $text);

            if ($CFG->debugdeveloper && str_contains($text, '@@PLUGINFILE@@/')) {
                debugging(
                    'Before calling format_text(), the content must be processed with file_rewrite_pluginfile_urls()',
                    DEBUG_DEVELOPER
                );
            }
        }

        if (!empty($overflowdiv)) {
            $text = \html_writer::tag('div', $text, ['class' => 'no-overflow']);
        }

        if ($blanktarget) {
            $text = $this->add_blank_target($text);
        }

        return $text;
    }

    /**
     * Run the pre_clean filter stage, the optional HTML clean, and the post_clean filter stage.
     *
     * Shared by every format that has been converted to HTML before cleaning.
     *
     * @param string $text HTML produced by the pre_format stage and format conversion
     * @param object $filtermanager The filter manager chosen by format_text()
     * @param null|context $context The context used for filtering
     * @param array $filteroptions Options passed to each filter_text() call; the 'stage' key is set here
     * @param bool $clean Whether to pass the text through clean_text()
     * @param bool $allowid Whether id attributes are kept while cleaning
     * @return string
     */
    protected function clean_and_filter_html(
        $text,
        $filtermanager,
        ?context $context,
        array $filteroptions,
        bool $clean,
        bool $allowid,
    ) {
        $filteroptions['stage'] = 'pre_clean';
        $text = $filtermanager->filter_text($text, $context, $filteroptions);

        if ($clean) {
            $text = clean_text($text, FORMAT_HTML, [
                'allowid' => $allowid,
            ]);
        }

        $filteroptions['stage'] = 'post_clean';

        return $filtermanager->filter_text($text, $context, $filteroptions);
    }

    /**
     * Add target="_blank" and rel="noreferrer" to links in an HTML fragment.
     *
     * Links that already carry a target attribute not containing "_blank" are left untouched.
     *
     * @param string $text The HTML fragment to rewrite
     * @return string The rewritten fragment
     */
    protected function add_blank_target($text) {
        $domdoc = new \DOMDocument();

        // Silence libxml parse warnings while loading, then put the caller's setting back
        // so this method does not change global libxml behaviour.
        $previouserrorsetting = libxml_use_internal_errors(true);
        $domdoc->loadHTML('<?xml version="1.0" encoding="UTF-8" ?>' . $text);
        libxml_clear_errors();
        libxml_use_internal_errors($previouserrorsetting);

        foreach ($domdoc->getElementsByTagName('a') as $link) {
            if ($link->hasAttribute('target') && !str_contains($link->getAttribute('target'), '_blank')) {
                // An explicit, different target was requested; respect it.
                continue;
            }
            $link->setAttribute('target', '_blank');
            if (!str_contains($link->getAttribute('rel'), 'noreferrer')) {
                $link->setAttribute('rel', trim($link->getAttribute('rel') . ' noreferrer'));
            }
        }

        // This regex is nasty and I don't like it. The correct way to solve this is by loading the HTML like so:
        // $domdoc->loadHTML($text, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); however it seems like some libxml
        // versions don't work properly and end up leaving <html><body>, so I'm forced to use
        // this regex to remove those tags as a preventive measure.
        return trim(preg_replace(
            '~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i',
            '',
            $domdoc->saveHTML($domdoc->documentElement),
        ));
    }

    /**
     * Set the value of the forceclean setting.
     *
     * @param bool $forceclean
     * @return self
     */
    public function set_forceclean(bool $forceclean): self {
        $this->forceclean = $forceclean;

        return $this;
    }

    /**
     * Get the current forceclean value.
     *
     * Reverts to CFG->forceclean if not set, and to false if that is not set either.
     *
     * @return bool
     */
    public function get_forceclean(): bool {
        global $CFG;

        if (isset($this->forceclean)) {
            return $this->forceclean;
        }

        return !empty($CFG->forceclean);
    }

    /**
     * Set the value of the striptags setting.
     *
     * @param bool $striptags
     * @return self
     */
    public function set_striptags(bool $striptags): self {
        $this->striptags = $striptags;

        return $this;
    }

    /**
     * Get the current striptags value.
     *
     * Reverts to CFG->formatstringstriptags if not set, and to false if that is not set either.
     *
     * @return bool
     */
    public function get_striptags(): bool {
        global $CFG;

        if (isset($this->striptags)) {
            return $this->striptags;
        }

        return !empty($CFG->formatstringstriptags);
    }

    /**
     * Set the value of the filterall setting.
     *
     * @param bool $filterall
     * @return self
     */
    public function set_filterall(bool $filterall): self {
        $this->filterall = $filterall;

        return $this;
    }

    /**
     * Get the current filterall value.
     *
     * Reverts to CFG->filterall if not set, and to false if that is not set either.
     *
     * @return bool
     */
    public function get_filterall(): bool {
        global $CFG;

        if (isset($this->filterall)) {
            return $this->filterall;
        }

        return !empty($CFG->filterall);
    }

    /**
     * During initial install, or upgrade from a really old version of Moodle, we should not filter strings at all.
     *
     * @return bool False when $CFG->version is empty or older than 2013051400, or during initial install.
     */
    protected function should_filter_string(): bool {
        global $CFG;

        if (empty($CFG->version) || $CFG->version < 2013051400 || during_initial_install()) {
            // Do not filter anything during installation or before upgrade completes.
            return false;
        }

        return true;
    }
}
|