Proyectos de Subversion Moodle

Rev

Ir a la última revisión | | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
namespace core;
18
 
19
/**
20
 * Content formatting methods for Moodle.
21
 *
22
 * @package   core
23
 * @copyright 2023 Andrew Lyons <andrew@nicols.co.uk>
24
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25
 */
26
class formatting {
    /** @var bool|null Instance override for forceclean; null means fall back to $CFG->forceclean */
    protected ?bool $forceclean = null;

    /** @var bool|null Instance override for striptags; null means fall back to $CFG->formatstringstriptags */
    protected ?bool $striptags = null;

    /** @var bool|null Instance override for filterall; null means fall back to $CFG->filterall */
    protected ?bool $filterall = null;

    /** @var array A string cache for format_string, keyed by an md5 of the formatting inputs */
    protected $formatstringcache = [];

    /**
     * Format a short, plain-text string (e.g. activity titles, post subjects, glossary concepts).
     *
     * The string is optionally ampersand-escaped, run through the string filters when
     * filterall is enabled, and then either stripped of all tags (when striptags is enabled)
     * or stripped of links and cleaned. Results are cached per instance.
     *
     * @param null|string $string The string to be filtered. Should be plain text, except
     * possibly for multilang tags.
     * @param bool $striplinks To strip any link in the result text.
     * @param null|context $context The context used for formatting; falls back to $PAGE->context when null
     * @param bool $filter Whether to apply filters
     * @param bool $escape Whether to escape ampersands
     * @return string
     * @throws \coding_exception If no context was given and $PAGE->context is empty
     */
    public function format_string(
        ?string $string,
        bool $striplinks = true,
        ?context $context = null,
        bool $filter = true,
        bool $escape = true,
    ): string {
        global $PAGE;

        if ($string === null || $string === '') {
            // No need to do any filters and cleaning.
            return '';
        }

        if (!$this->should_filter_string()) {
            // Install or early upgrade: only remove tags, do not filter or cache.
            return strip_tags($string);
        }

        if (count($this->formatstringcache) > 2000) {
            // This number might need some tuning to limit memory usage in cron.
            $this->formatstringcache = [];
        }

        if ($context === null) {
            // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages :-(.
            // In future we may want to add debugging here.
            $context = $PAGE->context;
            if (!$context) {
                // We did not find any context? weird.
                throw new \coding_exception(
                    'Unable to identify context for format_string()',
                );
            }
        }

        // Build the cache key from every input that influences the result.
        $cachekeys = [
            $string,
            $striplinks,
            $context->id,
            $escape,
            current_language(),
            $filter,
        ];
        $md5 = md5(implode('<+>', $cachekeys));

        // Fetch from cache if possible.
        if (array_key_exists($md5, $this->formatstringcache)) {
            return $this->formatstringcache[$md5];
        }

        // First replace all ampersands not followed by html entity code
        // Regular expression moved to its own method for easier unit testing.
        if ($escape) {
            $string = replace_ampersands_not_followed_by_entity($string);
        }

        if ($this->get_filterall() && $filter) {
            $filtermanager = \filter_manager::instance();
            $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
            $string = $filtermanager->filter_string($string, $context);
        }

        if ($this->get_striptags()) {
            // The site requires it: strip ALL tags from this string.
            $string = strip_tags($string);
            if ($escape) {
                $string = str_replace(['<', '>'], ['&lt;', '&gt;'], $string);
            }
        } else {
            // Otherwise strip just links if that is required (default).
            if ($striplinks) {
                $string = strip_links($string);
            }
            $string = clean_text($string);
        }

        // Store to cache.
        $this->formatstringcache[$md5] = $string;

        return $string;
    }

    /**
     * Given text in a variety of format codings, this function returns the text as safe HTML.
     *
     * This function should mainly be used for long strings like posts,
     * answers, glossary items etc. For short strings {@link format_string()}.
     *
     * @param null|string $text The text to be formatted. This is raw text originally from user input.
     * @param string $format Identifier of the text format to be used
     *              [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_MARKDOWN]
     * @param null|context $context The context used for filtering
     * @param bool $trusted If true the string won't be cleaned.
     *              Note: FORMAT_MARKDOWN does not support trusted text.
     * @param null|bool $clean If true the string will be cleaned.
     *              When null, cleaning is skipped only for trusted text while trusttext is active.
     *              Note: The forceclean setting overrides this and always cleans.
     * @param bool $filter If true the string will be run through applicable filters as well.
     * @param bool $para If true then the returned string will be wrapped in div tags.
     * @param bool $newlines If true then lines newline breaks will be converted to HTML newline breaks.
     * @param bool $overflowdiv If set to true the formatted text will be encased in a div
     * @param bool $blanktarget If true all <a> tags will have target="_blank" added unless target is explicitly specified.
     * @param bool $allowid If true then id attributes will not be removed, even when using htmlpurifier.
     * @return string
     * @throws \coding_exception If $format is not one of the supported formats
     */
    public function format_text(
        ?string $text,
        string $format = FORMAT_MOODLE,
        ?context $context = null,
        bool $trusted = false,
        ?bool $clean = null,
        bool $filter = true,
        bool $para = true,
        bool $newlines = true,
        bool $overflowdiv = false,
        bool $blanktarget = false,
        bool $allowid = false,
    ): string {
        global $CFG, $PAGE;

        if ($text === null || $text === '') {
            // No need to do any filters and cleaning.
            return '';
        }

        if ($format == FORMAT_MARKDOWN) {
            // Markdown format cannot be trusted in trusttext areas,
            // because we do not know how to sanitise it before editing.
            $trusted = false;
        }
        if ($clean === null) {
            // No cleaning if text trusted and clean not specified.
            $clean = !($trusted && trusttext_active());
        }
        if ($this->get_forceclean()) {
            // Whatever the caller claims, the admin wants all content cleaned anyway.
            $clean = true;
        }

        // Calculate best context.
        if (!$this->should_filter_string()) {
            // Do not filter anything during installation or before upgrade completes.
            $context = null;
        } else if ($context === null) {
            // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages.
            // In future we may want to add debugging here.
            $context = $PAGE->context;
        }

        if (!$context) {
            // Either install/upgrade or something has gone really wrong because context does not exist (yet?).
            $filter = false;
        }

        if ($filter) {
            $filtermanager = \filter_manager::instance();
            $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
            $filteroptions = [
                'originalformat' => $format,
                'noclean' => !$clean,
            ];
        } else {
            $filtermanager = new \null_filter_manager();
            $filteroptions = [];
        }

        switch ($format) {
            case FORMAT_HTML:
                // Text is already in HTML format, so no conversion step is needed.
                $text = $this->filter_and_clean($filtermanager, $text, $context, $filteroptions, $clean, $allowid);
                break;

            case FORMAT_PLAIN:
                $text = s($text); // Cleans dangerous JS.
                $text = rebuildnolinktag($text);
                $text = str_replace('  ', '&nbsp; ', $text);
                $text = nl2br($text);
                break;

            case FORMAT_MARKDOWN:
                $text = $this->filter_and_clean(
                    $filtermanager,
                    $text,
                    $context,
                    $filteroptions,
                    $clean,
                    $allowid,
                    fn($markdown) => markdown_to_html($markdown),
                );
                break;

            case FORMAT_MOODLE:
                $text = $this->filter_and_clean(
                    $filtermanager,
                    $text,
                    $context,
                    $filteroptions,
                    $clean,
                    $allowid,
                    fn($plain) => text_to_html($plain, null, $para, $newlines),
                );
                break;

            default: // Any format that is not handled above.
                throw new \coding_exception("Unkown format passed to format_text: {$format}");
        }

        if ($filter) {
            // At this point there should not be any draftfile links any more,
            // this happens when developers forget to post process the text.
            // The only potential problem is that somebody might try to format
            // the text before storing into database which would be itself big bug.
            $text = str_replace("\"$CFG->wwwroot/draftfile.php", "\"$CFG->wwwroot/brokenfile.php#", $text);

            if (!empty($CFG->debugdeveloper) && str_contains($text, '@@PLUGINFILE@@/')) {
                debugging(
                    'Before calling format_text(), the content must be processed with file_rewrite_pluginfile_urls()',
                    DEBUG_DEVELOPER
                );
            }
        }

        if ($overflowdiv) {
            $text = \html_writer::tag('div', $text, ['class' => 'no-overflow']);
        }

        if ($blanktarget) {
            $text = $this->add_blank_target_to_links($text);
        }

        return $text;
    }

    /**
     * Run the staged filter pipeline shared by the HTML, Markdown and Moodle formats.
     *
     * The stages are: 'pre_format' filtering, optional conversion to HTML, 'pre_clean' filtering,
     * optional cleaning, and 'post_clean' filtering.
     *
     * @param object $filtermanager The filter manager (or null filter manager) providing filter_text()
     * @param string $text The text to process
     * @param mixed $context The context passed through to the filter manager (may be null)
     * @param array $filteroptions Base filter options; the 'stage' key is set for each stage
     * @param bool $clean Whether to run clean_text() between the pre_clean and post_clean stages
     * @param bool $allowid Passed to clean_text() as the 'allowid' option
     * @param null|callable $tohtml Optional converter applied after the pre_format stage
     * @return string The processed text
     */
    private function filter_and_clean(
        $filtermanager,
        string $text,
        $context,
        array $filteroptions,
        bool $clean,
        bool $allowid,
        ?callable $tohtml = null,
    ): string {
        $filteroptions['stage'] = 'pre_format';
        $text = $filtermanager->filter_text($text, $context, $filteroptions);

        if ($tohtml !== null) {
            $text = $tohtml($text);
        }

        $filteroptions['stage'] = 'pre_clean';
        $text = $filtermanager->filter_text($text, $context, $filteroptions);

        if ($clean) {
            $text = clean_text($text, FORMAT_HTML, [
                'allowid' => $allowid,
            ]);
        }

        $filteroptions['stage'] = 'post_clean';
        return $filtermanager->filter_text($text, $context, $filteroptions);
    }

    /**
     * Add target="_blank" (and rel="noreferrer") to links in an HTML fragment.
     *
     * Links that already carry a target attribute not containing "_blank" are left untouched.
     *
     * @param string $text The HTML fragment
     * @return string The fragment with updated links
     */
    private function add_blank_target_to_links(string $text): string {
        $domdoc = new \DOMDocument();

        // Silence libxml parse warnings for this load only, then restore the previous mode
        // so the suppression does not leak into unrelated code.
        $previouserrormode = libxml_use_internal_errors(true);
        $domdoc->loadHTML('<?xml version="1.0" encoding="UTF-8" ?>' . $text);
        libxml_clear_errors();
        libxml_use_internal_errors($previouserrormode);

        foreach ($domdoc->getElementsByTagName('a') as $link) {
            if ($link->hasAttribute('target') && !str_contains($link->getAttribute('target'), '_blank')) {
                // An explicit, different target was specified; respect it.
                continue;
            }
            $link->setAttribute('target', '_blank');
            if (!str_contains($link->getAttribute('rel'), 'noreferrer')) {
                $link->setAttribute('rel', trim($link->getAttribute('rel') . ' noreferrer'));
            }
        }

        // This regex is nasty and I don't like it. The correct way to solve this is by loading the HTML like so:
        // $domdoc->loadHTML($text, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); however it seems like some libxml
        // versions don't work properly and end up leaving <html><body>, so I'm forced to use
        // this regex to remove those tags as a preventive measure.
        // The cast keeps trim() from receiving null should preg_replace() fail.
        return trim((string) preg_replace(
            '~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i',
            '',
            $domdoc->saveHTML($domdoc->documentElement),
        ));
    }

    /**
     * Set the value of the forceclean setting.
     *
     * @param bool $forceclean
     * @return self
     */
    public function set_forceclean(bool $forceclean): self {
        $this->forceclean = $forceclean;

        return $this;
    }

    /**
     * Get the current forceclean value.
     *
     * Reverts to CFG->forceclean if not set on this instance, and to false if that is missing or empty.
     *
     * @return bool
     */
    public function get_forceclean(): bool {
        global $CFG;

        if (isset($this->forceclean)) {
            return $this->forceclean;
        }

        return !empty($CFG->forceclean);
    }

    /**
     * Set the value of the striptags setting.
     *
     * @param bool $striptags
     * @return self
     */
    public function set_striptags(bool $striptags): self {
        $this->striptags = $striptags;

        return $this;
    }

    /**
     * Get the current striptags value.
     *
     * Reverts to CFG->formatstringstriptags if not set on this instance, and to false if that
     * is missing or empty.
     *
     * @return bool
     */
    public function get_striptags(): bool {
        global $CFG;

        if (isset($this->striptags)) {
            return $this->striptags;
        }

        // Reading the setting through empty() avoids a TypeError when it has never been defined.
        return !empty($CFG->formatstringstriptags);
    }

    /**
     * Set the value of the filterall setting.
     *
     * @param bool $filterall
     * @return self
     */
    public function set_filterall(bool $filterall): self {
        $this->filterall = $filterall;

        return $this;
    }

    /**
     * Get the current filterall value.
     *
     * Reverts to CFG->filterall if not set on this instance, and to false if that is missing or empty.
     *
     * @return bool
     */
    public function get_filterall(): bool {
        global $CFG;

        if (isset($this->filterall)) {
            return $this->filterall;
        }

        // Reading the setting through empty() avoids a TypeError when it has never been defined.
        return !empty($CFG->filterall);
    }

    /**
     * During initial install, or upgrade from a really old version of Moodle, we should not filter strings at all.
     *
     * @return bool False when $CFG->version is empty or older than 2013051400, or during initial install
     */
    protected function should_filter_string(): bool {
        global $CFG;

        if (empty($CFG->version) || $CFG->version < 2013051400 || during_initial_install()) {
            // Do not filter anything during installation or before upgrade completes.
            return false;
        }

        return true;
    }
}