Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
/**
18
 * Document representation.
19
 *
20
 * @package    core_search
21
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
22
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
namespace core_search;
26
 
27
use context;
28
 
29
defined('MOODLE_INTERNAL') || die();
30
 
31
/**
32
 * Represents a document to index.
33
 *
34
 * Note that, if you are writing a search engine and you want to change \core_search\document
 * behaviour, you can override this class; it will be automatically loaded from \search_YOURENGINE\document.
36
 *
37
 * @package    core_search
38
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
39
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
40
 */
41
class document implements \renderable, \templatable {
42
 
43
    /**
     * @var array Field values of this document, keyed by field name.
     */
    protected $data = [];

    /**
     * @var array Additional values needed only to render the document.
     */
    protected $extradata = [];

    /**
     * @var \moodle_url URL of the document itself.
     */
    protected $docurl = null;

    /**
     * @var \moodle_url URL of the context the document belongs to.
     */
    protected $contexturl = null;

    /**
     * @var \core_search\document_icon Icon associated with the document.
     */
    protected $docicon = null;

    /**
     * @var int|null The content field filearea.
     */
    protected $contentfilearea = null;

    /**
     * @var int|null The content field itemid.
     */
    protected $contentitemid = null;

    /**
     * @var bool True when the document has not been indexed before; false when that is unknown.
     */
    protected $isnew = false;

    /**
     * @var \stored_file[] Stored files attached to the document (entries may still be plain file ids).
     */
    protected $files = [];

    /**
     * Change list (for engine implementers):
     * 2017091700 - add optional field groupid
     *
     * @var int Schema version number (update on any schema change)
     */
    const SCHEMA_VERSION = 2017091700;
95
 
96
    /**
     * Fields that every document must contain.
     *
     * Field types are expressed in solr format: a single notation had to be chosen and solr is
     * the default search engine.
     *
     * Search engine plugins are responsible for mapping these names and types to whatever
     * format they need.
     *
     * @var array
     */
    protected static $requiredfields = [
        'id' => ['type' => 'string', 'stored' => true, 'indexed' => false],
        'itemid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'title' => ['type' => 'text', 'stored' => true, 'indexed' => true, 'mainquery' => true],
        'content' => ['type' => 'text', 'stored' => true, 'indexed' => true, 'mainquery' => true],
        'contextid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'areaid' => ['type' => 'string', 'stored' => true, 'indexed' => true],
        'type' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'courseid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'owneruserid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'modified' => ['type' => 'tdate', 'stored' => true, 'indexed' => true],
    ];
161
 
162
    /**
     * Fields that a document may contain but is not required to.
     *
     * The notation matches the solr field format, but it only defines field types. Search
     * engine plugins are responsible for mapping these names and types to whatever format
     * they need.
     *
     * @var array
     */
    protected static $optionalfields = [
        'userid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'groupid' => ['type' => 'int', 'stored' => true, 'indexed' => true],
        'description1' => ['type' => 'text', 'stored' => true, 'indexed' => true, 'mainquery' => true],
        'description2' => ['type' => 'text', 'stored' => true, 'indexed' => true, 'mainquery' => true],
    ];
195
 
196
    /**
     * Engine-specific fields, i.e. fields used solely by a search engine plugin for its own
     * internal purposes.
     *
     * Field names should be prefixed with the engine name to avoid conflicts with core fields.
     *
     * Uses the same format as the required and optional field definitions.
     *
     * @var array
     */
    protected static $enginefields = [];
207
 
208
    /**
     * Builds the document identity so that its id is unique across search areas.
     *
     * The document id is the generated area id, a dash, and the item id as supplied.
     *
     * @throws \coding_exception If $itemid is not numeric.
     * @param int $itemid An id unique to the search area
     * @param string $componentname The search area component Frankenstyle name
     * @param string $areaname The area name (the search area class name)
     */
    public function __construct($itemid, $componentname, $areaname) {
        if (is_numeric($itemid) === false) {
            throw new \coding_exception('The itemid should be an integer');
        }

        $areaid = \core_search\manager::generate_areaid($componentname, $areaname);

        $this->data['areaid'] = $areaid;
        $this->data['id'] = $areaid . '-' . $itemid;
        $this->data['itemid'] = intval($itemid);
    }
226
 
227
    /**
     * Attaches a stored file to the document.
     *
     * Files are keyed by file id, so adding the same file twice keeps a single entry.
     *
     * @param \stored_file|int $file The file to add, or its file id.
     * @return void
     */
    public function add_stored_file($file) {
        // A numeric argument is already the file id; otherwise ask the file for it.
        $fileid = is_numeric($file) ? $file : $file->get_id();
        $this->files[$fileid] = $file;
    }
240
 
241
    /**
     * Returns the array of attached files.
     *
     * Entries that were added as plain file ids are resolved to file instances here; ids
     * that no longer resolve to a file are dropped from the list.
     *
     * @return \stored_file[]
     */
    public function get_files() {
        // Resolved lazily and only once per call, instead of once per pending file id.
        $fs = null;

        foreach ($this->files as $id => $listfile) {
            if (!is_numeric($listfile)) {
                // Already a file instance, nothing to resolve.
                continue;
            }

            if ($fs === null) {
                $fs = get_file_storage();
            }

            $file = $fs->get_file_by_id($id);
            if ($file) {
                $this->files[$id] = $file;
            } else {
                // Index is out of date and referencing a file that does not exist.
                unset($this->files[$id]);
            }
        }

        return $this->files;
    }
262
 
263
    /**
     * Setter.
     *
     * Performs basic checks to prevent common issues.
     *
     * The field must be defined as a required, optional or engine field. Values of 'int' and
     * 'tdate' fields must be numeric and are cast to a PHP integer (tdate values are expected
     * to be timestamps). Any other value has Unicode non-characters removed and every run of
     * whitespace replaced by a single space.
     *
     * @throws \coding_exception If the field is unknown or an integer/date value is not numeric.
     * @throws \moodle_exception If the whitespace normalisation fails.
     * @param string $fieldname The field name
     * @param string|int $value The value to store
     * @return string|int The stored value
     */
    public function set($fieldname, $value) {
        // Find the field definition, checking required, then optional, then engine fields.
        $fielddata = null;
        foreach ([static::$requiredfields, static::$optionalfields, static::$enginefields] as $definitions) {
            if (!empty($definitions[$fieldname])) {
                $fielddata = $definitions[$fieldname];
                break;
            }
        }

        if (empty($fielddata)) {
            throw new \coding_exception('"' . $fieldname . '" field does not exist.');
        }

        // tdate fields should be set as timestamps, later they might be converted to
        // a date format, it depends on the search engine.
        $isinteger = ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate');

        if ($isinteger) {
            if (!is_numeric($value)) {
                throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' . $value . '"');
            }

            // Be strict: some engines expect data to arrive with the proper type already set.
            $this->data[$fieldname] = intval($value);
            return $this->data[$fieldname];
        }

        // Remove disallowed Unicode characters.
        $cleaned = \core_text::remove_unicode_non_characters($value);

        // Replace all groups of line breaks and spaces by single spaces.
        $this->data[$fieldname] = preg_replace("/\s+/u", " ", $cleaned);

        if ($this->data[$fieldname] === null) {
            $docid = isset($this->data['id']) ? $this->data['id'] : '(unknown)';
            throw new \moodle_exception('error_indexing', 'search', '', null, '"' . $fieldname .
                    '" value causes preg_replace error (may be caused by unusual characters) ' .
                    'in document with id "' . $docid . '"');
        }

        return $this->data[$fieldname];
    }
320
 
321
    /**
     * Stores a value in the extra data of the document.
     *
     * No validation is applied. The value can be read back with
     * \core_search\document->get($fieldname) when no regular field of that name is set.
     *
     * @param string $fieldname
     * @param string $value
     * @return void
     */
    public function set_extra($fieldname, $value) {
        $this->extradata[$fieldname] = $value;
    }
333
 
334
    /**
     * Getter.
     *
     * Looks the field up in the document data first and falls back to the extra data.
     * Use self::is_set beforehand if you are not sure whether the field is set, otherwise
     * a \coding_exception is thrown.
     *
     * @throws \coding_exception If the field is set in neither place.
     * @param string $field
     * @return string|int
     */
    public function get($field) {
        // Document data takes precedence over extra data.
        foreach ([$this->data, $this->extradata] as $source) {
            if (isset($source[$field])) {
                return $source[$field];
            }
        }

        throw new \coding_exception('Field "' . $field . '" is not set in the document');
    }
357
 
358
    /**
     * Tells whether a field is set, either as document data or as extra data.
     *
     * @param string $field
     * @return bool
     */
    public function is_set($field) {
        if (isset($this->data[$field])) {
            return true;
        }

        return isset($this->extradata[$field]);
    }
367
 
368
    /**
     * Flags whether this is a new document. Use false if unknown.
     *
     * @param bool $new Any value; it is converted to a boolean.
     */
    public function set_is_new($new) {
        $this->isnew = $new ? true : false;
    }
376
 
377
    /**
     * Tells whether the document is flagged as new. False if unknown.
     *
     * @return bool
     */
    public function get_is_new() {
        return $this->isnew;
    }
385
 
386
    /**
     * Returns the definitions of all fields: required, optional and engine fields.
     *
     * When a field name appears in more than one group, the first definition wins
     * (required before optional before engine).
     *
     * @return array
     */
    public static function get_default_fields_definition() {
        $definitions = static::$requiredfields;
        $definitions += static::$optionalfields;
        $definitions += static::$enginefields;

        return $definitions;
    }
394
 
395
    /**
     * Prepares a timestamp for insertion into the search engine.
     *
     * The default implementation returns the timestamp untouched, so any search engine can
     * store integers and compare them to find documents between two timestamps. Engines
     * that want their own date format can override this in \search_xxx\document.
     *
     * @param int $timestamp
     * @return int|string The value unchanged here; overrides may return an engine-specific format.
     */
    public static function format_time_for_engine($timestamp) {
        return $timestamp;
    }
409
 
410
    /**
     * Prepares a string value for the search engine.
     *
     * The default implementation returns the string untouched. Search engines may override
     * this method to apply restrictions, such as limiting the size.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        return $string;
    }
422
 
423
    /**
     * Prepares a text value for the search engine.
     *
     * The default implementation returns the text untouched. Search engines may override
     * this method to apply restrictions, such as limiting the size.
     *
     * @param string $text
     * @return string
     */
    public static function format_text_for_engine($text) {
        return $text;
    }
435
 
436
    /**
     * Converts a time value stored in the search engine back into a timestamp.
     *
     * The default implementation returns the value untouched, matching the default
     * format_time_for_engine(). Engines that store dates in their own format should
     * override this in \search_xxx\document.
     *
     * @param string $time
     * @return int|string The value unchanged here; overrides are expected to return a timestamp.
     */
    public static function import_time_from_engine($time) {
        return $time;
    }
450
 
451
    /**
     * Returns the format in which text comes back from the search engine.
     *
     * @return int FORMAT_PLAIN in this base implementation.
     */
    protected function get_text_format() {
        return FORMAT_PLAIN;
    }
459
 
460
    /**
     * Fills the document with data coming from the search engine.
     *
     * Only known fields (required, optional and engine fields) present in $docdata are
     * copied, each one through set(). tdate values go through import_time_from_engine() first.
     *
     * @throws \core_search\engine_exception If a non-date field holds an array (multivalued field).
     * @param array $docdata
     * @return void
     */
    public function set_data_from_engine($docdata) {
        $definitions = static::$requiredfields + static::$optionalfields + static::$enginefields;

        foreach ($definitions as $fieldname => $definition) {
            // Optional params might not be there.
            if (!isset($docdata[$fieldname])) {
                continue;
            }

            $enginevalue = $docdata[$fieldname];

            if ($definition['type'] === 'tdate') {
                // Time fields may need a preprocessing.
                $this->set($fieldname, static::import_time_from_engine($enginevalue));
                continue;
            }

            // No way we can make this work if there is any multivalue field.
            if (is_array($enginevalue)) {
                throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
            }

            $this->set($fieldname, $enginevalue);
        }
    }
486
 
487
    /**
     * Stores the url that links to the document.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_doc_url(\moodle_url $url) {
        $this->docurl = $url;
    }
496
 
497
    /**
     * Returns the url that links to the document.
     *
     * @return \moodle_url|null Null if no url has been set.
     */
    public function get_doc_url() {
        return $this->docurl;
    }
505
 
506
    /**
     * Stores the icon instance of the document.
     *
     * @param \core_search\document_icon $docicon
     * @return void
     */
    public function set_doc_icon(document_icon $docicon) {
        $this->docicon = $docicon;
    }
514
 
515
    /**
     * Returns the icon instance of the document.
     *
     * @return \core_search\document_icon|null Null if no icon has been set.
     */
    public function get_doc_icon() {
        return $this->docicon;
    }
523
 
524
    /**
     * Stores the url that links to the document context.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_context_url(\moodle_url $url) {
        $this->contexturl = $url;
    }
527
 
528
    /**
     * Returns the url that links to the document context.
     *
     * @return \moodle_url|null Null if no url has been set.
     */
    public function get_context_url() {
        return $this->contexturl;
    }
536
 
537
    /**
     * Returns the document data ready to submit to the search engine.
     *
     * Defaults are applied first. Then every tdate, string and text field is passed through
     * the matching format_*_for_engine() method; other field types are exported as they are.
     * The stored document data is only changed by the defaults, not by the formatting.
     *
     * @throws \coding_exception If a required field is missing.
     * @return array
     */
    public function export_for_engine() {
        // Set any unset defaults.
        $this->apply_defaults();

        // Work on a copy, we don't want to affect the document instance.
        $data = $this->data;

        // Field type => engine-dependent formatter (resolved with late static binding).
        $formatters = [
            'tdate' => 'format_time_for_engine',
            'string' => 'format_string_for_engine',
            'text' => 'format_text_for_engine',
        ];

        // Required fields must all be present; optional and engine fields are skipped if unset.
        $groups = [
            [static::$requiredfields, true],
            [static::$optionalfields + static::$enginefields, false],
        ];

        foreach ($groups as list($definitions, $mandatory)) {
            foreach ($definitions as $fieldname => $field) {
                if (!isset($data[$fieldname])) {
                    if ($mandatory) {
                        throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
                    }
                    continue;
                }

                $type = $field['type'];
                if (is_string($type) && isset($formatters[$type])) {
                    // Overwrite the value with the engine dependant format.
                    $formatter = $formatters[$type];
                    $data[$fieldname] = static::$formatter($data[$fieldname]);
                }
            }
        }

        return $data;
    }
590
 
591
    /**
     * Applies defaults to unset fields. Called by export_for_engine() before exporting.
     *
     * Sub-classes overriding this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        if (isset($this->data['type'])) {
            return;
        }

        // The default type is TYPE_TEXT.
        $this->data['type'] = manager::TYPE_TEXT;
    }
602
 
603
    /**
     * Export the document data to be used as a template context.
     *
     * Delegates all the processing to export_doc(), which is a public method of its own.
     * More info than strictly required is returned, as people might be interested in
     * extending the template.
     *
     * @param \renderer_base $output The renderer.
     * @return array
     */
    public function export_for_template(\renderer_base $output): array {
        return $this->export_doc($output);
    }
616
 
617
    /**
     * Returns the current document information.
     *
     * More info than strictly required is returned, as themers and ws clients might be
     * interested in showing more stuff.
     *
     * Although content is a required field when setting up the document, it accepts '' (empty)
     * values as they may be the result of stripping out HTML.
     *
     * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed.
     * The renderer will output the content without any further cleaning.
     *
     * @throws \coding_exception If a field read through get() is not set.
     * @param \renderer_base $output The renderer.
     * @return array
     */
    public function export_doc(\renderer_base $output): array {
        global $USER, $CFG;
        require_once($CFG->dirroot . '/course/lib.php');

        list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($this->get('areaid'));
        $context = context::instance_by_id($this->get('contextid'));
        $stringoptions = ['context' => $context->id];

        $searcharea = \core_search\manager::get_search_area($this->data['areaid']);
        $title = '';
        if ($this->is_set('title')) {
            $title = $this->format_text($searcharea->get_document_display_title($this));
        }

        // Built entry by entry, keeping the key order the template and ws clients receive.
        $data = [];
        $data['itemid'] = $this->get('itemid');
        $data['componentname'] = $componentname;
        $data['areaname'] = $areaname;

        $courseurl = course_get_url($this->get('courseid'));
        $data['courseurl'] = $courseurl->out(false);
        $data['coursefullname'] = format_string($this->get('coursefullname'), true, $stringoptions);

        $modified = $this->get('modified');
        $data['modified'] = userdate($modified);
        $data['timemodified'] = $modified;

        $data['title'] = ($title !== '') ? $title : get_string('notitle', 'search');

        $docurl = $this->get_doc_url();
        $data['docurl'] = $docurl->out(false);

        $data['content'] = null;
        if ($this->is_set('content')) {
            $data['content'] = $this->format_text($this->get('content'));
        }

        $data['contextid'] = $this->get('contextid');

        $contexturl = $this->get_context_url();
        $data['contexturl'] = $contexturl->out(false);

        foreach (['description1', 'description2'] as $descriptionfield) {
            $data[$descriptionfield] = null;
            if ($this->is_set($descriptionfield)) {
                $data[$descriptionfield] = $this->format_text($this->get($descriptionfield));
            }
        }

        // Now take any attached files.
        $files = $this->get_files();
        $filecount = count($files);
        if ($filecount > 1) {
            // array_values() because the files are keyed by file id and a plain list is exported.
            $filenames = array_values(array_map(function ($file) use ($stringoptions) {
                return format_string($file->get_filename(), true, $stringoptions);
            }, $files));
            $data['multiplefiles'] = true;
            $data['filenames'] = $filenames;
        } else if ($filecount === 1) {
            $file = reset($files);
            $data['filename'] = format_string($file->get_filename(), true, $stringoptions);
        }

        if ($this->is_set('userid')) {
            // User details are exported for the user's own documents, or when the viewer holds both capabilities.
            $canviewuser = $this->get('userid') == $USER->id ||
                    (has_capability('moodle/user:viewdetails', $context) &&
                    has_capability('moodle/course:viewparticipants', $context));

            if ($canviewuser) {
                $userurl = new \moodle_url(
                    '/user/view.php',
                    ['id' => $this->get('userid'), 'course' => $this->get('courseid')]
                );
                $data['userurl'] = $userurl->out(false);
                $data['userfullname'] = format_string($this->get('userfullname'), true, $stringoptions);
                $data['userid'] = $this->get('userid');
            }
        }

        $docicon = $this->get_doc_icon();
        if ($docicon) {
            $iconurl = $output->image_url($docicon->get_name(), $docicon->get_component());
            $data['icon'] = $iconurl;
            $data['iconurl'] = $iconurl->out(false);
        }

        $data['textformat'] = $this->get_text_format();

        return $data;
    }
694
 
695
    /**
     * Formats a text string coming from the search engine.
     *
     * The text is passed to the global format_text() using the format returned by
     * get_text_format() and the document's contextid as context:
     * - Search areas are responsible for sending just plain data, the search engine may
     *   append HTML or markdown to it (highlighting for example).
     * - The view is responsible for shortening the text if it is too big.
     *
     * @param  string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        $textformat = $this->get_text_format();
        $options = ['context' => $this->get('contextid')];

        return format_text($text, $textformat, $options);
    }
709
}