Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
/**
18
 * Solr engine.
19
 *
20
 * @package    search_solr
21
 * @copyright  2015 Daniel Neis Araujo
22
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
namespace search_solr;
26
 
27
defined('MOODLE_INTERNAL') || die();
28
 
29
/**
30
 * Solr engine.
31
 *
32
 * @package    search_solr
33
 * @copyright  2015 Daniel Neis Araujo
34
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35
 */
36
class engine extends \core_search\engine {
37
 
38
    /**
39
     * @var string The date format used by solr.
40
     */
41
    const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
42
 
43
    /**
44
     * @var int Commit documents interval (number of milliseconds).
45
     */
46
    const AUTOCOMMIT_WITHIN = 15000;
47
 
48
    /**
49
     * The maximum number of results to fetch at a time.
50
     */
51
    const QUERY_SIZE = 120;
52
 
53
    /**
54
     * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
55
     */
56
    const FRAG_SIZE = 510;
57
 
58
    /**
59
     * Marker for the start of a highlight.
60
     */
61
    const HIGHLIGHT_START = '@@HI_S@@';
62
 
63
    /**
64
     * Marker for the end of a highlight.
65
     */
66
    const HIGHLIGHT_END = '@@HI_E@@';
67
 
68
    /** @var float Boost value for matching course in location-ordered searches */
69
    const COURSE_BOOST = 1;
70
 
71
    /** @var float Boost value for matching context (in addition to course boost) */
72
    const CONTEXT_BOOST = 0.5;
73
 
74
    /**
75
     * @var \SolrClient
76
     */
77
    protected $client = null;
78
 
79
    /**
80
     * @var bool True if we should reuse SolrClients, false if not.
81
     */
82
    protected $cacheclient = true;
83
 
84
    /**
85
     * @var \curl Direct curl object.
86
     */
87
    protected $curl = null;
88
 
89
    /**
90
     * @var array Fields that can be highlighted.
91
     */
92
    protected $highlightfields = array('title', 'content', 'description1', 'description2');
93
 
94
    /**
95
     * @var int Number of total docs reported by Solr for the last query.
96
     */
97
    protected $totalenginedocs = 0;
98
 
99
    /**
100
     * @var int Number of docs we have processed for the last query.
101
     */
102
    protected $processeddocs = 0;
103
 
104
    /**
105
     * @var int Number of docs that have been skipped while processing the last query.
106
     */
107
    protected $skippeddocs = 0;
108
 
109
    /**
110
     * Solr server major version.
111
     *
112
     * @var int
113
     */
114
    protected $solrmajorversion = null;
115
 
116
    /**
     * Sets up the engine configuration and decides whether Solr clients can be reused.
     *
     * @param bool $alternateconfiguration If true, use alternate configuration settings
     * @return void
     */
    public function __construct(bool $alternateconfiguration = false) {
        parent::__construct($alternateconfiguration);

        // curl 7.35.x has a flaw that causes problems with client reuse, so turn reuse off there.
        $versioninfo = curl_version();
        $version = $versioninfo['version'] ?? null;
        if ($version !== null && stripos($version, '7.35.') === 0) {
            $this->cacheclient = false;
        }
    }
131
 
132
    /**
     * Builds the Solr query for the given filters, runs it and returns the processed results.
     *
     * Results are fetched page by page: a small first page, then pages of QUERY_SIZE, until the
     * limit is reached or the engine reports nothing left to process.
     *
     * @throws \core_search\engine_exception
     * @param  \stdClass $filters Containing query and filters.
     * @param  \stdClass $accessinfo Information about areas user can access.
     * @param  int       $limit The maximum number of results to return.
     * @return \core_search\document[] Results, or an empty array if there are none.
     */
    public function execute_query($filters, $accessinfo, $limit = 0) {
        if (empty($limit)) {
            $limit = \core_search\manager::MAX_RESULTS;
        }

        // Ask for the client first so that any problem with it shows up as early as possible.
        $this->get_search_client();

        $query = $this->create_user_query($filters, $accessinfo);
        if (!$query) {
            // The query cannot have results.
            return [];
        }

        // Good match rates are expected, so the first request is kept small.
        // This significantly speeds up the Solr response for the first few pages.
        $query->setRows(min($limit * 3, static::QUERY_SIZE));
        $response = $this->get_query_response($query);

        // Reset the per-query counters from the first response.
        list($pagedocs, $enginetotal) = $this->get_response_counts($response);
        $this->totalenginedocs = $enginetotal;
        $this->processeddocs = 0;
        $this->skippeddocs = 0;
        if ($pagedocs == 0 || $this->totalenginedocs == 0) {
            return [];
        }

        $results = $this->process_response($response, $limit);

        // Every doc in the first response has now been looked at.
        $this->processeddocs += $pagedocs;

        // Keep fetching while we are under the limit and Solr still has unprocessed docs.
        while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
            // Continue from where we stopped; follow-up calls use the full page size.
            $query->setStart($this->processeddocs);
            $query->setRows(static::QUERY_SIZE);

            $response = $this->get_query_response($query);
            list($pagedocs, $enginetotal) = $this->get_response_counts($response);
            if ($pagedocs == 0 || $enginetotal == 0) {
                // Nothing new came back, so stop here with what we already have.
                break;
            }
            $this->totalenginedocs = $enginetotal;

            // Only take as many docs as are still needed and append them to the results.
            $stillneeded = $limit - count($results);
            $results = array_merge($results, $this->process_response($response, $stillneeded));

            $this->processeddocs += $pagedocs;
        }

        return $results;
    }
204
 
205
    /**
     * Runs a query against the search client and returns the response in SolrObject format.
     *
     * Client and server exceptions are not propagated: the message is sent to debugging(),
     * stored in $this->queryerror, and false is returned.
     *
     * @param  \SolrQuery  $query Solr query object.
     * @return \SolrObject|false Response document or false on error.
     */
    protected function get_query_response($query) {
        try {
            $queryresponse = $this->get_search_client()->query($query);
            return $queryresponse->getResponse();
        } catch (\SolrClientException | \SolrServerException $ex) {
            $message = $ex->getMessage();
            debugging('Error executing the provided query: ' . $message, DEBUG_DEVELOPER);
            $this->queryerror = $message;
            return false;
        }
    }
224
 
225
    /**
     * Returns the total number of documents available for the most recent call to execute_query.
     *
     * @return int
     */
    public function get_query_total_count() {
        // The engine total, less the docs that were skipped while processing.
        $availabledocs = $this->totalenginedocs - $this->skippeddocs;
        return $availabledocs;
    }
234
 
235
    /**
     * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
     *
     * Handles both file-grouped responses (grouped by solr_filegroupingid) and standard responses.
     *
     * @param \SolrObject|false $response The response object from Solr.
     * @return array A two part array. First how many response docs are in the response.
     *               Second, how many results are available in the engine.
     */
    protected function get_response_counts($response) {
        $found = 0;
        $included = 0;

        if (isset($response->grouped->solr_filegroupingid->ngroups)) {
            // Get the number of results for file grouped queries.
            $grouping = $response->grouped->solr_filegroupingid;
            $found = $grouping->ngroups;
            // Only count the groups when they are really there; count() on a non-array
            // throws a TypeError on PHP 8. This mirrors the guard used for standard queries.
            if (isset($grouping->groups) && is_array($grouping->groups)) {
                $included = count($grouping->groups);
            }
        } else if (isset($response->response->numFound)) {
            // Get the number of results for standard queries.
            $found = $response->response->numFound;
            if ($found > 0 && isset($response->response->docs) && is_array($response->response->docs)) {
                $included = count($response->response->docs);
            }
        }

        return array($included, $found);
    }
260
 
261
    /**
     * Builds a DisMax query for the given filters, restricted to what the user is allowed to see.
     *
     * @param \stdClass $filters Containing query and filters.
     * @param \stdClass $accessinfo Information about contexts the user can access
     * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
     */
    protected function create_user_query($filters, $accessinfo) {
        global $USER;

        // Work on a copy so the caller's filters object is not modified.
        $data = clone $filters;

        $query = new \SolrDisMaxQuery();
        $this->set_query($query, self::replace_underlines($data->q));
        $this->add_fields($query);

        // Filters coming from the search form are not cached: they are temporary and we do not
        // want them polluting the cache. The context filters further down are the ones worth caching.
        if (!empty($data->title)) {
            $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
        }

        // Id list filters: a document matches when it has any one of the listed ids.
        $idfilters = [
            'areaids' => '{!cache=false}areaid:(',
            'courseids' => '{!cache=false}courseid:(',
            'groupids' => '{!cache=false}groupid:(',
            'userids' => '{!cache=false}userid:(',
        ];
        foreach ($idfilters as $filtername => $prefix) {
            if (!empty($data->$filtername)) {
                $query->addFilterQuery($prefix . implode(' OR ', $data->$filtername) . ')');
            }
        }

        // Modified time range; a missing end of the range becomes the '*' wildcard.
        $hasstart = !empty($data->timestart);
        $hasend = !empty($data->timeend);
        if ($hasstart || $hasend) {
            $data->timestart = $hasstart ? \search_solr\document::format_time_for_engine($data->timestart) : '*';
            $data->timeend = $hasend ? \search_solr\document::format_time_for_engine($data->timeend) : '*';

            // No cache.
            $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
        }

        // Only documents without an owner, or owned by the current user.
        $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

        // Restrict to the contexts the user can access; this filter is left cacheable.
        // Nothing is added when the user can access everything.
        if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) {
            $allcontexts = [];
            foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
                $areawanted = empty($data->areaids) || in_array($areaid, $data->areaids);
                if (!$areawanted) {
                    // This area was not asked for.
                    continue;
                }
                foreach ($areacontexts as $contextid) {
                    // Keyed by id so each context only appears once.
                    $allcontexts[$contextid] = $contextid;
                }
            }
            if (empty($allcontexts)) {
                // No valid contexts at all, so there can be no results.
                return null;
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
        }

        if (!$accessinfo->everything && $accessinfo->separategroupscontexts) {
            // Group restriction: in contexts that use separate groups, a document is only shown
            // to users belonging to its group.
            // (Note: Access all groups is taken care of earlier, when computing these arrays.)

            // Exceptions cover the case of several search areas inside one module where one uses
            // separate groups and another uses visible groups. A little inefficient, but rare.
            $exceptions = '';
            if ($accessinfo->visiblegroupscontextsareas) {
                foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                    $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
                            implode(' OR ', $areaids) . '))';
                }
            }

            // The document has no groupid ...
            $restriction = '(*:* -groupid:[* TO *]) OR ';
            if ($accessinfo->usergroups) {
                // ... or its groupid is one the user belongs to ...
                $restriction .= 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ';
            }
            // ... or its context is not one of the separate groups contexts.
            $restriction .= '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))';

            $query->addFilterQuery($restriction . $exceptions);
        }

        if ($this->file_indexing_enabled()) {
            // Group records by solr_filegroupingid, with at most 3 results per group.
            $query->setGroup(true);
            $query->setGroupLimit(3);
            $query->setGroupNGroups(true);
            $query->addGroupField('solr_filegroupingid');
        } else {
            // Only text documents, in case the index still holds previously indexed files.
            $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
        }

        // When ordering by location, boost the relevant course and (if different) context.
        $orderbylocation = !empty($filters->order) && $filters->order === 'location';
        if ($orderbylocation) {
            $coursecontext = $filters->context->get_course_context();
            $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
            if ($filters->context->contextlevel !== CONTEXT_COURSE) {
                // A block or activity: also boost its own context id.
                $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
            }
        }

        return $query;
    }
395
 
396
    /**
     * Configures highlighting on the query, sets the query string and the number of rows to return.
     *
     * @param \SolrQuery $query
     * @param string     $q The query string to search for.
     */
    protected function set_query($query, $q) {
        // Turn highlighting on for every highlightable field.
        $query->setHighlight(true);
        foreach ($this->highlightfields as $highlightfield) {
            $query->addHighlightField($highlightfield);
        }

        // Fragment size and the markers wrapped around each highlighted match.
        $query->setHighlightFragsize(static::FRAG_SIZE)
            ->setHighlightSimplePre(self::HIGHLIGHT_START)
            ->setHighlightSimplePost(self::HIGHLIGHT_END)
            ->setHighlightMergeContiguous(true);

        $query->setQuery($q);

        // A reasonable max.
        $query->setRows(static::QUERY_SIZE);
    }
418
 
419
    /**
     * Adds every default document field to the fields returned by the query.
     *
     * For DisMax queries, fields flagged as 'mainquery' are also added as query fields.
     *
     * @param \SolrDisMaxQuery|\SolrQuery $query object.
     */
    public function add_fields($query) {
        $documentclass = $this->get_document_classname();
        $isdismax = $query instanceof \SolrDisMaxQuery;

        foreach ($documentclass::get_default_fields_definition() as $fieldname => $definition) {
            $query->addField($fieldname);

            if (!$isdismax || empty($definition['mainquery'])) {
                continue;
            }

            // This is a field the main query should be run against.
            // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
            // a boost value is required even though it is optional. Every field gets the same
            // explicit value so that none is boosted over the others.
            $query->addQueryField($fieldname, 1);
        }
    }
444
 
445
    /**
     * Merges the highlighting section of a response into the matching docs, using the doc id as the common key.
     *
     * @param object $response containing results.
     */
    public function add_highlight_content($response) {
        if (!isset($response->highlighting)) {
            // Nothing to merge.
            return;
        }

        $highlighting = $response->highlighting;
        foreach ($response->response->docs as $doc) {
            // Highlighting entries are keyed by document id.
            $docid = $doc->id;
            $this->merge_highlight_field_values($doc, $highlighting->$docid);
        }
    }
462
 
463
    /**
     * Replaces the highlightable field values of a doc with their highlighted versions, when available.
     *
     * @throws \core_search\engine_exception If a highlightable field of the doc holds an array.
     * @param object $doc containing the results.
     * @param object $highlighteddoc containing the highlighted results values.
     */
    public function merge_highlight_field_values($doc, $highlighteddoc) {
        foreach ($this->highlightfields as $field) {
            if (empty($doc->$field)) {
                continue;
            }

            // Multivalued solr fields cannot be handled here, so an array value is an error.
            if (is_array($doc->{$field})) {
                throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field);
            }

            if (empty($highlighteddoc->$field)) {
                // No highlighted version, keep the original value.
                continue;
            }

            // Use the first highlighted value.
            $doc->$field = reset($highlighteddoc->$field);
        }
    }
487
 
488
    /**
     * Turns a Solr response into result documents, applying owner and access checks on the Moodle side.
     *
     * Grouped responses are handed over to grouped_files_process_response().
     *
     * @param \SolrObject $response Solr object containing the response return from solr server.
     * @param int        $limit The maximum number of results to return. 0 for all.
     * @param bool       $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
     * @return array $results containing final results to be displayed.
     */
    protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
        global $USER;

        if (empty($response)) {
            return [];
        }

        if (isset($response->grouped)) {
            return $this->grouped_files_process_response($response, $limit);
        }

        $userid = $USER->id;
        $noownerid = \core_search\manager::NO_OWNER_ID;

        $docs = $response->response->docs;
        if (!$docs) {
            return [];
        }

        if (empty($response->response->numFound)) {
            return [];
        }

        $this->add_highlight_content($response);

        $out = [];
        // Go through the docs, keeping only those that exist and that the user may access.
        foreach ($docs as $docdata) {
            $owner = $docdata['owneruserid'];
            if ($owner != $noownerid && $owner != $userid) {
                // Owned by somebody else, so nobody but the owner may see it.
                continue;
            }

            $searcharea = $this->get_search_area($docdata->areaid);
            if (!$searcharea) {
                continue;
            }

            $docdata = $this->standarize_solr_obj($docdata);

            $access = $skipaccesscheck
                ? \core_search\manager::ACCESS_GRANTED
                : $searcharea->check_access($docdata['itemid']);

            if ($access == \core_search\manager::ACCESS_DELETED) {
                $this->delete_by_id($docdata['id']);
                // It has just been deleted, so take it out of the processed and total counters.
                $this->processeddocs--;
                $this->totalenginedocs--;
            } else if ($access == \core_search\manager::ACCESS_DENIED) {
                $this->skippeddocs++;
            } else if ($access == \core_search\manager::ACCESS_GRANTED) {
                $out[] = $this->to_document($searcharea, $docdata);
            }

            // Stop once the limit has been reached.
            if (!empty($limit) && count($out) >= $limit) {
                break;
            }
        }

        return $out;
    }
565
 
566
    /**
     * Converts a grouped response into documents, attaching the matching files to each one.
     *
     * Groups whose main document is not part of the response are completed through get_missing_docs().
     *
     * @param \SolrObject $response The response returned from solr server
     * @param int        $limit The maximum number of results to return. 0 for all.
     * @return array Final results to be displayed.
     */
    protected function grouped_files_process_response($response, $limit = 0) {
        // Without the grouping, or without any match in it, there is nothing to return.
        if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
            return [];
        }

        $granted = 0;
        $orderedids = [];
        $completedocs = [];
        $incompletedocs = [];

        $highlightingobj = $response->highlighting;

        // Each group represents a "master document".
        foreach ($response->grouped->solr_filegroupingid->groups as $group) {
            $groupid = $group->groupValue;
            $groupdocs = $group->doclist->docs;
            $firstdoc = reset($groupdocs);

            $searcharea = $this->get_search_area($firstdoc->areaid);
            if (!$searcharea) {
                // Well, this is a problem.
                continue;
            }

            // Access is checked against the first doc of the group.
            $access = $searcharea->check_access($firstdoc->itemid);
            if ($access == \core_search\manager::ACCESS_DELETED) {
                // Deleted from Moodle: delete it from the index too and move on.
                $this->delete_by_id($firstdoc->id);
                // It has just been deleted, so take it out of the processed and total counters.
                $this->processeddocs--;
                $this->totalenginedocs--;
                continue;
            }
            if ($access == \core_search\manager::ACCESS_DENIED) {
                // Not for the current user, just skip it.
                $this->skippeddocs++;
                continue;
            }
            $granted++;

            // Split the group into the main document and the ids of the files returned with it.
            $maindoc = false;
            $fileids = [];
            foreach ($groupdocs as $groupdoc) {
                if ($groupdoc->id == $groupid) {
                    $maindoc = $groupdoc;
                } else if (isset($groupdoc->solr_fileid)) {
                    $fileids[] = $groupdoc->solr_fileid;
                }
            }

            // Remember the result order for the final merge.
            $orderedids[] = $groupid;

            if ($maindoc) {
                if (isset($highlightingobj->$groupid)) {
                    // Merge the highlighting for this doc.
                    $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
                }
                $doc = $this->to_document($searcharea, $this->standarize_solr_obj($maindoc));
                // Attach the result files to the doc.
                foreach ($fileids as $fileid) {
                    $doc->add_stored_file($fileid);
                }
                $completedocs[$groupid] = $doc;
            } else {
                // The main doc was not returned; keep the file ids so it can be built later.
                $incompletedocs[$groupid] = $fileids;
            }

            if (!empty($limit) && $granted >= $limit) {
                // Max results reached, the remaining groups are ignored.
                break;
            }
        }

        $incompletedocs = $this->get_missing_docs($incompletedocs);

        // Merge complete and previously incomplete documents back together, in results order.
        $out = [];
        foreach ($orderedids as $docid) {
            $doc = $completedocs[$docid] ?? $incompletedocs[$docid] ?? null;
            if ($doc !== null) {
                $out[] = $doc;
            }
        }

        return $out;
    }
668
 
669
    /**
     * Fetches main documents that were missing from a grouped response and attaches the given files to them.
     *
     * The missingdocs array is indexed by document id. Each value is an array of stored_files or
     * stored file ids to attach to that document once it has been retrieved.
     *
     * The returned array is indexed by document id as well.
     *
     * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
     * @return document[]
     */
    protected function get_missing_docs($missingdocs) {
        if (empty($missingdocs)) {
            return [];
        }

        $wantedids = array_keys($missingdocs);

        // One custom query that matches exactly the missing document ids.
        $query = new \SolrQuery();
        $this->set_query($query, '*');
        $this->add_fields($query);
        $query->setRows(count($wantedids));
        $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

        // Access was already checked for these docs, so the check is skipped this time.
        $results = $this->process_response($this->get_query_response($query), 0, true);

        $out = [];
        foreach ($results as $result) {
            $resultid = $result->get('id');
            if (!isset($missingdocs[$resultid])) {
                // Not one of the docs we asked for, ignore it.
                continue;
            }
            foreach ($missingdocs[$resultid] as $filedoc) {
                $result->add_stored_file($filedoc);
            }
            $out[$resultid] = $result;
        }

        return $out;
    }
714
 
715
    /**
     * Converts a \SolrObject instance into a plain php array keyed by property name.
     *
     * @param \SolrObject $obj
     * @return array The returned document as an array.
     */
    public function standarize_solr_obj(\SolrObject $obj) {
        $docdata = [];
        foreach ($obj->getPropertyNames() as $rawname) {
            // Names are trimmed first, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
            $name = trim($rawname);
            $docdata[$name] = $obj->offsetGet($name);
        }
        return $docdata;
    }
732
 
733
    /**
     * Sends a single document to the search engine, optionally followed by its files.
     *
     * This does not commit to the search engine.
     *
     * @param document $document
     * @param bool     $fileindexing True if file indexing is to be used
     * @return bool False if the document could not be added, true otherwise.
     */
    public function add_document($document, $fileindexing = false) {
        $added = $this->add_solr_document($document->export_for_engine());
        if (!$added) {
            return false;
        }

        if ($fileindexing) {
            // Updates all the files attached to the document in the index.
            $this->process_document_files($document);
        }

        return true;
    }
756
 
757
    /**
     * Sends several documents to the engine in one go.
     *
     * The exported documents are added as a batch; attached files, when requested, are then
     * processed document by document (file indexing is slow regardless).
     *
     * @param \core_search\document[] $documents Documents to add
     * @param bool $fileindexing If true, the files of each document are processed as well
     * @return int[] Array of three elements: successfully processed, failed processed, batch count
     */
    public function add_document_batch(array $documents, bool $fileindexing = false): array {
        $exported = [];
        foreach ($documents as $doc) {
            $exported[] = $doc->export_for_engine();
        }

        $counts = $this->add_solr_documents($exported);

        if (!$fileindexing) {
            return $counts;
        }

        foreach ($documents as $doc) {
            // Updates every file attached to this document in the index.
            $this->process_document_files($doc);
        }

        return $counts;
    }
782
 
783
    /**
     * Removes underlines found at the edges of words in the content.
     *
     * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
     * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
     *
     * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
     * and end of the italicised phrase (not between words). Solr treats underlines as part of the
     * word, which means that if you search for a word in italic then you can't find it.
     *
     * Word boundaries are evaluated with Unicode semantics, so an underline between two words
     * made of non-ASCII letters (e.g. 'été_café') is kept, exactly like 'frogs_and_toads'.
     *
     * @param string $str String to process
     * @return string String with the edge underlines removed
     */
    protected static function replace_underlines(string $str): string {
        // The 'u' modifier makes \b treat accented and other non-ASCII letters as word characters.
        $replaced = preg_replace('~\b_|_\b~u', '', $str);

        if ($replaced === null) {
            // The Unicode-aware pattern fails on invalid UTF-8; fall back to byte-wise matching
            // rather than losing the content altogether.
            $replaced = preg_replace('~\b_|_\b~', '', $str);
        }

        // Never return null from a method declared to return a string.
        return $replaced ?? $str;
    }
799
 
800
    /**
     * Builds the Solr input document for an exported document.
     *
     * @param array $doc Array of document fields
     * @return \SolrInputDocument Document holding every field of $doc
     */
    protected function create_solr_document(array $doc): \SolrInputDocument {
        // content_to_text marks italic text as _italic_, and Solr treats underlines as part of
        // the word, so an italicised word could not be found by searching for it. Strip the
        // underlines at word edges from the content before it is indexed.
        if (array_key_exists('content', $doc)) {
            $doc['content'] = self::replace_underlines($doc['content']);
        }

        // Copy every field across.
        $inputdoc = new \SolrInputDocument();
        foreach ($doc as $name => $fieldvalue) {
            $inputdoc->addField($name, $fieldvalue);
        }

        return $inputdoc;
    }
823
 
824
    /**
     * Adds one text document to the search engine.
     *
     * Solr client and server exceptions are reported through debugging() and turned into a
     * false return value.
     *
     * @param array $doc Array of document fields; 'id' is used in the error messages.
     * @return bool True if the document was added, false if Solr raised an error.
     */
    protected function add_solr_document($doc) {
        $solrdoc = $this->create_solr_document($doc);

        try {
            $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        } catch (\SolrClientException $e) {
            debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
            return false;
        } catch (\SolrServerException $e) {
            // Only the first line is reported, the rest is a full java stacktrace.
            $firstline = strtok($e->getMessage(), "\n");
            debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $firstline, DEBUG_DEVELOPER);
            return false;
        }

        return true;
    }
846
 
847
    /**
     * Adds several text documents to the search engine.
     *
     * The documents are first sent as one batch. If Solr raises a client or server exception,
     * each document is sent again on its own so that the failing one(s) can be counted.
     *
     * @param array $docs Array of documents (each an array of fields) to add
     * @return int[] Array of success, failure, batch count
     * @throws \core_search\engine_exception
     */
    protected function add_solr_documents(array $docs): array {
        $solrdocs = [];
        foreach ($docs as $fields) {
            $solrdocs[] = $this->create_solr_document($fields);
        }

        try {
            // One request for the whole batch; if it goes through, everything succeeded.
            $this->get_search_client()->addDocuments($solrdocs, true, static::AUTOCOMMIT_WITHIN);
            return [count($solrdocs), 0, 1];
        } catch (\SolrClientException $e) {
            // Handled by the per-document fallback below.
            $batchfailed = true;
        } catch (\SolrServerException $e) {
            // Handled by the per-document fallback below.
            $batchfailed = true;
        }

        // Fallback: add the documents one by one to find out which of them fail. Adding
        // overwrites, so re-sending documents that were already accepted does no harm.
        $attempts = 0;
        $succeeded = 0;
        foreach ($docs as $fields) {
            $attempts++;
            if ($this->add_solr_document($fields)) {
                $succeeded++;
            }
        }

        return [$succeeded, $attempts - $succeeded, $attempts];
    }
890
 
891
    /**
     * Index files attached to the document, ensuring the index matches the current document files.
     *
     * For documents that aren't known to be new, the index is checked for existing files:
     * - New files are added.
     * - Existing and unchanged files are skipped.
     * - Files that are in the index but no longer on the document are deleted from the index.
     * - Files that have changed are re-indexed.
     *
     * Does nothing when file indexing is disabled.
     *
     * @param document $document
     */
    protected function process_document_files($document) {
        if (!$this->file_indexing_enabled()) {
            return;
        }

        // Number of indexed file records fetched per request.
        $pagesize = 500;

        // Files currently attached to the document; entries still present at the end get indexed.
        $files = $document->get_files();

        // A document that is not new may already have files in the index, so compare against them.
        if (!$document->get_is_new()) {
            $offset = 0;
            $stale = array();
            list($total, $batch) = $this->get_indexed_files($document, $offset, $pagesize);

            // Page through the indexed files so that large numbers of them are handled cleanly.
            while (true) {
                foreach ($batch as $indexed) {
                    $fileid = $indexed->solr_fileid;

                    if (!isset($files[$fileid])) {
                        // No longer attached to the document. Deletion is deferred, since
                        // deleting while paging could reorder the results.
                        $stale[] = $indexed->id;
                        continue;
                    }

                    $current = $files[$fileid];

                    // Filelib does not guarantee time modified is updated, so the other important
                    // values are compared as well. The last condition covers a file that filtering
                    // blocked last time but that the current settings consider indexable.
                    $needsreindex = $indexed->modified != $current->get_timemodified()
                        || strcmp($indexed->title, $current->get_filename()) !== 0
                        || $indexed->solr_filecontenthash != $current->get_contenthash()
                        || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                            && $this->file_is_indexable($current));

                    if (!$needsreindex) {
                        // Already indexed and unchanged, so it does not need to be sent again.
                        unset($files[$fileid]);
                    }
                }

                $offset += $pagesize;
                if ($offset >= $total) {
                    break;
                }

                // More records remain, fetch the next page. The total is refreshed as well.
                list($total, $batch) = $this->get_indexed_files($document, $offset, $pagesize);
                if ($offset >= $total) {
                    break;
                }
            }

            // Remove the files that are no longer attached. The client is used directly, as the
            // engine delete_by_id won't work on file docs.
            foreach ($stale as $staleid) {
                $this->get_search_client()->deleteById($staleid);
            }
        }

        // Whatever is left is new or changed and has to be indexed.
        foreach ($files as $file) {
            $this->add_stored_file($document, $file);
        }
    }
973
 
974
    /**
     * Get the currently indexed files for a particular document, returns the total count, and a subset of files.
     *
     * @param document $document
     * @param int      $start The row to start the results on. Zero indexed.
     * @param int      $rows The number of rows to fetch
     * @return array   A two element array, the first is the total number of available results, the second is an array
     *                 of documents for the current request.
     */
    protected function get_indexed_files($document, $start = 0, $rows = 500) {
        // A stripped down query, built by hand for efficiency, that matches every file record
        // sharing the document's solr_filegroupingid.
        $query = new \SolrQuery();
        $query->setQuery('*');
        $query->setRows($rows);
        $query->setStart($start);

        // Sorting by id keeps the paging consistent between requests.
        $query->addSortField('id');

        // Only the bare minimum of fields is requested.
        $wanted = ['id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus'];
        foreach ($wanted as $fieldname) {
            $query->addField($fieldname);
        }

        $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
        $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

        $response = $this->get_query_response($query);

        if (!empty($response->response->numFound)) {
            return array($response->response->numFound, $this->convert_file_results($response));
        }

        return array(0, array());
    }
1013
 
1014
    /**
     * A very lightweight handler for getting information about already indexed files from a Solr response.
     *
     * @param \SolrObject $responsedoc A Solr response document
     * @return \stdClass[] An array of objects that contain the basic information for file processing.
     */
    protected function convert_file_results($responsedoc) {
        $docs = $responsedoc->response->docs;
        if (!$docs) {
            return array();
        }

        $converted = array();
        foreach ($docs as $doc) {
            // Keep only the bare minimum needed for file processing.
            $converted[] = (object) [
                'id' => $doc->id,
                'modified' => document::import_time_from_engine($doc->modified),
                'title' => $doc->title,
                'solr_fileid' => $doc->solr_fileid,
                'solr_filecontenthash' => $doc->solr_filecontenthash,
                'solr_fileindexstatus' => $doc->solr_fileindexstatus,
            ];
        }

        return $converted;
    }
1041
 
1042
    /**
     * Adds a file to the search engine.
     *
     * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
     * Tika has much better content type detection than Moodle, and we will have many more doc failures
     * if we try to send mime types.
     *
     * A file that is not indexable, or whose extraction request fails, is still recorded in the
     * index through its base fields, with solr_fileindexstatus set accordingly.
     *
     * @param document $document
     * @param \stored_file $storedfile
     * @return void
     */
    protected function add_stored_file($document, $storedfile) {
        $filedoc = $document->export_file_for_engine($storedfile);

        if (!$this->file_is_indexable($storedfile)) {
            // Files we don't consider indexable still get a reference in the search engine.
            $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
            $this->add_solr_document($filedoc);
            return;
        }

        $curl = $this->get_curl_object();
        $url = $this->get_connection_url('/update/extract');

        $fixedparams = [
            // Return results as XML.
            'wt' => 'xml',
            // Prevents solr from automatically making fields for every tika output.
            'uprefix' => 'ignored_',
            // Controls how content is captured, keeping the file content clean of non-important metadata.
            'captureAttr' => 'true',
            // Move the content to a field for indexing.
            'fmap.content' => 'solr_filecontent',
            // Common fields that match the standard *_point dynamic field and cause an error.
            'fmap.media_white_point' => 'ignored_mwp',
            'fmap.media_black_point' => 'ignored_mbp',
        ];
        foreach ($fixedparams as $paramname => $paramvalue) {
            $url->param($paramname, $paramvalue);
        }

        // Every document field is passed as a literal. It goes into a temporary name first and is
        // then mapped back to the true field, which prevents errors or Tika overwriting common field names.
        foreach ($filedoc as $key => $value) {
            // Discard any Tika field that matches our schema, so it doesn't overwrite ours.
            $url->param('fmap.'.$key, 'ignored_'.$key);
            // Place data in a tmp field.
            $url->param('literal.mdltmp_'.$key, $value);
            // Then move to the final field.
            $url->param('fmap.mdltmp_'.$key, $key);
        }

        // This sets the true filename for Tika.
        $url->param('resource.name', $storedfile->get_filename());

        // Most of what follows is error checking around the curl request. It is informational
        // only, since per file/doc results are not tracked.
        try {
            // The file is posted directly as binary data (not using multipart) to avoid
            // Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
            // Note this loads the whole file into memory; see limit in file_is_indexable().
            $result = $curl->post($url->out(false), $storedfile->get_content());

            $errno = $curl->get_errno();
            $info = $curl->get_info();

            if ($errno != 0) {
                // An internal cURL error occurred; the error text is in the result.
                $message = 'Curl error '.$errno.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
                debugging($message, DEBUG_DEVELOPER);
            } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
                // Unexpected HTTP response code.
                $message = 'Error while indexing file with document id '.$filedoc['id'];
                // Try to get error message out of msg or title if it exists.
                if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                }
                // This happens whenever a file fails to index for any reason, so it is kept quieter.
                if (CLI_SCRIPT && !PHPUNIT_TEST) {
                    mtrace($message);
                }
            } else if (!preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
                // The expected status field is missing, the response cannot be processed.
                $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
                $message .= strtok($result, "\n");
                debugging($message, DEBUG_DEVELOPER);
            } else if ((int)$matches[1] !== 0) {
                // A status other than 0 is an error.
                $message = 'Unexpected Solr status code '.(int)$matches[1];
                $message .= ' while indexing file with document id '.$filedoc['id'].'.';
                debugging($message, DEBUG_DEVELOPER);
            } else {
                // The document was successfully indexed.
                return;
            }
        } catch (\Exception $e) {
            // There was an error, but per-file success is not tracked, so just carry on.
            debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
        }

        // Reaching this point means the file was not indexed due to an error, so only the base
        // info is indexed, without the file content.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
        $this->add_solr_document($filedoc);
    }
1152
 
1153
    /**
     * Checks to see if a passed file is indexable.
     *
     * A file is rejected when it is larger than the configured maxindexfilekb, when it does not
     * fit in the PHP memory limit minus a 100MB safety margin, or when it is a Moodle backup.
     *
     * @param \stored_file $file The file to check
     * @return bool True if the file can be indexed
     */
    protected function file_is_indexable($file) {
        if (!empty($this->config->maxindexfilekb) && ($file->get_filesize() > ($this->config->maxindexfilekb * 1024))) {
            // The file is too big to index.
            return false;
        }

        // Because we now load files into memory to index them in Solr, we also have to ensure that
        // we don't try to index anything bigger than the memory limit (less 100MB for safety).
        // Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
        // in config, so this will allow files over 100MB to be indexed.
        $limit = ini_get('memory_limit');
        if ($limit && $limit != -1) {
            // Headroom kept free for everything else that lives in memory next to the file content.
            $safetymargin = 100 * 1024 * 1024;
            // Clamp at zero so that a limit below the margin does not produce a negative threshold.
            $limitbytes = max(0, get_real_size($limit) - $safetymargin);
            if ($file->get_filesize() > $limitbytes) {
                return false;
            }
        }

        if ($file->get_mimetype() == 'application/vnd.moodle.backup') {
            // We don't index Moodle backup files. There is nothing usefully indexable in them.
            return false;
        }

        return true;
    }
1186
 
1187
    /**
     * Asks the Solr client to commit all pending changes.
     *
     * @return void
     */
    protected function commit() {
        $client = $this->get_search_client();
        $client->commit();
    }
1195
 
1196
    /**
     * Called once a search area has been indexed; commits the pending changes.
     *
     * Returning false would prevent the search area completed time and stats from being
     * updated. This implementation always returns true.
     *
     * @param \core_search\base $searcharea The search area that was complete
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index is being performed
     * @return bool True means that data is considered indexed
     */
    public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
        // None of the arguments are needed, finishing an area only requires a commit.
        $this->commit();

        return true;
    }
1211
 
1212
    /**
     * Tells whether file indexing is turned on in the engine configuration.
     *
     * @return bool The fileindexing config value, converted to a boolean.
     */
    public function file_indexing_enabled() {
        return boolval($this->config->fileindexing);
    }
1220
 
1221
    /**
     * Deletes the specified document, together with its related files, and commits.
     *
     * @param string $id The document id to delete
     * @return void
     */
    public function delete_by_id($id) {
        // The item and all of its files share the same solr_filegroupingid, so a single
        // query on that field removes them all.
        $client = $this->get_search_client();
        $client->deleteByQuery('solr_filegroupingid:' . $id);

        $this->commit();
    }
1232
 
1233
    /**
     * Deletes the documents of one search area, or every document when no area is given, and commits.
     *
     * @param string $areaid Area to delete; a falsy value deletes the whole index contents.
     * @return void
     */
    public function delete($areaid = null) {
        $query = $areaid ? 'areaid:' . $areaid : '*:*';

        $this->get_search_client()->deleteByQuery($query);
        $this->commit();
    }
1247
 
1248
    /**
     * Checks that the Solr server described by the search_solr config can be used.
     *
     * The checks run in order: server configuration, schema version, schema setup. The first
     * one that fails provides the returned error.
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_ready() {
        $status = $this->is_server_configured();
        if ($status !== true) {
            return $status;
        }

        // The configuration check has already contacted the server. For pages where
        // performance is important the full schema check is skipped.
        if ($this->should_skip_schema_check()) {
            return true;
        }

        // Update schema if required/possible.
        $status = $this->check_latest_schema();
        if ($status !== true) {
            return $status;
        }

        // Finally make sure the schema is set up.
        try {
            (new schema($this))->validate_setup();
        } catch (\moodle_exception $e) {
            return $e->getMessage();
        }

        return true;
    }
1282
 
1283
    /**
     * Is the solr server properly configured?
     *
     * Requires a hostname and an index name in the config, a usable client and a Solr major
     * version of at least 4.
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_configured() {
        $missingconfig = empty($this->config->server_hostname) || empty($this->config->indexname);
        if ($missingconfig) {
            return 'No solr configuration found';
        }

        if (!$this->get_search_client(false)) {
            return get_string('engineserverstatus', 'search');
        }

        try {
            $majorversion = $this->get_solr_major_version();
        } catch (\SolrClientException $ex) {
            debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        } catch (\SolrServerException $ex) {
            debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        }

        if ($majorversion < 4) {
            // Minimum solr 4.0.
            return get_string('minimumsolr4', 'search_solr');
        }

        return true;
    }
1313
 
1314
    /**
     * Returns the solr server major version.
     *
     * The value is read from the server's system information on first use and remembered in
     * $this->solrmajorversion afterwards.
     *
     * @return int
     */
    public function get_solr_major_version() {
        if ($this->solrmajorversion === null) {
            // We should really ping first the server to see if the specified indexname is valid
            // but we want to minimise solr server requests as they are expensive. system() emits
            // a warning if it can not connect to the configured index in the configured server.
            $systemdata = @$this->get_search_client()->system();
            $specversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

            // The major version is whatever precedes the first dot.
            $major = substr($specversion, 0, strpos($specversion, '.'));
            $this->solrmajorversion = intval($major);
        }

        return $this->solrmajorversion;
    }
1333
 
1334
    /**
     * Checks if the PHP Solr extension is available.
     *
     * @return bool True when the solr_get_version() function exists.
     */
    public function is_installed() {
        $available = function_exists('solr_get_version');

        return $available;
    }
1342
 
1343
    /**
     * Returns the solr client instance.
     *
     * We don't reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.
     * The client is only kept for later calls when $this->cacheclient is set.
     *
     * @throws \core_search\engine_exception If the Solr extension is missing, or if the client
     *                                       cannot be created and $triggerexception is true.
     * @param bool $triggerexception Whether a client that cannot be created raises an exception
     *                               (true) or makes this method return false (false).
     * @return \SolrClient|false The client, or false if it could not be created and
     *                           $triggerexception is false.
     */
    protected function get_search_client($triggerexception = true) {
        global $CFG;

        // Type comparison as it is set to false if not available.
        if ($this->client !== null) {
            return $this->client;
        }

        $options = array(
            'hostname' => $this->config->server_hostname,
            'path'     => '/solr/' . $this->config->indexname,
            'login'    => !empty($this->config->server_username) ? $this->config->server_username : '',
            'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
            'port'     => !empty($this->config->server_port) ? $this->config->server_port : '',
            'secure' => !empty($this->config->secure),
            'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
            'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
            'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
            'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
            'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
            'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
        );

        // Go through the site proxy unless the Solr host is on the bypass list.
        if ($CFG->proxyhost && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
            $options['proxy_host'] = $CFG->proxyhost;
            if (!empty($CFG->proxyport)) {
                $options['proxy_port'] = $CFG->proxyport;
            }
            if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
                $options['proxy_login'] = $CFG->proxyuser;
                $options['proxy_password'] = $CFG->proxypassword;
            }
        }

        if (!class_exists('\SolrClient')) {
            throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
        }

        // "new" never evaluates to false: the SolrClient constructor reports unusable options by
        // throwing, so that is the failure that has to be handled here.
        try {
            $client = new \SolrClient($options);
        } catch (\SolrIllegalArgumentException $e) {
            if ($triggerexception) {
                throw new \core_search\engine_exception('engineserverstatus', 'search', '', null, $e->getMessage());
            }
            // Failures are not cached, so the next call tries again.
            return false;
        }

        if ($this->cacheclient) {
            $this->client = $client;
        }

        return $client;
    }
1402
 
1403
    /**
     * Returns a curl object for connecting to solr.
     *
     * The object is created and configured (SSL options, timeout, basic authentication) on the
     * first call and reused afterwards.
     *
     * @return \curl
     */
    public function get_curl_object() {
        if ($this->curl !== null) {
            return $this->curl;
        }

        // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
        $this->curl = new \curl(['ignoresecurity' => true]);

        $config = $this->config;
        $options = array();

        // Build the SSL options. Based on pecl-solr and general testing.
        if (!empty($config->secure)) {
            if (!empty($config->ssl_cert)) {
                $options['CURLOPT_SSLCERT'] = $config->ssl_cert;
                $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
            }

            if (!empty($config->ssl_key)) {
                $options['CURLOPT_SSLKEY'] = $config->ssl_key;
                $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
            }

            // The remaining SSL settings map one to one onto a curl option.
            $direct = [
                'ssl_keypassword' => 'CURLOPT_KEYPASSWD',
                'ssl_cainfo' => 'CURLOPT_CAINFO',
                'ssl_capath' => 'CURLOPT_CAPATH',
            ];
            foreach ($direct as $setting => $curlopt) {
                if (!empty($config->{$setting})) {
                    $options[$curlopt] = $config->{$setting};
                }
            }
        }

        // Set timeout as for Solr client.
        $options['CURLOPT_TIMEOUT'] = !empty($config->server_timeout) ? $config->server_timeout : '30';

        $this->curl->setopt($options);

        // Basic authentication is only sent when both a username and a password are configured.
        if (!empty($config->server_username) && !empty($config->server_password)) {
            $credentials = $config->server_username . ':' . $config->server_password;
            $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
        }

        return $this->curl;
    }
1454
 
1455
    /**
     * Return a Moodle url object for the server connection.
     *
     * The url has the form protocol://hostname[:port]/solr/indexname/path.
     *
     * @param string $path The solr path to append.
     * @return \moodle_url
     */
    public function get_connection_url($path) {
        // Must use the proper protocol, or SSL will fail.
        $scheme = empty($this->config->secure) ? 'http' : 'https';

        // The port is only included when one is configured.
        $port = !empty($this->config->server_port) ? ':' . $this->config->server_port : '';

        $address = $scheme . '://' . rtrim($this->config->server_hostname, '/') . $port
            . '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

        return new \moodle_url($address);
    }
1472
 
1473
    /**
     * Declares that this engine is able to filter results by group.
     *
     * For Solr, the group support lives in the execute_query function.
     *
     * @return bool Always true
     */
    public function supports_group_filtering() {
        return true;
    }
1481
 
1482
    /**
     * Bring the Solr schema up to the requested version.
     *
     * @param mixed $oldversion Schema version being upgraded from (not used by this implementation)
     * @param mixed $newversion Schema version to upgrade to
     * @return mixed True when the schema was set up; otherwise the non-true result of
     *      can_setup_server(), or the 'schemaversionunknown' string for an unrecognised version
     */
    protected function update_schema($oldversion, $newversion) {
        $schema = new schema($this);

        // Stop early if the server cannot be set up, handing the check result back unchanged.
        $status = $schema->can_setup_server();
        if ($status !== true) {
            return $status;
        }

        switch ($newversion) {
            // This version just requires a setup call to add new fields.
            case 2017091700:
                $schema->setup();
                return true;

            // An unknown schema version might mean the change has not been implemented
            // correctly, so report it instead of touching the schema.
            default:
                return get_string('schemaversionunknown', 'search');
        }
    }
1508
 
1509
    /**
     * List the result orderings available for a search started from a context.
     *
     * On top of the orders offered by the parent engine, Solr supports sort by
     * location within course contexts or below.
     *
     * @param \context $context Context that the user requested search from
     * @return array Array from order name => display text
     */
    public function get_supported_orders(\context $context) {
        $supported = parent::get_supported_orders($context);

        // If not within a course, no other kind of sorting is supported.
        if (!$context->get_course_context(false)) {
            return $supported;
        }

        // Within a course or activity/block: offer sort by location, labelled with the context name.
        $supported['location'] = get_string('order_location', 'search', $context->get_context_name());

        return $supported;
    }
1528
 
1529
    /**
     * Declares that this engine is able to search by user id.
     *
     * @return bool Always true
     */
    public function supports_users() {
        return true;
    }
1537
 
1538
    /**
     * Declares that this engine is able to add documents in a batch.
     *
     * @return bool Always true
     */
    public function supports_add_document_batch(): bool {
        return true;
    }
1546
 
1547
    /**
     * Delete the indexed documents of a context that has been deleted.
     *
     * Runs a delete-by-query on the contextid field and then commits.
     *
     * @param int $oldcontextid Context that has been deleted
     * @return bool True whenever the delete and commit complete without an exception
     * @throws \core_search\engine_exception When the delete or the commit throws
     */
    public function delete_index_for_context(int $oldcontextid) {
        $solr = $this->get_search_client();
        $query = 'contextid:' . $oldcontextid;

        try {
            $solr->deleteByQuery($query);
            $solr->commit(true);
        } catch (\Exception $failure) {
            // Surface any failure as a search engine exception carrying the original message.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $failure->getMessage());
        }

        return true;
    }
1564
 
1565
    /**
     * Delete the indexed documents of a course.
     *
     * Runs a delete-by-query on the courseid field and then commits.
     *
     * @param int $oldcourseid Id of the course whose documents are removed
     * @return bool True whenever the delete and commit complete without an exception
     * @throws \core_search\engine_exception When the delete or the commit throws
     */
    public function delete_index_for_course(int $oldcourseid) {
        $solr = $this->get_search_client();
        $query = 'courseid:' . $oldcourseid;

        try {
            $solr->deleteByQuery($query);
            $solr->commit(true);
        } catch (\Exception $failure) {
            // Surface any failure as a search engine exception carrying the original message.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $failure->getMessage());
        }

        return true;
    }
1582
 
1583
    /**
     * Tells whether an alternate server configuration has been defined.
     *
     * The alternate hostname, index name and port must all be non-empty.
     *
     * @return bool True if alternate configuration is available
     */
    public function has_alternate_configuration(): bool {
        $config = $this->config;

        // A single empty setting is enough to make the alternate configuration unavailable.
        return !(empty($config->alternateserver_hostname) ||
                empty($config->alternateindexname) ||
                empty($config->alternateserver_port));
    }
1593
}