1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
/**
|
|
|
18 |
* Solr engine.
|
|
|
19 |
*
|
|
|
20 |
* @package search_solr
|
|
|
21 |
* @copyright 2015 Daniel Neis Araujo
|
|
|
22 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
23 |
*/
|
|
|
24 |
|
|
|
25 |
namespace search_solr;
|
|
|
26 |
|
|
|
27 |
defined('MOODLE_INTERNAL') || die();
|
|
|
28 |
|
|
|
29 |
/**
|
|
|
30 |
* Solr engine.
|
|
|
31 |
*
|
|
|
32 |
* @package search_solr
|
|
|
33 |
* @copyright 2015 Daniel Neis Araujo
|
|
|
34 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
35 |
*/
|
|
|
36 |
class engine extends \core_search\engine {
|
|
|
37 |
|
|
|
38 |
/**
|
|
|
39 |
* @var string The date format used by solr.
|
|
|
40 |
*/
|
|
|
41 |
const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
|
|
|
42 |
|
|
|
43 |
/**
|
|
|
44 |
* @var int Commit documents interval (number of milliseconds).
|
|
|
45 |
*/
|
|
|
46 |
const AUTOCOMMIT_WITHIN = 15000;
|
|
|
47 |
|
|
|
48 |
/**
|
|
|
49 |
* The maximum number of results to fetch at a time.
|
|
|
50 |
*/
|
|
|
51 |
const QUERY_SIZE = 120;
|
|
|
52 |
|
|
|
53 |
/**
|
|
|
54 |
* Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
|
|
|
55 |
*/
|
|
|
56 |
const FRAG_SIZE = 510;
|
|
|
57 |
|
|
|
58 |
/**
|
|
|
59 |
* Marker for the start of a highlight.
|
|
|
60 |
*/
|
|
|
61 |
const HIGHLIGHT_START = '@@HI_S@@';
|
|
|
62 |
|
|
|
63 |
/**
|
|
|
64 |
* Marker for the end of a highlight.
|
|
|
65 |
*/
|
|
|
66 |
const HIGHLIGHT_END = '@@HI_E@@';
|
|
|
67 |
|
|
|
68 |
/** @var float Boost value for matching course in location-ordered searches */
|
|
|
69 |
const COURSE_BOOST = 1;
|
|
|
70 |
|
|
|
71 |
/** @var float Boost value for matching context (in addition to course boost) */
|
|
|
72 |
const CONTEXT_BOOST = 0.5;
|
|
|
73 |
|
|
|
74 |
/**
|
|
|
75 |
* @var \SolrClient
|
|
|
76 |
*/
|
|
|
77 |
protected $client = null;
|
|
|
78 |
|
|
|
79 |
/**
|
|
|
80 |
* @var bool True if we should reuse SolrClients, false if not.
|
|
|
81 |
*/
|
|
|
82 |
protected $cacheclient = true;
|
|
|
83 |
|
|
|
84 |
/**
|
|
|
85 |
* @var \curl Direct curl object.
|
|
|
86 |
*/
|
|
|
87 |
protected $curl = null;
|
|
|
88 |
|
|
|
89 |
/**
|
|
|
90 |
* @var array Fields that can be highlighted.
|
|
|
91 |
*/
|
|
|
92 |
protected $highlightfields = array('title', 'content', 'description1', 'description2');
|
|
|
93 |
|
|
|
94 |
/**
|
|
|
95 |
* @var int Number of total docs reported by Solr for the last query.
|
|
|
96 |
*/
|
|
|
97 |
protected $totalenginedocs = 0;
|
|
|
98 |
|
|
|
99 |
/**
|
|
|
100 |
* @var int Number of docs we have processed for the last query.
|
|
|
101 |
*/
|
|
|
102 |
protected $processeddocs = 0;
|
|
|
103 |
|
|
|
104 |
/**
|
|
|
105 |
* @var int Number of docs that have been skipped while processing the last query.
|
|
|
106 |
*/
|
|
|
107 |
protected $skippeddocs = 0;
|
|
|
108 |
|
|
|
109 |
/**
|
|
|
110 |
* Solr server major version.
|
|
|
111 |
*
|
|
|
112 |
* @var int
|
|
|
113 |
*/
|
|
|
114 |
protected $solrmajorversion = null;
|
|
|
115 |
|
|
|
116 |
/**
 * Builds the engine and decides whether Solr clients may be reused.
 *
 * The parent constructor is called first. Afterwards the version string reported by
 * curl_version() is inspected: when it begins with "7.35." client caching is disabled.
 *
 * @param bool $alternateconfiguration If true, use alternate configuration settings
 * @return void
 */
public function __construct(bool $alternateconfiguration = false) {
    parent::__construct($alternateconfiguration);

    $curlinfo = curl_version();
    $version = $curlinfo['version'] ?? null;
    if ($version !== null && stripos($version, '7.35.') === 0) {
        // There is a flaw with curl 7.35.0 that causes problems with client reuse.
        $this->cacheclient = false;
    }
}
|
|
|
131 |
|
|
|
132 |
/**
 * Builds the Solr query for the given filters, runs it and returns the accessible documents.
 *
 * The first request asks for a small page (at most QUERY_SIZE rows); further pages of QUERY_SIZE
 * rows are requested until either $limit documents were collected or Solr has nothing more to offer.
 * The counters totalenginedocs, processeddocs and skippeddocs are reset and updated along the way.
 *
 * @throws \core_search\engine_exception
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about areas user can access.
 * @param int $limit The maximum number of results to return. Empty means \core_search\manager::MAX_RESULTS.
 * @return \core_search\document[] Results, or an empty array if there are none
 */
public function execute_query($filters, $accessinfo, $limit = 0) {
    if (empty($limit)) {
        $limit = \core_search\manager::MAX_RESULTS;
    }

    // If there is any problem with the client we want the exception as soon as possible.
    $this->get_search_client();

    // A falsy query means the user cannot get any result at all.
    $query = $this->create_user_query($filters, $accessinfo);
    if (!$query) {
        return [];
    }

    // Good match rates are expected, so the first request fetches only a small number of rows.
    // This keeps Solr response times low for the first few pages.
    $query->setRows(min($limit * 3, static::QUERY_SIZE));
    $response = $this->get_query_response($query);

    // Reset the counters from the first response.
    list($included, $found) = $this->get_response_counts($response);
    $this->totalenginedocs = $found;
    $this->processeddocs = 0;
    $this->skippeddocs = 0;
    if ($included == 0 || $found == 0) {
        // No results.
        return array();
    }

    // Keep the valid documents; every doc of this response counts as processed afterwards.
    $results = $this->process_response($response, $limit);
    $this->processeddocs += $included;

    // Keep asking Solr while we are below the limit and it still has unprocessed docs.
    while (count($results) < $limit && $this->totalenginedocs > $this->processeddocs) {
        // Continue where we stopped; since another round trip is needed anyway, fetch a full page.
        $query->setStart($this->processeddocs);
        $query->setRows(static::QUERY_SIZE);

        $response = $this->get_query_response($query);
        list($included, $found) = $this->get_response_counts($response);
        if ($included == 0 || $found == 0) {
            // Nothing new came back (an empty found count would be odd), so stop here.
            break;
        }
        $this->totalenginedocs = $found;

        // Only ask for as many documents as are still missing and append them.
        $remaining = $limit - count($results);
        $results = array_merge($results, $this->process_response($response, $remaining));

        $this->processeddocs += $included;
    }

    return $results;
}
|
|
|
204 |
|
|
|
205 |
/**
 * Runs a query against the search client and returns the response object.
 *
 * Client and server exceptions are not propagated: the message is reported through debugging(),
 * stored in $this->queryerror, and false is returned.
 *
 * @param \SolrQuery $query Solr query object.
 * @return \SolrObject|false Response document or false on error.
 */
protected function get_query_response($query) {
    try {
        $queryresponse = $this->get_search_client()->query($query);
        return $queryresponse->getResponse();
    } catch (\SolrClientException $ex) {
        $message = $ex->getMessage();
    } catch (\SolrServerException $ex) {
        $message = $ex->getMessage();
    }

    // Both failure types are handled the same way.
    debugging('Error executing the provided query: ' . $message, DEBUG_DEVELOPER);
    $this->queryerror = $message;
    return false;
}
|
|
|
224 |
|
|
|
225 |
/**
 * Returns the total number of documents available for the most recent call to execute_query.
 *
 * @return int Engine total minus the documents that were skipped while processing.
 */
public function get_query_total_count() {
    $enginetotal = $this->totalenginedocs;
    $skipped = $this->skippeddocs;

    // Docs we have determined are bad are not counted.
    return $enginetotal - $skipped;
}
|
|
|
234 |
|
|
|
235 |
/**
 * Extracts count information from a response. Invalid or empty responses yield 0, 0.
 *
 * Responses grouped by solr_filegroupingid report groups; all other responses report
 * plain documents.
 *
 * @param object|false $response The response returned by get_query_response().
 * @return array A two part array. First how many results are contained in the response.
 *               Second, how many results are available in the engine.
 */
protected function get_response_counts($response) {
    if (isset($response->grouped->solr_filegroupingid->ngroups)) {
        // File grouped query: count groups rather than documents.
        $grouping = $response->grouped->solr_filegroupingid;
        $found = $grouping->ngroups;
        return array(count($grouping->groups), $found);
    }

    if (!isset($response->response->numFound)) {
        // Neither a grouped nor a standard response.
        return array(0, 0);
    }

    // Standard query.
    $found = $response->response->numFound;
    $included = 0;
    if ($found > 0 && is_array($response->response->docs)) {
        $included = count($response->response->docs);
    }

    return array($included, $found);
}
|
|
|
260 |
|
|
|
261 |
/**
 * Creates the dismax query for a user search, including all filters and access restrictions.
 *
 * The supplied filters are cloned before any value is rewritten, so the caller's object is
 * left untouched.
 *
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about contexts the user can access
 * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
 */
protected function create_user_query($filters, $accessinfo) {
    global $USER;

    // Let's keep these changes internal.
    $data = clone $filters;

    $query = new \SolrDisMaxQuery();
    $this->set_query($query, self::replace_underlines($data->q));
    $this->add_fields($query);

    // Search filters are sent with cache=false: they are temporary and should not pollute the
    // filter cache. The filters worth caching are the context ones added further down.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }

    // Filter property => Solr field. A document matches when it has any of the listed ids.
    $idfilters = [
        'areaids' => 'areaid',
        'courseids' => 'courseid',
        'groupids' => 'groupid',
        'userids' => 'userid',
    ];
    foreach ($idfilters as $property => $solrfield) {
        if (!empty($data->$property)) {
            $query->addFilterQuery('{!cache=false}' . $solrfield . ':(' . implode(' OR ', $data->$property) . ')');
        }
    }

    // Modification time range; a missing bound becomes the open-ended '*'.
    $hasstart = !empty($data->timestart);
    $hasend = !empty($data->timeend);
    if ($hasstart || $hasend) {
        $data->timestart = $hasstart ? \search_solr\document::format_time_for_engine($data->timestart) : '*';
        $data->timeend = $hasend ? \search_solr\document::format_time_for_engine($data->timeend) : '*';

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    // Restrict to the contexts the user can access; this filter is the one we want cached.
    // When the user can access everything no context filter is needed.
    if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) {
        // Collect the contexts of all relevant areas, keyed by id so they stay unique.
        $allcontexts = array();
        foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
            if (empty($data->areaids) || in_array($areaid, $data->areaids)) {
                foreach ($areacontexts as $contextid) {
                    $allcontexts[$contextid] = $contextid;
                }
            }
            // Areas that were not requested are skipped.
        }
        if (!$allcontexts) {
            // This means there are no valid contexts for them, so they get no results.
            return null;
        }
        $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
    }

    if (!$accessinfo->everything && $accessinfo->separategroupscontexts) {
        // Group restriction. In contexts that use separate groups, results are only shown to
        // members of the group. (Access all groups is taken care of earlier, when computing
        // these arrays.) The alternatives below are joined with OR.

        // The document has no groupid at all.
        $clauses = array('(*:* -groupid:[* TO *])');

        if ($accessinfo->usergroups) {
            // The groupid is one that the user belongs to.
            $clauses[] = 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ')';
        }

        // The context is not one of the separate groups contexts.
        $clauses[] = '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))';

        // Exceptions: a module may provide several search areas where one uses separate groups
        // and another uses visible groups. It is a little inefficient, but this should be rare.
        if ($accessinfo->visiblegroupscontextsareas) {
            foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                $clauses[] = '(contextid:' . $contextid . ' AND areaid:(' . implode(' OR ', $areaids) . '))';
            }
        }

        $query->addFilterQuery(implode(' OR ', $clauses));
    }

    if ($this->file_indexing_enabled()) {
        // Now group records by solr_filegroupingid. Limit to 3 results per group.
        $query->setGroup(true);
        $query->setGroupLimit(3);
        $query->setGroupNGroups(true);
        $query->addGroupField('solr_filegroupingid');
    } else {
        // Make sure we only get text files, in case the index has pre-existing files.
        $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
    }

    // If ordering by location, add in boost for the relevant course or context ids.
    $bylocation = !empty($filters->order) && $filters->order === 'location';
    if ($bylocation) {
        $coursecontext = $filters->context->get_course_context();
        $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
        if ($filters->context->contextlevel !== CONTEXT_COURSE) {
            // If it's a block or activity, also add a boost for the specific context id.
            $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
        }
    }

    return $query;
}
|
|
|
395 |
|
|
|
396 |
/**
 * Applies the query string, the default row count and the highlighting options to a query.
 *
 * @param \SolrQuery $query Query object to configure.
 * @param string $q The query string to search for.
 */
protected function set_query($query, $q) {
    $query->setQuery($q);

    // A reasonable max.
    $query->setRows(static::QUERY_SIZE);

    // Highlighting: enable it for every highlightable field and wrap matches in our markers.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $fieldname) {
        $query->addHighlightField($fieldname);
    }
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre(self::HIGHLIGHT_START);
    $query->setHighlightSimplePost(self::HIGHLIGHT_END);
    $query->setHighlightMergeContiguous(true);
}
|
|
|
418 |
|
|
|
419 |
/**
 * Sets fields to be returned in the result.
 *
 * For dismax queries, fields flagged with 'mainquery' are additionally registered as
 * query fields.
 *
 * @param \SolrDisMaxQuery|\SolrQuery $query object.
 */
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $isdismax = $query instanceof \SolrDisMaxQuery;

    foreach ($documentclass::get_default_fields_definition() as $fieldname => $definition) {
        $query->addField($fieldname);

        if (!$isdismax || empty($definition['mainquery'])) {
            continue;
        }

        // Add fields the main query should be run against.
        // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
        // a boost value is required, even if it is optional; to avoid boosting one among other fields,
        // the explicit boost value will be the default one, for every field.
        $query->addQueryField($fieldname, 1);
    }
}
|
|
|
444 |
|
|
|
445 |
/**
 * Merges the highlighting section of a response into its documents.
 *
 * The highlighting entry for each document is looked up under the document's id and
 * passed to merge_highlight_field_values(). Nothing happens when the response carries
 * no highlighting section.
 *
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    if (isset($response->highlighting)) {
        $highlighting = $response->highlighting;
        foreach ($response->response->docs as $doc) {
            // The document id is the key shared by the docs and the highlighting sections.
            $docid = $doc->id;
            $this->merge_highlight_field_values($doc, $highlighting->$docid);
        }
    }
}
|
|
|
462 |
|
|
|
463 |
/**
 * Overwrites the highlightable fields of a document with their highlighted version.
 *
 * Only fields that are non-empty in the document are considered. For those, the first
 * highlighted value (if there is one) replaces the stored value.
 *
 * @throws \core_search\engine_exception If a highlightable field of the document holds an array.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {
    foreach ($this->highlightfields as $field) {
        if (empty($doc->$field)) {
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$field})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field);
        }

        if (empty($highlighteddoc->$field)) {
            // Nothing highlighted for this field, keep the stored value.
            continue;
        }

        // Replace by the highlighted result.
        $doc->$field = reset($highlighteddoc->$field);
    }
}
|
|
|
487 |
|
|
|
488 |
/**
 * Filters the response on Moodle side.
 *
 * Grouped responses are delegated to grouped_files_process_response(). For standard responses
 * each document is checked for ownership and access; deleted documents are removed from the
 * index, denied ones are counted as skipped, granted ones are converted to documents.
 *
 * @param \SolrObject $response Solr object containing the response return from solr server.
 * @param int $limit The maximum number of results to return. 0 for all.
 * @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
 * @return array $results containing final results to be displayed.
 */
protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
    global $USER;

    if (empty($response)) {
        return array();
    }

    if (isset($response->grouped)) {
        return $this->grouped_files_process_response($response, $limit);
    }

    $docs = $response->response->docs;
    if (!$docs) {
        return array();
    }

    if (empty($response->response->numFound)) {
        return array();
    }

    $this->add_highlight_content($response);

    $userid = $USER->id;
    $noownerid = \core_search\manager::NO_OWNER_ID;
    $out = array();

    // Iterate through the results checking its availability and whether they are available for the user or not.
    foreach ($docs as $docdata) {
        $ownerid = $docdata['owneruserid'];
        if ($ownerid != $noownerid && $ownerid != $userid) {
            // If owneruserid is set, no other user should be able to access this record.
            continue;
        }

        $searcharea = $this->get_search_area($docdata->areaid);
        if (!$searcharea) {
            continue;
        }

        // From here on the document is handled as a plain array.
        $docdata = $this->standarize_solr_obj($docdata);

        $access = $skipaccesscheck
            ? \core_search\manager::ACCESS_GRANTED
            : $searcharea->check_access($docdata['itemid']);

        if ($access == \core_search\manager::ACCESS_DELETED) {
            $this->delete_by_id($docdata['id']);
            // Remove one from our processed and total counters, since we promptly deleted.
            $this->processeddocs--;
            $this->totalenginedocs--;
        } else if ($access == \core_search\manager::ACCESS_DENIED) {
            $this->skippeddocs++;
        } else if ($access == \core_search\manager::ACCESS_GRANTED) {
            // Add the doc.
            $out[] = $this->to_document($searcharea, $docdata);
        }

        // Stop when we hit our limit.
        if (!empty($limit) && count($out) >= $limit) {
            break;
        }
    }

    return $out;
}
|
|
|
565 |
|
|
|
566 |
/**
 * Processes grouped file results into documents, with attached matching files.
 *
 * Every group of the solr_filegroupingid grouping stands for one main document plus the files
 * that matched. Access is checked once per group, using the first document of the group. When
 * the main document itself is not part of the group it is fetched afterwards through
 * get_missing_docs(). The returned list keeps the order of the groups in the response.
 *
 * @param \SolrObject $response The response returned from solr server
 * @param int $limit The maximum number of results to return. 0 for all.
 * @return array Final results to be displayed.
 */
protected function grouped_files_process_response($response, $limit = 0) {
    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // The response may carry no highlighting section; use the same isset guard as add_highlight_content().
    $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        $firstdoc = reset($groupdocs);

        if (!$firstdoc) {
            // A group without documents cannot be checked or displayed.
            continue;
        }

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        if ($access == \core_search\manager::ACCESS_DELETED) {
            // If deleted from Moodle, delete from index and then continue.
            $this->delete_by_id($firstdoc->id);
            // Remove one from our processed and total counters, since we promptly deleted.
            $this->processeddocs--;
            $this->totalenginedocs--;
            continue;
        }
        if ($access == \core_search\manager::ACCESS_DENIED) {
            // This means we should just skip for the current user.
            $this->skippeddocs++;
            continue;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if (!empty($limit) && $numgranted >= $limit) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    // Fetch the main documents that were not part of their group.
    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
|
|
|
668 |
|
|
|
669 |
/**
 * Retrieves missing main documents and attaches the provided files to them.
 *
 * The missingdocs array is indexed by document id; each value is an array of stored_files or
 * stored file ids to attach to the retrieved document. The documents are fetched with a single
 * query filtered by id, and access is not re-checked.
 *
 * The returned array is also indexed by document id.
 *
 * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 * @return document[]
 */
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $docids = array_keys($missingdocs);

    // Build a custom query that will get all the missing documents.
    $query = new \SolrQuery();
    $this->set_query($query, '*');
    $this->add_fields($query);
    $query->setRows(count($docids));
    $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $docids) . ')');

    // We know the missing docs have already been checked for access, so don't recheck.
    $results = $this->process_response($this->get_query_response($query), 0, true);

    $out = array();
    foreach ($results as $result) {
        $resultid = $result->get('id');
        if (isset($missingdocs[$resultid])) {
            // Attach the files.
            foreach ($missingdocs[$resultid] as $filedoc) {
                $result->add_stored_file($filedoc);
            }
            $out[$resultid] = $result;
        }
        // Results we didn't expect are skipped.
    }

    return $out;
}
|
|
|
714 |
|
|
|
715 |
/**
 * Converts a \SolrObject instance into a standard php array.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array, keyed by trimmed property name.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $name = trim($rawname);
        $docdata[$name] = $obj->offsetGet($name);
    }

    return $docdata;
}
|
|
|
732 |
|
|
|
733 |
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used
 * @return bool False if the document could not be added, true otherwise
 */
public function add_document($document, $fileindexing = false) {
    $added = $this->add_solr_document($document->export_for_engine());
    if (!$added) {
        return false;
    }

    if ($fileindexing) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return true;
}
|
|
|
756 |
|
|
|
757 |
/**
 * Adds a batch of documents to the engine at once.
 *
 * @param \core_search\document[] $documents Documents to add
 * @param bool $fileindexing If true, indexes files (these are done one at a time)
 * @return int[] Array of three elements: successfully processed, failed processed, batch count
 */
public function add_document_batch(array $documents, bool $fileindexing = false): array {
    // Export every document; array_values() keeps the batch a plain list whatever the input keys are.
    $docdatabatch = array_values(array_map(function($document) {
        return $document->export_for_engine();
    }, $documents));

    $resultcounts = $this->add_solr_documents($docdatabatch);

    if (!$fileindexing) {
        return $resultcounts;
    }

    // Files are processed one document at a time (if there are files it's slow anyway).
    foreach ($documents as $document) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return $resultcounts;
}
|
|
|
782 |
|
|
|
783 |
/**
 * Removes underlines found at the edges of words in the content.
 *
 * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
 * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
 *
 * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
 * and end of the italicised phrase (not between words). Solr treats underlines as part of the
 * word, which means that if you search for a word in italic then you can't find it.
 *
 * Word boundaries are evaluated in Unicode mode so that underlines between non-ASCII letters
 * (e.g. 'café_au_lait') are kept. If the string is not valid UTF-8 the byte-wise pattern is
 * used instead, and if that fails too the string is returned unchanged.
 *
 * @param string $str String to process
 * @return string String with the edge underlines removed
 */
protected static function replace_underlines(string $str): string {
    // With the 'u' modifier \b treats accented letters as word characters.
    $replaced = preg_replace('~\b_|_\b~u', '', $str);

    if ($replaced === null) {
        // preg_replace() returns null on error (e.g. malformed UTF-8): retry byte-wise.
        $replaced = preg_replace('~\b_|_\b~', '', $str);
    }

    // Never return null from a method declared to return string.
    return $replaced ?? $str;
}
|
|
|
799 |
|
|
|
800 |
/**
 * Builds a \SolrInputDocument from an array of document fields.
 *
 * @param array $doc Array of document fields (field name => value)
 * @return \SolrInputDocument Document holding one field per array entry
 */
protected function create_solr_document(array $doc): \SolrInputDocument {
    // content_to_text wraps italic text in _underlines_ and Solr treats underlines as part
    // of the word, so italic words could not be found. Clean the content before indexing.
    if (array_key_exists('content', $doc)) {
        $doc['content'] = self::replace_underlines($doc['content']);
    }

    $solrdoc = new \SolrInputDocument();

    // Copy every field across.
    foreach (array_keys($doc) as $field) {
        $solrdoc->addField($field, $doc[$field]);
    }

    return $solrdoc;
}
|
|
|
823 |
|
|
|
824 |
/**
 * Adds one text document (given as a field array) to the search engine.
 *
 * Solr client and server exceptions are reported through debugging() and turned
 * into a false return value.
 *
 * @param array $doc Document fields; $doc['id'] is used in the error messages.
 * @return bool True if the document was sent without an exception, false otherwise.
 */
protected function add_solr_document($doc) {
    $solrdoc = $this->create_solr_document($doc);
    $added = false;

    try {
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        $added = true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // Only the first line is reported, the rest is a full java stacktrace.
        $firstline = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $firstline, DEBUG_DEVELOPER);
    }

    return $added;
}
|
|
|
846 |
|
|
|
847 |
/**
 * Adds several text documents to the search engine.
 *
 * The documents are first sent as a single batch. If that raises a Solr client or
 * server exception they are re-sent one by one so the failing ones can be counted.
 *
 * @param array $docs Array of documents (each an array of fields) to add
 * @return int[] Array of success count, failure count, batch count
 * @throws \core_search\engine_exception
 */
protected function add_solr_documents(array $docs): array {
    $solrdocs = [];
    foreach ($docs as $fields) {
        $solrdocs[] = $this->create_solr_document($fields);
    }

    $batchok = false;
    try {
        $this->get_search_client()->addDocuments($solrdocs, true, static::AUTOCOMMIT_WITHIN);
        $batchok = true;
    } catch (\SolrClientException $e) {
        // Handled below by retrying the documents individually.
        $batchok = false;
    } catch (\SolrServerException $e) {
        // Handled below by retrying the documents individually.
        $batchok = false;
    }

    if ($batchok) {
        // Everything went through in a single batch.
        return [count($solrdocs), 0, 1];
    }

    // Fall back to adding the documents individually to find out which one(s) failed.
    // Since adding overwrites, re-sending the ones that already succeeded won't hurt.
    $tally = ['success' => 0, 'failure' => 0];
    foreach ($docs as $fields) {
        $outcome = $this->add_solr_document($fields) ? 'success' : 'failure';
        $tally[$outcome]++;
    }

    // Each individual attempt counts as one batch.
    return [$tally['success'], $tally['failure'], count($docs)];
}
|
|
|
890 |
|
|
|
891 |
/**
 * Indexes the files attached to a document, keeping the index in line with the current files.
 *
 * For documents that aren't flagged as new, the files already in the index are inspected:
 * - New files are added.
 * - Existing and unchanged files are skipped.
 * - Files that are in the index but no longer on the document are deleted from the index.
 * - Files that have changed are re-indexed.
 *
 * Nothing happens when file indexing is disabled.
 *
 * @param document $document
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Maximum number of indexed file records fetched per request.
    $batchsize = 500;

    // Files currently attached; whatever is left in here at the end gets (re-)indexed.
    $pending = $document->get_files();

    if (!$document->get_is_new()) {
        // Walk through the already indexed files page by page, so large numbers are handled cleanly.
        $staleids = [];
        $offset = 0;
        list($total, $batch) = $this->get_indexed_files($document, 0, $batchsize);

        do {
            foreach ($batch as $indexed) {
                $fileid = $indexed->solr_fileid;

                if (!isset($pending[$fileid])) {
                    // The file is no longer attached. Deletion is postponed until all pages
                    // have been read, as deleting now could reorder the results.
                    $staleids[] = $indexed->id;
                    continue;
                }

                $current = $pending[$fileid];

                // Filelib does not guarantee time modified is updated, so the important values
                // are compared too. The last check covers files that filtering blocked the last
                // time round but that the current settings consider indexable.
                $needsreindex = $indexed->modified != $current->get_timemodified()
                    || strcmp($indexed->title, $current->get_filename()) !== 0
                    || $indexed->solr_filecontenthash != $current->get_contenthash()
                    || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                        && $this->file_is_indexable($current));

                if (!$needsreindex) {
                    // Already indexed and unchanged: nothing to do for this file.
                    unset($pending[$fileid]);
                }
            }

            $offset += $batchsize;

            if ($offset < $total) {
                // More records remain, fetch the next page.
                list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);
            }
        } while ($offset < $total);

        // Remove index entries of files that are no longer attached. The client is used
        // directly because the engine's delete_by_id won't work on file docs.
        foreach ($staleids as $staleid) {
            $this->get_search_client()->deleteById($staleid);
        }
    }

    // Finally index everything that is new or changed.
    foreach ($pending as $file) {
        $this->add_stored_file($document, $file);
    }
}
|
|
|
973 |
|
|
|
974 |
/**
 * Fetches one page of the file records currently indexed for a document.
 *
 * @param document $document
 * @param int $start The row to start the results on. Zero indexed.
 * @param int $rows The number of rows to fetch
 * @return array A two element array: the total number of available results, followed by the
 *               array of file records for this request (0 and an empty array if nothing was found).
 */
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    // A stripped down query of our own, for efficiency: every file record that belongs
    // to the document's solr_filegroupingid.
    $query = new \SolrQuery();
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);

    // Paging needs a consistent sort order.
    $query->addSortField('id');

    // Only the bare minimum of fields is requested.
    $fields = ['id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus'];
    foreach ($fields as $field) {
        $query->addField($field);
    }

    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    $response = $this->get_query_response($query);

    if (!empty($response->response->numFound)) {
        return [$response->response->numFound, $this->convert_file_results($response)];
    }

    return [0, []];
}
|
|
|
1013 |
|
|
|
1014 |
/**
 * Lightweight conversion of a Solr response into the file information needed for processing.
 *
 * @param \SolrObject $responsedoc A Solr response document
 * @return \stdClass[] One object per returned doc with id, modified, title, solr_fileid,
 *                     solr_filecontenthash and solr_fileindexstatus.
 */
protected function convert_file_results($responsedoc) {
    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return [];
    }

    $files = [];
    foreach ($docs as $doc) {
        // Copy only the bare minimum of information.
        $files[] = (object) [
            'id' => $doc->id,
            'modified' => document::import_time_from_engine($doc->modified),
            'title' => $doc->title,
            'solr_fileid' => $doc->solr_fileid,
            'solr_filecontenthash' => $doc->solr_filecontenthash,
            'solr_fileindexstatus' => $doc->solr_fileindexstatus,
        ];
    }

    return $files;
}
|
|
|
1041 |
|
|
|
1042 |
/**
 * Adds a file to the search engine.
 *
 * Notes about Solr and Tika indexing: the mime type is not sent, only the filename.
 * Tika has much better content type detection than Moodle, and there would be many more
 * doc failures if mime types were sent.
 *
 * Files that are not indexable, and files whose extraction request fails, are still
 * recorded in the index with only their base information and a matching
 * solr_fileindexstatus.
 *
 * @param document $document
 * @param \stored_file $storedfile
 * @return void
 */
protected function add_stored_file($document, $storedfile) {
    $filedoc = $document->export_file_for_engine($storedfile);

    if (!$this->file_is_indexable($storedfile)) {
        // Non-indexable files still get a reference in the search engine.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
        $this->add_solr_document($filedoc);
        return;
    }

    $curl = $this->get_curl_object();
    $url = $this->get_connection_url('/update/extract');

    $fixedparams = [
        // Return results as XML.
        'wt' => 'xml',
        // This will prevent solr from automatically making fields for every tika output.
        'uprefix' => 'ignored_',
        // Control how content is captured. This keeps the file content clean of non-important metadata.
        'captureAttr' => 'true',
        // Move the content to a field for indexing.
        'fmap.content' => 'solr_filecontent',
        // These are common fields that match the standard *_point dynamic field and cause an error.
        'fmap.media_white_point' => 'ignored_mwp',
        'fmap.media_black_point' => 'ignored_mbp',
    ];
    foreach ($fixedparams as $name => $value) {
        $url->param($name, $value);
    }

    // Copy each key to the url with literal. The data goes in a temp name first and is then
    // mapped back to the true field, which prevents errors or Tika overwriting common field names.
    foreach ($filedoc as $key => $value) {
        // Discard any tika field that matches our schema, so it doesn't overwrite ours.
        $url->param('fmap.'.$key, 'ignored_'.$key);
        // Place data in a tmp field.
        $url->param('literal.mdltmp_'.$key, $value);
        // Then move to the final field.
        $url->param('fmap.mdltmp_'.$key, $key);
    }

    // This sets the true filename for Tika.
    $url->param('resource.name', $storedfile->get_filename());

    // The rest is mostly error checking around the curl request. Failures are only reported,
    // since per file/doc results aren't tracked.
    try {
        // The file is posted directly as binary data (not using multipart) to avoid
        // Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
        // Note this loads the whole file into memory; see limit in file_is_indexable().
        $result = $curl->post($url->out(false), $storedfile->get_content());

        $code = $curl->get_errno();
        $info = $curl->get_info();

        if ($code != 0) {
            // An internal cURL error occurred; the error text is in $result.
            debugging('Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.',
                    DEBUG_DEVELOPER);
        } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
            // Unexpected HTTP response code.
            $message = 'Error while indexing file with document id '.$filedoc['id'];
            // Try to get the error message out of msg or title if it exists.
            if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            }
            // This is a common error, happening whenever a file fails to index for any reason, so it is kept quieter.
            if (CLI_SCRIPT && !PHPUNIT_TEST) {
                mtrace($message);
            }
        } else if (!preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
            // The expected status field is missing: unprocessable response.
            $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
            $message .= strtok($result, "\n");
            debugging($message, DEBUG_DEVELOPER);
        } else if ((int)$matches[1] !== 0) {
            // A status other than 0 is an error.
            $message = 'Unexpected Solr status code '.(int)$matches[1];
            $message .= ' while indexing file with document id '.$filedoc['id'].'.';
            debugging($message, DEBUG_DEVELOPER);
        } else {
            // The document was successfully indexed.
            return;
        }
    } catch (\Exception $e) {
        // There was an error, but per-file success isn't tracked, so just carry on.
        debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
    }

    // Getting here means the file was not indexed due to an error, so only the base info is indexed.
    $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
    $this->add_solr_document($filedoc);
}
|
|
|
1152 |
|
|
|
1153 |
/**
 * Checks to see if a passed file is indexable.
 *
 * A file is rejected when it is larger than the configured maxindexfilekb, when it would
 * not fit in the PHP memory limit less a 100MB safety margin, or when it is a Moodle backup.
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    $filesize = $file->get_filesize();

    if (!empty($this->config->maxindexfilekb) && ($filesize > ($this->config->maxindexfilekb * 1024))) {
        // The file is too big to index.
        return false;
    }

    // Because files are loaded into memory to index them in Solr, we also have to ensure that
    // we don't try to index anything bigger than the memory limit (less 100MB for safety).
    // Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
    // in config, so this will allow files over 100MB to be indexed.
    $limit = ini_get('memory_limit');
    if ($limit && $limit != -1) {
        $safetymargin = 100 * 1024 * 1024;
        // Clamp at zero so that a limit below the margin does not produce a negative threshold.
        $usablebytes = max(0, get_real_size($limit) - $safetymargin);
        if ($filesize > $usablebytes) {
            return false;
        }
    }

    if ($file->get_mimetype() == 'application/vnd.moodle.backup') {
        // We don't index Moodle backup files. There is nothing usefully indexable in them.
        return false;
    }

    return true;
}
|
|
|
1186 |
|
|
|
1187 |
/**
 * Asks the Solr client to commit all pending changes.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
|
|
|
1195 |
|
|
|
1196 |
/**
 * Called once a search area has been indexed; commits the pending changes.
 *
 * This implementation does not use any of its arguments and always reports the data
 * as indexed.
 *
 * @param \core_search\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool Always true, meaning that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // Make everything that was added for this area visible in the index.
    $this->commit();

    return true;
}
|
|
|
1211 |
|
|
|
1212 |
/**
 * Return true if file indexing is enabled in the plugin configuration. False otherwise.
 *
 * A missing 'fileindexing' setting is treated as disabled.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    // !empty() gives the same result as a bool cast for any stored value, without raising
    // an undefined property warning when the setting is absent.
    return !empty($this->config->fileindexing);
}
|
|
|
1220 |
|
|
|
1221 |
/**
 * Deletes the specified document and commits the change.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    // Matching on solr_filegroupingid removes the item together with all of its related files.
    $client = $this->get_search_client();
    $client->deleteByQuery('solr_filegroupingid:' . $id);

    $this->commit();
}
|
|
|
1232 |
|
|
|
1233 |
/**
 * Deletes all documents of an area, or every document when no area is given, then commits.
 *
 * @param string $areaid Area id to restrict the deletion to; any empty value deletes everything.
 * @return void
 */
public function delete($areaid = null) {
    $client = $this->get_search_client();

    // Without an area the match-all query is used.
    $query = $areaid ? 'areaid:' . $areaid : '*:*';
    $client->deleteByQuery($query);

    $this->commit();
}
|
|
|
1247 |
|
|
|
1248 |
/**
 * Checks that the Solr server described by the search_solr config can be used.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {
    $configured = $this->is_server_configured();
    if ($configured !== true) {
        return $configured;
    }

    // The configuration check above has already contacted the server. On pages where
    // performance is important the full schema check is skipped.
    if ($this->should_skip_schema_check()) {
        return true;
    }

    // Update schema if required/possible.
    $schemalatest = $this->check_latest_schema();
    if ($schemalatest !== true) {
        return $schemalatest;
    }

    // Check that the schema is already set up.
    $status = true;
    try {
        (new schema($this))->validate_setup();
    } catch (\moodle_exception $e) {
        $status = $e->getMessage();
    }

    return $status;
}
|
|
|
1282 |
|
|
|
1283 |
/**
 * Is the solr server properly configured?
 *
 * Checks that a hostname and index name are set, that a client can be obtained and
 * that the server reports at least Solr 4.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_configured() {
    $missingconfig = empty($this->config->server_hostname) || empty($this->config->indexname);
    if ($missingconfig) {
        return 'No solr configuration found';
    }

    if (!$this->get_search_client(false)) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        $tooold = $this->get_solr_major_version() < 4;
    } catch (\SolrClientException $ex) {
        debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    } catch (\SolrServerException $ex) {
        debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    }

    if ($tooold) {
        // Minimum solr 4.0.
        return get_string('minimumsolr4', 'search_solr');
    }

    return true;
}
|
|
|
1313 |
|
|
|
1314 |
/**
 * Returns the solr server major version.
 *
 * The value is read from the 'solr-spec-version' entry of the server's system information
 * and remembered in $this->solrmajorversion, so the server is only asked once per instance.
 *
 * @return int
 */
public function get_solr_major_version() {
    if ($this->solrmajorversion !== null) {
        return $this->solrmajorversion;
    }

    // We should really ping first the server to see if the specified indexname is valid but
    // we want to minimise solr server requests as they are expensive. system() emits a warning
    // if it can not connect to the configured index in the configured server.
    $systemdata = @$this->get_search_client()->system();
    $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

    // The integer cast keeps the leading digits only ('8.11.2' => 8). Unlike cutting at the
    // first dot, it also copes with a version string that contains no dot at all ('9' => 9).
    $this->solrmajorversion = (int) $solrversion;

    return $this->solrmajorversion;
}
|
|
|
1333 |
|
|
|
1334 |
/**
 * Checks if the PHP Solr extension is available.
 *
 * @return bool True when the solr_get_version() function exists.
 */
public function is_installed() {
    $available = function_exists('solr_get_version');

    return $available;
}
|
|
|
1342 |
|
|
|
1343 |
/**
 * Returns the solr client instance.
 *
 * The instance is only kept for reuse when $this->cacheclient is set. (Original note: we don't
 * reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.)
 *
 * @throws \core_search\engine_exception If the Solr extension is missing, or if the client
 *                                       cannot be created and $triggerexception is true.
 * @param bool $triggerexception Whether a failure to create the client throws (true) or
 *                               makes this method return false (false).
 * @return \SolrClient|false The client, or false if it could not be created and
 *                           $triggerexception is false.
 */
protected function get_search_client($triggerexception = true) {
    global $CFG;

    if ($this->client !== null) {
        return $this->client;
    }

    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
    );

    // Route the request through the site proxy unless the Solr host is set to bypass it.
    if (!empty($CFG->proxyhost) && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
        $options['proxy_host'] = $CFG->proxyhost;
        if (!empty($CFG->proxyport)) {
            $options['proxy_port'] = $CFG->proxyport;
        }
        if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
            $options['proxy_login'] = $CFG->proxyuser;
            $options['proxy_password'] = $CFG->proxypassword;
        }
    }

    if (!class_exists('\SolrClient')) {
        throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
    }

    // 'new' never returns false: the constructor reports bad options by throwing, so that is
    // what has to be translated into the exception / false result callers expect.
    try {
        $client = new \SolrClient($options);
    } catch (\SolrIllegalArgumentException $e) {
        if ($triggerexception) {
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }
        debugging('Solr client could not be created: ' . $e->getMessage(), DEBUG_DEVELOPER);
        return false;
    }

    if ($this->cacheclient) {
        $this->client = $client;
    }

    return $client;
}
|
|
|
1402 |
|
|
|
1403 |
/**
 * Returns the curl object used for connecting to solr, creating it on first use.
 *
 * @return \curl
 */
public function get_curl_object() {
    if ($this->curl !== null) {
        return $this->curl;
    }

    // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
    $this->curl = new \curl(['ignoresecurity' => true]);

    $options = [];

    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($this->config->secure)) {
        // Config setting => [curl option, companion option that is set to 'PEM' (if any)].
        $sslsettings = [
            'ssl_cert' => ['CURLOPT_SSLCERT', 'CURLOPT_SSLCERTTYPE'],
            'ssl_key' => ['CURLOPT_SSLKEY', 'CURLOPT_SSLKEYTYPE'],
            'ssl_keypassword' => ['CURLOPT_KEYPASSWD', null],
            'ssl_cainfo' => ['CURLOPT_CAINFO', null],
            'ssl_capath' => ['CURLOPT_CAPATH', null],
        ];

        foreach ($sslsettings as $setting => list($option, $typeoption)) {
            if (empty($this->config->$setting)) {
                continue;
            }
            $options[$option] = $this->config->$setting;
            if ($typeoption !== null) {
                $options[$typeoption] = 'PEM';
            }
        }
    }

    // Set timeout as for Solr client.
    $options['CURLOPT_TIMEOUT'] = empty($this->config->server_timeout) ? '30' : $this->config->server_timeout;

    $this->curl->setopt($options);

    // Basic authentication is only sent when both username and password are configured.
    if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
        $credentials = $this->config->server_username . ':' . $this->config->server_password;
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
    }

    return $this->curl;
}
|
|
|
1454 |
|
|
|
1455 |
/**
 * Return a Moodle url object for the server connection.
 *
 * The url has the form protocol://hostname[:port]/solr/indexname/path.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $protocol = empty($this->config->secure) ? 'http' : 'https';

    // The port part is only present when a port is configured.
    $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;

    $url = $protocol . '://' . rtrim($this->config->server_hostname, '/') . $port
        . '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

    return new \moodle_url($url);
}
|
|
|
1472 |
|
|
|
1473 |
/**
 * Reports that this engine supports group filtering.
 *
 * Solr includes group support in the execute_query function.
 *
 * @return bool Always true
 */
public function supports_group_filtering() {
    $supported = true;

    return $supported;
}
|
|
|
1481 |
|
|
|
1482 |
/**
 * Updates the Solr schema for a new schema version.
 *
 * @param int $oldversion Previous schema version (not used by this implementation)
 * @param int $newversion Schema version to update to
 * @return bool|string True on success, otherwise the non-true result of the schema's
 *                     can_setup_server() check or an 'unknown schema version' message.
 */
protected function update_schema($oldversion, $newversion) {
    // Construct schema.
    $schema = new schema($this);

    $cansetup = $schema->can_setup_server();
    if ($cansetup !== true) {
        return $cansetup;
    }

    // If we don't know about the schema version we might not have implemented the
    // change correctly, so return. (Loose comparison, as a switch statement would do.)
    if ($newversion != 2017091700) {
        return get_string('schemaversionunknown', 'search');
    }

    // Version 2017091700 just requires a setup call to add new fields.
    $schema->setup();

    return true;
}
|
|
|
1508 |
|
|
|
1509 |
/**
 * Lists the result orderings available for a search started from the given context.
 *
 * Starts from the orders offered by the parent engine and adds 'location' when the
 * context is a course context or below.
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $supported = parent::get_supported_orders($context);

    // Outside of a course there is no additional kind of sorting.
    if (!$context->get_course_context(false)) {
        return $supported;
    }

    // Within a course or activity/block, sorting by location is offered too.
    $supported['location'] = get_string('order_location', 'search', $context->get_context_name());

    return $supported;
}
|
|
|
1528 |
|
|
|
1529 |
/**
 * Reports that this engine supports searching by user id.
 *
 * @return bool Always true
 */
public function supports_users() {
    return true;
}
|
|
|
1537 |
|
|
|
1538 |
/**
 * Reports that this engine supports adding documents in a batch.
 *
 * @return bool Always true
 */
public function supports_add_document_batch(): bool {
    return true;
}
|
|
|
1546 |
|
|
|
1547 |
/**
 * Removes all indexed documents belonging to a deleted context.
 *
 * Issues a delete-by-query on the contextid field and then commits.
 *
 * @param int $oldcontextid Context that has been deleted
 * @return bool True whenever the delete and commit complete without an exception
 * @throws \core_search\engine_exception If the Solr client raises any exception
 */
public function delete_index_for_context(int $oldcontextid) {
    $solr = $this->get_search_client();
    $query = 'contextid:' . $oldcontextid;

    try {
        $solr->deleteByQuery($query);
        $solr->commit(true);
    } catch (\Exception $ex) {
        // Surface client failures as a search engine exception carrying the original message.
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $ex->getMessage());
    }

    return true;
}
|
|
|
1564 |
|
|
|
1565 |
/**
 * Removes all indexed documents belonging to a deleted course.
 *
 * Issues a delete-by-query on the courseid field and then commits.
 *
 * @param int $oldcourseid Course that has been deleted
 * @return bool True whenever the delete and commit complete without an exception
 * @throws \core_search\engine_exception If the Solr client raises any exception
 */
public function delete_index_for_course(int $oldcourseid) {
    $solr = $this->get_search_client();
    $query = 'courseid:' . $oldcourseid;

    try {
        $solr->deleteByQuery($query);
        $solr->commit(true);
    } catch (\Exception $ex) {
        // Surface client failures as a search engine exception carrying the original message.
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $ex->getMessage());
    }

    return true;
}
|
|
|
1582 |
|
|
|
1583 |
/**
 * Checks if an alternate configuration has been defined.
 *
 * The alternate hostname, index name and port must all be non-empty.
 *
 * @return bool True if alternate configuration is available
 */
public function has_alternate_configuration(): bool {
    $required = ['alternateserver_hostname', 'alternateindexname', 'alternateserver_port'];

    foreach ($required as $setting) {
        if (empty($this->config->$setting)) {
            return false;
        }
    }

    return true;
}
|
|
|
1593 |
}
|