1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
/**
|
|
|
18 |
* Runs an analysis of the site.
|
|
|
19 |
*
|
|
|
20 |
* @package core_analytics
|
|
|
21 |
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
|
|
|
22 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
23 |
*/
|
|
|
24 |
|
|
|
25 |
namespace core_analytics;
|
|
|
26 |
|
|
|
27 |
defined('MOODLE_INTERNAL') || die();
|
|
|
28 |
|
|
|
29 |
/**
|
|
|
30 |
* Runs an analysis of the site.
|
|
|
31 |
*
|
|
|
32 |
* @package core_analytics
|
|
|
33 |
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
|
|
|
34 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
35 |
*/
|
|
|
36 |
class analysis {
|
|
|
37 |
|
|
|
38 |
/**
|
|
|
39 |
* @var \core_analytics\local\analyser\base
|
|
|
40 |
*/
|
|
|
41 |
private $analyser;
|
|
|
42 |
|
|
|
43 |
/**
|
|
|
44 |
* @var bool Whether to calculate the target or not in this run.
|
|
|
45 |
*/
|
|
|
46 |
private $includetarget;
|
|
|
47 |
|
|
|
48 |
/**
|
|
|
49 |
* @var \core_analytics\local\analysis\result
|
|
|
50 |
*/
|
|
|
51 |
private $result;
|
|
|
52 |
|
|
|
53 |
/**
|
|
|
54 |
* @var \core\lock\lock
|
|
|
55 |
*/
|
|
|
56 |
private $lock;
|
|
|
57 |
|
|
|
58 |
/**
 * Creates an analysis bound to an analyser and a result handler.
 *
 * Besides storing its collaborators, the constructor warms the "first analyses" cache for
 * the analyser's model, because time-splitting methods can depend on that information.
 *
 * @param \core_analytics\local\analyser\base $analyser Analyser that supplies the analysables, samples and indicators.
 * @param bool $includetarget Whether to calculate the target or not.
 * @param \core_analytics\local\analysis\result $result Handler that receives the results of each analysable.
 */
public function __construct(
    \core_analytics\local\analyser\base $analyser,
    bool $includetarget,
    \core_analytics\local\analysis\result $result
) {
    $this->result = $result;
    $this->includetarget = $includetarget;
    $this->analyser = $analyser;

    // Cache the first time each analysable of this model was analysed.
    self::fill_firstanalyses_cache($analyser->get_modelid());
}
|
|
|
74 |
|
|
|
75 |
/**
 * Runs the analysis.
 *
 * Iterates over the analysables provided by the analyser, processes each of them and hands
 * the results over to the result handler. Outside of evaluation mode it also records when
 * each analysable was analysed and stops once the 'modeltimelimit' setting is exceeded.
 *
 * @param \context[] $contexts Contexts handed to the analyser's get_analysables_iterator(); defaults to an empty array.
 * @return null
 */
public function run(array $contexts = []) {

    $options = $this->analyser->get_options();

    // Time limit control.
    $timelimit = intval(get_config('analytics', 'modeltimelimit'));

    $action = $this->includetarget ? 'training' : 'prediction';
    $iterator = $this->analyser->get_analysables_iterator($action, $contexts);

    $alreadyprocessed = $this->get_processed_analysables();

    $starttime = microtime(true);
    foreach ($iterator as $analysable) {

        if (!$analysable) {
            continue;
        }

        $used = false;

        $analysableresults = $this->process_analysable($analysable);
        if ($analysableresults) {
            $used = $this->result->add_analysable_results($analysableresults);
            if (!$used) {
                // Log why the analysable was discarded; messages are prefixed with the
                // time splitting method id only when there is more than one of them.
                $prefixed = count($analysableresults) > 1;
                $messages = [];
                foreach ($analysableresults as $timesplittingid => $tsresult) {
                    $messages[] = ($prefixed ? $timesplittingid . ': ' : '') . $tsresult->message;
                }

                $a = new \stdClass();
                $a->analysableid = $analysable->get_name();
                $a->errors = implode(', ', $messages);
                $this->analyser->add_log(get_string('analysablenotused', 'analytics', $a));
            }
        }

        if ($options['evaluation']) {
            // Neither analysis times nor the time limit apply when evaluating.
            continue;
        }

        // The analysable is stored as processed the first time it is seen even if the target does
        // not always_update_analysis_time(); that setting only controls later updates of the record.
        if (empty($alreadyprocessed[$analysable->get_id()]) ||
                $this->analyser->get_target()->always_update_analysis_time() || $used) {
            $this->update_analysable_analysed_time($alreadyprocessed, $analysable->get_id());
        }

        // Apply time limit.
        if ($timelimit <= microtime(true) - $starttime) {
            break;
        }
    }

    // Force GC to clean up the indicator instances used during the last iteration.
    $this->analyser->instantiate_indicators();
}
|
|
|
146 |
|
|
|
147 |
/**
 * Returns the analysables this model has already processed for the current action.
 *
 * The action is 'training' when the target is included and 'prediction' otherwise.
 *
 * @return \stdClass[] Records from analytics_used_analysables, most recently analysed first.
 */
protected function get_processed_analysables(): array {
    global $DB;

    $params = [
        'modelid' => $this->analyser->get_modelid(),
        'action' => $this->includetarget ? 'training' : 'prediction',
    ];

    // The analysableid goes first so records can be matched by analysable id (it is unique by modelid);
    // the real primary key is exposed as 'primarykey'.
    $fields = 'analysableid, modelid, action, firstanalysis, timeanalysed, id AS primarykey';
    $sort = 'timeanalysed DESC';

    return $DB->get_records_select(
        'analytics_used_analysables',
        'modelid = :modelid and action = :action',
        $params,
        $sort,
        $fields
    );
}
|
|
|
163 |
|
|
|
164 |
/**
 * Processes an analysable with every time splitting method of the analyser.
 *
 * An empty array is returned (and a log entry added) when the target rejects the analysable.
 * Otherwise one results object is returned per time splitting method, either wrapping a
 * cached result or produced by process_time_splitting().
 *
 * @param \core_analytics\analysable $analysable
 * @return \stdClass[] Results objects indexed by time splitting method id.
 */
public function process_analysable(\core_analytics\analysable $analysable): array {

    // A fresh target instance is used for each analysable (calculations run once per
    // analysable, not per time splitting method nor per time range).
    $target = call_user_func([$this->analyser->get_target(), 'instance']);

    // The analysable must be valid for the target even when targets are not calculated,
    // invalid analysables still need to be discarded.
    $validity = $target->is_valid_analysable($analysable, $this->includetarget);
    if ($validity !== true) {
        $a = new \stdClass();
        $a->analysableid = $analysable->get_name();
        $a->result = $validity;
        $this->analyser->add_log(get_string('analysablenotvalidfortarget', 'analytics', $a));
        return [];
    }

    $results = [];
    foreach ($this->analyser->get_timesplittings() as $timesplitting) {

        $cached = $this->result->retrieve_cached_result($timesplitting, $analysable);
        if (!$cached) {
            $results[$timesplitting->get_id()] = $this->process_time_splitting($timesplitting, $analysable, $target);
            continue;
        }

        // Reuse the cached result instead of analysing again.
        $wrapper = new \stdClass();
        $wrapper->result = $cached;
        $results[$timesplitting->get_id()] = $wrapper;
    }

    return $results;
}
|
|
|
207 |
|
|
|
208 |
/**
 * Processes the analysable samples using the provided time splitting method.
 *
 * The returned object always carries a status and a message; on success it also carries the
 * formatted result. Once init_analysable_analysis() has succeeded, finish_analysable_analysis()
 * is guaranteed to run on every exit path, including exceptions.
 *
 * @throws \moodle_exception When the result handler can not format/write the dataset.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param \core_analytics\analysable $analysable
 * @param \core_analytics\local\target\base $target
 * @return \stdClass Results object.
 */
protected function process_time_splitting(\core_analytics\local\time_splitting\base $timesplitting,
        \core_analytics\analysable $analysable, \core_analytics\local\target\base $target): \stdClass {

    $options = $this->analyser->get_options();

    $result = new \stdClass();

    $timesplitting->set_modelid($this->analyser->get_modelid());
    if (!$timesplitting->is_valid_analysable($analysable)) {
        $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
        $result->message = get_string('invalidanalysablefortimesplitting', 'analytics',
            $timesplitting->get_name());
        return $result;
    }
    $timesplitting->set_analysable($analysable);

    if (CLI_SCRIPT && !PHPUNIT_TEST) {
        mtrace('Analysing id "' . $analysable->get_id() . '" with "' . $timesplitting->get_name() .
            '" time splitting method...');
    }

    // What is a sample is defined by the analyser, it can be an enrolment, a course, a user, a question
    // attempt... it is on what we will base indicators calculations.
    list($sampleids, $samplesdata) = $this->analyser->get_all_samples($analysable);

    if (count($sampleids) === 0) {
        $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
        $result->message = get_string('nodata', 'analytics');
        return $result;
    }

    if ($this->includetarget) {
        // All ranges are used when we are calculating data for training.
        $ranges = $timesplitting->get_training_ranges();
    } else {
        // The latest range that has not yet been used for prediction (it depends on the time range where we are right now).
        $ranges = $timesplitting->get_most_recent_prediction_range();
    }

    // There is no need to keep track of the evaluated samples and ranges as we always evaluate the whole dataset.
    if ($options['evaluation'] === false) {

        if (empty($ranges)) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('noranges', 'analytics');
            return $result;
        }

        // We skip all samples that are already part of a training dataset, even if they have not been used for prediction.
        if (!$target::based_on_assumptions()) {
            // Targets based on assumptions can not be trained.
            $this->filter_out_train_samples($sampleids, $timesplitting);
        }

        if (count($sampleids) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewdata', 'analytics');
            return $result;
        }

        // Only when processing data for predictions.
        if (!$this->includetarget) {
            // We also filter out samples and ranges that have already been used for predictions.
            $predictsamplesrecord = $this->filter_out_prediction_samples_and_ranges($sampleids, $ranges, $timesplitting);
        }

        // The prediction filter may have removed every remaining sample.
        if (count($sampleids) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewdata', 'analytics');
            return $result;
        }

        if (count($ranges) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewranges', 'analytics');
            return $result;
        }
    }

    // Flag the model + analysable + timesplitting as being analysed (prevent concurrent executions).
    if (!$this->init_analysable_analysis($timesplitting->get_id(), $analysable->get_id())) {
        // If this model + analysable + timesplitting combination is being analysed we skip this process.
        $result->status = \core_analytics\model::NO_DATASET;
        $result->message = get_string('analysisinprogress', 'analytics');
        return $result;
    }

    // From here on the analysis is flagged as in progress. A single finally block clears the flag
    // on every exit (early returns and any exception), so no code path can leave it set.
    try {
        // Remove samples the target consider invalid.
        $target->add_sample_data($samplesdata);
        $target->filter_out_invalid_samples($sampleids, $analysable, $this->includetarget);

        if (!$sampleids) {
            $result->status = \core_analytics\model::NO_DATASET;
            $result->message = get_string('novalidsamples', 'analytics');
            return $result;
        }

        // Instantiate empty indicators to ensure that no garbage is dragged from previous analyses.
        $indicators = $this->analyser->instantiate_indicators();
        foreach ($indicators as $key => $indicator) {
            // The analyser attaches the main entities the sample depends on and are provided to the
            // indicator to calculate the sample.
            $indicators[$key]->add_sample_data($samplesdata);
        }

        // Here we start the memory intensive process that will last until $data var is
        // unset (until the method is finished basically).
        $data = $this->calculate($timesplitting, $sampleids, $ranges, $target);

        if (!$data) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('novaliddata', 'analytics');
            return $result;
        }

        // No need to keep track of analysed stuff when evaluating.
        if ($options['evaluation'] === false) {
            // Save the samples that have been already analysed so they are not analysed again in future.
            if ($this->includetarget) {
                $this->save_train_samples($sampleids, $timesplitting);
            } else {
                // The variable $predictsamplesrecord is always set here: this branch requires
                // no evaluation mode and no includetarget, the same conditions under which
                // filter_out_prediction_samples_and_ranges was called above.
                $this->save_prediction_samples($sampleids, $ranges, $timesplitting, $predictsamplesrecord);
            }
        }

        // We need to pass all the analysis data.
        $formattedresult = $this->result->format_result($data, $target, $timesplitting, $analysable);

        if (!$formattedresult) {
            throw new \moodle_exception('errorcannotwritedataset', 'analytics');
        }

        $result->status = \core_analytics\model::OK;
        $result->message = get_string('successfullyanalysed', 'analytics');
        $result->result = $formattedresult;

        return $result;

    } finally {
        // Flag the model + analysable + timesplitting as no longer being analysed.
        $this->finish_analysable_analysis();
    }
}
|
|
|
379 |
|
|
|
380 |
/**
 * Calculates indicators and targets.
 *
 * When the target is included it is calculated first and the samples without a calculated
 * target are removed from $sampleids. The indicators are then calculated for the remaining
 * samples, the range/target columns are added and the header row is prepended.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param array $sampleids Passed by reference; samples whose target can not be calculated are removed from it.
 * @param array $ranges
 * @param \core_analytics\local\target\base $target
 * @return array|null The dataset, or null if there are no usable samples or no indicator data.
 */
public function calculate(\core_analytics\local\time_splitting\base $timesplitting, array &$sampleids,
        array $ranges, \core_analytics\local\target\base $target): ?array {

    $calculatedtarget = null;
    if ($this->includetarget) {
        // We first calculate the target because analysable data may still be invalid or none
        // of the analysable samples may be valid.
        $calculatedtarget = $target->calculate($sampleids, $timesplitting->get_analysable());

        // We remove samples we can not calculate their target. isset() is false both for null
        // values and for samples the target returned no entry for, without raising an
        // undefined key warning in the latter case.
        $sampleids = array_filter($sampleids, static function($sampleid) use ($calculatedtarget) {
            return isset($calculatedtarget[$sampleid]);
        });
    }

    // No need to continue calculating if the target couldn't be calculated for any sample.
    if (empty($sampleids)) {
        return null;
    }

    $dataset = $this->calculate_indicators($timesplitting, $sampleids, $ranges);

    if (empty($dataset)) {
        return null;
    }

    // Now that we have the indicators in place we can add the time range indicators (and target if provided) to each of them.
    $this->fill_dataset($timesplitting, $dataset, $calculatedtarget);

    $this->add_context_metadata($timesplitting, $dataset, $target);

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL;
    }

    return $dataset;
}
|
|
|
429 |
|
|
|
430 |
/**
 * Calculates indicators.
 *
 * Every indicator of the analyser is calculated for every range. The resulting features are
 * appended, in indicator order, to a dataset row identified by sample id + range index.
 * Outside of evaluation mode, and if the time splitting method caches indicator calculations,
 * new calculations are stored in analytics_indicator_calc in batches. Rows for which every
 * indicator reported no non-null value are excluded from the returned dataset.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param array $sampleids
 * @param array $ranges Each range is read through its 'start' and 'end' keys.
 * @return array Features indexed by unique sample id (sample id with the range index appended).
 */
protected function calculate_indicators(\core_analytics\local\time_splitting\base $timesplitting, array $sampleids,
        array $ranges): array {
    global $DB;

    $options = $this->analyser->get_options();

    $dataset = [];

    // Faster to run 1 db query per range.
    $existingcalculations = [];
    if ($timesplitting->cache_indicator_calculations()) {
        foreach ($ranges as $rangeindex => $range) {
            // Load existing calculations.
            $existingcalculations[$rangeindex] = \core_analytics\manager::get_indicator_calculations(
                $timesplitting->get_analysable(), $range['start'], $range['end'], $this->analyser->get_samples_origin());
        }
    }

    // Unique sample ids with at least one calculation that is not null.
    $notnulls = [];

    // Calculations pending to be inserted into the database.
    $pendingcalcs = [];

    foreach ($this->analyser->get_indicators() as $indicator) {

        // Hook to allow indicators to store analysable-dependant data.
        $indicator->fill_per_analysable_caches($timesplitting->get_analysable());

        // Per-range calculations.
        foreach ($ranges as $rangeindex => $range) {

            // Indicator instances are per-range.
            $rangeindicator = clone $indicator;

            $prevcalculations = [];
            if (!empty($existingcalculations[$rangeindex][$rangeindicator->get_id()])) {
                $prevcalculations = $existingcalculations[$rangeindex][$rangeindicator->get_id()];
            }

            // Calculate the indicator for each sample in this time range.
            [$samplesfeatures, $freshcalcs, $indicatornotnulls] = $rangeindicator->calculate($sampleids,
                $this->analyser->get_samples_origin(), $range['start'], $range['end'], $prevcalculations);

            // Associate the extra data generated by the indicator to this range index.
            $rangeindicator->save_calculation_info($timesplitting, $rangeindex);

            // Free memory ASAP.
            unset($rangeindicator);
            gc_collect_cycles();
            gc_mem_caches();

            // Copy the features data to the dataset.
            foreach ($samplesfeatures as $analysersampleid => $features) {

                $uniquesampleid = $timesplitting->append_rangeindex($analysersampleid, $rangeindex);

                if (!isset($notnulls[$uniquesampleid]) && !empty($indicatornotnulls[$analysersampleid])) {
                    $notnulls[$uniquesampleid] = $uniquesampleid;
                }

                // Append this indicator's features at the end of the sample, starting from an
                // empty row the first time the sample is seen.
                $dataset[$uniquesampleid] = array_merge($dataset[$uniquesampleid] ?? [], $features);
            }

            if (!$options['evaluation'] && $timesplitting->cache_indicator_calculations()) {
                // Prepare the new calculations to be stored into DB.
                $timecreated = time();
                foreach ($freshcalcs as $sampleid => $calculatedvalue) {
                    $pendingcalcs[] = (object) [
                        'contextid' => $timesplitting->get_analysable()->get_context()->id,
                        'starttime' => $range['start'],
                        'endtime' => $range['end'],
                        'sampleid' => $sampleid,
                        'sampleorigin' => $this->analyser->get_samples_origin(),
                        'indicator' => $indicator->get_id(),
                        'value' => $calculatedvalue,
                        'timecreated' => $timecreated,
                    ];
                }
            }
        }

        if (!$options['evaluation'] && $timesplitting->cache_indicator_calculations()) {
            $batchsize = self::get_insert_batch_size();
            if (count($pendingcalcs) > $batchsize) {
                // We don't want the pending calculations to grow too much as we already keep the
                // system memory busy storing $dataset contents.

                // Keep the first $batchsize records for insertion, carry the rest over.
                $carriedover = array_splice($pendingcalcs, $batchsize);

                // Sorry mssql and oracle, this will be slow.
                $DB->insert_records('analytics_indicator_calc', $pendingcalcs);
                $pendingcalcs = $carriedover;
            }
        }
    }

    if (!$options['evaluation'] && $timesplitting->cache_indicator_calculations() && $pendingcalcs) {
        // Insert the remaining records.
        $DB->insert_records('analytics_indicator_calc', $pendingcalcs);
    }

    // Drop rows where all calculations are null.
    // We still store the indicator calculation and we still store the sample id as
    // processed so we don't have to process this sample again, but we exclude it
    // from the dataset because it is not useful.
    return array_intersect_key($dataset, $notnulls);
}
|
|
|
558 |
|
|
|
559 |
/**
 * Adds time range indicators and the target to each sample.
 *
 * If the time splitting method defines more than one distinct range, one 0/1 column per
 * range is prepended to the sample, identifying the range it belongs to. Then, if a
 * calculated target is provided, the sample's target is appended at the end; otherwise
 * the unique sample id is prepended as the first column.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param array $dataset Passed by reference and modified in place.
 * @param array|null $calculatedtarget Target values indexed by the analyser's sample id.
 * @return null
 */
protected function fill_dataset(\core_analytics\local\time_splitting\base $timesplitting,
        array &$dataset, ?array $calculatedtarget = null) {

    $nranges = count($timesplitting->get_distinct_ranges());

    foreach (array_keys($dataset) as $uniquesampleid) {

        [$analysersampleid, $rangeindex] = $timesplitting->infer_sample_info($uniquesampleid);

        $row = $dataset[$uniquesampleid];

        // No need to add range features if this time splitting method only defines one time range.
        if ($nranges > 1) {
            // 1 column for each range, only the sample's range is flagged.
            $rangecolumns = array_fill(0, $nranges, 0);
            $rangecolumns[$rangeindex] = 1;

            $row = array_merge($rangecolumns, $row);
        }

        if ($calculatedtarget) {
            // Add this sampleid's calculated target at the end.
            $row[] = $calculatedtarget[$analysersampleid];
        } else {
            // Add this sampleid, it will be used to identify the prediction that comes back from
            // the predictions processor.
            array_unshift($row, $uniquesampleid);
        }

        $dataset[$uniquesampleid] = $row;
    }
}
|
|
|
600 |
|
|
|
601 |
/**
 * Updates the analysable analysis time.
 *
 * If the analysable is part of $processedanalysables its analytics_used_analysables record
 * gets the current time as timeanalysed. Otherwise a new record is inserted, using the current
 * time both as firstanalysis and timeanalysed, and the 'modelfirstanalyses' cache is updated.
 *
 * The records in $processedanalysables are not modified, so this method can safely be called
 * more than once for the same analysable.
 *
 * @param array $processedanalysables Records indexed by analysable id, exposing the row id as 'primarykey'.
 * @param int $analysableid
 * @return null
 */
protected function update_analysable_analysed_time(array $processedanalysables, int $analysableid) {
    global $DB;

    $now = time();

    if (!empty($processedanalysables[$analysableid])) {
        // Work on a copy: the array holds object handles, so changing the record in place
        // would strip 'primarykey' from the caller's data.
        $record = clone $processedanalysables[$analysableid];

        // The row id was selected as 'primarykey', update_record needs it back as 'id'.
        $record->id = $record->primarykey;
        unset($record->primarykey);

        $record->timeanalysed = $now;

        $DB->update_record('analytics_used_analysables', $record);
        return;
    }

    $record = new \stdClass();
    $record->modelid = $this->analyser->get_modelid();
    $record->action = ($this->includetarget) ? 'training' : 'prediction';
    $record->analysableid = $analysableid;
    $record->firstanalysis = $now;
    $record->timeanalysed = $now;

    $DB->insert_record('analytics_used_analysables', $record);

    // Update the cache just in case it is used in the same request.
    $key = $this->analyser->get_modelid() . '_' . $analysableid;
    $cache = \cache::make('core', 'modelfirstanalyses');
    $cache->set($key, $now);
}
|
|
|
640 |
|
|
|
641 |
/**
 * Fills a cache containing the first time each analysable in the provided model was analysed.
 *
 * Cache keys are composed as "<modelid>_<analysableid>" and the values are the lowest
 * firstanalysis stored for that analysable in analytics_used_analysables.
 *
 * @param int $modelid
 * @param int|null $analysableid Restrict the query to this analysable; no restriction if empty.
 * @return array The firstanalysis values that were cached, indexed by the composed key (empty if there are none).
 */
public static function fill_firstanalyses_cache(int $modelid, ?int $analysableid = null) {
    global $DB;

    // Using composed keys instead of cache $identifiers because of MDL-65358.
    $primarykey = $DB->sql_concat($modelid, "'_'", 'analysableid');

    $params = ['modelid' => $modelid];
    $sql = "SELECT $primarykey AS id, MIN(firstanalysis) AS firstanalysis
              FROM {analytics_used_analysables} aua
             WHERE modelid = :modelid";

    if ($analysableid) {
        $params['analysableid'] = $analysableid;
        $sql .= " AND analysableid = :analysableid";
    }

    $sql .= " GROUP BY modelid, analysableid ORDER BY analysableid";

    $records = $DB->get_records_sql($sql, $params);
    if (!$records) {
        return $records;
    }

    $cache = \cache::make('core', 'modelfirstanalyses');

    // Only the timestamp is cached, under the same key the record was returned with.
    $firstanalyses = [];
    foreach ($records as $key => $record) {
        $firstanalyses[$key] = $record->firstanalysis;
    }

    $cache->set_many($firstanalyses);

    return $firstanalyses;
}
|
|
|
678 |
|
|
|
679 |
/**
 * Adds dataset context info.
 *
 * The headers returned by get_headers() are inserted as the first row of the dataset, so the
 * dataset ends up looking like this:
 * ----------------------------------------------------
 * header1,header2,header3,header4,.....
 * stud1value1,stud1value2,stud1value3,stud1value4,.....
 * stud2value1,stud2value2,stud2value3,stud2value4,.....
 * .....
 * ----------------------------------------------------
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param array $dataset Passed by reference and modified in place.
 * @param \core_analytics\local\target\base $target
 * @return null
 */
protected function add_context_metadata(\core_analytics\local\time_splitting\base $timesplitting, array &$dataset,
        \core_analytics\local\target\base $target) {
    // Note that array_unshift also renumbers the numeric keys of the dataset.
    array_unshift($dataset, $this->get_headers($timesplitting, $target));
}
|
|
|
705 |
|
|
|
706 |
/**
 * Returns the headers for the csv file based on the indicators and the target.
 *
 * The columns are, in order: 'sampleid' (only when the target is not included), one
 * 'range/N' column per distinct range (only when there is more than one), the feature
 * headers of each indicator and, when the target is included, the target id.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param \core_analytics\local\target\base $target
 * @return string[]
 */
public function get_headers(\core_analytics\local\time_splitting\base $timesplitting,
        \core_analytics\local\target\base $target): array {

    // When the target is not included the first column is the sampleid.
    $headers = $this->includetarget ? [] : ['sampleid'];

    // We always have 1 column for each time splitting method range, it does not depend on how
    // many ranges we calculated.
    $distinctranges = $timesplitting->get_distinct_ranges();
    if (count($distinctranges) > 1) {
        foreach ($distinctranges as $rangeindex) {
            $headers[] = 'range/' . $rangeindex;
        }
    }

    // Model indicators.
    foreach ($this->analyser->get_indicators() as $indicator) {
        $headers = array_merge($headers, $indicator::get_feature_headers());
    }

    // The target goes last.
    if ($this->includetarget) {
        $headers[] = $target->get_id();
    }

    return $headers;
}
|
|
|
744 |
|
|
|
745 |
/**
 * Removes from $sampleids the samples that were already used for training.
 *
 * Every analytics_train_samples record matching this model, analysable and time
 * splitting method is read, and the keys of its stored sample list are removed
 * from $sampleids (comparison is done by array key).
 *
 * @param int[] $sampleids Modified in place.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @return null
 */
protected function filter_out_train_samples(array &$sampleids, \core_analytics\local\time_splitting\base $timesplitting) {
    global $DB;

    $conditions = [
        'modelid' => $this->analyser->get_modelid(),
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
    ];

    foreach ($DB->get_records('analytics_train_samples', $conditions) as $trainedrecord) {
        $alreadytrained = json_decode($trainedrecord->sampleids, true);

        // Nothing usable stored in this record (empty list or undecodable value).
        if (empty($alreadytrained)) {
            continue;
        }

        // Keep only the samples whose key is not part of this record's sample list.
        $sampleids = array_diff_key($sampleids, $alreadytrained);
    }
}
|
|
|
771 |
|
|
|
772 |
/**
 * Filters out samples that have already been used for prediction.
 *
 * Looks up the analytics_predict_samples record for the single range in $ranges and
 * removes from $sampleids every sample whose key is already listed in that record.
 * When no sample is left, the range is removed from $ranges as well.
 *
 * @throws \coding_exception If $ranges contains more than one range.
 * @param int[] $sampleids Modified in place when some samples were already predicted.
 * @param array $ranges Must contain a single range; emptied when all samples were already predicted.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @return \stdClass|null The analytics_predict_samples record (with sampleids decoded to an array) or null
 */
protected function filter_out_prediction_samples_and_ranges(array &$sampleids, array &$ranges,
        \core_analytics\local\time_splitting\base $timesplitting) {

    if (count($ranges) > 1) {
        throw new \coding_exception('$ranges argument should only contain one range');
    }

    $rangeindex = key($ranges);
    $predictedrange = $this->get_predict_samples_record($timesplitting, $rangeindex);

    if (!$predictedrange) {
        // Nothing to filter out.
        return null;
    }

    // json_decode() returns null when the stored value is not valid JSON, and may return a
    // scalar for valid non-array JSON. Both would make array_diff_key() below fail with a
    // TypeError, so anything that is not an array is treated as "no samples predicted yet".
    $predictedsamples = json_decode($predictedrange->sampleids, true);
    if (!is_array($predictedsamples)) {
        $predictedsamples = [];
    }
    $predictedrange->sampleids = $predictedsamples;

    $missingsamples = array_diff_key($sampleids, $predictedsamples);
    if (count($missingsamples) === 0) {
        // All samples already calculated.
        unset($ranges[$rangeindex]);
        return null;
    }

    // Replace the list of samples by the one excluding samples that already got predictions at this range.
    $sampleids = $missingsamples;

    return $predictedrange;
}
|
|
|
808 |
|
|
|
809 |
/**
 * Fetches the analytics_predict_samples record of a range.
 *
 * The record is looked up by model, analysable, time splitting method and range index.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param int $rangeindex
 * @return \stdClass|false
 */
private function get_predict_samples_record(\core_analytics\local\time_splitting\base $timesplitting, int $rangeindex) {
    global $DB;

    return $DB->get_record('analytics_predict_samples', [
        'modelid' => $this->analyser->get_modelid(),
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'rangeindex' => $rangeindex,
    ]);
}
|
|
|
825 |
|
|
|
826 |
/**
 * Stores the samples that have just been used for training.
 *
 * A new analytics_train_samples record is inserted with the sample list encoded as JSON.
 *
 * @param int[] $sampleids
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @return null
 */
protected function save_train_samples(array $sampleids, \core_analytics\local\time_splitting\base $timesplitting) {
    global $DB;

    $record = (object)[
        'modelid' => $this->analyser->get_modelid(),
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'sampleids' => json_encode($sampleids),
        'timecreated' => time(),
    ];

    $DB->insert_record('analytics_train_samples', $record);
}
|
|
|
846 |
|
|
|
847 |
/**
 * Stores the samples that have just been used for prediction.
 *
 * When an existing record is provided the new samples are merged into it and the record
 * is updated, otherwise a new analytics_predict_samples record is inserted.
 *
 * @throws \coding_exception If $ranges contains more than one range.
 * @param int[] $sampleids
 * @param array $ranges Must contain a single range.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param \stdClass|null $predictsamplesrecord The existing record or null if there is no record yet.
 * @return null
 */
protected function save_prediction_samples(array $sampleids, array $ranges,
        \core_analytics\local\time_splitting\base $timesplitting, ?\stdClass $predictsamplesrecord = null) {
    global $DB;

    if (count($ranges) > 1) {
        throw new \coding_exception('$ranges argument should only contain one range');
    }
    $rangeindex = key($ranges);

    if (!$predictsamplesrecord) {
        // First prediction for this range: create the record from scratch.
        $newrecord = (object)[
            'modelid' => $this->analyser->get_modelid(),
            'analysableid' => $timesplitting->get_analysable()->get_id(),
            'timesplitting' => $timesplitting->get_id(),
            'rangeindex' => $rangeindex,
            'sampleids' => json_encode($sampleids),
            'timecreated' => time(),
        ];
        $newrecord->timemodified = $newrecord->timecreated;
        $DB->insert_record('analytics_predict_samples', $newrecord);
        return;
    }

    // Append the new samples to the ones already stored. The array union is applied to
    // $predictsamplesrecord->sampleids, so it has to hold a decoded array here (as returned by
    // filter_out_prediction_samples_and_ranges()), not the raw JSON string.
    $predictsamplesrecord->sampleids = json_encode($predictsamplesrecord->sampleids + $sampleids);
    $predictsamplesrecord->timemodified = time();
    $DB->update_record('analytics_predict_samples', $predictsamplesrecord);
}
|
|
|
883 |
|
|
|
884 |
/**
 * Tries to get the lock that guards the analysis of an analysable with a time splitting method.
 *
 * The value returned by the lock factory is stored in $this->lock.
 *
 * @param string $timesplittingid
 * @param int $analysableid
 * @return bool Whether the lock could be obtained
 */
private function init_analysable_analysis(string $timesplittingid, int $analysableid) {

    // $this->includetarget is intentionally not part of the key: the same analysable must not be
    // analysed for training and for prediction at the same time.
    $lockkey = implode('-', [
        'modelid:' . $this->analyser->get_modelid(),
        'analysableid:' . $analysableid,
        'timesplitting:' . self::clean_time_splitting_id($timesplittingid),
    ]);

    // Wait up to 10 secs for the lock. If it is not available by then, this
    // model + analysable + time splitting method combination is skipped and
    // will be attempted again during the next cron run.
    $this->lock = \core\lock\lock_config::get_lock_factory('core_analytics')->get_lock($lockkey, 10);

    return (bool)$this->lock;
}
|
|
|
908 |
|
|
|
909 |
|
|
|
910 |
/**
 * Cleans a time splitting method id (its full class name) so it is free of possibly problematic chars.
 *
 * Backslashes are replaced by dashes and the result is cleaned as PARAM_ALPHANUMEXT.
 *
 * @param string $timesplittingid
 * @return string
 */
public static function clean_time_splitting_id($timesplittingid) {
    return clean_param(str_replace('\\', '-', $timesplittingid), PARAM_ALPHANUMEXT);
}
|
|
|
920 |
|
|
|
921 |
/**
 * Ends the analysis of the current analysable + time splitting method by releasing its lock.
 *
 * @return null
 */
private function finish_analysable_analysis() {
    // The lock was stored in $this->lock when the analysis was initialised.
    $heldlock = $this->lock;
    $heldlock->release();
}
|
|
|
929 |
|
|
|
930 |
/**
 * Returns the batch size to use for insert_records.
 *
 * The size is derived from the 'bulkinsertsize' database option without getting into
 * dml internals. It never exceeds 1000 records, to save memory:
 * - no option set: 1000;
 * - option below 1000: the option value;
 * - option above 1000: the option value halved repeatedly until it is 1000 or less.
 *
 * @return int
 */
private static function get_insert_batch_size(): int {
    global $DB;

    $dbconfig = $DB->export_dbconfig();

    // 500 is pgsql default so using 1000 is fine, no other db driver uses a hardcoded value.
    $hasbulksize = !empty($dbconfig) && !empty($dbconfig->dboptions) && !empty($dbconfig->dboptions['bulkinsertsize']);
    if (!$hasbulksize) {
        return 1000;
    }

    $batchsize = $dbconfig->dboptions['bulkinsertsize'];
    if ($batchsize < 1000) {
        // Small enough already, use it as configured.
        return $batchsize;
    }

    // Halve the configured size until it fits under the 1000 records cap.
    while ($batchsize > 1000) {
        $batchsize = round($batchsize / 2, 0);
    }

    return (int)$batchsize;
}
|
|
|
959 |
}
|