Proyectos de Subversion Moodle

Rev

Rev 11 | | Comparar con el anterior | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
namespace core_analytics;
18
 
19
defined('MOODLE_INTERNAL') || die();
20
 
21
global $CFG;
22
require_once(__DIR__ . '/fixtures/test_indicator_max.php');
23
require_once(__DIR__ . '/fixtures/test_indicator_min.php');
24
require_once(__DIR__ . '/fixtures/test_indicator_null.php');
25
require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
26
require_once(__DIR__ . '/fixtures/test_indicator_random.php');
27
require_once(__DIR__ . '/fixtures/test_indicator_multiclass.php');
28
require_once(__DIR__ . '/fixtures/test_target_shortname.php');
29
require_once(__DIR__ . '/fixtures/test_target_shortname_multiclass.php');
30
require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
31
 
32
require_once(__DIR__ . '/../../course/lib.php');
33
 
1441 ariadna 34
use core_analytics\tests\mlbackend_helper_trait;
35
 
1 efrain 36
/**
37
 * Unit tests for evaluation, training and prediction.
38
 *
39
 * NOTE: in order to execute this test using a separate server for the
40
 *       python ML backend you need to define these variables in your config.php file:
41
 *
42
 * define('TEST_MLBACKEND_PYTHON_HOST', '127.0.0.1');
43
 * define('TEST_MLBACKEND_PYTHON_PORT', 5000);
44
 * define('TEST_MLBACKEND_PYTHON_USERNAME', 'default');
45
 * define('TEST_MLBACKEND_PYTHON_PASSWORD', 'sshhhh');
46
 *
47
 * @package   core_analytics
48
 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
49
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
50
 */
1441 ariadna 51
final class prediction_test extends \advanced_testcase {
1 efrain 52
 
1441 ariadna 53
    use mlbackend_helper_trait;
54
 
1 efrain 55
    /**
     * Delete every analytics model once a test has finished.
     *
     * Deleting the models is what purges the mlbackend outputs. This happens automatically
     * for mlbackends using the web server dataroot, but other mlbackends may store files
     * elsewhere and those files need to be removed as well.
     *
     * @return void
     */
    public function tearDown(): void {
        // The models are deleted while acting as the admin user.
        $this->setAdminUser();

        foreach (\core_analytics\manager::get_all_models() as $existingmodel) {
            $existingmodel->delete();
        }

        parent::tearDown();
    }
72
 
73
    /**
     * Checks that a static model skips training and still stores predictions for hidden courses.
     *
     * @return void
     */
    public function test_static_prediction(): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();

        $model = $this->add_perfect_model('test_static_target_shortname');
        $model->enable('\core\analytics\time_splitting\no_splitting');
        $this->assertEquals(1, $model->is_enabled());
        $this->assertEquals(1, $model->is_trained());

        // Every query below is restricted to the records of this model.
        $modelfilter = ['modelid' => $model->get_id()];

        // No training for static models: no trained samples and no trained files are recorded.
        $model->train();
        $this->assertEmpty($DB->get_records('analytics_train_samples', $modelfilter));
        $this->assertEmpty($DB->count_records('analytics_used_files', $modelfilter + ['action' => 'trained']));

        // Now we create 2 hidden courses (only hidden courses are getting predictions).
        // The first course prediction should be 1 == 'a', the second one should be 0 == 'b'.
        $expected = [];
        foreach (['aaaaaa' => 1, 'bbbbbb' => 0] as $name => $prediction) {
            $hiddencourse = $this->getDataGenerator()->create_course(
                ['shortname' => $name, 'fullname' => $name, 'visible' => 0]
            );
            $expected[$hiddencourse->id] = $prediction;
        }

        $outcome = $model->predict();

        foreach ($outcome->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
        }

        // Two predicted-samples records and two predictions are expected for this model.
        $assertstoredrecords = function () use ($DB, $modelfilter): void {
            $this->assertCount(2, $DB->get_records('analytics_predict_samples', $modelfilter));
            $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));
        };
        $assertstoredrecords();

        // No new generated records as there are no new courses available.
        $model->predict();
        $assertstoredrecords();
    }
127
 
128
    /**
     * Checks that restricting a model to a set of contexts limits the courses it predicts on.
     *
     * @return void
     */
    public function test_model_contexts(): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();

        $nosplitting = '\core\analytics\time_splitting\no_splitting';

        $misc = $DB->get_record('course_categories', ['name' => get_string('defaultcategoryname')]);
        $miscctx = \context_coursecat::instance($misc->id);

        $generator = $this->getDataGenerator();
        $category = $generator->create_category();
        $categoryctx = \context_coursecat::instance($category->id);

        // One course per category: the first in the new category, the second in the default one.
        $course1 = $generator->create_course([
            'shortname' => 'aaaaaa',
            'fullname' => 'aaaaaa',
            'visible' => 0,
            'category' => $category->id,
        ]);
        $course1ctx = \context_course::instance($course1->id);
        $generator->create_course([
            'shortname' => 'bbbbbb',
            'fullname' => 'bbbbbb',
            'visible' => 0,
            'category' => $misc->id,
        ]);

        $model = $this->add_perfect_model('test_static_target_shortname');

        // Just 1 category.
        $model->update(true, false, $nosplitting, false, [$categoryctx->id]);
        $this->assertCount(1, $model->predict()->predictions);

        // Now with 2 categories.
        $model->update(true, false, false, false, [$categoryctx->id, $miscctx->id]);

        // The courses in the new category are processed.
        $this->assertCount(1, $model->predict()->predictions);

        // Clear the predictions generated by the model and predict() again.
        $model->clear();
        $this->assertCount(2, $model->predict()->predictions);

        // Course context restriction.
        $model->update(true, false, $nosplitting, false, [$course1ctx->id]);

        // Nothing new as the course was already analysed.
        $nothingnew = $model->predict();
        $this->assertTrue(empty($nothingnew->predictions));

        $model->clear();
        $this->assertCount(1, $model->predict()->predictions);
    }
178
 
179
    /**
     * Trains a perfect model and checks the records and files produced by training and prediction.
     *
     * @dataProvider provider_ml_training_and_prediction
     * @param string $timesplittingid Time splitting method the model is updated to use.
     * @param int $predictedrangeindex Range index every predicted-samples record is expected to have.
     * @param int $nranges Number of time ranges of the time splitting method.
     * @param string $predictionsprocessorclass Predictions processor class under test.
     * @param array $forcedconfig Config passed to set_forced_config() before the test runs.
     * @return void
     */
    public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass,
            $forcedconfig): void {
        global $DB;

        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        // Generate training data. Two training samples are expected per requested course, so the
        // derived expectations below are computed from $ncourses instead of being hard-coded.
        $ncourses = 10;
        $ntrainingsamples = $ncourses * 2;
        $this->generate_courses($ncourses);

        $model = $this->add_perfect_model();

        $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));

        $modelid = $model->get_id();
        $modelfilter = ['modelid' => $modelid];
        $trainedfilter = $modelfilter + ['action' => 'trained'];
        $predictedfilter = $modelfilter + ['action' => 'predicted'];

        // Returns the analysable files this model has stored under the given file area.
        $fs = get_file_storage();
        $syscontextid = \context_system::instance()->id;
        $analysablefiles = static fn(string $filearea) => $fs->get_directory_files(
            $syscontextid, 'analytics', $filearea, $modelid, '/analysable/', true, false);
        $labelled = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        $unlabelled = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;

        // No samples trained yet.
        $this->assertEquals(0, $DB->count_records('analytics_train_samples', $modelfilter));

        $model->train();
        $this->assertEquals(1, $model->is_enabled());
        $this->assertEquals(1, $model->is_trained());

        // Training samples * the 3 model indicators * the number of time ranges of this time splitting method.
        $indicatorcalc = $ntrainingsamples * 3 * $nranges;
        $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

        // 1 training file was created.
        $trainedsamples = $DB->get_records('analytics_train_samples', $modelfilter);
        $this->assertCount(1, $trainedsamples);
        $samples = json_decode(reset($trainedsamples)->sampleids, true);
        $this->assertCount($ntrainingsamples, $samples);
        $this->assertEquals(1, $DB->count_records('analytics_used_files', $trainedfilter));
        // Check that analysable files for training are stored under labelled filearea.
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertEmpty($analysablefiles($unlabelled));

        $params = [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];
        $courseparams = $params + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0];
        $course1 = $this->getDataGenerator()->create_course($courseparams);
        $courseparams = $params + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0];
        $course2 = $this->getDataGenerator()->create_course($courseparams);

        // They will not be skipped for prediction though.
        $result = $model->predict();

        // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
        $correct = [$course1->id => 1, $course2->id => 0];
        foreach ($result->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
        }

        // 1 range will be predicted.
        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
            $sampleids = json_decode($predictedrange->sampleids, true);
            $this->assertCount(2, $sampleids);
            $this->assertContainsEquals($course1->id, $sampleids);
            $this->assertContainsEquals($course2->id, $sampleids);
        }
        $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
        // 2 predictions.
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

        // Check that analysable files to get predictions are stored under unlabelled filearea.
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertCount(1, $analysablefiles($unlabelled));

        // No new generated files nor records as there are no new courses available.
        $model->predict();
        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        }
        $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

        // New samples that can be used for prediction.
        $courseparams = $params + ['shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0];
        $course3 = $this->getDataGenerator()->create_course($courseparams);
        $courseparams = $params + ['shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0];
        $course4 = $this->getDataGenerator()->create_course($courseparams);

        $model->predict();

        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
            $sampleids = json_decode($predictedrange->sampleids, true);
            $this->assertCount(4, $sampleids);
            $this->assertContainsEquals($course1->id, $sampleids);
            $this->assertContainsEquals($course2->id, $sampleids);
            $this->assertContainsEquals($course3->id, $sampleids);
            $this->assertContainsEquals($course4->id, $sampleids);
        }
        $this->assertEquals(2, $DB->count_records('analytics_used_files', $predictedfilter));
        $this->assertEquals(4, $DB->count_records('analytics_predictions', $modelfilter));
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertCount(2, $analysablefiles($unlabelled));

        // New visible courses (for training).
        $this->getDataGenerator()->create_course(['shortname' => 'aaa', 'fullname' => 'aa']);
        $this->getDataGenerator()->create_course();
        $model->train();
        $this->assertEquals(2, $DB->count_records('analytics_used_files', $trainedfilter));
        $this->assertCount(2, $analysablefiles($labelled));
        $this->assertCount(2, $analysablefiles($unlabelled));

        // Confirm that the files associated to the model are deleted on clear and on delete. The ML backend deletion
        // processes will be triggered by these actions and any exception there would result in a failed test.
        $model->clear();
        $this->assertEquals(0, $DB->count_records('analytics_used_files', $trainedfilter));
        $this->assertCount(0, $analysablefiles($labelled));
        $this->assertCount(0, $analysablefiles($unlabelled));
        $model->delete();

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
343
 
344
    /**
     * Data provider for test_ml_training_and_prediction.
     *
     * Each case lists the time splitting class, the expected predicted range index and the
     * number of ranges, in the order the test method receives them. The cases are then passed
     * through add_prediction_processors() so all system prediction processors get tested.
     *
     * @return array
     */
    public static function provider_ml_training_and_prediction(): array {
        // We need to test all system prediction processors.
        return static::add_prediction_processors([
            'no_splitting' => ['\core\analytics\time_splitting\no_splitting', 0, 1],
            'quarters' => ['\core\analytics\time_splitting\quarters', 3, 4],
        ]);
    }
358
 
359
    /**
     * Checks that an exported model can be imported and predicts the same as the original one.
     *
     * @param string $predictionsprocessorclass The class name
     * @param array $forcedconfig Config passed to set_forced_config() before the test runs.
     * @dataProvider provider_ml_processors
     */
    public function test_ml_export_import($predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        // Generate training data.
        $ncourses = 10;
        $this->generate_courses($ncourses);

        $model = $this->add_perfect_model();

        $model->update(true, false, '\core\analytics\time_splitting\quarters', get_class($predictionsprocessor));

        $model->train();
        $this->assertTrue($model->trained_locally());

        $this->generate_courses(10, ['visible' => 0]);

        $originalresults = $model->predict();

        // First export: the extracted contents must include the ML backend part.
        $exportpath = $model->export_model('model-zip-' . microtime() . '.zip');

        [, $mlbackend] = (new \core_analytics\model_config())->extract_import_contents($exportpath);
        $this->assertNotFalse($mlbackend);

        $importmodel = \core_analytics\model::import_model($exportpath);
        $importmodel->enable();

        // Now predict using the imported model without prior training.
        $importedmodelresults = $importmodel->predict();

        foreach ($originalresults->predictions as $sampleid => $prediction) {
            $imported = $importedmodelresults->predictions[$sampleid];
            $this->assertEquals($imported->prediction, $prediction->prediction);
        }

        $this->assertFalse($importmodel->trained_locally());

        // Second export, passing false as the second argument: no ML backend part is extracted.
        $exportpath = $model->export_model('model-zip-' . microtime() . '.zip', false);

        [, $mlbackend] = (new \core_analytics\model_config())->extract_import_contents($exportpath);
        $this->assertFalse($mlbackend);

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
419
 
420
    /**
     * Data provider for the tests that only vary on the prediction processor.
     *
     * A single case without arguments of its own is passed through add_prediction_processors().
     *
     * @return array
     */
    public static function provider_ml_processors(): array {
        // We need to test all system prediction processors.
        return static::add_prediction_processors(['case' => []]);
    }
433
    /**
     * Test the system classifiers returns.
     *
     * This test checks that all mlbackend plugins in the system are able to return proper status codes
     * even under weird situations.
     *
     * @dataProvider provider_ml_classifiers_return
     * @param string $success Expected outcome: 'yes', 'no' or 'maybe'.
     * @param int $nsamples Total number of samples, it must be divisible by the number of classes.
     * @param array $classes Target classes to generate samples for.
     * @param string $predictionsprocessorclass Predictions processor class under test.
     * @param array $forcedconfig Config passed to set_forced_config() before the test runs.
     * @return void
     */
    public function test_ml_classifiers_return($success, $nsamples, $classes, $predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest();

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        if ($nsamples % count($classes) != 0) {
            throw new \coding_exception('The number of samples should be divisible by the number of classes');
        }
        $samplesperclass = $nsamples / count($classes);

        $lines = [
            // Metadata (we pass 2 classes even if $classes only provides 1 class samples as we want to test
            // what the backend does in this case).
            'nfeatures,targetclasses,targettype',
            '3,"[0,1]","discrete"',
            // Headers.
            'feature1,feature2,feature3,target',
        ];
        foreach ($classes as $class) {
            for ($n = 0; $n < $samplesperclass; $n++) {
                $lines[] = "1,0,1,$class";
            }
        }
        $csv = implode(PHP_EOL, $lines) . PHP_EOL;

        $datasetfile = get_file_storage()->create_file_from_string([
            'contextid' => \context_system::instance()->id,
            'component' => 'analytics',
            'filearea' => 'labelled',
            'itemid' => 123,
            'filepath' => '/',
            'filename' => 'whocares.csv',
        ], $csv);

        // Training should work correctly if at least 1 sample of each class is included.
        $dir = make_request_directory();
        $modeluniqueid = 'whatever' . microtime();
        $result = $predictionsprocessor->train_classification($modeluniqueid, $datasetfile, $dir);

        switch ($success) {
            case 'yes':
                $this->assertEquals(\core_analytics\model::OK, $result->status);
                break;
            case 'no':
                $this->assertNotEquals(\core_analytics\model::OK, $result->status);
                break;
            case 'maybe':
            default:
                // We just check that an object is returned so we don't have an empty check,
                // what we really want to check is that an exception was not thrown.
                $this->assertInstanceOf(\stdClass::class, $result);
        }

        // Purge the directory used in this test (useful in case the mlbackend is storing files
        // somewhere out of the default moodledata/models dir).
        $predictionsprocessor->delete_output_dir($dir, $modeluniqueid);
    }
505
 
506
    /**
     * Data provider for test_ml_classifiers_return.
     *
     * We can not be very specific here as test_ml_classifiers_return only checks that
     * mlbackend plugins behave as expected and properly control backend errors even
     * under weird situations.
     *
     * Each case lists the expected outcome, the number of samples and the classes the
     * samples belong to, in the order the test method receives them.
     *
     * @return array
     */
    public static function provider_ml_classifiers_return(): array {
        // Using verbose options as the first argument for readability.
        // We need to test all system prediction processors.
        return static::add_prediction_processors([
            '1-samples' => ['maybe', 1, [0]],
            '2-samples-same-class' => ['maybe', 2, [0]],
            '2-samples-different-classes' => ['yes', 2, [0, 1]],
            '4-samples-different-classes' => ['yes', 4, [0, 1]],
        ]);
    }
527
 
528
    /**
     * Tests correct multi-classification.
     *
     * @dataProvider provider_test_multi_classifier
     * @param string $timesplittingid Time splitting method the model is updated to use.
     * @param string $predictionsprocessorclass Predictions processor class under test.
     * @param array|null $forcedconfig Config passed to set_forced_config() before the test runs.
     * @throws \coding_exception
     * @throws \moodle_exception
     */
    public function test_ml_multi_classifier($timesplittingid, $predictionsprocessorclass, $forcedconfig): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $this->set_forced_config($forcedconfig);

        $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
        $ready = $predictionsprocessor->is_ready() === true;
        if (!$ready) {
            $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
        }

        // Generate training courses.
        $ncourses = 5;
        $this->generate_courses(ncourses: $ncourses, ismulticlass: true);
        $model = $this->add_multiclass_model();
        $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
        $model->train();

        $dates = [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];

        // Hidden courses to predict on: 'a' names should be predicted as 0, 'b' as 1 and 'c' as 2.
        $correct = [];
        foreach (['aaaaaa' => 0, 'bbbbbb' => 1, 'cccccc' => 2] as $name => $class) {
            $hiddencourse = $this->getDataGenerator()->create_course(
                $dates + ['shortname' => $name, 'fullname' => $name, 'visible' => 0]
            );
            $correct[$hiddencourse->id] = $class;
        }

        // They will not be skipped for prediction though.
        $outcome = $model->predict();
        foreach ($outcome->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
        }

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
583
 
584
    /**
     * Data provider for test_ml_multi_classifier.
     *
     * The only case uses the no_splitting time splitting method; it is then passed through
     * add_prediction_processors().
     *
     * @return array
     */
    public static function provider_test_multi_classifier(): array {
        // Add all system prediction processors.
        return static::add_prediction_processors([
            'notimesplitting' => ['\core\analytics\time_splitting\no_splitting'],
        ]);
    }
597
 
598
    /**
     * Basic test to check that prediction processors work as expected.
     *
     * @coversNothing
     * @dataProvider provider_ml_test_evaluation_configuration
     * @param string $modelquality Either 'perfect' or 'random'.
     * @param int $ncourses Number of courses passed to generate_courses().
     * @param array $expected Status code that must be included in the result, keyed by time splitting id.
     * @param string $predictionsprocessorclass Predictions processor class under test.
     * @param array $forcedconfig Config passed to set_forced_config() before the test runs.
     * @return void
     */
    public function test_ml_evaluation_configuration($modelquality, $ncourses, $expected, $predictionsprocessorclass,
            $forcedconfig): void {
        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $sometimesplittings = implode(',', [
            '\core\analytics\time_splitting\single_range',
            '\core\analytics\time_splitting\quarters',
        ]);
        set_config('defaulttimesplittingsevaluation', $sometimesplittings, 'analytics');

        $model = match ($modelquality) {
            'perfect' => $this->add_perfect_model(),
            'random' => $this->add_random_model(),
            default => throw new \coding_exception('Only perfect and random accepted as $modelquality values'),
        };

        // Generate training data.
        $this->generate_courses($ncourses);

        $model->update(false, false, false, get_class($predictionsprocessor));
        $evaluations = $model->evaluate();

        foreach ($evaluations as $timesplitting => $evaluation) {
            // We check that the returned status includes at least the expected status code.
            $expectedcode = $expected[$timesplitting];
            $message = 'The returned status code ' . $evaluation->status . ' should include ' . $expectedcode;
            $this->assertEquals($expectedcode, $evaluation->status & $expectedcode, $message);

            // A cached analysis file must be retrievable for this time splitting method.
            $resultfile = new \core_analytics\local\analysis\result_file(
                $model->get_id(),
                true,
                ['evaluation' => true, 'reuseprevanalysed' => true]
            );
            $timesplittingobj = \core_analytics\manager::get_time_splitting($timesplitting);
            $analysable = new \core_analytics\site();
            $cachedanalysis = $resultfile->retrieve_cached_result($timesplittingobj, $analysable);
            $this->assertInstanceOf(\stored_file::class, $cachedanalysis);
        }

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
655
 
656
    /**
     * Checks that a trained model can be exported, imported and then evaluated in 'trainedmodel' mode.
     *
     * @coversNothing
     * @dataProvider provider_ml_processors
     * @param string $predictionsprocessorclass
     * @param array $forcedconfig
     * @return void
     */
    public function test_ml_evaluation_trained_model($predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest(true);

        // Apply the forced plugin settings first; the test is skipped when the processor is not ready.
        $this->set_forced_config($forcedconfig);
        $processor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminuser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $quarters = '\\core\\analytics\\time_splitting\\quarters';
        $trainedmodel = $this->add_perfect_model();

        // Generate training data.
        $this->generate_courses(50);

        $trainedmodel->update(true, false, $quarters, get_class($processor));
        $trainedmodel->train();

        // Round-trip the trained model through a zip export and import.
        $exportpath = $trainedmodel->export_model('model-zip-' . microtime() . '.zip');
        $imported = \core_analytics\model::import_model($exportpath);

        $evaluation = $imported->evaluate(['mode' => 'trainedmodel']);
        $this->assertEquals(0, $evaluation[$quarters]->status);
        $this->assertEquals(1, $evaluation[$quarters]->score);

        // Disable the log store again and rebuild the log manager.
        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
693
 
694
    /**
     * Checks that calculate() does not call calculate_sample() when existing calculations are supplied.
     *
     * The only assertion is the mock expectation: calculate_sample() must never be invoked, because
     * every requested sample id already has an entry in the existing calculations passed in.
     *
     * @return void
     */
    public function test_read_indicator_calculations(): void {
        $this->resetAfterTest(true);

        $starttime = 123;
        $endtime = 321;
        $sampleorigin = 'whatever';

        $indicator = $this->getMockBuilder('test_indicator_max')->onlyMethods(['calculate_sample'])->getMock();
        $indicator->expects($this->never())->method('calculate_sample');

        // Both sample ids have an existing calculation.
        $existingcalcs = [111 => 1, 222 => -1];
        $sampleids = [111 => 111, 222 => 222];

        // The return value is not inspected; only the absence of calculate_sample() calls matters.
        $indicator->calculate($sampleids, $sampleorigin, $starttime, $endtime, $existingcalcs);
    }
715
 
716
    /**
     * Checks which samples calculate_indicators() returns depending on whether their values are null.
     *
     * Samples with at least one not null indicator value are returned; samples whose indicator
     * values are all null are not.
     */
    public function test_not_null_samples(): void {
        $this->resetAfterTest(true);

        $timesplitting = \core_analytics\manager::get_time_splitting('\core\analytics\time_splitting\quarters');
        $timesplitting->set_analysable(new \core_analytics\site());

        $ranges = [
            ['start' => 111, 'end' => 222, 'time' => 222],
            ['start' => 222, 'end' => 333, 'time' => 333],
        ];
        $samples = [123 => 123, 321 => 321];

        $target = \core_analytics\manager::get_target('test_target_shortname');

        // Creates a model using the given indicator class names and runs the internal
        // calculate_indicators() step of its analysis, returning the resulting dataset.
        $calculate = function (array $indicatorclasses) use ($target, $timesplitting, $samples, $ranges) {
            $indicators = [];
            foreach ($indicatorclasses as $index => $classname) {
                $indicators[$index] = \core_analytics\manager::get_indicator($classname);
            }
            $model = \core_analytics\model::create($target, $indicators, '\core\analytics\time_splitting\no_splitting');

            $analyser = $model->get_analyser();
            $result = new \core_analytics\local\analysis\result_array($model->get_id(), false, $analyser->get_options());
            $analysis = new \core_analytics\analysis($analyser, false, $result);

            return \phpunit_util::call_internal_method(
                $analysis,
                'calculate_indicators',
                [$timesplitting, $samples, $ranges],
                '\core_analytics\analysis'
            );
        };

        // Samples with at least 1 not null value are returned.
        $dataset = $calculate(['test_indicator_null', 'test_indicator_min']);
        $this->assertArrayHasKey('123-0', $dataset);
        $this->assertArrayHasKey('123-1', $dataset);
        $this->assertArrayHasKey('321-0', $dataset);
        $this->assertArrayHasKey('321-1', $dataset);

        // Samples with only null values are not returned.
        $dataset = $calculate(['test_indicator_null']);
        $this->assertArrayNotHasKey('123-0', $dataset);
        $this->assertArrayNotHasKey('123-1', $dataset);
        $this->assertArrayNotHasKey('321-0', $dataset);
        $this->assertArrayNotHasKey('321-1', $dataset);
    }
779
 
780
    /**
     * Data provider for test_ml_evaluation_configuration.
     *
     * Defines a 'bad' (random model) and a 'good' (perfect model) case and passes them through
     * add_prediction_processors().
     *
     * @return array
     */
    public static function provider_ml_test_evaluation_configuration(): array {
        $singlerange = '\core\analytics\time_splitting\single_range';
        $quarters = '\core\analytics\time_splitting\quarters';

        $cases = [
            'bad' => [
                'modelquality' => 'random',
                'ncourses' => 50,
                'expected' => [
                    $singlerange => \core_analytics\model::LOW_SCORE,
                    $quarters => \core_analytics\model::LOW_SCORE,
                ],
            ],
            'good' => [
                'modelquality' => 'perfect',
                'ncourses' => 50,
                'expected' => [
                    $singlerange => \core_analytics\model::OK,
                    $quarters => \core_analytics\model::OK,
                ],
            ],
        ];

        return static::add_prediction_processors($cases);
    }
806
 
807
    /**
     * Creates a model for test_target_shortname using the max, min and random test indicators.
     *
     * @return \core_analytics\model
     */
    protected function add_random_model() {
        $target = \core_analytics\manager::get_target('test_target_shortname');

        $indicators = array_map(
            function ($classname) {
                return \core_analytics\manager::get_indicator($classname);
            },
            ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
        );

        $created = \core_analytics\model::create($target, $indicators);

        // Re-instantiate from the id to load db defaults as well.
        return new \core_analytics\model($created->get_id());
    }
825
 
826
    /**
     * Creates a model for the given target using the max, min and fullname test indicators.
     *
     * @param string $targetclass
     * @return \core_analytics\model
     */
    protected function add_perfect_model($targetclass = 'test_target_shortname') {
        $target = \core_analytics\manager::get_target($targetclass);

        $indicators = [];
        foreach (['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname'] as $index => $classname) {
            $indicators[$index] = \core_analytics\manager::get_indicator($classname);
        }

        $created = \core_analytics\model::create($target, $indicators);

        // Re-instantiate from the id to load db defaults as well.
        return new \core_analytics\model($created->get_id());
    }
844
 
845
    /**
     * Creates a multi-classification model using the fullname and multiclass test indicators.
     *
     * @param string $targetclass
     * @return \core_analytics\model
     * @throws \coding_exception
     * @throws \moodle_exception
     */
    public function add_multiclass_model($targetclass = 'test_target_shortname_multiclass') {
        $target = \core_analytics\manager::get_target($targetclass);

        $indicators = array_map(
            function ($classname) {
                return \core_analytics\manager::get_indicator($classname);
            },
            ['test_indicator_fullname', 'test_indicator_multiclass']
        );

        $created = \core_analytics\model::create($target, $indicators);

        return new \core_analytics\model($created->get_id());
    }
863
 
864
    /**
     * Resets the prediction processors and then applies the given plugin settings, if any.
     *
     * @param array|null $forcedconfig Settings indexed by plugin name and then by setting name; may be empty.
     */
    protected function set_forced_config($forcedconfig) {
        \core_analytics\manager::reset_prediction_processors();

        if (!empty($forcedconfig)) {
            foreach ($forcedconfig as $plugin => $settings) {
                foreach ($settings as $setting => $settingvalue) {
                    set_config($setting, $settingvalue, $plugin);
                }
            }
        }
    }
881
 
882
    /**
     * Returns the requested predictions processor, marking the test as skipped when it is not ready.
     *
     * @param  string  $predictionsprocessorclass
     * @return \core_analytics\predictor
     */
    protected function is_predictions_processor_ready(string $predictionsprocessorclass) {
        $processor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);

        // Anything other than a strict true is appended to the skip message as the reason.
        $status = $processor->is_ready();
        if (true !== $status) {
            $reason = 'Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready: ' . $status;
            $this->markTestSkipped($reason);
        }

        return $processor;
    }
898
 
899
    /**
     * Expands the provided cases so that each one is repeated for every prediction processor.
     *
     * Each returned case gets two extra elements: 'predictionsprocessorclass' and 'forcedconfig'.
     * For the python processor, when all the TEST_MLBACKEND_PYTHON_* connection constants are
     * defined, the case is keyed with a '-server' suffix and 'forcedconfig' holds the server
     * settings; otherwise 'forcedconfig' is null.
     *
     * @param array $cases Data provider cases indexed by case name.
     * @return array The expanded cases, indexed by "<case name>-<processor class>[-server]".
     */
    protected static function add_prediction_processors($cases): array {
        $return = [];

        // Every constant read below must be defined before the server variant is used; an
        // undefined constant is a fatal error on PHP 8.
        $testpythonserver = defined('TEST_MLBACKEND_PYTHON_HOST') && defined('TEST_MLBACKEND_PYTHON_PORT')
            && defined('TEST_MLBACKEND_PYTHON_USERNAME') && defined('TEST_MLBACKEND_PYTHON_PASSWORD');

        // We need to test all prediction processors in the system.
        $predictionprocessors = \core_analytics\manager::get_all_prediction_processors();
        foreach ($predictionprocessors as $classfullname => $predictionsprocessor) {
            foreach ($cases as $key => $case) {

                if (!$predictionsprocessor instanceof \mlbackend_python\processor || !$testpythonserver) {
                    $extraparams = ['predictionsprocessorclass' => $classfullname, 'forcedconfig' => null];
                    $return[$key . '-' . $classfullname] = $case + $extraparams;
                } else {

                    // We want the configuration to be forced during the test as things like importing models create new
                    // instances of ML backend processors during the process.
                    $forcedconfig = ['mlbackend_python' => ['useserver' => true, 'host' => TEST_MLBACKEND_PYTHON_HOST,
                        'port' => TEST_MLBACKEND_PYTHON_PORT, 'secure' => false, 'username' => TEST_MLBACKEND_PYTHON_USERNAME,
                        'password' => TEST_MLBACKEND_PYTHON_PASSWORD]];
                    $casekey = $key . '-' . $classfullname . '-server';
                    $return[$casekey] = $case + ['predictionsprocessorclass' => $classfullname, 'forcedconfig' => $forcedconfig];
                }
            }
        }

        return $return;
    }
936
}