Proyectos de Subversion Moodle

Rev

Rev 1 | | Comparar con el anterior | Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
namespace core_analytics;
18
 
19
defined('MOODLE_INTERNAL') || die();
20
 
21
global $CFG;
22
require_once(__DIR__ . '/fixtures/test_indicator_max.php');
23
require_once(__DIR__ . '/fixtures/test_indicator_min.php');
24
require_once(__DIR__ . '/fixtures/test_indicator_null.php');
25
require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
26
require_once(__DIR__ . '/fixtures/test_indicator_random.php');
27
require_once(__DIR__ . '/fixtures/test_indicator_multiclass.php');
28
require_once(__DIR__ . '/fixtures/test_target_shortname.php');
29
require_once(__DIR__ . '/fixtures/test_target_shortname_multiclass.php');
30
require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
31
 
32
require_once(__DIR__ . '/../../course/lib.php');
33
 
34
/**
35
 * Unit tests for evaluation, training and prediction.
36
 *
37
 * NOTE: in order to execute this test using a separate server for the
38
 *       python ML backend you need to define these variables in your config.php file:
39
 *
40
 * define('TEST_MLBACKEND_PYTHON_HOST', '127.0.0.1');
41
 * define('TEST_MLBACKEND_PYTHON_PORT', 5000);
42
 * define('TEST_MLBACKEND_PYTHON_USERNAME', 'default');
43
 * define('TEST_MLBACKEND_PYTHON_PASSWORD', 'sshhhh');
44
 *
45
 * @package   core_analytics
46
 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
47
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
48
 */
49
class prediction_test extends \advanced_testcase {
50
 
51
    /**
     * Purge all the mlbackend outputs.
     *
     * This is done automatically for mlbackends using the web server dataroot but
     * other mlbackends may store files elsewhere and these files need to be removed.
     *
     * @return void
     */
    public function tearDown(): void {
        // The models are deleted while acting as the admin user.
        $this->setAdminUser();

        foreach (\core_analytics\manager::get_all_models() as $model) {
            $model->delete();
        }

        // Hand over to the parent test case so that its own clean-up also runs.
        parent::tearDown();
    }
67
 
68
    /**
     * Checks the predictions returned by a static model, which needs no training.
     *
     * @return void
     */
    public function test_static_prediction(): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();

        $model = $this->add_perfect_model('test_static_target_shortname');
        $model->enable('\core\analytics\time_splitting\no_splitting');
        $this->assertEquals(1, $model->is_enabled());
        $this->assertEquals(1, $model->is_trained());

        $modelfilter = ['modelid' => $model->get_id()];

        // No training for static models: train() must not leave samples nor files behind.
        $model->train();
        $this->assertEmpty($DB->get_records('analytics_train_samples', $modelfilter));
        $this->assertEmpty($DB->count_records('analytics_used_files', $modelfilter + ['action' => 'trained']));

        // Now we create 2 hidden courses (only hidden courses are getting predictions).
        $generator = $this->getDataGenerator();
        $course1 = $generator->create_course(['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
        $course2 = $generator->create_course(['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);

        $result = $model->predict();

        // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
        $expected = [$course1->id => 1, $course2->id => 0];
        foreach ($result->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
        }

        // 1 range for each analysable.
        $this->assertCount(2, $DB->get_records('analytics_predict_samples', $modelfilter));
        // 2 predictions for each range.
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

        // No new generated records as there are no new courses available.
        $model->predict();
        $this->assertCount(2, $DB->get_records('analytics_predict_samples', $modelfilter));
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));
    }
122
 
123
    /**
     * Checks that the contexts a model is restricted to limit what predict() analyses.
     */
    public function test_model_contexts(): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();

        $generator = $this->getDataGenerator();
        $nosplitting = '\core\analytics\time_splitting\no_splitting';

        $misc = $DB->get_record('course_categories', ['name' => get_string('defaultcategoryname')]);
        $miscctx = \context_coursecat::instance($misc->id);

        $category = $generator->create_category();
        $categoryctx = \context_coursecat::instance($category->id);

        // One course per category.
        $course1 = $generator->create_course([
            'shortname' => 'aaaaaa',
            'fullname' => 'aaaaaa',
            'visible' => 0,
            'category' => $category->id,
        ]);
        $course1ctx = \context_course::instance($course1->id);
        $generator->create_course([
            'shortname' => 'bbbbbb',
            'fullname' => 'bbbbbb',
            'visible' => 0,
            'category' => $misc->id,
        ]);

        $model = $this->add_perfect_model('test_static_target_shortname');

        // Just 1 category.
        $model->update(true, false, $nosplitting, false, [$categoryctx->id]);
        $this->assertCount(1, $model->predict()->predictions);

        // Now with 2 categories.
        $model->update(true, false, false, false, [$categoryctx->id, $miscctx->id]);

        // The courses in the new category are processed.
        $this->assertCount(1, $model->predict()->predictions);

        // Clear the predictions generated by the model and predict() again.
        $model->clear();
        $this->assertCount(2, $model->predict()->predictions);

        // Course context restriction.
        $model->update(true, false, $nosplitting, false, [$course1ctx->id]);

        // Nothing new as the course was already analysed.
        $nothingnew = $model->predict();
        $this->assertTrue(empty($nothingnew->predictions));

        $model->clear();
        $this->assertCount(1, $model->predict()->predictions);
    }
173
 
174
    /**
     * Trains a machine learning model and checks the predictions it produces afterwards.
     *
     * Also verifies the records and dataset files stored along the way, and that
     * clearing the model removes them.
     *
     * @dataProvider provider_ml_training_and_prediction
     * @param string $timesplittingid Time splitting method the model is set to use.
     * @param int $predictedrangeindex Range index expected in the stored predicted samples.
     * @param int $nranges Number of time ranges of this time splitting method.
     * @param string $predictionsprocessorclass Prediction processor class name.
     * @param array $forcedconfig Configuration handed over to set_forced_config().
     * @return void
     */
    public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass,
            $forcedconfig): void {
        global $DB;

        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        // Generate training data.
        $ncourses = 10;
        $this->generate_courses($ncourses);
        // The training set is expected to contain twice as many samples as $ncourses.
        $ntrainingsamples = $ncourses * 2;

        $model = $this->add_perfect_model();

        $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));

        $modelfilter = ['modelid' => $model->get_id()];
        $fs = get_file_storage();
        $syscontextid = \context_system::instance()->id;
        $labelled = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        $unlabelled = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;

        // Analysable dataset files stored for this model under the given file area.
        $analysablefiles = function($filearea) use ($fs, $syscontextid, $model) {
            return $fs->get_directory_files($syscontextid, 'analytics', $filearea, $model->get_id(), '/analysable/',
                true, false);
        };
        // Number of files this model has used for the given action.
        $usedfiles = function($action) use ($DB, $model) {
            return $DB->count_records('analytics_used_files', ['modelid' => $model->get_id(), 'action' => $action]);
        };

        // No samples trained yet.
        $this->assertEquals(0, $DB->count_records('analytics_train_samples', $modelfilter));

        $model->train();
        $this->assertEquals(1, $model->is_enabled());
        $this->assertEquals(1, $model->is_trained());

        // Training samples * the 3 model indicators * the number of time ranges of this time splitting method.
        $indicatorcalc = $ntrainingsamples * 3 * $nranges;
        $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

        // 1 training file was created.
        $trainedsamples = $DB->get_records('analytics_train_samples', $modelfilter);
        $this->assertCount(1, $trainedsamples);
        $samples = json_decode(reset($trainedsamples)->sampleids, true);
        $this->assertCount($ntrainingsamples, $samples);
        $this->assertEquals(1, $usedfiles('trained'));
        // Check that analysable files for training are stored under labelled filearea.
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertEmpty($analysablefiles($unlabelled));

        $params = [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];
        $generator = $this->getDataGenerator();
        $course1 = $generator->create_course($params + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
        $course2 = $generator->create_course($params + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);

        // They will not be skipped for prediction though.
        $result = $model->predict();

        // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
        $correct = [$course1->id => 1, $course2->id => 0];
        foreach ($result->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
        }

        // 1 range will be predicted.
        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
            $sampleids = json_decode($predictedrange->sampleids, true);
            $this->assertCount(2, $sampleids);
            $this->assertContainsEquals($course1->id, $sampleids);
            $this->assertContainsEquals($course2->id, $sampleids);
        }
        $this->assertEquals(1, $usedfiles('predicted'));
        // 2 predictions.
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

        // Check that analysable files to get predictions are stored under unlabelled filearea.
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertCount(1, $analysablefiles($unlabelled));

        // No new generated files nor records as there are no new courses available.
        $model->predict();
        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        }
        $this->assertEquals(1, $usedfiles('predicted'));
        $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

        // New samples that can be used for prediction.
        $course3 = $generator->create_course($params + ['shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0]);
        $course4 = $generator->create_course($params + ['shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0]);

        $model->predict();

        $predictedranges = $DB->get_records('analytics_predict_samples', $modelfilter);
        $this->assertCount(1, $predictedranges);
        foreach ($predictedranges as $predictedrange) {
            $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
            $sampleids = json_decode($predictedrange->sampleids, true);
            $this->assertCount(4, $sampleids);
            $this->assertContainsEquals($course1->id, $sampleids);
            $this->assertContainsEquals($course2->id, $sampleids);
            $this->assertContainsEquals($course3->id, $sampleids);
            $this->assertContainsEquals($course4->id, $sampleids);
        }
        $this->assertEquals(2, $usedfiles('predicted'));
        $this->assertEquals(4, $DB->count_records('analytics_predictions', $modelfilter));
        $this->assertCount(1, $analysablefiles($labelled));
        $this->assertCount(2, $analysablefiles($unlabelled));

        // New visible course (for training).
        $generator->create_course(['shortname' => 'aaa', 'fullname' => 'aa']);
        $generator->create_course();
        $model->train();
        $this->assertEquals(2, $usedfiles('trained'));
        $this->assertCount(2, $analysablefiles($labelled));
        $this->assertCount(2, $analysablefiles($unlabelled));

        // Confirm that the files associated to the model are deleted on clear and on delete. The ML backend deletion
        // processes will be triggered by these actions and any exception there would result in a failed test.
        $model->clear();
        $this->assertEquals(0, $usedfiles('trained'));
        $this->assertCount(0, $analysablefiles($labelled));
        $this->assertCount(0, $analysablefiles($unlabelled));
        $model->delete();

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
338
 
339
    /**
     * Data provider for test_ml_training_and_prediction.
     *
     * Each base case lists the time splitting id, the predicted range index and
     * the number of ranges; the cases are then passed through add_prediction_processors().
     *
     * @return array
     */
    public function provider_ml_training_and_prediction() {
        $basecases = [
            'no_splitting' => ['\core\analytics\time_splitting\no_splitting', 0, 1],
            'quarters' => ['\core\analytics\time_splitting\quarters', 3, 4],
        ];

        // We need to test all system prediction processors.
        return $this->add_prediction_processors($basecases);
    }
353
 
354
    /**
     * Exports a trained model, imports it back and compares the predictions of both models.
     *
     * @param string $predictionsprocessorclass The class name
     * @param array $forcedconfig
     * @dataProvider provider_ml_processors
     */
    public function test_ml_export_import($predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        // Generate training data.
        $ncourses = 10;
        $this->generate_courses($ncourses);

        $model = $this->add_perfect_model();
        $model->update(true, false, '\core\analytics\time_splitting\quarters', get_class($predictionsprocessor));

        $model->train();
        $this->assertTrue($model->trained_locally());

        $this->generate_courses(10, ['visible' => 0]);

        $originalresults = $model->predict();

        // Export including the ML backend data.
        $fullzippath = $model->export_model('model-zip-' . microtime() . '.zip');

        $extractor = new \core_analytics\model_config();
        [, $mlbackend] = $extractor->extract_import_contents($fullzippath);
        $this->assertNotFalse($mlbackend);

        $importmodel = \core_analytics\model::import_model($fullzippath);
        $importmodel->enable();

        // Now predict using the imported model without prior training.
        $importedmodelresults = $importmodel->predict();

        foreach ($originalresults->predictions as $sampleid => $prediction) {
            $this->assertEquals($importedmodelresults->predictions[$sampleid]->prediction, $prediction->prediction);
        }

        $this->assertFalse($importmodel->trained_locally());

        // Export again, this time passing false as the second export_model() argument.
        $configonlyzippath = $model->export_model('model-zip-' . microtime() . '.zip', false);

        $extractor = new \core_analytics\model_config();
        [, $mlbackend] = $extractor->extract_import_contents($configonlyzippath);
        $this->assertFalse($mlbackend);

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
414
 
415
    /**
     * Data provider with a single empty base case, passed through add_prediction_processors().
     *
     * @return array
     */
    public function provider_ml_processors() {
        // We need to test all system prediction processors.
        return $this->add_prediction_processors(['case' => []]);
    }
428
    /**
     * Test the system classifiers returns.
     *
     * This test checks that all mlbackend plugins in the system are able to return proper status codes
     * even under weird situations.
     *
     * @dataProvider provider_ml_classifiers_return
     * @param string $success Expected outcome: 'yes', 'no' or 'maybe'.
     * @param int $nsamples Total number of samples written to the training dataset.
     * @param array $classes Target classes to generate samples for.
     * @param string $predictionsprocessorclass
     * @param array $forcedconfig
     * @return void
     */
    public function test_ml_classifiers_return($success, $nsamples, $classes, $predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest();

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        if ($nsamples % count($classes) != 0) {
            throw new \coding_exception('The number of samples should be divisible by the number of classes');
        }
        $samplesperclass = $nsamples / count($classes);

        $rows = [
            // Metadata (we pass 2 classes even if $classes only provides 1 class samples as we want to test
            // what the backend does in this case).
            'nfeatures,targetclasses,targettype',
            '3,"[0,1]","discrete"',
            // Headers.
            'feature1,feature2,feature3,target',
        ];
        foreach ($classes as $class) {
            for ($i = 0; $i < $samplesperclass; $i++) {
                $rows[] = "1,0,1,$class";
            }
        }
        // Every row, including the last one, is terminated by an end of line.
        $csv = implode(PHP_EOL, $rows) . PHP_EOL;

        $filerecord = [
            'contextid' => \context_system::instance()->id,
            'component' => 'analytics',
            'filearea' => 'labelled',
            'itemid' => 123,
            'filepath' => '/',
            'filename' => 'whocares.csv',
        ];
        $dataset = get_file_storage()->create_file_from_string($filerecord, $csv);

        // Training should work correctly if at least 1 sample of each class is included.
        $dir = make_request_directory();
        $modeluniqueid = 'whatever' . microtime();
        $result = $predictionsprocessor->train_classification($modeluniqueid, $dataset, $dir);

        switch ($success) {
            case 'yes':
                $this->assertEquals(\core_analytics\model::OK, $result->status);
                break;
            case 'no':
                $this->assertNotEquals(\core_analytics\model::OK, $result->status);
                break;
            case 'maybe':
            default:
                // We just check that an object is returned so we don't have an empty check,
                // what we really want to check is that an exception was not thrown.
                $this->assertInstanceOf(\stdClass::class, $result);
        }

        // Purge the directory used in this test (useful in case the mlbackend is storing files
        // somewhere out of the default moodledata/models dir).
        $predictionsprocessor->delete_output_dir($dir, $modeluniqueid);
    }
500
 
501
    /**
     * Data provider for test_ml_classifiers_return.
     *
     * We can not be very specific here as test_ml_classifiers_return only checks that
     * mlbackend plugins behave as expected and properly control backend errors even
     * under weird situations.
     *
     * @return array
     */
    public function provider_ml_classifiers_return() {
        // Using verbose options as the first argument for readability.
        $basecases = [
            '1-samples' => ['maybe', 1, [0]],
            '2-samples-same-class' => ['maybe', 2, [0]],
            '2-samples-different-classes' => ['yes', 2, [0, 1]],
            '4-samples-different-classes' => ['yes', 4, [0, 1]],
        ];

        // We need to test all system prediction processors.
        return $this->add_prediction_processors($basecases);
    }
522
 
523
    /**
     * Tests correct multi-classification.
     *
     * @dataProvider provider_test_multi_classifier
     * @param string $timesplittingid
     * @param string $predictionsprocessorclass
     * @param array|null $forcedconfig
     * @throws coding_exception
     * @throws moodle_exception
     */
    public function test_ml_multi_classifier($timesplittingid, $predictionsprocessorclass, $forcedconfig): void {
        global $DB;

        $this->resetAfterTest(true);
        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $this->set_forced_config($forcedconfig);

        $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
        $ready = $predictionsprocessor->is_ready();
        if ($ready !== true) {
            $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
        }

        // Generate training courses.
        $ncourses = 5;
        $this->generate_courses_multiclass($ncourses);
        $model = $this->add_multiclass_model();
        $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
        $model->train();

        $dates = [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];
        $generator = $this->getDataGenerator();
        $course1 = $generator->create_course($dates + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
        $course2 = $generator->create_course($dates + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);
        $course3 = $generator->create_course($dates + ['shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0]);

        // They will not be skipped for prediction though.
        $result = $model->predict();

        // The $course1 predictions should be 0 == 'a', $course2 should be 1 == 'b' and $course3 should be 2 == 'c'.
        $expected = [$course1->id => 0, $course2->id => 1, $course3->id => 2];
        foreach ($result->predictions as $uniquesampleid => $predictiondata) {
            [$sampleid, $rangeindex] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

            // The range index is not important here, both ranges prediction will be the same.
            $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
        }

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
578
 
579
    /**
     * Provider for the multi_classification test.
     *
     * @return array
     */
    public function provider_test_multi_classifier() {
        // Add all system prediction processors.
        return $this->add_prediction_processors([
            'notimesplitting' => ['\core\analytics\time_splitting\no_splitting'],
        ]);
    }
592
 
593
    /**
     * Basic test to check that prediction processors work as expected.
     *
     * @coversNothing
     * @dataProvider provider_ml_test_evaluation_configuration
     * @param string $modelquality Either 'perfect' or 'random'.
     * @param int $ncourses
     * @param array $expected Expected status code, indexed by time splitting method.
     * @param string $predictionsprocessorclass
     * @param array $forcedconfig
     * @return void
     */
    public function test_ml_evaluation_configuration($modelquality, $ncourses, $expected, $predictionsprocessorclass,
            $forcedconfig): void {
        $this->resetAfterTest(true);

        $this->set_forced_config($forcedconfig);
        $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminUser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $sometimesplittings = implode(',', [
            '\core\analytics\time_splitting\single_range',
            '\core\analytics\time_splitting\quarters',
        ]);
        set_config('defaulttimesplittingsevaluation', $sometimesplittings, 'analytics');

        switch (true) {
            case $modelquality === 'perfect':
                $model = $this->add_perfect_model();
                break;
            case $modelquality === 'random':
                $model = $this->add_random_model();
                break;
            default:
                throw new \coding_exception('Only perfect and random accepted as $modelquality values');
        }

        // Generate training data.
        $this->generate_courses($ncourses);

        $model->update(false, false, false, get_class($predictionsprocessor));
        $results = $model->evaluate();

        // We check that the returned status includes at least the expected code.
        foreach ($results as $timesplitting => $result) {
            $expectedcode = $expected[$timesplitting];
            $message = 'The returned status code ' . $result->status . ' should include ' . $expectedcode;
            $this->assertEquals($expectedcode, $result->status & $expectedcode, $message);

            // A cached analysis file must be retrievable for this time splitting method.
            $resultfile = new \core_analytics\local\analysis\result_file(
                $model->get_id(),
                true,
                ['evaluation' => true, 'reuseprevanalysed' => true]
            );
            $timesplittingobj = \core_analytics\manager::get_time_splitting($timesplitting);
            $analysable = new \core_analytics\site();
            $cachedanalysis = $resultfile->retrieve_cached_result($timesplittingobj, $analysable);
            $this->assertInstanceOf(\stored_file::class, $cachedanalysis);
        }

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
650
 
651
    /**
652
     * Tests the evaluation of already trained models.
653
     *
654
     * @coversNothing
655
     * @dataProvider provider_ml_processors
656
     * @param  string $predictionsprocessorclass
657
     * @param array $forcedconfig
658
     * @return void
659
     */
11 efrain 660
    public function test_ml_evaluation_trained_model($predictionsprocessorclass, $forcedconfig): void {
        $this->resetAfterTest(true);

        // Apply the forced plugin configuration first; the readiness check skips the test
        // when the requested predictions processor cannot be used.
        $this->set_forced_config($forcedconfig);
        $processor = $this->is_predictions_processor_ready($predictionsprocessorclass);

        $this->setAdminuser();
        set_config('enabled_stores', 'logstore_standard', 'tool_log');

        $quarters = '\\core\\analytics\\time_splitting\\quarters';

        // Train a model on freshly generated courses.
        $trainedmodel = $this->add_perfect_model();
        $this->generate_courses(50);
        $trainedmodel->update(true, false, $quarters, get_class($processor));
        $trainedmodel->train();

        // Export the trained model and import it again as a separate model.
        $exportname = 'model-zip-' . microtime() . '.zip';
        $exportpath = $trainedmodel->export_model($exportname);
        $importedmodel = \core_analytics\model::import_model($exportpath);

        // Evaluate the imported model in 'trainedmodel' mode and check the quarters result.
        $results = $importedmodel->evaluate(['mode' => 'trainedmodel']);
        $quartersresult = $results[$quarters];
        $this->assertEquals(0, $quartersresult->status);
        $this->assertEquals(1, $quartersresult->score);

        set_config('enabled_stores', '', 'tool_log');
        get_log_manager(true);
    }
688
 
689
    /**
690
     * Tests that existing indicator calculations are reused instead of recalculating each sample.
691
     *
692
     * @return void
693
     */
11 efrain 694
    public function test_read_indicator_calculations(): void {
        $this->resetAfterTest(true);

        $starttime = 123;
        $endtime = 321;
        $sampleorigin = 'whatever';

        // Every sample id passed below already has an entry in $existingcalcs, so the indicator
        // must not calculate any sample itself. The never() expectation on the mock is the
        // assertion of this test; the value returned by calculate() is intentionally ignored.
        $indicator = $this->getMockBuilder('test_indicator_max')->onlyMethods(['calculate_sample'])->getMock();
        $indicator->expects($this->never())->method('calculate_sample');

        $existingcalcs = [111 => 1, 222 => -1];
        $sampleids = [111 => 111, 222 => 222];
        $indicator->calculate($sampleids, $sampleorigin, $starttime, $endtime, $existingcalcs);
    }
710
 
711
    /**
712
     * Tests that only samples with at least one not null indicator value are included in the dataset.
713
     */
11 efrain 714
    public function test_not_null_samples(): void {
        $this->resetAfterTest(true);

        $quarters = \core_analytics\manager::get_time_splitting('\core\analytics\time_splitting\quarters');
        $quarters->set_analysable(new \core_analytics\site());

        $ranges = [
            ['start' => 111, 'end' => 222, 'time' => 222],
            ['start' => 222, 'end' => 333, 'time' => 333],
        ];
        $samples = [123 => 123, 321 => 321];

        $target = \core_analytics\manager::get_target('test_target_shortname');

        // Creates a model with the given indicator classes and returns the result of the analysis'
        // internal calculate_indicators() method for the samples and ranges defined above.
        $calculate = static function(array $indicatorclasses) use ($target, $quarters, $samples, $ranges) {
            $indicators = [];
            foreach ($indicatorclasses as $index => $classname) {
                $indicators[$index] = \core_analytics\manager::get_indicator($classname);
            }
            $model = \core_analytics\model::create($target, $indicators, '\core\analytics\time_splitting\no_splitting');

            $analyser = $model->get_analyser();
            $result = new \core_analytics\local\analysis\result_array($model->get_id(), false, $analyser->get_options());
            $analysis = new \core_analytics\analysis($analyser, false, $result);

            return \phpunit_util::call_internal_method($analysis, 'calculate_indicators', [$quarters, $samples, $ranges],
                '\core_analytics\analysis');
        };

        // One dataset row is expected per sample id and range index.
        $rowkeys = ['123-0', '123-1', '321-0', '321-1'];

        // Samples with at least 1 not null value are returned.
        $dataset = $calculate(['test_indicator_null', 'test_indicator_min']);
        foreach ($rowkeys as $rowkey) {
            $this->assertArrayHasKey($rowkey, $dataset);
        }

        // Samples with only null values are not returned.
        $dataset = $calculate(['test_indicator_null']);
        foreach ($rowkeys as $rowkey) {
            $this->assertArrayNotHasKey($rowkey, $dataset);
        }
    }
774
 
775
    /**
776
     * Data provider with the model quality, the number of courses and the expected evaluation status per time splitting method.
777
     *
778
     * @return array
779
     */
780
    public function provider_ml_test_evaluation_configuration() {
        $singlerange = '\core\analytics\time_splitting\single_range';
        $quarters = '\core\analytics\time_splitting\quarters';

        // Expected evaluation status for each time splitting method when using a random model.
        $bad = [
            'modelquality' => 'random',
            'ncourses' => 50,
            'expectedresults' => [
                $singlerange => \core_analytics\model::LOW_SCORE,
                $quarters => \core_analytics\model::LOW_SCORE,
            ],
        ];

        // Expected evaluation status for each time splitting method when using a perfect model.
        $good = [
            'modelquality' => 'perfect',
            'ncourses' => 50,
            'expectedresults' => [
                $singlerange => \core_analytics\model::OK,
                $quarters => \core_analytics\model::OK,
            ],
        ];

        // Each case is multiplied by the available predictions processors.
        return $this->add_prediction_processors(['bad' => $bad, 'good' => $good]);
    }
802
 
803
    /**
804
     * Creates a model that uses the max, min and random test indicators.
805
     *
806
     * @return \core_analytics\model
807
     */
808
    protected function add_random_model() {
        $target = \core_analytics\manager::get_target('test_target_shortname');

        // Resolve each indicator class name into an indicator instance.
        $indicators = [];
        foreach (['test_indicator_max', 'test_indicator_min', 'test_indicator_random'] as $classname) {
            $indicators[] = \core_analytics\manager::get_indicator($classname);
        }

        $created = \core_analytics\model::create($target, $indicators);

        // Instantiate the model again from its id so the db defaults are loaded as well.
        return new \core_analytics\model($created->get_id());
    }
821
 
822
    /**
823
     * Creates a model that uses the max, min and fullname test indicators.
824
     *
825
     * @param string $targetclass
826
     * @return \core_analytics\model
827
     */
828
    protected function add_perfect_model($targetclass = 'test_target_shortname') {
        $target = \core_analytics\manager::get_target($targetclass);

        // Map every indicator class name to its indicator instance, keeping the original order.
        $indicators = array_map(
            function($classname) {
                return \core_analytics\manager::get_indicator($classname);
            },
            ['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname']
        );

        $modelid = \core_analytics\model::create($target, $indicators)->get_id();

        // A new instance is built from the id so the db defaults are loaded as well.
        return new \core_analytics\model($modelid);
    }
840
 
841
    /**
842
     * Generates model for multi-classification
843
     *
844
     * @param string $targetclass
845
     * @return \core_analytics\model
846
     * @throws \coding_exception
     * @throws \moodle_exception
848
     */
849
    public function add_multiclass_model($targetclass = 'test_target_shortname_multiclass') {
        $target = \core_analytics\manager::get_target($targetclass);

        // Build the indicator instances used by the multi-classification model.
        $indicators = [];
        foreach (['test_indicator_fullname', 'test_indicator_multiclass'] as $position => $classname) {
            $indicators[$position] = \core_analytics\manager::get_indicator($classname);
        }

        // The model is returned as a new instance created from the stored model id.
        $stored = \core_analytics\model::create($target, $indicators);
        $modelid = $stored->get_id();
        return new \core_analytics\model($modelid);
    }
859
 
860
    /**
861
     * Generates $ncourses courses
862
     *
863
     * @param  int $ncourses The number of courses to be generated.
864
     * @param  array $params Course params
865
     * @return void
866
     */
867
    protected function generate_courses($ncourses, array $params = []) {

        // Values provided by the caller take precedence over these default course dates.
        $params += [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];

        // First $ncourses courses whose names start with 'a', then $ncourses starting with 'b'.
        foreach (['a', 'b'] as $prefix) {
            for ($created = 0; $created < $ncourses; $created++) {
                $name = $prefix . random_string(10);
                $this->getDataGenerator()->create_course(['shortname' => $name, 'fullname' => $name] + $params);
            }
        }
    }
885
 
886
    /**
887
     * Generates ncourses for multi-classification
888
     *
889
     * @param int $ncourses The number of courses to be generated.
890
     * @param array $params Course params
891
     * @return void
892
     */
893
    protected function generate_courses_multiclass($ncourses, array $params = []) {

        // Default course dates, only used for the keys the caller did not provide.
        $defaults = [
            'startdate' => mktime(0, 0, 0, 10, 24, 2015),
            'enddate' => mktime(0, 0, 0, 2, 24, 2016),
        ];
        $params = $params + $defaults;

        // One batch of $ncourses courses per first letter, created in the order a, b, c.
        foreach (['a', 'b', 'c'] as $firstletter) {
            for ($remaining = $ncourses; $remaining > 0; $remaining--) {
                $name = $firstletter . random_string(10);
                $courseparams = ['shortname' => $name, 'fullname' => $name] + $params;
                $this->getDataGenerator()->create_course($courseparams);
            }
        }
    }
916
 
917
    /**
918
     * Forces some configuration values.
919
     *
920
     * @param array $forcedconfig
921
     */
922
    protected function set_forced_config($forcedconfig) {
        // The prediction processors are always reset, even when there is nothing to force.
        \core_analytics\manager::reset_prediction_processors();

        if (!empty($forcedconfig)) {
            // $forcedconfig is indexed by plugin name, each entry being a list of name => value settings.
            foreach ($forcedconfig as $plugin => $settings) {
                foreach ($settings as $settingname => $settingvalue) {
                    set_config($settingname, $settingvalue, $plugin);
                }
            }
        }
    }
934
 
935
    /**
936
     * Is the provided processor ready using the current configuration in the site?
937
     *
938
     * @param  string  $predictionsprocessorclass
939
     * @return \core_analytics\predictor
940
     */
941
    protected function is_predictions_processor_ready(string $predictionsprocessorclass) {
        $processor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);

        // Only a strict true counts as ready; any other value is appended to the skip message.
        $status = $processor->is_ready();
        if ($status !== true) {
            $reason = 'Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready: ' . $status;
            $this->markTestSkipped($reason);
        }

        return $processor;
    }
951
 
952
    /**
953
     * Expands each of the provided cases into one case per available predictions processor.
954
     *
955
     * @param array $cases
956
     * @return array
957
     */
958
    protected function add_prediction_processors($cases) {

        $return = [];

        // The python server variant needs the four connection constants, as all of them are read
        // below to build the forced configuration. The password must be checked here as well,
        // otherwise a config.php that misses it would hit an undefined constant.
        $testpythonserver = defined('TEST_MLBACKEND_PYTHON_HOST') && defined('TEST_MLBACKEND_PYTHON_PORT')
                && defined('TEST_MLBACKEND_PYTHON_USERNAME') && defined('TEST_MLBACKEND_PYTHON_PASSWORD');

        // We need to test all prediction processors in the system.
        $predictionprocessors = \core_analytics\manager::get_all_prediction_processors();
        foreach ($predictionprocessors as $classfullname => $predictionsprocessor) {
            foreach ($cases as $key => $case) {

                if (!$predictionsprocessor instanceof \mlbackend_python\processor || !$testpythonserver) {
                    // Any non-python processor, or the python one without a test server configured.
                    $extraparams = ['predictionsprocessor' => $classfullname, 'forcedconfig' => null];
                    $return[$key . '-' . $classfullname] = $case + $extraparams;
                } else {

                    // We want the configuration to be forced during the test as things like importing models create new
                    // instances of ML backend processors during the process.
                    $forcedconfig = ['mlbackend_python' => ['useserver' => true, 'host' => TEST_MLBACKEND_PYTHON_HOST,
                        'port' => TEST_MLBACKEND_PYTHON_PORT, 'secure' => false, 'username' => TEST_MLBACKEND_PYTHON_USERNAME,
                        'password' => TEST_MLBACKEND_PYTHON_PASSWORD]];
                    $casekey = $key . '-' . $classfullname . '-server';
                    $return[$casekey] = $case + ['predictionsprocessor' => $classfullname, 'forcedconfig' => $forcedconfig];
                }
            }
        }

        return $return;
    }
990
}