1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
/**
|
|
|
18 |
* Keeps track of the analysis results by storing the results in files.
|
|
|
19 |
*
|
|
|
20 |
* @package core_analytics
|
|
|
21 |
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
|
|
|
22 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
23 |
*/
|
|
|
24 |
|
|
|
25 |
namespace core_analytics\local\analysis;
|
|
|
26 |
|
|
|
27 |
// Abort immediately unless the MOODLE_INTERNAL constant has already been defined.
defined('MOODLE_INTERNAL') || die();
|
|
|
28 |
|
|
|
29 |
/**
|
|
|
30 |
* Keeps track of the analysis results by storing the results in files.
|
|
|
31 |
*
|
|
|
32 |
* @package core_analytics
|
|
|
33 |
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
|
|
|
34 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
35 |
*/
|
|
|
36 |
class result_file extends result {

    /**
     * Analysis result files collected so far, indexed by time-splitting method id.
     *
     * Each element is the list of results added for that time-splitting method, either through
     * add_analysable_results() or as pending files picked up in get().
     *
     * @var array
     */
    private $filesbytimesplitting = [];

    /**
     * Stores the analysis results.
     *
     * Only entries whose ->result attribute is not empty are kept.
     *
     * @param array $results Analysis results indexed by time-splitting method id
     * @return bool True if anything was successfully analysed
     */
    public function add_analysable_results(array $results): bool {

        $any = false;

        // Process all provided time splitting methods.
        foreach ($results as $timesplittingid => $result) {
            if (empty($result->result)) {
                // Nothing usable was generated for this time-splitting method.
                continue;
            }
            $this->filesbytimesplitting[$timesplittingid][] = $result->result;
            $any = true;
        }

        return $any;
    }

    /**
     * Retrieves cached results during evaluation.
     *
     * A previous result is only reused when both the 'evaluation' and the 'reuseprevanalysed' options
     * are set and the previous file was created less than a week ago.
     *
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed A \stored_file in this case, false if there is nothing to reuse.
     */
    public function retrieve_cached_result(\core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\analysable $analysable) {

        // For evaluation purposes we don't need to be that strict about how updated the data is,
        // if this analyser was analysed less than 1 week ago we skip generating a new one. This
        // helps scale the evaluation process as sites with tons of courses may need a lot of time to
        // complete an evaluation.
        if (empty($this->options['evaluation']) || empty($this->options['reuseprevanalysed'])) {
            return false;
        }

        $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
            $analysable->get_id(), $timesplitting->get_id());
        if (!$previousanalysis) {
            return false;
        }

        // 1 week is a partly random time interval, no need to worry about DST.
        $boundary = time() - WEEKSECS;
        if ($previousanalysis->get_timecreated() > $boundary) {
            // Recover the previous analysed file and avoid generating a new one.
            return $previousanalysis;
        }

        return false;
    }

    /**
     * Formats the result.
     *
     * Prepends the model metadata to the provided data and stores everything through a dataset manager.
     *
     * @param array $data
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed A \stored_file in this case, false if the dataset could not be stored.
     */
    public function format_result(array $data, \core_analytics\local\target\base $target,
            \core_analytics\local\time_splitting\base $timesplitting, \core_analytics\analysable $analysable) {

        $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(),
            $timesplitting->get_id(), $this->get_dataset_filearea(), $this->options['evaluation']);

        // Add extra metadata.
        $this->add_model_metadata($data, $timesplitting, $target);

        // Write all calculated data to a file. Any falsy value returned by store() is reported as false.
        $result = $dataset->store($data);

        return $result ? $result : false;
    }

    /**
     * Returns the results of the analysis.
     *
     * All the files collected for each time-splitting method are merged into a single dataset.
     *
     * @return array The merged datasets indexed by time-splitting method id.
     */
    public function get(): array {

        $pendingfiles = [];
        if ($this->options['evaluation'] === false) {
            // Look for previous training and prediction files we generated and couldn't be used
            // by machine learning backends because they weren't big enough.
            $pendingfiles = \core_analytics\dataset_manager::get_pending_files($this->modelid, $this->includetarget,
                array_keys($this->filesbytimesplitting));
            foreach ($pendingfiles as $timesplittingid => $files) {
                foreach ($files as $file) {
                    $this->filesbytimesplitting[$timesplittingid][] = $file;
                }
            }
        }

        // The file area does not depend on the time-splitting method, so it is resolved only once.
        $filearea = $this->get_dataset_filearea();

        // We join the datasets by time splitting method.
        $timesplittingfiles = [];
        foreach ($this->filesbytimesplitting as $timesplittingid => $files) {

            if ($this->options['evaluation'] === true) {
                // Delete the previous copy. Only when evaluating.
                \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
            }

            // Merge all course files into one.
            $timesplittingfiles[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets($files,
                $this->modelid, $timesplittingid, $filearea, $this->options['evaluation']);
        }

        if (!empty($pendingfiles)) {
            // We must remove them now as they are already part of another dataset.
            foreach ($pendingfiles as $files) {
                foreach ($files as $file) {
                    $file->delete();
                }
            }
        }

        return $timesplittingfiles;
    }

    /**
     * Returns the dataset file area this result works with.
     *
     * @return mixed dataset_manager::LABELLED_FILEAREA if the target is included,
     *               dataset_manager::UNLABELLED_FILEAREA otherwise.
     */
    private function get_dataset_filearea() {
        if (!empty($this->includetarget)) {
            return \core_analytics\dataset_manager::LABELLED_FILEAREA;
        }
        return \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
    }

    /**
     * Adds target metadata to the dataset.
     *
     * The final dataset document will look like this:
     * ----------------------------------------------------
     * metadata1,metadata2,metadata3,.....
     * value1, value2, value3,.....
     *
     * header1,header2,header3,header4,.....
     * stud1value1,stud1value2,stud1value3,stud1value4,.....
     * stud2value1,stud2value2,stud2value3,stud2value4,.....
     * .....
     * ----------------------------------------------------
     *
     * @param array $data The dataset, modified in place: two metadata rows are prepended to it.
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\local\target\base $target
     * @return void
     */
    private function add_model_metadata(array &$data, \core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\local\target\base $target) {
        global $CFG;

        // If no target the first column is the sampleid, if target the last column is the target.
        // This will need to be updated when we support unsupervised learning models.
        // The first row is read with reset() so the value does not depend on the array's internal pointer,
        // and the count is guarded because count() throws a TypeError on non-countable values in PHP 8
        // (reset() returns false when $data is empty).
        $firstrow = reset($data);
        if (is_array($firstrow) || $firstrow instanceof \Countable) {
            $nfeatures = count($firstrow) - 1;
        } else {
            $nfeatures = 0;
        }

        $metadata = [
            'timesplitting' => $timesplitting->get_id(),
            'nfeatures' => $nfeatures,
            'moodleversion' => $CFG->version,
            'targetcolumn' => $target->get_id(),
        ];
        if ($target->is_linear()) {
            $metadata['targettype'] = 'linear';
            $metadata['targetmin'] = $target::get_min_value();
            $metadata['targetmax'] = $target::get_max_value();
        } else {
            $metadata['targettype'] = 'discrete';
            $metadata['targetclasses'] = json_encode($target::get_classes());
        }

        // The first 2 rows will be used to store metadata about the dataset: one row with the
        // metadata names and one row with their values.
        // This will also reset samples' dataset keys.
        array_unshift($data, array_keys($metadata), array_values($metadata));
    }
}
|