Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
/**
18
 * Keeps track of the analysis results by storing the results in files.
19
 *
20
 * @package   core_analytics
21
 * @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
22
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
namespace core_analytics\local\analysis;
26
 
27
defined('MOODLE_INTERNAL') || die();
28
 
29
/**
 * Keeps track of the analysis results by storing the results in files.
 *
 * Result files are grouped by time-splitting method id as they are added, and they are
 * merged into a single dataset per time-splitting method when get() is called.
 *
 * @package   core_analytics
 * @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class result_file extends result {

    /**
     * Stores the analysis results by time-splitting method.
     *
     * Keys are time-splitting method ids, values are lists of result files.
     *
     * @var array
     */
    private $filesbytimesplitting = [];

    /**
     * Stores the analysis results.
     *
     * Every element of $results is expected to expose a 'result' property; elements
     * whose 'result' is empty are ignored.
     *
     * @param  array $results Analysis results indexed by time-splitting method id
     * @return bool            True if anything was successfully analysed
     */
    public function add_analysable_results(array $results): bool {

        $any = false;

        // Process all provided time splitting methods.
        foreach ($results as $timesplittingid => $result) {
            if (!empty($result->result)) {
                $this->filesbytimesplitting[$timesplittingid][] = $result->result;
                $any = true;
            }
        }

        return $any;
    }

    /**
     * Retrieves cached results during evaluation.
     *
     * A previous analysis is only reused when both the 'evaluation' and the
     * 'reuseprevanalysed' options are set and the previous file is less than one week old.
     *
     * @param  \core_analytics\local\time_splitting\base $timesplitting
     * @param  \core_analytics\analysable                $analysable
     * @return mixed A \stored_file in this case, or false if there is nothing to reuse.
     */
    public function retrieve_cached_result(\core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\analysable $analysable) {

        // For evaluation purposes we don't need to be that strict about how updated the data is,
        // if this analyser was analysed less than 1 week ago we skip generating a new one. This
        // helps scale the evaluation process as sites with tons of courses may need a lot of time to
        // complete an evaluation.
        if (empty($this->options['evaluation']) || empty($this->options['reuseprevanalysed'])) {
            return false;
        }

        $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
            $analysable->get_id(), $timesplitting->get_id());

        // 1 week is a partly random time interval, no need to worry about DST.
        $boundary = time() - WEEKSECS;
        if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
            // Recover the previous analysed file and avoid generating a new one.
            return $previousanalysis;
        }

        return false;
    }

    /**
     * Formats the result.
     *
     * Prepends the model metadata rows to $data and writes everything to a dataset file
     * in the labelled or unlabelled file area, depending on whether the target is included.
     *
     * @param  array                                     $data
     * @param  \core_analytics\local\target\base         $target
     * @param  \core_analytics\local\time_splitting\base $timesplitting
     * @param  \core_analytics\analysable                $analysable
     * @return mixed A \stored_file in this case, or false if the data could not be stored.
     */
    public function format_result(array $data, \core_analytics\local\target\base $target,
            \core_analytics\local\time_splitting\base $timesplitting, \core_analytics\analysable $analysable) {

        if (!empty($this->includetarget)) {
            $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        } else {
            $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
        }
        $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(),
            $timesplitting->get_id(), $filearea, $this->options['evaluation']);

        // Add extra metadata.
        $this->add_model_metadata($data, $timesplitting, $target);

        // Write all calculated data to a file.
        $file = $dataset->store($data);
        if (!$file) {
            return false;
        }

        return $file;
    }

    /**
     * Returns the results of the analysis.
     *
     * @return array One merged dataset (as returned by dataset_manager::merge_datasets())
     *               per time-splitting method id.
     */
    public function get(): array {

        $pendingfiles = [];
        if ($this->options['evaluation'] === false) {
            // Look for previous training and prediction files we generated and couldn't be used
            // by machine learning backends because they weren't big enough.
            $pendingfiles = \core_analytics\dataset_manager::get_pending_files($this->modelid, $this->includetarget,
                array_keys($this->filesbytimesplitting));
            foreach ($pendingfiles as $timesplittingid => $files) {
                foreach ($files as $file) {
                    $this->filesbytimesplitting[$timesplittingid][] = $file;
                }
            }
        }

        // The file area only depends on whether the target is included, so it is the same
        // for every time splitting method.
        if ($this->includetarget) {
            $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        } else {
            $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
        }

        // We join the datasets by time splitting method.
        $timesplittingfiles = [];
        foreach ($this->filesbytimesplitting as $timesplittingid => $files) {

            if ($this->options['evaluation'] === true) {
                // Delete the previous copy. Only when evaluating.
                \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
            }

            // Merge all course files into one.
            $timesplittingfiles[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets($files,
                $this->modelid, $timesplittingid, $filearea, $this->options['evaluation']);
        }

        if (!empty($pendingfiles)) {
            // We must remove them now as they are already part of another dataset.
            foreach ($pendingfiles as $files) {
                foreach ($files as $file) {
                    $file->delete();
                }
            }
        }

        return $timesplittingfiles;
    }

    /**
     * Adds target metadata to the dataset.
     *
     * The final dataset document will look like this:
     * ----------------------------------------------------
     * metadata1,metadata2,metadata3,.....
     * value1, value2, value3,.....
     *
     * header1,header2,header3,header4,.....
     * stud1value1,stud1value2,stud1value3,stud1value4,.....
     * stud2value1,stud2value2,stud2value3,stud2value4,.....
     * .....
     * ----------------------------------------------------
     *
     * @param array $data Dataset rows, modified in place: two metadata rows are prepended.
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\local\target\base         $target
     * @return void
     */
    private function add_model_metadata(array &$data, \core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\local\target\base $target) {
        global $CFG;

        // Read the first row explicitly instead of relying on the array's internal pointer.
        // reset() returns false for an empty dataset and count(false) is a TypeError in PHP 8,
        // so an empty dataset is reported as having no features.
        $firstrow = reset($data);
        $nfeatures = is_array($firstrow) ? count($firstrow) - 1 : 0;

        // If no target the first column is the sampleid, if target the last column is the target.
        // This will need to be updated when we support unsupervised learning models.
        $metadata = [
            'timesplitting' => $timesplitting->get_id(),
            'nfeatures' => $nfeatures,
            'moodleversion' => $CFG->version,
            'targetcolumn' => $target->get_id(),
        ];
        if ($target->is_linear()) {
            $metadata['targettype'] = 'linear';
            $metadata['targetmin'] = $target::get_min_value();
            $metadata['targetmax'] = $target::get_max_value();
        } else {
            $metadata['targettype'] = 'discrete';
            $metadata['targetclasses'] = json_encode($target::get_classes());
        }

        // The first 2 samples will be used to store metadata about the dataset:
        // one row with the metadata names and one row with their values.
        $metadatacolumns = array_keys($metadata);
        $metadatavalues = array_values($metadata);

        // This will also reset samples' dataset keys.
        array_unshift($data, $metadatacolumns, $metadatavalues);
    }
}