Proyectos de Subversion Moodle

Rev

| Ultima modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
/**
18
 * Document representation.
19
 *
20
 * @package    search_solr
21
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
22
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23
 */
24
 
25
namespace search_solr;
26
 
27
defined('MOODLE_INTERNAL') || die();
28
 
29
/**
30
 * Represents a document to index.
31
 *
32
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
33
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
34
 */
35
class document extends \core_search\document {
    /**
     * Indicates the file contents were not indexed due to an error.
     */
    const INDEXED_FILE_ERROR = -1;

    /**
     * Indicates the file contents were not indexed due to filtering/settings.
     */
    const INDEXED_FILE_FALSE = 0;

    /**
     * Indicates the file contents are indexed with the record.
     */
    const INDEXED_FILE_TRUE = 1;

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * @var array
     */
    protected static $enginefields = array(
        'solr_filegroupingid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_fileid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_filecontenthash' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        // Stores the status of file indexing.
        'solr_fileindexstatus' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        // Field to index, but not store, file contents.
        'solr_filecontent' => array(
            'type' => 'text',
            'stored' => false,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Formats the timestamp according to the search engine needs.
     *
     * The timestamp is rendered in GMT using the engine's DATE_FORMAT.
     *
     * @param int $timestamp
     * @return string
     */
    public static function format_time_for_engine($timestamp) {
        return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
    }

    /**
     * Formats a string according to the search engine needs.
     *
     * The string is cut down so that it does not exceed the maximum number of bytes
     * accepted for a single value.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        // Limit of 32766 bytes (just under 2^15). We could convert this to a setting
        // as it is possible to change the max in solr.
        return \core_text::str_max_bytes($string, 32766);
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * @param string $time
     * @return int|false The timestamp, or false if strtotime() can not parse the value.
     */
    public static function import_time_from_engine($time) {
        return strtotime($time);
    }

    /**
     * Overwritten to use HTML (highlighting).
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_HTML;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * Even if this is called through an external function it is fine to return HTML as
     * HTML is considered solr's search engine text format. An external function can ask
     * for raw text, but this just means that it will not pass through format_text, not that
     * we can not add HTML.
     *
     * @param  string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        // Since we allow output for highlighting, we need to encode html entities.
        // This ensures plaintext html chars don't become valid html.
        $out = s($text);

        $startcount = 0;
        $endcount = 0;

        // Remove end/start pairs that span a few common separation characters. Allows us to highlight phrases instead of words.
        // The markers are quoted so they are always matched literally inside the pattern.
        $regex = '|' . preg_quote(engine::HIGHLIGHT_END, '|') . '([ .,-]{0,3})' .
                preg_quote(engine::HIGHLIGHT_START, '|') . '|';
        $merged = preg_replace($regex, '$1', $out);
        if ($merged !== null) {
            // Only use the result when the regex engine did not fail.
            $out = $merged;
        }

        // Now replace our start and end highlight markers.
        $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
        $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);

        // This makes sure any highlight tags are balanced, in case truncation or the highlight text contained our markers.
        if ($startcount > $endcount) {
            // More opening than closing tags: close the dangling ones at the end.
            $out .= str_repeat('</span>', $startcount - $endcount);
        } else if ($endcount > $startcount) {
            // More closing than opening tags: open the missing ones at the beginning.
            $out = str_repeat('<span class="highlight">', $endcount - $startcount) . $out;
        }

        return parent::format_text($out);
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        parent::apply_defaults();

        // We want to set the solr_filegroupingid to id if it isn't set.
        if (!isset($this->data['solr_filegroupingid'])) {
            $this->data['solr_filegroupingid'] = $this->data['id'];
        }
    }

    /**
     * Export the data for the given file in relation to this document.
     *
     * The result starts from this document's exported data, drops the text fields and
     * replaces the id, type, title and modified time with file specific values.
     *
     * @param \stored_file $file The stored file we are talking about.
     * @return array
     */
    public function export_file_for_engine($file) {
        $data = $this->export_for_engine();

        // Content is indexed in the main document.
        unset($data['content']);
        unset($data['description1']);
        unset($data['description2']);

        // Going to append the fileid to give it a unique id.
        $data['id'] = $data['id'].'-solrfile'.$file->get_id();
        $data['type'] = \core_search\manager::TYPE_FILE;
        $data['solr_fileid'] = $file->get_id();
        $data['solr_filecontenthash'] = $file->get_contenthash();
        $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
        $data['title'] = $file->get_filename();
        $data['modified'] = self::format_time_for_engine($file->get_timemodified());

        return $data;
    }
}