Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
// This file is part of Moodle - http://moodle.org/
3
//
4
// Moodle is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// Moodle is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
 
17
/**
18
 * Implementation of .tar.gz extractor. Handles extraction of .tar.gz files.
19
 * Do not call directly; use methods in tgz_packer.
20
 *
21
 * @see tgz_packer
22
 * @package core_files
23
 * @copyright 2013 The Open University
24
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25
 */
26
 
27
// Stop immediately unless the including script has defined MOODLE_INTERNAL.
if (!defined('MOODLE_INTERNAL')) {
    die();
}
28
 
29
/**
 * Extracts .tar.gz files (POSIX format).
 *
 * The archive is decompressed in READ_BLOCK_SIZE chunks and then walked in
 * tar blocks of tgz_packer::TAR_BLOCK_SIZE bytes. Each block is either a
 * header (when no file is in progress) or file data (when one is).
 */
class tgz_extractor {
    /**
     * @var int When writing data, the system writes blocks of this size.
     */
    const WRITE_BLOCK_SIZE = 65536;
    /**
     * @var int When reading data, the system reads blocks of this size.
     */
    const READ_BLOCK_SIZE = 65536;
    /**
     * @var stored_file File object for archive.
     */
    protected $storedfile;
    /**
     * @var string OS path for archive.
     */
    protected $ospath;
    /**
     * @var int Number of files (-1 if not known).
     */
    protected $numfiles = -1;
    /**
     * @var int Number of files processed so far.
     */
    protected $donefiles = 0;
    /**
     * @var string Current file path within archive.
     */
    protected $currentarchivepath;
    /**
     * @var string|bool|null Full path to current file; true while a file is
     *      being read but discarded; null when no file is in progress.
     */
    protected $currentfile;
    /**
     * @var int Size of current file in bytes.
     */
    protected $currentfilesize;
    /**
     * @var int Number of bytes of current file already written into buffer.
     */
    protected $currentfileprocessed;
    /**
     * @var resource File handle to current file.
     */
    protected $currentfp;
    /**
     * @var int Modified time of current file.
     */
    protected $currentmtime;
    /**
     * @var string Buffer containing file data awaiting write.
     */
    protected $filebuffer;
    /**
     * @var int Current length of buffer in bytes.
     */
    protected $filebufferlength;
    /**
     * @var array Results array of all files processed.
     */
    protected $results = array();

    /**
     * @var array In list mode, content of the list; outside list mode, null.
     */
    protected $listresults = null;

    /**
     * @var int Whether listing or extracting.
     */
    protected $mode = self::MODE_EXTRACT;

    /**
     * @var int If extracting (default).
     */
    const MODE_EXTRACT = 0;

    /**
     * @var int Listing contents.
     */
    const MODE_LIST = 1;

    /**
     * @var int Listing contents; list now complete.
     */
    const MODE_LIST_COMPLETE = 2;

    /**
     * Constructor.
     *
     * @param stored_file|string $archivefile Moodle file or OS path to archive
     */
    public function __construct($archivefile) {
        if (is_a($archivefile, 'stored_file')) {
            $this->storedfile = $archivefile;
        } else {
            $this->ospath = $archivefile;
        }
    }

    /**
     * Extracts the archive.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @param file_progress $progress Optional progress reporting
     * @return array Array from archive path => true of processed files
     *      (empty array if nothing was extracted)
     * @throws moodle_exception If there is any error processing the archive
     */
    public function extract(tgz_extractor_handler $handler, file_progress $progress = null) {
        $this->mode = self::MODE_EXTRACT;
        // Start from an empty array so that the documented return type holds
        // even when the archive contains no extractable entries.
        $this->results = array();
        $this->extract_or_list($handler, $progress);
        $results = $this->results;
        $this->results = array();
        return $results;
    }

    /**
     * Extracts or lists the archive depending on $this->mode.
     *
     * The archive handle and any half-written output file are always
     * released, including when an exception interrupts processing.
     *
     * @param tgz_extractor_handler $handler Optional handler
     * @param file_progress $progress Optional progress reporting
     * @throws moodle_exception If there is any error processing the archive
     */
    protected function extract_or_list(tgz_extractor_handler $handler = null, file_progress $progress = null) {
        // Reset per-run state so that an earlier (possibly failed) run on this
        // object cannot leak into this one.
        $this->numfiles = -1;
        $this->donefiles = 0;
        $this->currentfile = null;
        $this->currentfp = null;
        $this->currentarchivepath = null;

        // Open archive.
        if ($this->storedfile) {
            $gz = $this->storedfile->get_content_file_handle(stored_file::FILE_HANDLE_GZOPEN);
            // Estimate number of read-buffers (64KB) in file. Guess that the
            // uncompressed size is 2x compressed size. Add one just to ensure
            // it's non-zero.
            $estimatedbuffers = ($this->storedfile->get_filesize() * 2 / self::READ_BLOCK_SIZE) + 1;
        } else {
            $gz = gzopen($this->ospath, 'rb');
            $estimatedbuffers = (filesize($this->ospath) * 2 / self::READ_BLOCK_SIZE) + 1;
        }
        if (!$gz) {
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Failed to open gzip file');
        }

        // Calculate how much progress to report per buffer read.
        $progressperbuffer = (int)(tgz_packer::PROGRESS_MAX / $estimatedbuffers);

        // Process archive in 512-byte blocks (but reading 64KB at a time).
        $buffer = '';
        $bufferpos = 0;
        $bufferlength = 0;
        $done = 0;
        $beforeprogress = -1;
        try {
            while (true) {
                // Use >= rather than == so that a read which is not a multiple
                // of the tar block size (truncated archive) cannot run past
                // the end of the buffer.
                if ($bufferpos >= $bufferlength) {
                    $buffer = gzread($gz, self::READ_BLOCK_SIZE);
                    if ($buffer === false || $buffer === '') {
                        // EOF (or read error).
                        break;
                    }
                    $bufferpos = 0;
                    $bufferlength = strlen($buffer);

                    // Report progress if enabled (exactly once per buffer).
                    if ($progress) {
                        if ($this->numfiles === -1) {
                            // If we don't know the number of files, do an estimate based
                            // on number of buffers read.
                            $done += $progressperbuffer;
                            if ($done >= tgz_packer::PROGRESS_MAX) {
                                $done = tgz_packer::PROGRESS_MAX - 1;
                            }
                        } else {
                            // Once we know the number of files, use this.
                            if ($beforeprogress === -1) {
                                $beforeprogress = $done;
                            }
                            // Calculate progress as whatever progress we reported
                            // before we knew how many files there were (might be 0)
                            // plus a proportion of the number of files out of the
                            // remaining progress value. Skip when the index claims
                            // zero files, to avoid dividing by zero.
                            if ($this->numfiles > 0) {
                                $done = $beforeprogress + (int)(($this->donefiles / $this->numfiles) *
                                        (tgz_packer::PROGRESS_MAX - $beforeprogress));
                            }
                        }
                        $progress->progress($done, tgz_packer::PROGRESS_MAX);
                    }
                }

                $block = substr($buffer, $bufferpos, tgz_packer::TAR_BLOCK_SIZE);
                if ($this->currentfile) {
                    $this->process_file_block($block, $handler);
                } else {
                    $this->process_header($block, $handler);
                }

                // When listing, if we read an index file, we abort archive processing.
                if ($this->mode === self::MODE_LIST_COMPLETE) {
                    break;
                }

                $bufferpos += tgz_packer::TAR_BLOCK_SIZE;
            }
        } finally {
            // Release whatever is still open if processing stopped part-way
            // through a file (exception or truncated archive).
            if (is_resource($this->currentfp)) {
                fclose($this->currentfp);
            }
            // A half-written temporary index file is of no use to anybody.
            if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE &&
                    is_string($this->currentfile) && file_exists($this->currentfile)) {
                unlink($this->currentfile);
            }
            $this->currentfp = null;
            $this->currentfile = null;
            $this->currentarchivepath = null;

            // Close archive and finish.
            gzclose($gz);
        }
    }

    /**
     * Lists files in the archive, either using the index file (if present),
     * or by basically extracting the whole thing if there isn't an index file.
     *
     * @return array Array of file listing results; each entry is an object
     *      with original_pathname, pathname, mtime, is_directory and size
     */
    public function list_files() {
        $this->listresults = array();
        $this->mode = self::MODE_LIST;
        $this->extract_or_list();
        $listresults = $this->listresults;
        $this->listresults = null;
        return $listresults;
    }

    /**
     * Process 512-byte header block.
     *
     * @param string $block Tar block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the block is not a ustar header
     */
    protected function process_header($block, $handler) {
        // If the block consists entirely of nulls, ignore it. (This happens
        // twice at end of archive.)
        if ($block === str_repeat("\0", tgz_packer::TAR_BLOCK_SIZE)) {
            return;
        }

        // struct header_posix_ustar {
        //    char name[100];
        $name = rtrim(substr($block, 0, 100), "\0");

        //    char mode[8];
        //    char uid[8];
        //    char gid[8];
        //    char size[12];
        // Strip space/NUL padding first: octdec() is deprecated for input
        // containing non-octal characters as of PHP 7.4.
        $filesize = octdec(trim(substr($block, 124, 11), " \0"));

        //    char mtime[12];
        $mtime = octdec(trim(substr($block, 136, 11), " \0"));

        //    char checksum[8];
        //    char typeflag[1];
        $typeflag = substr($block, 156, 1);

        //    char linkname[100];
        //    char magic[6];
        $magic = substr($block, 257, 6);
        if ($magic !== "ustar\0" && $magic !== "ustar ") {
            // There are two checks above; the first is the correct POSIX format
            // and the second is for GNU tar default format.
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Header does not have POSIX ustar magic string');
        }

        //    char version[2];
        //    char uname[32];
        //    char gname[32];
        //    char devmajor[8];
        //    char devminor[8];
        //    char prefix[155];
        $prefix = rtrim(substr($block, 345, 155), "\0");

        //    char pad[12];
        // };

        $archivepath = ltrim($prefix . '/' . $name, '/');

        // For security, ensure there is no .. folder in the archivepath.
        $archivepath = clean_param($archivepath, PARAM_PATH);

        // Handle file depending on the type.
        switch ($typeflag) {
            case '1' :
            case '2' :
            case '3' :
            case '4' :
            case '6' :
            case '7' :
                // Ignore these special cases.
                break;

            case '5' :
                // Directory.
                if ($this->mode === self::MODE_LIST) {
                    $this->listresults[] = (object)array(
                            'original_pathname' => $archivepath,
                            'pathname' => $archivepath,
                            'mtime' => $mtime,
                            'is_directory' => true,
                            'size' => 0);
                } else if ($handler->tgz_directory($archivepath, $mtime)) {
                    $this->results[$archivepath] = true;
                }
                break;

            default:
                // All other values treated as normal file.
                $this->start_current_file($archivepath, $filesize, $mtime, $handler);
                break;
        }
    }

    /**
     * Processes one 512-byte block of an existing file.
     *
     * @param string $block Data block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the buffered data cannot be written in full
     */
    protected function process_file_block($block, tgz_extractor_handler $handler = null) {
        // Write block into buffer.
        $blocksize = tgz_packer::TAR_BLOCK_SIZE;
        if ($this->currentfileprocessed + tgz_packer::TAR_BLOCK_SIZE > $this->currentfilesize) {
            // Partial block at end of file.
            $blocksize = $this->currentfilesize - $this->currentfileprocessed;
            $this->filebuffer .= substr($block, 0, $blocksize);
        } else {
            // Full-length block.
            $this->filebuffer .= $block;
        }
        $this->filebufferlength += $blocksize;
        $this->currentfileprocessed += $blocksize;

        // Write block to file if necessary.
        $eof = $this->currentfileprocessed == $this->currentfilesize;
        if ($this->filebufferlength >= self::WRITE_BLOCK_SIZE || $eof) {
            // Except when skipping the file, write it out.
            if ($this->currentfile !== true) {
                // fwrite() returns the number of bytes written; anything short
                // of the whole buffer (e.g. disk full) is a failure too.
                $written = fwrite($this->currentfp, $this->filebuffer);
                if ($written === false || $written !== strlen($this->filebuffer)) {
                    throw new moodle_exception('errorprocessingarchive', '', '', null,
                            'Failed to write buffer to output file: ' . $this->currentfile);
                }
            }
            $this->filebuffer = '';
            $this->filebufferlength = 0;
        }

        // If file is finished, close it.
        if ($eof) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Starts processing a file from archive.
     *
     * @param string $archivepath Path inside archive
     * @param int $filesize Size in bytes
     * @param int $mtime File-modified time
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the output file cannot be created or opened
     */
    protected function start_current_file($archivepath, $filesize, $mtime,
            tgz_extractor_handler $handler = null) {
        global $CFG;

        $this->currentarchivepath = $archivepath;
        $this->currentmtime = $mtime;
        $this->currentfilesize = $filesize;
        $this->currentfileprocessed = 0;

        if ($archivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            // For index file, store in temp directory.
            $tempfolder = $CFG->tempdir . '/core_files';
            check_dir_exists($tempfolder);
            $tempfile = tempnam($tempfolder, '.index');
            if ($tempfile === false) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to create temporary index file in: ' . $tempfolder);
            }
            $this->currentfile = $tempfile;
        } else {
            if ($this->mode === self::MODE_LIST) {
                // If listing, add to list.
                $this->listresults[] = (object)array(
                        'original_pathname' => $archivepath,
                        'pathname' => $archivepath,
                        'mtime' => $mtime,
                        'is_directory' => false,
                        'size' => $filesize);

                // Discard file.
                $this->currentfile = true;
            } else {
                // For other files, ask handler for location.
                $this->currentfile = $handler->tgz_start_file($archivepath);
                if ($this->currentfile === null) {
                    // This indicates that we are discarding the current file.
                    $this->currentfile = true;
                }
            }
        }
        $this->filebuffer = '';
        $this->filebufferlength = 0;

        // Open file.
        if ($this->currentfile !== true) {
            $this->currentfp = fopen($this->currentfile, 'wb');
            if (!$this->currentfp) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to open output file: ' . $this->currentfile);
            }
        } else {
            $this->currentfp = null;
        }

        // If it has no size, close it right away.
        if ($filesize == 0) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Closes the current file, calls handler, and sets up data.
     *
     * For the archive index file the temporary copy is consumed here: in list
     * mode it supplies the listing, otherwise it supplies the file count used
     * for progress reporting. The temporary copy is then deleted.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If there is an error closing it
     */
    protected function close_current_file($handler) {
        if ($this->currentfp !== null) {
            if (!fclose($this->currentfp)) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to close output file: ' .  $this->currentfile);
            }

            // At this point we should touch the file to set its modified
            // time to $this->currentmtime. However, when extracting to the
            // temp directory, cron will delete files more than a week old,
            // so to avoid problems we leave all files at their current time.
        }

        if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            if ($this->mode === self::MODE_LIST) {
                // When listing, use the archive index to produce the list,
                // but only if the whole index parsed cleanly; otherwise carry
                // on scanning the archive itself.
                $entries = $this->read_index_entries($this->currentfile);
                if ($entries !== null) {
                    foreach ($entries as $entry) {
                        $this->listresults[] = $entry;
                    }
                    $this->mode = self::MODE_LIST_COMPLETE;
                }
            } else {
                // For index file, get number of files.
                $contents = file_get_contents($this->currentfile, false, null, 0, 128);
                $matches = array();
                if (is_string($contents) &&
                        preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') .
                        '([0-9]+)~', $contents, $matches)) {
                    $this->numfiles = (int)$matches[1];
                }
            }
            // Either way the temp file is no longer needed.
            unlink($this->currentfile);
        } else {
            // Report to handler and put in results.
            if ($this->currentfp !== null) {
                $handler->tgz_end_file($this->currentarchivepath, $this->currentfile);
                $this->results[$this->currentarchivepath] = true;
            }
            $this->donefiles++;
        }

        // No longer have a current file.
        $this->currentfp = null;
        $this->currentfile = null;
        $this->currentarchivepath = null;
    }

    /**
     * Parses an extracted archive index file into list entries.
     *
     * The first line must start with tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX.
     * Every following non-blank line must hold at least four tab-separated
     * fields: path, type ('d' for directory), size, and mtime ('?' if unknown).
     *
     * @param string $indexpath OS path of the temporary index file
     * @return array|null Array of entry objects, or null if the index is
     *      unreadable, empty or malformed and must be ignored
     */
    protected function read_index_entries($indexpath) {
        $lines = file($indexpath);
        if (!$lines) {
            // Unreadable or empty: there is no header to validate.
            return null;
        }

        // First line (header): check it's valid then skip it.
        $header = array_shift($lines);
        if (!preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') . '~', $header)) {
            return null;
        }

        $entries = array();
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }
            // Split on tabs; a line with missing fields invalidates the index.
            $values = explode("\t", $line);
            if (count($values) < 4) {
                return null;
            }
            $entries[] = (object)array(
                'original_pathname' => $values[0],
                'pathname' => $values[0],
                'mtime' => ($values[3] === '?' ? tgz_packer::DEFAULT_TIMESTAMP : (int)$values[3]),
                'is_directory' => $values[1] === 'd',
                'size' => (int)$values[2]);
        }
        return $entries;
    }

}
520
 
521
/**
 * Callback contract used by tgz_extractor::extract.
 *
 * For every regular file in the archive the extractor calls tgz_start_file
 * and then, once the data has been written, tgz_end_file. The one exception
 * is the special .ARCHIVE_INDEX file, which is never reported to the handler.
 *
 * Each directory entry found in the archive results in one call to
 * tgz_directory.
 */
interface tgz_extractor_handler {
    /**
     * Invoked just before a file is extracted.
     *
     * The handler chooses the location on disk that the extracted data is
     * written to; a temporary location and the final target are both fine.
     *
     * Returning null skips the file: nothing is written for it and
     * tgz_end_file is not called.
     *
     * @param string $archivepath Path and name of file within archive
     * @return string Location for output file in filesystem, or null to skip file
     */
    public function tgz_start_file($archivepath);

    /**
     * Invoked once a file has been completely extracted, so that the handler
     * can post-process it if it needs to.
     *
     * NOTE(review): tgz_extractor::close_current_file in this file does not
     * inspect the value returned here - confirm before relying on the
     * documented abort behaviour.
     *
     * @param string $archivepath Path and name of file within archive
     * @param string $realpath Path in filesystem (from tgz_start_file return)
     * @return bool True to continue processing, false to abort archive extract
     */
    public function tgz_end_file($archivepath, $realpath);

    /**
     * Invoked for each directory entry in the archive.
     *
     * The handler may create the matching directory if it wants one.
     *
     * @param string $archivepath Path and name of directory within archive
     * @param int $mtime Modified time of directory
     * @return bool True if directory was created, false if skipped
     */
    public function tgz_directory($archivepath, $mtime);
}