Ir a la última revisión | Autoría | Comparar con el anterior | Última modificación | Ver Log |
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
 * Implementation of .tar.gz extractor. Handles extraction of .tar.gz files.
 * Do not call directly; use methods in tgz_packer.
 *
 * @see tgz_packer
 * @package core_files
 * @copyright 2013 The Open University
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/**
 * Extracts .tar.gz files (POSIX format).
 *
 * The archive is read in READ_BLOCK_SIZE chunks and handed on in tar-sized
 * blocks: each block is either a header (process_header) or part of the
 * file currently being extracted (process_file_block).
 */
class tgz_extractor {
    /**
     * @var int When writing data, the system writes blocks of this size.
     */
    const WRITE_BLOCK_SIZE = 65536;
    /**
     * @var int When reading data, the system reads blocks of this size.
     */
    const READ_BLOCK_SIZE = 65536;
    /**
     * @var stored_file File object for archive.
     */
    protected $storedfile;
    /**
     * @var string OS path for archive.
     */
    protected $ospath;
    /**
     * @var int Number of files (-1 if not known).
     */
    protected $numfiles;
    /**
     * @var int Number of files processed so far.
     */
    protected $donefiles;
    /**
     * @var string Current file path within archive.
     */
    protected $currentarchivepath;
    /**
     * @var string|bool|null Full path to current file; true while the current
     *   file is being skipped; null when no file is in progress.
     */
    protected $currentfile;
    /**
     * @var int Size of current file in bytes.
     */
    protected $currentfilesize;
    /**
     * @var int Number of bytes of current file already written into buffer.
     */
    protected $currentfileprocessed;
    /**
     * @var resource File handle to current file.
     */
    protected $currentfp;
    /**
     * @var int Modified time of current file.
     */
    protected $currentmtime;
    /**
     * @var string Buffer containing file data awaiting write.
     */
    protected $filebuffer;
    /**
     * @var int Current length of buffer in bytes.
     */
    protected $filebufferlength;
    /**
     * @var array Results array of all files processed.
     */
    protected $results;

    /**
     * @var array In list mode, content of the list; outside list mode, null.
     */
    protected $listresults = null;

    /**
     * @var int Whether listing or extracting.
     */
    protected $mode = self::MODE_EXTRACT;

    /**
     * @var int If extracting (default).
     */
    const MODE_EXTRACT = 0;

    /**
     * @var int Listing contents.
     */
    const MODE_LIST = 1;

    /**
     * @var int Listing contents; list now complete.
     */
    const MODE_LIST_COMPLETE = 2;

    /**
     * Constructor.
     *
     * @param stored_file|string $archivefile Moodle file or OS path to archive
     */
    public function __construct($archivefile) {
        if (is_a($archivefile, 'stored_file')) {
            $this->storedfile = $archivefile;
        } else {
            $this->ospath = $archivefile;
        }
    }

    /**
     * Extracts the archive.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @param file_progress $progress Optional progress reporting
     * @return array|null Array from archive path => true of processed files;
     *   null if no file or directory was recorded during the run
     * @throws moodle_exception If there is any error processing the archive
     */
    public function extract(tgz_extractor_handler $handler, file_progress $progress = null) {
        $this->mode = self::MODE_EXTRACT;
        // Start from a clean slate so that entries recorded by an earlier run
        // that was aborted by an exception cannot leak into this result.
        $this->results = null;
        $this->extract_or_list($handler, $progress);
        $results = $this->results;
        // Clear the value but keep the declared property in place.
        $this->results = null;
        return $results;
    }

    /**
     * Extracts or lists the archive depending on $this->mode.
     *
     * @param tgz_extractor_handler $handler Optional handler (used when extracting)
     * @param file_progress $progress Optional progress reporting
     * @throws moodle_exception If there is any error processing the archive
     */
    protected function extract_or_list(tgz_extractor_handler $handler = null, file_progress $progress = null) {
        // Open archive.
        if ($this->storedfile) {
            $gz = $this->storedfile->get_content_file_handle(stored_file::FILE_HANDLE_GZOPEN);
            // Estimate number of read-buffers (64KB) in file. Guess that the
            // uncompressed size is 2x compressed size. Add one just to ensure
            // it's non-zero.
            $estimatedbuffers = ($this->storedfile->get_filesize() * 2 / self::READ_BLOCK_SIZE) + 1;
        } else {
            $gz = gzopen($this->ospath, 'rb');
            $estimatedbuffers = (filesize($this->ospath) * 2 / self::READ_BLOCK_SIZE) + 1;
        }
        if (!$gz) {
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Failed to open gzip file');
        }

        // Calculate how much progress to report per buffer read.
        $progressperbuffer = (int)(tgz_packer::PROGRESS_MAX / $estimatedbuffers);

        // Reset per-run state so the object can be reused safely, even after
        // an earlier run was aborted part-way through a file.
        $this->numfiles = -1;
        $this->donefiles = 0;
        $this->currentfile = null;
        $this->currentfp = null;
        $this->currentarchivepath = null;

        // Process archive in 512-byte blocks (but reading 64KB at a time).
        $buffer = '';
        $bufferpos = 0;
        $bufferlength = 0;
        $done = 0;
        $beforeprogress = -1;
        try {
            while (true) {
                if ($bufferpos == $bufferlength) {
                    $buffer = gzread($gz, self::READ_BLOCK_SIZE);
                    $bufferpos = 0;
                    $bufferlength = strlen($buffer);
                    if ($bufferlength == 0) {
                        // EOF.
                        break;
                    }

                    // Report progress if enabled (once per buffer read).
                    if ($progress) {
                        if ($this->numfiles === -1) {
                            // If we don't know the number of files, do an estimate based
                            // on number of buffers read.
                            $done += $progressperbuffer;
                            if ($done >= tgz_packer::PROGRESS_MAX) {
                                $done = tgz_packer::PROGRESS_MAX - 1;
                            }
                        } else {
                            // Once we know the number of files, use this.
                            if ($beforeprogress === -1) {
                                $beforeprogress = $done;
                            }
                            // Calculate progress as whatever progress we reported
                            // before we knew how many files there were (might be 0)
                            // plus a proportion of the number of files out of the
                            // remaining progress value. The index may claim zero
                            // files, or fewer files than the archive contains, so
                            // guard the division and never go past the maximum.
                            $fraction = 0;
                            if ($this->numfiles > 0) {
                                $fraction = min(1, $this->donefiles / $this->numfiles);
                            }
                            $done = $beforeprogress + (int)($fraction *
                                    (tgz_packer::PROGRESS_MAX - $beforeprogress));
                        }
                        $progress->progress($done, tgz_packer::PROGRESS_MAX);
                    }
                }

                $block = substr($buffer, $bufferpos, tgz_packer::TAR_BLOCK_SIZE);
                if ($this->currentfile) {
                    $this->process_file_block($block, $handler);
                } else {
                    $this->process_header($block, $handler);
                }

                // When listing, if we read an index file, we abort archive processing.
                if ($this->mode === self::MODE_LIST_COMPLETE) {
                    break;
                }

                $bufferpos += tgz_packer::TAR_BLOCK_SIZE;
            }
        } catch (Exception $e) {
            // Do not leak the output handle or the archive handle when
            // processing fails; release them and pass the error on unchanged.
            if (is_resource($this->currentfp)) {
                fclose($this->currentfp);
            }
            $this->currentfp = null;
            $this->currentfile = null;
            $this->currentarchivepath = null;
            gzclose($gz);
            throw $e;
        }

        // Close archive and finish.
        gzclose($gz);
    }

    /**
     * Lists files in the archive, either using the index file (if present),
     * or by basically extracting the whole thing if there isn't an index file.
     *
     * @return array Array of file listing results: objects with the fields
     *   original_pathname, pathname, mtime, is_directory and size
     * @throws moodle_exception If there is any error processing the archive
     */
    public function list_files() {
        $this->listresults = array();
        $this->mode = self::MODE_LIST;
        $this->extract_or_list();
        $listresults = $this->listresults;
        $this->listresults = null;
        return $listresults;
    }

    /**
     * Process 512-byte header block.
     *
     * @param string $block Tar block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the block does not carry the ustar magic string
     */
    protected function process_header($block, $handler) {
        // If the block consists entirely of nulls, ignore it. (This happens
        // twice at end of archive.)
        if ($block === str_repeat("\0", tgz_packer::TAR_BLOCK_SIZE)) {
            return;
        }

        // struct header_posix_ustar {
        //    char name[100];
        $name = rtrim(substr($block, 0, 100), "\0");

        //    char mode[8];
        //    char uid[8];
        //    char gid[8];
        //    char size[12];
        $filesize = octdec(substr($block, 124, 11));

        //    char mtime[12];
        $mtime = octdec(substr($block, 136, 11));

        //    char checksum[8];
        //    char typeflag[1];
        $typeflag = substr($block, 156, 1);

        //    char linkname[100];
        //    char magic[6];
        $magic = substr($block, 257, 6);
        if ($magic !== "ustar\0" && $magic !== "ustar ") {
            // There are two checks above; the first is the correct POSIX format
            // and the second is for GNU tar default format.
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Header does not have POSIX ustar magic string');
        }

        //    char version[2];
        //    char uname[32];
        //    char gname[32];
        //    char devmajor[8];
        //    char devminor[8];
        //    char prefix[155];
        $prefix = rtrim(substr($block, 345, 155), "\0");

        //    char pad[12];
        // };

        $archivepath = ltrim($prefix . '/' . $name, '/');

        // For security, ensure there is no .. folder in the archivepath.
        $archivepath = clean_param($archivepath, PARAM_PATH);

        // Handle file depending on the type.
        switch ($typeflag) {
            case '1' :
            case '2' :
            case '3' :
            case '4' :
            case '6' :
            case '7' :
                // Ignore these special cases.
                break;

            case '5' :
                // Directory.
                if ($this->mode === self::MODE_LIST) {
                    $this->listresults[] = (object)array(
                            'original_pathname' => $archivepath,
                            'pathname' => $archivepath,
                            'mtime' => $mtime,
                            'is_directory' => true,
                            'size' => 0);
                } else if ($handler->tgz_directory($archivepath, $mtime)) {
                    $this->results[$archivepath] = true;
                }
                break;

            default:
                // All other values treated as normal file.
                $this->start_current_file($archivepath, $filesize, $mtime, $handler);
                break;
        }
    }

    /**
     * Processes one 512-byte block of an existing file.
     *
     * Data is collected in a buffer and written out once the buffer reaches
     * WRITE_BLOCK_SIZE or the end of the file is reached.
     *
     * @param string $block Data block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the buffer cannot be written completely
     */
    protected function process_file_block($block, tgz_extractor_handler $handler = null) {
        // Write block into buffer.
        $blocksize = tgz_packer::TAR_BLOCK_SIZE;
        if ($this->currentfileprocessed + tgz_packer::TAR_BLOCK_SIZE > $this->currentfilesize) {
            // Partial block at end of file.
            $blocksize = $this->currentfilesize - $this->currentfileprocessed;
            $this->filebuffer .= substr($block, 0, $blocksize);
        } else {
            // Full-length block.
            $this->filebuffer .= $block;
        }
        $this->filebufferlength += $blocksize;
        $this->currentfileprocessed += $blocksize;

        // Write block to file if necessary.
        $eof = $this->currentfileprocessed == $this->currentfilesize;
        if ($this->filebufferlength >= self::WRITE_BLOCK_SIZE || $eof) {
            // Except when skipping the file, write it out.
            if ($this->currentfile !== true) {
                // fwrite may write fewer bytes than requested (e.g. disk full),
                // so treat a short write as a failure as well as false/0.
                $written = fwrite($this->currentfp, $this->filebuffer);
                if (!$written || $written < strlen($this->filebuffer)) {
                    throw new moodle_exception('errorprocessingarchive', '', '', null,
                            'Failed to write buffer to output file: ' . $this->currentfile);
                }
            }
            $this->filebuffer = '';
            $this->filebufferlength = 0;
        }

        // If file is finished, close it.
        if ($eof) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Starts processing a file from archive.
     *
     * The archive index file is always extracted to a temporary file. Other
     * files are either only listed (list mode) or written to the location
     * returned by the handler; a null location means the file is skipped.
     *
     * @param string $archivepath Path inside archive
     * @param int $filesize Size in bytes
     * @param int $mtime File-modified time
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the output file cannot be opened
     */
    protected function start_current_file($archivepath, $filesize, $mtime,
            tgz_extractor_handler $handler = null) {
        global $CFG;

        $this->currentarchivepath = $archivepath;
        $this->currentmtime = $mtime;
        $this->currentfilesize = $filesize;
        $this->currentfileprocessed = 0;

        if ($archivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            // For index file, store in temp directory.
            $tempfolder = $CFG->tempdir . '/core_files';
            check_dir_exists($tempfolder);
            $this->currentfile = tempnam($tempfolder, '.index');
        } else {
            if ($this->mode === self::MODE_LIST) {
                // If listing, add to list.
                $this->listresults[] = (object)array(
                        'original_pathname' => $archivepath,
                        'pathname' => $archivepath,
                        'mtime' => $mtime,
                        'is_directory' => false,
                        'size' => $filesize);

                // Discard file.
                $this->currentfile = true;
            } else {
                // For other files, ask handler for location.
                $this->currentfile = $handler->tgz_start_file($archivepath);
                if ($this->currentfile === null) {
                    // This indicates that we are discarding the current file.
                    $this->currentfile = true;
                }
            }
        }
        $this->filebuffer = '';
        $this->filebufferlength = 0;

        // Open file.
        if ($this->currentfile !== true) {
            $this->currentfp = fopen($this->currentfile, 'wb');
            if (!$this->currentfp) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to open output file: ' . $this->currentfile);
            }
        } else {
            $this->currentfp = null;
        }

        // If it has no size, close it right away.
        if ($filesize == 0) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Closes the current file, calls handler, and sets up data.
     *
     * For the archive index file this either builds the listing from the
     * index (list mode) or reads the declared number of files (extract mode),
     * and then deletes the temporary copy.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If there is an error closing it
     */
    protected function close_current_file($handler) {
        if ($this->currentfp !== null) {
            if (!fclose($this->currentfp)) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to close output file: ' . $this->currentfile);
            }

            // At this point we should touch the file to set its modified
            // time to $this->currentmtime. However, when extracting to the
            // temp directory, cron will delete files more than a week old,
            // so to avoid problems we leave all files at their current time.
        }

        if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            $headerpattern = '~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~');
            if ($this->mode === self::MODE_LIST) {
                // When listing array, use the archive index to produce the list.
                $index = file($this->currentfile);
                if ($index === false) {
                    // Unreadable index: behave as if it had no valid header.
                    $index = array();
                }
                // The index only counts as valid once its header line matched.
                $ok = false;
                foreach ($index as $num => $value) {
                    // For first line (header), check it's valid then skip it.
                    if ($num == 0) {
                        if (preg_match($headerpattern . '~', $value)) {
                            $ok = true;
                            continue;
                        } else {
                            // Not valid, better ignore the file.
                            break;
                        }
                    }
                    // Split on tabs and store in results array.
                    $values = explode("\t", trim($value));
                    if (count($values) < 4) {
                        // Malformed row (path, type, size, mtime expected).
                        continue;
                    }
                    $this->listresults[] = (object)array(
                        'original_pathname' => $values[0],
                        'pathname' => $values[0],
                        'mtime' => ($values[3] === '?' ? tgz_packer::DEFAULT_TIMESTAMP : (int)$values[3]),
                        'is_directory' => $values[1] === 'd',
                        'size' => (int)$values[2]);
                }
                if ($ok) {
                    $this->mode = self::MODE_LIST_COMPLETE;
                }
                unlink($this->currentfile);
            } else {
                // For index file, get number of files and delete temp file.
                $contents = file_get_contents($this->currentfile, false, null, 0, 128);
                $matches = array();
                if (preg_match($headerpattern . '([0-9]+)~', $contents, $matches)) {
                    $this->numfiles = (int)$matches[1];
                }
                unlink($this->currentfile);
            }
        } else {
            // Report to handler and put in results.
            if ($this->currentfp !== null) {
                $handler->tgz_end_file($this->currentarchivepath, $this->currentfile);
                $this->results[$this->currentarchivepath] = true;
            }
            $this->donefiles++;
        }

        // No longer have a current file.
        $this->currentfp = null;
        $this->currentfile = null;
        $this->currentarchivepath = null;
    }

}

/**
 * Interface for callback from tgz_extractor::extract.
 *
 * The file functions will be called (in pairs tgz_start_file, tgz_end_file) for
 * each file in the archive. (There is only one exception, the special
 * .ARCHIVE_INDEX file which is not reported to the handler.)
 *
 * The directory function is called whenever the archive contains a directory
 * entry.
 */
interface tgz_extractor_handler {
    /**
     * Called when the system begins to extract a file. At this point, the
     * handler must decide where on disk the extracted file should be located.
     * This can be a temporary location or final target, as preferred.
     *
     * The handler can request for files to be skipped, in which case no data
     * will be written and tgz_end_file will not be called.
     *
     * @param string $archivepath Path and name of file within archive
     * @return string Location for output file in filesystem, or null to skip file
     */
    public function tgz_start_file($archivepath);

    /**
     * Called when the system has finished extracting a file. The handler can
     * now process the extracted file if required.
     *
     * NOTE: tgz_extractor currently does not inspect the return value, so
     * returning false does not stop the extraction.
     *
     * @param string $archivepath Path and name of file within archive
     * @param string $realpath Path in filesystem (from tgz_start_file return)
     * @return bool True to continue processing, false to abort archive extract
     */
    public function tgz_end_file($archivepath, $realpath);

    /**
     * Called when a directory entry is found in the archive.
     *
     * The handler can create a corresponding directory if required.
     *
     * @param string $archivepath Path and name of directory within archive
     * @param int $mtime Modified time of directory
     * @return bool True if directory was created, false if skipped
     */
    public function tgz_directory($archivepath, $mtime);
}