Proyectos de Subversion Moodle

Rev

Autoría | Ultima modificación | Ver Log |

<?php

declare(strict_types=1);

namespace OpenSpout\Reader\XLSX\Manager;

use OpenSpout\Common\Helper\Escaper\XLSX;
use OpenSpout\Reader\Common\Manager\RowManager;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
use OpenSpout\Reader\XLSX\Options;
use OpenSpout\Reader\XLSX\RowIterator;
use OpenSpout\Reader\XLSX\Sheet;
use OpenSpout\Reader\XLSX\SheetHeaderReader;

/**
 * @internal
 */
final class SheetManager
{
    /**
     * Paths of XML files relative to the XLSX file root.
     */
    public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
    public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';

    /**
     * Definition of XML node names used to parse data.
     */
    public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
    public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
    public const XML_NODE_SHEET = 'sheet';
    public const XML_NODE_SHEETS = 'sheets';
    public const XML_NODE_RELATIONSHIP = 'Relationship';

    /**
     * Definition of XML attributes used to parse data.
     */
    public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
    public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
    public const XML_ATTRIBUTE_R_ID = 'r:id';
    public const XML_ATTRIBUTE_NAME = 'name';
    public const XML_ATTRIBUTE_STATE = 'state';
    public const XML_ATTRIBUTE_ID = 'Id';
    public const XML_ATTRIBUTE_TARGET = 'Target';

    /**
     * State value to represent a hidden sheet.
     */
    public const SHEET_STATE_HIDDEN = 'hidden';

    /** @var string Path of the XLSX file being read */
    private readonly string $filePath;

    private readonly Options $options;

    /** @var SharedStringsManager Manages shared strings */
    private readonly SharedStringsManager $sharedStringsManager;

    /** @var XLSX Used to unescape XML data */
    private readonly XLSX $escaper;

    /** @var Sheet[] List of sheets */
    private array $sheets;

    /** @var int Index of the sheet currently read */
    private int $currentSheetIndex;

    /** @var int Index of the active sheet (0 by default) */
    private int $activeSheetIndex;

    public function __construct(
        string $filePath,
        Options $options,
        SharedStringsManager $sharedStringsManager,
        XLSX $escaper
    ) {
        $this->filePath = $filePath;
        $this->options = $options;
        $this->sharedStringsManager = $sharedStringsManager;
        $this->escaper = $escaper;
    }

    /**
     * Returns the sheets metadata of the file located at the previously given file path.
     * The paths to the sheets' data are read from the [Content_Types].xml file.
     *
     * @return Sheet[] Sheets within the XLSX file
     */
    public function getSheets(): array
    {
        $this->sheets = [];
        $this->currentSheetIndex = 0;
        $this->activeSheetIndex = 0; // By default, the first sheet is active

        $xmlReader = new XMLReader();
        $xmlProcessor = new XMLProcessor($xmlReader);

        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);

        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
            $xmlProcessor->readUntilStopped();
            $xmlReader->close();
        }

        return $this->sheets;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
    {
        // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
        // may be the string "false", that is not mapped to the boolean "false" by default...
        $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
        $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
    {
        // The "workbookView" node is located before "sheet" nodes, ensuring that
        // the active sheet is known before parsing sheets data.
        $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @param XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
     *
     * @return int A return code that indicates what action should the processor take next
     */
    private function processSheetStartingNode(XMLReader $xmlReader): int
    {
        $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
        $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
        ++$this->currentSheetIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    private function processSheetsEndingNode(): int
    {
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
     * We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
     * ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
     *
     * @param XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
     * @param int       $sheetIndexZeroBased  Index of the sheet, based on order of appearance in the workbook (zero-based)
     * @param bool      $isSheetActive        Whether this sheet was defined as active
     *
     * @return Sheet Sheet instance
     */
    private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
    {
        $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
        \assert(null !== $sheetId);

        $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
        $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState);

        $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
        \assert(null !== $escapedSheetName);
        $sheetName = $this->escaper->unescape($escapedSheetName);

        $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);

        return new Sheet(
            $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
            $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
            $sheetIndexZeroBased,
            $sheetName,
            $isSheetActive,
            $isSheetVisible
        );
    }

    /**
     * @param string $sheetId The sheet ID, as defined in "workbook.xml"
     *
     * @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID
     */
    private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
    {
        $sheetDataXMLFilePath = '';

        // find the file path of the sheet, by looking at the "workbook.xml.res" file
        $xmlReader = new XMLReader();
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
                    $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);

                    if ($relationshipSheetId === $sheetId) {
                        // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
                        // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
                        $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
                        \assert(null !== $sheetDataXMLFilePath);

                        // sometimes, the sheet data file path already contains "/xl/"...
                        if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
                            $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;

                            break;
                        }
                    }
                }
            }

            $xmlReader->close();
        }

        return $sheetDataXMLFilePath;
    }

    private function createRowIterator(
        string $filePath,
        string $sheetDataXMLFilePath,
        Options $options,
        SharedStringsManager $sharedStringsManager
    ): RowIterator {
        $xmlReader = new XMLReader();

        $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
        $styleManager = new StyleManager(
            $filePath,
            $workbookRelationshipsManager->hasStylesXMLFile()
                ? $workbookRelationshipsManager->getStylesXMLFilePath()
                : null
        );

        $cellValueFormatter = new CellValueFormatter(
            $sharedStringsManager,
            $styleManager,
            $options->SHOULD_FORMAT_DATES,
            $options->SHOULD_USE_1904_DATES,
            new XLSX()
        );

        return new RowIterator(
            $filePath,
            $sheetDataXMLFilePath,
            $options->SHOULD_PRESERVE_EMPTY_ROWS,
            $xmlReader,
            new XMLProcessor($xmlReader),
            $cellValueFormatter,
            new RowManager()
        );
    }

    private function createSheetHeaderReader(
        string $filePath,
        string $sheetDataXMLFilePath
    ): SheetHeaderReader {
        $xmlReader = new XMLReader();

        return new SheetHeaderReader(
            $filePath,
            $sheetDataXMLFilePath,
            $xmlReader,
            new XMLProcessor($xmlReader)
        );
    }
}