Autoría | Última modificación | Ver Log |
<?php

declare(strict_types=1);

namespace Phpml\DimensionReduction;

use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;

/**
 * PCA (Principal Component Analysis).
 *
 * Explains the given data with a lower number of dimensions while conserving
 * a proportion of the total variance within the data. It is a lossy data
 * compression technique.
 *
 * The eigen decomposition and the projection itself are delegated to the
 * inherited eigenDecomposition() and reduce() methods; this class is
 * responsible for centring the data and building the covariance matrix.
 */
class PCA extends EigenTransformerBase
{
    /**
     * Mean value of each dimension (column), computed from the data given to
     * fit() and reused by transform() to centre new samples.
     *
     * @var array
     */
    protected $means = [];

    /**
     * Whether fit() has completed at least once on this instance.
     *
     * @var bool
     */
    protected $fit = false;

    /**
     * Exactly one of the two arguments must be provided.
     *
     * @param float|null $totalVariance Total explained variance to be preserved (0.1 to 0.99)
     * @param int|null   $numFeatures   Number of features to be preserved (greater than 0)
     *
     * @throws InvalidArgumentException When a value is out of range, or when both or
     *                                  neither of the arguments are specified
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        // Both sides equal means "both given" or "none given"; either way the
        // reduction target would be ambiguous.
        if (($totalVariance !== null) === ($numFeatures !== null)) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }

        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Takes a data set and returns a lower dimensional version of it while
     * preserving $totalVariance or $numFeatures.
     *
     * $data is an n-by-m matrix and the returned array is an n-by-k matrix
     * where k <= m.
     *
     * The per-dimension means are always recomputed from $data, so fitting the
     * same instance again on a different data set does not reuse the means of
     * the previous fit.
     *
     * @throws InvalidArgumentException When $data is empty
     */
    public function fit(array $data): array
    {
        if (count($data) === 0) {
            throw new InvalidArgumentException('PCA can not be fitted on an empty data set');
        }

        $n = count($data[0]);

        // normalize() only computes means lazily (when none are stored), so
        // refresh them here to avoid centring with stale values on a re-fit.
        $this->calculateMeans($data, $n);
        $data = $this->normalize($data, $n);

        // The data is already centred, hence zero is passed as the mean of every dimension.
        $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));

        $this->eigenDecomposition($covMatrix);

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample(s) to a lower dimensional representation by
     * using the result of the last run of fit().
     *
     * A single flat sample (its first element is not an array) is wrapped so
     * that it is processed as a one-row matrix.
     *
     * @throws InvalidOperationException When fit() has not been run yet
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
        }

        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        $sample = $this->normalize($sample, count($sample[0]));

        return $this->reduce($sample);
    }

    /**
     * Calculates the arithmetic mean of each of the first $n dimensions
     * (columns) of $data and stores them in $this->means, replacing any
     * previously stored values.
     */
    protected function calculateMeans(array $data, int $n): void
    {
        $this->means = [];
        for ($i = 0; $i < $n; ++$i) {
            $column = array_column($data, $i);
            $this->means[] = Mean::arithmetic($column);
        }
    }

    /**
     * Normalization of the data consists of subtracting the mean from each
     * dimension, therefore dimensions will be centered to zero.
     *
     * Means are computed from $data only when none are stored yet; otherwise
     * the stored means are used.
     */
    protected function normalize(array $data, int $n): array
    {
        if (count($this->means) === 0) {
            $this->calculateMeans($data, $n);
        }

        foreach (array_keys($data) as $i) {
            for ($k = 0; $k < $n; ++$k) {
                $data[$i][$k] -= $this->means[$k];
            }
        }

        return $data;
    }
}