AutorÃa | Ultima modificación | Ver Log |
<?phpdeclare(strict_types=1);namespace Phpml\Math\Statistic;use Phpml\Exception\InvalidArgumentException;class Covariance{/*** Calculates covariance from two given arrays, x and y, respectively** @throws InvalidArgumentException*/public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float{$n = count($x);if ($n === 0 || count($y) === 0) {throw new InvalidArgumentException('The array has zero elements');}if ($sample && $n === 1) {throw new InvalidArgumentException('The array must have at least 2 elements');}if ($meanX === null) {$meanX = Mean::arithmetic($x);}if ($meanY === null) {$meanY = Mean::arithmetic($y);}$sum = 0.0;foreach ($x as $index => $xi) {$yi = $y[$index];$sum += ($xi - $meanX) * ($yi - $meanY);}if ($sample) {--$n;}return $sum / $n;}/*** Calculates covariance of two dimensions, i and k in the given data.** @throws InvalidArgumentException* @throws \Exception*/public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float{if (count($data) === 0) {throw new InvalidArgumentException('The array has zero elements');}$n = count($data);if ($sample && $n === 1) {throw new InvalidArgumentException('The array must have at least 2 elements');}if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {throw new InvalidArgumentException('Given indices i and k do not match with the dimensionality of data');}if ($meanX === null || $meanY === null) {$x = array_column($data, $i);$y = array_column($data, $k);$meanX = Mean::arithmetic($x);$meanY = Mean::arithmetic($y);$sum = 0.0;foreach ($x as $index => $xi) {$yi = $y[$index];$sum += ($xi - $meanX) * ($yi - $meanY);}} else {// In the case, whole dataset given along with dimension indices, i and k,// we would like to avoid getting column data with array_column and operate// over this extra copy of column data for memory efficiency purposes.//// Instead we traverse through the whole data and get what we actually need// without copying the data. This way, memory use will be reduced// with a slight cost of CPU utilization.$sum = 0.0;foreach ($data as $row) {$val = [0, 0];foreach ($row as $index => $col) {if ($index == $i) {$val[0] = $col - $meanX;}if ($index == $k) {$val[1] = $col - $meanY;}}$sum += $val[0] * $val[1];}}if ($sample) {--$n;}return $sum / $n;}/*** Returns the covariance matrix of n-dimensional data** @param array|null $means*/public static function covarianceMatrix(array $data, ?array $means = null): array{$n = count($data[0]);if ($means === null) {$means = [];for ($i = 0; $i < $n; ++$i) {$means[] = Mean::arithmetic(array_column($data, $i));}}$cov = [];for ($i = 0; $i < $n; ++$i) {for ($k = 0; $k < $n; ++$k) {if ($i > $k) {$cov[$i][$k] = $cov[$k][$i];} else {$cov[$i][$k] = self::fromDataset($data,$i,$k,true,$means[$i],$means[$k]);}}}return $cov;}}