1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
declare(strict_types=1);
|
|
|
4 |
|
|
|
5 |
namespace Phpml\Math\Statistic;
|
|
|
6 |
|
|
|
7 |
use Phpml\Exception\InvalidArgumentException;
|
|
|
8 |
|
|
|
9 |
/**
 * One-way analysis of variance (ANOVA).
 *
 * https://en.wikipedia.org/wiki/Analysis_of_variance
 */
final class ANOVA
{
    /**
     * The one-way ANOVA tests the null hypothesis that 2 or more groups have
     * the same population mean. The test is applied to samples from two or
     * more groups, possibly with differing sizes.
     *
     * The F-value is computed independently for every feature as the ratio of
     * the between-group mean square to the within-group mean square.
     *
     * Special cases:
     *  - when every class holds exactly one sample the within-group degrees of
     *    freedom are zero and the within-group mean square is fixed to 1;
     *  - when a feature has no within-group variance the ratio is not finite:
     *    INF (or -INF) is returned for a non-zero between-group mean square
     *    and NAN when both mean squares are zero.
     *
     * @param array[] $samples - each row is class samples; every class is a
     *                         non-empty array of samples and every sample is
     *                         an array of numeric features
     *
     * @return float[] F-value per feature, keyed like the features of a sample
     *
     * @throws InvalidArgumentException when fewer than two classes are given
     *                                  or a class contains no samples
     */
    public static function oneWayF(array $samples): array
    {
        $classes = count($samples);
        if ($classes < 2) {
            throw new InvalidArgumentException('The array must have at least 2 elements');
        }

        $samplesPerClass = array_map(static function (array $class): int {
            return count($class);
        }, $samples);
        // An empty class has no mean; reject it instead of failing later with
        // an unrelated type or division error.
        if (in_array(0, $samplesPerClass, true)) {
            throw new InvalidArgumentException('Each class must contain at least one sample');
        }

        $allSamples = (int) array_sum($samplesPerClass);
        $ssAllSamples = self::sumOfSquaresPerFeature($samples);
        $sumSamples = self::sumOfFeaturesPerClass($samples);
        $squareSumSamples = self::sumOfSquares($sumSamples);
        $sumSamplesSquare = self::squaresSum($sumSamples);
        $ssbn = self::calculateSsbn($samples, $sumSamplesSquare, $samplesPerClass, $squareSumSamples, $allSamples);
        $sswn = self::calculateSswn($ssbn, $ssAllSamples, $squareSumSamples, $allSamples);
        $dfbn = $classes - 1;
        $dfwn = $allSamples - $classes;

        $msb = array_map(static function ($s) use ($dfbn) {
            return $s / $dfbn;
        }, $ssbn);

        if ($dfwn === 0) {
            // One sample per class: there is no within-group spread to
            // estimate, so the within-group mean square is fixed to 1.
            $msw = array_fill_keys(array_keys($sswn), 1);
        } else {
            $msw = array_map(static function ($s) use ($dfwn) {
                return $s / $dfwn;
            }, $sswn);
        }

        $f = [];
        foreach ($msb as $index => $msbValue) {
            $f[$index] = self::ratio($msbValue, $msw[$index]);
        }

        return $f;
    }

    /**
     * Sums the squared feature values over all samples of all classes.
     *
     * @param array[] $samples
     *
     * @return array sum of squares per feature
     */
    private static function sumOfSquaresPerFeature(array $samples): array
    {
        $firstClass = reset($samples);
        $sum = self::zeros(reset($firstClass));
        foreach ($samples as $class) {
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature ** 2;
                }
            }
        }

        return $sum;
    }

    /**
     * Sums the feature values of the samples of each class separately.
     *
     * @param array[] $samples
     *
     * @return array[] per class (same keys as $samples), the sum per feature
     */
    private static function sumOfFeaturesPerClass(array $samples): array
    {
        return array_map(static function (array $class): array {
            $sum = self::zeros(reset($class));
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature;
                }
            }

            return $sum;
        }, $samples);
    }

    /**
     * Adds the per-class sums together and squares the total of each feature.
     *
     * @param array[] $sums per-class feature sums
     *
     * @return array squared grand total per feature
     */
    private static function sumOfSquares(array $sums): array
    {
        $squares = self::zeros(reset($sums));
        foreach ($sums as $row) {
            foreach ($row as $index => $sum) {
                $squares[$index] += $sum;
            }
        }

        return array_map(static function ($sum) {
            return $sum ** 2;
        }, $squares);
    }

    /**
     * Squares every per-class feature sum, keeping the array structure.
     *
     * @param array[] $sums per-class feature sums
     *
     * @return array[] per class, the squared sum per feature
     */
    private static function squaresSum(array $sums): array
    {
        // array_map keeps the keys and avoids by-reference iteration, which
        // would leave dangling references behind.
        return array_map(static function (array $row): array {
            return array_map(static function ($sum) {
                return $sum ** 2;
            }, $row);
        }, $sums);
    }

    /**
     * Between-group sum of squares per feature: the squared class sums divided
     * by the class sizes, minus the squared grand total divided by the total
     * number of samples.
     *
     * @param array[] $samples
     * @param array[] $sumSamplesSquare squared feature sums per class
     * @param int[]   $samplesPerClass  number of samples per class
     * @param array   $squareSumSamples squared grand total per feature
     */
    private static function calculateSsbn(array $samples, array $sumSamplesSquare, array $samplesPerClass, array $squareSumSamples, int $allSamples): array
    {
        $firstClass = reset($samples);
        $ssbn = self::zeros(reset($firstClass));
        foreach ($sumSamplesSquare as $classIndex => $class) {
            foreach ($class as $index => $feature) {
                $ssbn[$index] += $feature / $samplesPerClass[$classIndex];
            }
        }

        foreach ($squareSumSamples as $index => $sum) {
            $ssbn[$index] -= $sum / $allSamples;
        }

        return $ssbn;
    }

    /**
     * Within-group sum of squares per feature: the total sum of squares minus
     * the between-group sum of squares.
     *
     * @param array $ssbn             between-group sum of squares per feature
     * @param array $ssAllSamples     sum of squared values per feature
     * @param array $squareSumSamples squared grand total per feature
     */
    private static function calculateSswn(array $ssbn, array $ssAllSamples, array $squareSumSamples, int $allSamples): array
    {
        $sswn = [];
        foreach ($ssAllSamples as $index => $ss) {
            $sswn[$index] = ($ss - $squareSumSamples[$index] / $allSamples) - $ssbn[$index];
        }

        return $sswn;
    }

    /**
     * Builds a zero-initialised accumulator with the same keys as $vector.
     */
    private static function zeros(array $vector): array
    {
        return array_fill_keys(array_keys($vector), 0);
    }

    /**
     * Divides two numbers without raising on a zero denominator.
     *
     * The "/" operator throws DivisionByZeroError on PHP 8 (and warns on
     * PHP 7); here a zero denominator yields INF, -INF or NAN depending on the
     * sign of the numerator.
     *
     * @param int|float $numerator
     * @param int|float $denominator
     */
    private static function ratio($numerator, $denominator): float
    {
        if ((float) $denominator !== 0.0) {
            return (float) ($numerator / $denominator);
        }

        if ($numerator > 0) {
            return INF;
        }

        if ($numerator < 0) {
            return -INF;
        }

        return NAN;
    }
}
|