| 1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
declare(strict_types=1);
|
|
|
4 |
|
|
|
5 |
namespace Phpml\SupportVectorMachine;
|
|
|
6 |
|
|
|
7 |
use Phpml\Exception\InvalidArgumentException;
|
|
|
8 |
|
|
|
9 |
/**
 * Static helpers that serialise samples and labels into line-oriented text
 * ("<label> <index>:<value> <index>:<value> ...") and that parse textual
 * prediction output back into the caller's original labels.
 *
 * NOTE(review): the text layout looks like the libsvm sparse format — confirm
 * against the code that consumes these strings.
 */
class DataTransformer
{
    /**
     * Builds the training text: one line per entry of $labels, each of the
     * form "<label> <sample row> " followed by PHP_EOL.
     *
     * @param array $samples feature rows, looked up with the same keys as $labels
     * @param array $labels  one label per sample
     * @param bool  $targets when true the labels are written verbatim; when false
     *                       each label is replaced by its integer from numericLabels()
     *
     * @return string the concatenated lines (empty string when $labels is empty)
     */
    public static function trainingSet(array $samples, array $labels, bool $targets = false): string
    {
        $set = '';

        // The label-to-integer map is only needed when labels are not written verbatim.
        $numericLabels = $targets ? [] : self::numericLabels($labels);

        foreach ($labels as $index => $label) {
            $written = $targets ? $label : $numericLabels[$label];
            $set .= sprintf('%s %s %s', $written, self::sampleRow($samples[$index]), PHP_EOL);
        }

        return $set;
    }

    /**
     * Builds the text used for prediction: one line per sample, each written
     * with the placeholder label "0".
     *
     * A single sample (an array whose first element is not itself an array) is
     * accepted and treated as a one-element list of samples.
     *
     * @param array $samples a list of feature rows, or one feature row
     *
     * @return string the concatenated lines
     *
     * @throws InvalidArgumentException when $samples is empty
     */
    public static function testSet(array $samples): string
    {
        if (count($samples) === 0) {
            throw new InvalidArgumentException('The array has zero elements');
        }

        // Look at the first element rather than key 0, so that a list of samples
        // whose keys do not start at 0 (e.g. after array_filter) is still
        // recognised as a list instead of being wrapped as one sample.
        if (!is_array(reset($samples))) {
            $samples = [$samples];
        }

        $set = '';
        foreach ($samples as $sample) {
            $set .= sprintf('0 %s %s', self::sampleRow($sample), PHP_EOL);
        }

        return $set;
    }

    /**
     * Translates raw prediction output (one integer per PHP_EOL-separated line)
     * back into the original labels.
     *
     * Empty lines are skipped. Each remaining line is cast to int and looked up
     * in the map produced by numericLabels($labels); the result is the matching
     * array key, or false when the integer is not present in the map.
     *
     * @param string $rawPredictions PHP_EOL-separated prediction lines
     * @param array  $labels         the labels used to build the numeric mapping
     *
     * @return array one entry per non-empty line
     */
    public static function predictions(string $rawPredictions, array $labels): array
    {
        $numericLabels = self::numericLabels($labels);
        $results = [];
        foreach (explode(PHP_EOL, $rawPredictions) as $result) {
            // isset($result[0]) is true only for a non-empty string.
            if (isset($result[0])) {
                $results[] = array_search((int) $result, $numericLabels, true);
            }
        }

        return $results;
    }

    /**
     * Parses raw probability output into one "label => probability" map per
     * prediction line.
     *
     * The first line is treated as a header: its first space-separated column is
     * discarded and the remaining columns are numeric labels that are mapped
     * back to the original labels. On every following line the first column is
     * discarded and the remaining columns are cast to float and keyed by the
     * header label at the same position.
     *
     * @param string $rawPredictions header line followed by one line per prediction
     * @param array  $labels         the labels used to build the numeric mapping
     *
     * @return array list of arrays mapping label => float probability
     */
    public static function probabilities(string $rawPredictions, array $labels): array
    {
        $numericLabels = self::numericLabels($labels);

        $predictions = explode(PHP_EOL, trim($rawPredictions));

        $header = array_shift($predictions);
        $headerColumns = explode(' ', (string) $header);
        // Drop the leading header column; only the label columns are needed.
        array_shift($headerColumns);

        $columnLabels = [];
        foreach ($headerColumns as $numericLabel) {
            $columnLabels[] = array_search((int) $numericLabel, $numericLabels, true);
        }

        $results = [];
        foreach ($predictions as $rawResult) {
            $probabilities = explode(' ', $rawResult);
            // Drop the first column of the row; the rest are the per-label values.
            array_shift($probabilities);

            $result = [];
            foreach ($probabilities as $i => $prob) {
                $result[$columnLabels[$i]] = (float) $prob;
            }

            $results[] = $result;
        }

        return $results;
    }

    /**
     * Assigns each distinct label a zero-based integer in order of first
     * appearance.
     *
     * Labels are used as array keys, so PHP's usual array-key casting applies
     * to them.
     *
     * @param array $labels labels, possibly with repetitions
     *
     * @return array map of label => integer
     */
    public static function numericLabels(array $labels): array
    {
        $numericLabels = [];
        foreach ($labels as $label) {
            if (isset($numericLabels[$label])) {
                continue;
            }

            // The next free integer equals the number of labels seen so far.
            $numericLabels[$label] = count($numericLabels);
        }

        return $numericLabels;
    }

    /**
     * Formats one sample as space-separated "<key + 1>:<value>" pairs, with the
     * value rendered through the %F float format.
     *
     * @param array $sample feature values; each key is incremented by one in the output
     *
     * @return string the formatted row (empty string for an empty sample)
     */
    private static function sampleRow(array $sample): string
    {
        $row = [];
        foreach ($sample as $index => $feature) {
            $row[] = sprintf('%s:%F', $index + 1, $feature);
        }

        return implode(' ', $row);
    }
}
|