Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace Phpml\FeatureExtraction;
6
 
7
use Phpml\Transformer;
8
 
9
/**
 * Re-weights feature vectors by inverse document frequency (IDF).
 *
 * fit() stores, for every feature index, log10(N / df), where N is the number
 * of fitted samples and df is the number of samples in which that feature is
 * greater than zero. transform() multiplies each feature by the weight stored
 * for its index.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * IDF weight per feature index. While fit() is running it temporarily
     * holds the raw document frequencies.
     *
     * @var array
     */
    private $idf = [];

    /**
     * @param array $samples Optional samples; when non-empty, the transformer
     *                       is fitted on them immediately.
     */
    public function __construct(array $samples = [])
    {
        if (count($samples) > 0) {
            $this->fit($samples);
        }
    }

    /**
     * Computes the IDF weight of every feature index found in $samples.
     *
     * An empty $samples array leaves the transformer with an empty weight table.
     *
     * @param array      $samples List of feature vectors (feature index => value).
     * @param array|null $targets Not used by this transformer.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as &$value) {
            // A feature that is positive in no sample has a document frequency
            // of 0; dividing by it throws DivisionByZeroError on PHP 8. Such a
            // feature carries no information, so it gets a weight of 0.
            $value = $value > 0
                ? log((float) ($count / $value), 10.0)
                : 0.0;
        }
        // Release the reference so $value no longer aliases the last element.
        unset($value);
    }

    /**
     * Multiplies every feature of every sample, in place, by the IDF weight
     * stored for its index.
     *
     * Each feature index is looked up in the table built by fit().
     *
     * @param array      $samples Feature vectors to re-weight (modified in place).
     * @param array|null $targets Not used by this transformer.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature *= $this->idf[$index];
            }
            unset($feature);
        }
        // Break the reference into the caller's array before returning.
        unset($sample);
    }

    /**
     * Counts, for each feature index, the number of samples in which the
     * feature value is greater than zero, and stores the counts in $this->idf.
     *
     * Every index encountered is registered (with a count of 0 if it is never
     * positive), so the table is built from the samples themselves rather
     * than from $samples[0], which does not exist for an empty input.
     *
     * @param array $samples List of feature vectors (feature index => value).
     */
    private function countTokensFrequency(array $samples): void
    {
        $this->idf = [];

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                $this->idf[$index] = ($this->idf[$index] ?? 0) + ($count > 0 ? 1 : 0);
            }
        }
    }
}