|
Última modificación |
Ver Log
|
| Rev |
Autor |
Línea Nro. |
Línea |
| 1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
declare(strict_types=1);
|
|
|
4 |
|
|
|
5 |
namespace Phpml\Tokenization;
|
|
|
6 |
|
|
|
7 |
use Phpml\Exception\InvalidArgumentException;
|
|
|
8 |
|
|
|
9 |
/**
 * Tokenizer that cuts text on runs of characters belonging to the Unicode
 * "separator" (\pZ) or "other" (\pC) general categories.
 */
class WhitespaceTokenizer implements Tokenizer
{
    /**
     * Splits the given text into its non-empty pieces.
     *
     * @param string $text text to split; matched in UTF-8 mode (`u` modifier)
     *
     * @return array the pieces of $text, in order, with empty pieces dropped
     *
     * @throws InvalidArgumentException when preg_split() returns false
     */
    public function tokenize(string $text): array
    {
        $tokens = preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);

        // Success path first: anything other than false is the token array.
        if ($tokens !== false) {
            return $tokens;
        }

        throw new InvalidArgumentException('preg_split failed on: '.$text);
    }
}
|