- private function increment(&$which, $token) {
- $generalized = $this->generalize([$token]);
- if (!isset($which[$generalized])) {
- $which[$generalized] = [
- 'count' => 1,
- 'examples' => [],
- ];
- $which[$generalized]['examples'][$token] = 1;
- } else {
- ++$which[$generalized]['count'];
- if (!isset($which[$generalized]['examples'][$token])) {
- $which[$generalized]['examples'][$token] = 1;
- } else {
- ++$which[$generalized]['examples'][$token];
- }
+ /**
+  * Split text into whitespace-separated tokens.
+  *
+  * A run of whitespace counts as a single separator. Whitespace at the
+  * start or end of the text never produces empty tokens, so the result
+  * contains only non-empty strings.
+  *
+  * @param string $text Text to tokenize.
+  * @return string[] Tokens in their original order; an empty array when
+  *                  the text is empty or contains only whitespace.
+  */
+ private function splitText($text) {
+     $text = (string) $text;
+
+     // PREG_SPLIT_NO_EMPTY drops the empty strings preg_split() would
+     // otherwise emit for leading/trailing whitespace, and also makes
+     // empty or whitespace-only input yield [].
+     $tokens = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+     if ($tokens === false) {
+         // With the /u modifier preg_split() fails on invalid UTF-8.
+         // Fall back to a byte-wise split instead of handing `false`
+         // to callers that expect an array.
+         $tokens = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
+     }
+
+     return $tokens === false ? [] : $tokens;
+ }
+
+ private function makeKey($tokens) {
+ $key = $this->joinText(array_slice($tokens, $this->size * -1));
+ $key = mb_strtolower($key);
+ $key = str_replace(['.', ',', ':', ';', '!', '?', '^', '+', '-', '"', "'", '(', ')', '[', ']'], '', $key);
+ $key = preg_replace('/\d+/u', '0', $key);
+ foreach ($this->categories as $category => $patterns) {
+ $key = preg_replace($patterns, $category, $key);