]> git.localhorst.tv Git - alttp.git/blob - app/Models/ChatLib.php
try to improve message generation
[alttp.git] / app / Models / ChatLib.php
1 <?php
2
3 namespace App\Models;
4
/**
 * Markov-chain style text generator that is trained on chat messages.
 *
 * Usage: feed messages with addMessage(), call compile() once to turn the
 * raw counts into weighted lookup tables, then call generate().
 *
 * Tokens are "generalized" (digit runs collapsed to "0", lower-cased) before
 * being used as state keys, while the concrete spellings that were seen are
 * kept as weighted examples for output.
 */
class ChatLib {

	/**
	 * Maximum number of preceding tokens used as context.
	 */
	private $size = 5;

	/**
	 * Transition table keyed by generalized context string.
	 *
	 * Before compile(): context => [generalized token => ['count' => int, 'examples' => [token => int]]].
	 * After compile():  context => list of [generalized token, lower, upper, examples],
	 *                   where examples is a list of [token, lower, upper].
	 */
	private $transitions = [];

	/**
	 * Learn the token transitions of a single message.
	 *
	 * @param object $msg object exposing the message text as ->text_content
	 * @return void
	 */
	public function addMessage($msg) {
		$tokens = $this->tokenize($msg->text_content);
		if (empty($tokens)) return;
		// the empty string is the end-of-message marker
		$tokens[] = '';
		foreach ($tokens as $num => $token) {
			if ($num === 0) {
				// first token: transition from the empty (start) state
				$this->addTransition([], $token);
			} else {
				$start = max(0, $num - $this->size);
				$end = $num;
				// Record the longest available context first, then
				// successively shorter ones; stop once a context of
				// fewer than 3 tokens has been recorded.
				for ($i = $start; $i < $end; ++$i) {
					$this->addTransition(array_slice($tokens, $i, $end - $i), $token);
					if ($end - $i < 3) break;
				}
			}
		}
	}

	/**
	 * Convert the raw counts into cumulative-weight lists usable by generate().
	 *
	 * Contexts that end up without any candidate are removed. Prints the
	 * byte size of the JSON-encoded table as a diagnostic.
	 *
	 * @return void
	 */
	public function compile() {
		foreach ($this->transitions as $key => $value) {
			$this->transitions[$key] = $this->index($this->transitions[$key]);
			if (empty($this->transitions[$key])) {
				unset($this->transitions[$key]);
			}
		}
		echo 'size: ', number_format(strlen(json_encode($this->transitions)), 0), PHP_EOL;
	}

	/**
	 * Generate a random message.
	 *
	 * Tokens are appended until the end-of-message marker is drawn, no
	 * transition is available, or the text has reached $limit bytes. The
	 * length is checked before appending, so the result may exceed $limit
	 * by up to one token.
	 *
	 * @param int $limit soft length limit in bytes
	 * @return string
	 */
	public function generate($limit = 100) {
		$tokens = [];
		$generated = '';
		while (strlen($generated) < $limit) {
			$next = $this->randomNext($tokens);
			// Only the empty string terminates; a plain empty() check
			// would also stop on the perfectly valid token "0".
			if ($next === null || $next === '') break;
			$tokens[] = $next;
			$generated .= $next;
		}
		return $generated;
	}

	/**
	 * Turn one context's successor counts into a cumulative-weight list.
	 *
	 * Successors that were seen only once are skipped. Example weights are
	 * squared, which favours frequently seen spellings.
	 *
	 * @param array $arr generalized token => ['count' => int, 'examples' => array]
	 * @return array list of [generalized token, lower, upper, examples]
	 */
	private function index($arr) {
		$result = [];
		$sum = 0;
		foreach ($arr as $key => $entry) {
			$weight = $entry['count'];
			if ($weight == 1) continue;
			$lower = $sum;
			$sum += $weight;
			$examples = [];
			if (is_array(end($entry['examples']))) {
				// already processed
				$examples = $entry['examples'];
			} else {
				$subsum = 0;
				foreach ($entry['examples'] as $example => $subweight) {
					$sublower = $subsum;
					$subsum += $subweight * $subweight;
					$examples[] = [$example, $sublower, $subsum];
				}
			}
			$result[] = [$key, $lower, $sum, $examples];
		}
		return $result;
	}

	/**
	 * Draw the next token for the given history.
	 *
	 * Tries the longest usable context first and falls back to shorter ones,
	 * down to the empty (start) context.
	 *
	 * @param array $tokens tokens generated so far
	 * @return string next token, or '' when nothing can follow
	 */
	private function randomNext($tokens) {
		$cnt = count($tokens);
		for ($size = min($this->size, $cnt); $size >= 0; --$size) {
			$cmb = $this->generalize(array_slice($tokens, $cnt - $size, $size));
			if (isset($this->transitions[$cmb])) {
				$pick = $this->pick($this->transitions[$cmb]);
				if (!is_null($pick)) {
					return $this->exampleOf($pick);
				}
			}
		}
		return '';
	}

	/**
	 * Weighted random choice from a cumulative-weight list.
	 *
	 * Each option is [value, lower, upper, ...] and owns the half-open range
	 * [lower, upper), so an option with weight w is chosen with probability
	 * w / total.
	 *
	 * @param array $options list ordered by ascending range
	 * @return array|null the chosen option, or null if there is none
	 */
	private function pick($options) {
		if (empty($options)) return null;
		$max = end($options)[2];
		if ($max <= 0) return null;
		// Draw from [0, max - 1]: including $max itself would produce
		// max + 1 outcomes for a total weight of max and skew the result.
		$num = random_int(0, $max - 1);
		$min_index = 0;
		$max_index = count($options) - 1;
		// binary search for the option whose range contains $num
		while ($min_index < $max_index) {
			$cur_index = intval(($min_index + $max_index) / 2);
			$cur_low = $options[$cur_index][1];
			$cur_high = $options[$cur_index][2];
			if ($num < $cur_low) {
				$max_index = $cur_index;
			} else if ($num >= $cur_high) {
				$min_index = $cur_index + 1;
			} else {
				$min_index = $cur_index;
				break;
			}
		}
		return $options[$min_index];
	}

	/**
	 * Count one observed transition from a context to a token.
	 *
	 * @param array $state context tokens
	 * @param string $next token that followed the context
	 * @return void
	 */
	private function addTransition($state, $next) {
		$cmb = $this->generalize($state);
		if (!isset($this->transitions[$cmb])) {
			$this->transitions[$cmb] = [];
		}
		$this->increment($this->transitions[$cmb], $next);
	}

	/**
	 * Increase the counters for a token inside a successor table.
	 *
	 * @param array $which successor table, modified in place
	 * @param string $token concrete token as it was seen
	 * @return void
	 */
	private function increment(&$which, $token) {
		$generalized = $this->generalize([$token]);
		if (!isset($which[$generalized])) {
			$which[$generalized] = [
				'count' => 1,
				'examples' => [],
			];
			$which[$generalized]['examples'][$token] = 1;
		} else {
			++$which[$generalized]['count'];
			if (!isset($which[$generalized]['examples'][$token])) {
				$which[$generalized]['examples'][$token] = 1;
			} else {
				++$which[$generalized]['examples'][$token];
			}
		}
	}

	/**
	 * Split text at word boundaries into a list of non-empty tokens.
	 *
	 * @param string $str
	 * @return array list of tokens; empty if the text cannot be split
	 */
	private function tokenize($str) {
		// PREG_SPLIT_NO_EMPTY removes only empty strings; a bare
		// array_filter() would also throw away the token "0".
		$parts = preg_split('/\b/u', (string) $str, -1, PREG_SPLIT_NO_EMPTY);
		// preg_split() returns false on failure, e.g. for invalid UTF-8
		return $parts === false ? [] : $parts;
	}

	/**
	 * Build the state key for a sequence of tokens.
	 *
	 * Every run of digits is replaced by a single "0" and the token is
	 * lower-cased; the results are concatenated.
	 *
	 * @param array $tokens
	 * @return string
	 */
	private function generalize($tokens) {
		$str = '';
		foreach ($tokens as $token) {
			$token = (string) $token;
			$replaced = preg_replace('/\d+/', '0', $token);
			// Fall back to the raw token only when the replacement failed
			// or produced nothing; empty() would wrongly treat the
			// generalized number "0" as empty as well.
			if ($replaced === null || $replaced === '') {
				$str .= $token;
			} else {
				$str .= strtolower($replaced);
			}
		}
		return $str;
	}

	/**
	 * Choose a concrete spelling for a picked generalized token.
	 *
	 * @param array $pick option as returned by pick(); index 3 holds the examples
	 * @return string '' when no example is available
	 */
	private function exampleOf($pick) {
		$example = $this->pick($pick[3]);
		if ($example === null) return '';
		// digit-only examples come back as integers because PHP casts
		// numeric array keys, so normalize to string
		return (string) $example[0];
	}

}