]> git.localhorst.tv Git - alttp.git/blob - app/Models/ChatLib.php
revamp chatlib tokenization
[alttp.git] / app / Models / ChatLib.php
1 <?php
2
3 namespace App\Models;
4
5 use Illuminate\Support\Facades\Storage;
6
/**
 * N-gram (Markov-chain style) chat text generator.
 *
 * Text is split into whitespace-separated tokens. Every run of `$size`
 * consecutive tokens is normalised into a key (see makeKey()) and mapped to
 * the tokens that followed it, together with how often they did. A single
 * space (' ') is used both as start padding and as the end-of-message marker.
 *
 * Life cycle, as implemented below:
 *  1. addText() / addMessage() collect raw counts: key => [next => count].
 *  2. compile() turns every count map into a list of [next, lower, upper]
 *     cumulative weight ranges. It must run exactly once after collecting:
 *     generate() only understands the compiled form, while addText() and
 *     compile() only understand the raw form.
 *  3. generate() walks the table; saveAs() / loadFrom() persist it as JSON.
 */
class ChatLib {

        /**
         * @param int $size number of preceding tokens that make up a transition key
         */
        public function __construct($size = 3) {
                $this->size = $size;
                $converted = [];
                foreach ($this->categories as $category => $patterns) {
                        $converted_patterns = [];
                        foreach ($patterns as $pattern) {
                                // Whole-word match. Keys are lowercased before matching,
                                // so the raw patterns have to be lowercase as well.
                                $converted_patterns[] = '/\b'.$pattern.'\b/u';
                        }
                        // Upper-case replacement names cannot be re-matched by the
                        // lowercase patterns of categories that are applied later.
                        $converted[strtoupper($category)] = $converted_patterns;
                }
                $this->categories = $converted;
        }

        /**
         * Learn from a chat message.
         *
         * A reply uses its reply parent as context; otherwise the text of the
         * previous message (if given) is used.
         *
         * NOTE(review): the value of getReplyParent() is handed to addText() as
         * the context, which treats it as text — confirm it returns a string.
         *
         * @param ChatLog $msg message to learn
         * @param ChatLog|null $previous message that preceded $msg, if any
         */
        public function addMessage(ChatLog $msg, ?ChatLog $previous = null) {
                if ($msg->isReply()) {
                        $this->addText($msg->text_content, $msg->getReplyParent());
                } else if (!is_null($previous)) {
                        $this->addText($msg->text_content, $previous->text_content);
                } else {
                        $this->addText($msg->text_content);
                }
        }

        /**
         * Learn the transitions of a text, optionally in a given context.
         *
         * The text is always learned with plain start padding. With a context,
         * the transitions whose key still contains context tokens are learned
         * additionally.
         *
         * @param string $text text to learn
         * @param string $context text this one responds to, '' for none
         */
        public function addText($text, $context = '') {
                $tokens = $this->tokenize($text);
                $this->addTransitions($tokens, count($tokens) - $this->size);
                if (!empty($context)) {
                        $tokens = $this->tokenizeWithContext($text, $context);
                        // Only the first size - 1 windows can contain context tokens.
                        $limit = min($this->size - 1, count($tokens) - $this->size);
                        $this->addTransitions($tokens, $limit);
                }
        }

        /**
         * Convert the collected raw counts into cumulative weight ranges.
         *
         * Not idempotent: index() adds up the values it is given, so running
         * this on already compiled (or loaded compiled) data fails.
         */
        public function compile() {
                foreach ($this->transitions as $key => $values) {
                        $this->transitions[$key] = $this->index($values);
                }
        }

        /**
         * Generate a text, preferring one that fits the given context.
         *
         * Falls back to context-free generation if the context yields nothing.
         * Requires compiled transitions.
         *
         * @param string|null $context text to respond to
         * @return string generated text, may be empty
         */
        public function generate($context = null) {
                if (!is_null($context)) {
                        $tokens = $this->tokenizeWithContext('', $context);
                        $generated = $this->loop($tokens);
                        if (!empty($generated)) {
                                return $generated;
                        }
                }
                $tokens = $this->tokenize('');
                return $this->loop($tokens);
        }

        /**
         * Write size and transitions as "<name>.json" to the 'chatlib' disk.
         *
         * @param string $name file name without extension
         */
        public function saveAs($name) {
                $data = [
                        'size' => $this->size,
                        'transitions' => $this->transitions,
                ];
                Storage::disk('chatlib')->put($name.'.json', json_encode($data));
        }

        /**
         * Replace size and transitions with the content of "<name>.json" from
         * the 'chatlib' disk.
         *
         * NOTE(review): the decoded data is not validated; a missing or broken
         * file leaves size and transitions unusable.
         *
         * @param string $name file name without extension
         */
        public function loadFrom($name) {
                $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
                $this->size = $data['size'];
                $this->transitions = $data['transitions'];
        }

        /**
         * Turn a [token => weight] map into a list of [token, lower, upper]
         * entries where each entry covers the half-open range [lower, upper).
         */
        private function index($arr) {
                $result = [];
                $sum = 0;
                foreach ($arr as $key => $weight) {
                        $lower = $sum;
                        $sum += $weight;
                        $result[] = [$key, $lower, $sum];
                }
                return $result;
        }

        /**
         * Append random follow-up tokens until the end marker is drawn or the
         * token list (including the leading padding/context) reaches 50 entries.
         */
        private function loop($tokens) {
                while (count($tokens) < 50) {
                        $next = $this->randomNext($tokens);
                        if ($next === ' ') break;
                        $tokens[] = $next;
                }
                return $this->untokenize($tokens);
        }

        /**
         * Draw the token following the last `$size` tokens, or the end marker
         * ' ' if nothing is known (or drawable) for them.
         */
        private function randomNext($tokens) {
                $key = $this->makeKey($tokens);
                if (!isset($this->transitions[$key])) return ' ';
                $pick = $this->pick($this->transitions[$key]);
                // An empty option list must end the text instead of adding null tokens.
                if (is_null($pick)) return ' ';
                return $pick[0];
        }

        /**
         * Weighted random choice from a compiled option list.
         *
         * @return array|null the chosen [token, lower, upper] entry, null if
         *         there is nothing to choose from
         */
        private function pick($options) {
                if (empty($options)) return null;
                // The upper bound of the last entry is the total weight.
                $max = end($options)[2] - 1;
                // random_int() rejects a maximum below its minimum.
                if ($max < 0) return null;
                $num = random_int(0, $max);
                return static::search($options, $num);
        }

        /**
         * Binary search for the entry whose range [lower, upper) contains $num.
         *
         * @param array $options list of [token, lower, upper], ascending and gapless
         * @param int $num number to locate
         * @return array|null matching entry, null for an empty list
         */
        public static function search($options, $num) {
                if (empty($options)) return null;
                $min_index = 0;
                $max_index = count($options) - 1;
                while ($min_index < $max_index) {
                        $cur_index = intval(($min_index + $max_index) / 2);
                        $cur_low = $options[$cur_index][1];
                        $cur_high = $options[$cur_index][2] - 1;
                        if ($cur_low > $num) {
                                $max_index = $cur_index;
                        } else if ($cur_high < $num) {
                                $min_index = $cur_index + 1;
                        } else {
                                $min_index = $cur_index;
                                break;
                        }
                }
                return $options[$min_index];
        }

        /**
         * Record the first $limit windows of `$size` tokens together with the
         * token following each of them. Does nothing for $limit <= 0.
         */
        private function addTransitions($tokens, $limit) {
                for ($i = 0; $i < $limit; ++$i) {
                        $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]);
                }
        }

        /**
         * Count one occurrence of $next following $tokens.
         */
        private function addTransition($tokens, $next) {
                $key = $this->makeKey($tokens);
                if (!isset($this->transitions[$key])) {
                        $this->transitions[$key] = [];
                }
                if (!isset($this->transitions[$key][$next])) {
                        $this->transitions[$key][$next] = 1;
                } else {
                        ++$this->transitions[$key][$next];
                }
        }

        /**
         * Split text at whitespace.
         *
         * @return array list of non-empty tokens, empty for blank or undecodable text
         */
        private function splitText($text) {
                if (trim($text) === '') return [];
                // NO_EMPTY: leading/trailing whitespace must not produce '' tokens.
                $parts = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
                // preg_split() yields false if the text is not valid UTF-8.
                return $parts === false ? [] : $parts;
        }

        /**
         * Build the normalised lookup key from the last `$size` tokens:
         * lowercase, punctuation stripped, digit runs collapsed to '0' and known
         * words replaced by their upper-case category name.
         */
        private function makeKey($tokens) {
                $key = $this->joinText(array_slice($tokens, -$this->size));
                $key = mb_strtolower($key);
                $key = str_replace(['.', ',', ':', ';', '!', '?', '^', '+', '-', '"', "'", '(', ')', '[', ']'], '', $key);
                $key = preg_replace('/\d+/u', '0', $key);
                foreach ($this->categories as $category => $patterns) {
                        $key = preg_replace($patterns, $category, $key);
                }
                return $key;
        }

        private function joinText($tokens) {
                return implode(' ', $tokens);
        }

        /**
         * Join tokens to text, dropping the `$size` leading padding/context tokens.
         */
        private function untokenize($tokens) {
                return $this->joinText(array_slice($tokens, $this->size));
        }

        /**
         * Tokenise text with `$size` padding tokens in front and, unless the
         * text is blank, an end marker behind.
         */
        private function tokenize($text) {
                $tokens = $this->splitText($text);
                $combined = array_merge(array_fill(0, $this->size, ' '), $tokens);
                if (!empty($tokens)) {
                        $combined[] = ' ';
                }
                return $combined;
        }

        /**
         * Like tokenize(), but the leading padding is overwritten with the last
         * tokens of the context. At most size - 1 context tokens are used, the
         * padding token directly in front of the text always stays in place.
         */
        private function tokenizeWithContext($text, $context) {
                $combined = $this->tokenize($text);
                $slots = $this->size - 1;
                // Without a free slot there is nothing to fill. This also keeps
                // array_slice() from being called with offset 0, which would
                // return the whole context instead of none of it.
                if ($slots < 1) {
                        return $combined;
                }
                $context_tokens = array_slice($this->splitText($context), -$slots);
                $count = count($context_tokens);
                // Fill from the back so the context ends right before the last padding token.
                for ($i = 0; $i < $count; ++$i) {
                        $combined[$slots - 1 - $i] = $context_tokens[$count - 1 - $i];
                }
                return $combined;
        }

        // Number of tokens per transition key.
        private $size;
        // Transition table: raw counts until compile(), cumulative ranges afterwards.
        private $transitions = [];

        // Category name => regex fragments; wrapped into full patterns by the
        // constructor. They are matched against keys that are already
        // lowercased, stripped of punctuation and have digit runs collapsed to
        // '0', which is why '0' stands in for any number below.
        private $categories = [
                'fail' => [
                        'failfish',
                        'holysm0notlikethis',
                        'notlikethis',
                        'tetobridge0',
                        'vinter0clown',
                ],

                'hype' => [
                        'dergoaparty',
                        'dinodance',
                        'elemen0party',
                        'muftaahype',
                        'luckwuhype',
                        'olliwahype',
                        'osora0umbrihype',
                        'partyhat',
                        'peepocheer',
                        'rei0hype',
                        'sakayahype',
                        'tetotroete',
                        'ticknaboargeil0',
                        'ticknahype0',
                ],

                'kappa' => [
                        'kappa(claus|hd)?',
                ],

                'jam' => [
                        '(cat|dog|rat)jam',
                        'kanash0jam',
                        'rei0jamers',
                        'samusdance',
                ],

                'lol' => [
                        // NOTE(review): cannot match, ':' is stripped from the key beforehand.
                        ':d',
                        'boothi0lul',
                        'kekw',
                        'lol',
                        'lul',
                        'rei0lul',
                        'samusgrin',
                        'ticknaauslachen',
                        'xd',
                ],

                'love' => [
                        // NOTE(review): cannot match, digits in the key are already collapsed to '0'.
                        '<3',
                        'duden0love',
                        'exec0love',
                        'krawal0heart',
                        'lodanzhug',
                        'luckwulove',
                        'luvsign',
                        'muftaal',
                        'osora0love',
                        'peepoexcitedhug',
                        'spirit0love',
                        'svenkalove',
                        'ticknaherz',
                ],

                'name' => [
                        'baba',
                        'baka',
                        'bobe?r',
                        'brog(i|or)',
                        'cfate',
                        'danny',
                        'danzi+',
                        'daruck',
                        'dennsen',
                        'dimez',
                        'divi',
                        'dud(en|i+)',
                        'ele',
                        'eri(ror)?',
                        '(name)?faker',
                        'fetti+',
                        'gamma(chuu)?',
                        'goat(buster|ie?|y)?',
                        'hitsu(yan)?',
                        'holy',
                        'jem',
                        'kala(marino)?',
                        'kromb',
                        'koval',
                        'kum(i|o|p)',
                        'lanux',
                        'len(esha|chen)',
                        'leya+',
                        'magno',
                        'malmo',
                        'markam',
                        'micha',
                        'mimsy',
                        'muf(fy|taay)',
                        'murd(elizer|i+)',
                        'nami',
                        'nula',
                        'onio',
                        'paulinche',
                        'phaaze',
                        'ralen',
                        'ramond',
                        'ray(vis)?',
                        'schulzer',
                        'skunk(ner)?',
                        'skipsy',
                        'soli+',
                        'sven(ka+)?',
                        'tantalus',
                        'teto',
                        'thalanee?',
                        'tick(i+|naldo|y+)',
                        'tofu',
                        'tr[i0]x+',
                        'vin(nie?|ny|ter)',
                        'xall',
                        'yasi',
                ],

                'pog' => [
                        // Lowercase on purpose: keys are lowercased before matching.
                        'bumble0pog',
                        'komodohype',
                        'pog',
                        'pogchamp',
                        'poggers',
                        'satono0pog',
                ],

                'run' => [
                        'dennsenboots',
                        'lodanzrun',
                        'ticknaldosprint',
                        'vinter0run',
                ],

                'wave' => [
                        'dennsenhi',
                        'dergoawave',
                        'falcnwavehi',
                        'heyguys',
                        'holysm0heyguys',
                        'muftaahey',
                        'rei0wave',
                        'sayuri0wave',
                        'shindi0wave',
                        'svenkawave',
                        'wuschlwave',
                ],

                'zb' => [
                        'aga(hnim)?',
                        'armos( knights)?',
                        'arrghus',
                        'blind',
                        'ganon(dorf)?',
                        'helma',
                        'kholdstare',
                        'lanmo(las)?',
                        'moldorm',
                        'mothula',
                        'mott[ei]',
                        'trinexx',
                        'vit(reous|ty)',
                ],

                'zd' => [
                        'eastern',
                        'desert( palace)?',
                        'gt',
                        'hera',
                        'ice ?(palace)?',
                        '(misery )?mire',
                        'pod',
                        'skull ?woods',
                        'swamp',
                        'thieve\'?s\'? ?town',
                        'tr',
                        'tt',
                ],

                'zi' => [
                        '(big|small|retro|generic) ?keys?',
                        'b[oö]gen',
                        'bombos',
                        'boots',
                        'bottle',
                        'bows?',
                        'bugnet',
                        'byrna',
                        'cape',
                        'ether',
                        'flasche',
                        'flippers',
                        'fl[uö]te',
                        'frod',
                        '(gloves?|mitts|handschuhe?)',
                        '(half|quarter) ?magic',
                        'hammer',
                        'hookshot',
                        '(ice|fire) ?rod',
                        'lampe?',
                        'laser ?bridge',
                        'mearl',
                        'mirror',
                        'moon ?pearl',
                        'mushroom',
                        'ocarina',
                        'pilz',
                        'powder',
                        'puder',
                        'quake',
                        '(red|blue) ?cane',
                        '(red|green|blue) ?(goo|potion)',
                        '(red|green|blue|baby) ?mail',
                        '(red|blue|bu|boo|good|bad|both)merang',
                        'schaufel',
                        '(gro(ss|ß)er? |kleiner? )?schlüssel',
                        'schwert',
                        'shovel',
                        'silvers',
                        'somaria',
                        'spiegel',
                        'sword',
                ],

                'zl' => [
                        'big chest',
                        'bumper( cave)?( ledge)?',
                        '(hyrule)? ?castle ?(tower)?',
                        'catfish',
                        'cave 0?',
                        'chest ?game',
                        'cutscene ?chest',
                        'damm',
                        'desert( ledge)?',
                        'dig(ging)? ?game',
                        '((back|front) of )?escape',
                        'gyl',
                        'hobo',
                        // NOTE(review): "shot" is mandatory here, unlike the optional
                        // groups in the neighbouring patterns — confirm this is intended.
                        'hook ?(shot) cave',
                        'lava ?chest',
                        '(light|dark) ?world',
                        'lss',
                        'magic bat',
                        '(dark )?(death )?mountain',
                        'ped(estal)?',
                        'pyramid( fairy)?( ledge)?',
                        'red bomb',
                        'sahasrahla',
                        'sasha',
                        'sick kid',
                        'stumpy',
                        'tile ?room',
                        'torch',
                        'zora( ledge)?',
                ],
        ];

}