git.localhorst.tv Git - alttp.git/blobdiff - app/Models/ChatLib.php
fix number and space normalization
[alttp.git] / app / Models / ChatLib.php
index 8134d136b61122c22847193e0402e28d1a0ed281..f4ab93f4798b28ebc87383b94f68e75728bbc7c9 100644 (file)
@@ -2,82 +2,67 @@
 
 namespace App\Models;
 
+use Illuminate\Support\Facades\Storage;
+
 class ChatLib {
 
-       public function addMessage($msg) {
-               $tokens = array_values(array_filter(preg_split('/\s+/u', $msg->text_content)));
+       public function addMessage(ChatLog $msg) { // Feeds the text content of one chat log entry into the model.
+               $this->addText($msg->text_content);
+       }
+
+       public function addText($text) { // Tokenizes $text and records, for every token, the preceding contexts it followed.
+               $tokens = $this->tokenize($text);
                if (empty($tokens)) return;
-               $tokens [] = '';
+               $tokens[] = ''; // trailing empty token marks the end of the message
                foreach ($tokens as $num => $token) {
                        if ($num === 0) {
-                               $this->addStart($token);
-                       }
-                       if ($num > 0) {
-                               $this->addOne($tokens[$num - 1], $token);
-                       }
-                       if ($num > 1) {
-                               $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 2) {
-                               $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 3) {
-                               $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 4) {
-                               $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
+                               $this->addTransition([], $token); // first token is recorded under the empty state
+                       } else {
+                               $start = max(0, $num - $this->size - 1); // NOTE(review): longest state is size + 1 tokens, but randomNext() looks up at most size tokens - confirm intent
+                               $end = $num;
+                               for ($i = $start; $i < $end; ++$i) {
+                                       $this->addTransition(array_slice($tokens, $i, $end - $i), $token); // state = tokens $i .. $end - 1
+                                       if ($end - $i < 5) break; // stop once a state shorter than 5 tokens has been recorded
+                               }
                        }
                }
        }
 
        public function compile() {
-               $this->start = $this->index($this->start);
-               foreach ($this->one as $key => $value) {
-                       $this->one[$key] = $this->index($this->one[$key]);
-                       if (empty($this->one[$key])) {
-                               unset($this->one[$key]);
-                       }
-               }
-               foreach ($this->two as $key => $value) {
-                       $this->two[$key] = $this->index($this->two[$key]);
-                       if (empty($this->two[$key])) {
-                               unset($this->two[$key]);
-                       }
-               }
-               foreach ($this->three as $key => $value) {
-                       $this->three[$key] = $this->index($this->three[$key]);
-                       if (empty($this->three[$key])) {
-                               unset($this->three[$key]);
-                       }
-               }
-               foreach ($this->four as $key => $value) {
-                       $this->four[$key] = $this->index($this->four[$key]);
-                       if (empty($this->four[$key])) {
-                               unset($this->four[$key]);
-                       }
-               }
-               foreach ($this->five as $key => $value) {
-                       $this->five[$key] = $this->index($this->five[$key]);
-                       if (empty($this->five[$key])) {
-                               unset($this->five[$key]);
+               foreach ($this->transitions as $key => $value) { // single pass over all states, whatever their context length
+                       $this->transitions[$key] = $this->index($this->transitions[$key]); // replace the raw counts with the table built by index()
+                       if (empty($this->transitions[$key])) { // index() kept no follower for this state
+                               unset($this->transitions[$key]); // so the state is dropped entirely
                        }
                }
        }
 
-       public function generate($limit = 75) {
-               $tokens = [];
-               $start = $this->randomStart();
-               $tokens[] = $start;
-               $generated = $start;
+       public function generate($limit = 100) { // Appends random tokens until the text is at least $limit bytes long or no further token is drawn.
+               $tokens = ['']; // seed with an empty token so the first lookup uses the empty state
+               $generated = '';
                while (strlen($generated) < $limit) {
                        $next = $this->randomNext($tokens);
-                       if (empty($next)) break;
+                       if ($next === '') break; // empty string: end-of-message marker drawn, or no known continuation
                        $tokens[] = $next;
-                       $generated .= ' '.$next;
+                       $generated .= $next; // no separator added: tokenize() keeps the non-word runs between words as tokens
                }
                return $generated;
        }
 
+       public function saveAs($name) { // Writes size and transitions as JSON to "<name>.json" on the "chatlib" storage disk.
+               $data = [
+                       'size' => $this->size,
+                       'transitions' => $this->transitions,
+               ];
+               Storage::disk('chatlib')->put($name.'.json', json_encode($data)); // the result of json_encode()/put() is not checked
+       }
+
+       public function loadFrom($name) { // Restores size and transitions from "<name>.json" on the "chatlib" storage disk.
+               $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true); // decoded as associative arrays; not validated
+               $this->size = $data['size'];
+               $this->transitions = $data['transitions'];
+       }
+
        private function index($arr) {
                $result = [];
                $sum = 0;
@@ -85,16 +70,18 @@ class ChatLib {
                        $weight = $entry['count'];
                        if ($weight == 1) continue;
                        $lower = $sum;
-                       $sum += intval(pow($weight, 1.4));
+                       $sum += $weight;
                        $examples = [];
                        if (is_array(end($entry['examples']))) {
                                // already processed
                                $examples = $entry['examples'];
+                       } else if ($key === ' ') {
+                               $examples = [[' ', 0, 1]];
                        } else {
                                $subsum = 0;
                                foreach ($entry['examples'] as $example => $subweight) {
                                        $sublower = $subsum;
-                                       $subsum += $subweight * $subweight;
+                                       $subsum += $subweight;
                                        $examples[] = [$example, $sublower, $subsum];
                                }
                        }
@@ -103,79 +90,18 @@ class ChatLib {
                return $result;
        }
 
-       private function randomStart() {
-               $pick = $this->pick($this->start);
-               if (is_null($pick)) return '';
-               return $this->exampleOf($pick);
-       }
-
        private function randomNext($tokens) {
                $cnt = count($tokens);
-               $picks = [];
-               if ($cnt >= 5) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 5, 5));
-                       if (isset($this->five[$cmb])) {
-                               $pick = $this->pick($this->five[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 10,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 4) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 4, 4));
-                       if (isset($this->four[$cmb])) {
-                               $pick = $this->pick($this->four[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 12,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 3) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 3, 3));
-                       if (isset($this->three[$cmb])) {
-                               $pick = $this->pick($this->three[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 14,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 2) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 2, 2));
-                       if (isset($this->two[$cmb])) {
-                               $pick = $this->pick($this->two[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 4,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 1) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 1, 1));
-                       if (isset($this->one[$cmb])) {
-                               $pick = $this->pick($this->one[$cmb]);
+               for ($size = min($this->size, $cnt); $size > 0; --$size) { // try the longest available context first, then shorter ones
+                       $cmb = $this->generalize(array_slice($tokens, -$size)); // lookup key built from the last $size tokens
+                       if (isset($this->transitions[$cmb])) {
+                               $pick = $this->pick($this->transitions[$cmb]);
                                if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 2,
-                                               'examples' => $pick[3],
-                                       ];
+                                       return $this->exampleOf($pick); // the first context that yields a pick decides the next token
                                }
                        }
                }
-               if (empty($picks)) return '';
-               $picks = $this->index($picks);
-               $pick = $this->pick($picks);
-               return $this->exampleOf($pick);
+               return ''; // no context of any length has a known continuation
        }
 
        private function pick($options) {
@@ -200,49 +126,12 @@ class ChatLib {
                return $options[$min_index];
        }
 
-       private function addStart($token) {
-               if (empty($token)) return;
-               $this->increment($this->start, $token);
-       }
-
-       private function addOne($one, $token) {
-               $cmb = $this->generalize([$one]);
-               if (!isset($this->one[$cmb])) {
-                       $this->one[$cmb] = [];
-               }
-               $this->increment($this->one[$cmb], $token);
-       }
-
-       private function addTwo($one, $two, $token) {
-               $cmb = $this->generalize([$one, $two]);
-               if (!isset($this->two[$cmb])) {
-                       $this->two[$cmb] = [];
-               }
-               $this->increment($this->two[$cmb], $token);
-       }
-
-       private function addThree($one, $two, $three, $token) {
-               $cmb = $this->generalize([$one, $two, $three]);
-               if (!isset($this->three[$cmb])) {
-                       $this->three[$cmb] = [];
+       private function addTransition($state, $next) { // Counts $next as a follower of the token list $state.
+               $cmb = $this->generalize($state); // normalized key; an empty $state yields the empty string
+               if (!isset($this->transitions[$cmb])) {
+                       $this->transitions[$cmb] = []; // first time this state is seen
                }
-               $this->increment($this->three[$cmb], $token);
-       }
-
-       private function addFour($one, $two, $three, $four, $token) {
-               $cmb = $this->generalize([$one, $two, $three, $four]);
-               if (!isset($this->four[$cmb])) {
-                       $this->four[$cmb] = [];
-               }
-               $this->increment($this->four[$cmb], $token);
-       }
-
-       private function addFive($one, $two, $three, $four, $five, $token) {
-               $cmb = $this->generalize([$one, $two, $three, $four, $five]);
-               if (!isset($this->five[$cmb])) {
-                       $this->five[$cmb] = [];
-               }
-               $this->increment($this->five[$cmb], $token);
+               $this->increment($this->transitions[$cmb], $next); // increment() receives the per-state table by reference
        }
 
        private function increment(&$which, $token) {
@@ -263,13 +152,36 @@ class ChatLib {
                }
        }
 
+       private function tokenize($str) { // Splits $str at word boundaries into a list of tokens; word runs and the non-word runs between them are both kept.
+               return array_values(array_filter(preg_split('/\b/u', $str) ?: [], function($token) { // preg_split() returns false on failure (e.g. malformed UTF-8); treat that as "no tokens" instead of passing false to array_filter()
+                       if ($token === '') return false; // drop empty pieces
+                       if (preg_match('/cheer\d+/u', strtolower($token))) return false; // drop tokens containing "cheer" followed by digits
+                       return true;
+               }));
+       }
+
        private function generalize($tokens) {
                $str = '';
                foreach ($tokens as $token) {
-                       $replaced = preg_replace('/\W/u', '', $token);
-                       $replaced = preg_replace('/\d+/', '0', $replaced);
-                       $replaced = strtolower(trim($replaced));
-                       $str .= empty($replaced) ? $token : $replaced;
+                       $replaced = preg_replace('/\d+/u', '0', $token);
+                       $replaced = preg_replace('/\s+/u', ' ', $replaced);
+                       $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
+                       $replaced = strtolower($replaced);
+                       foreach ($this->aliases as $canonical => $variants) {
+                               if (in_array($replaced, $variants)) {
+                                       $replaced = $canonical;
+                                       break;
+                               }
+                               if ($replaced === $canonical) {
+                                       break;
+                               }
+                       }
+                       $str .= $replaced;
+               }
+               foreach ($this->categories as $category => $patterns) {
+                       foreach ($patterns as $pattern) {
+                               $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str);
+                       }
                }
                return $str;
        }
@@ -279,11 +191,277 @@ class ChatLib {
                return $example[0];
        }
 
-       private $start = [];
-       private $one = [];
-       private $two = [];
-       private $three = [];
-       private $four = [];
-       private $five = [];
+       private $size = 7; // largest number of trailing tokens randomNext() uses as lookup context
+       private $transitions = []; // generalized state string => follower data (raw counts until compile() replaces them with index() output)
+
+       private $aliases = [ // canonical word => variants that generalize() rewrites to it (compared after lowercasing)
+               'chest' => ['kiste'],
+               'einen' => ['n', 'nen'],
+               'musik' => ['mukke'],
+               'schade' => ['schad'],
+       ];
+
+       private $categories = [ // category => regex fragments; generalize() wraps each in \b...\b and replaces matches in the lowercased text with %CATEGORY%
+               'fail' => [
+                       'failfish',
+                       'holysm0notlikethis',
+                       'notlikethis',
+                       'tetobridge0',
+                       'vinter0clown',
+               ],
+
+               'hype' => [
+                       'dergoaparty',
+                       'dinodance',
+                       'elemen0party',
+                       'muftaahype',
+                       'luckwuhype',
+                       'olliwahype',
+                       'osora0umbrihype',
+                       'partyhat',
+                       'peepocheer',
+                       'rei0hype',
+                       'sakayahype',
+                       'tetotroete',
+                       'ticknaboargeil0',
+                       'ticknahype0',
+               ],
+
+               'kappa' => [
+                       'kappa(claus|hd)?',
+               ],
+
+               'jam' => [
+                       '(cat|dog|rat)jam',
+                       'kanash0jam',
+                       'rei0jamers',
+                       'samusdance',
+               ],
+
+               'lol' => [
+                       ':d',
+                       'boothi0lul',
+                       'kekw',
+                       'lol',
+                       'lul',
+                       'rei0lul',
+                       'samusgrin',
+                       'ticknaauslachen',
+                       'xd',
+               ],
+
+               'love' => [
+                       '<3',
+                       'duden0love',
+                       'exec0love',
+                       'krawal0heart',
+                       'lodanzhug',
+                       'luckwulove',
+                       'luvsign',
+                       'muftaal',
+                       'osora0love',
+                       'peepoexcitedhug',
+                       'spirit0love',
+                       'svenkalove',
+                       'ticknaherz',
+               ],
+
+               'name' => [
+                       'baba',
+                       'baka',
+                       'bobe?r',
+                       'brog(i|or)',
+                       'cfate',
+                       'danny',
+                       'danzi+',
+                       'daruck',
+                       'dennsen',
+                       'dimez',
+                       'divi',
+                       'dud(en|i+)',
+                       'ele',
+                       'eri(ror)?',
+                       '(name)?faker',
+                       'fetti+',
+                       'gamma(chuu)?',
+                       'goat(buster|ie?|y)?',
+                       'hitsu(yan)?',
+                       'holy',
+                       'jem',
+                       'kala(marino)?',
+                       'kromb',
+                       'koval',
+                       'kum(i|o|p)',
+                       'lanux',
+                       'len(esha|chen)',
+                       'leya+',
+                       'magno',
+                       'malmo',
+                       'markam',
+                       'micha',
+                       'mimsy',
+                       'muf(fy|taay)',
+                       'murd(elizer|i+)',
+                       'nami',
+                       'nula',
+                       'onio',
+                       'paulinche',
+                       'phaaze',
+                       'ralen',
+                       'ramond',
+                       'ray(vis)?',
+                       'schulzer',
+                       'skunk(ner)?',
+                       'skipsy',
+                       'soli+',
+                       'sven(ka+)?',
+                       'tantalus',
+                       'teto',
+                       'thalanee?',
+                       'tick(i+|naldo|y+)',
+                       'tofu',
+                       'tr[i0]x+',
+                       'vin(nie?|ny|ter)',
+                       'xall',
+                       'yasi',
+               ],
+
+               'pog' => [
+                       'bumble0pog', // must be all lowercase: generalize() lowercases the text and matches case-sensitively
+                       'komodohype',
+                       'pog',
+                       'pogchamp',
+                       'poggers',
+                       'satono0pog',
+               ],
+
+               'run' => [
+                       'dennsenboots',
+                       'lodanzrun',
+                       'ticknaldosprint',
+                       'vinter0run',
+               ],
+
+               'wave' => [
+                       'dennsenhi',
+                       'dergoawave',
+                       'heyguys',
+                       'holysm0heyguys',
+                       'muftaahey',
+                       'rei0wave',
+                       'sayuri0wave',
+                       'shindi0wave',
+                       'svenkawave',
+                       'wuschlwave',
+               ],
+
+               'zelda_boss' => [
+                       'aga(hnim)?',
+                       'armos( knights)?',
+                       'arrghus',
+                       'blind',
+                       'ganon(dorf)?',
+                       'helma',
+                       'kholdstare',
+                       'lanmo(las)?',
+                       'moldorm',
+                       'mothula',
+                       'mott[ei]',
+                       'trinexx',
+                       'vit(reous|ty)',
+               ],
+
+               'zelda_dungeon' => [
+                       'eastern',
+                       'desert( palace)?',
+                       'gt',
+                       'hera',
+                       'ice ?(palace)?',
+                       '(misery )?mire',
+                       'pod',
+                       'skull ?woods',
+                       'swamp',
+                       'thieve\'?s\'? ?town',
+                       'tr',
+                       'tt',
+               ],
+
+               'zelda_item' => [
+                       '(big|small|retro|generic) ?keys?',
+                       'b[oö]gen',
+                       'bombos',
+                       'boots',
+                       'bottle',
+                       'bows?',
+                       'bugnet',
+                       'byrna',
+                       'cape',
+                       'ether',
+                       'flasche',
+                       'flippers',
+                       'fl[uö]te',
+                       'frod',
+                       '(gloves?|mitts|handschuhe?)',
+                       '(half|quarter) ?magic',
+                       'hammer',
+                       'hookshot',
+                       '(ice|fire) ?rod',
+                       'lampe?',
+                       'laser ?bridge',
+                       'mearl',
+                       'mirror',
+                       'moon ?pearl',
+                       'mushroom',
+                       'ocarina',
+                       'pilz',
+                       'powder',
+                       'puder',
+                       'quake',
+                       '(red|blue) ?cane',
+                       '(red|green|blue) ?(goo|potion)',
+                       '(red|green|blue|baby) ?mail',
+                       '(red|blue|bu|boo|good|bad|both)merang',
+                       'schaufel',
+                       '(gro(ss|ß)er? |kleiner? )?schlüssel',
+                       'schwert',
+                       'shovel',
+                       'silvers',
+                       'somaria',
+                       'spiegel',
+                       'sword',
+               ],
+
+               'zelda_location' => [
+                       'big chest',
+                       'bumper( cave)?( ledge)?',
+                       '(hyrule)? ?castle ?(tower)?',
+                       'catfish',
+                       'cave 0?',
+                       'chest ?game',
+                       'cutscene ?chest',
+                       'damm',
+                       'desert( ledge)?',
+                       'dig(ging)? ?game',
+                       '((back|front) of )?escape',
+                       'gyl',
+                       'hobo',
+                       'hook ?(shot) cave',
+                       'lava ?chest',
+                       '(light|dark) ?world',
+                       'lss',
+                       'magic bat',
+                       '(dark )?(death )?mountain',
+                       'ped(estal)?',
+                       'pyramid( fairy)?( ledge)?',
+                       'red bomb',
+                       'sahasrahla',
+                       'sasha',
+                       'sick kid',
+                       'stumpy',
+                       'tile ?room',
+                       'torch',
+                       'zora( ledge)?',
+               ],
+       ];
 
 }