git.localhorst.tv Git - alttp.git/commitdiff
slightly improved message generation
author Daniel Karbach <daniel.karbach@localhorst.tv>
Tue, 7 May 2024 18:09:14 +0000 (20:09 +0200)
committer Daniel Karbach <daniel.karbach@localhorst.tv>
Tue, 7 May 2024 18:09:14 +0000 (20:09 +0200)
app/Console/Commands/ChatlibDatabase.php
app/Models/ChatLib.php

index 13c881a902a5589a8b31849c422dbf4259dbbf4a..cb7978f6e92bb84074c183354dc85f04766d1d36 100644 (file)
@@ -30,7 +30,8 @@ class ChatlibDatabase extends Command {
        public function handle() {
                $db = new ChatLib();
 
-               ChatLog::where('banned', '=', false)
+               ChatLog::where('type', '=', 'chat')
+                       ->where('banned', '=', false)
                        ->whereNotNull('evaluated_at')
                        ->chunk(500, function ($msgs) use ($db) {
                                foreach ($msgs as $msg) {
index d6d4f56e71bc57ffef6d545cc8f2913402070fa2..8134d136b61122c22847193e0402e28d1a0ed281 100644 (file)
@@ -5,22 +5,28 @@ namespace App\Models;
 class ChatLib {
 
        public function addMessage($msg) {
-               $tokens = array_values(array_filter(preg_split('/\b/', $msg->text_content)));
+               $tokens = array_values(array_filter(preg_split('/\s+/u', $msg->text_content)));
                if (empty($tokens)) return;
                $tokens [] = '';
                foreach ($tokens as $num => $token) {
                        if ($num === 0) {
                                $this->addStart($token);
-                       } else if ($num === 1) {
-                               $this->addOne($tokens[0], $token);
-                       } else if ($num === 2) {
-                               $this->addOne($tokens[1], $token);
-                               $this->addTwo($tokens[0], $tokens[1], $token);
-                       } else {
+                       }
+                       if ($num > 0) {
                                $this->addOne($tokens[$num - 1], $token);
+                       }
+                       if ($num > 1) {
                                $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token);
+                       }
+                       if ($num > 2) {
                                $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
                        }
+                       if ($num > 3) {
+                               $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
+                       }
+                       if ($num > 4) {
+                               $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
+                       }
                }
        }
 
@@ -28,16 +34,37 @@ class ChatLib {
                $this->start = $this->index($this->start);
                foreach ($this->one as $key => $value) {
                        $this->one[$key] = $this->index($this->one[$key]);
+                       if (empty($this->one[$key])) {
+                               unset($this->one[$key]);
+                       }
                }
                foreach ($this->two as $key => $value) {
                        $this->two[$key] = $this->index($this->two[$key]);
+                       if (empty($this->two[$key])) {
+                               unset($this->two[$key]);
+                       }
                }
                foreach ($this->three as $key => $value) {
                        $this->three[$key] = $this->index($this->three[$key]);
+                       if (empty($this->three[$key])) {
+                               unset($this->three[$key]);
+                       }
+               }
+               foreach ($this->four as $key => $value) {
+                       $this->four[$key] = $this->index($this->four[$key]);
+                       if (empty($this->four[$key])) {
+                               unset($this->four[$key]);
+                       }
+               }
+               foreach ($this->five as $key => $value) {
+                       $this->five[$key] = $this->index($this->five[$key]);
+                       if (empty($this->five[$key])) {
+                               unset($this->five[$key]);
+                       }
                }
        }
 
-       public function generate($limit = 50) {
+       public function generate($limit = 75) {
                $tokens = [];
                $start = $this->randomStart();
                $tokens[] = $start;
@@ -46,7 +73,7 @@ class ChatLib {
                        $next = $this->randomNext($tokens);
                        if (empty($next)) break;
                        $tokens[] = $next;
-                       $generated .= $next;
+                       $generated .= ' '.$next;
                }
                return $generated;
        }
@@ -54,43 +81,105 @@ class ChatLib {
        private function index($arr) {
                $result = [];
                $sum = 0;
-               asort($arr);
-               foreach ($arr as $key => $weight) {
+               foreach ($arr as $key => $entry) {
+                       $weight = $entry['count'];
+                       if ($weight == 1) continue;
                        $lower = $sum;
-                       $sum += $weight;
-                       $result[] = [$key, $lower, $sum];
+                       $sum += intval(pow($weight, 1.4));
+                       $examples = [];
+                       if (is_array(end($entry['examples']))) {
+                               // already processed
+                               $examples = $entry['examples'];
+                       } else {
+                               $subsum = 0;
+                               foreach ($entry['examples'] as $example => $subweight) {
+                                       $sublower = $subsum;
+                                       $subsum += $subweight * $subweight;
+                                       $examples[] = [$example, $sublower, $subsum];
+                               }
+                       }
+                       $result[] = [$key, $lower, $sum, $examples];
                }
                return $result;
        }
 
        private function randomStart() {
-               return $this->pick($this->start);
+               $pick = $this->pick($this->start);
+               if (is_null($pick)) return '';
+               return $this->exampleOf($pick);
        }
 
        private function randomNext($tokens) {
                $cnt = count($tokens);
+               $picks = [];
+               if ($cnt >= 5) {
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - 5, 5));
+                       if (isset($this->five[$cmb])) {
+                               $pick = $this->pick($this->five[$cmb]);
+                               if (!is_null($pick)) {
+                                       $picks[$pick[0]] = [
+                                               'count' => 10,
+                                               'examples' => $pick[3],
+                                       ];
+                               }
+                       }
+               }
+               if ($cnt >= 4) {
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - 4, 4));
+                       if (isset($this->four[$cmb])) {
+                               $pick = $this->pick($this->four[$cmb]);
+                               if (!is_null($pick)) {
+                                       $picks[$pick[0]] = [
+                                               'count' => 12,
+                                               'examples' => $pick[3],
+                                       ];
+                               }
+                       }
+               }
                if ($cnt >= 3) {
-                       $cmb = $tokens[$cnt - 3].$tokens[$cnt - 2].$tokens[$cnt - 1];
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - 3, 3));
                        if (isset($this->three[$cmb])) {
-                               return $this->pick($this->three[$cmb]);
+                               $pick = $this->pick($this->three[$cmb]);
+                               if (!is_null($pick)) {
+                                       $picks[$pick[0]] = [
+                                               'count' => 14,
+                                               'examples' => $pick[3],
+                                       ];
+                               }
                        }
                }
                if ($cnt >= 2) {
-                       $cmb = $tokens[$cnt - 2].$tokens[$cnt - 1];
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - 2, 2));
                        if (isset($this->two[$cmb])) {
-                               return $this->pick($this->two[$cmb]);
+                               $pick = $this->pick($this->two[$cmb]);
+                               if (!is_null($pick)) {
+                                       $picks[$pick[0]] = [
+                                               'count' => 4,
+                                               'examples' => $pick[3],
+                                       ];
+                               }
                        }
                }
                if ($cnt >= 1) {
-                       $cmb = $tokens[$cnt - 1];
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - 1, 1));
                        if (isset($this->one[$cmb])) {
-                               return $this->pick($this->one[$cmb]);
+                               $pick = $this->pick($this->one[$cmb]);
+                               if (!is_null($pick)) {
+                                       $picks[$pick[0]] = [
+                                               'count' => 2,
+                                               'examples' => $pick[3],
+                                       ];
+                               }
                        }
                }
-               return '';
+               if (empty($picks)) return '';
+               $picks = $this->index($picks);
+               $pick = $this->pick($picks);
+               return $this->exampleOf($pick);
        }
 
        private function pick($options) {
+               if (empty($options)) return null;
                $max = end($options)[2];
                $num = random_int(0, $max);
                $min_index = 0;
@@ -108,56 +197,93 @@ class ChatLib {
                                break;
                        }
                }
-               return $options[$min_index][0];
+               return $options[$min_index];
        }
 
        private function addStart($token) {
                if (empty($token)) return;
-               if (!isset($this->start[$token])) {
-                       $this->start[$token] = 1;
-               } else {
-                       ++$this->start[$token];
-               }
+               $this->increment($this->start, $token);
        }
 
        private function addOne($one, $token) {
-               if (!isset($this->one[$one])) {
-                       $this->one[$one] = [];
-               }
-               if (!isset($this->one[$one][$token])) {
-                       $this->one[$one][$token] = 1;
-               } else {
-                       ++$this->one[$one][$token];
+               $cmb = $this->generalize([$one]);
+               if (!isset($this->one[$cmb])) {
+                       $this->one[$cmb] = [];
                }
+               $this->increment($this->one[$cmb], $token);
        }
 
        private function addTwo($one, $two, $token) {
-               $cmb = $one.$two;
+               $cmb = $this->generalize([$one, $two]);
                if (!isset($this->two[$cmb])) {
                        $this->two[$cmb] = [];
                }
-               if (!isset($this->two[$cmb][$token])) {
-                       $this->two[$cmb][$token] = 1;
-               } else {
-                       ++$this->two[$cmb][$token];
-               }
+               $this->increment($this->two[$cmb], $token);
        }
 
        private function addThree($one, $two, $three, $token) {
-               $cmb = $one.$two.$three;
+               $cmb = $this->generalize([$one, $two, $three]);
                if (!isset($this->three[$cmb])) {
                        $this->three[$cmb] = [];
                }
-               if (!isset($this->three[$cmb][$token])) {
-                       $this->three[$cmb][$token] = 1;
+               $this->increment($this->three[$cmb], $token);
+       }
+
+       private function addFour($one, $two, $three, $four, $token) {
+               $cmb = $this->generalize([$one, $two, $three, $four]);
+               if (!isset($this->four[$cmb])) {
+                       $this->four[$cmb] = [];
+               }
+               $this->increment($this->four[$cmb], $token);
+       }
+
+       private function addFive($one, $two, $three, $four, $five, $token) {
+               $cmb = $this->generalize([$one, $two, $three, $four, $five]);
+               if (!isset($this->five[$cmb])) {
+                       $this->five[$cmb] = [];
+               }
+               $this->increment($this->five[$cmb], $token);
+       }
+
+       private function increment(&$which, $token) {
+               $generalized = $this->generalize([$token]);
+               if (!isset($which[$generalized])) {
+                       $which[$generalized] = [
+                               'count' => 1,
+                               'examples' => [],
+                       ];
+                       $which[$generalized]['examples'][$token] = 1;
                } else {
-                       ++$this->three[$cmb][$token];
+                       ++$which[$generalized]['count'];
+                       if (!isset($which[$generalized]['examples'][$token])) {
+                               $which[$generalized]['examples'][$token] = 1;
+                       } else {
+                               ++$which[$generalized]['examples'][$token];
+                       }
                }
        }
 
+       private function generalize($tokens) {
+               $str = '';
+               foreach ($tokens as $token) {
+                       $replaced = preg_replace('/\W/u', '', $token);
+                       $replaced = preg_replace('/\d+/', '0', $replaced);
+                       $replaced = strtolower(trim($replaced));
+                       $str .= empty($replaced) ? $token : $replaced;
+               }
+               return $str;
+       }
+
+       private function exampleOf($pick) {
+               $example = $this->pick($pick[3]);
+               return $example[0];
+       }
+
        private $start = [];
        private $one = [];
        private $two = [];
        private $three = [];
+       private $four = [];
+       private $five = [];
 
 }