git.localhorst.tv Git - alttp.git/commitdiff
try to improve message generation
author Daniel Karbach <daniel.karbach@localhorst.tv>
Wed, 8 May 2024 11:09:05 +0000 (13:09 +0200)
committer Daniel Karbach <daniel.karbach@localhorst.tv>
Wed, 8 May 2024 11:09:05 +0000 (13:09 +0200)
app/Console/Commands/ChatlibDatabase.php
app/Models/ChatLib.php

index cb7978f6e92bb84074c183354dc85f04766d1d36..bfe0135c4b8e00e684a9f2974faf862f447296fd 100644 (file)
@@ -28,21 +28,35 @@ class ChatlibDatabase extends Command {
         * @return int
         */
        public function handle() {
-               $db = new ChatLib();
+               $de = new ChatLib();
+               $en = new ChatLib();
 
                ChatLog::where('type', '=', 'chat')
                        ->where('banned', '=', false)
                        ->whereNotNull('evaluated_at')
-                       ->chunk(500, function ($msgs) use ($db) {
+                       ->chunk(5000, function ($msgs) use ($de, $en) {
                                foreach ($msgs as $msg) {
-                                       $db->addMessage($msg);
+                                       if ($msg->detected_language === 'de') {
+                                               $de->addMessage($msg);
+                                       } else if ($msg->detected_language === 'en') {
+                                               $en->addMessage($msg);
+                                       } else if (is_null($msg->detected_language)) {
+                                               $de->addMessage($msg);
+                                               $en->addMessage($msg);
+                                       }
                                }
                        });
 
-               $db->compile();
+               $de->compile();
+               $en->compile();
 
+               $this->line('');
                for ($i = 0; $i < 50; ++$i) {
-                       $this->line($db->generate());
+                       $this->line($de->generate());
+               }
+               $this->line('');
+               for ($i = 0; $i < 50; ++$i) {
+                       $this->line($en->generate());
                }
 
                return 0;
index 8134d136b61122c22847193e0402e28d1a0ed281..417b18f3dd6b01f1cbeb5b923d71148ebb995cd3 100644 (file)
@@ -5,75 +5,41 @@ namespace App\Models;
 class ChatLib {
 
        public function addMessage($msg) {
-               $tokens = array_values(array_filter(preg_split('/\s+/u', $msg->text_content)));
+               $tokens = $this->tokenize($msg->text_content);
                if (empty($tokens)) return;
-               $tokens [] = '';
+               $tokens[] = '';
                foreach ($tokens as $num => $token) {
                        if ($num === 0) {
-                               $this->addStart($token);
-                       }
-                       if ($num > 0) {
-                               $this->addOne($tokens[$num - 1], $token);
-                       }
-                       if ($num > 1) {
-                               $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 2) {
-                               $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 3) {
-                               $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
-                       }
-                       if ($num > 4) {
-                               $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
+                               $this->addTransition([], $token);
+                       } else {
+                               $start = max(0, $num - $this->size);
+                               $end = $num;
+                               for ($i = $start; $i < $end; ++$i) {
+                                       $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
+                                       if ($end - $i < 3) break;
+                               }
                        }
                }
        }
 
        public function compile() {
-               $this->start = $this->index($this->start);
-               foreach ($this->one as $key => $value) {
-                       $this->one[$key] = $this->index($this->one[$key]);
-                       if (empty($this->one[$key])) {
-                               unset($this->one[$key]);
-                       }
-               }
-               foreach ($this->two as $key => $value) {
-                       $this->two[$key] = $this->index($this->two[$key]);
-                       if (empty($this->two[$key])) {
-                               unset($this->two[$key]);
-                       }
-               }
-               foreach ($this->three as $key => $value) {
-                       $this->three[$key] = $this->index($this->three[$key]);
-                       if (empty($this->three[$key])) {
-                               unset($this->three[$key]);
-                       }
-               }
-               foreach ($this->four as $key => $value) {
-                       $this->four[$key] = $this->index($this->four[$key]);
-                       if (empty($this->four[$key])) {
-                               unset($this->four[$key]);
-                       }
-               }
-               foreach ($this->five as $key => $value) {
-                       $this->five[$key] = $this->index($this->five[$key]);
-                       if (empty($this->five[$key])) {
-                               unset($this->five[$key]);
+               foreach ($this->transitions as $key => $value) {
+                       $this->transitions[$key] = $this->index($this->transitions[$key]);
+                       if (empty($this->transitions[$key])) {
+                               unset($this->transitions[$key]);
                        }
                }
+               echo 'size: ', number_format(strlen(json_encode($this->transitions)), 0), PHP_EOL;
        }
 
-       public function generate($limit = 75) {
+       public function generate($limit = 100) {
                $tokens = [];
-               $start = $this->randomStart();
-               $tokens[] = $start;
-               $generated = $start;
+               $generated = '';
                while (strlen($generated) < $limit) {
                        $next = $this->randomNext($tokens);
                        if (empty($next)) break;
                        $tokens[] = $next;
-                       $generated .= ' '.$next;
+                       $generated .= $next;
                }
                return $generated;
        }
@@ -85,7 +51,7 @@ class ChatLib {
                        $weight = $entry['count'];
                        if ($weight == 1) continue;
                        $lower = $sum;
-                       $sum += intval(pow($weight, 1.4));
+                       $sum += $weight;
                        $examples = [];
                        if (is_array(end($entry['examples']))) {
                                // already processed
@@ -103,79 +69,18 @@ class ChatLib {
                return $result;
        }
 
-       private function randomStart() {
-               $pick = $this->pick($this->start);
-               if (is_null($pick)) return '';
-               return $this->exampleOf($pick);
-       }
-
        private function randomNext($tokens) {
                $cnt = count($tokens);
-               $picks = [];
-               if ($cnt >= 5) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 5, 5));
-                       if (isset($this->five[$cmb])) {
-                               $pick = $this->pick($this->five[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 10,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 4) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 4, 4));
-                       if (isset($this->four[$cmb])) {
-                               $pick = $this->pick($this->four[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 12,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 3) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 3, 3));
-                       if (isset($this->three[$cmb])) {
-                               $pick = $this->pick($this->three[$cmb]);
+               for ($size = min($this->size, $cnt); $size >= 0; --$size) {
+                       $cmb = $this->generalize(array_slice($tokens, $cnt - $size, $size));
+                       if (isset($this->transitions[$cmb])) {
+                               $pick = $this->pick($this->transitions[$cmb]);
                                if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 14,
-                                               'examples' => $pick[3],
-                                       ];
+                                       return $this->exampleOf($pick);
                                }
                        }
                }
-               if ($cnt >= 2) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 2, 2));
-                       if (isset($this->two[$cmb])) {
-                               $pick = $this->pick($this->two[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 4,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if ($cnt >= 1) {
-                       $cmb = $this->generalize(array_slice($tokens, $cnt - 1, 1));
-                       if (isset($this->one[$cmb])) {
-                               $pick = $this->pick($this->one[$cmb]);
-                               if (!is_null($pick)) {
-                                       $picks[$pick[0]] = [
-                                               'count' => 2,
-                                               'examples' => $pick[3],
-                                       ];
-                               }
-                       }
-               }
-               if (empty($picks)) return '';
-               $picks = $this->index($picks);
-               $pick = $this->pick($picks);
-               return $this->exampleOf($pick);
+               return '';
        }
 
        private function pick($options) {
@@ -200,49 +105,12 @@ class ChatLib {
                return $options[$min_index];
        }
 
-       private function addStart($token) {
-               if (empty($token)) return;
-               $this->increment($this->start, $token);
-       }
-
-       private function addOne($one, $token) {
-               $cmb = $this->generalize([$one]);
-               if (!isset($this->one[$cmb])) {
-                       $this->one[$cmb] = [];
-               }
-               $this->increment($this->one[$cmb], $token);
-       }
-
-       private function addTwo($one, $two, $token) {
-               $cmb = $this->generalize([$one, $two]);
-               if (!isset($this->two[$cmb])) {
-                       $this->two[$cmb] = [];
-               }
-               $this->increment($this->two[$cmb], $token);
-       }
-
-       private function addThree($one, $two, $three, $token) {
-               $cmb = $this->generalize([$one, $two, $three]);
-               if (!isset($this->three[$cmb])) {
-                       $this->three[$cmb] = [];
-               }
-               $this->increment($this->three[$cmb], $token);
-       }
-
-       private function addFour($one, $two, $three, $four, $token) {
-               $cmb = $this->generalize([$one, $two, $three, $four]);
-               if (!isset($this->four[$cmb])) {
-                       $this->four[$cmb] = [];
-               }
-               $this->increment($this->four[$cmb], $token);
-       }
-
-       private function addFive($one, $two, $three, $four, $five, $token) {
-               $cmb = $this->generalize([$one, $two, $three, $four, $five]);
-               if (!isset($this->five[$cmb])) {
-                       $this->five[$cmb] = [];
+       private function addTransition($state, $next) {
+               $cmb = $this->generalize($state);
+               if (!isset($this->transitions[$cmb])) {
+                       $this->transitions[$cmb] = [];
                }
-               $this->increment($this->five[$cmb], $token);
+               $this->increment($this->transitions[$cmb], $next);
        }
 
        private function increment(&$which, $token) {
@@ -263,12 +131,15 @@ class ChatLib {
                }
        }
 
+       private function tokenize($str) {
+               return array_values(array_filter(preg_split('/\b/u', $str)));
+       }
+
        private function generalize($tokens) {
                $str = '';
                foreach ($tokens as $token) {
-                       $replaced = preg_replace('/\W/u', '', $token);
-                       $replaced = preg_replace('/\d+/', '0', $replaced);
-                       $replaced = strtolower(trim($replaced));
+                       $replaced = preg_replace('/\d+/', '0', $token);
+                       $replaced = strtolower($replaced);
                        $str .= empty($replaced) ? $token : $replaced;
                }
                return $str;
@@ -279,11 +150,7 @@ class ChatLib {
                return $example[0];
        }
 
-       private $start = [];
-       private $one = [];
-       private $two = [];
-       private $three = [];
-       private $four = [];
-       private $five = [];
+       private $size = 5;
+       private $transitions = [];
 
 }