From f18af7cfb219ab9c07635ea8bbae80f2a9cee78e Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Tue, 7 May 2024 20:09:14 +0200 Subject: [PATCH] slightly improved message generation --- app/Console/Commands/ChatlibDatabase.php | 3 +- app/Models/ChatLib.php | 214 ++++++++++++++++++----- 2 files changed, 172 insertions(+), 45 deletions(-) diff --git a/app/Console/Commands/ChatlibDatabase.php b/app/Console/Commands/ChatlibDatabase.php index 13c881a..cb7978f 100644 --- a/app/Console/Commands/ChatlibDatabase.php +++ b/app/Console/Commands/ChatlibDatabase.php @@ -30,7 +30,8 @@ class ChatlibDatabase extends Command { public function handle() { $db = new ChatLib(); - ChatLog::where('banned', '=', false) + ChatLog::where('type', '=', 'chat') + ->where('banned', '=', false) ->whereNotNull('evaluated_at') ->chunk(500, function ($msgs) use ($db) { foreach ($msgs as $msg) { diff --git a/app/Models/ChatLib.php b/app/Models/ChatLib.php index d6d4f56..8134d13 100644 --- a/app/Models/ChatLib.php +++ b/app/Models/ChatLib.php @@ -5,22 +5,28 @@ namespace App\Models; class ChatLib { public function addMessage($msg) { - $tokens = array_values(array_filter(preg_split('/\b/', $msg->text_content))); + $tokens = array_values(array_filter(preg_split('/\s+/u', $msg->text_content))); if (empty($tokens)) return; $tokens [] = ''; foreach ($tokens as $num => $token) { if ($num === 0) { $this->addStart($token); - } else if ($num === 1) { - $this->addOne($tokens[0], $token); - } else if ($num === 2) { - $this->addOne($tokens[1], $token); - $this->addTwo($tokens[0], $tokens[1], $token); - } else { + } + if ($num > 0) { $this->addOne($tokens[$num - 1], $token); + } + if ($num > 1) { $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token); + } + if ($num > 2) { $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); } + if ($num > 3) { + $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); + } + if ($num > 4) { + $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); + } } } @@ -28,16 +34,37 @@ class ChatLib { $this->start = $this->index($this->start); foreach ($this->one as $key => $value) { $this->one[$key] = $this->index($this->one[$key]); + if (empty($this->one[$key])) { + unset($this->one[$key]); + } } foreach ($this->two as $key => $value) { $this->two[$key] = $this->index($this->two[$key]); + if (empty($this->two[$key])) { + unset($this->two[$key]); + } } foreach ($this->three as $key => $value) { $this->three[$key] = $this->index($this->three[$key]); + if (empty($this->three[$key])) { + unset($this->three[$key]); + } + } + foreach ($this->four as $key => $value) { + $this->four[$key] = $this->index($this->four[$key]); + if (empty($this->four[$key])) { + unset($this->four[$key]); + } + } + foreach ($this->five as $key => $value) { + $this->five[$key] = $this->index($this->five[$key]); + if (empty($this->five[$key])) { + unset($this->five[$key]); + } } } - public function generate($limit = 50) { + public function generate($limit = 75) { $tokens = []; $start = $this->randomStart(); $tokens[] = $start; @@ -46,7 +73,7 @@ class ChatLib { $next = $this->randomNext($tokens); if (empty($next)) break; $tokens[] = $next; - $generated .= $next; + $generated .= ' '.$next; } return $generated; } @@ -54,43 +81,105 @@ class ChatLib { private function index($arr) { $result = []; $sum = 0; - asort($arr); - foreach ($arr as $key => $weight) { + foreach ($arr as $key => $entry) { + $weight = $entry['count']; + if ($weight == 1) continue; $lower = $sum; - $sum += $weight; - $result[] = [$key, $lower, $sum]; + $sum += intval(pow($weight, 1.4)); + $examples = []; + if (is_array(end($entry['examples']))) { + // already processed + $examples = $entry['examples']; + } else { + $subsum = 0; + foreach ($entry['examples'] as $example => $subweight) { + $sublower = $subsum; + $subsum += $subweight * $subweight; + $examples[] = [$example, $sublower, $subsum]; + } + } + $result[] = [$key, $lower, $sum, $examples]; } return $result; } private function randomStart() { - return $this->pick($this->start); + $pick = $this->pick($this->start); + if (is_null($pick)) return ''; + return $this->exampleOf($pick); } private function randomNext($tokens) { $cnt = count($tokens); + $picks = []; + if ($cnt >= 5) { + $cmb = $this->generalize(array_slice($tokens, $cnt - 5, 5)); + if (isset($this->five[$cmb])) { + $pick = $this->pick($this->five[$cmb]); + if (!is_null($pick)) { + $picks[$pick[0]] = [ + 'count' => 10, + 'examples' => $pick[3], + ]; + } + } + } + if ($cnt >= 4) { + $cmb = $this->generalize(array_slice($tokens, $cnt - 4, 4)); + if (isset($this->four[$cmb])) { + $pick = $this->pick($this->four[$cmb]); + if (!is_null($pick)) { + $picks[$pick[0]] = [ + 'count' => 12, + 'examples' => $pick[3], + ]; + } + } + } if ($cnt >= 3) { - $cmb = $tokens[$cnt - 3].$tokens[$cnt - 2].$tokens[$cnt - 1]; + $cmb = $this->generalize(array_slice($tokens, $cnt - 3, 3)); if (isset($this->three[$cmb])) { - return $this->pick($this->three[$cmb]); + $pick = $this->pick($this->three[$cmb]); + if (!is_null($pick)) { + $picks[$pick[0]] = [ + 'count' => 14, + 'examples' => $pick[3], + ]; + } } } if ($cnt >= 2) { - $cmb = $tokens[$cnt - 2].$tokens[$cnt - 1]; + $cmb = $this->generalize(array_slice($tokens, $cnt - 2, 2)); if (isset($this->two[$cmb])) { - return $this->pick($this->two[$cmb]); + $pick = $this->pick($this->two[$cmb]); + if (!is_null($pick)) { + $picks[$pick[0]] = [ + 'count' => 4, + 'examples' => $pick[3], + ]; + } } } if ($cnt >= 1) { - $cmb = $tokens[$cnt - 1]; + $cmb = $this->generalize(array_slice($tokens, $cnt - 1, 1)); if (isset($this->one[$cmb])) { - return $this->pick($this->one[$cmb]); + $pick = $this->pick($this->one[$cmb]); + if (!is_null($pick)) { + $picks[$pick[0]] = [ + 'count' => 2, + 'examples' => $pick[3], + ]; + } } } - return ''; + if (empty($picks)) return ''; + $picks = $this->index($picks); + $pick = $this->pick($picks); + return $this->exampleOf($pick); } private function pick($options) { + if (empty($options)) return null; $max = end($options)[2]; $num = random_int(0, $max); $min_index = 0; @@ -108,56 +197,93 @@ class ChatLib { break; } } - return $options[$min_index][0]; + return $options[$min_index]; } private function addStart($token) { if (empty($token)) return; - if (!isset($this->start[$token])) { - $this->start[$token] = 1; - } else { - ++$this->start[$token]; - } + $this->increment($this->start, $token); } private function addOne($one, $token) { - if (!isset($this->one[$one])) { - $this->one[$one] = []; - } - if (!isset($this->one[$one][$token])) { - $this->one[$one][$token] = 1; - } else { - ++$this->one[$one][$token]; + $cmb = $this->generalize([$one]); + if (!isset($this->one[$cmb])) { + $this->one[$cmb] = []; } + $this->increment($this->one[$cmb], $token); } private function addTwo($one, $two, $token) { - $cmb = $one.$two; + $cmb = $this->generalize([$one, $two]); if (!isset($this->two[$cmb])) { $this->two[$cmb] = []; } - if (!isset($this->two[$cmb][$token])) { - $this->two[$cmb][$token] = 1; - } else { - ++$this->two[$cmb][$token]; - } + $this->increment($this->two[$cmb], $token); } private function addThree($one, $two, $three, $token) { - $cmb = $one.$two.$three; + $cmb = $this->generalize([$one, $two, $three]); if (!isset($this->three[$cmb])) { $this->three[$cmb] = []; } - if (!isset($this->three[$cmb][$token])) { - $this->three[$cmb][$token] = 1; + $this->increment($this->three[$cmb], $token); + } + + private function addFour($one, $two, $three, $four, $token) { + $cmb = $this->generalize([$one, $two, $three, $four]); + if (!isset($this->four[$cmb])) { + $this->four[$cmb] = []; + } + $this->increment($this->four[$cmb], $token); + } + + private function addFive($one, $two, $three, $four, $five, $token) { + $cmb = $this->generalize([$one, $two, $three, $four, $five]); + if (!isset($this->five[$cmb])) { + $this->five[$cmb] = []; + } + $this->increment($this->five[$cmb], $token); + } + + private function increment(&$which, $token) { + $generalized = $this->generalize([$token]); + if (!isset($which[$generalized])) { + $which[$generalized] = [ + 'count' => 1, + 'examples' => [], + ]; + $which[$generalized]['examples'][$token] = 1; } else { - ++$this->three[$cmb][$token]; + ++$which[$generalized]['count']; + if (!isset($which[$generalized]['examples'][$token])) { + $which[$generalized]['examples'][$token] = 1; + } else { + ++$which[$generalized]['examples'][$token]; + } } } + private function generalize($tokens) { + $str = ''; + foreach ($tokens as $token) { + $replaced = preg_replace('/\W/u', '', $token); + $replaced = preg_replace('/\d+/', '0', $replaced); + $replaced = strtolower(trim($replaced)); + $str .= empty($replaced) ? $token : $replaced; + } + return $str; + } + + private function exampleOf($pick) { + $example = $this->pick($pick[3]); + return $example[0]; + } + private $start = []; private $one = []; private $two = []; private $three = []; + private $four = []; + private $five = []; } -- 2.39.2