<?php

// Post-image of app/Models/ChatLib.php (alttp.git) as introduced by commit
// 26d47ca368d8e7c2690cec49f6ae2ad509a0428d, reconstructed from a flattened
// diff and corrected (see generalize(), tokenize(), exampleOf(), pick()).

namespace App\Models;

use Illuminate\Support\Facades\Storage;

/**
 * Markov-chain style text generator trained on chat messages.
 *
 * Training (addMessage()/addText()) counts which token follows which context
 * of preceding tokens. Contexts and follow-up tokens are stored in a
 * "generalized" form (see generalize()) while the concrete spellings that were
 * seen are kept as weighted examples. compile() turns the raw counts into
 * cumulative weight ranges, after which generate() can produce text.
 */
class ChatLib {

    /**
     * addText() stops adding shorter contexts for a token once it has added
     * one with fewer tokens than this.
     */
    private const SHORT_CONTEXT = 5;

    /** Maximum number of trailing tokens randomNext() uses as lookup context. */
    private $size = 7;

    /**
     * Generalized context => follow-up data.
     *
     * Before compile(): generalized token => ['count' => int, 'examples' => [token => int]].
     * After compile():  list of [generalized token, lower, upper, examples],
     *                   where examples is a list of [token, lower, upper].
     */
    private $transitions = [];

    /**
     * Train on the text content of a chat log entry.
     *
     * @param ChatLog $msg entry whose text_content is fed to addText()
     */
    public function addMessage(ChatLog $msg) {
        $this->addText($msg->text_content);
    }

    /**
     * Train on a single piece of text.
     *
     * An empty token is appended to mark the end of the text. The first token
     * is recorded as following the empty context, which is what generate()
     * starts from.
     *
     * @param string $text raw text; nothing is recorded if it yields no tokens
     */
    public function addText($text) {
        $tokens = $this->tokenize($text);
        if (empty($tokens)) return;
        $tokens[] = '';
        foreach ($tokens as $num => $token) {
            if ($num === 0) {
                $this->addTransition([], $token);
                continue;
            }
            // NOTE(review): the longest context recorded here has size + 1
            // tokens while randomNext() looks up at most size tokens -
            // confirm whether the extra token is intended.
            $start = max(0, $num - $this->size - 1);
            for ($i = $start; $i < $num; ++$i) {
                $length = $num - $i;
                $this->addTransition(array_slice($tokens, $i, $length), $token);
                if ($length < self::SHORT_CONTEXT) break;
            }
        }
    }

    /**
     * Convert the raw counts into cumulative weight ranges usable by
     * generate(). Contexts left without any follow-up are removed.
     */
    public function compile() {
        foreach ($this->transitions as $key => $candidates) {
            $indexed = $this->index($candidates);
            if (empty($indexed)) {
                unset($this->transitions[$key]);
            } else {
                $this->transitions[$key] = $indexed;
            }
        }
    }

    /**
     * Generate text from the compiled transitions.
     *
     * Tokens are appended until the end marker (empty token) is drawn, no
     * known context matches, or the text is at least $limit bytes long.
     *
     * @param int $limit byte length at which generation stops
     * @return string generated text, possibly empty
     */
    public function generate($limit = 100) {
        // The leading empty token generalizes to the empty context, i.e. the
        // one that the first token of every training text was recorded under.
        $tokens = [''];
        $generated = '';
        while (strlen($generated) < $limit) {
            $next = $this->randomNext($tokens);
            if ($next === '') break;
            $tokens[] = $next;
            $generated .= $next;
        }
        return $generated;
    }

    /**
     * Store size and transitions as "<name>.json" on the "chatlib" disk.
     *
     * @param string $name file name without extension
     */
    public function saveAs($name) {
        $data = [
            'size' => $this->size,
            'transitions' => $this->transitions,
        ];
        Storage::disk('chatlib')->put($name.'.json', json_encode($data));
    }

    /**
     * Replace size and transitions with the contents of "<name>.json" on the
     * "chatlib" disk.
     *
     * @param string $name file name without extension
     */
    public function loadFrom($name) {
        $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
        $this->size = $data['size'];
        $this->transitions = $data['transitions'];
    }

    /**
     * Turn the counted follow-ups of one context into cumulative ranges.
     *
     * Follow-ups that were seen exactly once are dropped.
     *
     * @param array $arr generalized token => ['count' => int, 'examples' => array]
     * @return array list of [generalized token, lower, upper, examples]
     */
    private function index($arr) {
        $result = [];
        $sum = 0;
        foreach ($arr as $key => $entry) {
            $weight = $entry['count'];
            if ($weight == 1) continue;
            $lower = $sum;
            $sum += $weight;
            if (is_array(end($entry['examples']))) {
                // already processed
                $examples = $entry['examples'];
            } else if ($key === ' ') {
                // every whitespace run is emitted as a single space
                $examples = [[' ', 0, 1]];
            } else {
                $examples = [];
                $subsum = 0;
                foreach ($entry['examples'] as $example => $subweight) {
                    $sublower = $subsum;
                    $subsum += $subweight;
                    $examples[] = [$example, $sublower, $subsum];
                }
            }
            $result[] = [$key, $lower, $sum, $examples];
        }
        return $result;
    }

    /**
     * Draw the next token for the given history, trying the longest context
     * first and falling back to shorter ones.
     *
     * @param array $tokens tokens generated so far
     * @return string next token, or '' if no context yields one
     */
    private function randomNext($tokens) {
        $cnt = count($tokens);
        for ($size = min($this->size, $cnt); $size > 0; --$size) {
            $cmb = $this->generalize(array_slice($tokens, -$size));
            if (!isset($this->transitions[$cmb])) continue;
            $pick = $this->pick($this->transitions[$cmb]);
            if (!is_null($pick)) {
                return $this->exampleOf($pick);
            }
        }
        return '';
    }

    /**
     * Weighted random choice from a list of [value, lower, upper, ...]
     * entries whose [lower, upper) ranges are contiguous and ascending.
     *
     * NOTE(review): the middle of this method was not part of the patch this
     * file was reconstructed from; the search below is written against the
     * visible contract (range layout produced by index(), whole entry
     * returned) - confirm against the upstream file.
     *
     * @param array $options entries as produced by index()
     * @return array|null the chosen entry, or null if there are none
     */
    private function pick($options) {
        if (empty($options)) return null;
        $options = array_values($options);
        $last = count($options) - 1;
        $max = $options[$last][2];
        // Draw from [0, max) so the number always falls inside one range.
        $num = $max > 0 ? random_int(0, $max - 1) : 0;
        $low = 0;
        $high = $last;
        while ($low < $high) {
            $mid = intdiv($low + $high, 2);
            if ($num < $options[$mid][1]) {
                $high = $mid - 1;
            } else if ($num >= $options[$mid][2]) {
                $low = $mid + 1;
            } else {
                return $options[$mid];
            }
        }
        return $options[max(0, min($low, $last))];
    }

    /**
     * Count $next as a follow-up of the context $state.
     *
     * @param array $state tokens forming the context
     * @param string $next token that followed
     */
    private function addTransition($state, $next) {
        $cmb = $this->generalize($state);
        if (!isset($this->transitions[$cmb])) {
            $this->transitions[$cmb] = [];
        }
        $this->increment($this->transitions[$cmb], $next);
    }

    /**
     * Increase the count of $token's generalized form and of $token itself
     * as one of its examples.
     *
     * @param array $which follow-up table of one context, modified in place
     * @param string $token concrete token
     */
    private function increment(&$which, $token) {
        $generalized = $this->generalize([$token]);
        if (!isset($which[$generalized])) {
            $which[$generalized] = [
                'count' => 0,
                'examples' => [],
            ];
        }
        ++$which[$generalized]['count'];
        if (!isset($which[$generalized]['examples'][$token])) {
            $which[$generalized]['examples'][$token] = 0;
        }
        ++$which[$generalized]['examples'][$token];
    }

    /**
     * Split text at word boundaries, dropping empty pieces and tokens
     * containing "cheer" followed by digits (case-insensitive).
     *
     * @param string $str raw text
     * @return array list of tokens
     */
    private function tokenize($str) {
        $parts = preg_split('/\b/u', (string) $str);
        // preg_split() fails (returns false) e.g. on invalid UTF-8
        if ($parts === false) return [];
        return array_values(array_filter($parts, function($token) {
            // not empty(): that would also discard the token "0"
            if ($token === '') return false;
            if (preg_match('/cheer\d+/u', strtolower($token))) return false;
            return true;
        }));
    }

    /**
     * Build the lookup key for a token sequence.
     *
     * Per token: digit runs become "0", whitespace runs become a single
     * space, characters repeated three or more times are reduced to one, and
     * the result is lower-cased. The normalized tokens are concatenated.
     *
     * @param array $tokens tokens to combine
     * @return string generalized key
     */
    private function generalize($tokens) {
        $str = '';
        foreach ($tokens as $token) {
            // Each step must work on the previous step's result; feeding
            // $token into every call would keep only the last replacement.
            $replaced = preg_replace('/\d+/', '0', (string) $token);
            $replaced = preg_replace('/\s+/', ' ', $replaced);
            $replaced = preg_replace('/(.)\1{2,}/', '$1', $replaced);
            $str .= strtolower($replaced);
        }
        return $str;
    }

    /**
     * Draw a concrete spelling for a picked generalized token.
     *
     * @param array $pick entry as returned by pick() on a transition list
     * @return string concrete token, '' if the entry has no examples
     */
    private function exampleOf($pick) {
        $example = $this->pick($pick[3]);
        if (is_null($example)) return '';
        // Numeric tokens were array keys during training and therefore come
        // back as ints; callers compare and concatenate strings.
        return (string) $example[0];
    }

}