X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;f=app%2FModels%2FChatLib.php;h=3f1530b396b9b752652918c45d9f0a0fdea3d596;hb=ac6921da72ff4b0beab9e5f1308788a55aae3ad9;hp=417b18f3dd6b01f1cbeb5b923d71148ebb995cd3;hpb=c2cc99020eee56f7790d1358abb44df078f2e655;p=alttp.git diff --git a/app/Models/ChatLib.php b/app/Models/ChatLib.php index 417b18f..3f1530b 100644 --- a/app/Models/ChatLib.php +++ b/app/Models/ChatLib.php @@ -2,21 +2,27 @@ namespace App\Models; +use Illuminate\Support\Facades\Storage; + class ChatLib { - public function addMessage($msg) { - $tokens = $this->tokenize($msg->text_content); + public function addMessage(ChatLog $msg) { + $this->addText($msg->text_content); + } + + public function addText($text) { + $tokens = $this->tokenize($text); if (empty($tokens)) return; $tokens[] = ''; foreach ($tokens as $num => $token) { if ($num === 0) { $this->addTransition([], $token); } else { - $start = max(0, $num - $this->size); + $start = max(0, $num - $this->size - 1); $end = $num; for ($i = $start; $i < $end; ++$i) { $this->addTransition(array_slice($tokens, $i, $end - $i), $token); - if ($end - $i < 3) break; + if ($end - $i < 5) break; } } } @@ -29,21 +35,34 @@ class ChatLib { unset($this->transitions[$key]); } } - echo 'size: ', number_format(strlen(json_encode($this->transitions)), 0), PHP_EOL; } public function generate($limit = 100) { - $tokens = []; + $tokens = ['']; $generated = ''; while (strlen($generated) < $limit) { $next = $this->randomNext($tokens); - if (empty($next)) break; + if ($next === '') break; $tokens[] = $next; $generated .= $next; } return $generated; } + public function saveAs($name) { + $data = [ + 'size' => $this->size, + 'transitions' => $this->transitions, + ]; + Storage::disk('chatlib')->put($name.'.json', json_encode($data)); + } + + public function loadFrom($name) { + $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true); + $this->size = $data['size']; + $this->transitions = $data['transitions']; + } + private function index($arr) { $result = []; $sum = 0; @@ -56,11 +75,13 @@ class ChatLib { if (is_array(end($entry['examples']))) { // already processed $examples = $entry['examples']; + } else if ($key === ' ') { + $examples = [[' ', 0, 1]]; } else { $subsum = 0; foreach ($entry['examples'] as $example => $subweight) { $sublower = $subsum; - $subsum += $subweight * $subweight; + $subsum += $subweight; $examples[] = [$example, $sublower, $subsum]; } } @@ -71,8 +92,8 @@ class ChatLib { private function randomNext($tokens) { $cnt = count($tokens); - for ($size = min($this->size, $cnt); $size >= 0; --$size) { - $cmb = $this->generalize(array_slice($tokens, $cnt - $size, $size)); + for ($size = min($this->size, $cnt); $size > 0; --$size) { + $cmb = $this->generalize(array_slice($tokens, -$size)); if (isset($this->transitions[$cmb])) { $pick = $this->pick($this->transitions[$cmb]); if (!is_null($pick)) { @@ -132,15 +153,21 @@ class ChatLib { } private function tokenize($str) { - return array_values(array_filter(preg_split('/\b/u', $str))); + return array_values(array_filter(preg_split('/\b/u', $str), function($token) { + if (empty($token)) return false; + if (preg_match('/cheer\d+/u', strtolower($token))) return false; + return true; + })); } private function generalize($tokens) { $str = ''; foreach ($tokens as $token) { $replaced = preg_replace('/\d+/', '0', $token); + $replaced = preg_replace('/\s+/', ' ', $token); + $replaced = preg_replace('/(.)\1{2,}/', '$1', $token); $replaced = strtolower($replaced); - $str .= empty($replaced) ? $token : $replaced; + $str .= $replaced; } return $str; } @@ -150,7 +177,7 @@ class ChatLib { return $example[0]; } - private $size = 5; + private $size = 7; private $transitions = []; }