X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;f=app%2FModels%2FChatLib.php;h=d6a7ffcbbe7906bef5ae3e25b40cdf0e080cad71;hb=1dea58cb6fa9cf28966e75c1e1af87f67e6c0fd1;hp=c7a19b6998d7acafc09e4fefb2b708ba8087f02f;hpb=5df7f3ed455c56370b3c4b690b0ca47b183fa829;p=alttp.git diff --git a/app/Models/ChatLib.php b/app/Models/ChatLib.php index c7a19b6..d6a7ffc 100644 --- a/app/Models/ChatLib.php +++ b/app/Models/ChatLib.php @@ -6,79 +6,65 @@ use Illuminate\Support\Facades\Storage; class ChatLib { - public function __construct($size = 6) { + public function __construct($size = 3) { $this->size = $size; - $converted = []; foreach ($this->categories as $category => $patterns) { $converted_patterns = []; foreach ($patterns as $pattern) { $converted_patterns[] = '/\b'.$pattern.'\b/u'; } - $converted['%'.strtoupper($category).'%'] = $converted_patterns; + $converted[strtoupper($category)] = $converted_patterns; } $this->categories = $converted; } - public function addMessage(ChatLog $msg) { - $this->addText($msg->text_content); + public function addMessage(ChatLog $msg, ChatLog $previous = null) { + if ($msg->isReply()) { + $this->addText($msg->text_content, $msg->getReplyParent()); + } else if (!is_null($previous)) { + $this->addText($msg->text_content, $previous->text_content); + } else { + $this->addText($msg->text_content); + } } - public function addText($text) { + public function addText($text, $context = '') { $tokens = $this->tokenize($text); - if (empty($tokens)) return; - $tokens[] = ''; - foreach ($tokens as $num => $token) { - if ($num === 0) { - $this->addTransition([], $token); - } else { - $start = max(0, $num - $this->size - 1); - $end = $num; - for ($i = $start; $i < $end; ++$i) { - $this->addTransition(array_slice($tokens, $i, $end - $i), $token); - if ($end - $i < 5) break; - } + for ($i = 0; $i < count($tokens) - $this->size; ++$i) { + $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]); + } + if (!empty($context)) { + $tokens = $this->tokenizeWithContext($text, $context); + $size = min($this->size - 1, count($tokens) - $this->size); + for ($i = 0; $i < $size; ++$i) { + $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]); } - $this->addExample(array_slice($tokens, 0, $num), $token); } } public function compile() { foreach ($this->transitions as $key => $values) { - $this->transitions[$key] = $this->index($values, 2); - if (empty($this->transitions[$key])) { - unset($this->transitions[$key]); - } - } - foreach ($this->examples as $key => $values) { - if (in_array($key, ['', ' '])) { - unset($this->examples[$key]); - continue; - } - $this->examples[$key] = $this->index($values, 1); - if (empty($this->examples[$key]) || (count($this->examples[$key]) === 1 && $this->examples[$key][0][0] === $key)) { - unset($this->examples[$key]); - } + $this->transitions[$key] = $this->index($values); } } - public function generate($limit = 100) { - $tokens = ['']; - $generated = ''; - while (strlen($generated) < $limit) { - $next = $this->randomNext($tokens); - if ($next === '') break; - $tokens[] = $next; - $generated .= $next; + public function generate($context = null) { + if (!is_null($context)) { + $tokens = $this->tokenizeWithContext('', $context); + $generated = $this->loop($tokens); + if (!empty($generated)) { + return $generated; + } } - return $generated; + $tokens = $this->tokenize(''); + return $this->loop($tokens); } public function saveAs($name) { $data = [ 'size' => $this->size, 'transitions' => $this->transitions, - 'examples' => $this->examples, ]; Storage::disk('chatlib')->put($name.'.json', json_encode($data)); } @@ -87,14 +73,12 @@ class ChatLib { $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true); $this->size = $data['size']; $this->transitions = $data['transitions']; - $this->examples = $data['examples']; } - private function index($arr, $min_weight = 2) { + private function index($arr) { $result = []; $sum = 0; foreach ($arr as $key => $weight) { - if ($weight < $min_weight) continue; $lower = $sum; $sum += $weight; $result[] = [$key, $lower, $sum]; @@ -102,18 +86,20 @@ class ChatLib { return $result; } - private function randomNext($tokens) { - $cnt = count($tokens); - for ($size = min($this->size, $cnt); $size > 0; --$size) { - $cmb = $this->generalize(array_slice($tokens, -$size)); - if (isset($this->transitions[$cmb])) { - $pick = $this->pick($this->transitions[$cmb]); - if (!is_null($pick)) { - return $this->exampleOf($pick, $tokens); - } - } + private function loop($tokens) { + while (count($tokens) < 50) { + $next = $this->randomNext($tokens); + if ($next === ' ') break; + $tokens[] = $next; } - return ''; + return $this->untokenize($tokens); + } + + private function randomNext($tokens) { + $key = $this->makeKey($tokens); + if (!isset($this->transitions[$key])) return ' '; + $pick = $this->pick($this->transitions[$key]); + return $pick[0]; } private function pick($options) { @@ -142,84 +128,62 @@ class ChatLib { return $options[$min_index]; } - private function addTransition($state, $next) { - $ctx = $this->generalize($state); - $cmb = $this->generalize([$next]); - if (!isset($this->transitions[$ctx])) { - $this->transitions[$ctx] = []; + private function addTransition($tokens, $next) { + $key = $this->makeKey($tokens); + if (!isset($this->transitions[$key])) { + $this->transitions[$key] = []; } - if (!isset($this->transitions[$ctx][$cmb])) { - $this->transitions[$ctx][$cmb] = 1; + if (!isset($this->transitions[$key][$next])) { + $this->transitions[$key][$next] = 1; } else { - ++$this->transitions[$ctx][$cmb]; + ++$this->transitions[$key][$next]; } } - private function addExample($context, $token) { - $cmb = $this->generalize([$token]); - if (!isset($this->examples[$cmb])) { - $this->examples[$cmb] = []; - } - if (!isset($this->examples[$cmb][$token])) { - $this->examples[$cmb][$token] = 1; - } else { - ++$this->examples[$cmb][$token]; + private function splitText($text) { + if (trim($text) === '') return []; + return preg_split('/\s+/u', $text); + } + + private function makeKey($tokens) { + $key = $this->joinText(array_slice($tokens, $this->size * -1)); + $key = mb_strtolower($key); + $key = str_replace(['.', ',', ':', ';', '!', '?', '^', '+', '-', '"', "'", '(', ')', '[', ']'], '', $key); + $key = preg_replace('/\d+/u', '0', $key); + foreach ($this->categories as $category => $patterns) { + $key = preg_replace($patterns, $category, $key); } + return $key; } - private function tokenize($str) { - return array_values(array_filter(preg_split('/\b/u', $str), function($token) { - if ($token === '') return false; - if (preg_match('/cheer\d+/u', strtolower($token))) return false; - return true; - })); + private function joinText($tokens) { + return implode(' ', $tokens); } - private function generalize($tokens) { - $str = ''; - foreach ($tokens as $token) { - $replaced = preg_replace('/\d+/u', '0', $token); - $replaced = preg_replace('/\s+/u', ' ', $replaced); - $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced); - $replaced = strtolower($replaced); - foreach ($this->aliases as $canonical => $variants) { - if (in_array($replaced, $variants)) { - $replaced = $canonical; - break; - } - if ($replaced === $canonical) { - break; - } - } - $str .= $replaced; - } - foreach ($this->categories as $category => $patterns) { - $str = preg_replace($patterns, $category, $str); - } - return $str; + private function untokenize($tokens) { + return $this->joinText(array_slice($tokens, $this->size)); } - private function exampleOf($pick, $context) { - if (!isset($this->examples[$pick[0]])) { - return $pick[0]; + private function tokenize($text) { + $tokens = $this->splitText($text); + $combined = array_merge(array_fill(0, $this->size, ' '), $tokens); + if (!empty($tokens)) { + $combined[] = ' '; } - if (isset($this->examples[$pick[0]])) { - $example = $this->pick($this->examples[$pick[0]]); - return $example[0]; + return $combined; + } + + private function tokenizeWithContext($text, $context) { + $combined = $this->tokenize($text); + $context_tokens = array_slice($this->splitText($context), $this->size * -1 + 1); + for ($i = 0; $i < count($context_tokens); ++$i) { + $combined[$this->size - $i - 2] = $context_tokens[count($context_tokens) - $i - 1]; } - return $pick[0]; + return $combined; } private $size; private $transitions = []; - private $examples = []; - - private $aliases = [ - 'chest' => ['kiste'], - 'einen' => ['n', 'nen'], - 'musik' => ['mukke'], - 'schade' => ['schad', 'schaade'], - ]; private $categories = [ 'fail' => [