X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;ds=sidebyside;f=app%2FModels%2FChatLib.php;h=c7a19b6998d7acafc09e4fefb2b708ba8087f02f;hb=HEAD;hp=f4ab93f4798b28ebc87383b94f68e75728bbc7c9;hpb=e1e352a985c2da1cc0e4ad4716244261445a2604;p=alttp.git diff --git a/app/Models/ChatLib.php b/app/Models/ChatLib.php index f4ab93f..d6a7ffc 100644 --- a/app/Models/ChatLib.php +++ b/app/Models/ChatLib.php @@ -6,47 +6,59 @@ use Illuminate\Support\Facades\Storage; class ChatLib { - public function addMessage(ChatLog $msg) { - $this->addText($msg->text_content); + public function __construct($size = 3) { + $this->size = $size; + $converted = []; + foreach ($this->categories as $category => $patterns) { + $converted_patterns = []; + foreach ($patterns as $pattern) { + $converted_patterns[] = '/\b'.$pattern.'\b/u'; + } + $converted[strtoupper($category)] = $converted_patterns; + } + $this->categories = $converted; } - public function addText($text) { + public function addMessage(ChatLog $msg, ChatLog $previous = null) { + if ($msg->isReply()) { + $this->addText($msg->text_content, $msg->getReplyParent()); + } else if (!is_null($previous)) { + $this->addText($msg->text_content, $previous->text_content); + } else { + $this->addText($msg->text_content); + } + } + + public function addText($text, $context = '') { $tokens = $this->tokenize($text); - if (empty($tokens)) return; - $tokens[] = ''; - foreach ($tokens as $num => $token) { - if ($num === 0) { - $this->addTransition([], $token); - } else { - $start = max(0, $num - $this->size - 1); - $end = $num; - for ($i = $start; $i < $end; ++$i) { - $this->addTransition(array_slice($tokens, $i, $end - $i), $token); - if ($end - $i < 5) break; - } + for ($i = 0; $i < count($tokens) - $this->size; ++$i) { + $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]); + } + if (!empty($context)) { + $tokens = $this->tokenizeWithContext($text, $context); + $size = min($this->size - 1, count($tokens) - $this->size); + for ($i = 0; $i < $size; ++$i) { + $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]); } } } public function compile() { - foreach ($this->transitions as $key => $value) { - $this->transitions[$key] = $this->index($this->transitions[$key]); - if (empty($this->transitions[$key])) { - unset($this->transitions[$key]); - } + foreach ($this->transitions as $key => $values) { + $this->transitions[$key] = $this->index($values); } } - public function generate($limit = 100) { - $tokens = ['']; - $generated = ''; - while (strlen($generated) < $limit) { - $next = $this->randomNext($tokens); - if ($next === '') break; - $tokens[] = $next; - $generated .= $next; + public function generate($context = null) { + if (!is_null($context)) { + $tokens = $this->tokenizeWithContext('', $context); + $generated = $this->loop($tokens); + if (!empty($generated)) { + return $generated; + } } - return $generated; + $tokens = $this->tokenize(''); + return $this->loop($tokens); } public function saveAs($name) { @@ -66,54 +78,44 @@ class ChatLib { private function index($arr) { $result = []; $sum = 0; - foreach ($arr as $key => $entry) { - $weight = $entry['count']; - if ($weight == 1) continue; + foreach ($arr as $key => $weight) { $lower = $sum; $sum += $weight; - $examples = []; - if (is_array(end($entry['examples']))) { - // already processed - $examples = $entry['examples']; - } else if ($key === ' ') { - $examples = [[' ', 0, 1]]; - } else { - $subsum = 0; - foreach ($entry['examples'] as $example => $subweight) { - $sublower = $subsum; - $subsum += $subweight; - $examples[] = [$example, $sublower, $subsum]; - } - } - $result[] = [$key, $lower, $sum, $examples]; + $result[] = [$key, $lower, $sum]; } return $result; } - private function randomNext($tokens) { - $cnt = count($tokens); - for ($size = min($this->size, $cnt); $size > 0; --$size) { - $cmb = $this->generalize(array_slice($tokens, -$size)); - if (isset($this->transitions[$cmb])) { - $pick = $this->pick($this->transitions[$cmb]); - if (!is_null($pick)) { - return $this->exampleOf($pick); - } - } + private function loop($tokens) { + while (count($tokens) < 50) { + $next = $this->randomNext($tokens); + if ($next === ' ') break; + $tokens[] = $next; } - return ''; + return $this->untokenize($tokens); + } + + private function randomNext($tokens) { + $key = $this->makeKey($tokens); + if (!isset($this->transitions[$key])) return ' '; + $pick = $this->pick($this->transitions[$key]); + return $pick[0]; } private function pick($options) { if (empty($options)) return null; - $max = end($options)[2]; + $max = end($options)[2] - 1; $num = random_int(0, $max); + return static::search($options, $num); + } + + public static function search($options, $num) { $min_index = 0; $max_index = count($options) - 1; while ($min_index < $max_index) { $cur_index = intval(($min_index + $max_index) / 2); $cur_low = $options[$cur_index][1]; - $cur_high = $options[$cur_index][2]; + $cur_high = $options[$cur_index][2] - 1; if ($cur_low > $num) { $max_index = $cur_index; } else if ($cur_high < $num) { @@ -126,80 +128,62 @@ class ChatLib { return $options[$min_index]; } - private function addTransition($state, $next) { - $cmb = $this->generalize($state); - if (!isset($this->transitions[$cmb])) { - $this->transitions[$cmb] = []; + private function addTransition($tokens, $next) { + $key = $this->makeKey($tokens); + if (!isset($this->transitions[$key])) { + $this->transitions[$key] = []; } - $this->increment($this->transitions[$cmb], $next); - } - - private function increment(&$which, $token) { - $generalized = $this->generalize([$token]); - if (!isset($which[$generalized])) { - $which[$generalized] = [ - 'count' => 1, - 'examples' => [], - ]; - $which[$generalized]['examples'][$token] = 1; + if (!isset($this->transitions[$key][$next])) { + $this->transitions[$key][$next] = 1; } else { - ++$which[$generalized]['count']; - if (!isset($which[$generalized]['examples'][$token])) { - $which[$generalized]['examples'][$token] = 1; - } else { - ++$which[$generalized]['examples'][$token]; - } + ++$this->transitions[$key][$next]; } } - private function tokenize($str) { - return array_values(array_filter(preg_split('/\b/u', $str), function($token) { - if ($token === '') return false; - if (preg_match('/cheer\d+/u', strtolower($token))) return false; - return true; - })); + private function splitText($text) { + if (trim($text) === '') return []; + return preg_split('/\s+/u', $text); } - private function generalize($tokens) { - $str = ''; - foreach ($tokens as $token) { - $replaced = preg_replace('/\d+/u', '0', $token); - $replaced = preg_replace('/\s+/u', ' ', $replaced); - $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced); - $replaced = strtolower($replaced); - foreach ($this->aliases as $canonical => $variants) { - if (in_array($replaced, $variants)) { - $replaced = $canonical; - break; - } - if ($replaced === $canonical) { - break; - } - } - $str .= $replaced; - } + private function makeKey($tokens) { + $key = $this->joinText(array_slice($tokens, $this->size * -1)); + $key = mb_strtolower($key); + $key = str_replace(['.', ',', ':', ';', '!', '?', '^', '+', '-', '"', "'", '(', ')', '[', ']'], '', $key); + $key = preg_replace('/\d+/u', '0', $key); foreach ($this->categories as $category => $patterns) { - foreach ($patterns as $pattern) { - $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str); - } + $key = preg_replace($patterns, $category, $key); } - return $str; + return $key; } - private function exampleOf($pick) { - $example = $this->pick($pick[3]); - return $example[0]; + private function joinText($tokens) { + return implode(' ', $tokens); } - private $size = 7; - private $transitions = []; + private function untokenize($tokens) { + return $this->joinText(array_slice($tokens, $this->size)); + } - private $aliases = [ - 'chest' => ['kiste'], - 'einen' => ['n', 'nen'], - 'musik' => ['mukke'], - 'schade' => ['schad'], - ]; + private function tokenize($text) { + $tokens = $this->splitText($text); + $combined = array_merge(array_fill(0, $this->size, ' '), $tokens); + if (!empty($tokens)) { + $combined[] = ' '; + } + return $combined; + } + + private function tokenizeWithContext($text, $context) { + $combined = $this->tokenize($text); + $context_tokens = array_slice($this->splitText($context), $this->size * -1 + 1); + for ($i = 0; $i < count($context_tokens); ++$i) { + $combined[$this->size - $i - 2] = $context_tokens[count($context_tokens) - $i - 1]; + } + return $combined; + } + + private $size; + private $transitions = []; private $categories = [ 'fail' => [ @@ -345,6 +329,7 @@ class ChatLib { 'wave' => [ 'dennsenhi', 'dergoawave', + 'falcnwavehi', 'heyguys', 'holysm0heyguys', 'muftaahey', @@ -355,7 +340,7 @@ class ChatLib { 'wuschlwave', ], - 'zelda_boss' => [ + 'zb' => [ 'aga(hnim)?', 'armos( knights)?', 'arrghus', @@ -371,7 +356,7 @@ class ChatLib { 'vit(reous|ty)', ], - 'zelda_dungeon' => [ + 'zd' => [ 'eastern', 'desert( palace)?', 'gt', @@ -386,7 +371,7 @@ class ChatLib { 'tt', ], - 'zelda_item' => [ + 'zi' => [ '(big|small|retro|generic) ?keys?', 'b[oö]gen', 'bombos', @@ -431,7 +416,7 @@ class ChatLib { 'sword', ], - 'zelda_location' => [ + 'zl' => [ 'big chest', 'bumper( cave)?( ledge)?', '(hyrule)? ?castle ?(tower)?',