namespace App\Models;
+use Illuminate\Support\Facades\Storage;
+
class ChatLib {
- public function addMessage($msg) {
- $tokens = array_values(array_filter(preg_split('/\s+/u', $msg->text_content)));
+ /**
+  * Create an empty model and compile the category word lists into regexes.
+  *
+  * Every category name becomes a '%NAME%' placeholder key, and each of its raw
+  * patterns is wrapped in word boundaries with the /u modifier so the list can
+  * be passed directly to preg_replace().
+  *
+  * @param int $size Context size used when recording and looking up transitions.
+  */
+ public function __construct($size = 6) {
+     $this->size = $size;
+
+     $compiled = [];
+     foreach ($this->categories as $name => $raw_patterns) {
+         $placeholder = '%'.strtoupper($name).'%';
+         $compiled[$placeholder] = array_map(function ($raw) {
+             return '/\b'.$raw.'\b/u';
+         }, $raw_patterns);
+     }
+     $this->categories = $compiled;
+ }
+
+ /**
+  * Learn from a stored chat message by feeding its text content to addText().
+  *
+  * @param ChatLog $msg Message whose text_content is added to the model.
+  */
+ public function addMessage(ChatLog $msg) {
+     $text = $msg->text_content;
+     $this->addText($text);
+ }
+
+ /**
+ * Learn from one piece of raw text.
+ *
+ * The text is tokenized and an empty string is appended as an end-of-message
+ * marker. Every token is then recorded as a successor of the tokens before
+ * it (addTransition) and as a concrete example of its generalized form
+ * (addExample). Text that yields no tokens is ignored.
+ *
+ * @param string $text Raw message text.
+ */
+ public function addText($text) {
+ $tokens = $this->tokenize($text);
if (empty($tokens)) return;
- $tokens [] = '';
+ // '' terminates the message; generate() stops as soon as it draws this token.
+ $tokens[] = '';
foreach ($tokens as $num => $token) {
if ($num === 0) {
- $this->addStart($token);
- }
- if ($num > 0) {
- $this->addOne($tokens[$num - 1], $token);
- }
- if ($num > 1) {
- $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token);
- }
- if ($num > 2) {
- $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
- }
- if ($num > 3) {
- $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
- }
- if ($num > 4) {
- $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
+ // The first token follows the empty context.
+ $this->addTransition([], $token);
+ } else {
+ // Record windows of preceding tokens, longest first (at most size + 1 tokens),
+ // dropping the oldest token on every round.
+ $start = max(0, $num - $this->size - 1);
+ $end = $num;
+ for ($i = $start; $i < $end; ++$i) {
+ $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
+ // Stop once the window just recorded was shorter than 5 tokens.
+ // NOTE(review): 5 is hard-coded and the longest window is one token larger
+ // than what randomNext() looks up - confirm both are intended.
+ if ($end - $i < 5) break;
+ }
}
+ $this->addExample(array_slice($tokens, 0, $num), $token);
}
}
+ /**
+ * Turn the raw counts gathered by addText() into weighted lookup tables.
+ *
+ * Transitions keep only successors seen at least twice; contexts left without
+ * any successor are removed. Example lists keep every spelling, but a list is
+ * dropped when its key is '' or ' ', when it is empty, or when its only
+ * spelling equals the key itself (exampleOf() falls back to the key anyway).
+ *
+ * NOTE(review): index() expects plain counts, so this looks like it must run
+ * exactly once per model - confirm against callers.
+ */
public function compile() {
- $this->start = $this->index($this->start);
- foreach ($this->one as $key => $value) {
- $this->one[$key] = $this->index($this->one[$key]);
- if (empty($this->one[$key])) {
- unset($this->one[$key]);
- }
- }
- foreach ($this->two as $key => $value) {
- $this->two[$key] = $this->index($this->two[$key]);
- if (empty($this->two[$key])) {
- unset($this->two[$key]);
+ foreach ($this->transitions as $key => $values) {
+ $this->transitions[$key] = $this->index($values, 2);
+ if (empty($this->transitions[$key])) {
+ unset($this->transitions[$key]);
}
}
- foreach ($this->three as $key => $value) {
- $this->three[$key] = $this->index($this->three[$key]);
- if (empty($this->three[$key])) {
- unset($this->three[$key]);
+ foreach ($this->examples as $key => $values) {
+ // Array keys such as '0' come back as integers; a loose in_array() would treat
+ // int 0 as equal to '' before PHP 8 and discard every numeric example list.
+ if (in_array((string) $key, ['', ' '], true)) {
+ unset($this->examples[$key]);
+ continue;
}
- }
- foreach ($this->four as $key => $value) {
- $this->four[$key] = $this->index($this->four[$key]);
- if (empty($this->four[$key])) {
- unset($this->four[$key]);
- }
- }
- foreach ($this->five as $key => $value) {
- $this->five[$key] = $this->index($this->five[$key]);
- if (empty($this->five[$key])) {
- unset($this->five[$key]);
+ $this->examples[$key] = $this->index($values, 1);
+ if (empty($this->examples[$key]) || (count($this->examples[$key]) === 1 && $this->examples[$key][0][0] === $key)) {
+ unset($this->examples[$key]);
}
}
}
- public function generate($limit = 75) {
- $tokens = [];
- $start = $this->randomStart();
- $tokens[] = $start;
- $generated = $start;
+ /**
+ * Generate a message by repeatedly drawing the next token from the model.
+ *
+ * Generation starts from the empty context and appends tokens without a
+ * separator (whitespace is part of the token stream). It stops when the
+ * end marker '' is drawn or once the output has reached $limit bytes, so the
+ * result may exceed $limit by the length of the last token.
+ *
+ * @param int $limit Length (strlen, i.e. bytes) at which generation stops.
+ * @return string The generated text; '' if nothing could be drawn.
+ */
+ public function generate($limit = 100) {
+ $tokens = [''];
+ $generated = '';
while (strlen($generated) < $limit) {
$next = $this->randomNext($tokens);
- if (empty($next)) break;
+ // Strict check: only the end marker stops generation, not tokens such as '0'.
+ if ($next === '') break;
$tokens[] = $next;
- $generated .= ' '.$next;
+ $generated .= $next;
}
return $generated;
}
- private function index($arr) {
+ /**
+  * Write the model (size, transitions, examples) as JSON to the 'chatlib' disk.
+  *
+  * @param string $name Base file name; '.json' is appended.
+  */
+ public function saveAs($name) {
+     $payload = json_encode([
+         'size' => $this->size,
+         'transitions' => $this->transitions,
+         'examples' => $this->examples,
+     ]);
+     Storage::disk('chatlib')->put($name.'.json', $payload);
+ }
+
+ /**
+  * Replace the model state with the JSON document stored on the 'chatlib' disk.
+  *
+  * NOTE(review): the decoded document is used as-is; a missing or malformed
+  * file is not detected here - confirm callers only load files written by saveAs().
+  *
+  * @param string $name Base file name; '.json' is appended.
+  */
+ public function loadFrom($name) {
+     $raw = Storage::disk('chatlib')->get($name.'.json');
+     $data = json_decode($raw, true);
+     $this->size = $data['size'];
+     $this->transitions = $data['transitions'];
+     $this->examples = $data['examples'];
+ }
+
+ /**
+ * Convert a key => weight map into a list of cumulative weight ranges.
+ *
+ * Each kept entry becomes [key, lower, upper], where lower is the running
+ * total before the entry and upper the running total after it. Entries whose
+ * weight is below $min_weight are skipped and do not contribute to the total.
+ *
+ * @param array $arr Map of key => integer weight.
+ * @param int $min_weight Smallest weight that is kept.
+ * @return array List of [key, lower, upper] entries in input order.
+ */
+ private function index($arr, $min_weight = 2) {
$result = [];
$sum = 0;
- foreach ($arr as $key => $entry) {
- $weight = $entry['count'];
- if ($weight == 1) continue;
+ foreach ($arr as $key => $weight) {
+ if ($weight < $min_weight) continue;
$lower = $sum;
- $sum += intval(pow($weight, 1.4));
- $examples = [];
- if (is_array(end($entry['examples']))) {
- // already processed
- $examples = $entry['examples'];
- } else {
- $subsum = 0;
- foreach ($entry['examples'] as $example => $subweight) {
- $sublower = $subsum;
- $subsum += $subweight * $subweight;
- $examples[] = [$example, $sublower, $subsum];
- }
- }
- $result[] = [$key, $lower, $sum, $examples];
+ $sum += $weight;
+ $result[] = [$key, $lower, $sum];
}
return $result;
}
- private function randomStart() {
- $pick = $this->pick($this->start);
- if (is_null($pick)) return '';
- return $this->exampleOf($pick);
- }
-
+ /**
+ * Draw the next token for the given token history.
+ *
+ * Tries the longest usable context first (at most $this->size trailing
+ * tokens) and shortens it by one token until a context with recorded
+ * transitions yields a pick; the pick is then turned into a concrete
+ * spelling by exampleOf().
+ *
+ * @param array $tokens Tokens generated so far.
+ * @return mixed The next token, or '' when no context matches.
+ */
private function randomNext($tokens) {
$cnt = count($tokens);
- $picks = [];
- if ($cnt >= 5) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - 5, 5));
- if (isset($this->five[$cmb])) {
- $pick = $this->pick($this->five[$cmb]);
+ for ($size = min($this->size, $cnt); $size > 0; --$size) {
+ // A negative offset takes the last $size tokens.
+ $cmb = $this->generalize(array_slice($tokens, -$size));
+ if (isset($this->transitions[$cmb])) {
+ $pick = $this->pick($this->transitions[$cmb]);
if (!is_null($pick)) {
- $picks[$pick[0]] = [
- 'count' => 10,
- 'examples' => $pick[3],
- ];
+ return $this->exampleOf($pick, $tokens);
}
}
}
- if ($cnt >= 4) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - 4, 4));
- if (isset($this->four[$cmb])) {
- $pick = $this->pick($this->four[$cmb]);
- if (!is_null($pick)) {
- $picks[$pick[0]] = [
- 'count' => 12,
- 'examples' => $pick[3],
- ];
- }
- }
- }
- if ($cnt >= 3) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - 3, 3));
- if (isset($this->three[$cmb])) {
- $pick = $this->pick($this->three[$cmb]);
- if (!is_null($pick)) {
- $picks[$pick[0]] = [
- 'count' => 14,
- 'examples' => $pick[3],
- ];
- }
- }
- }
- if ($cnt >= 2) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - 2, 2));
- if (isset($this->two[$cmb])) {
- $pick = $this->pick($this->two[$cmb]);
- if (!is_null($pick)) {
- $picks[$pick[0]] = [
- 'count' => 4,
- 'examples' => $pick[3],
- ];
- }
- }
- }
- if ($cnt >= 1) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - 1, 1));
- if (isset($this->one[$cmb])) {
- $pick = $this->pick($this->one[$cmb]);
- if (!is_null($pick)) {
- $picks[$pick[0]] = [
- 'count' => 2,
- 'examples' => $pick[3],
- ];
- }
- }
- }
- if (empty($picks)) return '';
- $picks = $this->index($picks);
- $pick = $this->pick($picks);
- return $this->exampleOf($pick);
+ // No context of any length has recorded transitions: signal the end of the message.
+ return '';
}
+ /**
+ * Draw one entry at random from a weighted option list as produced by index().
+ *
+ * @param array $options List of [key, lower, upper] entries with cumulative bounds.
+ * @return array|null The result of search() for the drawn number, or null when $options is empty.
+ */
private function pick($options) {
if (empty($options)) return null;
- $max = end($options)[2];
+ // Upper bounds are exclusive, so the largest valid draw is the total weight minus one.
+ $max = end($options)[2] - 1;
$num = random_int(0, $max);
+ return static::search($options, $num);
+ }
+
+ public static function search($options, $num) {
$min_index = 0;
$max_index = count($options) - 1;
while ($min_index < $max_index) {
$cur_index = intval(($min_index + $max_index) / 2);
$cur_low = $options[$cur_index][1];
- $cur_high = $options[$cur_index][2];
+ $cur_high = $options[$cur_index][2] - 1;
if ($cur_low > $num) {
$max_index = $cur_index;
} else if ($cur_high < $num) {
return $options[$min_index];
}
- private function addStart($token) {
- if (empty($token)) return;
- $this->increment($this->start, $token);
- }
-
- private function addOne($one, $token) {
- $cmb = $this->generalize([$one]);
- if (!isset($this->one[$cmb])) {
- $this->one[$cmb] = [];
+ /**
+ * Count one observation of $next following the context $state.
+ *
+ * Both the context and the successor are stored in generalized form, so
+ * different spellings of the same thing share one counter.
+ *
+ * @param array $state Tokens preceding $next (may be empty).
+ * @param string $next The token that followed.
+ */
+ private function addTransition($state, $next) {
+ $ctx = $this->generalize($state);
+ $cmb = $this->generalize([$next]);
+ if (!isset($this->transitions[$ctx])) {
+ $this->transitions[$ctx] = [];
}
- $this->increment($this->one[$cmb], $token);
- }
-
- private function addTwo($one, $two, $token) {
- $cmb = $this->generalize([$one, $two]);
- if (!isset($this->two[$cmb])) {
- $this->two[$cmb] = [];
- }
- $this->increment($this->two[$cmb], $token);
- }
-
- private function addThree($one, $two, $three, $token) {
- $cmb = $this->generalize([$one, $two, $three]);
- if (!isset($this->three[$cmb])) {
- $this->three[$cmb] = [];
+ if (!isset($this->transitions[$ctx][$cmb])) {
+ $this->transitions[$ctx][$cmb] = 1;
+ } else {
+ ++$this->transitions[$ctx][$cmb];
}
- $this->increment($this->three[$cmb], $token);
}
- private function addFour($one, $two, $three, $four, $token) {
- $cmb = $this->generalize([$one, $two, $three, $four]);
- if (!isset($this->four[$cmb])) {
- $this->four[$cmb] = [];
+ /**
+ * Count one occurrence of the concrete spelling $token under its generalized form.
+ *
+ * @param array $context Tokens preceding $token; currently not used by this method.
+ * @param string $token The token exactly as it appeared in the text.
+ */
+ private function addExample($context, $token) {
+ $cmb = $this->generalize([$token]);
+ if (!isset($this->examples[$cmb])) {
+ $this->examples[$cmb] = [];
}
- $this->increment($this->four[$cmb], $token);
- }
-
- private function addFive($one, $two, $three, $four, $five, $token) {
- $cmb = $this->generalize([$one, $two, $three, $four, $five]);
- if (!isset($this->five[$cmb])) {
- $this->five[$cmb] = [];
+ if (!isset($this->examples[$cmb][$token])) {
+ $this->examples[$cmb][$token] = 1;
+ } else {
+ ++$this->examples[$cmb][$token];
}
- $this->increment($this->five[$cmb], $token);
}
- private function increment(&$which, $token) {
- $generalized = $this->generalize([$token]);
- if (!isset($which[$generalized])) {
- $which[$generalized] = [
- 'count' => 1,
- 'examples' => [],
- ];
- $which[$generalized]['examples'][$token] = 1;
- } else {
- ++$which[$generalized]['count'];
- if (!isset($which[$generalized]['examples'][$token])) {
- $which[$generalized]['examples'][$token] = 1;
- } else {
- ++$which[$generalized]['examples'][$token];
- }
- }
+ /**
+ * Split text into tokens at word boundaries.
+ *
+ * Runs of word characters and the runs between them (whitespace,
+ * punctuation) each become one token. Empty pieces are dropped, as are
+ * tokens containing 'cheer' followed by digits (case-insensitive).
+ *
+ * @param string $str Raw text.
+ * @return array List of tokens; empty when the text cannot be split.
+ */
+ private function tokenize($str) {
+ $parts = preg_split('/\b/u', $str);
+ if ($parts === false) {
+ // preg_split() fails e.g. on invalid UTF-8 under /u; treat such text as having no tokens
+ // instead of handing false to array_filter().
+ return [];
+ }
+ return array_values(array_filter($parts, function($token) {
+ if ($token === '') return false;
+ if (preg_match('/cheer\d+/u', strtolower($token))) return false;
+ return true;
+ }));
}
+ /**
+ * Reduce a token sequence to one normalized key string.
+ *
+ * Per token: digit runs become '0', whitespace runs become a single space,
+ * characters repeated three or more times are cut to two, the token is
+ * lowercased and a known alias is replaced by its canonical form. The
+ * normalized tokens are concatenated and every category pattern in the
+ * result is replaced by that category's placeholder.
+ *
+ * @param array $tokens Tokens to generalize.
+ * @return string The generalized key.
+ */
private function generalize($tokens) {
$str = '';
foreach ($tokens as $token) {
- $replaced = preg_replace('/\W/u', '', $token);
- $replaced = preg_replace('/\d+/', '0', $replaced);
- $replaced = strtolower(trim($replaced));
- $str .= empty($replaced) ? $token : $replaced;
+ $replaced = preg_replace('/\d+/u', '0', $token);
+ $replaced = preg_replace('/\s+/u', ' ', $replaced);
+ $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
+ // Multibyte-aware lowercasing: strtolower() works on bytes and leaves uppercase
+ // umlauts untouched, so they would never match the lowercase category patterns.
+ $replaced = mb_strtolower($replaced, 'UTF-8');
+ foreach ($this->aliases as $canonical => $variants) {
+ if (in_array($replaced, $variants, true)) {
+ $replaced = $canonical;
+ break;
+ }
+ if ($replaced === $canonical) {
+ break;
+ }
+ }
+ $str .= $replaced;
+ }
+ // Categories are matched on the concatenated string so multi-token patterns can apply.
+ foreach ($this->categories as $category => $patterns) {
+ $str = preg_replace($patterns, $category, $str);
}
return $str;
}
- private function exampleOf($pick) {
- $example = $this->pick($pick[3]);
- return $example[0];
+ /**
+ * Turn a picked generalized token into a concrete spelling.
+ *
+ * When example spellings are known for the generalized token, one of them
+ * is drawn by weight; otherwise the generalized token itself is returned.
+ *
+ * @param array $pick Entry returned by pick(); element 0 is the generalized token.
+ * @param array $context Tokens generated so far; currently not used.
+ * @return mixed The concrete token to emit.
+ */
+ private function exampleOf($pick, $context) {
+ if (!isset($this->examples[$pick[0]])) {
+ return $pick[0];
+ }
+ $example = $this->pick($this->examples[$pick[0]]);
+ // pick() yields null for an empty example list; fall back to the generalized token
+ // rather than returning null into generate().
+ return is_null($example) ? $pick[0] : $example[0];
}
- private $start = [];
- private $one = [];
- private $two = [];
- private $three = [];
- private $four = [];
- private $five = [];
+ // Context size: randomNext() uses at most this many trailing tokens for a lookup.
+ private $size;
+ // Generalized context => successor counts; compile() replaces the counts with [key, lower, upper] ranges.
+ private $transitions = [];
+ // Generalized token => concrete spellings with counts; compile() converts these to weighted ranges too.
+ private $examples = [];
+
+ // Canonical token => alternative spellings. generalize() replaces a lowercased token
+ // that equals one of the variants with the canonical form.
+ private $aliases = [
+ 'chest' => ['kiste'],
+ 'einen' => ['n', 'nen'],
+ 'musik' => ['mukke'],
+ 'schade' => ['schad', 'schaade'],
+ ];
+
+ // Category name => regex fragments. The constructor rewrites this into
+ // '%NAME%' => ['/\bfragment\b/u', ...], and generalize() replaces every match with
+ // the placeholder. Fragments are matched case-sensitively against text that has
+ // already been lowercased and had its digit runs replaced by '0', so they must be
+ // lowercase and use '0' wherever a number appears.
+ private $categories = [
+ 'fail' => [
+ 'failfish',
+ 'holysm0notlikethis',
+ 'notlikethis',
+ 'tetobridge0',
+ 'vinter0clown',
+ ],
+
+ 'hype' => [
+ 'dergoaparty',
+ 'dinodance',
+ 'elemen0party',
+ 'muftaahype',
+ 'luckwuhype',
+ 'olliwahype',
+ 'osora0umbrihype',
+ 'partyhat',
+ 'peepocheer',
+ 'rei0hype',
+ 'sakayahype',
+ 'tetotroete',
+ 'ticknaboargeil0',
+ 'ticknahype0',
+ ],
+
+ 'kappa' => [
+ 'kappa(claus|hd)?',
+ ],
+
+ 'jam' => [
+ '(cat|dog|rat)jam',
+ 'kanash0jam',
+ 'rei0jamers',
+ 'samusdance',
+ ],
+
+ 'lol' => [
+ ':d',
+ 'boothi0lul',
+ 'kekw',
+ 'lol',
+ 'lul',
+ 'rei0lul',
+ 'samusgrin',
+ 'ticknaauslachen',
+ 'xd',
+ ],
+
+ 'love' => [
+ // NOTE(review): digits are normalized to '0' before matching, so '<3' cannot match as written - confirm intent.
+ '<3',
+ 'duden0love',
+ 'exec0love',
+ 'krawal0heart',
+ 'lodanzhug',
+ 'luckwulove',
+ 'luvsign',
+ 'muftaal',
+ 'osora0love',
+ 'peepoexcitedhug',
+ 'spirit0love',
+ 'svenkalove',
+ 'ticknaherz',
+ ],
+
+ 'name' => [
+ 'baba',
+ 'baka',
+ 'bobe?r',
+ 'brog(i|or)',
+ 'cfate',
+ 'danny',
+ 'danzi+',
+ 'daruck',
+ 'dennsen',
+ 'dimez',
+ 'divi',
+ 'dud(en|i+)',
+ 'ele',
+ 'eri(ror)?',
+ '(name)?faker',
+ 'fetti+',
+ 'gamma(chuu)?',
+ 'goat(buster|ie?|y)?',
+ 'hitsu(yan)?',
+ 'holy',
+ 'jem',
+ 'kala(marino)?',
+ 'kromb',
+ 'koval',
+ 'kum(i|o|p)',
+ 'lanux',
+ 'len(esha|chen)',
+ 'leya+',
+ 'magno',
+ 'malmo',
+ 'markam',
+ 'micha',
+ 'mimsy',
+ 'muf(fy|taay)',
+ 'murd(elizer|i+)',
+ 'nami',
+ 'nula',
+ 'onio',
+ 'paulinche',
+ 'phaaze',
+ 'ralen',
+ 'ramond',
+ 'ray(vis)?',
+ 'schulzer',
+ 'skunk(ner)?',
+ 'skipsy',
+ 'soli+',
+ 'sven(ka+)?',
+ 'tantalus',
+ 'teto',
+ 'thalanee?',
+ 'tick(i+|naldo|y+)',
+ 'tofu',
+ 'tr[i0]x+',
+ 'vin(nie?|ny|ter)',
+ 'xall',
+ 'yasi',
+ ],
+
+ 'pog' => [
+ // Lowercase on purpose: input is lowercased before matching, so 'bumble0Pog' could never match.
+ 'bumble0pog',
+ 'komodohype',
+ 'pog',
+ 'pogchamp',
+ 'poggers',
+ 'satono0pog',
+ ],
+
+ 'run' => [
+ 'dennsenboots',
+ 'lodanzrun',
+ 'ticknaldosprint',
+ 'vinter0run',
+ ],
+
+ 'wave' => [
+ 'dennsenhi',
+ 'dergoawave',
+ 'falcnwavehi',
+ 'heyguys',
+ 'holysm0heyguys',
+ 'muftaahey',
+ 'rei0wave',
+ 'sayuri0wave',
+ 'shindi0wave',
+ 'svenkawave',
+ 'wuschlwave',
+ ],
+
+ 'zb' => [
+ 'aga(hnim)?',
+ 'armos( knights)?',
+ 'arrghus',
+ 'blind',
+ 'ganon(dorf)?',
+ 'helma',
+ 'kholdstare',
+ 'lanmo(las)?',
+ 'moldorm',
+ 'mothula',
+ 'mott[ei]',
+ 'trinexx',
+ 'vit(reous|ty)',
+ ],
+
+ 'zd' => [
+ 'eastern',
+ 'desert( palace)?',
+ 'gt',
+ 'hera',
+ 'ice ?(palace)?',
+ '(misery )?mire',
+ 'pod',
+ 'skull ?woods',
+ 'swamp',
+ 'thieve\'?s\'? ?town',
+ 'tr',
+ 'tt',
+ ],
+
+ 'zi' => [
+ '(big|small|retro|generic) ?keys?',
+ 'b[oö]gen',
+ 'bombos',
+ 'boots',
+ 'bottle',
+ 'bows?',
+ 'bugnet',
+ 'byrna',
+ 'cape',
+ 'ether',
+ 'flasche',
+ 'flippers',
+ 'fl[uö]te',
+ 'frod',
+ '(gloves?|mitts|handschuhe?)',
+ '(half|quarter) ?magic',
+ 'hammer',
+ 'hookshot',
+ '(ice|fire) ?rod',
+ 'lampe?',
+ 'laser ?bridge',
+ 'mearl',
+ 'mirror',
+ 'moon ?pearl',
+ 'mushroom',
+ 'ocarina',
+ 'pilz',
+ 'powder',
+ 'puder',
+ 'quake',
+ '(red|blue) ?cane',
+ '(red|green|blue) ?(goo|potion)',
+ '(red|green|blue|baby) ?mail',
+ '(red|blue|bu|boo|good|bad|both)merang',
+ 'schaufel',
+ '(gro(ss|ß)er? |kleiner? )?schlüssel',
+ 'schwert',
+ 'shovel',
+ 'silvers',
+ 'somaria',
+ 'spiegel',
+ 'sword',
+ ],
+
+ 'zl' => [
+ 'big chest',
+ 'bumper( cave)?( ledge)?',
+ '(hyrule)? ?castle ?(tower)?',
+ 'catfish',
+ 'cave 0?',
+ 'chest ?game',
+ 'cutscene ?chest',
+ 'damm',
+ 'desert( ledge)?',
+ 'dig(ging)? ?game',
+ '((back|front) of )?escape',
+ 'gyl',
+ 'hobo',
+ 'hook ?(shot) cave',
+ 'lava ?chest',
+ '(light|dark) ?world',
+ 'lss',
+ 'magic bat',
+ '(dark )?(death )?mountain',
+ 'ped(estal)?',
+ 'pyramid( fairy)?( ledge)?',
+ 'red bomb',
+ 'sahasrahla',
+ 'sasha',
+ 'sick kid',
+ 'stumpy',
+ 'tile ?room',
+ 'torch',
+ 'zora( ledge)?',
+ ],
+ ];
}