5 use Illuminate\Support\Facades\Storage;
/**
 * Prepares the category patterns for use by generalize().
 *
 * Every raw pattern fragment of every category is wrapped into a
 * word-boundary-anchored, UTF-8 aware regex, and the category is re-keyed
 * as an upper-case placeholder of the form %NAME%.
 *
 * @param int $size Default 7. NOTE(review): presumably stored as $this->size
 *                  (the maximum state length read by addText(), randomNext()
 *                  and saveAs()) - confirm.
 */
9 public function __construct($size = 7) {
13 foreach ($this->categories as $category => $patterns) {
14 $converted_patterns = [];
15 foreach ($patterns as $pattern) {
// Fragments are inserted verbatim (they are regex fragments themselves), so a
// fragment containing an unescaped "/" would break the delimiter.
16 $converted_patterns[] = '/\b'.$pattern.'\b/u';
// The placeholder key is what preg_replace() in generalize() substitutes for a match.
18 $converted['%'.strtoupper($category).'%'] = $converted_patterns;
// Replace the raw definitions with the compiled regex lists.
20 $this->categories = $converted;
/**
 * Feeds one chat log entry into the model by passing its text_content
 * property to addText().
 *
 * @param ChatLog $msg Chat log entry whose text_content is learned from.
 */
23 public function addMessage(ChatLog $msg) {
24 $this->addText($msg->text_content);
/**
 * Tokenizes a piece of text and records, for each token, transitions from
 * windows of preceding tokens (the "state") to that token.
 *
 * @param string $text Raw text to learn from.
 */
27 public function addText($text) {
28 $tokens = $this->tokenize($text);
// Nothing to learn when tokenization yields no tokens.
29 if (empty($tokens)) return;
31 foreach ($tokens as $num => $token) {
// Record the token as a successor of the empty state.
33 $this->addTransition([], $token);
// Earliest window start: size + 1 positions before the current token, clamped to 0.
35 $start = max(0, $num - $this->size - 1);
// NOTE(review): $end is assigned on a line not shown here - presumably the
// current token index, so each slice is the run of tokens preceding $token. Confirm.
37 for ($i = $start; $i < $end; ++$i) {
// State = tokens[$i .. $end), getting shorter with every iteration.
38 $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
// Stop once the state just recorded was shorter than 5 tokens.
39 if ($end - $i < 5) break;
/**
 * Converts the raw transition counts of every state into the indexed form
 * produced by index(), and removes states whose indexed list is empty.
 */
45 public function compile() {
// $value is unused; entries are read and written through $this->transitions[$key].
46 foreach ($this->transitions as $key => $value) {
47 $this->transitions[$key] = $this->index($this->transitions[$key]);
// index() can yield an empty list (it skips some entries); drop such states.
48 if (empty($this->transitions[$key])) {
49 unset($this->transitions[$key]);
/**
 * Generates text by repeatedly asking randomNext() for a follow-up token.
 *
 * @param int $limit Generation continues while strlen($generated) is below
 *                   this value. strlen() counts bytes, so for multi-byte
 *                   UTF-8 text the limit is in bytes, not characters.
 *
 * NOTE(review): the initialisation of $generated / $tokens and the return
 * statement are not shown here - presumably the generated string is returned. Confirm.
 */
54 public function generate($limit = 100) {
57 while (strlen($generated) < $limit) {
58 $next = $this->randomNext($tokens);
// An empty string from randomNext() ends generation early.
59 if ($next === '') break;
/**
 * Persists the model as JSON to "<name>.json" on the "chatlib" storage disk.
 *
 * The payload contains at least the keys 'size' and 'transitions'.
 *
 * @param string $name Base file name, without the ".json" extension.
 */
66 public function saveAs($name) {
68 'size' => $this->size,
69 'transitions' => $this->transitions,
// NOTE(review): the result of json_encode() is written without being checked;
// it is false when encoding fails.
71 Storage::disk('chatlib')->put($name.'.json', json_encode($data));
/**
 * Restores size and transitions from "<name>.json" on the "chatlib" storage
 * disk, the counterpart of saveAs().
 *
 * @param string $name Base file name, without the ".json" extension.
 */
74 public function loadFrom($name) {
// Decode to associative arrays (second argument true), matching the array
// structure used by $this->transitions.
75 $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
// NOTE(review): no validation here - if decoding fails $data is null and both
// properties below end up null.
76 $this->size = $data['size'];
77 $this->transitions = $data['transitions'];
/**
 * Converts the raw count structure of one state into a list of entries of
 * the form [key, lower, sum, examples], the shape pick() / search() read
 * (index 1 as lower bound, index 2 as upper bound, index 3 as example list).
 *
 * @param array $arr Map of generalized token to entry; $entry[1] is iterated
 *                   as concrete token => count (see increment()).
 *
 * NOTE(review): $weight, $lower, $sum and $sublower are computed on lines not
 * shown here - presumably $weight is $entry[0] and the bounds are running
 * totals. Confirm. The return statement is not shown either.
 */
80 private function index($arr) {
83 foreach ($arr as $key => $entry) {
// Entries with a weight of exactly 1 are left out of the index.
85 if ($weight == 1) continue;
// Example list starts with a single-space entry spanning 0..1.
// NOTE(review): the reason for this seed is not evident from here.
90 $examples = [[' ', 0, 1]];
93 foreach ($entry[1] as $example => $subweight) {
// Running total of example counts; becomes this example's upper bound.
95 $subsum += $subweight;
96 $examples[] = [$example, $sublower, $subsum];
99 $result[] = [$key, $lower, $sum, $examples];
/**
 * Chooses a follow-up token for the given token history.
 *
 * Tries the longest suffix of $tokens first (at most $this->size tokens) and
 * falls back to ever shorter suffixes down to a single token.
 *
 * @param array $tokens Tokens generated so far.
 *
 * NOTE(review): what is returned when no suffix yields a pick is not shown
 * here - presumably '' since generate() stops on an empty string. Confirm.
 */
104 private function randomNext($tokens) {
105 $cnt = count($tokens);
106 for ($size = min($this->size, $cnt); $size > 0; --$size) {
// Key for the last $size tokens, built the same way as in addTransition().
107 $cmb = $this->generalize(array_slice($tokens, -$size));
108 if (isset($this->transitions[$cmb])) {
// Weighted random choice among the successors recorded for this state.
109 $pick = $this->pick($this->transitions[$cmb]);
110 if (!is_null($pick)) {
// Turn the picked (generalized) entry into a concrete example.
111 return $this->exampleOf($pick);
/**
 * Picks a random entry from an indexed option list.
 *
 * @param array $options List of entries whose index 1 / index 2 hold the
 *                       lower / upper bound of their range.
 * @return array|null The entry found by search(), or null for an empty list.
 */
118 private function pick($options) {
119 if (empty($options)) return null;
// Uses the last entry's upper bound as the total; this assumes entries are
// ordered by cumulative range.
120 $max = end($options)[2] - 1;
// Uniform draw from 0..$max (both inclusive).
121 $num = random_int(0, $max);
122 return static::search($options, $num);
/**
 * Binary search for the option whose range contains $num.
 *
 * Each option holds its lower bound at index 1 and its upper bound at
 * index 2; the upper bound is treated as exclusive (upper - 1 is the highest
 * matching number).
 *
 * @param array $options Indexed option list, ordered by range.
 * @param int   $num     Number to locate.
 * @return array The option at the final $min_index.
 *
 * NOTE(review): the initial value of $min_index is set on a line not shown
 * here - presumably 0. Confirm.
 */
125 public static function search($options, $num) {
127 $max_index = count($options) - 1;
128 while ($min_index < $max_index) {
// Midpoint, rounded down.
129 $cur_index = intval(($min_index + $max_index) / 2);
130 $cur_low = $options[$cur_index][1];
131 $cur_high = $options[$cur_index][2] - 1;
// Target lies below the current range: continue in the left part.
132 if ($cur_low > $num) {
133 $max_index = $cur_index;
// Target lies above the current range: continue to the right of it.
134 } else if ($cur_high < $num) {
135 $min_index = $cur_index + 1;
// Otherwise the current range contains $num.
137 $min_index = $cur_index;
141 return $options[$min_index];
/**
 * Records that $next followed the token sequence $state.
 *
 * @param array  $state Preceding tokens (may be empty).
 * @param string $next  Token that followed them.
 */
144 private function addTransition($state, $next) {
// States are keyed by their generalized form.
145 $cmb = $this->generalize($state);
// Create the bucket for this state on first use.
146 if (!isset($this->transitions[$cmb])) {
147 $this->transitions[$cmb] = [];
149 $this->increment($this->transitions[$cmb], $next);
/**
 * Counts one occurrence of $token in the given bucket.
 *
 * Bucket entries are keyed by the generalized token. Index 0 of an entry is
 * incremented as a total count, and index 1 is a map of concrete token to
 * its own count.
 *
 * @param array  $which Bucket to update (passed by reference).
 * @param string $token Concrete token as it appeared in the text.
 */
152 private function increment(&$which, $token) {
153 $generalized = $this->generalize([$token]);
// First time this generalized token is seen in the bucket: create its entry.
154 if (!isset($which[$generalized])) {
155 $which[$generalized] = [
159 $which[$generalized][1][$token] = 1;
// Bump the total for the generalized token.
161 ++$which[$generalized][0];
// Per-spelling count: start at 1 for a new spelling, increment otherwise.
162 if (!isset($which[$generalized][1][$token])) {
163 $which[$generalized][1][$token] = 1;
165 ++$which[$generalized][1][$token];
/**
 * Splits a string into tokens at word boundaries and filters the result.
 *
 * @param string $str Text to split.
 * @return array Re-indexed list (array_values) of the tokens that pass the filter.
 */
170 private function tokenize($str) {
// Splitting on \b keeps the pieces between words as tokens of their own.
171 return array_values(array_filter(preg_split('/\b/u', $str), function($token) {
// Drop empty pieces produced by the split.
172 if ($token === '') return false;
// Drop tokens containing "cheer" followed by digits, in any letter case.
173 if (preg_match('/cheer\d+/u', strtolower($token))) return false;
/**
 * Normalizes a token sequence into the generalized form used for transition
 * keys (see addTransition(), randomNext() and increment()).
 *
 * @param array $tokens Tokens to normalize.
 *
 * NOTE(review): how $str is assembled from the normalized tokens and what is
 * returned is not shown here - presumably $str after the category
 * replacement. Confirm.
 */
178 private function generalize($tokens) {
180 foreach ($tokens as $token) {
// Any run of digits becomes a single "0".
181 $replaced = preg_replace('/\d+/u', '0', $token);
// Any run of whitespace becomes a single space.
182 $replaced = preg_replace('/\s+/u', ' ', $replaced);
// A character repeated three or more times is reduced to two.
183 $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
// NOTE(review): strtolower() is byte-based, so non-ASCII upper-case letters
// may not be lowered - consider mb_strtolower().
184 $replaced = strtolower($replaced);
// Map known variant spellings to their canonical alias.
185 foreach ($this->aliases as $canonical => $variants) {
// NOTE(review): in_array() is used without strict comparison here.
186 if (in_array($replaced, $variants)) {
187 $replaced = $canonical;
190 if ($replaced === $canonical) {
// Replace every category pattern match with the category placeholder
// (the %NAME% keys built in the constructor).
196 foreach ($this->categories as $category => $patterns) {
197 $str = preg_replace($patterns, $category, $str);
/**
 * Chooses a concrete example for a picked entry by running pick() on the
 * entry's example list (index 3).
 *
 * @param array $pick Entry as returned by pick() / search().
 *
 * NOTE(review): the return statement is not shown here - presumably the
 * example's text (index 0 of $example). Confirm.
 */
202 private function exampleOf($pick) {
203 $example = $this->pick($pick[3]);
// Transition table keyed by generalized state string. Values are the count
// buckets filled by increment() while learning; compile() replaces them with
// the indexed lists produced by index().
208 private $transitions = [];
// canonical spelling => list of variant spellings.
// NOTE(review): presumably the entries of $this->aliases, which generalize()
// uses to map a variant to its canonical form - confirm.
211 'chest' => ['kiste'],
212 'einen' => ['n', 'nen'],
213 'musik' => ['mukke'],
214 'schade' => ['schad', 'schaade'],
217 private $categories = [
220 'holysm0notlikethis',
300 'goat(buster|ie?|y)?',
397 'thieve\'?s\'? ?town',
403 '(big|small|retro|generic) ?keys?',
417 '(gloves?|mitts|handschuhe?)',
418 '(half|quarter) ?magic',
434 '(red|green|blue) ?(goo|potion)',
435 '(red|green|blue|baby) ?mail',
436 '(red|blue|bu|boo|good|bad|both)merang',
438 '(gro(ss|ß)er? |kleiner? )?schlüssel',
449 'bumper( cave)?( ledge)?',
450 '(hyrule)? ?castle ?(tower)?',
458 '((back|front) of )?escape',
463 '(light|dark) ?world',
466 '(dark )?(death )?mountain',
468 'pyramid( fairy)?( ledge)?',