5 use Illuminate\Support\Facades\Storage;
/**
 * Constructor. The visible part converts the raw category patterns into
 * ready-to-use regular expressions.
 *
 * Each pattern is wrapped between \b word-boundary anchors with the u
 * (UTF-8) modifier, and each category name is re-keyed to an upper-cased
 * placeholder surrounded by percent signs. The converted map then replaces
 * $this->categories.
 *
 * NOTE(review): the lines that consume $size and initialise $converted are
 * not visible in this excerpt; presumably $size ends up in $this->size
 * (randomNext and saveAs read that property) - confirm.
 *
 * @param int $size presumably the maximum context length in tokens - TODO confirm
 */
9 public function __construct($size = 6) {
13 foreach ($this->categories as $category => $patterns) {
14 $converted_patterns = [];
15 foreach ($patterns as $pattern) {
// Patterns are concatenated verbatim (no preg_quote), so they are treated
// as regex fragments and must not contain an unescaped slash delimiter.
16 $converted_patterns[] = '/\b'.$pattern.'\b/u';
// Key format: percent sign + upper-cased category name + percent sign.
18 $converted['%'.strtoupper($category).'%'] = $converted_patterns;
20 $this->categories = $converted;
/**
 * Feed one chat log entry into the model by passing its text_content
 * property to addText.
 *
 * @param ChatLog $msg log entry whose text_content is learned
 */
23 public function addMessage(ChatLog $msg) {
24 $this->addText($msg->text_content);
/**
 * Learn from one piece of text: tokenize it and record, for every token,
 * which contexts preceded it and which concrete spelling was seen.
 *
 * Nothing is recorded when tokenize yields no tokens.
 *
 * NOTE(review): several lines of this method (the condition guarding the
 * empty-context transition and the definition of $end) are not visible in
 * this excerpt, so the comments below only describe the visible statements.
 *
 * @param string $text raw text to learn from
 */
27 public function addText($text) {
28 $tokens = $this->tokenize($text);
29 if (empty($tokens)) return;
// $num is the position of $token inside the token list.
31 foreach ($tokens as $num => $token) {
// Transition from the empty context to this token.
// NOTE(review): presumably only done under a condition on the line before
// this one (not visible) - confirm.
33 $this->addTransition([], $token);
// Earliest token index used as the start of a context window.
// NOTE(review): if $end equals $num this window holds up to size + 1 tokens,
// while randomNext looks up at most size tokens - possible off-by-one, verify.
35 $start = max(0, $num - $this->size - 1);
// Walk the window start forward, so the recorded contexts get shorter.
37 for ($i = $start; $i < $end; ++$i) {
38 $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
// Stop once a context shorter than 5 tokens has been recorded.
39 if ($end - $i < 5) break;
// Record the concrete spelling of the token together with everything before it.
42 $this->addExample(array_slice($tokens, 0, $num), $token);
/**
 * Turn the raw counters collected by addTransition and addExample into the
 * indexed lists produced by index, dropping entries that end up empty.
 *
 * Transitions are indexed with a minimum weight of 2, examples with a
 * minimum weight of 1.
 */
46 public function compile() {
47 foreach ($this->transitions as $key => $values) {
48 $this->transitions[$key] = $this->index($values, 2);
// Contexts with no successor left after the weight filter are removed.
49 if (empty($this->transitions[$key])) {
50 unset($this->transitions[$key]);
53 foreach ($this->examples as $key => $values) {
// Examples keyed by the empty string or a single space are discarded.
// NOTE(review): in_array is called without the strict flag. PHP stores a
// numeric-string key such as 0 as an integer, and before PHP 8 the loose
// comparison of integer 0 with an empty string is true, so such a key
// would be discarded here as well - consider passing true as third argument.
54 if (in_array($key, ['', ' '])) {
55 unset($this->examples[$key]);
58 $this->examples[$key] = $this->index($values, 1);
// Also drop the entry when it is empty, or when its only example is
// identical to the generalized key itself.
59 if (empty($this->examples[$key]) || (count($this->examples[$key]) === 1 && $this->examples[$key][0][0] === $key)) {
60 unset($this->examples[$key]);
/**
 * Generate text by repeatedly asking randomNext for the next token.
 *
 * NOTE(review): the initialisation of $generated and $tokens, the append
 * step and the return statement are not visible in this excerpt.
 *
 * @param int $limit generation continues while the generated string is shorter than this
 */
65 public function generate($limit = 100) {
// NOTE(review): strlen counts bytes, so for multi-byte UTF-8 text the limit
// is a byte limit rather than a character limit.
68 while (strlen($generated) < $limit) {
69 $next = $this->randomNext($tokens);
// An empty string from randomNext ends the generation.
70 if ($next === '') break;
/**
 * Persist the model (size, transitions, examples) as JSON on the chatlib
 * storage disk, in a file named after $name with a .json extension.
 *
 * NOTE(review): json_encode returns false on failure and its result is
 * handed to put unchecked.
 *
 * @param string $name file name without extension
 */
77 public function saveAs($name) {
79 'size' => $this->size,
80 'transitions' => $this->transitions,
81 'examples' => $this->examples,
83 Storage::disk('chatlib')->put($name.'.json', json_encode($data));
/**
 * Load a model previously written by saveAs from the chatlib storage disk
 * and overwrite size, transitions and examples with its content.
 *
 * NOTE(review): neither the read nor the decode result is validated; with
 * invalid JSON json_decode yields null and the three properties below would
 * be set from a null value.
 *
 * @param string $name file name without extension
 */
86 public function loadFrom($name) {
// The second argument true makes json_decode return associative arrays.
87 $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
88 $this->size = $data['size'];
89 $this->transitions = $data['transitions'];
90 $this->examples = $data['examples'];
/**
 * Convert a map of key to weight into a list of triples, skipping entries
 * whose weight is below $min_weight.
 *
 * NOTE(review): the lines that compute $lower and $sum are not visible in
 * this excerpt. pick and search read element 1 as an inclusive lower bound
 * and element 2 as an exclusive upper bound, so these are presumably
 * cumulative weight ranges - confirm.
 *
 * @param array $arr        map of key to weight
 * @param int   $min_weight entries with a smaller weight are ignored
 */
93 private function index($arr, $min_weight = 2) {
96 foreach ($arr as $key => $weight) {
97 if ($weight < $min_weight) continue;
100 $result[] = [$key, $lower, $sum];
/**
 * Choose the next token for the given token history.
 *
 * Tries the longest usable context first (at most $this->size trailing
 * tokens) and shortens it one token at a time until a known context yields
 * a pick; the pick is then turned into a concrete token by exampleOf.
 *
 * NOTE(review): the fallback after the loop is not visible in this excerpt;
 * generate treats an empty string as the stop signal.
 *
 * @param array $tokens tokens generated so far
 */
105 private function randomNext($tokens) {
106 $cnt = count($tokens);
107 for ($size = min($this->size, $cnt); $size > 0; --$size) {
// Generalized form of the last $size tokens is the lookup key.
108 $cmb = $this->generalize(array_slice($tokens, -$size));
109 if (isset($this->transitions[$cmb])) {
110 $pick = $this->pick($this->transitions[$cmb]);
111 if (!is_null($pick)) {
112 return $this->exampleOf($pick, $tokens);
/**
 * Randomly select one entry from an indexed option list.
 *
 * Draws an integer between 0 and the upper bound (element 2) of the last
 * entry minus one, then lets search find the entry for that number.
 *
 * @param array $options list of triples as built by index
 * @return array|null the selected triple, or null when $options is empty
 */
119 private function pick($options) {
120 if (empty($options)) return null;
// end moves the internal pointer of the local copy only, because $options
// is passed by value.
121 $max = end($options)[2] - 1;
122 $num = random_int(0, $max);
123 return static::search($options, $num);
/**
 * Binary search for the option whose range contains $num.
 *
 * For each probed entry, element 1 is used as the inclusive lower bound and
 * element 2 minus one as the inclusive upper bound.
 *
 * NOTE(review): the initialisation of $min_index and the end of the final
 * branch are not visible in this excerpt; the loop presumably exits right
 * after the match branch - confirm. pick never calls this with an empty
 * list.
 *
 * @param array $options list of triples as built by index
 * @param int   $num     number to locate
 * @return array the entry at the final $min_index
 */
126 public static function search($options, $num) {
128 $max_index = count($options) - 1;
129 while ($min_index < $max_index) {
// Midpoint, rounded down.
130 $cur_index = intval(($min_index + $max_index) / 2);
131 $cur_low = $options[$cur_index][1];
132 $cur_high = $options[$cur_index][2] - 1;
// Probed range lies above $num: continue in the lower half.
133 if ($cur_low > $num) {
134 $max_index = $cur_index;
// Probed range lies below $num: continue in the upper half.
135 } else if ($cur_high < $num) {
136 $min_index = $cur_index + 1;
// Otherwise the probed range contains $num.
138 $min_index = $cur_index;
142 return $options[$min_index];
/**
 * Count one observation of $next following the context $state.
 *
 * Both the context and the successor are stored in generalized form, so
 * the counter is keyed by generalize($state) and generalize([$next]).
 *
 * NOTE(review): the line separating the initial assignment from the
 * increment is not visible in this excerpt (presumably an else) - confirm.
 *
 * @param array  $state tokens preceding $next (may be empty)
 * @param string $next  token that followed the context
 */
145 private function addTransition($state, $next) {
146 $ctx = $this->generalize($state);
147 $cmb = $this->generalize([$next]);
// Create the per-context counter map on first use.
148 if (!isset($this->transitions[$ctx])) {
149 $this->transitions[$ctx] = [];
151 if (!isset($this->transitions[$ctx][$cmb])) {
152 $this->transitions[$ctx][$cmb] = 1;
154 ++$this->transitions[$ctx][$cmb];
/**
 * Count one concrete spelling of a token under its generalized form.
 *
 * NOTE(review): $context is not used in the visible lines, and the line
 * separating the initial assignment from the increment is not visible in
 * this excerpt (presumably an else) - confirm.
 *
 * @param array  $context tokens preceding $token
 * @param string $token   concrete token as it appeared in the text
 */
158 private function addExample($context, $token) {
159 $cmb = $this->generalize([$token]);
// Create the per-key counter map on first use.
160 if (!isset($this->examples[$cmb])) {
161 $this->examples[$cmb] = [];
163 if (!isset($this->examples[$cmb][$token])) {
164 $this->examples[$cmb][$token] = 1;
166 ++$this->examples[$cmb][$token];
/**
 * Split a string into tokens at word boundaries and filter the result.
 *
 * The list is re-indexed from zero after filtering.
 *
 * NOTE(review): the remaining filter rules and the closing lines are not
 * visible in this excerpt. preg_split can return false for input that is
 * not valid UTF-8 when the u modifier is used, and that value is passed
 * straight to array_filter.
 *
 * @param string $str text to split
 */
170 private function tokenize($str) {
171 return array_values(array_filter(preg_split('/\b/u', $str), function($token) {
// Empty pieces are dropped.
172 if ($token === '') return false;
// Tokens containing the word cheer followed by digits are dropped,
// compared case-insensitively by lower-casing the token first.
173 if (preg_match('/cheer\d+/u', strtolower($token))) return false;
/**
 * Normalise a list of tokens into the generalized form used as lookup key
 * for transitions and examples.
 *
 * NOTE(review): the construction of $str, the body of the second alias
 * condition and the return statement are not visible in this excerpt.
 *
 * @param array $tokens tokens to normalise
 */
178 private function generalize($tokens) {
180 foreach ($tokens as $token) {
// Every run of digits becomes a single 0.
181 $replaced = preg_replace('/\d+/u', '0', $token);
// Every run of whitespace becomes a single space.
182 $replaced = preg_replace('/\s+/u', ' ', $replaced);
// A character repeated three or more times is reduced to two.
183 $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
// NOTE(review): strtolower works on bytes; non-ASCII upper-case letters in
// UTF-8 text are not lower-cased the way mb_strtolower would - verify
// against the non-ASCII category patterns.
184 $replaced = strtolower($replaced);
// Map known spelling variants onto their canonical form.
185 foreach ($this->aliases as $canonical => $variants) {
// NOTE(review): in_array is called without the strict flag.
186 if (in_array($replaced, $variants)) {
187 $replaced = $canonical;
190 if ($replaced === $canonical) {
// Replace every category pattern match in $str with the category placeholder.
196 foreach ($this->categories as $category => $patterns) {
197 $str = preg_replace($patterns, $category, $str);
/**
 * Turn a picked transition entry into a concrete token.
 *
 * Element 0 of $pick is used as key into $this->examples; when examples
 * exist for it, one of them is selected with pick.
 *
 * NOTE(review): the bodies of both branches, any use of $context and the
 * return statements are not visible in this excerpt.
 *
 * @param array $pick    triple selected by pick
 * @param array $context tokens generated so far
 */
202 private function exampleOf($pick, $context) {
203 if (!isset($this->examples[$pick[0]])) {
206 if (isset($this->examples[$pick[0]])) {
207 $example = $this->pick($this->examples[$pick[0]]);
// Generalized context mapped to its successors. addTransition stores raw
// counters here; compile replaces each entry with the list built by index.
214 private $transitions = [];
// Generalized token mapped to the concrete spellings seen for it.
// addExample stores raw counters here; compile replaces each entry with the
// list built by index.
215 private $examples = [];
// Canonical spelling mapped to a list of variants.
// NOTE(review): the opening line of this array is not visible in this
// excerpt; presumably this is the aliases table that generalize iterates
// as canonical => variants - confirm.
218 'chest' => ['kiste'],
219 'einen' => ['n', 'nen'],
220 'musik' => ['mukke'],
221 'schade' => ['schad', 'schaade'],
// Raw category patterns. The constructor wraps every pattern between \b
// anchors with the u modifier and re-keys each category to an upper-cased
// placeholder; generalize then replaces matches with that placeholder.
// The entries are regex fragments (groups, alternation and optional parts
// are used below).
// NOTE(review): the category keys and most entries are not visible in this
// excerpt.
224 private $categories = [
227 'holysm0notlikethis',
307 'goat(buster|ie?|y)?',
405 'thieve\'?s\'? ?town',
411 '(big|small|retro|generic) ?keys?',
425 '(gloves?|mitts|handschuhe?)',
426 '(half|quarter) ?magic',
442 '(red|green|blue) ?(goo|potion)',
443 '(red|green|blue|baby) ?mail',
444 '(red|blue|bu|boo|good|bad|both)merang',
446 '(gro(ss|ß)er? |kleiner? )?schlüssel',
457 'bumper( cave)?( ledge)?',
458 '(hyrule)? ?castle ?(tower)?',
466 '((back|front) of )?escape',
471 '(light|dark) ?world',
474 '(dark )?(death )?mountain',
476 'pyramid( fairy)?( ledge)?',