]> git.localhorst.tv Git - alttp.git/blob - app/Models/ChatLib.php
further refine chat gen
[alttp.git] / app / Models / ChatLib.php
1 <?php
2
3 namespace App\Models;
4
5 use Illuminate\Support\Facades\Storage;
6
/**
 * Markov-chain style text generator that learns from chat messages.
 *
 * Usage as visible in this class: feed text with addMessage()/addText(),
 * call compile() to turn the raw counters into cumulative weight ranges,
 * then call generate(). saveAs()/loadFrom() persist the state as JSON on
 * the "chatlib" storage disk.
 *
 * Raw (uncompiled) shape of $transitions:
 *   [context => [generalizedToken => ['count' => int, 'examples' => [token => int]]]]
 * Compiled shape of $transitions:
 *   [context => [[generalizedToken, lower, upper, [[example, lower, upper], ...]], ...]]
 * where every [lower, upper) range is half-open and its width is the weight.
 */
class ChatLib {

    /**
     * Learn from a single chat message.
     *
     * @param ChatLog $msg message whose text_content is added to the library
     */
    public function addMessage(ChatLog $msg) {
        $this->addText($msg->text_content);
    }

    /**
     * Tokenize a text and record which token follows which context.
     *
     * An empty string is appended as end-of-message marker so the generator
     * can learn where messages stop. The first token is recorded against the
     * empty context. For every later token the preceding tokens are recorded
     * as context, starting with the longest window and shrinking it; recording
     * stops once a window shorter than 5 tokens has been stored.
     *
     * NOTE(review): the longest stored window is size + 1 tokens while
     * randomNext() looks up at most size tokens - confirm this is intended.
     *
     * @param string|null $text raw message text
     */
    public function addText($text) {
        $tokens = $this->tokenize($text);
        if (empty($tokens)) return;
        // '' terminates the message; generate() stops when it draws it.
        $tokens[] = '';
        foreach ($tokens as $num => $token) {
            if ($num === 0) {
                $this->addTransition([], $token);
                continue;
            }
            $start = max(0, $num - $this->size - 1);
            for ($i = $start; $i < $num; ++$i) {
                $length = $num - $i;
                $this->addTransition(array_slice($tokens, $i, $length), $token);
                if ($length < 5) break;
            }
        }
    }

    /**
     * Convert the raw counters into cumulative weight ranges used by pick().
     *
     * Contexts that end up without any usable follow-up are removed.
     * NOTE(review): index() reads the raw 'count'/'examples' shape, so this
     * presumably must not be run on already compiled data - confirm.
     */
    public function compile() {
        foreach ($this->transitions as $key => $value) {
            $this->transitions[$key] = $this->index($this->transitions[$key]);
            if (empty($this->transitions[$key])) {
                unset($this->transitions[$key]);
            }
        }
    }

    /**
     * Generate a random text from the compiled transitions.
     *
     * Tokens are appended until the end-of-message marker is drawn or the
     * result is at least $limit bytes long. The limit is checked before a
     * token is appended, so the result may exceed it by part of one token.
     *
     * @param int $limit length (in bytes, via strlen) at which to stop
     * @return string generated text, possibly empty
     */
    public function generate($limit = 100) {
        $tokens = [''];
        $generated = '';
        while (strlen($generated) < $limit) {
            $next = $this->randomNext($tokens);
            if ($next === '') break;
            $tokens[] = $next;
            $generated .= $next;
        }
        return $generated;
    }

    /**
     * Store size and transitions as "<name>.json" on the "chatlib" disk.
     *
     * @param string $name file name without extension
     */
    public function saveAs($name) {
        $data = [
            'size' => $this->size,
            'transitions' => $this->transitions,
        ];
        Storage::disk('chatlib')->put($name.'.json', json_encode($data));
    }

    /**
     * Replace size and transitions with the content of "<name>.json" from
     * the "chatlib" disk.
     *
     * @param string $name file name without extension
     */
    public function loadFrom($name) {
        $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
        $this->size = $data['size'];
        $this->transitions = $data['transitions'];
    }

    /**
     * Turn the raw follow-up counters of one context into weighted ranges.
     *
     * Follow-ups seen only once are dropped. Each remaining follow-up gets a
     * [lower, upper) range whose width is its count, plus the same kind of
     * ranges for its concrete example spellings. Whitespace follow-ups
     * (generalized key ' ') always use a single space as the only example.
     *
     * @param array $arr raw follow-ups: [generalized => ['count' => int, 'examples' => array]]
     * @return array list of [generalized, lower, upper, examples]
     */
    private function index($arr) {
        $result = [];
        $sum = 0;
        foreach ($arr as $key => $entry) {
            $weight = $entry['count'];
            if ($weight === 1) continue;
            // PHP stores numeric-string keys such as "0" as ints; restore the string.
            $label = (string) $key;
            $lower = $sum;
            $sum += $weight;
            $rawExamples = $entry['examples'];
            $examples = [];
            if (is_array(end($rawExamples))) {
                // already processed
                $examples = $rawExamples;
            } else if ($label === ' ') {
                $examples = [[' ', 0, 1]];
            } else {
                $subsum = 0;
                foreach ($rawExamples as $example => $subweight) {
                    $sublower = $subsum;
                    $subsum += $subweight;
                    $examples[] = [(string) $example, $sublower, $subsum];
                }
            }
            $result[] = [$label, $lower, $sum, $examples];
        }
        return $result;
    }

    /**
     * Draw the next token for the tokens generated so far.
     *
     * Tries the longest context first (at most $this->size trailing tokens)
     * and falls back to shorter ones until a known context is found.
     *
     * @param array $tokens tokens generated so far
     * @return string next token, or '' when no context matches
     */
    private function randomNext($tokens) {
        $cnt = count($tokens);
        for ($size = min($this->size, $cnt); $size > 0; --$size) {
            $cmb = $this->generalize(array_slice($tokens, -$size));
            if (isset($this->transitions[$cmb])) {
                $pick = $this->pick($this->transitions[$cmb]);
                if (!is_null($pick)) {
                    return $this->exampleOf($pick);
                }
            }
        }
        return '';
    }

    /**
     * Weighted random choice among compiled options.
     *
     * Each option carries a half-open range [lower, upper) at offsets 1 and 2;
     * ranges are consecutive, so the upper bound of the last option is the
     * total weight. A number in 0..total-1 is drawn and located via binary
     * search, which makes each option's probability proportional to its weight.
     *
     * @param array $options list of [value, lower, upper, ...]
     * @return array|null the chosen option, or null if there are none
     */
    private function pick($options) {
        if (empty($options)) return null;
        $max = end($options)[2];
        // Upper bound is exclusive; max(0, ...) guards a degenerate total of 0.
        $num = random_int(0, max(0, $max - 1));
        $min_index = 0;
        $max_index = count($options) - 1;
        while ($min_index < $max_index) {
            $cur_index = intval(($min_index + $max_index) / 2);
            $cur_low = $options[$cur_index][1];
            $cur_high = $options[$cur_index][2];
            if ($cur_low > $num) {
                $max_index = $cur_index;
            } else if ($cur_high <= $num) {
                $min_index = $cur_index + 1;
            } else {
                $min_index = $cur_index;
                break;
            }
        }
        return $options[$min_index];
    }

    /**
     * Count one occurrence of $next following the context $state.
     *
     * @param array $state context tokens (may be empty for message start)
     * @param string $next token that followed the context
     */
    private function addTransition($state, $next) {
        $cmb = $this->generalize($state);
        if (!isset($this->transitions[$cmb])) {
            $this->transitions[$cmb] = [];
        }
        $this->increment($this->transitions[$cmb], $next);
    }

    /**
     * Increase the counter of a follow-up token inside one context.
     *
     * The counter is keyed by the generalized token; the exact spelling is
     * counted separately under 'examples'.
     *
     * @param array $which follow-up table of a context, modified in place
     * @param string $token follow-up token as it appeared in the text
     */
    private function increment(&$which, $token) {
        $generalized = $this->generalize([$token]);
        if (!isset($which[$generalized])) {
            $which[$generalized] = [
                'count' => 1,
                'examples' => [],
            ];
            $which[$generalized]['examples'][$token] = 1;
        } else {
            ++$which[$generalized]['count'];
            if (!isset($which[$generalized]['examples'][$token])) {
                $which[$generalized]['examples'][$token] = 1;
            } else {
                ++$which[$generalized]['examples'][$token];
            }
        }
    }

    /**
     * Split a text at word boundaries.
     *
     * Empty pieces and tokens containing "cheer" followed by digits
     * (case-insensitive) are removed. A literal "0" is kept.
     *
     * @param string|null $str text to split
     * @return array list of non-empty tokens
     */
    private function tokenize($str) {
        $parts = preg_split('/\b/u', (string) $str);
        if ($parts === false) {
            // preg_split failed (e.g. malformed UTF-8): nothing to learn from.
            return [];
        }
        return array_values(array_filter($parts, function ($token) {
            // Compare against '' explicitly: empty('0') is true and would drop zeros.
            if ($token === '') return false;
            if (preg_match('/cheer\d+/u', strtolower($token))) return false;
            return true;
        }));
    }

    /**
     * Build the lookup key for a sequence of tokens.
     *
     * Per token, in this order: digit runs become '0', whitespace runs become
     * a single space, a character repeated three or more times is reduced to
     * one occurrence, and the result is lower-cased. The normalized tokens
     * are concatenated without separator.
     *
     * @param array $tokens tokens to normalize
     * @return string concatenated normalized tokens
     */
    private function generalize($tokens) {
        $steps = [
            '/\d+/u' => '0',
            '/\s+/u' => ' ',
            '/(.)\1{2,}/u' => '$1',
        ];
        $str = '';
        foreach ($tokens as $token) {
            $replaced = (string) $token;
            foreach ($steps as $pattern => $replacement) {
                // Each step works on the previous step's output.
                $result = preg_replace($pattern, $replacement, $replaced);
                if ($result !== null) {
                    // null signals a PCRE error; keep the previous value then.
                    $replaced = $result;
                }
            }
            $str .= strtolower($replaced);
        }
        return $str;
    }

    /**
     * Choose a concrete spelling for a picked follow-up.
     *
     * @param array $pick compiled follow-up; its examples are at offset 3
     * @return string example token, or '' if no example is available
     */
    private function exampleOf($pick) {
        $example = $this->pick($pick[3]);
        if ($example === null) return '';
        // Numeric tokens may have been stored as ints (array keys / JSON).
        return (string) $example[0];
    }

    /** Maximum number of trailing tokens randomNext() uses as context. */
    private $size = 7;

    /** Transition table; see the class comment for its raw and compiled shape. */
    private $transitions = [];

}