]> git.localhorst.tv Git - alttp.git/blob - app/TwitchBot/TokenizedMessage.php
multibyte handling in emote tokenizer
[alttp.git] / app / TwitchBot / TokenizedMessage.php
1 <?php
2
3 namespace App\TwitchBot;
4
5 use App\Models\ChatLog;
6 use Illuminate\Support\Arr;
7 use Illuminate\Support\Str;
8
/**
 * A chat message broken down into several normalized views (lower-cased
 * tokens, a punctuation-free "raw" string, and the same with emotes removed)
 * plus helpers to query and classify it.
 */
class TokenizedMessage {

	/**
	 * @param string $text message text; surrounding whitespace is trimmed
	 * @param array  $tags message tags; only the 'emotes' entry is read here.
	 *                     It is parsed as "id:start-end,start-end/id:start-end"
	 *                     with inclusive character positions into the text.
	 */
	public function __construct($text, $tags = []) {
		$this->text = trim((string) $text);
		$this->tags = $tags;
		$this->raw = self::squash($this->text);
		$this->tokens = self::tokenize($this->text);

		$this->emoteless = $this->text;
		if (!empty($this->tags['emotes'])) {
			$this->extractEmotes((string) $this->tags['emotes']);
			// emotes were blanked out with spaces; collapse the resulting gaps
			$collapsed = preg_replace('/\s+/u', ' ', $this->emoteless);
			$this->emoteless = trim($collapsed === null ? $this->emoteless : $collapsed);
		}
		$this->emoteless_raw = self::squash($this->emoteless);
		$this->emoteless_tokens = self::tokenize($this->emoteless);
	}

	/** Build a message from an IRC message object (its text and tags). */
	public static function fromIRC(IRCMessage $msg) {
		return new self($msg->getText(), $msg->tags);
	}

	/** Build a message from a chat log entry (second param and tags). */
	public static function fromLog(ChatLog $log) {
		return new self($log->params[1], $log->tags);
	}

	/** Build a message from a plain string and optional tags. */
	public static function fromString($text, $tags = []) {
		return new self($text, $tags);
	}


	/** Multibyte-safe lower-casing (strtolower only folds ASCII bytes). */
	private static function lower($text) {
		return mb_strtolower((string) $text, 'UTF-8');
	}

	/** Lower-case $text and drop every non-word character. */
	private static function squash($text) {
		$stripped = preg_replace('/[^\w]/u', '', $text);
		// preg_replace yields null if $text is not valid UTF-8
		return self::lower($stripped === null ? '' : $stripped);
	}

	/**
	 * Split lower-cased $text on word boundaries into trimmed, non-empty tokens.
	 *
	 * Pieces are trimmed BEFORE empties are dropped so the whitespace between
	 * two words does not survive as an empty token, and the comparison against
	 * '' is strict so a literal "0" is kept.
	 */
	private static function tokenize($text) {
		$pieces = preg_split('/\b/u', self::lower($text));
		if ($pieces === false) {
			// not valid UTF-8
			return [];
		}
		$tokens = [];
		foreach ($pieces as $piece) {
			$piece = trim($piece);
			if ($piece !== '') {
				$tokens[] = $piece;
			}
		}
		return $tokens;
	}

	/**
	 * Parse an emotes tag value, collecting emote names into $this->emotes and
	 * blanking the corresponding character ranges in $this->emoteless.
	 * Entries that do not match "id:start-end[,start-end...]" with numeric,
	 * ordered positions are skipped.
	 */
	private function extractEmotes($spec) {
		foreach (explode('/', $spec) as $emote) {
			$set = explode(':', $emote);
			if (!isset($set[1])) {
				continue;
			}
			foreach (explode(',', $set[1]) as $position) {
				$coords = explode('-', $position);
				if (count($coords) < 2 || !ctype_digit($coords[0]) || !ctype_digit($coords[1])) {
					continue;
				}
				$start = (int) $coords[0];
				$length = (int) $coords[1] - $start + 1;
				if ($length < 1) {
					continue;
				}
				// emote name is lower-cased with any trailing digits removed
				$this->emotes[] = preg_replace('/\d+$/', '', self::lower(mb_substr($this->text, $start, $length, 'UTF-8')));
				// replace by the same number of characters so later positions stay valid
				$this->emoteless = mb_substr($this->emoteless, 0, $start, 'UTF-8')
					.str_repeat(' ', $length)
					.mb_substr($this->emoteless, $start + $length, null, 'UTF-8');
			}
		}
	}

	/** True if $needles (scalar or array of alternatives) occurs in $haystack. */
	private static function anyIn($needles, array $haystack) {
		if (!is_array($needles)) {
			return in_array($needles, $haystack);
		}
		foreach ($needles as $needle) {
			if (in_array($needle, $haystack)) {
				return true;
			}
		}
		return false;
	}


	/** Does the trimmed original text contain $text (case-sensitive)? */
	public function contains($text) {
		return Str::contains($this->text, $text);
	}

	/** Does the text with emotes removed contain $text? */
	public function containsEmoteless($text) {
		return Str::contains($this->emoteless, $text);
	}

	/** Does the lower-cased, word-characters-only text contain $text? */
	public function containsRaw($text) {
		return Str::contains($this->raw, $text);
	}

	/** Does the trimmed original text end with $text? */
	public function endsWith($text) {
		return Str::endsWith($this->text, $text);
	}

	/** Does the text with emotes removed end with $text? */
	public function endsWithEmoteless($text) {
		return Str::endsWith($this->emoteless, $text);
	}

	/** Is $text the last token of the emote-free text? */
	public function endsWithEmotelessToken($text) {
		return !empty($this->emoteless_tokens) && $this->emoteless_tokens[count($this->emoteless_tokens) - 1] == $text;
	}

	/** Does the lower-cased, word-characters-only text end with $text? */
	public function endsWithRaw($text) {
		return Str::endsWith($this->raw, $text);
	}

	/** Is $text the last token of the message? */
	public function endsWithToken($text) {
		return !empty($this->tokens) && $this->tokens[count($this->tokens) - 1] == $text;
	}

	/** Integer value of the text as parsed by intval(). */
	public function getNumericValue() {
		return intval($this->text);
	}

	/**
	 * Do the given tokens appear directly after one another in the message?
	 *
	 * @param array $tokens list (0-indexed) of lower-case tokens
	 */
	public function hasConsecutiveTokens($tokens) {
		$needed = count($tokens);
		$last = count($this->tokens) - $needed;
		for ($i = 0; $i <= $last; ++$i) {
			for ($j = 0; $j < $needed; ++$j) {
				if ($this->tokens[$i + $j] != $tokens[$j]) break;
			}
			// inner loop ran to completion => every token matched
			if ($j == $needed) return true;
		}
		return false;
	}

	/** Does the message use the given emote (or any of an array of emotes)? */
	public function hasEmote($text) {
		return self::anyIn($text, $this->emotes);
	}

	/** Does any emote name contain $text? */
	public function hasEmoteThatContains($text) {
		foreach ($this->emotes as $emote) {
			if (Str::contains($emote, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any emote name end with $text? */
	public function hasEmoteThatEndsWith($text) {
		foreach ($this->emotes as $emote) {
			if (Str::endsWith($emote, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any emote name start or end with $text? */
	public function hasEmoteThatStartsOrEndsWith($text) {
		foreach ($this->emotes as $emote) {
			if (Str::startsWith($emote, $text) || Str::endsWith($emote, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any emote name start with $text? */
	public function hasEmoteThatStartsWith($text) {
		foreach ($this->emotes as $emote) {
			if (Str::startsWith($emote, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does the message contain the given token (or any of an array of tokens)? */
	public function hasToken($text) {
		return self::anyIn($text, $this->tokens);
	}

	/** Does any token contain $text? */
	public function hasTokenThatContains($text) {
		foreach ($this->tokens as $token) {
			if (Str::contains($token, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any token end with $text? */
	public function hasTokenThatEndsWith($text) {
		foreach ($this->tokens as $token) {
			if (Str::endsWith($token, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any token start or end with $text? */
	public function hasTokenThatStartsOrEndsWith($text) {
		foreach ($this->tokens as $token) {
			if (Str::startsWith($token, $text) || Str::endsWith($token, $text)) {
				return true;
			}
		}
		return false;
	}

	/** Does any token start with $text? */
	public function hasTokenThatStartsWith($text) {
		foreach ($this->tokens as $token) {
			if (Str::startsWith($token, $text)) {
				return true;
			}
		}
		return false;
	}

	/** More than 20 word characters once emotes are removed. */
	public function isLong() {
		return mb_strlen($this->emoteless_raw, 'UTF-8') > 20;
	}

	/** Fewer than 15 word characters once emotes are removed. */
	public function isShort() {
		return mb_strlen($this->emoteless_raw, 'UTF-8') < 15;
	}

	/** More than 40 word characters once emotes are removed. */
	public function isVeryLong() {
		return mb_strlen($this->emoteless_raw, 'UTF-8') > 40;
	}

	/** Does the original text start or end with $text? */
	public function startsOrEndsWith($text) {
		return $this->startsWith($text) || $this->endsWith($text);
	}

	/** Is $text the first or last token of the emote-free text? */
	public function startsOrEndsWithEmotelessToken($text) {
		return $this->startsWithEmotelessToken($text) || $this->endsWithEmotelessToken($text);
	}

	/** Does the raw text start or end with $text? */
	public function startsOrEndsWithRaw($text) {
		return $this->startsWithRaw($text) || $this->endsWithRaw($text);
	}

	/** Is $text the first or last token of the message? */
	public function startsOrEndsWithToken($text) {
		return $this->startsWithToken($text) || $this->endsWithToken($text);
	}

	/** Does the trimmed original text start with $text? */
	public function startsWith($text) {
		return Str::startsWith($this->text, $text);
	}

	/** Does the text with emotes removed start with $text? */
	public function startsWithEmoteless($text) {
		return Str::startsWith($this->emoteless, $text);
	}

	/** Is $text the first token of the emote-free text? */
	public function startsWithEmotelessToken($text) {
		return isset($this->emoteless_tokens[0]) && $this->emoteless_tokens[0] == $text;
	}

	/** Does the lower-cased, word-characters-only text start with $text? */
	public function startsWithRaw($text) {
		return Str::startsWith($this->raw, $text);
	}

	/** Is $text the first token of the message? */
	public function startsWithToken($text) {
		return isset($this->tokens[0]) && $this->tokens[0] == $text;
	}


	/**
	 * Heuristic spam check: commands, currency/mention/URL markers and a list
	 * of known phrases all count as spammy.
	 */
	public function isSpammy() {
		if ($this->startsWith('!')) {
			return true;
		}
		if ($this->contains(['€', '$', '@', '://'])) {
			return true;
		}
		if ($this->containsRaw(['followers', 'promotion', 'viewers'])) {
			return true;
		}
		if ($this->containsRaw('horsti')) {
			return true;
		}
		if ($this->containsRaw(['folgtjetzt', 'vielendankfürdenraid', 'thanksfortheraid', 'willkommenaufstarbase47'])) {
			return true;
		}
		return false;
	}


	/**
	 * Assign the message to one category; the first matching rule wins and the
	 * result is cached for subsequent calls.
	 *
	 * @return string category name, 'unclassified' if no rule matches
	 */
	public function classify() {
		if (is_null($this->classification)) {
			// strict comparison: empty() would also treat the message "0" as empty
			if ($this->text === '' || $this->isVeryLong()) {
				$this->classification = 'unclassified';
			} else if ($this->startsWith('!')) {
				$this->classification = 'cmd';
			} else if ($this->isShort() && ($this->hasTokenThatStartsOrEndsWith(['gg']) || $this->hasEmoteThatEndsWith(['gg']))) {
				$this->classification = 'gg';
			} else if ($this->isShort() && $this->containsRaw(['glgl', 'glhf', 'goodluck', 'hfgl', 'vielglück'])) {
				$this->classification = 'gl';
			} else if ($this->hasToken(['danke', 'thanks', 'thx', 'ty']) && !$this->hasToken(['nah', 'nee', 'nein', 'no'])) {
				$this->classification = 'thx';
			} else if (!$this->isLong() && ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'hey', 'huhu', 'moin']) || $this->hasEmoteThatEndsWith(['hello', 'heyguys', 'hi', 'vohiyo', 'wave']) || $this->hasToken(['hi', 'hey', 'yo']) || $this->containsRaw(['gutenmorgen', 'gutenabend']))) {
				$this->classification = 'hi';
			} else if ($this->isShort() && $this->hasTokenThatStartsOrEndsWith(['pog', 'wow'])) {
				$this->classification = 'pog';
			} else if ($this->containsRaw(['hype', 'letsgo']) || $this->hasEmoteThatEndsWith(['dance', 'jam', 'party', 'rave', 'troete'])) {
				$this->classification = 'hype';
			} else if ($this->contains('<3') || $this->hasEmoteThatEndsWith(['heart', 'herz', 'hug', 'love'])) {
				// "<3" is never a single token (the tokenizer splits between "<" and "3"),
				// so look for it in the text instead
				$this->classification = 'love';
			} else if ($this->hasToken(['nani', 'wat', 'wtf']) || $this->hasEmoteThatEndsWith(['wat', 'wtf'])) {
				$this->classification = 'wtf';
			} else if ($this->hasConsecutiveTokens([':', 'eyes', ':']) || $this->hasEmoteThatEndsWith(['eyes'])) {
				$this->classification = 'eyes';
			} else if ($this->hasEmoteThatEndsWith(['angry', 'rage', 'ree'])) {
				$this->classification = 'rage';
			} else if ($this->hasToken([':(']) || $this->hasEmoteThatEndsWith(['cry', 'sad'])) {
				$this->classification = 'sad';
			} else if ($this->hasToken(['monkas', 'sweat_smile']) || $this->hasEmoteThatEndsWith(['sweat'])) {
				$this->classification = 'sweat';
			} else if ($this->endsWithEmoteless('?')) {
				$this->classification = 'question';
			} else if ($this->hasToken(['jo', 'yep', 'yes']) || $this->startsOrEndsWithEmotelessToken('ja') || $this->containsRaw('nodders') || $this->hasEmoteThatEndsWith(['nod', 'nodders', 'yea'])) {
				$this->classification = 'yes';
			} else if ($this->hasToken(['nah', 'nee', 'nein', 'no']) || $this->containsRaw('nopers') || $this->hasEmoteThatEndsWith(['nay', 'nope', 'nopers'])) {
				$this->classification = 'no';
			} else if ($this->hasEmoteThatContains(['kappa', 'keepo'])) {
				$this->classification = 'kappa';
			} else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) {
				$this->classification = 'o7';
			} else if (!$this->isLong() && ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul']) || $this->hasTokenThatStartsWith(['xd']) || $this->hasConsecutiveTokens([':', 'd']))) {
				$this->classification = 'lol';
			} else if (is_numeric($this->raw)) {
				$this->classification = 'number';
			} else {
				$this->classification = 'unclassified';
			}
		}
		return $this->classification;
	}

	/**
	 * Categories a reply to this message may be drawn from.
	 *
	 * @return array|false list of category names, or false if the message's
	 *                     classification has no associated responses
	 */
	public function getResponseCategory() {
		switch ($this->classify()) {
			case 'gg':
				return ['love', 'eyes', 'thx', 'pog', 'kappa'];
			case 'gl':
				return ['love', 'eyes', 'thx'];
			case 'hi':
				return ['hi', 'love', 'eyes', 'hype', 'pog'];
			case 'kappa':
				return ['kappa', 'lol', 'eyes'];
			case 'love':
				return ['hi', 'love', 'eyes', 'thx'];
			case 'question':
				// questions asking for a quantity may also be answered with a number
				if (
					$this->hasToken(['number', 'nummer', 'wieviel', 'zahl']) ||
					$this->hasConsecutiveTokens(['how', 'many']) ||
					$this->hasConsecutiveTokens(['how', 'much']) ||
					$this->hasConsecutiveTokens(['wie', 'viele'])
				) {
					return ['yes', 'no', 'kappa', 'lol', 'wtf', 'number'];
				}
				return ['yes', 'no', 'kappa', 'lol', 'wtf'];
			case 'rage':
				return ['kappa', 'lol', 'rage'];
			case 'wtf':
				return ['kappa', 'lol', 'rage'];
		}
		return false;
	}


	// trimmed original text
	private $text;
	// tags as passed to the constructor
	private $tags;
	// lower-cased text with all non-word characters removed
	private $raw;
	// lower-cased tokens split on word boundaries
	private $tokens;

	// lower-cased emote names (trailing digits stripped)
	private $emotes = [];
	// text / raw / tokens with emote ranges removed
	private $emoteless = '';
	private $emoteless_raw = '';
	private $emoteless_tokens = [];

	// cached result of classify()
	private $classification = null;

}