git.localhorst.tv Git - alttp.git/blobdiff - app/TwitchBot/TokenizedMessage.php
refined classification
[alttp.git] / app / TwitchBot / TokenizedMessage.php
index 259cfc6a33f20759f4f6a817e0f10df51232a290..42d6ca8ca8f08c05cc625e3b32df236ef4683932 100644 (file)
@@ -11,8 +11,8 @@ class TokenizedMessage {
        public function __construct($text, $tags = []) {
                $this->text = $text;
                $this->tags = $tags;
-               $this->raw = strtolower(preg_replace('/[^\w]/', '', $text));
-               $this->tokens = preg_split('/\s+/', strtolower(trim($text)));
+               $this->raw = strtolower(preg_replace('/[^\w]/u', '', $text));
+               $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($text)))));
 
                $this->emoteless = $this->text;
                if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) {
@@ -22,7 +22,7 @@ class TokenizedMessage {
                                $positions = explode(',', $set[1]);
                                foreach ($positions as $position) {
                                        $coords = explode('-', $position);
-                                       $this->emotes[] = substr($this->text, $coords[0], $coords[1] - $coords[0] + 1);
+                                       $this->emotes[] = preg_replace('/\d+$/', '', strtolower(substr($this->text, $coords[0], $coords[1] - $coords[0] + 1)));
                                        for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) {
                                                $this->emoteless[$i] = ' ';
                                        }
@@ -30,7 +30,8 @@ class TokenizedMessage {
                        }
                        $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
                }
-               $this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless));
+               $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless));
+               $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->emoteless)))));
        }
 
        public static function fromIRC(IRCMessage $msg) {
@@ -46,45 +47,175 @@ class TokenizedMessage {
        }
 
 
+       public function contains($text) {
+               return Str::contains($this->text, $text);
+       }
+
+       public function containsEmoteless($text) {
+               return Str::contains($this->emoteless, $text);
+       }
+
+       public function containsRaw($text) {
+               return Str::contains($this->raw, $text);
+       }
+
+       public function endsWith($text) {
+               return Str::endsWith($this->text, $text);
+       }
+
+       public function endsWithEmoteless($text) {
+               return Str::endsWith($this->emoteless, $text);
+       }
+
+       public function endsWithRaw($text) {
+               return Str::endsWith($this->raw, $text);
+       }
+
        public function getNumericValue() {
-               return intval($this->raw);
+               return intval($this->text);
        }
 
-       public function isSpammy() {
-               if (substr($this->raw, 0, 1) == '!') {
-                       return true;
+       public function hasConsecutiveTokens($tokens) {
+               for ($i = 0; $i < count($this->tokens) - count($tokens) + 1; ++$i) {
+                       for ($j = 0; $j < count($tokens); ++$j) {
+                               if ($this->tokens[$i + $j] != $tokens[$j]) break;
+                       }
+                       if ($j == count($tokens)) return true;
                }
-               if (strpos($this->raw, '$') !== false) {
-                       return true;
+               return false;
+       }
+
+       public function hasEmote($text) {
+               if (is_array($text)) {
+                       foreach ($text as $token) {
+                               if (in_array($token, $this->emotes)) {
+                                       return true;
+                               }
+                       }
+                       return false;
                }
-               if (strpos($this->raw, '€') !== false) {
-                       return true;
+               return in_array($text, $this->emotes);
+       }
+
+       public function hasEmoteThatContains($text) {
+               foreach ($this->emotes as $emote) {
+                       if (Str::contains($emote, $text)) {
+                               return true;
+                       }
                }
-               if (strpos($this->raw, '@') !== false) {
-                       return true;
+               return false;
+       }
+
+       public function hasEmoteThatEndsWith($text) {
+               foreach ($this->emotes as $emote) {
+                       if (Str::endsWith($emote, $text)) {
+                               return true;
+                       }
                }
-               if (strpos($this->raw, '://') !== false) {
-                       return true;
+               return false;
+       }
+
+       public function hasEmoteThatStartsOrEndsWith($text) {
+               foreach ($this->emotes as $emote) {
+                       if (Str::startsWith($emote, $text) || Str::endsWith($emote, $text)) {
+                               return true;
+                       }
                }
-               if (strpos($this->raw, 'followers') !== false) {
-                       return true;
+               return false;
+       }
+
+       public function hasEmoteThatStartsWith($text) {
+               foreach ($this->emotes as $emote) {
+                       if (Str::startsWith($emote, $text)) {
+                               return true;
+                       }
                }
-               if (strpos($this->raw, 'horstie') !== false) {
-                       return true;
+               return false;
+       }
+
+       public function hasToken($text) {
+               if (is_array($text)) {
+                       foreach ($text as $token) {
+                               if (in_array($token, $this->tokens)) {
+                                       return true;
+                               }
+                       }
+                       return false;
                }
-               if (strpos($this->raw, 'promotion') !== false) {
+               return in_array($text, $this->tokens);
+       }
+
+       public function hasTokenThatContains($text) {
+               foreach ($this->tokens as $token) {
+                       if (Str::contains($token, $text)) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       public function hasTokenThatEndsWith($text) {
+               foreach ($this->tokens as $token) {
+                       if (Str::endsWith($token, $text)) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       public function hasTokenThatStartsOrEndsWith($text) {
+               foreach ($this->tokens as $token) {
+                       if (Str::startsWith($token, $text) || Str::endsWith($token, $text)) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       public function hasTokenThatStartsWith($text) {
+               foreach ($this->tokens as $token) {
+                       if (Str::startsWith($token, $text)) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       public function startsOrEndsWith($text) {
+               return $this->startsWith($text) || $this->endsWith($text);
+       }
+
+       public function startsOrEndsWithRaw($text) {
+               return $this->startsWithRaw($text) || $this->endsWithRaw($text);
+       }
+
+       public function startsWith($text) {
+               return Str::startsWith($this->text, $text);
+       }
+
+       public function startsWithEmoteless($text) {
+               return Str::startsWith($this->emoteless, $text);
+       }
+
+       public function startsWithRaw($text) {
+               return Str::startsWith($this->raw, $text);
+       }
+
+
+       public function isSpammy() {
+               if ($this->startsWith('!')) {
                        return true;
                }
-               if (strpos($this->raw, 'viewers') !== false) {
+               if ($this->contains(['€', '$', '@', '://'])) {
                        return true;
                }
-               if (strpos($this->raw, 'view ers') !== false) {
+               if ($this->containsRaw(['followers', 'promotion', 'viewers'])) {
                        return true;
                }
-               if (strpos($this->raw, 'vielen dank für den raid') !== false) {
+               if ($this->containsRaw('horstie')) {
                        return true;
                }
-               if (strpos($this->raw, 'willkommen auf starbase 47') !== false) {
+               if ($this->containsRaw(['vielendankfürdenraid', 'thanksfortheraid', 'willkommenaufstarbase47'])) {
                        return true;
                }
                return false;
@@ -95,22 +226,32 @@ class TokenizedMessage {
                if (is_null($this->classification)) {
                        if (empty($this->raw)) {
                                $this->classification = 'unclassified';
+                       } else if ($this->startsWith('!')) {
+                               $this->classification = 'cmd';
                        } else if (is_numeric($this->raw)) {
                                $this->classification = 'number';
-                       } else if (Str::startsWith($this->raw, 'gg') || Str::endsWith($this->raw, 'gg')) {
+                       } else if ($this->hasTokenThatStartsOrEndsWith(['gg']) || $this->hasEmoteThatEndsWith(['gg'])) {
                                $this->classification = 'gg';
-                       } else if (Str::contains($this->raw, ['glgl', 'glhf', 'hfgl'])) {
+                       } else if ($this->containsRaw(['glgl', 'glhf', 'goodluck', 'hfgl'])) {
                                $this->classification = 'gl';
-                       } else if (Str::contains($this->raw, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) {
-                               $this->classification = 'lol';
-                       } else if (Str::startsWith($this->raw, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($this->raw, ['hi', 'wave'])) {
+                       } else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'hey', 'huhu', 'moin']) || $this->hasEmoteThatEndsWith(['hello', 'heyguys', 'hi', 'vohiyo', 'wave']) || $this->hasToken(['hi', 'hey', 'yo']) || $this->containsRaw(['gutenmorgen', 'gutenabend'])) {
                                $this->classification = 'hi';
-                       } else if (Str::contains($this->raw, ['pog', 'wow'])) {
+                       } else if ($this->hasTokenThatStartsOrEndsWith(['pog', 'wow'])) {
                                $this->classification = 'pog';
-                       } else if (Str::contains($this->raw, ['hype'])) {
+                       } else if ($this->containsRaw(['hype']) || $this->hasEmoteThatEndsWith(['dance', 'jam', 'party', 'rave', 'troete'])) {
                                $this->classification = 'hype';
-                       } else if (Str::startsWith($this->raw, 'o7') || Str::endsWith($this->raw, 'o7') || Str::contains($this->raw, 'salut')) {
+                       } else if ($this->hasToken(['danke', 'thanks', 'thx', 'ty'])) {
+                               $this->classification = 'thx';
+                       } else if ($this->hasToken(['<3']) || $this->hasEmoteThatEndsWith(['heart', 'herz', 'hug', 'love'])) {
+                               $this->classification = 'love';
+                       } else if ($this->hasToken(['nani', 'wat', 'wtf']) || $this->hasEmoteThatEndsWith(['wat', 'wtf'])) {
+                               $this->classification = 'wtf';
+                       } else if ($this->endsWithEmoteless('?')) {
+                               $this->classification = 'question';
+                       } else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) {
                                $this->classification = 'o7';
+                       } else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul']) || $this->hasTokenThatStartsWith(['xd']) || $this->hasConsecutiveTokens([':', 'd'])) {
+                               $this->classification = 'lol';
                        } else {
                                $this->classification = 'unclassified';
                        }
@@ -126,6 +267,7 @@ class TokenizedMessage {
 
        private $emotes = [];
        private $emoteless = '';
+       private $emoteless_raw = '';
        private $emoteless_tokens = [];
 
        private $classification = null;