git.localhorst.tv Git - alttp.git/blobdiff - app/Models/ChatLog.php
better classification
[alttp.git] / app / Models / ChatLog.php
index aa4e7aeef913185a1120043b2e8fcfbf66c9e201..063e8036485cdf5cc31ef793215db38f63a90b4c 100644 (file)
@@ -2,8 +2,12 @@
 
 namespace App\Models;
 
+use App\TwitchBot\TokenizedMessage;
 use Illuminate\Database\Eloquent\Factories\HasFactory;
 use Illuminate\Database\Eloquent\Model;
+use Illuminate\Support\Arr;
+use Illuminate\Support\Str;
+use LanguageDetection\Language;
 
 class ChatLog extends Model {
 
@@ -17,6 +21,28 @@ class ChatLog extends Model {
                return $this->belongsTo(User::class);
        }
 
+       /**
+        * Build the tokenized form of this log entry.
+        *
+        * Hands this model to TokenizedMessage::fromLog() and returns whatever
+        * that factory produces.
+        */
+       public function tokenize() {
+               $tokenized = TokenizedMessage::fromLog($this);
+               return $tokenized;
+       }
+
+       /**
+        * Return the message text with all emotes removed.
+        *
+        * The "emotes" tag is parsed as "/"-separated entries of the form
+        * "<id>:<start>-<end>[,<start>-<end>...]". Every referenced range
+        * (inclusive on both ends) is blanked out, then runs of whitespace are
+        * collapsed to a single space and the result is trimmed.
+        *
+        * Ranges are applied per character, not per byte: Twitch reports emote
+        * positions as character indices, so writing single bytes into the
+        * string would blank the wrong span and tear multi-byte UTF-8 sequences
+        * apart as soon as the message contains non-ASCII text.
+        *
+        * @return string
+        */
+       public function getTextWithoutEmotes() {
+               // Cast so a missing text_content yields '' instead of breaking below.
+               $text = (string) $this->text_content;
+               if (empty($this->tags['emotes'])) {
+                       return trim(preg_replace('/\s+/', ' ', $text));
+               }
+               $chars = mb_str_split($text, 1, 'UTF-8');
+               $length = count($chars);
+               foreach (explode('/', $this->tags['emotes']) as $emote) {
+                       $set = explode(':', $emote);
+                       if (!isset($set[1])) {
+                               // Malformed entry without a position list.
+                               continue;
+                       }
+                       foreach (explode(',', $set[1]) as $position) {
+                               $coords = explode('-', $position);
+                               if (count($coords) < 2) {
+                                       // Malformed range without an end index.
+                                       continue;
+                               }
+                               // Clamp to the message so bogus ranges cannot grow the text.
+                               $first = max(0, intval($coords[0]));
+                               $last = min($length - 1, intval($coords[1]));
+                               for ($i = $first; $i <= $last; ++$i) {
+                                       $chars[$i] = ' ';
+                               }
+                       }
+               }
+               return trim(preg_replace('/\s+/', ' ', implode('', $chars)));
+       }
+
        public function evaluate() {
                $this->evaluateUser();
                $this->evaluateChannel();
@@ -25,13 +51,13 @@ class ChatLog extends Model {
                        $this->type = 'system';
                        return;
                }
-               if ($this->nick == 'localhorsttv') {
+               if (in_array($this->nick, ['horstiebot', 'localhorsttv'])) {
                        $this->type = 'self';
                        return;
                }
 
                if ($this->command == 'PRIVMSG') {
-                       if ($this->isKnownBot()) {
+                       if (static::isKnownBot($this->nick)) {
                                $this->type = 'bot';
                        } else if (substr($this->params[0], 0, 1) == '#') {
                                $this->type = 'chat';
@@ -39,20 +65,27 @@ class ChatLog extends Model {
                                $this->type = 'dm';
                        }
                        $this->text_content = $this->params[1];
-                       if ($this->scanForSpam()) {
+                       $this->detectLanguage();
+                       $tokenized = $this->tokenize();
+                       if ($tokenized->isSpammy()) {
                                $this->banned = true;
                        }
+                       $this->classification = $tokenized->classify();
                        return;
                }
 
                throw new \Exception('unidentified message');
        }
 
-       public function isKnownBot() {
-               return in_array(strtolower($this->nick), [
+       public static function isKnownBot($nick) {
+               return in_array(strtolower($nick), [
+                       'birrellthesquirrel',
                        'funtoon',
+                       'nidbot2000',
                        'nightbot',
                        'pokemoncommunitygame',
+                       'speedgaming',
+                       'starbase47',
                        'streamelements',
                        'wizebot',
                        'zockerstuebchen',
@@ -64,7 +97,6 @@ class ChatLog extends Model {
 
        protected function evaluateChannel() {
                if (empty($this->params)) {
-                       $this->channel()->associate(null);
                        return;
                }
                $cname = $this->params[0];
@@ -72,41 +104,29 @@ class ChatLog extends Model {
                        $cname = '#'.$cname;
                }
                $channel = Channel::firstWhere('twitch_chat', '=', $cname);
-               $this->channel()->associate($channel);
+               if (!is_null($channel)) {
+                       $this->channel()->associate($channel);
+                       if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) {
+                               $this->twitch_category = $channel->twitch_category;
+                       }
+               }
        }
 
-       protected function scanForSpam() {
-               if (substr($this->text_content, 0, 1) == '!') {
-                       return true;
-               }
-               if (strpos($this->text_content, '$') !== false) {
-                       return true;
-               }
-               if (strpos($this->text_content, '€') !== false) {
-                       return true;
-               }
-               if (strpos($this->text_content, '@') !== false) {
-                       return true;
-               }
-               if (strpos($this->text_content, '://') !== false) {
-                       return true;
-               }
-               if (is_numeric($this->text_content)) {
-                       return true;
-               }
-               if (strpos($this->text_content, 'followers') !== false) {
-                       return true;
-               }
-               if (strpos($this->text_content, 'promotion') !== false) {
-                       return true;
-               }
-               if (strpos($this->text_content, 'viewers') !== false) {
-                       return true;
+       /**
+        * Guess the language of the message and store it in detected_language.
+        *
+        * Candidate languages default to de/en/es/fr. When a channel is
+        * associated, that channel's languages are used instead, with 'en'
+        * appended if it is not already among them. detected_language is only
+        * set when the best match scores above 0.4; otherwise it is left as is.
+        */
+       protected function detectLanguage() {
+               $languages = ['de', 'en', 'es', 'fr'];
+               if (!is_null($this->channel)) {
+                       // array_values() reindexes so 'en' can simply be appended.
+                       $languages = array_values($this->channel->languages);
+                       if (!in_array('en', $languages)) {
+                               $languages[] = 'en';
+                       }
                }
-               if (strpos($this->text_content, 'view ers') !== false) {
-                       return true;
+               // Detection runs on the text with emotes stripped out.
+               $detector = (new Language($languages))->detect($this->getTextWithoutEmotes());
+               // $scores is indexed by language code below; the string form of the
+               // result is used as that code (presumably the best match - confirm
+               // against the LanguageDetection library).
+               $scores = $detector->close();
+               $lang = strval($detector);
+               //var_dump($scores, $lang, $this->text_content);
+               // Only accept the guess when its score clears the 0.4 threshold.
+               if (!empty($lang) && $scores[$lang] > 0.4) {
+                       $this->detected_language = $lang;
                }
-               return false;
        }
 
        protected $casts = [