X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;f=app%2FModels%2FChatLog.php;h=063e8036485cdf5cc31ef793215db38f63a90b4c;hb=7d1ef8619d7513184340f73e99efa20481b86d75;hp=6f72c352b42ff356123ad3e1bdb29b8d8407e487;hpb=85879ea0c27ce6506919e2c083a139c470c0952c;p=alttp.git diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index 6f72c35..063e803 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -2,8 +2,12 @@ namespace App\Models; +use App\TwitchBot\TokenizedMessage; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; +use Illuminate\Support\Arr; +use Illuminate\Support\Str; +use LanguageDetection\Language; class ChatLog extends Model { @@ -17,6 +21,28 @@ class ChatLog extends Model { return $this->belongsTo(User::class); } + public function tokenize() { + return TokenizedMessage::fromLog($this); + } + + public function getTextWithoutEmotes() { + $text = $this->text_content; + if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { + $emotes = explode('/', $this->tags['emotes']); + foreach ($emotes as $emote) { + $set = explode(':', $emote); + $positions = explode(',', $set[1]); + foreach ($positions as $position) { + $coords = explode('-', $position); + for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) { + $text[$i] = ' '; + } + } + } + } + return trim(preg_replace('/\s+/', ' ', $text)); + } + public function evaluate() { $this->evaluateUser(); $this->evaluateChannel(); @@ -31,7 +57,7 @@ class ChatLog extends Model { } if ($this->command == 'PRIVMSG') { - if ($this->isKnownBot()) { + if (static::isKnownBot($this->nick)) { $this->type = 'bot'; } else if (substr($this->params[0], 0, 1) == '#') { $this->type = 'chat'; @@ -39,20 +65,27 @@ class ChatLog extends Model { $this->type = 'dm'; } $this->text_content = $this->params[1]; - if ($this->scanForSpam()) { + $this->detectLanguage(); + $tokenized = $this->tokenize(); + if ($tokenized->isSpammy()) { $this->banned = true; } + $this->classification = $tokenized->classify(); return; } throw new \Exception('unidentified message'); } - public function isKnownBot() { - return in_array(strtolower($this->nick), [ + public static function isKnownBot($nick) { + return in_array(strtolower($nick), [ + 'birrellthesquirrel', 'funtoon', + 'nidbot2000', 'nightbot', 'pokemoncommunitygame', + 'speedgaming', + 'starbase47', 'streamelements', 'wizebot', 'zockerstuebchen', @@ -64,7 +97,6 @@ class ChatLog extends Model { protected function evaluateChannel() { if (empty($this->params)) { - $this->channel()->associate(null); return; } $cname = $this->params[0]; @@ -72,41 +104,29 @@ class ChatLog extends Model { $cname = '#'.$cname; } $channel = Channel::firstWhere('twitch_chat', '=', $cname); - $this->channel()->associate($channel); + if (!is_null($channel)) { + $this->channel()->associate($channel); + if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) { + $this->twitch_category = $channel->twitch_category; + } + } } - protected function scanForSpam() { - if (substr($this->text_content, 0, 1) == '!') { - return true; - } - if (strpos($this->text_content, '$') !== false) { - return true; - } - if (strpos($this->text_content, '€') !== false) { - return true; - } - if (strpos($this->text_content, '@') !== false) { - return true; - } - if (strpos($this->text_content, '://') !== false) { - return true; - } - if (is_numeric($this->text_content)) { - return true; - } - if (strpos($this->text_content, 'followers') !== false) { - return true; - } - if (strpos($this->text_content, 'promotion') !== false) { - return true; - } - if (strpos($this->text_content, 'viewers') !== false) { - return true; + protected function detectLanguage() { + $languages = ['de', 'en', 'es', 'fr']; + if (!is_null($this->channel)) { + $languages = array_values($this->channel->languages); + if (!in_array('en', $languages)) { + $languages[] = 'en'; + } } - if (strpos($this->text_content, 'view ers') !== false) { - return true; + $detector = (new Language($languages))->detect($this->getTextWithoutEmotes()); + $scores = $detector->close(); + $lang = strval($detector); + //var_dump($scores, $lang, $this->text_content); + if (!empty($lang) && $scores[$lang] > 0.4) { + $this->detected_language = $lang; } - return false; } protected $casts = [