X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;f=app%2FModels%2FChatLog.php;h=8564ff097b55abbe7eb12e4072568222bb60c944;hb=47d864b96b80abb11fdf8e2fdc8920e93916d5b9;hp=aa4e7aeef913185a1120043b2e8fcfbf66c9e201;hpb=e8eb106aa5adab6dd992390cb3836589e4163e72;p=alttp.git diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index aa4e7ae..8564ff0 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -4,6 +4,9 @@ namespace App\Models; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; +use Illuminate\Support\Arr; +use Illuminate\Support\Str; +use LanguageDetector\LanguageDetector; class ChatLog extends Model { @@ -25,13 +28,13 @@ class ChatLog extends Model { $this->type = 'system'; return; } - if ($this->nick == 'localhorsttv') { + if (in_array($this->nick, ['horstiebot', 'localhorsttv'])) { $this->type = 'self'; return; } if ($this->command == 'PRIVMSG') { - if ($this->isKnownBot()) { + if (static::isKnownBot($this->nick)) { $this->type = 'bot'; } else if (substr($this->params[0], 0, 1) == '#') { $this->type = 'chat'; @@ -39,26 +42,65 @@ class ChatLog extends Model { $this->type = 'dm'; } $this->text_content = $this->params[1]; + $this->detectLanguage(); if ($this->scanForSpam()) { $this->banned = true; } + $this->classification = static::classify($this->text_content); return; } throw new \Exception('unidentified message'); } - public function isKnownBot() { - return in_array(strtolower($this->nick), [ + public static function isKnownBot($nick) { + return in_array(strtolower($nick), [ + 'birrellthesquirrel', 'funtoon', + 'nidbot2000', 'nightbot', 'pokemoncommunitygame', + 'speedgaming', + 'starbase47', 'streamelements', 'wizebot', 'zockerstuebchen', ]); } + public static function classify($text) { + if (empty($text)) { + return 'unclassified'; + } + if (is_numeric(trim($text))) { + return 'number'; + } + $rawText = strtolower(preg_replace('/[^\w]/', '', $text)); + $tokenizedText = preg_split('/\s+/', strtolower(trim($text))); + if (Str::startsWith($rawText, 'gg') || Str::endsWith($rawText, 'gg')) { + return 'gg'; + } + if (Str::contains($rawText, ['glgl', 'glhf', 'hfgl'])) { + return 'gl'; + } + if (Str::contains($rawText, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) { + return 'lol'; + } + if (Str::startsWith($rawText, ['ahoi', 'hallo', 'hello', 'hi', 'huhu']) || Str::endsWith($rawText, ['hi', 'wave'])) { + return 'hi'; + } + if (Str::contains($rawText, ['pog', 'wow'])) { + return 'pog'; + } + if (Str::contains($rawText, ['hype'])) { + return 'hype'; + } + if (Str::startsWith($rawText, 'o7') || Str::endsWith($rawText, 'o7') || Str::contains($rawText, 'salut')) { + return 'o7'; + } + return 'unclassified'; + } + protected function evaluateUser() { } @@ -75,40 +117,61 @@ class ChatLog extends Model { $this->channel()->associate($channel); } - protected function scanForSpam() { - if (substr($this->text_content, 0, 1) == '!') { - return true; + protected function detectLanguage() { + $languages = ['de', 'en', 'es', 'fr']; + if (!is_null($this->channel)) { + $languages = array_values($this->channel->languages); + if (!in_array('en', $languages)) { + $languages[] = 'en'; + } } - if (strpos($this->text_content, '$') !== false) { + $detector = LanguageDetector::detect($this->text_content, $languages); + $scores = $detector->getScores(); + $lang = strval($detector->getLanguage()); + //var_dump($scores, $lang, $this->text_content); + if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) { + $this->detected_language = $lang; + } + } + + public static function spammyText($text) { + if (substr($text, 0, 1) == '!') { return true; } - if (strpos($this->text_content, '€') !== false) { + if (strpos($text, '$') !== false) { return true; } - if (strpos($this->text_content, '@') !== false) { + if (strpos($text, '€') !== false) { return true; } - if (strpos($this->text_content, '://') !== false) { + if (strpos($text, '@') !== false) { return true; } - if (is_numeric($this->text_content)) { + if (strpos($text, '://') !== false) { return true; } - if (strpos($this->text_content, 'followers') !== false) { + if (strpos($text, 'followers') !== false) { return true; } - if (strpos($this->text_content, 'promotion') !== false) { + if (strpos($text, 'promotion') !== false) { return true; } - if (strpos($this->text_content, 'viewers') !== false) { + if (strpos($text, 'viewers') !== false) { return true; } - if (strpos($this->text_content, 'view ers') !== false) { + if (strpos($text, 'view ers') !== false) { return true; } return false; } + protected function scanForSpam() { + if (is_numeric($text)) { + return true; + } + return static::spammyText($this->text_content); + } + protected $casts = [ 'banned' => 'boolean', 'params' => 'array',