X-Git-Url: https://git.localhorst.tv/?a=blobdiff_plain;f=app%2FModels%2FChatLog.php;h=063e8036485cdf5cc31ef793215db38f63a90b4c;hb=7d1ef8619d7513184340f73e99efa20481b86d75;hp=79eab97debff1a56a6ee081efbdf59951f22a5d9;hpb=abdc2ea1ade1fb12ceacc28660890750e69ae36f;p=alttp.git diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index 79eab97..063e803 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -2,11 +2,12 @@ namespace App\Models; +use App\TwitchBot\TokenizedMessage; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; use Illuminate\Support\Arr; use Illuminate\Support\Str; -use LanguageDetector\LanguageDetector; +use LanguageDetection\Language; class ChatLog extends Model { @@ -20,6 +21,28 @@ class ChatLog extends Model { return $this->belongsTo(User::class); } + public function tokenize() { + return TokenizedMessage::fromLog($this); + } + + public function getTextWithoutEmotes() { + $text = $this->text_content; + if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { + $emotes = explode('/', $this->tags['emotes']); + foreach ($emotes as $emote) { + $set = explode(':', $emote); + $positions = explode(',', $set[1]); + foreach ($positions as $position) { + $coords = explode('-', $position); + for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) { + $text[$i] = ' '; + } + } + } + } + return trim(preg_replace('/\s+/', ' ', $text)); + } + public function evaluate() { $this->evaluateUser(); $this->evaluateChannel(); @@ -43,10 +66,11 @@ class ChatLog extends Model { } $this->text_content = $this->params[1]; $this->detectLanguage(); - if ($this->scanForSpam()) { + $tokenized = $this->tokenize(); + if ($tokenized->isSpammy()) { $this->banned = true; } - $this->classification = static::classify($this->text_content); + $this->classification = $tokenized->classify(); return; } @@ -68,45 +92,11 @@ class ChatLog extends Model { ]); } - public static function classify($text) { - if (empty($text)) { - return 'unclassified'; - } - if (is_numeric(trim($text))) { - return 'number'; - } - $rawText = strtolower(preg_replace('/[^\w]/', '', $text)); - $tokenizedText = preg_split('/\s+/', strtolower(trim($text))); - if (Str::startsWith($rawText, 'gg') || Str::endsWith($rawText, 'gg')) { - return 'gg'; - } - if (Str::contains($rawText, ['glgl', 'glhf', 'hfgl'])) { - return 'gl'; - } - if (Str::contains($rawText, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) { - return 'lol'; - } - if (Str::startsWith($rawText, ['ahoi', 'hallo', 'hello', 'hi', 'huhu']) || Str::endsWith($rawText, ['hi', 'wave'])) { - return 'hi'; - } - if (Str::contains($rawText, ['pog', 'wow'])) { - return 'pog'; - } - if (Str::contains($rawText, ['hype'])) { - return 'hype'; - } - if (Str::startsWith($rawText, 'o7') || Str::endsWith($rawText, 'o7') || Str::contains($rawText, 'salut')) { - return 'o7'; - } - return 'unclassified'; - } - protected function evaluateUser() { } protected function evaluateChannel() { if (empty($this->params)) { - $this->channel()->associate(null); return; } $cname = $this->params[0]; @@ -114,7 +104,12 @@ class ChatLog extends Model { $cname = '#'.$cname; } $channel = Channel::firstWhere('twitch_chat', '=', $cname); - $this->channel()->associate($channel); + if (!is_null($channel)) { + $this->channel()->associate($channel); + if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) { + $this->twitch_category = $channel->twitch_category; + } + } } protected function detectLanguage() { @@ -125,63 +120,15 @@ class ChatLog extends Model { $languages[] = 'en'; } } - $detector = LanguageDetector::detect($this->text_content, $languages); - $scores = $detector->getScores(); - $lang = strval($detector->getLanguage()); + $detector = (new Language($languages))->detect($this->getTextWithoutEmotes()); + $scores = $detector->close(); + $lang = strval($detector); //var_dump($scores, $lang, $this->text_content); - if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) { + if (!empty($lang) && $scores[$lang] > 0.4) { $this->detected_language = $lang; } } - public static function spammyText($raw_text) { - $text = strtolower($raw_text); - if (substr($text, 0, 1) == '!') { - return true; - } - if (strpos($text, '$') !== false) { - return true; - } - if (strpos($text, '€') !== false) { - return true; - } - if (strpos($text, '@') !== false) { - return true; - } - if (strpos($text, '://') !== false) { - return true; - } - if (strpos($text, 'followers') !== false) { - return true; - } - if (strpos($text, 'horstie') !== false) { - return true; - } - if (strpos($text, 'promotion') !== false) { - return true; - } - if (strpos($text, 'viewers') !== false) { - return true; - } - if (strpos($text, 'view ers') !== false) { - return true; - } - if (strpos($text, 'vielen dank für den raid') !== false) { - return true; - } - if (strpos($text, 'willkommen auf starbase 47') !== false) { - return true; - } - return false; - } - - protected function scanForSpam() { - if (is_numeric($this->text_content)) { - return true; - } - return static::spammyText($this->text_content); - } - protected $casts = [ 'banned' => 'boolean', 'params' => 'array',