From: Daniel Karbach
Date: Sun, 7 Apr 2024 11:26:51 +0000 (+0200)
Subject: strip emotes for language detection
X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;ds=inline;h=9a4e2305bb37cb8c62e3ad332f4a2bf45f720a18;hp=5b1693c80ed72de514e40d5c364dc1ab14d84bff;p=alttp.git

strip emotes for language detection
---

diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php
index e17076a..3b4df15 100644
--- a/app/Models/ChatLog.php
+++ b/app/Models/ChatLog.php
@@ -20,6 +20,46 @@ class ChatLog extends Model {
 		return $this->belongsTo(User::class);
 	}
 
+	/**
+	 * Return the message text with every emote occurrence blanked out.
+	 *
+	 * The "emotes" tag is parsed as "id:start-end,start-end/id:start-end".
+	 * Offsets are applied per character instead of per byte so multi-byte
+	 * UTF-8 text is not corrupted (assumes the tag counts characters, as
+	 * Twitch-style IRC tags do; verify against the chat source).
+	 *
+	 * @return string text without emotes, whitespace collapsed and trimmed
+	 */
+	public function getTextWithoutEmotes() {
+		$text = strval($this->text_content);
+		if (!empty($this->tags['emotes'])) {
+			$chars = mb_str_split($text, 1, 'UTF-8');
+			$last = count($chars) - 1;
+			foreach (explode('/', $this->tags['emotes']) as $emote) {
+				$set = explode(':', $emote);
+				if (count($set) < 2) {
+					// malformed entry without a position list
+					continue;
+				}
+				foreach (explode(',', $set[1]) as $position) {
+					$coords = explode('-', $position);
+					if (count($coords) < 2) {
+						// malformed range without an end offset
+						continue;
+					}
+					// clamp to the text so bogus offsets cannot grow it
+					$start = max(0, intval($coords[0]));
+					$end = min($last, intval($coords[1]));
+					for ($i = $start; $i <= $end; ++$i) {
+						$chars[$i] = ' ';
+					}
+				}
+			}
+			$text = implode('', $chars);
+		}
+		return trim(preg_replace('/\s+/', ' ', $text));
+	}
+
 	public function evaluate() {
 		$this->evaluateUser();
 		$this->evaluateChannel();
@@ -125,7 +165,7 @@ class ChatLog extends Model {
 				$languages[] = 'en';
 			}
 		}
-		$detector = (new Language($languages))->detect($this->text_content);
+		$detector = (new Language($languages))->detect($this->getTextWithoutEmotes());
 		$scores = $detector->close();
 		$lang = strval($detector);
 		//var_dump($scores, $lang, $this->text_content);