From 9a4e2305bb37cb8c62e3ad332f4a2bf45f720a18 Mon Sep 17 00:00:00 2001
From: Daniel Karbach
Date: Sun, 7 Apr 2024 13:26:51 +0200
Subject: [PATCH] strip emotes for language detection

---
 app/Models/ChatLog.php | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php
index e17076a..3b4df15 100644
--- a/app/Models/ChatLog.php
+++ b/app/Models/ChatLog.php
@@ -20,6 +20,48 @@ class ChatLog extends Model {
         return $this->belongsTo(User::class);
     }
 
+    /**
+     * Returns the message text with every emote occurrence blanked out.
+     *
+     * The "emotes" tag is parsed as "<id>:<start>-<end>,.../<id>:<start>-<end>".
+     * Positions are treated as inclusive character (code point) offsets, not
+     * byte offsets, so multibyte text is not corrupted or misaligned.
+     * NOTE(review): this matches the Twitch IRC emote tag format; confirm
+     * against the producer of the tags array.
+     *
+     * @return string Text with emotes removed, whitespace collapsed and trimmed.
+     */
+    public function getTextWithoutEmotes() {
+        $text = (string) $this->text_content;
+        $emotes = $this->tags['emotes'] ?? '';
+        if (is_string($emotes) && $emotes !== '') {
+            // Work on a list of characters: string offsets ($text[$i]) address
+            // bytes and would split UTF-8 sequences in non-ASCII messages.
+            $chars = mb_str_split($text, 1, 'UTF-8');
+            $last = count($chars) - 1;
+            foreach (explode('/', $emotes) as $emote) {
+                $set = explode(':', $emote);
+                if (!isset($set[1])) {
+                    continue; // malformed entry without a position list
+                }
+                foreach (explode(',', $set[1]) as $position) {
+                    $coords = explode('-', $position);
+                    if (!isset($coords[1])) {
+                        continue; // malformed range without an end offset
+                    }
+                    // Clamp to the text bounds so bad offsets cannot grow the text.
+                    $start = max(0, intval($coords[0]));
+                    $end = min($last, intval($coords[1]));
+                    for ($i = $start; $i <= $end; ++$i) {
+                        $chars[$i] = ' ';
+                    }
+                }
+            }
+            $text = implode('', $chars);
+        }
+        return trim(preg_replace('/\s+/', ' ', $text));
+    }
+
     public function evaluate() {
         $this->evaluateUser();
         $this->evaluateChannel();
@@ -125,7 +167,7 @@ class ChatLog extends Model {
                 $languages[] = 'en';
             }
         }
-        $detector = (new Language($languages))->detect($this->text_content);
+        $detector = (new Language($languages))->detect($this->getTextWithoutEmotes());
         $scores = $detector->close();
         $lang = strval($detector);
         //var_dump($scores, $lang, $this->text_content);
-- 
2.39.2