]> git.localhorst.tv Git - alttp.git/commitdiff
strip emotes for language detection
author Daniel Karbach <daniel.karbach@localhorst.tv>
Sun, 7 Apr 2024 11:26:51 +0000 (13:26 +0200)
committer Daniel Karbach <daniel.karbach@localhorst.tv>
Sun, 7 Apr 2024 11:26:51 +0000 (13:26 +0200)
app/Models/ChatLog.php

index e17076aee164871121663d9976cdbc796699238f..3b4df15f2ac9596846729b91fc564c5780ca68b8 100644 (file)
@@ -20,6 +20,24 @@ class ChatLog extends Model {
                return $this->belongsTo(User::class);
        }
 
+       /**
+        * Return the message text with every emote range blanked out.
+        *
+        * The `emotes` tag is parsed as `id:start-end,start-end/id:start-end`,
+        * where each start/end pair is an inclusive range inside the message.
+        * Ranges are applied as character (code point) offsets, not byte
+        * offsets, so multi-byte characters before or inside an emote no
+        * longer shift the replaced span or split a UTF-8 sequence.
+        * NOTE(review): character offsets match the Twitch IRC `emotes` tag;
+        * this assumes that is where `tags` comes from - confirm.
+        *
+        * Malformed entries (missing `:` or `-`) are skipped and ranges are
+        * clamped to the message length.
+        *
+        * @return string text with emotes removed, whitespace runs collapsed
+        *                to single spaces and the result trimmed
+        */
+       public function getTextWithoutEmotes() {
+               $text = (string) $this->text_content;
+               if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) {
+                       // Split into code points so offsets address characters, not bytes.
+                       $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
+                       if ($chars === false) {
+                               // Not valid UTF-8: fall back to byte-wise addressing.
+                               $chars = str_split($text);
+                       }
+                       $last = count($chars) - 1;
+                       $emotes = explode('/', (string) $this->tags['emotes']);
+                       foreach ($emotes as $emote) {
+                               $set = explode(':', $emote);
+                               if (count($set) < 2) {
+                                       // No position list for this entry.
+                                       continue;
+                               }
+                               $positions = explode(',', $set[1]);
+                               foreach ($positions as $position) {
+                                       $coords = explode('-', $position);
+                                       if (count($coords) < 2) {
+                                               // Range without an end offset.
+                                               continue;
+                                       }
+                                       $start = max(0, intval($coords[0]));
+                                       $end = min($last, intval($coords[1]));
+                                       for ($i = $start; $i <= $end; ++$i) {
+                                               $chars[$i] = ' ';
+                                       }
+                               }
+                       }
+                       $text = implode('', $chars);
+               }
+               return trim(preg_replace('/\s+/', ' ', $text));
+       }
+
        public function evaluate() {
                $this->evaluateUser();
                $this->evaluateChannel();
@@ -125,7 +143,7 @@ class ChatLog extends Model {
                                $languages[] = 'en';
                        }
                }
-               $detector = (new Language($languages))->detect($this->text_content);
+               $detector = (new Language($languages))->detect($this->getTextWithoutEmotes());
                $scores = $detector->close();
                $lang = strval($detector);
                //var_dump($scores, $lang, $this->text_content);