git.localhorst.tv Git - alttp.git/commitdiff
multibyte handling in emote tokenizer
author Daniel Karbach <daniel.karbach@localhorst.tv>
Wed, 8 May 2024 19:47:28 +0000 (21:47 +0200)
committer Daniel Karbach <daniel.karbach@localhorst.tv>
Wed, 8 May 2024 19:47:28 +0000 (21:47 +0200)
app/TwitchBot/TokenizedMessage.php
tests/Unit/TwitchBot/TokenizedMessageTest.php

index b32ba720a3cb0489d61143969b82f4e0e154f83a..37836a915235c7356d7bcac8b1d4939a90b63ce4 100644 (file)
@@ -22,15 +22,13 @@ class TokenizedMessage {
                                $positions = explode(',', $set[1]);
                                foreach ($positions as $position) {
                                        $coords = explode('-', $position);
-                                       $this->emotes[] = preg_replace('/\d+$/', '', strtolower(substr($this->text, $coords[0], $coords[1] - $coords[0] + 1)));
-                                       for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) {
-                                               $this->emoteless[$i] = ' ';
-                                       }
+                                       $this->emotes[] = preg_replace('/\d+$/', '', strtolower(mb_substr($this->text, $coords[0], $coords[1] - $coords[0] + 1)));
+                                       $this->emoteless = mb_substr($this->emoteless, 0, $coords[0]).str_repeat(' ', $coords[1] - $coords[0] + 1).mb_substr($this->emoteless, $coords[1] + 1);
                                }
                        }
-                       $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
+                       $this->emoteless = trim(preg_replace('/\s+/u', ' ', $this->emoteless));
                }
-               $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless));
+               $this->emoteless_raw = strtolower(preg_replace('/[^\w]/u', '', $this->emoteless));
                $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->emoteless)))));
        }
 
index 1af6bb2efab17e6f0c6634f4121250c313384021..832b9702cf2a345b82023f022bc592d4b157cdaa 100644 (file)
@@ -34,6 +34,7 @@ class TokenizedMessageTest extends TestCase {
                $this->assertNotEquals('hi', TokenizedMessage::fromString('hier steht was')->classify());
 
                $this->assertEquals('hype', TokenizedMessage::fromString('122 Hype!')->classify());
+               $this->assertEquals('hype', TokenizedMessage::fromString('Sarühlalü KomodoHype', ['emotes' => '81273:10-19'])->classify());
 
                $this->assertEquals('kappa', TokenizedMessage::fromString('Kappa', ['emotes' => 'blah:0-4'])->classify());
                $this->assertEquals('kappa', TokenizedMessage::fromString('KappaClaus', ['emotes' => 'blah:0-9'])->classify());
@@ -72,7 +73,7 @@ class TokenizedMessageTest extends TestCase {
                $this->assertNotEquals('wtf', TokenizedMessage::fromString('ein waterwalk aufgesetzt')->classify());
 
                $this->assertEquals('yes', TokenizedMessage::fromString('ja geht SeemsGood')->classify());
-               $this->assertEquals('yes', TokenizedMessage::fromString('also ich würde sagen ja LUL', ['emotes' => 'blah:25-27'])->classify());
+               $this->assertEquals('yes', TokenizedMessage::fromString('also ich würde sagen ja LUL', ['emotes' => 'blah:24-26'])->classify());
                $this->assertNotEquals('yes', TokenizedMessage::fromString('find ich ja gut')->classify());
 
                $this->assertEquals('unclassified', TokenizedMessage::fromString('')->classify());