From: Daniel Karbach Date: Wed, 8 May 2024 11:08:46 +0000 (+0200) Subject: fix unicode problem in message tokenizer X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;h=2d1c02504d80ad0e2c754f31f3b17d8c9ea60682;hp=f18af7cfb219ab9c07635ea8bbae80f2a9cee78e;p=alttp.git fix unicode problem in message tokenizer --- diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php index 668634a..b32ba72 100644 --- a/app/TwitchBot/TokenizedMessage.php +++ b/app/TwitchBot/TokenizedMessage.php @@ -12,7 +12,7 @@ class TokenizedMessage { $this->text = trim($text); $this->tags = $tags; $this->raw = strtolower(preg_replace('/[^\w]/u', '', $this->text)); - $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->text))))); + $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->text))))); $this->emoteless = $this->text; if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { @@ -31,7 +31,7 @@ class TokenizedMessage { $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless)); } $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless)); - $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->emoteless))))); + $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->emoteless))))); } public static function fromIRC(IRCMessage $msg) {