From 2d1c02504d80ad0e2c754f31f3b17d8c9ea60682 Mon Sep 17 00:00:00 2001
From: Daniel Karbach
Date: Wed, 8 May 2024 13:08:46 +0200
Subject: [PATCH] fix unicode problem in message tokenizer

---
 app/TwitchBot/TokenizedMessage.php | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php
index 668634a..b32ba72 100644
--- a/app/TwitchBot/TokenizedMessage.php
+++ b/app/TwitchBot/TokenizedMessage.php
@@ -12,7 +12,7 @@ class TokenizedMessage {
         $this->text = trim($text);
         $this->tags = $tags;
         $this->raw = strtolower(preg_replace('/[^\w]/u', '', $this->text));
-        $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->text)))));
+        $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->text)))));
 
         $this->emoteless = $this->text;
         if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) {
@@ -31,7 +31,7 @@ class TokenizedMessage {
             $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
         }
         $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless));
-        $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->emoteless)))));
+        $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->emoteless)))));
     }
 
     public static function fromIRC(IRCMessage $msg) {
-- 
2.39.2