git.localhorst.tv Git - alttp.git/blobdiff - app/TwitchBot/TokenizedMessage.php
fix unicode problem in message tokenizer
[alttp.git] / app / TwitchBot / TokenizedMessage.php
index 668634af6ef45f887d3360997d7fb85e1e1ee0c4..b32ba720a3cb0489d61143969b82f4e0e154f83a 100644 (file)
@@ -12,7 +12,7 @@ class TokenizedMessage {
                $this->text = trim($text);
                $this->tags = $tags;
                $this->raw = strtolower(preg_replace('/[^\w]/u', '', $this->text));
-               $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->text)))));
+               $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->text)))));
 
                $this->emoteless = $this->text;
                if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) {
@@ -31,7 +31,7 @@ class TokenizedMessage {
                        $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
                }
                $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless));
-               $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->emoteless)))));
+               $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/u', strtolower($this->emoteless)))));
        }
 
        public static function fromIRC(IRCMessage $msg) {