From ebc1e9c3b448b575b749d62a0f31f82879c47625 Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Thu, 11 Apr 2024 13:19:22 +0200 Subject: [PATCH] refined classification --- app/TwitchBot/TokenizedMessage.php | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php index 82b4d47..42d6ca8 100644 --- a/app/TwitchBot/TokenizedMessage.php +++ b/app/TwitchBot/TokenizedMessage.php @@ -12,7 +12,7 @@ class TokenizedMessage { $this->text = $text; $this->tags = $tags; $this->raw = strtolower(preg_replace('/[^\w]/u', '', $text)); - $this->tokens = preg_split('/\s+/', strtolower(trim($text))); + $this->tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($text))))); $this->emoteless = $this->text; if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { @@ -31,7 +31,7 @@ class TokenizedMessage { $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless)); } $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless)); - $this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless)); + $this->emoteless_tokens = array_values(array_map('trim', array_filter(preg_split('/\b/', strtolower($this->emoteless))))); } public static function fromIRC(IRCMessage $msg) { @@ -75,6 +75,16 @@ class TokenizedMessage { return intval($this->text); } + public function hasConsecutiveTokens($tokens) { + for ($i = 0; $i < count($this->tokens) - count($tokens) + 1; ++$i) { + for ($j = 0; $j < count($tokens); ++$j) { + if ($this->tokens[$i + $j] != $tokens[$j]) break; + } + if ($j == count($tokens)) return true; + } + return false; + } + public function hasEmote($text) { if (is_array($text)) { foreach ($text as $token) { @@ -224,23 +234,23 @@ class TokenizedMessage { $this->classification = 'gg'; } else if ($this->containsRaw(['glgl', 'glhf', 'goodluck', 'hfgl'])) { $this->classification = 'gl'; - } else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'hey', 'huhu', 'moin']) || $this->hasEmoteThatEndsWith(['hello', 'heyguys', 'hi', 'wave']) || $this->hasToken(['hi', 'hey']) || $this->containsRaw(['gutenmorgen', 'gutenabend'])) { + } else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'hey', 'huhu', 'moin']) || $this->hasEmoteThatEndsWith(['hello', 'heyguys', 'hi', 'vohiyo', 'wave']) || $this->hasToken(['hi', 'hey', 'yo']) || $this->containsRaw(['gutenmorgen', 'gutenabend'])) { $this->classification = 'hi'; } else if ($this->hasTokenThatStartsOrEndsWith(['pog', 'wow'])) { $this->classification = 'pog'; - } else if ($this->containsRaw(['hype'])) { + } else if ($this->containsRaw(['hype']) || $this->hasEmoteThatEndsWith(['dance', 'jam', 'party', 'rave', 'troete'])) { $this->classification = 'hype'; } else if ($this->hasToken(['danke', 'thanks', 'thx', 'ty'])) { $this->classification = 'thx'; - } else if ($this->hasToken(['<3']) || $this->hasEmoteThatEndsWith(['herz', 'hug', 'love'])) { + } else if ($this->hasToken(['<3']) || $this->hasEmoteThatEndsWith(['heart', 'herz', 'hug', 'love'])) { $this->classification = 'love'; - } else if ($this->hasToken(['wat', 'wat?']) || $this->hasTokenThatStartsWith(['wtf']) || $this->hasEmoteThatEndsWith(['wat', 'wtf'])) { + } else if ($this->hasToken(['nani', 'wat', 'wtf']) || $this->hasEmoteThatEndsWith(['wat', 'wtf'])) { $this->classification = 'wtf'; } else if ($this->endsWithEmoteless('?')) { $this->classification = 'question'; } else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) { $this->classification = 'o7'; - } else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul']) || $this->hasTokenThatStartsWith([':d', 'xd'])) { + } else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul']) || $this->hasTokenThatStartsWith(['xd']) || $this->hasConsecutiveTokens([':', 'd'])) { $this->classification = 'lol'; } else { $this->classification = 'unclassified'; -- 2.39.2