From 6a643908d58f26272c2095616514a140e7c0b4c0 Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Tue, 9 Apr 2024 12:06:51 +0200 Subject: [PATCH] better quota system for contextual messages --- app/Models/ChatLog.php | 91 +-------- app/TwitchBot/IRCMessage.php | 4 + app/TwitchBot/TokenizedMessage.php | 133 +++++++++++++ app/TwitchBot/TwitchChatBot.php | 187 +++++++----------- tests/Unit/Models/ChatLogTest.php | 38 ---- tests/Unit/TwitchBot/TokenizedMessageTest.php | 38 ++++ 6 files changed, 249 insertions(+), 242 deletions(-) create mode 100644 app/TwitchBot/TokenizedMessage.php delete mode 100644 tests/Unit/Models/ChatLogTest.php create mode 100644 tests/Unit/TwitchBot/TokenizedMessageTest.php diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index 3b4df15..d5fc76d 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -2,6 +2,7 @@ namespace App\Models; +use App\TwitchBot\TokenizedMessage; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; use Illuminate\Support\Arr; @@ -20,6 +21,10 @@ class ChatLog extends Model { return $this->belongsTo(User::class); } + public function tokenize() { + return TokenizedMessage::fromLog($this); + } + public function getTextWithoutEmotes() { $text = $this->text_content; if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { @@ -61,10 +66,11 @@ class ChatLog extends Model { } $this->text_content = $this->params[1]; $this->detectLanguage(); - if ($this->scanForSpam()) { + $tokenized = $this->tokenize(); + if ($tokenized->isSpammy()) { $this->banned = true; } - $this->classification = static::classify($this->text_content); + $this->classification = $tokenized->classify(); return; } @@ -86,39 +92,6 @@ class ChatLog extends Model { ]); } - public static function classify($text) { - if (empty($text)) { - return 'unclassified'; - } - if (is_numeric(trim($text))) { - return 'number'; - } - $rawText = strtolower(preg_replace('/[^\w]/', '', $text)); - $tokenizedText = preg_split('/\s+/', strtolower(trim($text))); - if (Str::startsWith($rawText, 'gg') || Str::endsWith($rawText, 'gg')) { - return 'gg'; - } - if (Str::contains($rawText, ['glgl', 'glhf', 'hfgl'])) { - return 'gl'; - } - if (Str::contains($rawText, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) { - return 'lol'; - } - if (Str::startsWith($rawText, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($rawText, ['hi', 'wave'])) { - return 'hi'; - } - if (Str::contains($rawText, ['pog', 'wow'])) { - return 'pog'; - } - if (Str::contains($rawText, ['hype'])) { - return 'hype'; - } - if (Str::startsWith($rawText, 'o7') || Str::endsWith($rawText, 'o7') || Str::contains($rawText, 'salut')) { - return 'o7'; - } - return 'unclassified'; - } - protected function evaluateUser() { } @@ -152,54 +125,6 @@ class ChatLog extends Model { } } - public static function spammyText($raw_text) { - $text = strtolower($raw_text); - if (substr($text, 0, 1) == '!') { - return true; - } - if (strpos($text, '$') !== false) { - return true; - } - if (strpos($text, '€') !== false) { - return true; - } - if (strpos($text, '@') !== false) { - return true; - } - if (strpos($text, '://') !== false) { - return true; - } - if (strpos($text, 'followers') !== false) { - return true; - } - if (strpos($text, 'horstie') !== false) { - return true; - } - if (strpos($text, 'promotion') !== false) { - return true; - } - if (strpos($text, 'viewers') !== false) { - return true; - } - if (strpos($text, 'view ers') !== false) { - return true; - } - if (strpos($text, 'vielen dank für den raid') !== false) { - return true; - } - if (strpos($text, 'willkommen auf starbase 47') !== false) { - return true; - } - return false; - } - - protected function scanForSpam() { - if (is_numeric($this->text_content)) { - return true; - } - return static::spammyText($this->text_content); - } - protected $casts = [ 'banned' => 'boolean', 'params' => 'array', diff --git a/app/TwitchBot/IRCMessage.php b/app/TwitchBot/IRCMessage.php index d6cfaec..0f73c06 100644 --- a/app/TwitchBot/IRCMessage.php +++ b/app/TwitchBot/IRCMessage.php @@ -136,6 +136,10 @@ class IRCMessage { ]); } + public function tokenize() { + return TokenizedMessage::fromIRC($this); + } + public static function join($channels) { $msg = new IRCMessage(); $msg->command = 'JOIN'; diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php new file mode 100644 index 0000000..259cfc6 --- /dev/null +++ b/app/TwitchBot/TokenizedMessage.php @@ -0,0 +1,133 @@ +text = $text; + $this->tags = $tags; + $this->raw = strtolower(preg_replace('/[^\w]/', '', $text)); + $this->tokens = preg_split('/\s+/', strtolower(trim($text))); + + $this->emoteless = $this->text; + if (isset($this->tags['emotes']) && !empty($this->tags['emotes'])) { + $emotes = explode('/', $this->tags['emotes']); + foreach ($emotes as $emote) { + $set = explode(':', $emote); + $positions = explode(',', $set[1]); + foreach ($positions as $position) { + $coords = explode('-', $position); + $this->emotes[] = substr($this->text, $coords[0], $coords[1] - $coords[0] + 1); + for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) { + $this->emoteless[$i] = ' '; + } + } + } + $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless)); + } + $this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless)); + } + + public static function fromIRC(IRCMessage $msg) { + return new self($msg->getText(), $msg->tags); + } + + public static function fromLog(ChatLog $log) { + return new self($log->params[1], $log->tags); + } + + public static function fromString($text, $tags = []) { + return new self($text, $tags); + } + + + public function getNumericValue() { + return intval($this->raw); + } + + public function isSpammy() { + if (substr($this->raw, 0, 1) == '!') { + return true; + } + if (strpos($this->raw, '$') !== false) { + return true; + } + if (strpos($this->raw, '€') !== false) { + return true; + } + if (strpos($this->raw, '@') !== false) { + return true; + } + if (strpos($this->raw, '://') !== false) { + return true; + } + if (strpos($this->raw, 'followers') !== false) { + return true; + } + if (strpos($this->raw, 'horstie') !== false) { + return true; + } + if (strpos($this->raw, 'promotion') !== false) { + return true; + } + if (strpos($this->raw, 'viewers') !== false) { + return true; + } + if (strpos($this->raw, 'view ers') !== false) { + return true; + } + if (strpos($this->raw, 'vielen dank für den raid') !== false) { + return true; + } + if (strpos($this->raw, 'willkommen auf starbase 47') !== false) { + return true; + } + return false; + } + + + public function classify() { + if (is_null($this->classification)) { + if (empty($this->raw)) { + $this->classification = 'unclassified'; + } else if (is_numeric($this->raw)) { + $this->classification = 'number'; + } else if (Str::startsWith($this->raw, 'gg') || Str::endsWith($this->raw, 'gg')) { + $this->classification = 'gg'; + } else if (Str::contains($this->raw, ['glgl', 'glhf', 'hfgl'])) { + $this->classification = 'gl'; + } else if (Str::contains($this->raw, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) { + $this->classification = 'lol'; + } else if (Str::startsWith($this->raw, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($this->raw, ['hi', 'wave'])) { + $this->classification = 'hi'; + } else if (Str::contains($this->raw, ['pog', 'wow'])) { + $this->classification = 'pog'; + } else if (Str::contains($this->raw, ['hype'])) { + $this->classification = 'hype'; + } else if (Str::startsWith($this->raw, 'o7') || Str::endsWith($this->raw, 'o7') || Str::contains($this->raw, 'salut')) { + $this->classification = 'o7'; + } else { + $this->classification = 'unclassified'; + } + } + return $this->classification; + } + + + private $text; + private $tags; + private $raw; + private $tokens; + + private $emotes = []; + private $emoteless = ''; + private $emoteless_tokens = []; + + private $classification = null; + +} diff --git a/app/TwitchBot/TwitchChatBot.php b/app/TwitchBot/TwitchChatBot.php index 7cdc704..aacc56f 100644 --- a/app/TwitchBot/TwitchChatBot.php +++ b/app/TwitchBot/TwitchChatBot.php @@ -82,7 +82,7 @@ class TwitchChatBot extends TwitchBot { if (!isset($this->notes[$channel->id])) { $this->notes[$channel->id] = [ 'last_read' => 0, - 'last_special' => '', + 'last_special' => [], 'last_write' => time(), 'latest_msgs' => [], 'read_since_last_write' => 0, @@ -106,127 +106,57 @@ class TwitchChatBot extends TwitchBot { $this->notes[$channel->id][$name] = $value; } - private function checkForGG(Channel $channel) { + private function collectClassifications(Channel $channel) { + $classifications = []; $notes = $this->getNotes($channel); - $ggs = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'gg') { - ++$ggs; + foreach ($notes['latest_msgs'] as $msg) { + $classification = $msg->classify(); + if ($classification == 'unclassified') continue; + if (isset($classifications[$classification])) { + ++$classifications[$classification]; + } else { + $classifications[$classification] = 1; } } - return $ggs > 2; - } - - private function checkForGLHF(Channel $channel) { - $notes = $this->getNotes($channel); - $gls = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'gl') { - ++$gls; - } - } - return $gls > 2; - } - - private function checkForGreeting(Channel $channel) { - $notes = $this->getNotes($channel); - $his = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'hi') { - ++$his; - } - } - return $his > 2; - } - - private function checkForHype(Channel $channel) { - $notes = $this->getNotes($channel); - $hypes = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'hype') { - ++$hypes; - } - } - return $hypes > 2; - } - - private function checkForLaughter(Channel $channel) { - $notes = $this->getNotes($channel); - $lulz = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'lol') { - ++$lulz; - } - } - return $lulz > 2; - } - - private function checkForNumbers(Channel $channel) { - $notes = $this->getNotes($channel); - $numbers = 0; - foreach ($notes['latest_msgs'] as $text) { - if (is_numeric(trim($text))) { - ++$numbers; - } - } - return $numbers > 2; - } - - private function checkForPog(Channel $channel) { - $notes = $this->getNotes($channel); - $pogs = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'pog') { - ++$pogs; - } - } - return $pogs > 2; - } - - private function checkForSalute(Channel $channel) { - $notes = $this->getNotes($channel); - $o7s = 0; - foreach ($notes['latest_msgs'] as $text) { - if (ChatLog::classify($text) == 'o7') { - ++$o7s; - } - } - return $o7s > 2; + arsort($classifications); + return $classifications; } private function contextualMsg(Channel $channel) { $last = $this->getNote($channel, 'last_special'); - if ($last != 'gg' && $this->checkForGG($channel)) { - $this->setNote($channel, 'last_special', 'gg'); - return $channel->randomOfClass('gg'); - } - if ($last != 'number' && $this->checkForNumbers($channel)) { - $this->setNote($channel, 'last_special', 'number'); - return $this->randomContextualNumber($channel); - } - if ($last != 'lol' && $this->checkForLaughter($channel)) { - $this->setNote($channel, 'last_special', 'lol'); - return $this->randomLaughter($channel); - } - if ($last != 'glhf' && $this->checkForGLHF($channel)) { - $this->setNote($channel, 'last_special', 'glhf'); - return $channel->randomOfClass('gl'); - } - if ($last != 'hi' && $this->checkForGreeting($channel)) { - $this->setNote($channel, 'last_special', 'hi'); - return $channel->randomOfClass('hi'); - } - if ($last != 'hype' && $this->checkForHype($channel)) { - $this->setNote($channel, 'last_special', 'hype'); - return $channel->randomOfClass('hype'); - } - if ($last != 'pog' && $this->checkForPog($channel)) { - $this->setNote($channel, 'last_special', 'pog'); - return $channel->randomOfClass('pog'); - } - if ($last != 'o7' && $this->checkForSalute($channel)) { - $this->setNote($channel, 'last_special', 'o7'); - return $channel->randomOfClass('o7'); + $classifications = $this->collectClassifications($channel); + $count_quotas = [ + 'gg' => 2, + 'gl' => 2, + 'hi' => 2, + 'hype' => 2, + 'lol' => 2, + 'number' => 2, + 'pog' => 2, + 'o7' => 2, + ]; + $time_quotas = [ + 'gg' => 300, + 'gl' => 900, + 'hi' => 60, + 'hype' => 60, + 'lol' => 60, + 'number' => 300, + 'pog' => 60, + 'o7' => 300, + ]; + foreach ($classifications as $classification => $count) { + if ($classification == $last) continue; + if (!isset($count_quotas[$classification]) || $count < $count_quotas[$classification]) continue; + if (!isset($time_quotas[$classification]) || $this->getTimeSinceSpecial($channel, $classification) < $time_quotas[$classification]) continue; + $this->tagChannelSpecialSent($channel, $classification); + if ($classification == 'number') { + return $this->randomContextualNumber($channel); + } + if ($classification == 'lol') { + return $this->randomLaughter($channel); + } + return $channel->randomOfClass($classification); } return false; } @@ -242,9 +172,9 @@ class TwitchChatBot extends TwitchBot { $notes = $this->getNotes($channel); $min = 100000; $max = 0; - foreach ($notes['latest_msgs'] as $text) { - if (is_numeric(trim($text))) { - $number = intval(trim($text)); + foreach ($notes['latest_msgs'] as $msg) { + if ($msg->classify() == 'number') { + $number = $msg->getNumericValue(); $min = min($min, $number); $max = max($max, $number); } @@ -307,8 +237,10 @@ class TwitchChatBot extends TwitchBot { $this->getNotes($channel); $this->notes[$channel->id]['last_read'] = time(); ++$this->notes[$channel->id]['read_since_last_write']; - if (!ChatLog::isKnownBot($msg->nick) && !ChatLog::spammyText($msg->getText())) { - $this->notes[$channel->id]['latest_msgs'][] = $msg->getText(); + + $tokenized = $msg->tokenize(); + if (!ChatLog::isKnownBot($msg->nick) && !$tokenized->isSpammy()) { + $this->notes[$channel->id]['latest_msgs'][] = $tokenized; if (count($this->notes[$channel->id]['latest_msgs']) > 10) { array_shift($this->notes[$channel->id]['latest_msgs']); } @@ -327,9 +259,22 @@ class TwitchChatBot extends TwitchBot { $this->notes[$channel->id]['wait_time'] = $this->randomWaitTime($channel); } + private function tagChannelSpecialSent(Channel $channel, $classification) { + $this->getNotes($channel); + $this->notes[$channel->id]['last_special'][$classification] = time(); + } + + private function getTimeSinceSpecial(Channel $channel, $classification) { + $notes = $this->getNotes($channel); + if (isset($notes['last_special'][$classification])) { + return time() - $notes['last_special'][$classification]; + } + return 999999; + } + private function isDirectedAtMe($raw_text) { $text = strtolower($raw_text); - if (strpos($text, 'horstie') !== false) { + if (strpos($text, 'horsti') !== false) { return true; } return false; diff --git a/tests/Unit/Models/ChatLogTest.php b/tests/Unit/Models/ChatLogTest.php deleted file mode 100644 index 30b8732..0000000 --- a/tests/Unit/Models/ChatLogTest.php +++ /dev/null @@ -1,38 +0,0 @@ -assertEquals('gg', ChatLog::classify('gg')); - $this->assertEquals('gg', ChatLog::classify('GG')); - $this->assertEquals('gg', ChatLog::classify('Gg')); - - $this->assertEquals('gl', ChatLog::classify('glhf')); - $this->assertEquals('gl', ChatLog::classify('gl & hf')); - - $this->assertEquals('hi', ChatLog::classify('hi')); - $this->assertEquals('hi', ChatLog::classify('hallo')); - $this->assertNotEquals('hi', ChatLog::classify('hier steht was')); - - $this->assertEquals('hype', ChatLog::classify('122 Hype!')); - - $this->assertEquals('number', ChatLog::classify('13')); - $this->assertEquals('number', ChatLog::classify('22')); - - $this->assertEquals('lol', ChatLog::classify('haha')); - $this->assertEquals('lol', ChatLog::classify('KEKW')); - $this->assertEquals('lol', ChatLog::classify('LUL')); - - $this->assertEquals('o7', ChatLog::classify('o7')); - - $this->assertEquals('pog', ChatLog::classify('Pog')); - - $this->assertEquals('unclassified', ChatLog::classify('')); - } - -} diff --git a/tests/Unit/TwitchBot/TokenizedMessageTest.php b/tests/Unit/TwitchBot/TokenizedMessageTest.php new file mode 100644 index 0000000..243f098 --- /dev/null +++ b/tests/Unit/TwitchBot/TokenizedMessageTest.php @@ -0,0 +1,38 @@ +assertEquals('gg', TokenizedMessage::fromString('gg')->classify()); + $this->assertEquals('gg', TokenizedMessage::fromString('GG')->classify()); + $this->assertEquals('gg', TokenizedMessage::fromString('Gg')->classify()); + + $this->assertEquals('gl', TokenizedMessage::fromString('glhf')->classify()); + $this->assertEquals('gl', TokenizedMessage::fromString('gl & hf')->classify()); + + $this->assertEquals('hi', TokenizedMessage::fromString('hi')->classify()); + $this->assertEquals('hi', TokenizedMessage::fromString('hallo')->classify()); + $this->assertNotEquals('hi', TokenizedMessage::fromString('hier steht was')->classify()); + + $this->assertEquals('hype', TokenizedMessage::fromString('122 Hype!')->classify()); + + $this->assertEquals('number', TokenizedMessage::fromString('13')->classify()); + $this->assertEquals('number', TokenizedMessage::fromString('22')->classify()); + + $this->assertEquals('lol', TokenizedMessage::fromString('haha')->classify()); + $this->assertEquals('lol', TokenizedMessage::fromString('KEKW')->classify()); + $this->assertEquals('lol', TokenizedMessage::fromString('LUL')->classify()); + + $this->assertEquals('o7', TokenizedMessage::fromString('o7')->classify()); + + $this->assertEquals('pog', TokenizedMessage::fromString('Pog')->classify()); + + $this->assertEquals('unclassified', TokenizedMessage::fromString('')->classify()); + } + +} -- 2.39.2