From: Daniel Karbach Date: Wed, 10 Apr 2024 16:08:05 +0000 (+0200) Subject: better classification X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;h=7d1ef8619d7513184340f73e99efa20481b86d75;p=alttp.git better classification --- diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index 41bdcc5..063e803 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -106,7 +106,7 @@ class ChatLog extends Model { $channel = Channel::firstWhere('twitch_chat', '=', $cname); if (!is_null($channel)) { $this->channel()->associate($channel); - if (now()->sub(5, 'minute')->isBefore($this->created_at)) { + if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) { $this->twitch_category = $channel->twitch_category; } } diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php index dc908d8..9994737 100644 --- a/app/TwitchBot/TokenizedMessage.php +++ b/app/TwitchBot/TokenizedMessage.php @@ -11,7 +11,7 @@ class TokenizedMessage { public function __construct($text, $tags = []) { $this->text = $text; $this->tags = $tags; - $this->raw = strtolower(preg_replace('/[^\w]/', '', $text)); + $this->raw = strtolower(preg_replace('/[^\w]/u', '', $text)); $this->tokens = preg_split('/\s+/', strtolower(trim($text))); $this->emoteless = $this->text; @@ -22,7 +22,7 @@ class TokenizedMessage { $positions = explode(',', $set[1]); foreach ($positions as $position) { $coords = explode('-', $position); - $this->emotes[] = substr($this->text, $coords[0], $coords[1] - $coords[0] + 1); + $this->emotes[] = strtolower(substr($this->text, $coords[0], $coords[1] - $coords[0] + 1)); for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) { $this->emoteless[$i] = ' '; } @@ -30,6 +30,7 @@ class TokenizedMessage { } $this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless)); } + $this->emoteless_raw = strtolower(preg_replace('/[^\w]/', '', $this->emoteless)); $this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless)); } @@ -46,45 +47,137 @@ class TokenizedMessage { } + public function contains($text) { + return Str::contains($this->text, $text); + } + + public function containsRaw($text) { + return Str::contains($this->raw, $text); + } + + public function endsWith($text) { + return Str::endsWith($this->text, $text); + } + + public function endsWithRaw($text) { + return Str::endsWith($this->raw, $text); + } + public function getNumericValue() { - return intval($this->raw); + return intval($this->text); } - public function isSpammy() { - if (substr($this->raw, 0, 1) == '!') { - return true; + public function hasEmote($text) { + return in_array($text, $this->emotes); + } + + public function hasEmoteThatContains($text) { + foreach ($this->emotes as $emote) { + if (Str::contains($emote, $text)) { + return true; + } } - if (strpos($this->raw, '$') !== false) { - return true; + return false; + } + + public function hasEmoteThatEndsWith($text) { + foreach ($this->emotes as $emote) { + if (Str::endsWith($emote, $text)) { + return true; + } } - if (strpos($this->raw, '€') !== false) { - return true; + return false; + } + + public function hasEmoteThatStartsOrEndsWith($text) { + foreach ($this->emotes as $emote) { + if (Str::startsWith($emote, $text) || Str::endsWith($emote, $text)) { + return true; + } } - if (strpos($this->raw, '@') !== false) { - return true; + return false; + } + + public function hasEmoteThatStartsWith($text) { + foreach ($this->emotes as $emote) { + if (Str::startsWith($emote, $text)) { + return true; + } } - if (strpos($this->raw, '://') !== false) { - return true; + return false; + } + + public function hasToken($text) { + return in_array($text, $this->tokens); + } + + public function hasTokenThatContains($text) { + foreach ($this->tokens as $token) { + if (Str::contains($token, $text)) { + return true; + } } - if (strpos($this->raw, 'followers') !== false) { - return true; + return false; + } + + public function hasTokenThatEndsWith($text) { + foreach ($this->tokens as $token) { + if (Str::endsWith($token, $text)) { + return true; + } } - if (strpos($this->raw, 'horstie') !== false) { - return true; + return false; + } + + public function hasTokenThatStartsOrEndsWith($text) { + foreach ($this->tokens as $token) { + if (Str::startsWith($token, $text) || Str::endsWith($token, $text)) { + return true; + } } - if (strpos($this->raw, 'promotion') !== false) { + return false; + } + + public function hasTokenThatStartsWith($text) { + foreach ($this->tokens as $token) { + if (Str::startsWith($token, $text)) { + return true; + } + } + return false; + } + + public function startsOrEndsWith($text) { + return $this->startsWith($text) || $this->endsWith($text); + } + + public function startsOrEndsWithRaw($text) { + return $this->startsWithRaw($text) || $this->endsWithRaw($text); + } + + public function startsWith($text) { + return Str::startsWith($this->text, $text); + } + + public function startsWithRaw($text) { + return Str::startsWith($this->raw, $text); + } + + + public function isSpammy() { + if ($this->startsWith('!')) { return true; } - if (strpos($this->raw, 'viewers') !== false) { + if ($this->contains(['€', '$', '@', '://'])) { return true; } - if (strpos($this->raw, 'view ers') !== false) { + if ($this->containsRaw(['followers', 'promotion', 'viewers'])) { return true; } - if (strpos($this->raw, 'vielen dank für den raid') !== false) { + if ($this->containsRaw('horstie')) { return true; } - if (strpos($this->raw, 'willkommen auf starbase 47') !== false) { + if ($this->containsRaw(['vielendankfürdenraid', 'willkommenaufstarbase47'])) { return true; } return false; @@ -97,19 +190,19 @@ class TokenizedMessage { $this->classification = 'unclassified'; } else if (is_numeric($this->raw)) { $this->classification = 'number'; - } else if (Str::startsWith($this->raw, 'gg') || Str::endsWith($this->raw, 'gg')) { + } else if ($this->hasTokenThatStartsOrEndsWith(['gg'])) { $this->classification = 'gg'; - } else if (Str::contains($this->raw, ['glgl', 'glhf', 'hfgl'])) { + } else if ($this->containsRaw(['glgl', 'glhf', 'hfgl'])) { $this->classification = 'gl'; - } else if (Str::contains($this->raw, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) { + } else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd']) || $this->hasTokenThatStartsWith(':d')) { $this->classification = 'lol'; - } else if (Str::startsWith($this->raw, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($this->raw, ['hello', 'hi', 'wave'])) { + } else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'huhu']) || $this->hasEmoteThatEndsWith(['hello', 'hi', 'wave']) || $this->hasToken('hi')) { $this->classification = 'hi'; - } else if (Str::contains($this->raw, ['pog', 'wow'])) { + } else if ($this->containsRaw(['pog', 'wow'])) { $this->classification = 'pog'; - } else if (Str::contains($this->raw, ['hype'])) { + } else if ($this->containsRaw(['hype'])) { $this->classification = 'hype'; - } else if (Str::startsWith($this->raw, 'o7') || Str::endsWith($this->raw, 'o7') || Str::contains($this->raw, 'salut')) { + } else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) { $this->classification = 'o7'; } else { $this->classification = 'unclassified'; @@ -126,6 +219,7 @@ class TokenizedMessage { private $emotes = []; private $emoteless = ''; + private $emoteless_raw = ''; private $emoteless_tokens = []; private $classification = null; diff --git a/tests/Unit/TwitchBot/TokenizedMessageTest.php b/tests/Unit/TwitchBot/TokenizedMessageTest.php index 8d41573..48ae5ab 100644 --- a/tests/Unit/TwitchBot/TokenizedMessageTest.php +++ b/tests/Unit/TwitchBot/TokenizedMessageTest.php @@ -11,13 +11,17 @@ class TokenizedMessageTest extends TestCase { $this->assertEquals('gg', TokenizedMessage::fromString('gg')->classify()); $this->assertEquals('gg', TokenizedMessage::fromString('GG')->classify()); $this->assertEquals('gg', TokenizedMessage::fromString('Gg')->classify()); + $this->assertEquals('gg', TokenizedMessage::fromString('ggs')->classify()); + $this->assertEquals('gg', TokenizedMessage::fromString('ja gg dann, ne')->classify()); + $this->assertEquals('gg', TokenizedMessage::fromString('duden2Gg')->classify()); + $this->assertNotEquals('gg', TokenizedMessage::fromString('Eggnog')->classify()); $this->assertEquals('gl', TokenizedMessage::fromString('glhf')->classify()); $this->assertEquals('gl', TokenizedMessage::fromString('gl & hf')->classify()); $this->assertEquals('hi', TokenizedMessage::fromString('hi')->classify()); $this->assertEquals('hi', TokenizedMessage::fromString('hallo')->classify()); - $this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello')->classify()); + $this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello', ['emotes' => 'blah:0-10'])->classify()); $this->assertNotEquals('hi', TokenizedMessage::fromString('hier steht was')->classify()); $this->assertEquals('hype', TokenizedMessage::fromString('122 Hype!')->classify()); @@ -28,12 +32,28 @@ class TokenizedMessageTest extends TestCase { $this->assertEquals('lol', TokenizedMessage::fromString('haha')->classify()); $this->assertEquals('lol', TokenizedMessage::fromString('KEKW')->classify()); $this->assertEquals('lol', TokenizedMessage::fromString('LUL')->classify()); + $this->assertEquals('lol', TokenizedMessage::fromString(':D')->classify()); $this->assertEquals('o7', TokenizedMessage::fromString('o7')->classify()); + $this->assertEquals('o7', TokenizedMessage::fromString('ticknaSalutieren', ['emotes' => 'blah:0-15'])->classify()); $this->assertEquals('pog', TokenizedMessage::fromString('Pog')->classify()); $this->assertEquals('unclassified', TokenizedMessage::fromString('')->classify()); } + public function test_spam() { + $this->assertTrue(TokenizedMessage::fromString('!start')->isSpammy()); + $this->assertTrue(TokenizedMessage::fromString('@LocalhorstTV')->isSpammy()); + + $this->assertTrue(TokenizedMessage::fromString('just 50€')->isSpammy()); + $this->assertTrue(TokenizedMessage::fromString('hello would you like some followers?')->isSpammy()); + $this->assertTrue(TokenizedMessage::fromString('get view ers for free')->isSpammy()); + + $this->assertTrue(TokenizedMessage::fromString('also bitte, horstie')->isSpammy()); + + $this->assertTrue(TokenizedMessage::fromString('hey maengi, vielen dank für den raid')->isSpammy()); + $this->assertTrue(TokenizedMessage::fromString('Willkommen auf Starbase 47')->isSpammy()); + } + }