From: Daniel Karbach <daniel.karbach@localhorst.tv>
Date: Wed, 10 Apr 2024 16:08:05 +0000 (+0200)
Subject: better classification
X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;h=7d1ef8619d7513184340f73e99efa20481b86d75;p=alttp.git

better classification
---

diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php
index 41bdcc5..063e803 100644
--- a/app/Models/ChatLog.php
+++ b/app/Models/ChatLog.php
@@ -106,7 +106,7 @@ class ChatLog extends Model {
 		$channel = Channel::firstWhere('twitch_chat', '=', $cname);
 		if (!is_null($channel)) {
 			$this->channel()->associate($channel);
-			if (now()->sub(5, 'minute')->isBefore($this->created_at)) {
+			if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) {
 				$this->twitch_category = $channel->twitch_category;
 			}
 		}
diff --git a/app/TwitchBot/TokenizedMessage.php b/app/TwitchBot/TokenizedMessage.php
index dc908d8..9994737 100644
--- a/app/TwitchBot/TokenizedMessage.php
+++ b/app/TwitchBot/TokenizedMessage.php
@@ -11,7 +11,7 @@ class TokenizedMessage {
 	public function __construct($text, $tags = []) {
 		$this->text = $text;
 		$this->tags = $tags;
-		$this->raw = strtolower(preg_replace('/[^\w]/', '', $text));
+		$this->raw = strtolower(preg_replace('/[^\w]/u', '', $text));
 		$this->tokens = preg_split('/\s+/', strtolower(trim($text)));
 
 		$this->emoteless = $this->text;
@@ -22,7 +22,7 @@ class TokenizedMessage {
 				$positions = explode(',', $set[1]);
 				foreach ($positions as $position) {
 					$coords = explode('-', $position);
-					$this->emotes[] = substr($this->text, $coords[0], $coords[1] - $coords[0] + 1);
+					$this->emotes[] = strtolower(mb_substr($this->text, intval($coords[0]), $coords[1] - $coords[0] + 1, 'UTF-8')); // emote positions count characters, not bytes
 					for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) {
 						$this->emoteless[$i] = ' ';
 					}
@@ -30,6 +30,7 @@ class TokenizedMessage {
 			}
 			$this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
 		}
+		$this->emoteless_raw = strtolower(preg_replace('/[^\w]/u', '', $this->emoteless)); // /u keeps non-ASCII letters, same as $this->raw
 		$this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless));
 	}
 
@@ -46,45 +47,137 @@ class TokenizedMessage {
 	}
 
 
+	public function contains($text) {
+		return Str::contains($this->text, $text);
+	}
+
+	public function containsRaw($text) {
+		return Str::contains($this->raw, $text);
+	}
+
+	public function endsWith($text) {
+		return Str::endsWith($this->text, $text);
+	}
+
+	public function endsWithRaw($text) {
+		return Str::endsWith($this->raw, $text);
+	}
+
 	public function getNumericValue() {
-		return intval($this->raw);
+		return intval($this->text);
 	}
 
-	public function isSpammy() {
-		if (substr($this->raw, 0, 1) == '!') {
-			return true;
+	public function hasEmote($text) {
+		return in_array($text, $this->emotes, true); // strict: loose == would treat numeric strings such as '010' and '10' as equal
+	}
+
+	public function hasEmoteThatContains($text) {
+		foreach ($this->emotes as $emote) {
+			if (Str::contains($emote, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, '$') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasEmoteThatEndsWith($text) {
+		foreach ($this->emotes as $emote) {
+			if (Str::endsWith($emote, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, '€') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasEmoteThatStartsOrEndsWith($text) {
+		foreach ($this->emotes as $emote) {
+			if (Str::startsWith($emote, $text) || Str::endsWith($emote, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, '@') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasEmoteThatStartsWith($text) {
+		foreach ($this->emotes as $emote) {
+			if (Str::startsWith($emote, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, '://') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasToken($text) {
+		return in_array($text, $this->tokens, true); // strict: loose == would treat numeric strings such as '010' and '10' as equal
+	}
+
+	public function hasTokenThatContains($text) {
+		foreach ($this->tokens as $token) {
+			if (Str::contains($token, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, 'followers') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasTokenThatEndsWith($text) {
+		foreach ($this->tokens as $token) {
+			if (Str::endsWith($token, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, 'horstie') !== false) {
-			return true;
+		return false;
+	}
+
+	public function hasTokenThatStartsOrEndsWith($text) {
+		foreach ($this->tokens as $token) {
+			if (Str::startsWith($token, $text) || Str::endsWith($token, $text)) {
+				return true;
+			}
 		}
-		if (strpos($this->raw, 'promotion') !== false) {
+		return false;
+	}
+
+	public function hasTokenThatStartsWith($text) {
+		foreach ($this->tokens as $token) {
+			if (Str::startsWith($token, $text)) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	public function startsOrEndsWith($text) {
+		return $this->startsWith($text) || $this->endsWith($text);
+	}
+
+	public function startsOrEndsWithRaw($text) {
+		return $this->startsWithRaw($text) || $this->endsWithRaw($text);
+	}
+
+	public function startsWith($text) {
+		return Str::startsWith($this->text, $text);
+	}
+
+	public function startsWithRaw($text) {
+		return Str::startsWith($this->raw, $text);
+	}
+
+
+	public function isSpammy() {
+		if ($this->startsWith('!')) {
 			return true;
 		}
-		if (strpos($this->raw, 'viewers') !== false) {
+		if ($this->contains(['€', '$', '@', '://'])) {
 			return true;
 		}
-		if (strpos($this->raw, 'view ers') !== false) {
+		if ($this->containsRaw(['followers', 'promotion', 'viewers'])) {
 			return true;
 		}
-		if (strpos($this->raw, 'vielen dank für den raid') !== false) {
+		if ($this->containsRaw('horstie')) {
 			return true;
 		}
-		if (strpos($this->raw, 'willkommen auf starbase 47') !== false) {
+		if ($this->containsRaw(['vielendankfürdenraid', 'willkommenaufstarbase47'])) {
 			return true;
 		}
 		return false;
@@ -97,19 +190,19 @@ class TokenizedMessage {
 				$this->classification = 'unclassified';
 			} else if (is_numeric($this->raw)) {
 				$this->classification = 'number';
-			} else if (Str::startsWith($this->raw, 'gg') || Str::endsWith($this->raw, 'gg')) {
+			} else if ($this->hasTokenThatStartsOrEndsWith(['gg'])) {
 				$this->classification = 'gg';
-			} else if (Str::contains($this->raw, ['glgl', 'glhf', 'hfgl'])) {
+			} else if ($this->containsRaw(['glgl', 'glhf', 'hfgl'])) {
 				$this->classification = 'gl';
-			} else if (Str::contains($this->raw, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) {
+			} else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd']) || $this->hasTokenThatStartsWith(':d')) {
 				$this->classification = 'lol';
-			} else if (Str::startsWith($this->raw, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($this->raw, ['hello', 'hi', 'wave'])) {
+			} else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'huhu']) || $this->hasEmoteThatEndsWith(['hello', 'hi', 'wave']) || $this->hasToken('hi')) {
 				$this->classification = 'hi';
-			} else if (Str::contains($this->raw, ['pog', 'wow'])) {
+			} else if ($this->containsRaw(['pog', 'wow'])) {
 				$this->classification = 'pog';
-			} else if (Str::contains($this->raw, ['hype'])) {
+			} else if ($this->containsRaw(['hype'])) {
 				$this->classification = 'hype';
-			} else if (Str::startsWith($this->raw, 'o7') || Str::endsWith($this->raw, 'o7') || Str::contains($this->raw, 'salut')) {
+			} else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) {
 				$this->classification = 'o7';
 			} else {
 				$this->classification = 'unclassified';
@@ -126,6 +219,7 @@ class TokenizedMessage {
 
 	private $emotes = [];
 	private $emoteless = '';
+	private $emoteless_raw = '';
 	private $emoteless_tokens = [];
 
 	private $classification = null;
diff --git a/tests/Unit/TwitchBot/TokenizedMessageTest.php b/tests/Unit/TwitchBot/TokenizedMessageTest.php
index 8d41573..48ae5ab 100644
--- a/tests/Unit/TwitchBot/TokenizedMessageTest.php
+++ b/tests/Unit/TwitchBot/TokenizedMessageTest.php
@@ -11,13 +11,17 @@ class TokenizedMessageTest extends TestCase {
 		$this->assertEquals('gg', TokenizedMessage::fromString('gg')->classify());
 		$this->assertEquals('gg', TokenizedMessage::fromString('GG')->classify());
 		$this->assertEquals('gg', TokenizedMessage::fromString('Gg')->classify());
+		$this->assertEquals('gg', TokenizedMessage::fromString('ggs')->classify());
+		$this->assertEquals('gg', TokenizedMessage::fromString('ja gg dann, ne')->classify());
+		$this->assertEquals('gg', TokenizedMessage::fromString('duden2Gg')->classify());
+		$this->assertNotEquals('gg', TokenizedMessage::fromString('Eggnog')->classify());
 
 		$this->assertEquals('gl', TokenizedMessage::fromString('glhf')->classify());
 		$this->assertEquals('gl', TokenizedMessage::fromString('gl & hf')->classify());
 
 		$this->assertEquals('hi', TokenizedMessage::fromString('hi')->classify());
 		$this->assertEquals('hi', TokenizedMessage::fromString('hallo')->classify());
-		$this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello')->classify());
+		$this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello', ['emotes' => 'blah:0-10'])->classify());
 		$this->assertNotEquals('hi', TokenizedMessage::fromString('hier steht was')->classify());
 
 		$this->assertEquals('hype', TokenizedMessage::fromString('122 Hype!')->classify());
@@ -28,12 +32,28 @@ class TokenizedMessageTest extends TestCase {
 		$this->assertEquals('lol', TokenizedMessage::fromString('haha')->classify());
 		$this->assertEquals('lol', TokenizedMessage::fromString('KEKW')->classify());
 		$this->assertEquals('lol', TokenizedMessage::fromString('LUL')->classify());
+		$this->assertEquals('lol', TokenizedMessage::fromString(':D')->classify());
 
 		$this->assertEquals('o7', TokenizedMessage::fromString('o7')->classify());
+		$this->assertEquals('o7', TokenizedMessage::fromString('ticknaSalutieren', ['emotes' => 'blah:0-15'])->classify());
 
 		$this->assertEquals('pog', TokenizedMessage::fromString('Pog')->classify());
 
 		$this->assertEquals('unclassified', TokenizedMessage::fromString('')->classify());
 	}
 
+	public function test_spam() {
+		$this->assertTrue(TokenizedMessage::fromString('!start')->isSpammy());
+		$this->assertTrue(TokenizedMessage::fromString('@LocalhorstTV')->isSpammy());
+
+		$this->assertTrue(TokenizedMessage::fromString('just 50€')->isSpammy());
+		$this->assertTrue(TokenizedMessage::fromString('hello would you like some followers?')->isSpammy());
+		$this->assertTrue(TokenizedMessage::fromString('get view ers for free')->isSpammy());
+
+		$this->assertTrue(TokenizedMessage::fromString('also bitte, horstie')->isSpammy());
+
+		$this->assertTrue(TokenizedMessage::fromString('hey maengi, vielen dank für den raid')->isSpammy());
+		$this->assertTrue(TokenizedMessage::fromString('Willkommen auf Starbase 47')->isSpammy());
+	}
+
 }