public function __construct($text, $tags = []) {
$this->text = $text;
$this->tags = $tags;
- $this->raw = strtolower(preg_replace('/[^\w]/', '', $text));
+ $this->raw = strtolower(preg_replace('/[^\w]/u', '', $text));
$this->tokens = preg_split('/\s+/', strtolower(trim($text)));
$this->emoteless = $this->text;
$positions = explode(',', $set[1]);
foreach ($positions as $position) {
$coords = explode('-', $position);
- $this->emotes[] = substr($this->text, $coords[0], $coords[1] - $coords[0] + 1);
+ $this->emotes[] = strtolower(substr($this->text, $coords[0], $coords[1] - $coords[0] + 1));
for ($i = intval($coords[0]); $i <= intval($coords[1]); ++$i) {
$this->emoteless[$i] = ' ';
}
}
$this->emoteless = trim(preg_replace('/\s+/', ' ', $this->emoteless));
}
+ // Strip non-word characters in UTF-8 mode so emoteless_raw is derived the same way as raw,
+ // which uses the /u pattern. Without /u the subject is processed byte by byte and multibyte
+ // letters such as "ü" are typically removed.
+ $emoteless_stripped = preg_replace('/[^\w]/u', '', $this->emoteless);
+ if ($emoteless_stripped === null) {
+     // preg_replace() returns null on error, e.g. if the byte-wise blanking of emote positions
+     // above left invalid UTF-8 behind; fall back to the byte-oriented pattern.
+     $emoteless_stripped = preg_replace('/[^\w]/', '', $this->emoteless);
+ }
+ $this->emoteless_raw = strtolower($emoteless_stripped);
$this->emoteless_tokens = preg_split('/\s+/', strtolower($this->emoteless));
}
}
+ /**
+  * Checks the message text exactly as it was passed to the constructor.
+  *
+  * @param mixed $text needle(s), forwarded unchanged to Str::contains(); isSpammy() passes an array of strings
+  * @return mixed the result of Str::contains() for $this->text
+  */
+ public function contains($text) {
+     $haystack = $this->text;
+     return Str::contains($haystack, $text);
+ }
+
+ /**
+  * Checks the normalised form of the message ($this->raw: lower-cased, non-word characters removed).
+  * Needles therefore only make sense in lower case and without spaces or punctuation.
+  *
+  * @param mixed $text needle(s), forwarded unchanged to Str::contains(); callers pass a string or an array of strings
+  * @return mixed the result of Str::contains() for $this->raw
+  */
+ public function containsRaw($text) {
+     $normalised = $this->raw;
+     return Str::contains($normalised, $text);
+ }
+
+ /**
+  * Tests the end of the message text exactly as it was passed to the constructor.
+  *
+  * @param mixed $text suffix(es), forwarded unchanged to Str::endsWith()
+  * @return mixed the result of Str::endsWith() for $this->text
+  */
+ public function endsWith($text) {
+     $subject = $this->text;
+     return Str::endsWith($subject, $text);
+ }
+
+ /**
+  * Tests the end of the normalised message ($this->raw: lower-cased, non-word characters removed).
+  *
+  * @param mixed $text suffix(es), forwarded unchanged to Str::endsWith()
+  * @return mixed the result of Str::endsWith() for $this->raw
+  */
+ public function endsWithRaw($text) {
+     $normalised = $this->raw;
+     return Str::endsWith($normalised, $text);
+ }
+
// Returns intval() of the message text as it was passed to the constructor.
// The `-` line is the superseded variant, which parsed $this->raw (the lower-cased text with
// non-word characters removed) instead.
public function getNumericValue() {
- return intval($this->raw);
+ return intval($this->text);
}
- public function isSpammy() {
- if (substr($this->raw, 0, 1) == '!') {
- return true;
+ /**
+  * Reports whether $text is one of the emotes recorded for this message.
+  *
+  * Emote names are lower-cased when they are recorded, so $text has to be lower case to match.
+  * The lookup is strict: with loose comparison numeric-looking strings such as "1e1" and "10"
+  * would be treated as equal.
+  *
+  * @param string $text emote name to look for
+  * @return bool true when an identical string is present in $this->emotes
+  */
+ public function hasEmote($text) {
+     return in_array($text, $this->emotes, true);
+ }
+
// Reports whether Str::contains($emote, $text) holds for at least one entry of $this->emotes.
// Returns true on the first match and false when nothing matches (including an empty emote list).
// Emote names are lower-cased when recorded, so needles should be lower case.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasEmoteThatContains($text) {
+ foreach ($this->emotes as $emote) {
+ if (Str::contains($emote, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, '$') !== false) {
- return true;
+ return false;
+ }
+
// Reports whether Str::endsWith($emote, $text) holds for at least one entry of $this->emotes.
// Returns true on the first match and false when nothing matches (including an empty emote list).
// classify() calls this with an array of suffixes; emote names are lower-cased when recorded.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasEmoteThatEndsWith($text) {
+ foreach ($this->emotes as $emote) {
+ if (Str::endsWith($emote, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, '€') !== false) {
- return true;
+ return false;
+ }
+
// Reports whether at least one entry of $this->emotes satisfies Str::startsWith() or
// Str::endsWith() for $text. Returns true on the first match, false otherwise.
// Emote names are lower-cased when recorded, so $text should be lower case.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasEmoteThatStartsOrEndsWith($text) {
+ foreach ($this->emotes as $emote) {
+ if (Str::startsWith($emote, $text) || Str::endsWith($emote, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, '@') !== false) {
- return true;
+ return false;
+ }
+
// Reports whether Str::startsWith($emote, $text) holds for at least one entry of $this->emotes.
// Returns true on the first match and false when nothing matches (including an empty emote list).
// Emote names are lower-cased when recorded, so $text should be lower case.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasEmoteThatStartsWith($text) {
+ foreach ($this->emotes as $emote) {
+ if (Str::startsWith($emote, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, '://') !== false) {
- return true;
+ return false;
+ }
+
+ /**
+  * Reports whether $text is one of the message's tokens.
+  *
+  * Tokens are the lower-cased, whitespace-separated words of the message, so $text has to be
+  * lower case to match. The lookup is strict: tokens come from chat input, and with loose
+  * comparison numeric-looking strings such as "01", "1.0" or "1e0" would all match "1".
+  *
+  * @param string $text token to look for
+  * @return bool true when an identical string is present in $this->tokens
+  */
+ public function hasToken($text) {
+     return in_array($text, $this->tokens, true);
+ }
+
// Reports whether Str::contains($token, $text) holds for at least one entry of $this->tokens.
// Returns true on the first match, false otherwise.
// Tokens are the lower-cased, whitespace-separated words of the message, so needles should be lower case.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasTokenThatContains($text) {
+ foreach ($this->tokens as $token) {
+ if (Str::contains($token, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, 'followers') !== false) {
- return true;
+ return false;
+ }
+
// Reports whether Str::endsWith($token, $text) holds for at least one entry of $this->tokens.
// Returns true on the first match, false otherwise.
// Tokens are the lower-cased, whitespace-separated words of the message, so suffixes should be lower case.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` lines inside this span are removed lines of the previous isSpammy() body.
+ public function hasTokenThatEndsWith($text) {
+ foreach ($this->tokens as $token) {
+ if (Str::endsWith($token, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, 'horstie') !== false) {
- return true;
+ return false;
+ }
+
// Reports whether at least one entry of $this->tokens satisfies Str::startsWith() or
// Str::endsWith() for $text. Returns true on the first match, false otherwise.
// classify() calls this with ['gg']; tokens are lower-cased, whitespace-separated words.
// NOTE(review): Str is not defined in this view - presumably Laravel's Illuminate\Support\Str; confirm.
// The `-` line inside this span is a removed line of the previous isSpammy() body.
+ public function hasTokenThatStartsOrEndsWith($text) {
+ foreach ($this->tokens as $token) {
+ if (Str::startsWith($token, $text) || Str::endsWith($token, $text)) {
+ return true;
+ }
}
- if (strpos($this->raw, 'promotion') !== false) {
+ return false;
+ }
+
+ /**
+  * Reports whether any token of the message starts with the given prefix(es).
+  *
+  * Tokens are the lower-cased, whitespace-separated words of the message, so prefixes
+  * should be lower case (classify() passes ':d').
+  *
+  * @param mixed $text prefix(es), forwarded unchanged to Str::startsWith()
+  * @return bool true as soon as one token matches, false when none does
+  */
+ public function hasTokenThatStartsWith($text) {
+     foreach ($this->tokens as $candidate) {
+         if (!Str::startsWith($candidate, $text)) {
+             continue;
+         }
+         return true;
+     }
+     return false;
+ }
+
+ /**
+  * Reports whether the message text begins or ends with $text.
+  * endsWith() is only consulted when startsWith() did not match.
+  *
+  * @param mixed $text needle(s), handed to startsWith() and endsWith() unchanged
+  * @return bool
+  */
+ public function startsOrEndsWith($text) {
+     if ($this->startsWith($text)) {
+         return true;
+     }
+     return (bool) $this->endsWith($text);
+ }
+
+ /**
+  * Reports whether the normalised message ($this->raw) begins or ends with $text.
+  * endsWithRaw() is only consulted when startsWithRaw() did not match.
+  *
+  * @param mixed $text needle(s), handed to startsWithRaw() and endsWithRaw() unchanged
+  * @return bool
+  */
+ public function startsOrEndsWithRaw($text) {
+     if ($this->startsWithRaw($text)) {
+         return true;
+     }
+     return (bool) $this->endsWithRaw($text);
+ }
+
+ /**
+  * Tests the beginning of the message text exactly as it was passed to the constructor.
+  *
+  * @param mixed $text prefix(es), forwarded unchanged to Str::startsWith(); isSpammy() passes '!'
+  * @return mixed the result of Str::startsWith() for $this->text
+  */
+ public function startsWith($text) {
+     $subject = $this->text;
+     return Str::startsWith($subject, $text);
+ }
+
+ /**
+  * Tests the beginning of the normalised message ($this->raw: lower-cased, non-word characters removed).
+  *
+  * @param mixed $text prefix(es), forwarded unchanged to Str::startsWith(); classify() passes an array
+  * @return mixed the result of Str::startsWith() for $this->raw
+  */
+ public function startsWithRaw($text) {
+     $normalised = $this->raw;
+     return Str::startsWith($normalised, $text);
+ }
+
+
+ public function isSpammy() {
+ if ($this->startsWith('!')) {
return true;
}
- if (strpos($this->raw, 'viewers') !== false) {
+ if ($this->contains(['€', '$', '@', '://'])) {
return true;
}
- if (strpos($this->raw, 'view ers') !== false) {
+ if ($this->containsRaw(['followers', 'promotion', 'viewers'])) {
return true;
}
- if (strpos($this->raw, 'vielen dank für den raid') !== false) {
+ if ($this->containsRaw('horstie')) {
return true;
}
- if (strpos($this->raw, 'willkommen auf starbase 47') !== false) {
+ if ($this->containsRaw(['vielendankfürdenraid', 'willkommenaufstarbase47'])) {
return true;
}
return false;
$this->classification = 'unclassified';
} else if (is_numeric($this->raw)) {
$this->classification = 'number';
- } else if (Str::startsWith($this->raw, 'gg') || Str::endsWith($this->raw, 'gg')) {
+ } else if ($this->hasTokenThatStartsOrEndsWith(['gg'])) {
$this->classification = 'gg';
- } else if (Str::contains($this->raw, ['glgl', 'glhf', 'hfgl'])) {
+ } else if ($this->containsRaw(['glgl', 'glhf', 'hfgl'])) {
$this->classification = 'gl';
- } else if (Str::contains($this->raw, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) {
+ } else if ($this->containsRaw(['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd']) || $this->hasTokenThatStartsWith(':d')) {
$this->classification = 'lol';
- } else if (Str::startsWith($this->raw, ['ahoi', 'hallo', 'hello', 'hi ', 'huhu']) || Str::endsWith($this->raw, ['hello', 'hi', 'wave'])) {
+ } else if ($this->startsWithRaw(['ahoi', 'hallo', 'hello', 'huhu']) || $this->hasEmoteThatEndsWith(['hello', 'hi', 'wave']) || $this->hasToken('hi')) {
$this->classification = 'hi';
- } else if (Str::contains($this->raw, ['pog', 'wow'])) {
+ } else if ($this->containsRaw(['pog', 'wow'])) {
$this->classification = 'pog';
- } else if (Str::contains($this->raw, ['hype'])) {
+ } else if ($this->containsRaw(['hype'])) {
$this->classification = 'hype';
- } else if (Str::startsWith($this->raw, 'o7') || Str::endsWith($this->raw, 'o7') || Str::contains($this->raw, 'salut')) {
+ } else if ($this->startsOrEndsWithRaw(['o7']) || $this->hasEmoteThatContains('salut')) {
$this->classification = 'o7';
} else {
$this->classification = 'unclassified';
private $emotes = [];
private $emoteless = '';
+ private $emoteless_raw = '';
private $emoteless_tokens = [];
private $classification = null;
$this->assertEquals('gg', TokenizedMessage::fromString('gg')->classify());
$this->assertEquals('gg', TokenizedMessage::fromString('GG')->classify());
$this->assertEquals('gg', TokenizedMessage::fromString('Gg')->classify());
+ $this->assertEquals('gg', TokenizedMessage::fromString('ggs')->classify());
+ $this->assertEquals('gg', TokenizedMessage::fromString('ja gg dann, ne')->classify());
+ $this->assertEquals('gg', TokenizedMessage::fromString('duden2Gg')->classify());
+ $this->assertNotEquals('gg', TokenizedMessage::fromString('Eggnog')->classify());
$this->assertEquals('gl', TokenizedMessage::fromString('glhf')->classify());
$this->assertEquals('gl', TokenizedMessage::fromString('gl & hf')->classify());
$this->assertEquals('hi', TokenizedMessage::fromString('hi')->classify());
$this->assertEquals('hi', TokenizedMessage::fromString('hallo')->classify());
- $this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello')->classify());
+ $this->assertEquals('hi', TokenizedMessage::fromString('osora9Hello', ['emotes' => 'blah:0-10'])->classify());
$this->assertNotEquals('hi', TokenizedMessage::fromString('hier steht was')->classify());
$this->assertEquals('hype', TokenizedMessage::fromString('122 Hype!')->classify());
$this->assertEquals('lol', TokenizedMessage::fromString('haha')->classify());
$this->assertEquals('lol', TokenizedMessage::fromString('KEKW')->classify());
$this->assertEquals('lol', TokenizedMessage::fromString('LUL')->classify());
+ $this->assertEquals('lol', TokenizedMessage::fromString(':D')->classify());
$this->assertEquals('o7', TokenizedMessage::fromString('o7')->classify());
+ $this->assertEquals('o7', TokenizedMessage::fromString('ticknaSalutieren', ['emotes' => 'blah:0-15'])->classify());
$this->assertEquals('pog', TokenizedMessage::fromString('Pog')->classify());
$this->assertEquals('unclassified', TokenizedMessage::fromString('')->classify());
}
+ /**
+  * Every sample below must be flagged by isSpammy(): command prefix, mention, currency sign,
+  * advertising keywords (also when split by a space), the bot name and two canned phrases.
+  */
+ public function test_spam() {
+     $spamSamples = [
+         '!start',
+         '@LocalhorstTV',
+         'just 50€',
+         'hello would you like some followers?',
+         'get view ers for free',
+         'also bitte, horstie',
+         'hey maengi, vielen dank für den raid',
+         'Willkommen auf Starbase 47',
+     ];
+
+     // Samples are checked in the listed order; the first failing assertion aborts the test.
+     foreach ($spamSamples as $sample) {
+         $this->assertTrue(TokenizedMessage::fromString($sample)->isSpammy());
+     }
+ }
+
+
}