use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
+use Illuminate\Support\Arr;
+use Illuminate\Support\Str;
+use LanguageDetector\LanguageDetector;
class ChatLog extends Model {
}
if ($this->command == 'PRIVMSG') {
- if ($this->isKnownBot()) {
+ if (static::isKnownBot($this->nick)) {
$this->type = 'bot';
} else if (substr($this->params[0], 0, 1) == '#') {
$this->type = 'chat';
$this->type = 'dm';
}
$this->text_content = $this->params[1];
+ $this->detectLanguage();
if ($this->scanForSpam()) {
$this->banned = true;
}
+ $this->classification = static::classify($this->text_content);
return;
}
throw new \Exception('unidentified message');
}
- public function isKnownBot() {
- return in_array(strtolower($this->nick), [
+ /**
+  * Tell whether a nickname is on the hard-coded list of known bot accounts.
+  *
+  * The nickname is passed through strtolower() before the lookup, so the
+  * list entries below must be written in lower case.
+  *
+  * @param string $nick nickname to look up
+  * @return bool true when the lower-cased nickname appears in the list
+  */
+ public static function isKnownBot($nick) {
+ return in_array(strtolower($nick), [
+ 'birrellthesquirrel',
'funtoon',
+ 'nidbot2000',
'nightbot',
'pokemoncommunitygame',
+ 'speedgaming',
+ 'starbase47',
'streamelements',
'wizebot',
'zockerstuebchen',
]);
}
+    /**
+     * Sort a chat message into one of a few coarse reaction categories.
+     *
+     * The checks run in a fixed order and the first hit wins, so a message
+     * containing both "lol" and "pog" is reported as 'lol'.
+     *
+     * @param string|null $text raw message text
+     * @return string one of 'number', 'gg', 'gl', 'lol', 'hi', 'pog', 'hype',
+     *                'o7' or 'unclassified'
+     */
+    public static function classify($text) {
+        // Compare against '' rather than using empty(): empty('0') is true,
+        // which would make the message "0" bail out before the numeric check.
+        $trimmed = is_array($text) ? '' : trim((string) $text);
+        if ($trimmed === '') {
+            return 'unclassified';
+        }
+        if (is_numeric($trimmed)) {
+            return 'number';
+        }
+
+        // Message reduced to its word characters only (no spaces, no punctuation).
+        $rawText = strtolower(preg_replace('/[^\w]/', '', $trimmed));
+
+        // First whitespace-separated word, stripped of punctuation. Needed for
+        // the "hi" greeting: $rawText has no spaces left, so a 'hi ' prefix
+        // needle can never match there.
+        $tokens = preg_split('/\s+/', strtolower($trimmed)) ?: [];
+        $firstWord = preg_replace('/[^\w]/', '', $tokens[0] ?? '');
+
+        if (Str::startsWith($rawText, 'gg') || Str::endsWith($rawText, 'gg')) {
+            return 'gg';
+        }
+        if (Str::contains($rawText, ['glgl', 'glhf', 'hfgl'])) {
+            return 'gl';
+        }
+        if (Str::contains($rawText, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) {
+            return 'lol';
+        }
+        if (
+            $firstWord === 'hi'
+            || Str::startsWith($rawText, ['ahoi', 'hallo', 'hello', 'huhu'])
+            || Str::endsWith($rawText, ['hi', 'wave'])
+        ) {
+            return 'hi';
+        }
+        if (Str::contains($rawText, ['pog', 'wow'])) {
+            return 'pog';
+        }
+        if (Str::contains($rawText, ['hype'])) {
+            return 'hype';
+        }
+        if (Str::startsWith($rawText, 'o7') || Str::endsWith($rawText, 'o7') || Str::contains($rawText, 'salut')) {
+            return 'o7';
+        }
+        return 'unclassified';
+    }
+
protected function evaluateUser() {
}
$this->channel()->associate($channel);
}
- protected function scanForSpam() {
- if (substr($this->text_content, 0, 1) == '!') {
+    /**
+     * Guess the language of the message text and store it when the guess is
+     * confident enough.
+     *
+     * Candidate languages are the associated channel's languages (with 'en'
+     * always included) or, when no channel is set, de/en/es/fr.
+     * detected_language is only written when the winning language scores
+     * above 0.35.
+     */
+    protected function detectLanguage() {
+        $candidates = ['de', 'en', 'es', 'fr'];
+        if ($this->channel !== null) {
+            $candidates = array_values($this->channel->languages);
+            if (!in_array('en', $candidates)) {
+                $candidates[] = 'en';
+            }
+        }
+
+        $result = LanguageDetector::detect($this->text_content, $candidates);
+        $scoreByLanguage = $result->getScores();
+        $winner = (string) $result->getLanguage();
+
+        // Nothing to record unless the detector produced a score for its winner.
+        if (!is_array($scoreByLanguage) || !isset($scoreByLanguage[$winner])) {
+            return;
+        }
+        if ($scoreByLanguage[$winner] > 0.35) {
+            $this->detected_language = $winner;
+        }
+    }
+
+ /**
+  * Check a piece of text against the hard-coded spam markers.
+  *
+  * The text is lower-cased first, so every marker below is matched
+  * case-insensitively. A leading '!' or any occurrence of one of the
+  * listed symbols or phrases marks the text as spam.
+  *
+  * @param string|null $raw_text text to inspect
+  * @return bool true when any marker matches, false otherwise
+  */
+ public static function spammyText($raw_text) {
+ // mb_strtolower instead of strtolower: the byte-wise variant leaves "Ü"
+ // untouched, so upper-case text would never match the 'für' phrase below.
+ $text = mb_strtolower((string) $raw_text, 'UTF-8');
+ if (substr($text, 0, 1) == '!') {
return true;
}
- if (strpos($this->text_content, '$') !== false) {
+ if (strpos($text, '$') !== false) {
return true;
}
- if (strpos($this->text_content, '€') !== false) {
+ if (strpos($text, '€') !== false) {
return true;
}
- if (strpos($this->text_content, '@') !== false) {
+ if (strpos($text, '@') !== false) {
return true;
}
- if (strpos($this->text_content, '://') !== false) {
+ if (strpos($text, '://') !== false) {
return true;
}
- if (is_numeric($this->text_content)) {
+ if (strpos($text, 'followers') !== false) {
+ return true;
+ }
+ if (strpos($text, 'horstie') !== false) {
+ return true;
+ }
+ if (strpos($text, 'promotion') !== false) {
return true;
}
- if (strpos($this->text_content, 'followers') !== false) {
+ if (strpos($text, 'viewers') !== false) {
return true;
}
- if (strpos($this->text_content, 'promotion') !== false) {
+ if (strpos($text, 'view ers') !== false) {
return true;
}
- if (strpos($this->text_content, 'viewers') !== false) {
+ if (strpos($text, 'vielen dank für den raid') !== false) {
return true;
}
- if (strpos($this->text_content, 'view ers') !== false) {
+ if (strpos($text, 'willkommen auf starbase 47') !== false) {
return true;
}
return false;
}
+    /**
+     * Decide whether this message's text should be treated as spam.
+     *
+     * Purely numeric text counts as spam; anything else is handed to the
+     * static spammyText() check.
+     *
+     * @return bool
+     */
+    protected function scanForSpam() {
+        $message = $this->text_content;
+
+        return is_numeric($message) || static::spammyText($message);
+    }
+
protected $casts = [
'banned' => 'boolean',
'params' => 'array',