namespace App\Models;
+use App\TwitchBot\TokenizedMessage;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Support\Arr;
use Illuminate\Support\Str;
-use LanguageDetector\LanguageDetector;
+use LanguageDetection\Language;
class ChatLog extends Model {
return $this->belongsTo(User::class);
}
+ /**
+  * Build the tokenized representation of this log entry.
+  *
+  * Delegates entirely to TokenizedMessage::fromLog(), handing it this model.
+  *
+  * @return mixed whatever TokenizedMessage::fromLog() yields (presumably a
+  *               TokenizedMessage instance; confirm against that class)
+  */
+ public function tokenize() {
+     $tokenized = TokenizedMessage::fromLog($this);
+     return $tokenized;
+ }
+
+ /**
+  * Tell whether this message carries a reply-parent body tag.
+  *
+  * A plain empty() check is not enough here: PHP considers the string "0"
+  * empty, so a reply to a message whose whole text is "0" would be
+  * misreported as a non-reply. That one value is therefore accepted
+  * explicitly; every other value is judged exactly as empty() would.
+  *
+  * @return bool true when the 'reply-parent-msg-body' tag is present and non-empty
+  */
+ public function isReply() {
+     $parentBody = $this->tags['reply-parent-msg-body'] ?? null;
+     return !empty($parentBody) || $parentBody === '0';
+ }
+
+ /**
+  * Return the text of the message this one replies to, with tag escapes decoded.
+  *
+  * The value is read from the 'reply-parent-msg-body' tag. Message tag values
+  * escape a handful of characters with a backslash (\s space, \: semicolon,
+  * \\ backslash, \r CR, \n LF); all of them are decoded here.
+  *
+  * strtr() with a map is used on purpose: it scans the input once and never
+  * re-examines replaced text, so an escaped backslash followed by "s"
+  * decodes to a literal "\s" instead of being turned into a space.
+  *
+  * @return string decoded parent message, or '' when the tag is absent
+  */
+ public function getReplyParent() {
+     $escaped = $this->tags['reply-parent-msg-body'] ?? '';
+     return strtr($escaped, [
+         '\\\\' => '\\',
+         '\\s' => ' ',
+         '\\:' => ';',
+         '\\r' => "\r",
+         '\\n' => "\n",
+     ]);
+ }
+
+ /**
+  * Return the 'reply-parent-display-name' tag of this message.
+  *
+  * No presence check is made; callers are expected to consult isReply() first.
+  *
+  * @return mixed the raw tag value (presumably the parent author's display name)
+  */
+ public function getReplyParentUser() {
+     $parentName = $this->tags['reply-parent-display-name'];
+     return $parentName;
+ }
+
+ /**
+  * Return the raw message text, i.e. the second entry (index 1) of params.
+  *
+  * @return mixed the stored value, untouched
+  */
+ public function getText() {
+     $rawText = $this->params[1];
+     return $rawText;
+ }
+
+ /**
+  * Return the message text with every emote occurrence blanked out.
+  *
+  * The 'emotes' tag is parsed as "/"-separated entries of the form
+  * "<id>:<start>-<end>,<start>-<end>". Each start/end pair is treated as an
+  * inclusive character range and overwritten with the same number of spaces,
+  * so the offsets of later ranges stay valid while earlier ones are removed.
+  * Finally, whitespace runs are collapsed to one space and the ends trimmed.
+  *
+  * @return string text from params[1] without emotes, whitespace-normalized
+  */
+ public function getTextWithoutEmotes() {
+     $message = $this->params[1];
+     if (!empty($this->tags['emotes'])) {
+         foreach (explode('/', $this->tags['emotes']) as $emoteSpec) {
+             $parts = explode(':', $emoteSpec);
+             foreach (explode(',', $parts[1]) as $range) {
+                 $bounds = explode('-', $range);
+                 // Replace the range in place with padding of identical length.
+                 $head = mb_substr($message, 0, $bounds[0]);
+                 $padding = str_repeat(' ', $bounds[1] - $bounds[0] + 1);
+                 $tail = mb_substr($message, $bounds[1] + 1);
+                 $message = $head.$padding.$tail;
+             }
+         }
+     }
+     $collapsed = preg_replace('/\s+/', ' ', $message);
+     return trim($collapsed);
+ }
+
+ /**
+  * Return the message text with the leading reply mention cut off.
+  *
+  * For replies, the first (length of parent display name + 2) characters are
+  * dropped; the 2 presumably accounts for a leading "@" and the following
+  * space. The prefix is not verified before cutting. Non-replies are
+  * returned unchanged.
+  *
+  * @return mixed params[1], shortened at the front when this is a reply
+  */
+ public function getTextWithoutReply() {
+     if (!$this->isReply()) {
+         return $this->params[1];
+     }
+     $prefixLength = mb_strlen($this->getReplyParentUser()) + 2;
+     return mb_substr($this->params[1], $prefixLength);
+ }
+
public function evaluate() {
$this->evaluateUser();
$this->evaluateChannel();
$this->type = 'self';
return;
}
+ if (!empty($this->params) && $this->params[0] == '#'.$this->nick) {
+ $this->type = 'owner';
+ return;
+ }
- if ($this->command == 'PRIVMSG') {
+ if ($this->command == 'PRIVMSG' || $this->command == 'WHISPER') {
if (static::isKnownBot($this->nick)) {
$this->type = 'bot';
} else if (substr($this->params[0], 0, 1) == '#') {
} else {
$this->type = 'dm';
}
- $this->text_content = $this->params[1];
+ $this->text_content = $this->getTextWithoutReply();
$this->detectLanguage();
- if ($this->scanForSpam()) {
+ $tokenized = $this->tokenize();
+ if ($tokenized->isSpammy()) {
$this->banned = true;
}
- $this->classification = static::classify($this->text_content);
+ $this->emote_only = $tokenized->isEmoteOnly();
+ $this->classification = $tokenized->classify();
return;
}
/**
 * Check a nick against the hard-coded list of known bot accounts.
 *
 * The nick is lowercased before the lookup, so matching is case-insensitive.
 * The lookup uses in_array() without the strict flag.
 *
 * @param mixed $nick nick to test (passed to strtolower(), so expected to be a string)
 * @return bool true when the lowercased nick is in the list
 */
public static function isKnownBot($nick) {
return in_array(strtolower($nick), [
+ 'a_n_i_v',
'birrellthesquirrel',
+ 'brokkobot',
+ 'creatisbot',
+ 'fossabot',
'funtoon',
'nidbot2000',
'nightbot',
'pokemoncommunitygame',
+ 'sery_bot',
'speedgaming',
'starbase47',
'streamelements',
]);
}
- public static function classify($text) {
- if (empty($text)) {
- return 'unclassified';
- }
- if (is_numeric(trim($text))) {
- return 'number';
- }
- $rawText = strtolower(preg_replace('/[^\w]/', '', $text));
- $tokenizedText = preg_split('/\s+/', strtolower(trim($text)));
- if (Str::startsWith($rawText, 'gg') || Str::endsWith($rawText, 'gg')) {
- return 'gg';
- }
- if (Str::contains($rawText, ['glgl', 'glhf', 'hfgl'])) {
- return 'gl';
- }
- if (Str::contains($rawText, ['haha', 'hehe', 'hihi', 'kekw', 'lol', 'lul', 'xd'])) {
- return 'lol';
- }
- if (Str::startsWith($rawText, ['ahoi', 'hallo', 'hello', 'hi', 'huhu']) || Str::endsWith($rawText, ['hi', 'wave'])) {
- return 'hi';
- }
- if (Str::contains($rawText, ['pog', 'wow'])) {
- return 'pog';
- }
- if (Str::contains($rawText, ['hype'])) {
- return 'hype';
- }
- if (Str::startsWith($rawText, 'o7') || Str::endsWith($rawText, 'o7') || Str::contains($rawText, 'salut')) {
- return 'o7';
- }
- return 'unclassified';
- }
-
protected function evaluateUser() {
}
protected function evaluateChannel() {
if (empty($this->params)) {
- $this->channel()->associate(null);
return;
}
$cname = $this->params[0];
$cname = '#'.$cname;
}
$channel = Channel::firstWhere('twitch_chat', '=', $cname);
- $this->channel()->associate($channel);
+ if (!is_null($channel)) {
+ $this->channel()->associate($channel);
+ if (empty($this->twitch_category) && now()->sub(15, 'minute')->isBefore($this->created_at)) {
+ $this->twitch_category = $channel->twitch_category;
+ }
+ }
}
protected function detectLanguage() {
$languages[] = 'en';
}
}
- $detector = LanguageDetector::detect($this->text_content, $languages);
- $scores = $detector->getScores();
- $lang = strval($detector->getLanguage());
+ $detector = (new Language($languages))->detect($this->getTextWithoutEmotes());
+ $scores = $detector->close();
+ $lang = strval($detector);
//var_dump($scores, $lang, $this->text_content);
- if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) {
+ if (!empty($lang) && $scores[$lang] > 0.4) {
$this->detected_language = $lang;
}
}
- public static function spammyText($text) {
- if (substr($text, 0, 1) == '!') {
- return true;
- }
- if (strpos($text, '$') !== false) {
- return true;
- }
- if (strpos($text, '€') !== false) {
- return true;
- }
- if (strpos($text, '@') !== false) {
- return true;
- }
- if (strpos($text, '://') !== false) {
- return true;
- }
- if (strpos($text, 'followers') !== false) {
- return true;
- }
- if (strpos($text, 'promotion') !== false) {
- return true;
- }
- if (strpos($text, 'viewers') !== false) {
- return true;
- }
- if (strpos($text, 'view ers') !== false) {
- return true;
- }
- return false;
- }
-
- protected function scanForSpam() {
- if (is_numeric($text)) {
- return true;
- }
- return static::spammyText($this->text_content);
- }
-
protected $casts = [
'banned' => 'boolean',
'params' => 'array',