From cce68689529251915af11ade10699ffa74cb6a3b Mon Sep 17 00:00:00 2001
From: Daniel Karbach
Date: Sat, 20 Jan 2024 15:54:31 +0100
Subject: [PATCH] try to guess chat language

---
Note: reflowed from a whitespace-collapsed copy. Context-line indentation is
reconstructed and the index hashes predate the follow-up edits, so apply with
`git apply --ignore-whitespace` rather than relying on a 3-way merge.

 app/Models/ChatLog.php                        | 29 +++++++++++
 app/TwitchBot/TwitchChatBot.php               |  6 ++-
 composer.json                                 |  1 +
 composer.lock                                 | 49 ++++++++++++++++++-
 ...24_01_20_142549_chat_detected_language.php | 35 +++++++++++++
 5 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 database/migrations/2024_01_20_142549_chat_detected_language.php

diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php
index 6f72c35..8a936a8 100644
--- a/app/Models/ChatLog.php
+++ b/app/Models/ChatLog.php
@@ -4,6 +4,7 @@ namespace App\Models;
 
 use Illuminate\Database\Eloquent\Factories\HasFactory;
 use Illuminate\Database\Eloquent\Model;
+use LanguageDetector\LanguageDetector;
 
 class ChatLog extends Model {
 
@@ -39,6 +40,7 @@ class ChatLog extends Model {
             $this->type = 'dm';
         }
         $this->text_content = $this->params[1];
+        $this->detectLanguage();
         if ($this->scanForSpam()) {
             $this->banned = true;
         }
@@ -75,6 +77,33 @@ class ChatLog extends Model {
         $this->channel()->associate($channel);
     }
 
+    /**
+     * Guess the language of text_content and store it in detected_language.
+     *
+     * Candidates are the associated channel's languages plus 'en', or a fixed
+     * default list when no channel is associated. detected_language is only
+     * set when the best candidate scores above 0.35.
+     *
+     * @return void
+     */
+    protected function detectLanguage() {
+        $languages = ['de', 'en', 'es', 'fr'];
+        if (!is_null($this->channel)) {
+            // a channel without a language list is treated like an empty list
+            $languages = array_values($this->channel->languages ?? []);
+            if (!in_array('en', $languages, true)) {
+                $languages[] = 'en';
+            }
+        }
+        $detector = LanguageDetector::detect($this->text_content, $languages);
+        $scores = $detector->getScores();
+        $lang = strval($detector->getLanguage());
+        // keep the guess only when its score clears the threshold
+        if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) {
+            $this->detected_language = $lang;
+        }
+    }
+
     protected function scanForSpam() {
         if (substr($this->text_content, 0, 1) == '!') {
             return true;
diff --git a/app/TwitchBot/TwitchChatBot.php b/app/TwitchBot/TwitchChatBot.php
index 5864c5f..576bf04 100644
--- a/app/TwitchBot/TwitchChatBot.php
+++ b/app/TwitchBot/TwitchChatBot.php
@@ -80,6 +80,10 @@ class TwitchChatBot extends TwitchBot {
         $line = ChatLog::where('type', '=', 'chat')
             ->where('banned', '=', false)
             ->where('created_at', '<', now()->sub(1, 'day'))
+            ->where(function ($query) use ($channel) {
+                $query->whereNull('detected_language');
+                $query->orWhereIn('detected_language', $channel->languages ?? []);
+            })
             ->inRandomOrder()
             ->first();
         return $line->text_content;
@@ -90,7 +94,7 @@ class TwitchChatBot extends TwitchBot {
     }
 
     private function randomWaitTime(Channel $channel) {
-        return random_int(1, 1800);
+        return random_int(1, 900);
     }
 
     private function tagChannelRead(Channel $channel) {
diff --git a/composer.json b/composer.json
index 3c7d625..f564e6e 100644
--- a/composer.json
+++ b/composer.json
@@ -10,6 +10,7 @@
         "doctrine/dbal": "^3.3",
         "guzzlehttp/guzzle": "^7.2",
         "jakyeru/larascord": "^3.0",
+        "landrok/language-detector": "^1.4",
         "laravel/breeze": "^1.4",
         "laravel/framework": "^9.2",
         "laravel/sanctum": "^2.14.1",
diff --git a/composer.lock b/composer.lock
index eb1e05e..37d5599 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "a370e7a3f5ba178c836caec3a7ac4879",
+    "content-hash": "ffe863b64722a40de9d8de5597d8c9b8",
     "packages": [
         {
             "name": "beyondcode/laravel-websockets",
@@ -1770,6 +1770,51 @@
             },
             "time": "2022-06-26T11:09:59+00:00"
         },
+        {
+            "name": "landrok/language-detector",
+            "version": "1.4.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/landrok/language-detector.git",
+                "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/landrok/language-detector/zipball/91511a4f93700bd1c4c576b0e3b42173334a3cab",
+                "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab",
+                "shasum": ""
+            },
+            "require": {
+                "ext-mbstring": "*",
+                "php": ">=7.4",
+                "webmozart/assert": "^1.2"
+            },
+            "require-dev": {
+                "phpunit/phpunit": ">=6"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "LanguageDetector\\": "src/LanguageDetector/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "description": "A fast and reliable PHP library for detecting languages",
+            "homepage": "https://github.com/landrok/language-detector",
+            "keywords": [
+                "detector",
+                "language",
+                "n-grams"
+            ],
+            "support": {
+                "issues": "https://github.com/landrok/language-detector/issues",
+                "source": "https://github.com/landrok/language-detector/tree/1.4.0"
+            },
+            "time": "2023-12-18T21:52:42+00:00"
+        },
         {
             "name": "laravel/breeze",
             "version": "v1.19.1",
@@ -10448,5 +10493,5 @@
         "php": "^8.0.2"
     },
     "platform-dev": [],
-    "plugin-api-version": "2.0.0"
+    "plugin-api-version": "2.3.0"
 }
diff --git a/database/migrations/2024_01_20_142549_chat_detected_language.php b/database/migrations/2024_01_20_142549_chat_detected_language.php
new file mode 100644
index 0000000..75c4eb2
--- /dev/null
+++ b/database/migrations/2024_01_20_142549_chat_detected_language.php
@@ -0,0 +1,35 @@
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    /**
+     * Run the migrations.
+     *
+     * @return void
+     */
+    public function up()
+    {
+        Schema::table('chat_logs', function (Blueprint $table) {
+            $table->string('detected_language')->nullable()->default(null);
+            $table->index(['detected_language']);
+        });
+    }
+
+    /**
+     * Reverse the migrations.
+     *
+     * @return void
+     */
+    public function down()
+    {
+        Schema::table('chat_logs', function (Blueprint $table) {
+            // undo up() in reverse order: remove the index, then the column
+            $table->dropIndex(['detected_language']);
+            $table->dropColumn('detected_language');
+        });
+    }
+};
-- 
2.39.2