From: Daniel Karbach <daniel.karbach@localhorst.tv>
Date: Sat, 20 Jan 2024 14:54:31 +0000 (+0100)
Subject: try to guess chat language
X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;h=cce68689529251915af11ade10699ffa74cb6a3b;p=alttp.git

try to guess chat language
---

diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php
index 6f72c35..8a936a8 100644
--- a/app/Models/ChatLog.php
+++ b/app/Models/ChatLog.php
@@ -4,6 +4,7 @@ namespace App\Models;
 
 use Illuminate\Database\Eloquent\Factories\HasFactory;
 use Illuminate\Database\Eloquent\Model;
+use LanguageDetector\LanguageDetector;
 
 class ChatLog extends Model {
 
@@ -39,6 +40,7 @@ class ChatLog extends Model {
 				$this->type = 'dm';
 			}
 			$this->text_content = $this->params[1];
+			$this->detectLanguage();
 			if ($this->scanForSpam()) {
 				$this->banned = true;
 			}
@@ -75,6 +77,23 @@ class ChatLog extends Model {
 		$this->channel()->associate($channel);
 	}
 
+	/** Guess the language of text_content; detected_language is only set on a confident match. */
+	protected function detectLanguage() {
+		$languages = ['de', 'en', 'es', 'fr']; // candidates used when no channel is associated
+		if (!is_null($this->channel)) {
+			$languages = array_values($this->channel->languages ?? []); // tolerate a null attribute
+			if (!in_array('en', $languages, true)) {
+				$languages[] = 'en'; // English is always a candidate
+			}
+		}
+		$detector = LanguageDetector::detect($this->text_content, $languages);
+		$scores = $detector->getScores();
+		$lang = strval($detector->getLanguage());
+		if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) {
+			$this->detected_language = $lang; // scores of 0.35 or less leave the attribute untouched
+		}
+	}
+
 	protected function scanForSpam() {
 		if (substr($this->text_content, 0, 1) == '!') {
 			return true;
diff --git a/app/TwitchBot/TwitchChatBot.php b/app/TwitchBot/TwitchChatBot.php
index 5864c5f..576bf04 100644
--- a/app/TwitchBot/TwitchChatBot.php
+++ b/app/TwitchBot/TwitchChatBot.php
@@ -80,6 +80,10 @@ class TwitchChatBot extends TwitchBot {
 		$line = ChatLog::where('type', '=', 'chat')
 			->where('banned', '=', false)
 			->where('created_at', '<', now()->sub(1, 'day'))
+			->where(function ($query) use ($channel) {
+				$query->whereNull('detected_language');
+				$query->orWhereIn('detected_language', $channel->languages);
+			})
 			->inRandomOrder()
 			->first();
 		return $line->text_content;
@@ -90,7 +94,7 @@ class TwitchChatBot extends TwitchBot {
 	}
 
 	private function randomWaitTime(Channel $channel) {
-		return random_int(1, 1800);
+		return random_int(1, 900); // $channel is not consulted: every channel draws from the same range
 	}
 
 	private function tagChannelRead(Channel $channel) {
diff --git a/composer.json b/composer.json
index 3c7d625..f564e6e 100644
--- a/composer.json
+++ b/composer.json
@@ -10,6 +10,7 @@
         "doctrine/dbal": "^3.3",
         "guzzlehttp/guzzle": "^7.2",
         "jakyeru/larascord": "^3.0",
+        "landrok/language-detector": "^1.4",
         "laravel/breeze": "^1.4",
         "laravel/framework": "^9.2",
         "laravel/sanctum": "^2.14.1",
diff --git a/composer.lock b/composer.lock
index eb1e05e..37d5599 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "a370e7a3f5ba178c836caec3a7ac4879",
+    "content-hash": "ffe863b64722a40de9d8de5597d8c9b8",
     "packages": [
         {
             "name": "beyondcode/laravel-websockets",
@@ -1770,6 +1770,51 @@
             },
             "time": "2022-06-26T11:09:59+00:00"
         },
+        {
+            "name": "landrok/language-detector",
+            "version": "1.4.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/landrok/language-detector.git",
+                "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/landrok/language-detector/zipball/91511a4f93700bd1c4c576b0e3b42173334a3cab",
+                "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab",
+                "shasum": ""
+            },
+            "require": {
+                "ext-mbstring": "*",
+                "php": ">=7.4",
+                "webmozart/assert": "^1.2"
+            },
+            "require-dev": {
+                "phpunit/phpunit": ">=6"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "LanguageDetector\\": "src/LanguageDetector/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "description": "A fast and reliable PHP library for detecting languages",
+            "homepage": "https://github.com/landrok/language-detector",
+            "keywords": [
+                "detector",
+                "language",
+                "n-grams"
+            ],
+            "support": {
+                "issues": "https://github.com/landrok/language-detector/issues",
+                "source": "https://github.com/landrok/language-detector/tree/1.4.0"
+            },
+            "time": "2023-12-18T21:52:42+00:00"
+        },
         {
             "name": "laravel/breeze",
             "version": "v1.19.1",
@@ -10448,5 +10493,5 @@
         "php": "^8.0.2"
     },
     "platform-dev": [],
-    "plugin-api-version": "2.0.0"
+    "plugin-api-version": "2.3.0"
 }
diff --git a/database/migrations/2024_01_20_142549_chat_detected_language.php b/database/migrations/2024_01_20_142549_chat_detected_language.php
new file mode 100644
index 0000000..75c4eb2
--- /dev/null
+++ b/database/migrations/2024_01_20_142549_chat_detected_language.php
@@ -0,0 +1,33 @@
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+	/**
+	 * Add the nullable, indexed detected_language column to chat_logs.
+	 *
+	 * @return void
+	 */
+	public function up()
+	{
+		Schema::table('chat_logs', function (Blueprint $chatLogs) {
+			// Column defaults to NULL; the index is declared fluently on the column.
+			$chatLogs->string('detected_language')->nullable()->default(null)->index();
+		});
+	}
+
+	/**
+	 * Remove the detected_language column from chat_logs again.
+	 *
+	 * @return void
+	 */
+	public function down()
+	{
+		Schema::table('chat_logs', function (Blueprint $chatLogs) {
+			$chatLogs->dropColumn('detected_language');
+		});
+	}
+};