From 5b1693c80ed72de514e40d5c364dc1ab14d84bff Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Sun, 7 Apr 2024 12:56:06 +0200 Subject: [PATCH] switch to (slightly) better language detection model --- app/Models/ChatLog.php | 10 ++--- composer.json | 2 +- composer.lock | 98 ++++++++++++++++++++++-------------------- 3 files changed, 58 insertions(+), 52 deletions(-) diff --git a/app/Models/ChatLog.php b/app/Models/ChatLog.php index 792e9e7..e17076a 100644 --- a/app/Models/ChatLog.php +++ b/app/Models/ChatLog.php @@ -6,7 +6,7 @@ use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; use Illuminate\Support\Arr; use Illuminate\Support\Str; -use LanguageDetector\LanguageDetector; +use LanguageDetection\Language; class ChatLog extends Model { @@ -125,11 +125,11 @@ class ChatLog extends Model { $languages[] = 'en'; } } - $detector = LanguageDetector::detect($this->text_content, $languages); - $scores = $detector->getScores(); - $lang = strval($detector->getLanguage()); + $detector = (new Language($languages))->detect($this->text_content); + $scores = $detector->close(); + $lang = strval($detector); //var_dump($scores, $lang, $this->text_content); - if (is_array($scores) && isset($scores[$lang]) && $scores[$lang] > 0.35) { + if (!empty($lang) && $scores[$lang] > 0.4) { $this->detected_language = $lang; } } diff --git a/composer.json b/composer.json index e84394e..5075d26 100644 --- a/composer.json +++ b/composer.json @@ -10,12 +10,12 @@ "doctrine/dbal": "^3.3", "guzzlehttp/guzzle": "^7.2", "jakyeru/larascord": "^3.0", - "landrok/language-detector": "^1.4", "laravel/breeze": "^1.4", "laravel/framework": "^10.0", "laravel/sanctum": "^3.2", "laravel/tinker": "^2.7", "laravel/ui": "^4.0", + "patrickschur/language-detection": "^5.3", "team-reflex/discord-php": "^7.0", "z3/enemizer_linux": "6.1.0.180" }, diff --git a/composer.lock b/composer.lock index a460b99..01da470 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "95303f143cf7ff4e29c178f14289292a", + "content-hash": "9f3035a68ea28a4f87fd5c8433bb5464", "packages": [ { "name": "beyondcode/laravel-websockets", @@ -1842,51 +1842,6 @@ }, "time": "2022-06-26T11:09:59+00:00" }, - { - "name": "landrok/language-detector", - "version": "1.4.0", - "source": { - "type": "git", - "url": "https://github.com/landrok/language-detector.git", - "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/landrok/language-detector/zipball/91511a4f93700bd1c4c576b0e3b42173334a3cab", - "reference": "91511a4f93700bd1c4c576b0e3b42173334a3cab", - "shasum": "" - }, - "require": { - "ext-mbstring": "*", - "php": ">=7.4", - "webmozart/assert": "^1.2" - }, - "require-dev": { - "phpunit/phpunit": ">=6" - }, - "type": "library", - "autoload": { - "psr-4": { - "LanguageDetector\\": "src/LanguageDetector/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "description": "A fast and reliable PHP library for detecting languages", - "homepage": "https://github.com/landrok/language-detector", - "keywords": [ - "detector", - "language", - "n-grams" - ], - "support": { - "issues": "https://github.com/landrok/language-detector/issues", - "source": "https://github.com/landrok/language-detector/tree/1.4.0" - }, - "time": "2023-12-18T21:52:42+00:00" - }, { "name": "laravel/breeze", "version": "v1.29.1", @@ -3544,6 +3499,57 @@ }, "time": "2024-04-05T21:00:10+00:00" }, + { + "name": "patrickschur/language-detection", + "version": "v5.3.0", + "source": { + "type": "git", + "url": "https://github.com/patrickschur/language-detection.git", + "reference": "b8da335336c09fa6814fe0ca0d6d506c357cd7b9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/patrickschur/language-detection/zipball/b8da335336c09fa6814fe0ca0d6d506c357cd7b9", + "reference": "b8da335336c09fa6814fe0ca0d6d506c357cd7b9", + "shasum": "" + }, + "require": { + "ext-json": "*", + "ext-mbstring": "*", + "php": "^7.4 || ^8.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.5.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "LanguageDetection\\": "src/LanguageDetection" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Patrick Schur", + "email": "patrick_schur@outlook.de" + } + ], + "description": "A language detection library for PHP. Detects the language from a given text string.", + "homepage": "https://github.com/patrickschur/language-detection", + "keywords": [ + "detect", + "detection", + "language" + ], + "support": { + "issues": "https://github.com/patrickschur/language-detection/issues", + "source": "https://github.com/patrickschur/language-detection/tree/v5.3.0" + }, + "time": "2023-08-18T22:46:39+00:00" + }, { "name": "phpoption/phpoption", "version": "1.9.2", -- 2.39.2