git.localhorst.tv Git - alttp.git/commitdiff
further refine chat gen
author Daniel Karbach <holy@localhorst.tv>
Wed, 8 May 2024 17:03:07 +0000 (19:03 +0200)
committer Daniel Karbach <holy@localhorst.tv>
Wed, 8 May 2024 17:03:07 +0000 (19:03 +0200)
app/Console/Commands/ChatlibDatabase.php
app/Models/ChatLib.php

index b95bb64a06a6dbec47b333c2c9248493e0c8492d..280d4a0c6351e29fd04a2ee0d31c4d0a68fe1712 100644 (file)
@@ -35,6 +35,8 @@ class ChatlibDatabase extends Command {
                        ->where('banned', '=', false)
                        ->whereNotNull('evaluated_at')
                        ->where('created_at', '<', now()->sub(7, 'day'))
+                       ->whereNotIn('classification', ['gg', 'gl', 'number', 'o7'])
+                       ->whereRaw('LENGTH(`text_content`) > 12')
                        ->chunk(5000, function ($msgs) use ($de, $en) {
                                foreach ($msgs as $msg) {
                                        if ($msg->detected_language === 'de') {
index c89ef941be5f4c2802eb89c92cc2be6dd7bcfd9d..3f1530b396b9b752652918c45d9f0a0fdea3d596 100644 (file)
@@ -22,7 +22,7 @@ class ChatLib {
                                $end = $num;
                                for ($i = $start; $i < $end; ++$i) {
                                        $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
-                                       if ($end - $i < 4) break;
+                                       if ($end - $i < 5) break;
                                }
                        }
                }
@@ -75,6 +75,8 @@ class ChatLib {
                        if (is_array(end($entry['examples']))) {
                                // already processed
                                $examples = $entry['examples'];
+                       } else if ($key === ' ') {
+                               $examples = [[' ', 0, 1]];
                        } else {
                                $subsum = 0;
                                foreach ($entry['examples'] as $example => $subweight) {
@@ -162,6 +164,8 @@ class ChatLib {
                $str = '';
                foreach ($tokens as $token) {
                        $replaced = preg_replace('/\d+/', '0', $token);
+                       $replaced = preg_replace('/\s+/', ' ', $token);
+                       $replaced = preg_replace('/(.)\1{2,}/', '$1', $token);
                        $replaced = strtolower($replaced);
                        $str .= $replaced;
                }
@@ -173,7 +177,7 @@ class ChatLib {
                return $example[0];
        }
 
-       private $size = 5;
+       private $size = 7;
        private $transitions = [];
 
 }