git.localhorst.tv Git - alttp.git/commitdiff
separate chatlib database generation
author Daniel Karbach <daniel.karbach@localhorst.tv>
Fri, 10 May 2024 13:26:43 +0000 (15:26 +0200)
committer Daniel Karbach <daniel.karbach@localhorst.tv>
Fri, 10 May 2024 13:26:43 +0000 (15:26 +0200)
app/Console/Commands/ChatlibDatabase.php
app/Console/Commands/ChatlibGenerate.php
app/Models/ChatLib.php
tests/Unit/Models/ChatLibTest.php [new file with mode: 0644]

index b701ad144b3d0aed3abc26cb481813e4347b0b4e..e492bb7c1169160fafea1c9a938aec7f5b0c8b86 100644 (file)
@@ -13,14 +13,14 @@ class ChatlibDatabase extends Command {
         *
         * @var string
         */
-       protected $signature = 'chatlib:database';
+       protected $signature = 'chatlib:database {which=de} {size=7}';
 
        /**
         * The console command description.
         *
         * @var string
         */
-       protected $description = 'Updates the ChatLib database';
+       protected $description = 'Update a ChatLib database';
 
        /**
         * Execute the console command.
@@ -29,36 +29,37 @@ class ChatlibDatabase extends Command {
         */
        public function handle() { // Builds one ChatLib database from stored chat logs and saves it under the requested language key.
                $count = 0;
+               $start = time(); // start time in whole seconds, used for the summary line at the end
 
-               $de = new ChatLib();
-               $en = new ChatLib();
+               $size = $this->argument('size'); // NOTE(review): handed to ChatLib uncast (presumably a string from the CLI); ChatlibGenerate intval()s its numeric argument -- confirm ChatLib accepts this
+               $lang = $this->argument('which'); // language selector; also used as the save name below
+               $db = new ChatLib($size);
 
                ChatLog::where('type', '=', 'chat')
                        ->where('banned', '=', false)
                        ->whereNotNull('evaluated_at')
                        ->where('created_at', '<', now()->sub(7, 'day')) // only rows created more than 7 days ago
                        ->whereNotIn('classification', ['gg', 'gl', 'number', 'o7'])
-                       ->whereRaw('LENGTH(`text_content`) > 12')
-                       ->chunk(5000, function ($msgs) use (&$count, $de, $en) {
+                       ->where(function ($query) use ($lang) { // grouped condition: language is unset OR equals the requested one
+                               $query->whereNull('detected_language');
+                               $query->orWhere('detected_language', '=', $lang);
+                       })
+                       ->whereRaw('LENGTH(`text_content`) > 10')
+                       ->chunk(5000, function ($msgs) use (&$count, $db) { // rows are handled in batches of 5000; $count is shared by reference
                                foreach ($msgs as $msg) {
-                                       if ($msg->detected_language === 'de') {
-                                               $de->addMessage($msg);
-                                       } else if ($msg->detected_language === 'en') {
-                                               $en->addMessage($msg);
-                                       } else if (is_null($msg->detected_language)) {
-                                               $de->addMessage($msg);
-                                               $en->addMessage($msg);
-                                       }
+                                       $db->addMessage($msg);
                                        ++$count;
                                }
                                $this->line($count); // running total, printed once per batch
                        });
 
-               $de->compile();
-               $de->saveAs('de');
+               $db->compile();
+               $db->saveAs($lang);
 
-               $en->compile();
-               $en->saveAs('en');
+               $this->line( // summary: elapsed seconds, then current and peak memory usage in MB
+                       number_format(time() - $start, 0).'s '.
+                       number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '.
+                       number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak');
 
                return 0;
        }
index 5ea85f6838bcc19a326c9dbb8cc88eb626ed5ac3..04c9e41b63a07887d4f7b78a753807995f2415d5 100644 (file)
@@ -27,8 +27,15 @@ class ChatlibGenerate extends Command {
         * @return int
         */
        public function handle() {
+
+               $start = microtime(true);
+               $this->line('loading database');
                $db = new ChatLib();
                $db->loadFrom($this->argument('which'));
+               $this->line(
+                       number_format(microtime(true) - $start, 2).'s '.
+                       number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '.
+                       number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak');
 
                $amount = intval($this->argument('amount'));
                for ($i = 0; $i < $amount; ++$i) {
index f4ab93f4798b28ebc87383b94f68e75728bbc7c9..a87c6a7e0b04978e93b78b06ff028af782f9d052 100644 (file)
@@ -6,6 +6,20 @@ use Illuminate\Support\Facades\Storage;
 
 class ChatLib {
 
+       public function __construct($size = 7) { // Stores $size and converts the category pattern lists into ready-to-use regexes.
+               $this->size = $size;
+
+               $converted = []; // replacement token => list of delimited regexes; assigned back to $this->categories below
+               foreach ($this->categories as $category => $patterns) {
+                       $converted_patterns = [];
+                       foreach ($patterns as $pattern) {
+                               $converted_patterns[] = '/\b'.$pattern.'\b/u'; // wrap in word boundaries, match in UTF-8 mode
+                       }
+                       $converted['%'.strtoupper($category).'%'] = $converted_patterns; // key becomes the placeholder, e.g. zb => %ZB%
+               }
+               $this->categories = $converted;
+       }
+
        public function addMessage(ChatLog $msg) { // Adds the text_content of one chat log entry via addText().
                $this->addText($msg->text_content);
        }
@@ -67,19 +81,16 @@ class ChatLib {
                $result = [];
                $sum = 0;
                foreach ($arr as $key => $entry) {
-                       $weight = $entry['count'];
+                       $weight = $entry[0];
                        if ($weight == 1) continue;
                        $lower = $sum;
                        $sum += $weight;
                        $examples = [];
-                       if (is_array(end($entry['examples']))) {
-                               // already processed
-                               $examples = $entry['examples'];
-                       } else if ($key === ' ') {
+                       if ($key === ' ') {
                                $examples = [[' ', 0, 1]];
                        } else {
                                $subsum = 0;
-                               foreach ($entry['examples'] as $example => $subweight) {
+                               foreach ($entry[1] as $example => $subweight) {
                                        $sublower = $subsum;
                                        $subsum += $subweight;
                                        $examples[] = [$example, $sublower, $subsum];
@@ -106,14 +117,18 @@ class ChatLib {
 
        private function pick($options) { // Draws a random number and returns the matching entry via search(); null when $options is empty.
                if (empty($options)) return null;
-               $max = end($options)[2];
+               $max = end($options)[2] - 1; // index 2 of the last entry is used as an exclusive upper bound, so the largest draw is one less
                $num = random_int(0, $max);
+               return static::search($options, $num);
+       }
+
+       public static function search($options, $num) {
                $min_index = 0;
                $max_index = count($options) - 1;
                while ($min_index < $max_index) {
                        $cur_index = intval(($min_index + $max_index) / 2);
                        $cur_low = $options[$cur_index][1];
-                       $cur_high = $options[$cur_index][2];
+                       $cur_high = $options[$cur_index][2] - 1;
                        if ($cur_low > $num) {
                                $max_index = $cur_index;
                        } else if ($cur_high < $num) {
@@ -138,16 +153,16 @@ class ChatLib {
                $generalized = $this->generalize([$token]);
                if (!isset($which[$generalized])) {
                        $which[$generalized] = [
-                               'count' => 1,
-                               'examples' => [],
+                               1,
+                               [],
                        ];
-                       $which[$generalized]['examples'][$token] = 1;
+                       $which[$generalized][1][$token] = 1;
                } else {
-                       ++$which[$generalized]['count'];
-                       if (!isset($which[$generalized]['examples'][$token])) {
-                               $which[$generalized]['examples'][$token] = 1;
+                       ++$which[$generalized][0];
+                       if (!isset($which[$generalized][1][$token])) {
+                               $which[$generalized][1][$token] = 1;
                        } else {
-                               ++$which[$generalized]['examples'][$token];
+                               ++$which[$generalized][1][$token];
                        }
                }
        }
@@ -179,9 +194,7 @@ class ChatLib {
                        $str .= $replaced;
                }
                foreach ($this->categories as $category => $patterns) {
-                       foreach ($patterns as $pattern) {
-                               $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str);
-                       }
+                       $str = preg_replace($patterns, $category, $str);
                }
                return $str;
        }
@@ -198,7 +211,7 @@ class ChatLib {
                'chest' => ['kiste'],
                'einen' => ['n', 'nen'],
                'musik' => ['mukke'],
-               'schade' => ['schad'],
+               'schade' => ['schad', 'schaade'],
        ];
 
        private $categories = [
@@ -355,7 +368,7 @@ class ChatLib {
                        'wuschlwave',
                ],
 
-               'zelda_boss' => [
+               'zb' => [
                        'aga(hnim)?',
                        'armos( knights)?',
                        'arrghus',
@@ -371,7 +384,7 @@ class ChatLib {
                        'vit(reous|ty)',
                ],
 
-               'zelda_dungeon' => [
+               'zd' => [
                        'eastern',
                        'desert( palace)?',
                        'gt',
@@ -386,7 +399,7 @@ class ChatLib {
                        'tt',
                ],
 
-               'zelda_item' => [
+               'zi' => [
                        '(big|small|retro|generic) ?keys?',
                        'b[oö]gen',
                        'bombos',
@@ -431,7 +444,7 @@ class ChatLib {
                        'sword',
                ],
 
-               'zelda_location' => [
+               'zl' => [
                        'big chest',
                        'bumper( cave)?( ledge)?',
                        '(hyrule)? ?castle ?(tower)?',
diff --git a/tests/Unit/Models/ChatLibTest.php b/tests/Unit/Models/ChatLibTest.php
new file mode 100644 (file)
index 0000000..7289200
--- /dev/null
@@ -0,0 +1,28 @@
+<?php
+
+namespace Tests\Unit\Models;
+
+use App\Models\ChatLib;
+use PHPUnit\Framework\TestCase;
+
+class ChatLibTest extends TestCase { // Unit tests for the static ChatLib::search() lookup.
+
+       public function test_binary_search() { // search() must return the entry whose range contains the given number
+               $options = [ // each entry: [value, lower bound, upper bound]; the expectations below treat the upper bound as exclusive
+                       ['a', 0, 2],
+                       ['b', 2, 3],
+                       ['c', 3, 6],
+               ];
+
+               $this->assertEquals('a', ChatLib::search($options, 0)[0]);
+               $this->assertEquals('a', ChatLib::search($options, 1)[0]);
+               $this->assertEquals('b', ChatLib::search($options, 2)[0]); // 2 belongs to b, not a
+               $this->assertEquals('c', ChatLib::search($options, 3)[0]);
+               $this->assertEquals('c', ChatLib::search($options, 4)[0]);
+               $this->assertEquals('c', ChatLib::search($options, 5)[0]);
+
+               $this->assertEquals('a', ChatLib::search($options, -1)[0]); // below the first range: first entry expected
+               $this->assertEquals('c', ChatLib::search($options, 6)[0]); // at or past the last bound: last entry expected
+       }
+
+}