From 4c72d4b8bec61eba5b3dc43df5eafd890e123d37 Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Fri, 10 May 2024 15:26:43 +0200 Subject: [PATCH] separate chatlib database generation --- app/Console/Commands/ChatlibDatabase.php | 37 +++++++-------- app/Console/Commands/ChatlibGenerate.php | 7 +++ app/Models/ChatLib.php | 59 +++++++++++++++--------- tests/Unit/Models/ChatLibTest.php | 28 +++++++++++ 4 files changed, 90 insertions(+), 41 deletions(-) create mode 100644 tests/Unit/Models/ChatLibTest.php diff --git a/app/Console/Commands/ChatlibDatabase.php b/app/Console/Commands/ChatlibDatabase.php index b701ad1..e492bb7 100644 --- a/app/Console/Commands/ChatlibDatabase.php +++ b/app/Console/Commands/ChatlibDatabase.php @@ -13,14 +13,14 @@ class ChatlibDatabase extends Command { * * @var string */ - protected $signature = 'chatlib:database'; + protected $signature = 'chatlib:database {which=de} {size=7}'; /** * The console command description. * * @var string */ - protected $description = 'Updates the ChatLib database'; + protected $description = 'Update a ChatLib database'; /** * Execute the console command. @@ -29,36 +29,37 @@ class ChatlibDatabase extends Command { */ public function handle() { $count = 0; + $start = time(); - $de = new ChatLib(); - $en = new ChatLib(); + $size = $this->argument('size'); + $lang = $this->argument('which'); + $db = new ChatLib($size); ChatLog::where('type', '=', 'chat') ->where('banned', '=', false) ->whereNotNull('evaluated_at') ->where('created_at', '<', now()->sub(7, 'day')) ->whereNotIn('classification', ['gg', 'gl', 'number', 'o7']) - ->whereRaw('LENGTH(`text_content`) > 12') - ->chunk(5000, function ($msgs) use (&$count, $de, $en) { + ->where(function ($query) use ($lang) { + $query->whereNull('detected_language'); + $query->orWhere('detected_language', '=', $lang); + }) + ->whereRaw('LENGTH(`text_content`) > 10') + ->chunk(5000, function ($msgs) use (&$count, $db) { foreach ($msgs as $msg) { - if ($msg->detected_language === 'de') { - $de->addMessage($msg); - } else if ($msg->detected_language === 'en') { - $en->addMessage($msg); - } else if (is_null($msg->detected_language)) { - $de->addMessage($msg); - $en->addMessage($msg); - } + $db->addMessage($msg); ++$count; } $this->line($count); }); - $de->compile(); - $de->saveAs('de'); + $db->compile(); + $db->saveAs($lang); - $en->compile(); - $en->saveAs('en'); + $this->line( + number_format(time() - $start, 0).'s '. + number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '. + number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak'); return 0; } diff --git a/app/Console/Commands/ChatlibGenerate.php b/app/Console/Commands/ChatlibGenerate.php index 5ea85f6..04c9e41 100644 --- a/app/Console/Commands/ChatlibGenerate.php +++ b/app/Console/Commands/ChatlibGenerate.php @@ -27,8 +27,15 @@ class ChatlibGenerate extends Command { * @return int */ public function handle() { + + $start = microtime(true); + $this->line('loading database'); $db = new ChatLib(); $db->loadFrom($this->argument('which')); + $this->line( + number_format(microtime(true) - $start, 2).'s '. + number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '. + number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak'); $amount = intval($this->argument('amount')); for ($i = 0; $i < $amount; ++$i) { diff --git a/app/Models/ChatLib.php b/app/Models/ChatLib.php index f4ab93f..a87c6a7 100644 --- a/app/Models/ChatLib.php +++ b/app/Models/ChatLib.php @@ -6,6 +6,20 @@ use Illuminate\Support\Facades\Storage; class ChatLib { + public function __construct($size = 7) { + $this->size = $size; + + $converted = []; + foreach ($this->categories as $category => $patterns) { + $converted_patterns = []; + foreach ($patterns as $pattern) { + $converted_patterns[] = '/\b'.$pattern.'\b/u'; + } + $converted['%'.strtoupper($category).'%'] = $converted_patterns; + } + $this->categories = $converted; + } + public function addMessage(ChatLog $msg) { $this->addText($msg->text_content); } @@ -67,19 +81,16 @@ class ChatLib { $result = []; $sum = 0; foreach ($arr as $key => $entry) { - $weight = $entry['count']; + $weight = $entry[0]; if ($weight == 1) continue; $lower = $sum; $sum += $weight; $examples = []; - if (is_array(end($entry['examples']))) { - // already processed - $examples = $entry['examples']; - } else if ($key === ' ') { + if ($key === ' ') { $examples = [[' ', 0, 1]]; } else { $subsum = 0; - foreach ($entry['examples'] as $example => $subweight) { + foreach ($entry[1] as $example => $subweight) { $sublower = $subsum; $subsum += $subweight; $examples[] = [$example, $sublower, $subsum]; @@ -106,14 +117,18 @@ class ChatLib { private function pick($options) { if (empty($options)) return null; - $max = end($options)[2]; + $max = end($options)[2] - 1; $num = random_int(0, $max); + return static::search($options, $num); + } + + public static function search($options, $num) { $min_index = 0; $max_index = count($options) - 1; while ($min_index < $max_index) { $cur_index = intval(($min_index + $max_index) / 2); $cur_low = $options[$cur_index][1]; - $cur_high = $options[$cur_index][2]; + $cur_high = $options[$cur_index][2] - 1; if ($cur_low > $num) { $max_index = $cur_index; } else if ($cur_high < $num) { @@ -138,16 +153,16 @@ class ChatLib { $generalized = $this->generalize([$token]); if (!isset($which[$generalized])) { $which[$generalized] = [ - 'count' => 1, - 'examples' => [], + 1, + [], ]; - $which[$generalized]['examples'][$token] = 1; + $which[$generalized][1][$token] = 1; } else { - ++$which[$generalized]['count']; - if (!isset($which[$generalized]['examples'][$token])) { - $which[$generalized]['examples'][$token] = 1; + ++$which[$generalized][0]; + if (!isset($which[$generalized][1][$token])) { + $which[$generalized][1][$token] = 1; } else { - ++$which[$generalized]['examples'][$token]; + ++$which[$generalized][1][$token]; } } } @@ -179,9 +194,7 @@ class ChatLib { $str .= $replaced; } foreach ($this->categories as $category => $patterns) { - foreach ($patterns as $pattern) { - $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str); - } + $str = preg_replace($patterns, $category, $str); } return $str; } @@ -198,7 +211,7 @@ class ChatLib { 'chest' => ['kiste'], 'einen' => ['n', 'nen'], 'musik' => ['mukke'], - 'schade' => ['schad'], + 'schade' => ['schad', 'schaade'], ]; private $categories = [ @@ -355,7 +368,7 @@ class ChatLib { 'wuschlwave', ], - 'zelda_boss' => [ + 'zb' => [ 'aga(hnim)?', 'armos( knights)?', 'arrghus', @@ -371,7 +384,7 @@ class ChatLib { 'vit(reous|ty)', ], - 'zelda_dungeon' => [ + 'zd' => [ 'eastern', 'desert( palace)?', 'gt', @@ -386,7 +399,7 @@ class ChatLib { 'tt', ], - 'zelda_item' => [ + 'zi' => [ '(big|small|retro|generic) ?keys?', 'b[oö]gen', 'bombos', @@ -431,7 +444,7 @@ class ChatLib { 'sword', ], - 'zelda_location' => [ + 'zl' => [ 'big chest', 'bumper( cave)?( ledge)?', '(hyrule)? ?castle ?(tower)?', diff --git a/tests/Unit/Models/ChatLibTest.php b/tests/Unit/Models/ChatLibTest.php new file mode 100644 index 0000000..7289200 --- /dev/null +++ b/tests/Unit/Models/ChatLibTest.php @@ -0,0 +1,28 @@ +assertEquals('a', ChatLib::search($options, 0)[0]); + $this->assertEquals('a', ChatLib::search($options, 1)[0]); + $this->assertEquals('b', ChatLib::search($options, 2)[0]); + $this->assertEquals('c', ChatLib::search($options, 3)[0]); + $this->assertEquals('c', ChatLib::search($options, 4)[0]); + $this->assertEquals('c', ChatLib::search($options, 5)[0]); + + $this->assertEquals('a', ChatLib::search($options, -1)[0]); + $this->assertEquals('c', ChatLib::search($options, 6)[0]); + } + +} -- 2.39.2