*
* @var string
*/
- protected $signature = 'chatlib:database';
+ protected $signature = 'chatlib:database {which=de} {size=7}';
/**
* The console command description.
*
* @var string
*/
- protected $description = 'Updates the ChatLib database';
+ protected $description = 'Update a ChatLib database';
/**
* Execute the console command.
*/
public function handle() {
+ // Feeds the matching chat messages into one ChatLib, compiles it and saves it under the requested language.
$count = 0;
+ $start = time();
- $de = new ChatLib();
- $en = new ChatLib();
+ // Console arguments arrive as strings; cast so ChatLib receives an integer size.
+ $size = intval($this->argument('size'));
+ $lang = $this->argument('which');
+ $db = new ChatLib($size);
ChatLog::where('type', '=', 'chat')
->where('banned', '=', false)
->whereNotNull('evaluated_at')
->where('created_at', '<', now()->sub(7, 'day'))
->whereNotIn('classification', ['gg', 'gl', 'number', 'o7'])
- ->whereRaw('LENGTH(`text_content`) > 12')
- ->chunk(5000, function ($msgs) use (&$count, $de, $en) {
+ // Keep messages whose language is undetected or equals the requested one.
+ ->where(function ($query) use ($lang) {
+ $query->whereNull('detected_language');
+ $query->orWhere('detected_language', '=', $lang);
+ })
+ ->whereRaw('LENGTH(`text_content`) > 10')
+ ->chunk(5000, function ($msgs) use (&$count, $db) {
foreach ($msgs as $msg) {
- if ($msg->detected_language === 'de') {
- $de->addMessage($msg);
- } else if ($msg->detected_language === 'en') {
- $en->addMessage($msg);
- } else if (is_null($msg->detected_language)) {
- $de->addMessage($msg);
- $en->addMessage($msg);
- }
+ $db->addMessage($msg);
++$count;
}
$this->line($count);
});
- $de->compile();
- $de->saveAs('de');
+ $db->compile();
+ $db->saveAs($lang);
- $en->compile();
- $en->saveAs('en');
+ // Report elapsed seconds plus current and peak memory usage in MB.
+ $this->line(
+ number_format(time() - $start, 0).'s '.
+ number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '.
+ number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak');
return 0;
}
* @return int
*/
public function handle() {
+
+ $start = microtime(true);
+ $this->line('loading database');
$db = new ChatLib();
$db->loadFrom($this->argument('which'));
+ $this->line(
+ number_format(microtime(true) - $start, 2).'s '.
+ number_format(memory_get_usage() / 1024 / 1024, 3).'MB now '.
+ number_format(memory_get_peak_usage() / 1024 / 1024, 3).'MB peak');
$amount = intval($this->argument('amount'));
for ($i = 0; $i < $amount; ++$i) {
class ChatLib {
+ /**
+ * Stores the size and converts the category patterns into ready-to-use regexes.
+ *
+ * Each raw pattern is wrapped in word boundaries with the unicode modifier,
+ * and each category name is re-keyed as its upper-cased %NAME% placeholder.
+ *
+ * @param int $size kept on the instance as $this->size (its use is not visible in this chunk)
+ */
+ public function __construct($size = 7) {
+     $this->size = $size;
+
+     $compiled = [];
+     foreach ($this->categories as $name => $raw_patterns) {
+         $placeholder = '%'.strtoupper($name).'%';
+         $compiled[$placeholder] = [];
+         foreach ($raw_patterns as $raw) {
+             $compiled[$placeholder][] = '/\b'.$raw.'\b/u';
+         }
+     }
+     $this->categories = $compiled;
+ }
+
/**
* Adds the text content of a chat log entry to this library.
*
* @param ChatLog $msg entry whose text_content is handed to addText()
*/
public function addMessage(ChatLog $msg) {
$this->addText($msg->text_content);
}
$result = [];
$sum = 0;
foreach ($arr as $key => $entry) {
- $weight = $entry['count'];
+ $weight = $entry[0];
if ($weight == 1) continue;
$lower = $sum;
$sum += $weight;
$examples = [];
- if (is_array(end($entry['examples']))) {
- // already processed
- $examples = $entry['examples'];
- } else if ($key === ' ') {
+ if ($key === ' ') {
$examples = [[' ', 0, 1]];
} else {
$subsum = 0;
- foreach ($entry['examples'] as $example => $subweight) {
+ foreach ($entry[1] as $example => $subweight) {
$sublower = $subsum;
$subsum += $subweight;
$examples[] = [$example, $sublower, $subsum];
/**
* Picks a random entry from a list of weighted options.
*
* Index 2 of the last option is used as the exclusive upper bound of the
* random number, which is then resolved to an option by search().
*
* @param array $options options passed on to search()
* @return mixed null when $options is empty, otherwise the result of search()
*/
private function pick($options) {
if (empty($options)) return null;
- $max = end($options)[2];
// random_int() includes both bounds, so the last upper bound itself must be excluded.
+ $max = end($options)[2] - 1;
$num = random_int(0, $max);
+ return static::search($options, $num);
+ }
+
+ public static function search($options, $num) {
$min_index = 0;
$max_index = count($options) - 1;
while ($min_index < $max_index) {
$cur_index = intval(($min_index + $max_index) / 2);
$cur_low = $options[$cur_index][1];
- $cur_high = $options[$cur_index][2];
+ $cur_high = $options[$cur_index][2] - 1;
if ($cur_low > $num) {
$max_index = $cur_index;
} else if ($cur_high < $num) {
$generalized = $this->generalize([$token]);
if (!isset($which[$generalized])) {
$which[$generalized] = [
- 'count' => 1,
- 'examples' => [],
+ 1,
+ [],
];
- $which[$generalized]['examples'][$token] = 1;
+ $which[$generalized][1][$token] = 1;
} else {
- ++$which[$generalized]['count'];
- if (!isset($which[$generalized]['examples'][$token])) {
- $which[$generalized]['examples'][$token] = 1;
+ ++$which[$generalized][0];
+ if (!isset($which[$generalized][1][$token])) {
+ $which[$generalized][1][$token] = 1;
} else {
- ++$which[$generalized]['examples'][$token];
+ ++$which[$generalized][1][$token];
}
}
}
$str .= $replaced;
}
foreach ($this->categories as $category => $patterns) {
- foreach ($patterns as $pattern) {
- $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str);
- }
+ $str = preg_replace($patterns, $category, $str);
}
return $str;
}
'chest' => ['kiste'],
'einen' => ['n', 'nen'],
'musik' => ['mukke'],
- 'schade' => ['schad'],
+ 'schade' => ['schad', 'schaade'],
];
private $categories = [
'wuschlwave',
],
- 'zelda_boss' => [
+ 'zb' => [
'aga(hnim)?',
'armos( knights)?',
'arrghus',
'vit(reous|ty)',
],
- 'zelda_dungeon' => [
+ 'zd' => [
'eastern',
'desert( palace)?',
'gt',
'tt',
],
- 'zelda_item' => [
+ 'zi' => [
'(big|small|retro|generic) ?keys?',
'b[oö]gen',
'bombos',
'sword',
],
- 'zelda_location' => [
+ 'zl' => [
'big chest',
'bumper( cave)?( ledge)?',
'(hyrule)? ?castle ?(tower)?',
--- /dev/null
+<?php
+
+namespace Tests\Unit\Models;
+
+use App\Models\ChatLib;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Unit tests for the weighted-range lookup ChatLib::search().
+ */
+class ChatLibTest extends TestCase {
+
+    /**
+     * Checks which option search() returns for numbers inside and outside
+     * the ranges covered by the option list.
+     */
+    public function test_binary_search() {
+        // Each option is [value, lower bound, upper bound]; the assertions
+        // below treat the upper bound as exclusive (2 belongs to 'b', not 'a').
+        $options = [
+            ['a', 0, 2],
+            ['b', 2, 3],
+            ['c', 3, 6],
+        ];
+
+        // Numbers inside the covered range resolve to the containing option.
+        // assertSame is used so the comparison is strict on type and value.
+        $this->assertSame('a', ChatLib::search($options, 0)[0]);
+        $this->assertSame('a', ChatLib::search($options, 1)[0]);
+        $this->assertSame('b', ChatLib::search($options, 2)[0]);
+        $this->assertSame('c', ChatLib::search($options, 3)[0]);
+        $this->assertSame('c', ChatLib::search($options, 4)[0]);
+        $this->assertSame('c', ChatLib::search($options, 5)[0]);
+
+        // Numbers outside the covered range resolve to the nearest end.
+        $this->assertSame('a', ChatLib::search($options, -1)[0]);
+        $this->assertSame('c', ChatLib::search($options, 6)[0]);
+    }
+
+}