namespace App\Models;
+use Illuminate\Support\Facades\Storage;
+
class ChatLib {
- public function addMessage($msg) {
- $tokens = $this->tokenize($msg->text_content);
+ public function addMessage(ChatLog $msg) { // Feeds one ChatLog entry into the model using its text_content.
+ $this->addText($msg->text_content); // Only text_content is read here; all further processing is delegated to addText().
+ }
+
+ public function addText($text) {
+ $tokens = $this->tokenize($text);
if (empty($tokens)) return;
$tokens[] = '';
foreach ($tokens as $num => $token) {
if ($num === 0) {
$this->addTransition([], $token);
} else {
- $start = max(0, $num - $this->size);
+ $start = max(0, $num - $this->size - 1);
$end = $num;
for ($i = $start; $i < $end; ++$i) {
$this->addTransition(array_slice($tokens, $i, $end - $i), $token);
- if ($end - $i < 3) break;
+ if ($end - $i < 4) break;
}
}
}
unset($this->transitions[$key]);
}
}
- echo 'size: ', number_format(strlen(json_encode($this->transitions)), 0), PHP_EOL;
}
public function generate($limit = 100) {
- $tokens = [];
+ $tokens = ['']; // Seed the context with a single empty-string token instead of starting from an empty list.
$generated = '';
while (strlen($generated) < $limit) {
$next = $this->randomNext($tokens);
- if (empty($next)) break;
+ if ($next === '') break; // Strict check: stop only on the empty string, so a token such as "0" no longer ends generation.
$tokens[] = $next;
$generated .= $next;
}
return $generated;
}
+ public function saveAs($name) { // Writes the model as JSON to "<name>.json" on the 'chatlib' storage disk.
+ $data = [ // Only these two properties are serialized.
+ 'size' => $this->size,
+ 'transitions' => $this->transitions,
+ ];
+ Storage::disk('chatlib')->put($name.'.json', json_encode($data)); // NOTE(review): the results of json_encode() and put() are not checked.
+ }
+
+ public function loadFrom($name) { // Restores size and transitions from "<name>.json" on the 'chatlib' storage disk, the same layout saveAs() writes.
+ $data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true); // true => JSON objects are decoded as associative arrays.
+ $this->size = $data['size']; // NOTE(review): no guard for a missing file or invalid JSON; $data is indexed as-is.
+ $this->transitions = $data['transitions'];
+ }
+
private function index($arr) {
$result = [];
$sum = 0;
$subsum = 0;
foreach ($entry['examples'] as $example => $subweight) {
$sublower = $subsum;
- $subsum += $subweight * $subweight;
+ $subsum += $subweight;
$examples[] = [$example, $sublower, $subsum];
}
}
private function randomNext($tokens) {
$cnt = count($tokens);
- for ($size = min($this->size, $cnt); $size >= 0; --$size) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - $size, $size));
+ for ($size = min($this->size, $cnt); $size > 0; --$size) {
+ $cmb = $this->generalize(array_slice($tokens, -$size));
if (isset($this->transitions[$cmb])) {
$pick = $this->pick($this->transitions[$cmb]);
if (!is_null($pick)) {
}
private function tokenize($str) {
- return array_values(array_filter(preg_split('/\b/u', $str)));
+ return array_values(array_filter(preg_split('/\b/u', $str), function($token) { // Split at word boundaries, filter the pieces, then reindex from 0.
+ if (empty($token)) return false; // Drops empty pieces. NOTE(review): empty() is also true for the string "0", so that token is dropped too.
+ if (preg_match('/cheer\d+/u', strtolower($token))) return false; // Drops tokens containing "cheer" followed by digits, compared in lower case; the pattern is unanchored.
+ return true;
+ }));
}
private function generalize($tokens) {
foreach ($tokens as $token) {
$replaced = preg_replace('/\d+/', '0', $token);
$replaced = strtolower($replaced);
- $str .= empty($replaced) ? $token : $replaced;
+ $str .= $replaced;
}
return $str;
}