class ChatLib {
- public function addMessage($msg) {
- $tokens = $this->tokenize($msg->text_content);
// Feeds one chat message into the model by forwarding its text_content
// property to addText().
+ public function addMessage(ChatLog $msg) {
+ $this->addText($msg->text_content);
+ }
+
// Tokenizes $text via $this->tokenize() and records, for every token, the
// transitions that lead to it from the tokens preceding it.
// Returns nothing; does nothing when tokenization yields no tokens.
+ public function addText($text) {
+ $tokens = $this->tokenize($text);
if (empty($tokens)) return;
// An empty-string token is appended after the real tokens; it looks like an
// end-of-text marker (generate() stops when it draws '') -- TODO confirm.
$tokens[] = '';
foreach ($tokens as $num => $token) {
if ($num === 0) {
// The first token has no predecessors, so it is recorded with an empty context.
$this->addTransition([], $token);
} else {
// The old line allowed at most $this->size preceding tokens as context;
// the new line allows one more ($this->size + 1).
- $start = max(0, $num - $this->size);
+ $start = max(0, $num - $this->size - 1);
$end = $num;
// Walk from the longest context window towards shorter ones; each window is
// the slice of tokens from index $i up to, but not including, $num.
for ($i = $start; $i < $end; ++$i) {
$this->addTransition(array_slice($tokens, $i, $end - $i), $token);
// The check runs after the transition is recorded, so the first window that
// is shorter than the threshold is still stored, and then the loop stops.
- if ($end - $i < 3) break;
+ if ($end - $i < 4) break;
}
}
}
}
public function generate($limit = 100) {
- $tokens = [];
+ $tokens = [''];
$generated = '';
while (strlen($generated) < $limit) {
$next = $this->randomNext($tokens);
- if (empty($next)) break;
+ if ($next === '') break;
$tokens[] = $next;
$generated .= $next;
}
$subsum = 0;
foreach ($entry['examples'] as $example => $subweight) {
$sublower = $subsum;
- $subsum += $subweight * $subweight;
+ $subsum += $subweight;
$examples[] = [$example, $sublower, $subsum];
}
}
private function randomNext($tokens) {
$cnt = count($tokens);
- for ($size = min($this->size, $cnt); $size >= 0; --$size) {
- $cmb = $this->generalize(array_slice($tokens, $cnt - $size, $size));
+ for ($size = min($this->size, $cnt); $size > 0; --$size) {
+ $cmb = $this->generalize(array_slice($tokens, -$size));
if (isset($this->transitions[$cmb])) {
$pick = $this->pick($this->transitions[$cmb]);
if (!is_null($pick)) {
foreach ($tokens as $token) {
$replaced = preg_replace('/\d+/', '0', $token);
$replaced = strtolower($replaced);
- $str .= empty($replaced) ? $token : $replaced;
+ $str .= $replaced;
}
return $str;
}