class ChatLib {
/**
 * Stores the state size and precompiles the category patterns.
 *
 * Each pattern listed under a category is wrapped in `\b ... \b` with the
 * `u` modifier, and the converted list is stored under the upper-cased
 * category name. The removed (`-`) line additionally wrapped that key in
 * `%` signs; the added (`+`) line uses the bare upper-cased name.
 *
 * @param int $size Number of tokens that form one lookup state (used as the
 *                  window length in addText() and makeKey()). The default
 *                  changes from 6 to 3 in this diff.
 */
- public function __construct($size = 6) {
+ public function __construct($size = 3) {
$this->size = $size;
-
$converted = [];
foreach ($this->categories as $category => $patterns) {
$converted_patterns = [];
foreach ($patterns as $pattern) {
// $pattern is concatenated unescaped, so every entry is treated as a regex fragment.
$converted_patterns[] = '/\b'.$pattern.'\b/u';
}
- $converted['%'.strtoupper($category).'%'] = $converted_patterns;
+ $converted[strtoupper($category)] = $converted_patterns;
}
// From here on $this->categories maps replacement name => list of compiled patterns.
$this->categories = $converted;
}
/**
 * Feeds one chat message into the model, optionally with a context.
 *
 * New (`+`) behaviour, in order of precedence:
 *  - reply messages use whatever getReplyParent() returns as context;
 *  - otherwise, if $previous is given, its text_content is the context;
 *  - otherwise the text is added without context.
 *
 * @param ChatLog      $msg      Message whose text_content is learned.
 * @param ChatLog|null $previous Optional preceding message (new parameter).
 */
- public function addMessage(ChatLog $msg) {
- $this->addText($msg->text_content);
+ public function addMessage(ChatLog $msg, ChatLog $previous = null) {
+ if ($msg->isReply()) {
// NOTE(review): the $previous branch passes text_content (a string); confirm that
// getReplyParent() also yields text and not a ChatLog, because addText() hands
// $context on to splitText(), which calls trim()/preg_split() on it.
+ $this->addText($msg->text_content, $msg->getReplyParent());
+ } else if (!is_null($previous)) {
+ $this->addText($msg->text_content, $previous->text_content);
+ } else {
+ $this->addText($msg->text_content);
+ }
}
/**
 * Records the transitions contained in $text.
 *
 * New (`+`) version: the text is tokenized with leading padding, and for every
 * window of $this->size consecutive tokens the token that follows the window
 * is counted via addTransition(). When a context is supplied, the text is
 * tokenized again with the context written into the leading padding slots and
 * the first windows are counted a second time under their context-aware keys.
 *
 * The removed (`-`) lines are the previous variable-length implementation,
 * which also recorded examples through addExample().
 *
 * @param string $text    Text to learn from.
 * @param string $context Optional preceding text (new parameter, default '').
 */
- public function addText($text) {
+ public function addText($text, $context = '') {
$tokens = $this->tokenize($text);
- if (empty($tokens)) return;
- $tokens[] = '';
- foreach ($tokens as $num => $token) {
- if ($num === 0) {
- $this->addTransition([], $token);
- } else {
- $start = max(0, $num - $this->size - 1);
- $end = $num;
- for ($i = $start; $i < $end; ++$i) {
- $this->addTransition(array_slice($tokens, $i, $end - $i), $token);
- if ($end - $i < 5) break;
- }
// For blank text tokenize() returns only the padding, so this loop runs zero times.
+ for ($i = 0; $i < count($tokens) - $this->size; ++$i) {
+ $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]);
+ }
// NOTE(review): empty() is also true for the string '0', so a context of "0" is skipped.
+ if (!empty($context)) {
+ $tokens = $this->tokenizeWithContext($text, $context);
// Only the first size-1 windows can contain the slots that tokenizeWithContext()
// overwrites; the count is further capped by the number of available windows.
+ $size = min($this->size - 1, count($tokens) - $this->size);
+ for ($i = 0; $i < $size; ++$i) {
+ $this->addTransition(array_slice($tokens, $i, $this->size), $tokens[$i + $this->size]);
}
- $this->addExample(array_slice($tokens, 0, $num), $token);
}
}
/**
 * Converts the collected successor counts into the range lists built by index().
 *
 * New (`+`) version: every entry of $this->transitions is replaced by
 * index($values) without a minimum weight, and nothing is removed. The
 * removed (`-`) lines dropped low-weight/empty entries and also processed
 * $this->examples.
 *
 * NOTE(review): this does not look safe to call twice on the same instance -
 * a second pass would hand the already indexed triples to index(), which
 * adds each value to a numeric sum. Confirm callers compile only once.
 */
public function compile() {
foreach ($this->transitions as $key => $values) {
- $this->transitions[$key] = $this->index($values, 2);
- if (empty($this->transitions[$key])) {
- unset($this->transitions[$key]);
- }
- }
- foreach ($this->examples as $key => $values) {
- if (in_array($key, ['', ' '])) {
- unset($this->examples[$key]);
- continue;
- }
- $this->examples[$key] = $this->index($values, 1);
- if (empty($this->examples[$key]) || (count($this->examples[$key]) === 1 && $this->examples[$key][0][0] === $key)) {
- unset($this->examples[$key]);
- }
+ $this->transitions[$key] = $this->index($values);
}
}
/**
 * Generates a text, optionally seeded with a context.
 *
 * New (`+`) version: when $context is not null, the start state is built with
 * tokenizeWithContext('', $context) and passed to loop(); a non-empty result
 * is returned immediately. Otherwise (or when that attempt produced an empty
 * result) generation starts from the plain padding returned by tokenize('').
 *
 * The removed (`-`) version took a character limit instead and built the
 * string token by token.
 *
 * @param string|null $context Optional text the output should follow.
 * @return string Result of loop(); may be empty when no transition matches.
 */
- public function generate($limit = 100) {
- $tokens = [''];
- $generated = '';
- while (strlen($generated) < $limit) {
- $next = $this->randomNext($tokens);
- if ($next === '') break;
- $tokens[] = $next;
- $generated .= $next;
+ public function generate($context = null) {
+ if (!is_null($context)) {
+ $tokens = $this->tokenizeWithContext('', $context);
+ $generated = $this->loop($tokens);
// Fall through to the context-free attempt when the seeded state produced nothing.
+ if (!empty($generated)) {
+ return $generated;
+ }
}
- return $generated;
+ $tokens = $this->tokenize('');
+ return $this->loop($tokens);
}
/**
 * Writes the model to "<name>.json" on the `chatlib` storage disk.
 *
 * The JSON document contains the state size and the transitions table; the
 * `examples` entry is removed from the payload in this diff.
 *
 * @param string $name File name without the ".json" extension.
 */
public function saveAs($name) {
$data = [
'size' => $this->size,
'transitions' => $this->transitions,
- 'examples' => $this->examples,
];
Storage::disk('chatlib')->put($name.'.json', json_encode($data));
}
$data = json_decode(Storage::disk('chatlib')->get($name.'.json'), true);
$this->size = $data['size'];
$this->transitions = $data['transitions'];
- $this->examples = $data['examples'];
}
/**
 * Turns a map of key => weight into a list of cumulative ranges.
 *
 * Each visible iteration appends [key, running total before, running total
 * after], so consecutive entries cover adjacent ranges whose widths equal
 * their weights. The `$min_weight` parameter and its filter are removed in
 * this diff, so every entry is kept.
 *
 * NOTE(review): the closing brace of the foreach is not visible in this
 * excerpt; only the lines shown here are described.
 *
 * @param array $arr Map of key => weight.
 * @return array List of [key, lower, upper] triples.
 */
- private function index($arr, $min_weight = 2) {
+ private function index($arr) {
$result = [];
$sum = 0;
foreach ($arr as $key => $weight) {
- if ($weight < $min_weight) continue;
$lower = $sum;
$sum += $weight;
$result[] = [$key, $lower, $sum];
return $result;
}
// The removed (`-`) lines are the previous randomNext(), which tried
// progressively shorter contexts and resolved the pick through exampleOf().
// The added (`+`) lines introduce loop() and a simpler randomNext().
- private function randomNext($tokens) {
- $cnt = count($tokens);
- for ($size = min($this->size, $cnt); $size > 0; --$size) {
- $cmb = $this->generalize(array_slice($tokens, -$size));
- if (isset($this->transitions[$cmb])) {
- $pick = $this->pick($this->transitions[$cmb]);
- if (!is_null($pick)) {
- return $this->exampleOf($pick, $tokens);
- }
- }
/**
 * Extends $tokens with randomNext() results and returns the joined output.
 *
 * Stops when the token list (including the leading padding/context slots)
 * reaches 50 entries or when randomNext() returns the single-space end
 * marker. The marker itself is not appended.
 *
 * @param array $tokens Start state, as built by tokenize()/tokenizeWithContext().
 * @return string Result of untokenize() on the extended list.
 */
+ private function loop($tokens) {
+ while (count($tokens) < 50) {
+ $next = $this->randomNext($tokens);
+ if ($next === ' ') break;
+ $tokens[] = $next;
}
- return '';
+ return $this->untokenize($tokens);
+ }
+
/**
 * Picks the next token for the state formed by the end of $tokens.
 *
 * Returns ' ' (the end marker checked in loop()) when the key produced by
 * makeKey() has no transitions; otherwise returns element 0 of the entry
 * chosen by pick().
 */
+ private function randomNext($tokens) {
+ $key = $this->makeKey($tokens);
+ if (!isset($this->transitions[$key])) return ' ';
+ $pick = $this->pick($this->transitions[$key]);
// NOTE(review): the removed version checked is_null($pick) before using it;
// confirm pick() cannot return null for a non-empty option list.
+ return $pick[0];
}
private function pick($options) {
return $options[$min_index];
}
/**
 * Counts one observation of $next following the given state.
 *
 * New (`+`) version: the state key comes from makeKey($tokens) and $next is
 * stored as-is; the counter is created with 1 or incremented. The removed
 * (`-`) version generalized both the state and the successor first.
 *
 * @param array  $tokens Tokens forming the state (makeKey() uses the last $this->size).
 * @param string $next   Token observed after the state.
 */
- private function addTransition($state, $next) {
- $ctx = $this->generalize($state);
- $cmb = $this->generalize([$next]);
- if (!isset($this->transitions[$ctx])) {
- $this->transitions[$ctx] = [];
+ private function addTransition($tokens, $next) {
+ $key = $this->makeKey($tokens);
+ if (!isset($this->transitions[$key])) {
+ $this->transitions[$key] = [];
}
- if (!isset($this->transitions[$ctx][$cmb])) {
- $this->transitions[$ctx][$cmb] = 1;
+ if (!isset($this->transitions[$key][$next])) {
+ $this->transitions[$key][$next] = 1;
} else {
- ++$this->transitions[$ctx][$cmb];
+ ++$this->transitions[$key][$next];
}
}
// The removed (`-`) lines are addExample(); the added (`+`) lines introduce
// splitText() and makeKey(), which share the closing braces below.
- private function addExample($context, $token) {
- $cmb = $this->generalize([$token]);
- if (!isset($this->examples[$cmb])) {
- $this->examples[$cmb] = [];
- }
- if (!isset($this->examples[$cmb][$token])) {
- $this->examples[$cmb][$token] = 1;
- } else {
- ++$this->examples[$cmb][$token];
/**
 * Splits text into tokens on runs of whitespace.
 *
 * Returns an empty array when the text is blank after trimming.
 *
 * NOTE(review): trim() is only used for the blank check; the untrimmed text
 * is what gets split, so leading or trailing whitespace produces empty-string
 * tokens at the ends. Confirm whether that is intended.
 */
+ private function splitText($text) {
+ if (trim($text) === '') return [];
+ return preg_split('/\s+/u', $text);
+ }
+
/**
 * Builds the normalized lookup key for the state at the end of $tokens.
 *
 * Steps: join the last $this->size tokens with spaces, lower-case the result
 * (multibyte aware), delete the listed punctuation characters, collapse every
 * digit run to a single "0", then replace matches of each category's patterns
 * with that category's key from $this->categories.
 *
 * @param array $tokens Token list; only the last $this->size entries are used.
 * @return string Normalized key used to index $this->transitions.
 */
+ private function makeKey($tokens) {
+ $key = $this->joinText(array_slice($tokens, $this->size * -1));
+ $key = mb_strtolower($key);
+ $key = str_replace(['.', ',', ':', ';', '!', '?', '^', '+', '-', '"', "'", '(', ')', '[', ']'], '', $key);
+ $key = preg_replace('/\d+/u', '0', $key);
+ foreach ($this->categories as $category => $patterns) {
+ $key = preg_replace($patterns, $category, $key);
}
+ return $key;
}
// The removed (`-`) lines are the previous word-boundary tokenize(); the
// added (`+`) lines introduce joinText() in its place.
- private function tokenize($str) {
- return array_values(array_filter(preg_split('/\b/u', $str), function($token) {
- if ($token === '') return false;
- if (preg_match('/cheer\d+/u', strtolower($token))) return false;
- return true;
- }));
/**
 * Joins tokens into one string, separated by single spaces.
 */
+ private function joinText($tokens) {
+ return implode(' ', $tokens);
}
// The removed (`-`) lines are generalize() together with its alias lookup;
// the added (`+`) lines introduce untokenize() in its place.
- private function generalize($tokens) {
- $str = '';
- foreach ($tokens as $token) {
- $replaced = preg_replace('/\d+/u', '0', $token);
- $replaced = preg_replace('/\s+/u', ' ', $replaced);
- $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
- $replaced = strtolower($replaced);
- foreach ($this->aliases as $canonical => $variants) {
- if (in_array($replaced, $variants)) {
- $replaced = $canonical;
- break;
- }
- if ($replaced === $canonical) {
- break;
- }
- }
- $str .= $replaced;
- }
- foreach ($this->categories as $category => $patterns) {
- $str = preg_replace($patterns, $category, $str);
- }
- return $str;
/**
 * Converts a token list back into text.
 *
 * Skips the first $this->size entries (the slots tokenize() prepends as
 * padding and tokenizeWithContext() may fill with context) and joins the
 * remaining tokens with spaces.
 */
+ private function untokenize($tokens) {
+ return $this->joinText(array_slice($tokens, $this->size));
}
// The removed (`-`) lines are exampleOf(); the added (`+`) lines introduce the
// new tokenize() and tokenizeWithContext().
- private function exampleOf($pick, $context) {
- if (!isset($this->examples[$pick[0]])) {
- return $pick[0];
/**
 * Splits $text and frames it with marker tokens.
 *
 * $this->size single-space tokens are prepended as padding; a single-space
 * end marker is appended only when the text contained at least one token.
 * Blank text therefore yields just the padding.
 */
+ private function tokenize($text) {
+ $tokens = $this->splitText($text);
+ $combined = array_merge(array_fill(0, $this->size, ' '), $tokens);
+ if (!empty($tokens)) {
+ $combined[] = ' ';
}
- if (isset($this->examples[$pick[0]])) {
- $example = $this->pick($this->examples[$pick[0]]);
- return $example[0];
+ return $combined;
+ }
+
/**
 * Like tokenize(), but writes the end of $context into the leading padding.
 *
 * Up to the last $this->size - 1 context tokens are used. They are placed
 * right-aligned into slots 0 .. size-2, so the final context token lands at
 * index size-2 and the slot at index size-1 keeps its padding token.
 *
 * NOTE(review): with $this->size === 1 the slice offset becomes 0 (the whole
 * context is taken) and the target indices become negative, which would add
 * new keys rather than overwrite padding. Confirm size >= 2 is guaranteed.
 */
+ private function tokenizeWithContext($text, $context) {
+ $combined = $this->tokenize($text);
+ $context_tokens = array_slice($this->splitText($context), $this->size * -1 + 1);
+ for ($i = 0; $i < count($context_tokens); ++$i) {
// Walk the context backwards while filling slots from size-2 downwards.
+ $combined[$this->size - $i - 2] = $context_tokens[count($context_tokens) - $i - 1];
}
- return $pick[0];
+ return $combined;
}
// Number of tokens that make up one lookup state; set in the constructor.
private $size;
// Maps a key from makeKey() to its successors: raw counts while learning
// (addTransition), lists of [token, lower, upper] after compile().
private $transitions = [];
// The `-` lines below remove the $examples store and the $aliases table.
- private $examples = [];
-
- private $aliases = [
- 'chest' => ['kiste'],
- 'einen' => ['n', 'nen'],
- 'musik' => ['mukke'],
- 'schade' => ['schad', 'schaade'],
- ];
private $categories = [
'fail' => [