class ChatLib {
/**
 * Feeds one message into the n-gram tables.
 *
 * The message text is split on whitespace. The first token is recorded as a
 * sentence start, and every token (plus a trailing '' end marker) is recorded
 * as the successor of the 1 to 5 tokens that precede it.
 *
 * @param object $msg Object exposing the message text as ->text_content.
 * @return void
 */
public function addMessage($msg) {
    $parts = preg_split('/\s+/u', $msg->text_content);
    // preg_split() returns false on failure (for example invalid UTF-8 with the
    // /u modifier). Passing false to array_filter() throws a TypeError on PHP 8,
    // so treat an unsplittable message as empty instead.
    if ($parts === false) return;
    // NOTE(review): array_filter() without a callback also drops the literal
    // token "0", not only empty strings. Kept as is because generate() and
    // addStart() treat "0" as empty too; confirm whether that is intended.
    $tokens = array_values(array_filter($parts));
    if (empty($tokens)) return;
    // Empty-string sentinel so the last real token gets an "end of message" successor.
    $tokens[] = '';
    foreach ($tokens as $num => $token) {
        if ($num === 0) {
            $this->addStart($token);
        }
        if ($num > 0) {
            $this->addOne($tokens[$num - 1], $token);
        }
        if ($num > 1) {
            $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token);
        }
        if ($num > 2) {
            $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
        }
        if ($num > 3) {
            $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
        }
        if ($num > 4) {
            $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token);
        }
    }
}
// Replace the raw counters in every table with the weighted range lists built by index().
// NOTE(review): the enclosing method header is not visible in this excerpt.
$this->start = $this->index($this->start);
foreach ($this->one as $key => $value) {
$this->one[$key] = $this->index($this->one[$key]);
// index() skips entries whose count is 1, so a context may be left with no options; drop it.
+ if (empty($this->one[$key])) {
+ unset($this->one[$key]);
+ }
}
foreach ($this->two as $key => $value) {
$this->two[$key] = $this->index($this->two[$key]);
+ if (empty($this->two[$key])) {
+ unset($this->two[$key]);
+ }
}
foreach ($this->three as $key => $value) {
$this->three[$key] = $this->index($this->three[$key]);
+ if (empty($this->three[$key])) {
+ unset($this->three[$key]);
+ }
+ }
// Same treatment for the four-token and five-token context tables.
+ foreach ($this->four as $key => $value) {
+ $this->four[$key] = $this->index($this->four[$key]);
+ if (empty($this->four[$key])) {
+ unset($this->four[$key]);
+ }
+ }
+ foreach ($this->five as $key => $value) {
+ $this->five[$key] = $this->index($this->five[$key]);
+ if (empty($this->five[$key])) {
+ unset($this->five[$key]);
+ }
}
}
// Generates text: seeds the token list with randomStart() and keeps appending
// whatever randomNext() returns for the tokens collected so far.
// This change raises the default $limit from 50 to 75 and joins tokens with a space.
// NOTE(review): the loop header and the initialisation of $generated are not
// visible in this excerpt; presumably $limit bounds the number of tokens. Confirm.
- public function generate($limit = 50) {
+ public function generate($limit = 75) {
$tokens = [];
$start = $this->randomStart();
$tokens[] = $start;
$next = $this->randomNext($tokens);
// randomNext() returns '' when it has no candidate; note empty() is also true for "0".
if (empty($next)) break;
$tokens[] = $next;
- $generated .= $next;
+ $generated .= ' '.$next;
}
return $generated;
}
/**
 * Turns a map of raw counters into a list of cumulative weight ranges.
 *
 * Each input entry is ['count' => int, 'examples' => array]. Entries whose
 * count equals 1 are left out. Every remaining entry becomes
 * [key, lower, upper, examples], where the width of [lower, upper) is
 * intval(count ^ 1.4). Raw example counters (token => count) are converted the
 * same way using count squared; example lists that are already in range form
 * are passed through untouched.
 *
 * @param array $arr Map of key => ['count' => ..., 'examples' => ...].
 * @return array List of [key, lower, upper, examples] entries.
 */
private function index($arr) {
    $ranges = [];
    $total = 0;
    foreach ($arr as $key => $entry) {
        $count = $entry['count'];
        if ($count == 1) {
            continue;
        }
        $from = $total;
        $total += intval(pow($count, 1.4));

        $raw = $entry['examples'];
        if (is_array(end($raw))) {
            // The examples are already a list of ranges; reuse them as they are.
            $exampleRanges = $raw;
        } else {
            $exampleRanges = [];
            $running = 0;
            foreach ($raw as $example => $subweight) {
                $begin = $running;
                $running += $subweight * $subweight;
                $exampleRanges[] = [$example, $begin, $running];
            }
        }
        $ranges[] = [$key, $from, $total, $exampleRanges];
    }
    return $ranges;
}
/**
 * Picks a token to begin a generated message with.
 *
 * @return mixed A concrete example of the chosen start entry, or '' when
 *               pick() has nothing to choose from.
 */
private function randomStart() {
    $chosen = $this->pick($this->start);
    return is_null($chosen) ? '' : $this->exampleOf($chosen);
}
/**
 * Chooses the token that should follow the given token sequence.
 *
 * For each context length from five down to one, the trailing tokens are
 * generalized into a lookup key and one candidate is drawn from the matching
 * table. Each candidate gets a fixed weight that depends on the context
 * length, and a later (shorter) context that draws the same key replaces the
 * earlier candidate. The final choice is a weighted pick among candidates.
 *
 * @param array $tokens Tokens generated so far.
 * @return mixed A concrete example of the chosen entry, or '' when no table
 *               offers a candidate.
 */
private function randomNext($tokens) {
    $cnt = count($tokens);
    // [context length, table property, candidate weight], longest context first.
    $orders = [
        [5, 'five', 10],
        [4, 'four', 12],
        [3, 'three', 14],
        [2, 'two', 4],
        [1, 'one', 2],
    ];
    $candidates = [];
    foreach ($orders as $order) {
        list($size, $table, $weight) = $order;
        if ($cnt < $size) {
            continue;
        }
        $context = $this->generalize(array_slice($tokens, $cnt - $size, $size));
        if (!isset($this->{$table}[$context])) {
            continue;
        }
        $chosen = $this->pick($this->{$table}[$context]);
        if (is_null($chosen)) {
            continue;
        }
        $candidates[$chosen[0]] = [
            'count' => $weight,
            'examples' => $chosen[3],
        ];
    }
    if (empty($candidates)) {
        return '';
    }
    $winner = $this->pick($this->index($candidates));
    return $this->exampleOf($winner);
}
// Random selection from a list of range entries such as index() produces,
// i.e. entries shaped [key, lower, upper, ...].
// Returns null for an empty list; otherwise the whole selected entry
// (before this change only the entry's key was returned).
// NOTE(review): the loop that sets $min_index is not visible in this excerpt.
private function pick($options) {
+ if (empty($options)) return null;
// Element 2 of the last entry is its upper bound, i.e. the running total built by index().
$max = end($options)[2];
// NOTE(review): random_int() includes both bounds, so $num can equal $max; confirm the elided loop handles that.
$num = random_int(0, $max);
$min_index = 0;
break;
}
}
- return $options[$min_index][0];
+ return $options[$min_index];
}
/**
 * Counts $token as a message opener in the start table.
 *
 * Tokens that empty() considers empty are ignored.
 *
 * @param string $token First token of a message.
 * @return void
 */
private function addStart($token) {
    if (!empty($token)) {
        $this->increment($this->start, $token);
    }
}
/**
 * Records $token as a successor of the one-token context $one.
 *
 * @param string $one   Preceding token.
 * @param string $token Token that followed it.
 * @return void
 */
private function addOne($one, $token) {
    $context = $this->generalize([$one]);
    $table = &$this->one;
    if (!isset($table[$context])) {
        // First sighting of this context: start with an empty successor map.
        $table[$context] = [];
    }
    $this->increment($table[$context], $token);
}
/**
 * Records $token as a successor of the two-token context ($one, $two).
 *
 * @param string $one   Token two positions back.
 * @param string $two   Token one position back.
 * @param string $token Token that followed them.
 * @return void
 */
private function addTwo($one, $two, $token) {
    $context = $this->generalize([$one, $two]);
    $table = &$this->two;
    if (!isset($table[$context])) {
        // First sighting of this context: start with an empty successor map.
        $table[$context] = [];
    }
    $this->increment($table[$context], $token);
}
/**
 * Records $token as a successor of the three-token context ($one, $two, $three).
 *
 * @param string $one   Token three positions back.
 * @param string $two   Token two positions back.
 * @param string $three Token one position back.
 * @param string $token Token that followed them.
 * @return void
 */
private function addThree($one, $two, $three, $token) {
    $context = $this->generalize([$one, $two, $three]);
    $table = &$this->three;
    if (!isset($table[$context])) {
        // First sighting of this context: start with an empty successor map.
        $table[$context] = [];
    }
    $this->increment($table[$context], $token);
}
+
/**
 * Records $token as a successor of the four-token context ($one .. $four).
 *
 * @param string $one   Token four positions back.
 * @param string $two   Token three positions back.
 * @param string $three Token two positions back.
 * @param string $four  Token one position back.
 * @param string $token Token that followed them.
 * @return void
 */
private function addFour($one, $two, $three, $four, $token) {
    $context = $this->generalize([$one, $two, $three, $four]);
    $table = &$this->four;
    if (!isset($table[$context])) {
        // First sighting of this context: start with an empty successor map.
        $table[$context] = [];
    }
    $this->increment($table[$context], $token);
}
+
/**
 * Records $token as a successor of the five-token context ($one .. $five).
 *
 * @param string $one   Token five positions back.
 * @param string $two   Token four positions back.
 * @param string $three Token three positions back.
 * @param string $four  Token two positions back.
 * @param string $five  Token one position back.
 * @param string $token Token that followed them.
 * @return void
 */
private function addFive($one, $two, $three, $four, $five, $token) {
    $context = $this->generalize([$one, $two, $three, $four, $five]);
    $table = &$this->five;
    if (!isset($table[$context])) {
        // First sighting of this context: start with an empty successor map.
        $table[$context] = [];
    }
    $this->increment($table[$context], $token);
}
+
/**
 * Bumps the counter for $token inside the counter map $which.
 *
 * Counters are grouped under the generalized form of the token. Each group
 * keeps a total 'count' and, under 'examples', a per-spelling count of the
 * concrete tokens that mapped to it.
 *
 * @param array  $which Counter map, modified in place.
 * @param string $token Concrete token to count.
 * @return void
 */
private function increment(&$which, $token) {
    $bucket = $this->generalize([$token]);
    if (isset($which[$bucket])) {
        $which[$bucket]['count'] += 1;
        $seen = isset($which[$bucket]['examples'][$token])
            ? $which[$bucket]['examples'][$token]
            : 0;
        $which[$bucket]['examples'][$token] = $seen + 1;
        return;
    }
    // First occurrence of this generalized form.
    $which[$bucket] = [
        'count' => 1,
        'examples' => [$token => 1],
    ];
}
/**
 * Builds a normalised lookup key from a sequence of tokens.
 *
 * Each token has its non-word characters removed, every run of digits replaced
 * by a single '0', and is lower-cased with strtolower(). If nothing is left
 * (for instance a token made only of punctuation), the raw token is used
 * instead. The per-token results are concatenated without a separator.
 *
 * @param array $tokens Tokens to combine into one key.
 * @return string The combined key.
 */
private function generalize($tokens) {
    $str = '';
    foreach ($tokens as $token) {
        $replaced = preg_replace('/\W/u', '', $token);
        $replaced = preg_replace('/\d+/', '0', (string) $replaced);
        $replaced = strtolower(trim((string) $replaced));
        // Compare against '' explicitly: empty('0') is true, which made every
        // all-digit token ("123" -> "0") fall back to its raw form, so numbers
        // were never generalized.
        $str .= ($replaced === '') ? $token : $replaced;
    }
    return $str;
}
+
/**
 * Draws one concrete spelling for an entry returned by pick().
 *
 * @param array $pick Entry whose element 3 is the list of example ranges.
 * @return mixed Element 0 of the example entry that pick() selected.
 */
private function exampleOf($pick) {
    $chosen = $this->pick($pick[3]);
    return $chosen[0];
}
+
// Counters for message-opening tokens; filled through addStart().
private $start = [];
// Successor counters keyed by the generalized preceding token(s), one table per
// context length (one to five tokens); filled through addOne() .. addFive().
private $one = [];
private $two = [];
private $three = [];
+ private $four = [];
+ private $five = [];
}