text_content))); if (empty($tokens)) return; $tokens [] = ''; foreach ($tokens as $num => $token) { if ($num === 0) { $this->addStart($token); } if ($num > 0) { $this->addOne($tokens[$num - 1], $token); } if ($num > 1) { $this->addTwo($tokens[$num - 2], $tokens[$num - 1], $token); } if ($num > 2) { $this->addThree($tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); } if ($num > 3) { $this->addFour($tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); } if ($num > 4) { $this->addFive($tokens[$num - 5], $tokens[$num - 4], $tokens[$num - 3], $tokens[$num - 2], $tokens[$num - 1], $token); } } }

    /**
     * Converts the raw counters collected during training into the
     * cumulative-weight tables that pick() samples from.
     *
     * Contexts whose followers were all pruned by index() are removed.
     *
     * NOTE: index() reads $entry['count'], which only raw (un-compiled)
     * entries have, so this must be called once, after all training input
     * has been added.
     */
    public function compile()
    {
        $this->start = $this->index($this->start);
        $this->one = $this->compileTable($this->one);
        $this->two = $this->compileTable($this->two);
        $this->three = $this->compileTable($this->three);
        $this->four = $this->compileTable($this->four);
        $this->five = $this->compileTable($this->five);
    }

    /**
     * Indexes every follower list of one n-gram table and drops the contexts
     * that end up with no followers.
     *
     * @param array $table context key => raw follower counters
     * @return array context key => indexed follower list (see index())
     */
    private function compileTable($table)
    {
        $compiled = [];
        foreach ($table as $context => $followers) {
            $indexed = $this->index($followers);
            if (!empty($indexed)) {
                $compiled[$context] = $indexed;
            }
        }

        return $compiled;
    }

    /**
     * Generates text by picking a random start token and appending random
     * followers until the text is at least $limit bytes long or no follower
     * is available.
     *
     * @param int $limit length (in bytes, per strlen) at which generation stops
     * @return string the generated tokens joined by single spaces
     */
    public function generate($limit = 75)
    {
        $start = $this->randomStart();
        $tokens = [$start];
        $generated = (string) $start;

        while (strlen($generated) < $limit) {
            $next = $this->randomNext($tokens);
            // Only the empty string means "nothing to append"; empty() would
            // also stop on the perfectly valid token "0".
            if ($next === '') {
                break;
            }
            $tokens[] = $next;
            $generated .= ' ' . $next;
        }

        return $generated;
    }

    /**
     * Turns a map of raw counters into a list of cumulative weight ranges.
     *
     * Each input entry is ['count' => int, 'examples' => array]. Entries with
     * a count of exactly 1 are skipped. Every remaining entry becomes
     * [key, lower, upper, examples], where the width of the range is
     * intval(count ^ 1.4). Examples are either raw (token => count), in which
     * case they are converted to [token, lower, upper] ranges weighted by
     * count squared, or already in that range form, in which case they are
     * passed through unchanged.
     *
     * @param array $arr key => ['count' => int, 'examples' => array]
     * @return array list of [key, lower, upper, examples]
     */
    private function index($arr)
    {
        $result = [];
        $sum = 0;

        foreach ($arr as $key => $entry) {
            $weight = (int) $entry['count'];
            if ($weight === 1) {
                continue;
            }

            $lower = $sum;
            $sum += intval(pow($weight, 1.4));

            if (is_array(end($entry['examples']))) {
                // Already in [token, lower, upper] form.
                $examples = $entry['examples'];
            } else {
                $examples = [];
                $subsum = 0;
                foreach ($entry['examples'] as $example => $subweight) {
                    $sublower = $subsum;
                    $subsum += $subweight * $subweight;
                    $examples[] = [$example, $sublower, $subsum];
                }
            }

            $result[] = [$key, $lower, $sum, $examples];
        }

        return $result;
    }

    /**
     * Picks a random starting token from the compiled start table.
     *
     * @return string a concrete token, or '' when there is nothing to pick
     */
    private function randomStart()
    {
        $pick = $this->pick($this->start);
        if ($pick === null) {
            return '';
        }

        return $this->exampleOf($pick);
    }

    /**
     * Picks the next token given the tokens generated so far.
     *
     * For every context length from 5 down to 1 that the history can supply,
     * one follower is drawn from the matching table. The drawn candidates are
     * then weighted by context length (10, 12, 14, 4 and 2 for lengths 5..1)
     * and a final candidate is drawn from them. Candidates are keyed by their
     * generalized form, so a candidate from a shorter context replaces one
     * with the same key drawn from a longer context.
     *
     * @param array $tokens tokens generated so far
     * @return string the next token, or '' when no table has a follower
     */
    private function randomNext($tokens)
    {
        $cnt = count($tokens);

        // [context length, table, candidate weight], longest context first.
        $orders = [
            [5, $this->five, 10],
            [4, $this->four, 12],
            [3, $this->three, 14],
            [2, $this->two, 4],
            [1, $this->one, 2],
        ];

        $picks = [];
        foreach ($orders as $order) {
            list($length, $table, $weight) = $order;
            if ($cnt < $length) {
                continue;
            }

            $cmb = $this->generalize(array_slice($tokens, $cnt - $length, $length));
            if (!isset($table[$cmb])) {
                continue;
            }

            $pick = $this->pick($table[$cmb]);
            if ($pick !== null) {
                $picks[$pick[0]] = [
                    'count' => $weight,
                    'examples' => $pick[3],
                ];
            }
        }

        if (empty($picks)) {
            return '';
        }

        return $this->exampleOf($this->pick($this->index($picks)));
    }

    /**
     * Draws one entry from a list of cumulative weight ranges, with
     * probability proportional to the width of each range.
     *
     * Ranges are treated as half-open [lower, upper) and the random number is
     * drawn from [0, total - 1]. Drawing from [0, total] with inclusive
     * bounds on both sides (as was done before) skews the result: two
     * entries of weight 2 each were picked with probability 3/5 and 2/5.
     *
     * @param array $options list of entries whose index 1 is the lower and
     *                       index 2 the upper cumulative bound
     * @return array|null the chosen entry, or null when nothing can be drawn
     */
    private function pick($options)
    {
        if (empty($options)) {
            return null;
        }

        $max = end($options)[2];
        if ($max < 1) {
            // No weight to distribute; random_int(0, -1) would throw.
            return null;
        }

        $num = random_int(0, $max - 1);

        // Binary search for the first entry whose upper bound exceeds $num.
        $min_index = 0;
        $max_index = count($options) - 1;
        while ($min_index < $max_index) {
            $cur_index = intdiv($min_index + $max_index, 2);
            if ($options[$cur_index][2] > $num) {
                $max_index = $cur_index;
            } else {
                $min_index = $cur_index + 1;
            }
        }

        return $options[$min_index];
    }

    /**
     * Records a token as a possible first token.
     *
     * Only a missing/empty-string token is ignored; the token "0" is valid
     * and must not be dropped, which empty() would do.
     */
    private function addStart($token)
    {
        if ($token === null || $token === '') {
            return;
        }

        $this->increment($this->start, $token);
    }

    /** Records $token as a follower of the one-token context. */
    private function addOne($one, $token)
    {
        $this->record($this->one, [$one], $token);
    }

    /** Records $token as a follower of the two-token context. */
    private function addTwo($one, $two, $token)
    {
        $this->record($this->two, [$one, $two], $token);
    }

    /** Records $token as a follower of the three-token context. */
    private function addThree($one, $two, $three, $token)
    {
        $this->record($this->three, [$one, $two, $three], $token);
    }

    /** Records $token as a follower of the four-token context. */
    private function addFour($one, $two, $three, $four, $token)
    {
        $this->record($this->four, [$one, $two, $three, $four], $token);
    }

    /** Records $token as a follower of the five-token context. */
    private function addFive($one, $two, $three, $four, $five, $token)
    {
        $this->record($this->five, [$one, $two, $three, $four, $five], $token);
    }

    /**
     * Shared implementation of addOne() .. addFive(): counts $token under the
     * generalized key of $context in the given table.
     *
     * @param array $table   n-gram table, modified in place
     * @param array $context preceding tokens, oldest first
     * @param mixed $token   the token that followed the context
     */
    private function record(&$table, $context, $token)
    {
        $cmb = $this->generalize($context);
        if (!isset($table[$cmb])) {
            $table[$cmb] = [];
        }

        $this->increment($table[$cmb], $token);
    }

    /**
     * Counts one occurrence of $token in a raw counter map.
     *
     * The entry is keyed by the generalized token; the concrete spelling is
     * counted separately under 'examples'.
     *
     * @param array $which counter map, modified in place
     * @param mixed $token the concrete token that was seen
     */
    private function increment(&$which, $token)
    {
        $generalized = $this->generalize([$token]);

        if (!isset($which[$generalized])) {
            $which[$generalized] = [
                'count' => 0,
                'examples' => [],
            ];
        }

        ++$which[$generalized]['count'];

        if (!isset($which[$generalized]['examples'][$token])) {
            $which[$generalized]['examples'][$token] = 0;
        }
        ++$which[$generalized]['examples'][$token];
    }

    /**
     * Builds the lookup key for a sequence of tokens.
     *
     * Each token has non-word characters removed, every run of digits
     * replaced by "0", and is trimmed and lower-cased; a token that reduces
     * to nothing contributes its raw text instead. The per-token results are
     * concatenated without a separator.
     *
     * The "reduces to nothing" test compares against '' on purpose: empty()
     * is also true for "0", which made every purely numeric token fall back
     * to its raw text instead of the generalized "0".
     *
     * @param array $tokens tokens to generalize
     * @return string the concatenated key
     */
    private function generalize($tokens)
    {
        $str = '';

        foreach ($tokens as $token) {
            $token = (string) $token;

            $replaced = preg_replace('/\W/u', '', $token);
            if ($replaced === null) {
                // preg_replace failed (e.g. invalid UTF-8 under the u
                // modifier): fall back to the raw token.
                $str .= $token;
                continue;
            }

            $replaced = (string) preg_replace('/\d+/', '0', $replaced);
            $replaced = strtolower(trim($replaced));

            $str .= ($replaced === '') ? $token : $replaced;
        }

        return $str;
    }

    /**
     * Draws a concrete token from the examples (index 3) of a picked entry.
     *
     * @param array|null $pick an entry as returned by pick()
     * @return string the concrete token, or '' when there is none to draw
     */
    private function exampleOf($pick)
    {
        if (!is_array($pick) || empty($pick[3])) {
            return '';
        }

        $example = $this->pick($pick[3]);
        if ($example === null) {
            return '';
        }

        return (string) $example[0];
    }

    /** Counters (indexed list after compile()) of tokens seen first. */
    private $start = [];

    /** Followers keyed by the generalized previous token. */
    private $one = [];

    /** Followers keyed by the generalized previous two tokens. */
    private $two = [];

    /** Followers keyed by the generalized previous three tokens. */
    private $three = [];

    /** Followers keyed by the generalized previous four tokens. */
    private $four = [];

    /** Followers keyed by the generalized previous five tokens. */
    private $five = [];
}