* @return int
*/
public function handle() {
+ $count = 0;
+
$de = new ChatLib();
$en = new ChatLib();
->where('created_at', '<', now()->sub(7, 'day'))
->whereNotIn('classification', ['gg', 'gl', 'number', 'o7'])
->whereRaw('LENGTH(`text_content`) > 12')
- ->chunk(5000, function ($msgs) use ($de, $en) {
+ ->chunk(5000, function ($msgs) use (&$count, $de, $en) {
foreach ($msgs as $msg) {
if ($msg->detected_language === 'de') {
$de->addMessage($msg);
$de->addMessage($msg);
$en->addMessage($msg);
}
+ ++$count;
}
+ $this->line($count);
});
$de->compile();
private function tokenize($str) {
return array_values(array_filter(preg_split('/\b/u', $str), function($token) {
- if (empty($token)) return false;
+ if ($token === '') return false;
if (preg_match('/cheer\d+/u', strtolower($token))) return false;
return true;
}));
+ /**
+  * Normalise a list of tokens into one generalised, lower-case string.
+  *
+  * Per token: every run of digits becomes `0`, every run of whitespace
+  * becomes a single space, any character repeated three or more times is
+  * reduced to two, and the result is lower-cased with strtolower(). A token
+  * that then equals a variant listed in $this->aliases is swapped for the
+  * canonical spelling (the array key). The tokens are concatenated, and
+  * every match of a $this->categories fragment, wrapped in `\b...\b`, is
+  * replaced by `%CATEGORY%` with the category key upper-cased.
+  *
+  * @param string[] $tokens tokens to join (presumably the output of tokenize() - confirm against callers)
+  * @return string the generalised text
+  */
private function generalize($tokens) {
$str = '';
foreach ($tokens as $token) {
- $replaced = preg_replace('/\d+/', '0', $token);
- $replaced = preg_replace('/\s+/', ' ', $token);
- $replaced = preg_replace('/(.)\1{2,}/', '$1', $token);
+ // Each step has to consume the previous step's result. Passing $token
+ // to all three calls would keep only the effect of the last one and
+ // silently drop the digit and whitespace normalisation.
+ $replaced = preg_replace('/\d+/u', '0', $token);
+ $replaced = preg_replace('/\s+/u', ' ', $replaced);
+ $replaced = preg_replace('/(.)\1{2,}/u', '$1$1', $replaced);
$replaced = strtolower($replaced);
+ foreach ($this->aliases as $canonical => $variants) {
+ // Strict comparison: the token and the variants are all strings.
+ if (in_array($replaced, $variants, true)) {
+ $replaced = $canonical;
+ break;
+ }
+ // The token already is a canonical spelling: stop scanning.
+ if ($replaced === $canonical) {
+ break;
+ }
+ }
$str .= $replaced;
}
+ // Categories are applied in declaration order on the joined string, so a
+ // fragment of an earlier category can consume text before a later one sees it.
+ foreach ($this->categories as $category => $patterns) {
+ foreach ($patterns as $pattern) {
+ $str = preg_replace('/\b'.$pattern.'\b/u', '%'.strtoupper($category).'%', $str);
+ }
+ }
return $str;
}
private $size = 7;
private $transitions = [];
+ /**
+  * Canonical spelling => list of variant spellings.
+  *
+  * generalize() replaces a lower-cased token that equals one of the
+  * variants with the canonical key.
+  */
+ private $aliases = [
+ 'chest' => ['kiste'],
+ 'einen' => ['n', 'nen'],
+ 'musik' => ['mukke'],
+ 'schade' => ['schad'],
+ ];
+
+ /**
+  * Category key => list of regular-expression fragments.
+  *
+  * generalize() wraps each fragment in `\b...\b` (with the `u` flag, no `i`
+  * flag) and replaces every match with `%KEY%`, the key upper-cased.
+  * Fragments must be lower case because generalize() lower-cases every
+  * token before matching.
+  *
+  * NOTE(review): a `0` inside a fragment appears to stand for a run of
+  * digits (generalize() has a `\d+` => `0` step).
+  * NOTE(review): because of the `\b` wrapper, a fragment that starts with a
+  * non-word character (`:d`, `<3`) only matches directly after a word
+  * character, not at the start of a message or after a space.
+  * NOTE(review): categories are applied in declaration order, so earlier
+  * ones shadow later ones: `ice ?(palace)?` (zelda_dungeon) consumes
+  * "ice " before `(ice|fire) ?rod` (zelda_item) is tried, and
+  * `desert( palace)?` pre-empts `desert( ledge)?` (zelda_location).
+  */
+ private $categories = [
+ 'fail' => [
+ 'failfish',
+ 'holysm0notlikethis',
+ 'notlikethis',
+ 'tetobridge0',
+ 'vinter0clown',
+ ],
+
+ 'hype' => [
+ 'dergoaparty',
+ 'dinodance',
+ 'elemen0party',
+ 'muftaahype',
+ 'luckwuhype',
+ 'olliwahype',
+ 'osora0umbrihype',
+ 'partyhat',
+ 'peepocheer',
+ 'rei0hype',
+ 'sakayahype',
+ 'tetotroete',
+ 'ticknaboargeil0',
+ 'ticknahype0',
+ ],
+
+ 'kappa' => [
+ 'kappa(claus|hd)?',
+ ],
+
+ 'jam' => [
+ '(cat|dog|rat)jam',
+ 'kanash0jam',
+ 'rei0jamers',
+ 'samusdance',
+ ],
+
+ 'lol' => [
+ ':d',
+ 'boothi0lul',
+ 'kekw',
+ 'lol',
+ 'lul',
+ 'rei0lul',
+ 'samusgrin',
+ 'ticknaauslachen',
+ 'xd',
+ ],
+
+ 'love' => [
+ '<3',
+ 'duden0love',
+ 'exec0love',
+ 'krawal0heart',
+ 'lodanzhug',
+ 'luckwulove',
+ 'luvsign',
+ 'muftaal',
+ 'osora0love',
+ 'peepoexcitedhug',
+ 'spirit0love',
+ 'svenkalove',
+ 'ticknaherz',
+ ],
+
+ 'name' => [
+ 'baba',
+ 'baka',
+ 'bobe?r',
+ 'brog(i|or)',
+ 'cfate',
+ 'danny',
+ 'danzi+',
+ 'daruck',
+ 'dennsen',
+ 'dimez',
+ 'divi',
+ 'dud(en|i+)',
+ 'ele',
+ 'eri(ror)?',
+ '(name)?faker',
+ 'fetti+',
+ 'gamma(chuu)?',
+ 'goat(buster|ie?|y)?',
+ 'hitsu(yan)?',
+ 'holy',
+ 'jem',
+ 'kala(marino)?',
+ 'kromb',
+ 'koval',
+ 'kum(i|o|p)',
+ 'lanux',
+ 'len(esha|chen)',
+ 'leya+',
+ 'magno',
+ 'malmo',
+ 'markam',
+ 'micha',
+ 'mimsy',
+ 'muf(fy|taay)',
+ 'murd(elizer|i+)',
+ 'nami',
+ 'nula',
+ 'onio',
+ 'paulinche',
+ 'phaaze',
+ 'ralen',
+ 'ramond',
+ 'ray(vis)?',
+ 'schulzer',
+ 'skunk(ner)?',
+ 'skipsy',
+ 'soli+',
+ 'sven(ka+)?',
+ 'tantalus',
+ 'teto',
+ 'thalanee?',
+ 'tick(i+|naldo|y+)',
+ 'tofu',
+ 'tr[i0]x+',
+ 'vin(nie?|ny|ter)',
+ 'xall',
+ 'yasi',
+ ],
+
+ 'pog' => [
+ // Lower case on purpose: the text is lower-cased before matching.
+ 'bumble0pog',
+ 'komodohype',
+ 'pog',
+ 'pogchamp',
+ 'poggers',
+ 'satono0pog',
+ ],
+
+ 'run' => [
+ 'dennsenboots',
+ 'lodanzrun',
+ 'ticknaldosprint',
+ 'vinter0run',
+ ],
+
+ 'wave' => [
+ 'dennsenhi',
+ 'dergoawave',
+ 'heyguys',
+ 'holysm0heyguys',
+ 'muftaahey',
+ 'rei0wave',
+ 'sayuri0wave',
+ 'shindi0wave',
+ 'svenkawave',
+ 'wuschlwave',
+ ],
+
+ 'zelda_boss' => [
+ 'aga(hnim)?',
+ 'armos( knights)?',
+ 'arrghus',
+ 'blind',
+ 'ganon(dorf)?',
+ 'helma',
+ 'kholdstare',
+ 'lanmo(las)?',
+ 'moldorm',
+ 'mothula',
+ 'mott[ei]',
+ 'trinexx',
+ 'vit(reous|ty)',
+ ],
+
+ 'zelda_dungeon' => [
+ 'eastern',
+ 'desert( palace)?',
+ 'gt',
+ 'hera',
+ 'ice ?(palace)?',
+ '(misery )?mire',
+ 'pod',
+ 'skull ?woods',
+ 'swamp',
+ 'thieve\'?s\'? ?town',
+ 'tr',
+ 'tt',
+ ],
+
+ 'zelda_item' => [
+ '(big|small|retro|generic) ?keys?',
+ 'b[oö]gen',
+ 'bombos',
+ 'boots',
+ 'bottle',
+ 'bows?',
+ 'bugnet',
+ 'byrna',
+ 'cape',
+ 'ether',
+ 'flasche',
+ 'flippers',
+ 'fl[uö]te',
+ 'frod',
+ '(gloves?|mitts|handschuhe?)',
+ '(half|quarter) ?magic',
+ 'hammer',
+ 'hookshot',
+ '(ice|fire) ?rod',
+ 'lampe?',
+ 'laser ?bridge',
+ 'mearl',
+ 'mirror',
+ 'moon ?pearl',
+ 'mushroom',
+ 'ocarina',
+ 'pilz',
+ 'powder',
+ 'puder',
+ 'quake',
+ '(red|blue) ?cane',
+ '(red|green|blue) ?(goo|potion)',
+ '(red|green|blue|baby) ?mail',
+ '(red|blue|bu|boo|good|bad|both)merang',
+ 'schaufel',
+ '(gro(ss|ß)er? |kleiner? )?schlüssel',
+ 'schwert',
+ 'shovel',
+ 'silvers',
+ 'somaria',
+ 'spiegel',
+ 'sword',
+ ],
+
+ 'zelda_location' => [
+ 'big chest',
+ 'bumper( cave)?( ledge)?',
+ '(hyrule)? ?castle ?(tower)?',
+ 'catfish',
+ 'cave 0?',
+ 'chest ?game',
+ 'cutscene ?chest',
+ 'damm',
+ 'desert( ledge)?',
+ 'dig(ging)? ?game',
+ '((back|front) of )?escape',
+ 'gyl',
+ 'hobo',
+ // "shot" is optional, like the other parenthesised suffixes in this table.
+ 'hook ?(shot)? cave',
+ 'lava ?chest',
+ '(light|dark) ?world',
+ 'lss',
+ 'magic bat',
+ '(dark )?(death )?mountain',
+ 'ped(estal)?',
+ 'pyramid( fairy)?( ledge)?',
+ 'red bomb',
+ 'sahasrahla',
+ 'sasha',
+ 'sick kid',
+ 'stumpy',
+ 'tile ?room',
+ 'torch',
+ 'zora( ledge)?',
+ ],
+ ];
+
}