File manager - Edit - /home/opticamezl/www/newok/php-stemmer.zip
Back
PK w�\u� �c c src/StemmerFactory.phpnu �[��� <?php namespace Wamania\Snowball; use voku\helper\UTF8; use Wamania\Snowball\Stemmer\Catalan; use Wamania\Snowball\Stemmer\Danish; use Wamania\Snowball\Stemmer\Dutch; use Wamania\Snowball\Stemmer\English; use Wamania\Snowball\Stemmer\Finnish; use Wamania\Snowball\Stemmer\French; use Wamania\Snowball\Stemmer\German; use Wamania\Snowball\Stemmer\Italian; use Wamania\Snowball\Stemmer\Norwegian; use Wamania\Snowball\Stemmer\Portuguese; use Wamania\Snowball\Stemmer\Romanian; use Wamania\Snowball\Stemmer\Russian; use Wamania\Snowball\Stemmer\Spanish; use Wamania\Snowball\Stemmer\Stemmer; use Wamania\Snowball\Stemmer\Swedish; class StemmerFactory { const LANGS = [ Catalan::class => ['ca', 'cat', 'catalan'], Danish::class => ['da', 'dan', 'danish'], Dutch::class => ['nl', 'dut', 'nld', 'dutch'], English::class => ['en', 'eng', 'english'], Finnish::class => ['fi', 'fin', 'finnish'], French::class => ['fr', 'fre', 'fra', 'french'], German::class => ['de', 'deu', 'ger', 'german'], Italian::class => ['it', 'ita', 'italian'], Norwegian::class => ['no', 'nor', 'norwegian'], Portuguese::class => ['pt', 'por', 'portuguese'], Romanian::class => ['ro', 'rum', 'ron', 'romanian'], Russian::class => ['ru', 'rus', 'russian'], Spanish::class => ['es', 'spa', 'spanish'], Swedish::class => ['sv', 'swe', 'swedish'] ]; /** * @throws NotFoundException */ public static function create(string $code): Stemmer { $code = UTF8::strtolower($code); foreach (self::LANGS as $classname => $isoCodes) { if (in_array($code, $isoCodes)) { return new $classname; } } throw new NotFoundException(sprintf('Stemmer not found for %s', $code)); } } PK w�\�%�9 9 src/Stemmer/Norwegian.phpnu �[��� <?php namespace Wamania\Snowball\Stemmer; use voku\helper\UTF8; /** * * @link http://snowball.tartarus.org/algorithms/norwegian/stemmer.html * @author wamania * */ class Norwegian extends Stem { /** * All norwegian vowels */ protected static $vowels = array('a', 'e', 'i', 'o', 
'u', 'y', 'æ', 'å', 'ø'); /** * {@inheritdoc} */ public function stem($word) { // we do ALL in UTF-8 if (!UTF8::is_utf8($word)) { throw new \Exception('Word must be in UTF-8'); } $this->word = UTF8::strtolower($word); // R2 is not used: R1 is defined in the same way as in the German stemmer $this->r1(); // then R1 is adjusted so that the region before it contains at least 3 letters. if ($this->r1Index < 3) { $this->r1Index = 3; $this->r1 = UTF8::substr($this->word, 3); } // Do each of steps 1, 2 3 and 4. $this->step1(); $this->step2(); $this->step3(); return $this->word; } /** * Define a valid s-ending as one of * b c d f g h j l m n o p r t v y z, * or k not preceded by a vowel * * @param string $ending * @return boolean */ private function hasValidSEnding($word) { $lastLetter = UTF8::substr($word, -1, 1); if (in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z'))) { return true; } if ($lastLetter == 'k') { $beforeLetter = UTF8::substr($word, -2, 1); if (!in_array($beforeLetter, self::$vowels)) { return true; } } return false; } /** * Step 1 * Search for the longest among the following suffixes in R1, and perform the action indicated. 
*/ private function step1() { // erte ert // replace with er if ( ($position = $this->searchIfInR1(array('erte', 'ert'))) !== false) { $this->word = preg_replace('#(erte|ert)$#u', 'er', $this->word); return true; } // a e ede ande ende ane ene hetene en heten ar er heter as es edes endes enes hetenes ens hetens ers ets et het ast // delete if ( ($position = $this->searchIfInR1(array( 'hetenes', 'hetene', 'hetens', 'heten', 'endes', 'heter', 'ande', 'ende', 'enes', 'edes', 'ede', 'ane', 'ene', 'het', 'ers', 'ets', 'ast', 'ens', 'en', 'ar', 'er', 'as', 'es', 'et', 'a', 'e' ))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); return true; } // s // delete if preceded by a valid s-ending if ( ($position = $this->searchIfInR1(array('s'))) !== false) { $word = UTF8::substr($this->word, 0, $position); if ($this->hasValidSEnding($word)) { $this->word = $word; } return true; } } /** * Step 2 * If the word ends dt or vt in R1, delete the t. */ private function step2() { if ($this->searchIfInR1(array('dt', 'vt')) !== false) { $this->word = UTF8::substr($this->word, 0, -1); } } /** * Step 3: * Search for the longest among the following suffixes in R1, and if found, delete. 
*/ private function step3() { // leg eleg ig eig lig elig els lov elov slov hetslov if ( ($position = $this->searchIfInR1(array( 'hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig' ))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); } } } PK w�\˩�� I I src/Stemmer/English.phpnu �[��� <?php namespace Wamania\Snowball\Stemmer; use voku\helper\UTF8; /** * English Porter 2 * * @link http://snowball.tartarus.org/algorithms/english/stemmer.html * @author wamania * */ class English extends Stem { /** * All english vowels */ protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y'); protected static $doubles = array('bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt'); protected static $liEnding = array('c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't'); /** * {@inheritdoc} */ public function stem($word) { // we do ALL in UTF-8 if (!UTF8::is_utf8($word)) { throw new \Exception('Word must be in UTF-8'); } if (Utf8::strlen($word) < 3) { return $word; } $this->word = UTF8::strtolower($word); // exceptions if (null !== ($word = $this->exception1())) { return $word; } $this->plainVowels = implode('', self::$vowels); // Remove initial ', if present. 
$first = UTF8::substr($this->word, 0, 1); if ($first == "'") { $this->word = UTF8::substr($this->word, 1); } // Set initial y, or y after a vowel, to Y if ($first == 'y') { $this->word = preg_replace('#^y#u', 'Y', $this->word); } $this->word = preg_replace('#(['.$this->plainVowels.'])y#u', '$1Y', $this->word); $this->r1(); $this->exceptionR1(); $this->r2(); $this->step0(); $this->step1a(); // exceptions 2 if (null !== ($word = $this->exception2())) { return $word; } $this->step1b(); $this->step1c(); $this->step2(); $this->step3(); $this->step4(); $this->step5(); $this->finish(); return $this->word; } /** * Step 0 * Remove ', 's, 's' */ private function step0() { if ( ($position = $this->search(array("'s'", "'s", "'"))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); } } private function step1a() { // sses // replace by ss if ( ($position = $this->search(array('sses'))) !== false) { $this->word = preg_replace('#(sses)$#u', 'ss', $this->word); return true; } // ied+ ies* // replace by i if preceded by more than one letter, otherwise by ie (so ties -> tie, cries -> cri) if ( ($position = $this->search(array('ied', 'ies'))) !== false) { if ($position > 1) { $this->word = preg_replace('#(ied|ies)$#u', 'i', $this->word); } else { $this->word = preg_replace('#(ied|ies)$#u', 'ie', $this->word); } return true; } // us+ ss // do nothing if ( ($position = $this->search(array('us', 'ss'))) !== false) { return true; } // s // delete if the preceding word part contains a vowel not immediately before the s (so gas and this retain the s, gaps and kiwis lose it) if ( ($position = $this->search(array('s'))) !== false) { for ($i=0; $i<$position-1; $i++) { $letter = UTF8::substr($this->word, $i, 1); if (in_array($letter, self::$vowels)) { $this->word = UTF8::substr($this->word, 0, $position); return true; } } return true; } return false; } /** * Step 1b */ private function step1b() { // eed eedly+ // replace by ee if in R1 if ( ($position = 
$this->search(array('eedly', 'eed'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(eedly|eed)$#u', 'ee', $this->word); } return true; } // ed edly+ ing ingly+ // delete if the preceding word part contains a vowel, and after the deletion: // if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or // if the word ends with a double remove the last letter (so hopp -> hop), or // if the word is short, add e (so hop -> hope) if ( ($position = $this->search(array('edly', 'ingly', 'ed', 'ing'))) !== false) { for ($i=0; $i<$position; $i++) { $letter = UTF8::substr($this->word, $i, 1); if (in_array($letter, self::$vowels)) { $this->word = UTF8::substr($this->word, 0, $position); if ($this->search(array('at', 'bl', 'iz')) !== false) { $this->word .= 'e'; } elseif ( ($position2 = $this->search(self::$doubles)) !== false) { $this->word = UTF8::substr($this->word, 0, ($position2+1)); } elseif ($this->isShort()) { $this->word .= 'e'; } return true; } } return true; } return false; } /** * Step 1c: * */ private function step1c() { // replace suffix y or Y by i if preceded by a non-vowel // which is not the first letter of the word (so cry -> cri, by -> by, say -> say) $length = UTF8::strlen($this->word); if ($length < 3) { return true; } if ( ($position = $this->search(array('y', 'Y'))) !== false) { $before = $position - 1; $letter = UTF8::substr($this->word, $before, 1); if (! in_array($letter, self::$vowels)) { $this->word = preg_replace('#(y|Y)$#u', 'i', $this->word); } return true; } return false; } /** * Step 2 * Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated. 
*/ private function step2() { // iveness iviti: replace by ive if ( ($position = $this->search(array('iveness', 'iviti'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(iveness|iviti)$#u', 'ive', $this->word); } return true; } // ousli ousness: replace by ous if ( ($position = $this->search(array('ousli', 'ousness'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ousli|ousness)$#u', 'ous', $this->word); } return true; } // izer ization: replace by ize if ( ($position = $this->search(array('izer', 'ization'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(izer|ization)$#u', 'ize', $this->word); } return true; } // ational ation ator: replace by ate if ( ($position = $this->search(array('ational', 'ation', 'ator'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ational|ation|ator)$#u', 'ate', $this->word); } return true; } // biliti bli+: replace by ble if ( ($position = $this->search(array('biliti', 'bli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(biliti|bli)$#u', 'ble', $this->word); } return true; } // lessli+: replace by less if ( ($position = $this->search(array('lessli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(lessli)$#u', 'less', $this->word); } return true; } // fulness: replace by ful if ( ($position = $this->search(array('fulness', 'fulli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(fulness|fulli)$#u', 'ful', $this->word); } return true; } // tional: replace by tion if ( ($position = $this->search(array('tional'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(tional)$#u', 'tion', $this->word); } return true; } // alism aliti alli: replace by al if ( ($position = $this->search(array('alism', 'aliti', 'alli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(alism|aliti|alli)$#u', 'al', $this->word); } 
return true; } // enci: replace by ence if ( ($position = $this->search(array('enci'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(enci)$#u', 'ence', $this->word); } return true; } // anci: replace by ance if ( ($position = $this->search(array('anci'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(anci)$#u', 'ance', $this->word); } return true; } // abli: replace by able if ( ($position = $this->search(array('abli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(abli)$#u', 'able', $this->word); } return true; } // entli: replace by ent if ( ($position = $this->search(array('entli'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(entli)$#u', 'ent', $this->word); } return true; } // ogi+: replace by og if preceded by l if ( ($position = $this->search(array('ogi'))) !== false) { if ($this->inR1($position)) { $before = $position - 1; $letter = UTF8::substr($this->word, $before, 1); if ($letter == 'l') { $this->word = preg_replace('#(ogi)$#u', 'og', $this->word); } } return true; } // li+: delete if preceded by a valid li-ending if ( ($position = $this->search(array('li'))) !== false) { if ($this->inR1($position)) { // a letter for you $letter = UTF8::substr($this->word, ($position-1), 1); if (in_array($letter, self::$liEnding)) { $this->word = UTF8::substr($this->word, 0, $position); } } return true; } return false; } /** * Step 3: * Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated. 
*/ private function step3() { // ational+: replace by ate if ($this->searchIfInR1(array('ational')) !== false) { $this->word = preg_replace('#(ational)$#u', 'ate', $this->word); return true; } // tional+: replace by tion if ($this->searchIfInR1(array('tional')) !== false) { $this->word = preg_replace('#(tional)$#u', 'tion', $this->word); return true; } // alize: replace by al if ($this->searchIfInR1(array('alize')) !== false) { $this->word = preg_replace('#(alize)$#u', 'al', $this->word); return true; } // icate iciti ical: replace by ic if ($this->searchIfInR1(array('icate', 'iciti', 'ical')) !== false) { $this->word = preg_replace('#(icate|iciti|ical)$#u', 'ic', $this->word); return true; } // ful ness: delete if ( ($position = $this->searchIfInR1(array('ful', 'ness'))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); return true; } // ative*: delete if in R2 if ( (($position = $this->searchIfInR1(array('ative'))) !== false) && ($this->inR2($position)) ) { $this->word = UTF8::substr($this->word, 0, $position); return true; } return false; } /** * Step 4 * Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated. 
*/ private function step4() { // ement ance ence able ible ant ment ent ism ate iti ous ive ize al er ic // delete if ( ($position = $this->search(array( 'ance', 'ence', 'ement', 'able', 'ible', 'ant', 'ment', 'ent', 'ism', 'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic'))) !== false) { if ($this->inR2($position)) { $this->word = UTF8::substr($this->word, 0, $position); } return true; } // ion // delete if preceded by s or t if ( ($position = $this->searchIfInR2(array('ion'))) !== false) { $before = $position - 1; $letter = UTF8::substr($this->word, $before, 1); if ($letter == 's' || $letter == 't') { $this->word = UTF8::substr($this->word, 0, $position); } return true; } return false; } /** * Step 5: * * Search for the the following suffixes, and, if found, perform the action indicated. */ private function step5() { // e // delete if in R2, or in R1 and not preceded by a short syllable if ( ($position = $this->search(array('e'))) !== false) { if ($this->inR2($position)) { $this->word = UTF8::substr($this->word, 0, $position); } elseif ($this->inR1($position)) { if ( (! $this->searchShortSyllabe(-4, 3)) && (! 
$this->searchShortSyllabe(-3, 2)) ) { $this->word = UTF8::substr($this->word, 0, $position); } } return true; } // l // delete if in R2 and preceded by l if ( ($position = $this->searchIfInR2(array('l'))) !== false) { $before = $position - 1; $letter = UTF8::substr($this->word, $before, 1); if ($letter == 'l') { $this->word = UTF8::substr($this->word, 0, $position); } return true; } return false; } private function finish() { $this->word = UTF8::str_replace('Y', 'y', $this->word); } private function exceptionR1() { if (Utf8::strpos($this->word, 'gener') === 0) { $this->r1 = UTF8::substr($this->word, 5); $this->r1Index = 5; } elseif (Utf8::strpos($this->word, 'commun') === 0) { $this->r1 = UTF8::substr($this->word, 6); $this->r1Index = 6; } elseif (Utf8::strpos($this->word, 'arsen') === 0) { $this->r1 = UTF8::substr($this->word, 5); $this->r1Index = 5; } } /** * 1/ Stem certain special words as follows, * 2/ If one of the following is found, leave it invariant, */ private function exception1() { $exceptions = array( 'skis' => 'ski', 'skies' => 'sky', 'dying' => 'die', 'lying' => 'lie', 'tying' => 'tie', 'idly' => 'idl', 'gently' => 'gentl', 'ugly' => 'ugli', 'early' => 'earli', 'only' => 'onli', 'singly' => 'singl', // invariants 'sky' => 'sky', 'news' => 'news', 'howe' => 'howe', 'atlas' => 'atlas', 'cosmos' => 'cosmos', 'bias' => 'bias', 'andes' => 'andes' ); if (isset($exceptions[$this->word])) { return $exceptions[$this->word]; } return null; } /** * Following step 1a, leave the following invariant, */ private function exception2() { $exceptions = array( 'inning' => 'inning', 'outing' => 'outing', 'canning' => 'canning', 'herring' => 'herring', 'earring' => 'earring', 'proceed' => 'proceed', 'exceed' => 'exceed', 'succeed' => 'succeed' ); if (isset($exceptions[$this->word])) { return $exceptions[$this->word]; } return null; } /** * A word is called short if it ends in a short syllable, and if R1 is null. 
* Note : R1 not really null, but the word at this state must be smaller than r1 index * * @return boolean */ private function isShort() { $length = UTF8::strlen($this->word); return ( ($this->searchShortSyllabe(-3, 3) || $this->searchShortSyllabe(-2, 2)) && ($length == $this->r1Index) ); } /** * Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, * or * (b) a vowel at the beginning of the word followed by a non-vowel. * * So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables. * But uproot, bestow, disturb do not end with a short syllable. */ private function searchShortSyllabe($from, $nbLetters) { $length = UTF8::strlen($this->word); if ($from < 0) { $from = $length + $from; } if ($from < 0) { $from = 0; } // (a) is just for beginning of the word if ( ($nbLetters == 2) && ($from != 0) ) { return false; } $first = UTF8::substr($this->word, $from, 1); $second = UTF8::substr($this->word, ($from+1), 1); if ($nbLetters == 2) { if ( (in_array($first, self::$vowels)) && (!in_array($second, self::$vowels)) ) { return true; } } $third = UTF8::substr($this->word, ($from+2), 1); if ( (!in_array($first, self::$vowels)) && (in_array($second, self::$vowels)) && (!in_array($third, array_merge(self::$vowels, array('x', 'Y', 'w'))))) { return true; } return false; } } PK w�\��J J src/Stemmer/Stemmer.phpnu �[��� <?php namespace Wamania\Snowball\Stemmer; /** * @author Luís Cobucci <lcobucci@gmail.com> */ interface Stemmer { /** * Main function to get the STEM of a word * * @param string $word A valid UTF-8 word * * @return string * * @throws \Exception */ public function stem($word); } PK w�\T�Lj� � src/Stemmer/Danish.phpnu �[��� <?php namespace Wamania\Snowball\Stemmer; use voku\helper\UTF8; /** * * @link http://snowball.tartarus.org/algorithms/danish/stemmer.html * @author wamania * */ class Danish extends Stem { /** * All danish vowels */ protected static $vowels = 
array('a', 'e', 'i', 'o', 'u', 'y', 'æ', 'å', 'ø'); /** * {@inheritdoc} */ public function stem($word): string { // we do ALL in UTF-8 if (!UTF8::is_utf8($word)) { throw new \Exception('Word must be in UTF-8'); } $this->word = UTF8::strtolower($word); // R2 is not used: R1 is defined in the same way as in the German stemmer $this->r1(); // then R1 is adjusted so that the region before it contains at least 3 letters. if ($this->r1Index < 3) { $this->r1Index = 3; $this->r1 = UTF8::substr($this->word, 3); } // Do each of steps 1, 2 3 and 4. $this->step1(); $this->step2(); $this->step3(); $this->step4(); return $this->word; } /** * Define a valid s-ending as one of * a b c d f g h j k l m n o p r t v y z å * * @param string $ending * @return boolean */ private function hasValidSEnding($word) { $lastLetter = UTF8::substr($word, -1, 1); return in_array($lastLetter, array('a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å')); } /** * Step 1 * Search for the longest among the following suffixes in R1, and perform the action indicated. 
*/ private function step1() { // hed ethed ered e erede ende erende ene erne ere en heden eren er heder erer // heds es endes erendes enes ernes eres ens hedens erens ers ets erets et eret // delete if ( ($position = $this->searchIfInR1(array( 'erendes', 'erende', 'hedens', 'erede', 'ethed', 'heden', 'endes', 'erets', 'heder', 'ernes', 'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds', 'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e' ))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); return true; } // s // delete if preceded by a valid s-ending if ( ($position = $this->searchIfInR1(array('s'))) !== false) { $word = UTF8::substr($this->word, 0, $position); if ($this->hasValidSEnding($word)) { $this->word = $word; } return true; } } /** * Step 2 * Search for one of the following suffixes in R1, and if found delete the last letter. * gd dt gt kt */ private function step2() { if ($this->searchIfInR1(array('gd', 'dt', 'gt', 'kt')) !== false) { $this->word = UTF8::substr($this->word, 0, -1); } } /** * Step 3: */ private function step3() { // If the word ends igst, remove the final st. if ($this->search(array('igst')) !== false) { $this->word = UTF8::substr($this->word, 0, -2); } // Search for the longest among the following suffixes in R1, and perform the action indicated. // ig lig elig els // delete, and then repeat step 2 if ( ($position = $this->searchIfInR1(array('elig', 'lig', 'ig', 'els'))) !== false) { $this->word = UTF8::substr($this->word, 0, $position); $this->step2(); return true; } // løst // replace with løs if ($this->searchIfInR1(array('løst')) !== false) { $this->word = UTF8::substr($this->word, 0, -1); } } /** * Step 4: undouble * If the word ends with double consonant in R1, remove one of the consonants. 
*/ private function step4() { $length = UTF8::strlen($this->word); if (!$this->inR1(($length-1))) { return false; } $lastLetter = UTF8::substr($this->word, -1, 1); if (in_array($lastLetter, self::$vowels)) { return false; } $beforeLastLetter = UTF8::substr($this->word, -2, 1); if ($lastLetter == $beforeLastLetter) { $this->word = UTF8::substr($this->word, 0, -1); } return true; } } PK w�\�� �,3 ,3 src/Stemmer/Romanian.phpnu �[��� <?php namespace Wamania\Snowball\Stemmer; use voku\helper\UTF8; /** * * @link http://snowball.tartarus.org/algorithms/romanian/stemmer.html * @author wamania * */ class Romanian extends Stem { /** * All Romanian vowels */ protected static $vowels = array('a', 'ă', 'â', 'e', 'i', 'î', 'o', 'u'); /** * {@inheritdoc} */ public function stem($word) { // we do ALL in UTF-8 if (!UTF8::is_utf8($word)) { throw new \Exception('Word must be in UTF-8'); } $this->word = UTF8::strtolower($word); $this->plainVowels = implode('', self::$vowels); // First, i and u between vowels are put into upper case (so that they are treated as consonants). $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word); $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word); $this->rv(); $this->r1(); $this->r2(); $this->step0(); $word1 = $this->word; $word2 = $this->word; do { $word1 = $this->word; $this->step1(); } while ($this->word != $word1); $this->step2(); // Do step 3 if no suffix was removed either by step 1 or step 2. if ($word2 == $this->word) { $this->step3(); } $this->step4(); $this->finish(); return $this->word; } /** * Step 0: Removal of plurals (and other simplifications) * Search for the longest among the following suffixes, and, if it is in R1, perform the action indicated. 
* @return boolean */ private function step0() { // ul ului // delete if ( ($position = $this->search(array('ul', 'ului'))) !== false) { if ($this->inR1($position)) { $this->word = UTF8::substr($this->word, 0, $position); } return true; } // aua // replace with a if ( ($position = $this->search(array('aua'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(aua)$#u', 'a', $this->word); } return true; } // ea ele elor // replace with e if ( ($position = $this->search(array('ea', 'ele', 'elor'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ea|ele|elor)$#u', 'e', $this->word); } return true; } // ii iua iei iile iilor ilor // replace with i if ( ($position = $this->search(array('ii', 'iua', 'iei', 'iile', 'iilor', 'ilor'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ii|iua|iei|iile|iilor|ilor)$#u', 'i', $this->word); } return true; } // ile // replace with i if not preceded by ab if ( ($position = $this->search(array('ile'))) !== false) { if ($this->inR1($position)) { $before = UTF8::substr($this->word, ($position-2), 2); if ($before != 'ab') { $this->word = preg_replace('#(ile)$#u', 'i', $this->word); } } return true; } // atei // replace with at if ( ($position = $this->search(array('atei'))) != false) { if ($this->inR1($position)) { $this->word = preg_replace('#(atei)$#u', 'at', $this->word); } return true; } // aţie aţia // replace with aţi if ( ($position = $this->search(array('aţie', 'aţia'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(aţie|aţia)$#u', 'aţi', $this->word); } return true; } return false; } /** * Step 1: Reduction of combining suffixes * Search for the longest among the following suffixes, and, if it is in R1, preform the replacement action indicated. * Then repeat this step until no replacement occurs. 
* @return boolean */ private function step1() { // abilitate abilitati abilităi abilităţi // replace with abil if ( ($position = $this->search(array('abilitate', 'abilitati', 'abilităi', 'abilităţi'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(abilitate|abilitati|abilităi|abilităţi)$#u', 'abil', $this->word); } return true; } // ibilitate // replace with ibil if ( ($position = $this->search(array('ibilitate'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ibilitate)$#u', 'ibil', $this->word); } return true; } // ivitate ivitati ivităi ivităţi // replace with iv if ( ($position = $this->search(array('ivitate', 'ivitati', 'ivităi', 'ivităţi'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ivitate|ivitati|ivităi|ivităţi)$#u', 'iv', $this->word); } return true; } // icitate icitati icităi icităţi icator icatori iciv iciva icive icivi icivă ical icala icale icali icală // replace with ic if ( ($position = $this->search(array( 'icitate', 'icitati', 'icităi', 'icităţi', 'icatori', 'icator', 'iciva', 'icive', 'icivi', 'icivă', 'icala', 'icale', 'icali', 'icală', 'iciv', 'ical'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(icitate|icitati|icităi|icităţi|cator|icatori|iciva|icive|icivi|icivă|icala|icale|icali|icală|ical|iciv)$#u', 'ic', $this->word); } return true; } // ativ ativa ative ativi ativă aţiune atoare ator atori ătoare ător ători // replace with at if ( ($position = $this->search(array('ativa', 'ative', 'ativi', 'ativă', 'ativ', 'aţiune', 'atoare', 'atori', 'ătoare', 'ători', 'ător', 'ator'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(ativa|ative|ativi|ativă|ativ|aţiune|atoare|atori|ătoare|ători|ător|ator)$#u', 'at', $this->word); } return true; } // itiv itiva itive itivi itivă iţiune itoare itor itori // replace with it if ( ($position = $this->search(array('itiva', 'itive', 'itivi', 'itivă', 'itiv', 'iţiune', 
'itoare', 'itori', 'itor'))) !== false) { if ($this->inR1($position)) { $this->word = preg_replace('#(itiva|itive|itivi|itivă|itiv|iţiune|itoare|itori|itor)$#u', 'it', $this->word); } return true; } return false; } /** * Step 2: Removal of 'standard' suffixes * Search for the longest among the following suffixes, and, if it is in R2, perform the action indicated. * @return boolean */ private function step2() { // atori itate itati, ităţi, abila abile abili abilă, ibila ibile ibili ibilă // anta, ante, anti, antă, ator, ibil, oasa oasă oase, ităi, abil // osi oşi ant ici ică iva ive ivi ivă ata ată ati ate, ata ată ati ate uta ută uti ute, ita ită iti ite ica ice // at, os, iv, ut, it, ic // delete if ( ($position = $this->search(array( 'atori', 'itate', 'itati', 'ităţi', 'abila', 'abile', 'abili', 'abilă', 'ibila', 'ibile', 'ibili', 'ibilă', 'anta', 'ante', 'anti', 'antă', 'ator', 'ibil', 'oasa', 'oasă', 'oase', 'ităi', 'abil', 'osi', 'oşi', 'ant', 'ici', 'ică', 'iva', 'ive', 'ivi', 'ivă', 'ata', 'ată', 'ati', 'ate', 'ata', 'ată', 'ati', 'ate', 'uta', 'ută', 'uti', 'ute', 'ita', 'ită', 'iti', 'ite', 'ica', 'ice', 'at', 'os', 'iv', 'ut', 'it', 'ic' ))) !== false) { if ($this->inR2($position)) { $this->word = UTF8::substr($this->word, 0, $position); } return true; } // iune iuni // delete if preceded by ţ, and replace the ţ by t. 
if ( ($position = $this->search(array('iune', 'iuni'))) !== false) {
            if ($this->inR2($position)) {
                $before = $position - 1;
                $letter = UTF8::substr($this->word, $before, 1);
                if ($letter == 'ţ') {
                    $this->word = UTF8::substr($this->word, 0, $position);
                    $this->word = preg_replace('#(ţ)$#u', 't', $this->word);
                }
            }

            return true;
        }

        // ism isme ist ista iste isti istă işti
        //      replace with ist
        if ( ($position = $this->search(array('isme', 'ism', 'ista', 'iste', 'isti', 'istă', 'işti', 'ist'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(isme|ism|ista|iste|isti|istă|işti|ist)$#u', 'ist', $this->word);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 3: Removal of verb suffixes
     * Do step 3 if no suffix was removed either by step 1 or step 2.
     *
     * The suffix lists are ordered so that no entry is shadowed by a shorter
     * entry placed before it: search() returns the first entry that matches.
     *
     * @return boolean true when one of the listed suffixes ends the word inside RV
     *                 (whether or not it was deleted), false otherwise
     */
    private function step3()
    {
        // are ere ire âre ind ând indu ându eze ească ez ezi ează esc eşti
        // eşte ăsc ăşti ăşte am ai au eam eai ea eaţi eau iam iai ia iaţi
        // iau ui aşi arăm arăţi ară uşi urăm urăţi ură işi irăm irăţi iră âi
        // âşi ârăm ârăţi âră asem aseşi ase aserăm aserăţi aseră isem iseşi ise
        // iserăm iserăţi iseră âsem âseşi âse âserăm âserăţi âseră usem useşi use userăm userăţi useră
        //      delete if preceded in RV by a consonant or u
        if (($position = $this->searchIfInRv(array(
            'userăţi', 'iserăţi', 'âserăţi', 'aserăţi', 'userăm', 'iserăm', 'âserăm', 'aserăm',
            'iseră', 'âseşi', 'useră', 'âseră', 'useşi', 'iseşi', 'aseră', 'aseşi', 'ârăţi', 'irăţi', 'urăţi', 'arăţi', 'ească',
            'usem', 'âsem', 'isem', 'asem', 'ârăm', 'urăm', 'irăm', 'arăm', 'iaţi', 'eaţi', 'ăşte', 'ăşti', 'eşte', 'eşti', 'ează', 'ându', 'indu',
            'âse', 'use', 'ise', 'ase', 'âră', 'iră', 'işi', 'ură', 'uşi', 'ară', 'aşi', 'âşi', 'iau', 'iai', 'iam', 'eau', 'eai', 'eam', 'ăsc',
            'are', 'ere', 'ire', 'âre', 'ind', 'ând', 'eze', 'ezi', 'esc',
            'âi', 'ui', 'ia', 'ea', 'au', 'ai', 'am', 'ez'
        ))) !== false) {
            if ($this->inRv($position)) {
                $before = $position - 1;
                // the preceding letter must itself lie in RV
                if ($this->inRv($before)) {
                    $letter = UTF8::substr($this->word, $before, 1);

                    if ( (!in_array($letter, self::$vowels, true)) || ($letter == 'u') ) {
                        $this->word = UTF8::substr($this->word, 0, $position);
                    }
                }
            }

            return true;
        }

        // ăm aţi em eţi im iţi âm âţi seşi serăm serăţi seră sei se sesem seseşi sese seserăm seserăţi seseră
        //      delete
        if (($position = $this->searchIfInRv(array(
            'seserăm', 'seserăţi', 'seseră', 'seseşi', 'sesem', 'serăţi', 'serăm', 'seşi', 'sese', 'seră',
            'aţi', 'eţi', 'iţi', 'âţi', 'sei', 'se', 'ăm', 'âm', 'em', 'im'
        ))) !== false) {
            if ($this->inRv($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // nothing matched: honour the documented boolean return instead of falling off the end
        return false;
    }

    /**
     * Step 4: Removal of final vowel
     *
     * @return boolean always true
     */
    private function step4()
    {
        // Search for the longest among the suffixes "a e i ie ă " and, if it is in RV, delete it.
        // ('ie' is listed before 'e' so that the longer suffix wins.)
        if (($position = $this->search(array('a', 'ie', 'e', 'i', 'ă'))) !== false) {
            if ($this->inRv($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }
        }

        return true;
    }

    /**
     * Finally
     * Turn I, U back into i, u
     */
    private function finish()
    {
        // Turn I, U back into i, u
        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
    }
}
PK w�\�(+1 +1 src/Stemmer/Spanish.phpnu �[���
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 *
 * @link http://snowball.tartarus.org/algorithms/spanish/stemmer.html
 * @author wamania
 *
 */
class Spanish extends Stem
{
    /**
     * All spanish vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'ü');

    /**
     * {@inheritdoc}
     *
     * Lower-cases the word, computes RV, R1 and R2 once, then runs steps 0 to 3
     * and strips the remaining acute accents.
     *
     * @throws \Exception when $word is not valid UTF-8
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        $this->rv();
        $this->r1();
        $this->r2();

        $this->step0();

        // snapshot taken after step 0: used to detect whether step 1 / 2a removed anything
        $word = $this->word;
        $this->step1();

        // Do step 2a if no ending was removed by step 1.
        if ($this->word === $word) {
            $this->step2a();

            // Do Step 2b if step 2a was done, but failed to remove a suffix.
            if ($this->word === $word) {
                $this->step2b();
            }
        }

        $this->step3();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 0: Attached pronoun
     *
     * Search for the longest among the following suffixes
     *      me se sela selo selas selos la le lo las les los nos
     *
     * and delete it, if comes after one of
     *      (a) iéndo ándo ár ér ír
     *      (b) ando iendo ar er ir
     *      (c) yendo following u
     *
     *  in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
     *  In the case of (a), deletion is followed by removing the acute accent (for example, haciéndola -> haciendo).
     *
     * @return boolean true when a pronoun was removed
     */
    private function step0()
    {
        if (($position = $this->searchIfInRv(array(
            'selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo'
        ))) !== false) {
            $suffixe = UTF8::substr($this->word, $position);

            // a
            $a = array('iéndo', 'ándo', 'ár', 'ér', 'ír');
            $a = array_map(static function ($item) use ($suffixe) {
                return $item . $suffixe;
            }, $a);

            if (($position2 = $this->searchIfInRv($a)) !== false) {
                $suffixe2 = UTF8::substr($this->word, $position2);
                $suffixe2 = UTF8::to_utf8(UTF8::to_ascii($suffixe2)); // unaccent
                $this->word = UTF8::substr($this->word, 0, $position2);
                $this->word .= $suffixe2;
                // $position is still valid here: the code relies on unaccenting keeping the character count
                $this->word = UTF8::substr($this->word, 0, $position);

                return true;
            }

            // b
            $b = array('iendo', 'ando', 'ar', 'er', 'ir');
            $b = array_map(static function ($item) use ($suffixe) {
                return $item . $suffixe;
            }, $b);

            if (($position2 = $this->searchIfInRv($b)) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position);

                return true;
            }

            // c
            if (($position2 = $this->searchIfInRv(array('yendo' . $suffixe))) !== false) {
                $before = UTF8::substr($this->word, ($position2 - 1), 1);
                if ($before === 'u') {
                    $this->word = UTF8::substr($this->word, 0, $position);

                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * Each group looks for its suffixes at the end of the word; once a group matches,
     * the step is over even when the suffix is outside the required region.
     *
     * @return boolean true when one of the step 1 suffixes ends the word
     */
    private function step1()
    {
        // anza anzas ico ica icos icas ismo ismos able ables ible ibles ista
        // istas oso osa osos osas amiento amientos imiento imientos
        //      delete if in R2
        if (($position = $this->search(array(
            'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso', 'istas', 'ista', 'ibles',
            'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza'
        ))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // adora ador ación adoras adores aciones ante antes ancia ancias
        //      delete if in R2
        //      if preceded by ic, delete if in R2
        if (($position = $this->search(array(
            'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia'
        ))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // can only match once the suffix above was removed (none of them ends in "ic")
            if (($position2 = $this->searchIfInR2(array('ic'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // logía logías
        //      replace with log if in R2
        if (($position = $this->search(array('logías', 'logía'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
            }

            return true;
        }

        // ución uciones
        //      replace with u if in R2
        if (($position = $this->search(array('uciones', 'ución'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
            }

            return true;
        }

        // encia encias
        //      replace with ente if in R2
        if (($position = $this->search(array('encias', 'encia'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(encias|encia)$#u', 'ente', $this->word);
            }

            return true;
        }

        // amente
        //      delete if in R1
        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        //      if preceded by os, ic or ad, delete if in R2
        if (($position = $this->search(array('amente'))) !== false) {
            // delete if in R1
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
            if (($position2 = $this->searchIfInR2(array('iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
                if (($position3 = $this->searchIfInR2(array('at'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }

            // if preceded by os, ic or ad, delete if in R2
            } elseif (($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position4);
            }

            return true;
        }

        // mente
        //      delete if in R2
        //      if preceded by ante, able or ible, delete if in R2
        if (($position = $this->search(array('mente'))) !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by ante, able or ible, delete if in R2
            if (($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // idad idades
        //      delete if in R2
        //      if preceded by abil, ic or iv, delete if in R2
        if (($position = $this->search(array('idades', 'idad'))) !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by abil, ic or iv, delete if in R2
            if (($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // iva ivo ivas ivos
        //      delete if in R2
        //      if preceded by at, delete if in R2
        if (($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by at, delete if in R2
            if (($position2 = $this->searchIfInR2(array('at'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2a: Verb suffixes beginning y
     *
     * @return boolean true when a suffix was removed
     */
    private function step2a()
    {
        // if found, delete if preceded by u
        // (Note that the preceding u need not be in RV.)
        if (($position = $this->searchIfInRv(array(
            'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye'
        ))) !== false) {
            $before = UTF8::substr($this->word, ($position - 1), 1);
            if ($before === 'u') {
                $this->word = UTF8::substr($this->word, 0, $position);

                return true;
            }
        }

        return false;
    }

    /**
     * Step 2b: Other verb suffixes
     * Search for the longest among the following suffixes in RV, and perform the action indicated.
     *
     * The list is ordered so that no entry is shadowed by a shorter entry placed
     * before it: search() returns the first entry that matches.
     *
     * @return boolean true when a suffix was removed
     */
    private function step2b()
    {
        //      delete
        // 'ieses', 'abais', 'arais', 'aseis' and 'íamos' belong to the Snowball list for this step.
        if (($position = $this->searchIfInRv(array(
            'iésemos', 'iéramos', 'ábamos', 'iríamos', 'eríamos', 'aríamos', 'áramos', 'ásemos', 'eríais',
            'aremos', 'eremos', 'iremos', 'asteis', 'ieseis', 'ierais', 'isteis', 'aríais',
            'irían', 'aréis', 'erían', 'erías', 'eréis', 'iréis', 'irías', 'ieran', 'iesen', 'ieron', 'iendo', 'ieras',
            'ieses', 'abais', 'arais', 'aseis',
            'iríais', 'arían', 'arías',
            'íamos',
            'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais', 'arán', 'arás', 'aría',
            'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron', 'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
            'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida', 'ado', 'ido', 'ará',
            'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an'
        ))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // en es éis emos
        //      delete, and if preceded by gu delete the u (the gu need not be in RV)
        if (($position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            if (($position2 = $this->search(array('gu'))) !== false) {
                // keep the g, drop the u
                $this->word = UTF8::substr($this->word, 0, ($position2 + 1));
            }

            return true;
        }

        return false;
    }

    /**
     * Step 3: residual suffix
     * Search for the longest among the following suffixes in RV, and perform the action indicated.
     *
     * @return boolean true when a suffix was removed
     */
    private function step3()
    {
        // os a o á í ó
        //      delete if in RV
        if (($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // e é
        //      delete if in RV, and if preceded by gu with the u in RV delete the u
        if (($position = $this->searchIfInRv(array('e', 'é'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            if (($position2 = $this->searchIfInRv(array('u'))) !== false) {
                $before = UTF8::substr($this->word, ($position2 - 1), 1);
                if ($before === 'g') {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            // the final e / é was removed whether or not a "gu" preceded it
            return true;
        }

        return false;
    }

    /**
     * And finally:
     * Remove acute accents
     */
    private function finish()
    {
        $this->word = UTF8::str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
    }
}
PK w�\�a��� � src/Stemmer/Stem.phpnu �[���
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

abstract class Stem implements Stemmer
{
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y');

    /**
     * helper, contains stringified list of vowels
     * @var string
     */
    protected $plainVowels;

    /**
     * The word we are stemming
     * @var string
     */
    protected $word;

    /**
     * The original word, use to check if word has been modified
     * @var string
     */
    protected $originalWord;

    /**
     * RV value
     * @var string
     */
    protected $rv;

    /**
     * RV index (based on the beginning of the word)
     * @var integer
     */
    protected $rvIndex;

    /**
     * R1 value (the substring of the word that forms R1)
     * @var string
     */
    protected $r1;

    /**
     * R1 index (based on the beginning of the word)
     * @var int
     */
    protected $r1Index;

    /**
     * R2 value (the substring of the word that forms R2)
     * @var string
     */
    protected $r2;

    /**
     * R2 index (based on the beginning of the word)
     * @var int
     */
    protected $r2Index;

protected function inRv($position)
    {
        return ($position >= $this->rvIndex);
    }

    /**
     * Tells whether a position (counted from the beginning of the word) lies in R1.
     *
     * @param int $position
     * @return bool
     */
    protected function inR1($position)
    {
        return ($position >= $this->r1Index);
    }

    /**
     * Tells whether a position (counted from the beginning of the word) lies in R2.
     *
     * @param int $position
     * @return bool
     */
    protected function inR2($position)
    {
        return ($position >= $this->r2Index);
    }

    /**
     * Same as search(), restricted to suffixes that start inside RV.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInRv($suffixes)
    {
        return $this->search($suffixes, $this->rvIndex);
    }

    /**
     * Same as search(), restricted to suffixes that start inside R1.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInR1($suffixes)
    {
        return $this->search($suffixes, $this->r1Index);
    }

    /**
     * Same as search(), restricted to suffixes that start inside R2.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInR2($suffixes)
    {
        return $this->search($suffixes, $this->r2Index);
    }

    /**
     * Looks for the first entry of $suffixes that ends the current word.
     *
     * Entries are tried in the given order and the first hit wins, so callers must
     * list a longer suffix before any shorter suffix it ends with.
     *
     * @param array $suffixes candidate suffixes
     * @param int   $offset   the suffix must start at or after this position
     * @return int|false start position of the matching suffix, false when none matches
     */
    protected function search($suffixes, $offset = 0)
    {
        $length = UTF8::strlen($this->word);

        // a region starting beyond the end of the word cannot contain anything
        if ($offset > $length) {
            return false;
        }

        foreach ($suffixes as $suffixe) {
            $position = UTF8::strrpos($this->word, $suffixe, $offset);

            // the last occurrence must run up to the very end of the word
            if (($position !== false) && ((UTF8::strlen($suffixe) + $position) === $length)) {
                return $position;
            }
        }

        return false;
    }

    /**
     * R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel.
     */
    protected function r1()
    {
        list($this->r1Index, $this->r1) = $this->rx($this->word);
    }

    /**
     * R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel.
     */
    protected function r2()
    {
        list($index, $value) = $this->rx($this->r1);

        $this->r2 = $value;
        // rx() answers relative to R1, so shift back to a word-based index
        $this->r2Index = $this->r1Index + $index;
    }

    /**
     * Common function for R1 and R2
     * Search the region after the first non-vowel following a vowel in $word, or the end of the word if there is no such non-vowel.
     * R1 : $in = $this->word
     * R2 : $in = R1
     *
     * @param string $in
     * @return array array(index of the region inside $in, content of the region)
     */
    protected function rx($in)
    {
        $length = UTF8::strlen($in);

        // defaults: the region is empty and starts at the end of the word
        $value = '';
        $index = $length;

        // Stop one letter before the end: a vowel in last position has no following
        // letter, and the empty lookup must not be mistaken for a non-vowel
        // (that used to yield an index one past the end of the word).
        for ($i = 0; $i < $length - 1; $i++) {
            if (!in_array(UTF8::substr($in, $i, 1), static::$vowels, true)) {
                continue;
            }

            if (in_array(UTF8::substr($in, $i + 1, 1), static::$vowels, true)) {
                continue;
            }

            // vowel at $i, non-vowel at $i + 1: the region starts right after the non-vowel
            $index = $i + 2;
            $value = UTF8::substr($in, $index);
            break;
        }

        return array($index, $value);
    }

    /**
     * Used by spanish, italian, portuguese, etc (but not by french)
     *
     * If the second letter is a consonant, RV is the region after the next following vowel,
     * or if the first two letters are vowels, RV is the region after the next consonant,
     * and otherwise (consonant-vowel case) RV is the region after the third letter.
     * But RV is the end of the word if these positions cannot be found.
     *
     * @return bool always true
     */
    protected function rv()
    {
        $length = UTF8::strlen($this->word);

        // defaults: RV is empty and starts at the end of the word
        $this->rv = '';
        $this->rvIndex = $length;

        if ($length < 3) {
            return true;
        }

        $firstIsVowel = in_array(UTF8::substr($this->word, 0, 1), static::$vowels, true);
        $secondIsVowel = in_array(UTF8::substr($this->word, 1, 1), static::$vowels, true);

        // If the second letter is a consonant, RV is the region after the next following vowel,
        if (!$secondIsVowel) {
            for ($i = 2; $i < $length; $i++) {
                $letter = UTF8::substr($this->word, $i, 1);
                if (in_array($letter, static::$vowels, true)) {
                    $this->rvIndex = $i + 1;
                    $this->rv = UTF8::substr($this->word, ($i + 1));

                    return true;
                }
            }
        }

        // or if the first two letters are vowels, RV is the region after the next consonant,
        if ($firstIsVowel && $secondIsVowel) {
            for ($i = 2; $i < $length; $i++) {
                $letter = UTF8::substr($this->word, $i, 1);
                if (!in_array($letter, static::$vowels, true)) {
                    $this->rvIndex = $i + 1;
                    $this->rv = UTF8::substr($this->word, ($i + 1));

                    return true;
                }
            }
        }

        // and otherwise (consonant-vowel case) RV is the region after the third letter.
        if (!$firstIsVowel && $secondIsVowel) {
            $this->rv = UTF8::substr($this->word, 3);
            $this->rvIndex = 3;

            return true;
        }

        // no suitable position found: RV stays at the end of the word
        return true;
    }
}
PK w�\Q�Wr�<