Skip to content

Commit

Permalink
Refinements in stemmers.
Browse files: browse the repository at this point in the history
  • Loading branch information
parpalak committed Aug 21, 2024
1 parent aa85493 commit 468d5e7
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 6 deletions.
8 changes: 4 additions & 4 deletions bin/stem
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ if (PHP_SAPI !== 'cli') {
require __DIR__ . '/../vendor/autoload.php';

if (count($argv) < 3) {
die('Usage: ' . $argv[0] . ' <kind> <argument>' . PHP_EOL);
die('Usage: ' . $argv[0] . ' <language> <argument>' . PHP_EOL);
}

$kind = $argv[1];
$language = $argv[1];
$argument = $argv[2];

$stemmer = match ($kind) {
$stemmer = match ($language) {
'russian' => new \S2\Rose\Stemmer\PorterStemmerRussian(),
'english' => new \S2\Rose\Stemmer\PorterStemmerEnglish(),
default => throw new \Exception('Unknown stemmer kind: ' . $kind),
default => throw new \Exception('Unknown stemmer language: ' . $language),
};

echo $stemmer->stemWord($argument), PHP_EOL;
2 changes: 1 addition & 1 deletion src/S2/Rose/Stemmer/PorterStemmerEnglish.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*/
class PorterStemmerEnglish extends AbstractStemmer implements StemmerInterface
{
private const SUPPORTS_REGEX = '#^[a-zA-Z\-0-9\'’]*$#Su';
private const SUPPORTS_REGEX = '#(?:^|-|\d)[a-zA-Z\'’]+$#Su';

protected static array $irregularWords = [
'skis' => 'ski',
Expand Down
5 changes: 4 additions & 1 deletion src/S2/Rose/Stemmer/PorterStemmerRussian.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/
class PorterStemmerRussian extends AbstractStemmer implements StemmerInterface
{
const SUPPORTS_REGEX = '#^[а-яА-ЯёЁ\-0-9]*$#Su';
const SUPPORTS_REGEX = '#(?:^|-|\d)[а-яА-ЯёЁ]+$#Su';

const VOWEL = '/аеиоуыэюя/Su';
const PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/Su';
Expand Down Expand Up @@ -296,6 +296,9 @@ class PorterStemmerRussian extends AbstractStemmer implements StemmerInterface
'модема' => 'модем',
'модему' => 'модем',

'токен' => '',
'токена' => 'токен',

'ищу' => 'иска',
'ищешь' => 'иска',
'ищет' => 'иска',
Expand Down

0 comments on commit 468d5e7

Please sign in to comment.