diff --git a/packages/yoastseo/spec/languageProcessing/languages/de/helpers/internal/determineStemSpec.js b/packages/yoastseo/spec/languageProcessing/languages/de/helpers/internal/determineStemSpec.js index 86fe1ad93d9..5817b214eac 100644 --- a/packages/yoastseo/spec/languageProcessing/languages/de/helpers/internal/determineStemSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/de/helpers/internal/determineStemSpec.js @@ -7,10 +7,6 @@ const morphologyDataDE = getMorphologyData( "de" ).de; const wordsToStem = [ // Default stemmer [ "studenten", "student" ], - // Nouns: exceptionStems - [ "vögel", "vogel" ], - // Nouns: exceptionStems compound - [ "raubvögel", "raubvogel" ], // Nouns: exceptionStems with one plural matching multiple singulars [ "stadium", "stadi" ], [ "stadion", "stadi" ], @@ -55,8 +51,61 @@ const wordsToStem = [ [ "Kraftwerke", "Kraftwerk" ], ]; -describe( "Test for determining stems for German words", () => { - it( "creates stems for German words", () => { - wordsToStem.forEach( wordToStem => expect( determineStem( wordToStem[ 0 ], morphologyDataDE ) ).toBe( wordToStem[ 1 ] ) ); +describe.each( wordsToStem )( "Test for determining stems for German words", ( word, stem ) => { + it( "stems for German word " + word + " to " + stem, () => { + expect( determineStem( word, morphologyDataDE ) ).toBe( stem ); + } ); +} ); + +const umlautExceptions = [ + // A noun that gets umlaut in plural + [ "vögel", "vogel" ], + [ "läden", "laden" ], + // A noun that gets umlaut and an irregular plural dative suffix + [ "müttern", "mutter" ], + [ "schwägern", "schwager" ], + // A noun that gets umlaut and a regular case suffix + [ "bädern", "bad" ], + [ "ängsten", "angst" ], + [ "hände", "hand" ], + // A noun that gets umlaut and -e in plural + [ "häuse", "haus" ], + [ "ängste", "angst" ], + // A noun that gets umlaut and -er in plural + [ "männer", "mann" ], + [ "wörter", "wort" ], + // compound noun that gets umlaut in plural + [ "raubvögel", "raubvogel" ], + // compound noun that gets umlaut and -e in plural + [ "landflüchte", "landflucht" ], + [ "geschwülst", "geschwulst" ], + [ "feuersbrünst", "feuersbrunst" ], + [ "hirschbrünft", "hirschbrunft" ], + [ "brünst", "brunst" ], + [ "lebensbrünst", "lebensbrunst" ], + [ "liebesbrünst", "liebesbrunst" ], + // More umlaut nouns from all groups + [ "schwäger", "schwager" ], + [ "schäden", "schaden" ], + [ "töchter", "tochter" ], + [ "brünst", "brunst" ], + [ "brüder", "bruder" ], + [ "gärten", "garten" ], + [ "gräben", "graben" ], + [ "kästen", "kasten" ], + [ "mütter", "mutter" ], + [ "läden", "laden" ], + [ "väter", "vater" ], + [ "füchs", "fuchs" ], + [ "ärzte", "arzt" ], + [ "gäns", "gans" ], + [ "häls", "hal" ], + [ "äxte", "axt" ], + [ "äste", "ast" ], +]; + +describe.each( umlautExceptions )( "Test for determining stems for German words with umlauts", ( word, stem ) => { + it( "stems for German word with umlaut " + word + " to " + stem, () => { + expect( determineStem( word, morphologyDataDE ) ).toBe( stem ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/de/helpers/internal/determineStem.js b/packages/yoastseo/src/languageProcessing/languages/de/helpers/internal/determineStem.js index 8e84092e4fa..5addaaa4ffd 100644 --- a/packages/yoastseo/src/languageProcessing/languages/de/helpers/internal/determineStem.js +++ b/packages/yoastseo/src/languageProcessing/languages/de/helpers/internal/determineStem.js @@ -1,23 +1,21 @@ import { flatten } from "lodash"; import { languageProcessing } from "yoastseo"; -const { flattenSortLength } = languageProcessing; - import { detectAndStemRegularParticiple } from "./detectAndStemRegularParticiple"; import stem from "./stem"; +const { flattenSortLength } = languageProcessing; + /** * Returns a stem for a word that appears on the noun exception lists. * - * @param {Object} morphologyDataNouns The German morphology data for nouns. - * @param {string} stemmedWord The stem to check. + * @param {array[]} exceptionList The exception list to check. + * @param {string} stemmedWord The stem to check. * * @returns {string|null} The stemmed word or null if none was found. */ -const findStemOnNounExceptionList = function( morphologyDataNouns, stemmedWord ) { - const exceptionStems = morphologyDataNouns.exceptionStems; - - for ( const exceptionStemSet of exceptionStems ) { +const findStemOnNounExceptionList = function( exceptionList, stemmedWord ) { + for ( const exceptionStemSet of exceptionList ) { const matchedStem = exceptionStemSet.find( exceptionStem => stemmedWord.endsWith( exceptionStem ) ); if ( matchedStem ) { @@ -108,6 +106,13 @@ const findStemOnVerbExceptionList = function( morphologyDataVerbs, stemmedWord ) * @returns {string} Stemmed form of the word. */ export default function determineStem( word, morphologyDataGerman ) { + // Already return the stem here if the word contains umlaut and ends with an ending that looks like a valid suffix, e.g. "läden" stemmed to "laden". + const umlautException = morphologyDataGerman.nouns.umlautException || []; + const findUmlautException = findStemOnNounExceptionList( umlautException, word ); + if ( findUmlautException ) { + return findUmlautException; + } + const verbData = morphologyDataGerman.verbs; const stemmedWord = stem( verbData, word ); @@ -115,7 +120,7 @@ export default function determineStem( word, morphologyDataGerman ) { * Goes through the stem exception functions from left to right, returns the first stem it finds. * If no stem has been found, return the original, programmatically created, stem. */ - return findStemOnNounExceptionList( morphologyDataGerman.nouns, stemmedWord ) || + return findStemOnNounExceptionList( morphologyDataGerman.nouns.exceptionStems, stemmedWord ) || findStemOnAdjectiveExceptionList( morphologyDataGerman.adjectives, stemmedWord ) || findStemOnVerbExceptionList( verbData, stemmedWord ) || detectAndStemRegularParticiple( verbData, word ) ||