From 20b20348deb683eba6205cd2e4c78d1a60da6690 Mon Sep 17 00:00:00 2001
From: Leonhard Kuboschek
Date: Fri, 28 Oct 2022 11:10:36 +0200
Subject: [PATCH 1/2] Add swiss thousands

Recognise numerals that use the Swiss apostrophe as thousands separator,
both as integers ("100'000") and as decimals with a comma as decimal mark
("100'000,5"). The apostrophes are stripped before the text is handed to
parseDouble.

---
 Duckling/Numeral/DE/Corpus.hs |  4 ++++
 Duckling/Numeral/DE/Rules.hs  | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/Duckling/Numeral/DE/Corpus.hs b/Duckling/Numeral/DE/Corpus.hs
index 69b03132a..d2d406159 100644
--- a/Duckling/Numeral/DE/Corpus.hs
+++ b/Duckling/Numeral/DE/Corpus.hs
@@ -156,4 +156,8 @@ allExamples = concat
   , examples (NumeralValue 2771090092000000.0)
              [ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
              ]
+  , examples (NumeralValue 100000.0)
+             [ "100'000",
+               "100'000.00"
+             ]
   ]
diff --git a/Duckling/Numeral/DE/Rules.hs b/Duckling/Numeral/DE/Rules.hs
index 79553595d..07d36a16a 100644
--- a/Duckling/Numeral/DE/Rules.hs
+++ b/Duckling/Numeral/DE/Rules.hs
@@ -51,6 +51,22 @@ ruleFew = Rule
   , prod = \_ -> integer 3
   }
 
+-- | Decimal using the Swiss apostrophe as thousands separator and a comma as
+-- decimal mark, e.g. "100'000,5".
+ruleDecimalWithSwissThousandsSeparator :: Rule
+ruleDecimalWithSwissThousandsSeparator = Rule
+  { name = "decimal with swiss thousands separator"
+  , pattern =
+    [ regex "(\\d+(\\'\\d\\d\\d)+\\,\\d+)"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        -- Drop the apostrophes first, then turn the decimal comma into a dot.
+        let fmt = Text.replace "," "." $ Text.replace "'" Text.empty match
+        in parseDouble fmt >>= double
+      _ -> Nothing
+  }
+
 ruleDecimalWithThousandsSeparator :: Rule
 ruleDecimalWithThousandsSeparator = Rule
   { name = "decimal with thousands separator"
@@ -203,6 +219,20 @@ ruleNumeralDotNumeral = Rule
       _ -> Nothing
   }
 
+-- | Integer using the Swiss apostrophe as thousands separator,
+-- e.g. "100'000".
+ruleIntegerWithSwissThousandsSeparator :: Rule
+ruleIntegerWithSwissThousandsSeparator = Rule
+  { name = "integer with swiss thousands separator"
+  , pattern =
+    [ regex "(\\d{1,3}(\\'\\d\\d\\d){1,5})"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        parseDouble (Text.replace "'" Text.empty match) >>= double
+      _ -> Nothing
+  }
+
 ruleIntegerWithThousandsSeparator :: Rule
 ruleIntegerWithThousandsSeparator = Rule
   { name = "integer with thousands separator ."
@@ -231,9 +261,11 @@ rules =
   [ ruleCouple
   , ruleDecimalNumeral
   , ruleDecimalWithThousandsSeparator
+  , ruleDecimalWithSwissThousandsSeparator
   , ruleDozen
   , ruleFew
   , ruleIntegerWithThousandsSeparator
+  , ruleIntegerWithSwissThousandsSeparator
   , ruleIntersect
   , ruleMultiply
   , ruleNumeralDotNumeral

From 3966ecbb5b4adfac5b29d8ad8e9b04ba4bd31569 Mon Sep 17 00:00:00 2001
From: Leonhard Kuboschek
Date: Fri, 28 Oct 2022 13:29:18 +0200
Subject: [PATCH 2/2] Remove one test

Drop the "100'000.00" example: the Swiss decimal rule only accepts a comma
as decimal mark, so this input is not covered by it.

---
 Duckling/Numeral/DE/Corpus.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Duckling/Numeral/DE/Corpus.hs b/Duckling/Numeral/DE/Corpus.hs
index d2d406159..8e81345d5 100644
--- a/Duckling/Numeral/DE/Corpus.hs
+++ b/Duckling/Numeral/DE/Corpus.hs
@@ -157,7 +157,6 @@ allExamples = concat
              [ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
              ]
   , examples (NumeralValue 100000.0)
-             [ "100'000",
-               "100'000.00"
+             [ "100'000"
              ]
   ]