diff --git a/CHANGELOG.md b/CHANGELOG.md
index 216d1db..dc6b78d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.1.0
+
+- Add `get_supported_languages` method to engine
+- Fix issue with "x,x" improper fraction rule
+- Compute tolerance against rounded value instead of floor
+- Use Decimal for string input
+- Add command-line interface
+
 ## 1.0.0
 
 - Initial release
diff --git a/setup.py b/setup.py
index 40c17c9..f1db621 100644
--- a/setup.py
+++ b/setup.py
@@ -9,11 +9,15 @@ module_dir = this_dir / module_name
 
 data_files = list((module_dir / "rbnf").glob("*.xml"))
 
+version_path = module_dir / "VERSION"
+data_files.append(version_path)
+version = version_path.read_text(encoding="utf-8").strip()
+
 # -----------------------------------------------------------------------------
 
 setup(
     name=module_name,
-    version="1.0.0",
+    version=version,
     description="Rule-based number formatting using Unicode CLDR data",
     url="http://github.com/rhasspy/unicode",
     author="Michael Hansen",
diff --git a/tests/test_en.py b/tests/test_en.py
index 4307043..e6747f6 100644
--- a/tests/test_en.py
+++ b/tests/test_en.py
@@ -26,13 +26,17 @@ def test_english():
     # Special rules
     assert engine.format_number(-1) == "minus one"
     assert engine.format_number(float("nan")) == "not a number"
-    assert engine.format_number(float("inf")) == "infinity"
+    assert engine.format_number(float("inf")) == "infinite"
 
     # Fractions
     assert (
         engine.format_number(3.14, ruleset_name=RulesetName.CARDINAL)
         == "three point fourteen"
     )
+    assert (
+        engine.format_number("5.3", ruleset_name=RulesetName.CARDINAL)
+        == "five point three"
+    )
 
     # Ordinals
     assert engine.format_number(99, ruleset_name=RulesetName.ORDINAL) == "ninety-ninth"
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 388eaff..017bf42 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -43,7 +43,7 @@ def test_find_rule():
     engine.add_rule(20, "twenty[-→→];")
     engine.add_rule(100, "←← hundred[ →→];")
 
-    ruleset = engine.rulesets[DEFAULT_LANGUAGE][RulesetName.DEFAULT]
+    ruleset = engine.rulesets[DEFAULT_LANGUAGE][RulesetName.CARDINAL]
 
     rule_2 = ruleset.find_rule(2)
     assert rule_2 is not None
diff --git a/tests/test_es.py b/tests/test_es.py
new file mode 100644
index 0000000..36830f9
--- /dev/null
+++ b/tests/test_es.py
@@ -0,0 +1,8 @@
+from unicode_rbnf import RbnfEngine
+
+
+def test_spanish():
+    engine = RbnfEngine.for_language("es")
+    assert engine.format_number(5) == "cinco"
+    assert engine.format_number(2) == "dos"
+    assert engine.format_number(5.2) == "cinco coma dos"
diff --git a/unicode_rbnf/VERSION b/unicode_rbnf/VERSION
new file mode 100644
index 0000000..9084fa2
--- /dev/null
+++ b/unicode_rbnf/VERSION
@@ -0,0 +1 @@
+1.1.0
diff --git a/unicode_rbnf/__init__.py b/unicode_rbnf/__init__.py
index 583e4fa..81419cb 100644
--- a/unicode_rbnf/__init__.py
+++ b/unicode_rbnf/__init__.py
@@ -1,6 +1,13 @@
+from pathlib import Path
+
 from .engine import RbnfEngine, RulesetName
 
+_DIR = Path(__file__).parent
+
+__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()
+
 __all__ = [
+    "__version__",
     "RbnfEngine",
     "RulesetName",
 ]
diff --git a/unicode_rbnf/__main__.py b/unicode_rbnf/__main__.py
new file mode 100644
index 0000000..4cca6c1
--- /dev/null
+++ b/unicode_rbnf/__main__.py
@@ -0,0 +1,29 @@
+import argparse
+
+from unicode_rbnf import RbnfEngine, RulesetName
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--language",
+        choices=RbnfEngine.get_supported_languages(),
+        required=True,
+        help="Language code",
+    )
+    parser.add_argument(
+        "--rule",
+        choices=[v.value for v in RulesetName],
+        help="Ruleset name",
+    )
+    parser.add_argument("number", nargs="+", help="Number(s) to turn into words")
+    args = parser.parse_args()
+
+    engine = RbnfEngine.for_language(args.language)
+    for number_str in args.number:
+        words = engine.format_number(number_str, ruleset_name=args.rule)
+        print(words)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/unicode_rbnf/engine.py b/unicode_rbnf/engine.py
index e766217..0c6fd81 100644
--- a/unicode_rbnf/engine.py
+++ b/unicode_rbnf/engine.py
@@ -3,6 +3,7 @@
 from bisect import bisect_left
 from collections import defaultdict
 from dataclasses import dataclass, field
+from decimal import Decimal
 from enum import Enum
 from math import ceil, floor, isinf, isnan, log, modf
 from pathlib import Path
@@ -13,7 +14,7 @@
 class RulesetName(str, Enum):
     """Names of common rulesets."""
 
-    DEFAULT = "spellout-numbering"
+    NUMBERING = "spellout-numbering"
     VERBOSE = "spellout-numbering-verbose"
     CARDINAL = "spellout-cardinal"
     CARDINAL_VERBOSE = "spellout-cardinal-verbose"
@@ -22,6 +23,10 @@ class RulesetName(str, Enum):
     YEAR = "spellout-numbering-year"
 
 
+DEFAULT_RULESET = RulesetName.NUMBERING
+DEFAULT_RULESET_FOR_LANGUAGE: Final = {
+    "en": RulesetName.CARDINAL,
+}
 DEFAULT_LANGUAGE: Final = "en"
 DEFAULT_TOLERANCE: Final = 1e-8
 SKIP_RULESETS: Final = {"lenient-parse"}
@@ -127,7 +132,7 @@ def parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]":
         # Handle special rules
         if value_str == "-x":
             rule = RbnfRule(value=RbnfSpecialRule.NEGATIVE_NUMBER)
-        elif value_str == "x.x":
+        elif value_str in ("x.x", "x,x"):
             rule = RbnfRule(value=RbnfSpecialRule.IMPROPER_FRACTION)
         elif value_str == "NaN":
             rule = RbnfRule(value=RbnfSpecialRule.NOT_A_NUMBER)
@@ -137,7 +142,7 @@ def parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]":
             try:
                 rule = RbnfRule(value=int(value_str), radix=radix)
             except ValueError:
-                _LOGGER.error(
+                _LOGGER.debug(
                     "Unrecognized special rule: value=%s, text=%s", value_str, text
                 )
                 return None
@@ -294,7 +299,7 @@ def find_rule(
         if isinf(number):
             return self.special_rules.get(RbnfSpecialRule.INFINITY)
 
-        if (number - int(number)) > DEFAULT_TOLERANCE:
+        if abs(number - round(number)) > tolerance:
             return self.special_rules.get(RbnfSpecialRule.IMPROPER_FRACTION)
 
         # Numeric rules
@@ -336,6 +341,11 @@ def __init__(self, language: Optional[str] = None) -> None:
         # lang -> ruleset name -> ruleset
         self.rulesets: Dict[str, Dict[str, RbnfRuleSet]] = defaultdict(dict)
 
+    @staticmethod
+    def get_supported_languages() -> List[str]:
+        """Return a list of supported language codes."""
+        return sorted([f.stem for f in _LANG_DIR.glob("*.xml")])
+
     @staticmethod
     def for_language(language: str) -> "RbnfEngine":
         """Load XML rules for a language and construct an engine."""
@@ -360,8 +370,11 @@ def add_rule(
     ) -> Optional[RbnfRule]:
         """Manually add a rule to the engine."""
         language = language or self.language or DEFAULT_LANGUAGE
-        ruleset_name = ruleset_name or RulesetName.DEFAULT
+        ruleset_name = ruleset_name or DEFAULT_RULESET_FOR_LANGUAGE.get(
+            language, DEFAULT_RULESET
+        )
 
+        assert ruleset_name is not None
         ruleset = self.rulesets[language].get(ruleset_name)
         if ruleset is None:
             ruleset = RbnfRuleSet(name=ruleset_name)
@@ -411,7 +424,7 @@ def load_xml(self, root: et.Element, language: Optional[str] = None) -> None:
 
     def format_number(
         self,
-        number: float,
+        number: Union[int, float, str, Decimal],
         ruleset_name: Optional[str] = None,
         radix: Optional[int] = None,
         language: Optional[str] = None,
@@ -429,7 +442,7 @@ def format_number(
 
     def iter_format_number(
         self,
-        number: float,
+        number: Union[int, float, str, Decimal],
         ruleset_name: Optional[str] = None,
         radix: Optional[int] = None,
         language: Optional[str] = None,
@@ -437,13 +450,19 @@ def iter_format_number(
     ) -> Iterable[str]:
         """Format a number using loaded rulesets (generator)."""
         language = language or self.language or DEFAULT_LANGUAGE
-        ruleset_name = ruleset_name or RulesetName.DEFAULT
+        ruleset_name = ruleset_name or DEFAULT_RULESET_FOR_LANGUAGE.get(
+            language, DEFAULT_RULESET
+        )
+
+        if isinstance(number, str):
+            number = Decimal(number)
 
+        assert ruleset_name is not None
         ruleset = self.rulesets[language].get(ruleset_name)
         if ruleset is None:
             raise ValueError(f"No ruleset: {ruleset_name}")
 
-        rule = ruleset.find_rule(number, tolerance=tolerance)
+        rule = ruleset.find_rule(float(number), tolerance=tolerance)
         if rule is None:
             raise ValueError(f"No rule for {number} in {ruleset_name}")
 
@@ -497,9 +516,9 @@ def iter_format_number(
 
 def fractional_to_int(frac_part: float, tolerance: float = DEFAULT_TOLERANCE) -> int:
     """Convert fractional part to int like 0.14000000000000012 -> 14"""
-    frac_int = int(frac_part)
+    frac_int = round(frac_part)
 
-    if (frac_part - frac_int) > tolerance:
+    if abs(frac_part - frac_int) > tolerance:
         return fractional_to_int(frac_part * 10, tolerance=tolerance)
 
     return frac_int