Skip to content

Commit

Permalink
Version 1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Feb 6, 2024
1 parent 38eee13 commit c86df31
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 14 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## 1.1.0

- Add `get_supported_languages` method to engine
- Fix issue with "x,x" improper fraction rule
- Compute tolerance against rounded value instead of floor
- Use Decimal for string input
- Add command-line interface

## 1.0.0

- Initial release
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@
module_dir = this_dir / module_name
data_files = list((module_dir / "rbnf").glob("*.xml"))

version_path = module_dir / "VERSION"
data_files.append(version_path)
version = version_path.read_text(encoding="utf-8").strip()

# -----------------------------------------------------------------------------

setup(
name=module_name,
version="1.0.0",
version=version,
description="Rule-based number formatting using Unicode CLDR data",
url="http://github.com/rhasspy/unicode",
author="Michael Hansen",
Expand Down
6 changes: 5 additions & 1 deletion tests/test_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,17 @@ def test_english():
# Special rules
assert engine.format_number(-1) == "minus one"
assert engine.format_number(float("nan")) == "not a number"
assert engine.format_number(float("inf")) == "infinity"
assert engine.format_number(float("inf")) == "infinite"

# Fractions
assert (
engine.format_number(3.14, ruleset_name=RulesetName.CARDINAL)
== "three point fourteen"
)
assert (
engine.format_number("5.3", ruleset_name=RulesetName.CARDINAL)
== "five point three"
)

# Ordinals
assert engine.format_number(99, ruleset_name=RulesetName.ORDINAL) == "ninety-ninth"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_find_rule():
engine.add_rule(20, "twenty[-→→];")
engine.add_rule(100, "←← hundred[ →→];")

ruleset = engine.rulesets[DEFAULT_LANGUAGE][RulesetName.DEFAULT]
ruleset = engine.rulesets[DEFAULT_LANGUAGE][RulesetName.CARDINAL]

rule_2 = ruleset.find_rule(2)
assert rule_2 is not None
Expand Down
8 changes: 8 additions & 0 deletions tests/test_es.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from unicode_rbnf import RbnfEngine


def test_spanish():
    """Check Spanish ("es") formatting of whole numbers and a decimal."""
    # Renamed from test_german: the language code loaded here is "es" and the
    # expected words are Spanish, so the old name was misleading in reports.
    engine = RbnfEngine.for_language("es")
    assert engine.format_number(5) == "cinco"
    assert engine.format_number(2) == "dos"
    # The decimal separator is spoken as "coma" in the expected output.
    assert engine.format_number(5.2) == "cinco coma dos"
1 change: 1 addition & 0 deletions unicode_rbnf/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.1.0
7 changes: 7 additions & 0 deletions unicode_rbnf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from pathlib import Path

from .engine import RbnfEngine, RulesetName

# Directory that contains this module; the VERSION file is looked up in it.
_DIR = Path(__file__).parent

# Version string, read at import time from the VERSION file (UTF-8) with
# surrounding whitespace removed.
__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()

# Names exported by a star-import of this package.
__all__ = [
    "__version__",
    "RbnfEngine",
    "RulesetName",
]
29 changes: 29 additions & 0 deletions unicode_rbnf/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import argparse

from unicode_rbnf import RbnfEngine, RulesetName


def main() -> None:
    """Command-line entry point: print each given number spelled out in words.

    ``--language`` is required and must be one of the engine's supported
    language codes. ``--rule`` optionally selects a ruleset by name. Every
    positional ``number`` argument is formatted and printed on its own line,
    in the order given.
    """
    ruleset_choices = [ruleset.value for ruleset in RulesetName]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--language",
        required=True,
        choices=RbnfEngine.get_supported_languages(),
        help="Language code",
    )
    parser.add_argument("--rule", choices=ruleset_choices, help="Ruleset name")
    parser.add_argument("number", nargs="+", help="Number(s) to turn into words")
    options = parser.parse_args()

    engine = RbnfEngine.for_language(options.language)
    # Arguments arrive as strings and are handed to the engine unchanged.
    for raw_number in options.number:
        print(engine.format_number(raw_number, ruleset_name=options.rule))


if __name__ == "__main__":
    main()
41 changes: 30 additions & 11 deletions unicode_rbnf/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from bisect import bisect_left
from collections import defaultdict
from dataclasses import dataclass, field
from decimal import Decimal
from enum import Enum
from math import ceil, floor, isinf, isnan, log, modf
from pathlib import Path
Expand All @@ -13,7 +14,7 @@
class RulesetName(str, Enum):
"""Names of common rulesets."""

DEFAULT = "spellout-numbering"
NUMBERING = "spellout-numbering"
VERBOSE = "spellout-numbering-verbose"
CARDINAL = "spellout-cardinal"
CARDINAL_VERBOSE = "spellout-cardinal-verbose"
Expand All @@ -22,6 +23,10 @@ class RulesetName(str, Enum):
YEAR = "spellout-numbering-year"


# Ruleset used when the caller names none and the language has no entry in
# DEFAULT_RULESET_FOR_LANGUAGE.
DEFAULT_RULESET: Final = RulesetName.NUMBERING
# Per-language default ruleset, keyed by language code; consulted before
# falling back to DEFAULT_RULESET.
DEFAULT_RULESET_FOR_LANGUAGE: Final = {
    "en": RulesetName.CARDINAL,
}
# Language used when neither the call nor the engine specifies one.
DEFAULT_LANGUAGE: Final = "en"
# Default for the `tolerance` parameter of the formatting helpers.
DEFAULT_TOLERANCE: Final = 1e-8
# NOTE(review): presumably ruleset names ignored while loading rules; the
# code that reads this set is not visible here, so confirm.
SKIP_RULESETS: Final = {"lenient-parse"}
Expand Down Expand Up @@ -127,7 +132,7 @@ def parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]":
# Handle special rules
if value_str == "-x":
rule = RbnfRule(value=RbnfSpecialRule.NEGATIVE_NUMBER)
elif value_str == "x.x":
elif value_str in ("x.x", "x,x"):
rule = RbnfRule(value=RbnfSpecialRule.IMPROPER_FRACTION)
elif value_str == "NaN":
rule = RbnfRule(value=RbnfSpecialRule.NOT_A_NUMBER)
Expand All @@ -137,7 +142,7 @@ def parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]":
try:
rule = RbnfRule(value=int(value_str), radix=radix)
except ValueError:
_LOGGER.error(
_LOGGER.debug(
"Unrecognized special rule: value=%s, text=%s", value_str, text
)
return None
Expand Down Expand Up @@ -294,7 +299,7 @@ def find_rule(
if isinf(number):
return self.special_rules.get(RbnfSpecialRule.INFINITY)

if (number - int(number)) > DEFAULT_TOLERANCE:
if abs(number - round(number)) > DEFAULT_TOLERANCE:
return self.special_rules.get(RbnfSpecialRule.IMPROPER_FRACTION)

# Numeric rules
Expand Down Expand Up @@ -336,6 +341,11 @@ def __init__(self, language: Optional[str] = None) -> None:
# lang -> ruleset name -> ruleset
self.rulesets: Dict[str, Dict[str, RbnfRuleSet]] = defaultdict(dict)

@staticmethod
def get_supported_languages() -> List[str]:
    """List the language codes that have a rules file in ``_LANG_DIR``.

    Every ``*.xml`` file directly inside ``_LANG_DIR`` contributes its stem
    (the file name without the extension). The codes are returned sorted in
    ascending order.
    """
    language_codes = [xml_path.stem for xml_path in _LANG_DIR.glob("*.xml")]
    language_codes.sort()
    return language_codes

@staticmethod
def for_language(language: str) -> "RbnfEngine":
"""Load XML rules for a language and construct an engine."""
Expand All @@ -360,8 +370,11 @@ def add_rule(
) -> Optional[RbnfRule]:
"""Manually add a rule to the engine."""
language = language or self.language or DEFAULT_LANGUAGE
ruleset_name = ruleset_name or RulesetName.DEFAULT
ruleset_name = ruleset_name or DEFAULT_RULESET_FOR_LANGUAGE.get(
language, DEFAULT_RULESET
)

assert ruleset_name is not None
ruleset = self.rulesets[language].get(ruleset_name)
if ruleset is None:
ruleset = RbnfRuleSet(name=ruleset_name)
Expand Down Expand Up @@ -411,7 +424,7 @@ def load_xml(self, root: et.Element, language: Optional[str] = None) -> None:

def format_number(
self,
number: float,
number: Union[int, float, str, Decimal],
ruleset_name: Optional[str] = None,
radix: Optional[int] = None,
language: Optional[str] = None,
Expand All @@ -429,21 +442,27 @@ def format_number(

def iter_format_number(
self,
number: float,
number: Union[int, float, str, Decimal],
ruleset_name: Optional[str] = None,
radix: Optional[int] = None,
language: Optional[str] = None,
tolerance: float = DEFAULT_TOLERANCE,
) -> Iterable[str]:
"""Format a number using loaded rulesets (generator)."""
language = language or self.language or DEFAULT_LANGUAGE
ruleset_name = ruleset_name or RulesetName.DEFAULT
ruleset_name = ruleset_name or DEFAULT_RULESET_FOR_LANGUAGE.get(
language, DEFAULT_RULESET
)

if isinstance(number, str):
number = Decimal(number)

assert ruleset_name is not None
ruleset = self.rulesets[language].get(ruleset_name)
if ruleset is None:
raise ValueError(f"No ruleset: {ruleset_name}")

rule = ruleset.find_rule(number, tolerance=tolerance)
rule = ruleset.find_rule(float(number), tolerance=tolerance)
if rule is None:
raise ValueError(f"No rule for {number} in {ruleset_name}")

Expand Down Expand Up @@ -497,9 +516,9 @@ def iter_format_number(

def fractional_to_int(frac_part: float, tolerance: float = DEFAULT_TOLERANCE) -> int:
    """Convert fractional part to int like 0.14000000000000012 -> 14.

    The value is multiplied by 10 until it lies within ``tolerance`` of a
    whole number, and that whole number is returned.

    Args:
        frac_part: Fractional part of a number, e.g. ``0.14``.
        tolerance: Largest absolute distance from the nearest whole number
            at which the scaled value is accepted as that whole number.

    Returns:
        The digits of the fractional part as an integer. Leading zeros are
        not representable in the result: ``0.05`` yields ``5``, the same as
        ``0.5``.

    Raises:
        ValueError: If ``frac_part`` is NaN (raised by ``round``).
        OverflowError: If ``frac_part`` is or becomes infinite (raised by
            ``round``).
    """
    scaled = frac_part
    # Compare against the *rounded* value with abs(): float noise can land
    # just below a whole number (13.999999999999998) as well as just above
    # it, and a floor-based, signed comparison mishandles the former.
    nearest = round(scaled)
    while abs(scaled - nearest) > tolerance:
        # Shift one more decimal digit into the integer part and retry.
        scaled *= 10
        nearest = round(scaled)

    return nearest

0 comments on commit c86df31

Please sign in to comment.