Skip to content

Commit

Permalink
[natural_translit] Added gr_class to Grapheme features. Added Latin digits to inventories.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 732115394
  • Loading branch information
isingoo authored and copybara-github committed Feb 28, 2025
1 parent 46ed0e6 commit 5779e46
Show file tree
Hide file tree
Showing 8 changed files with 863 additions and 113 deletions.
3 changes: 2 additions & 1 deletion nisaba/scripts/natural_translit/language_params/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ def _latn_inventory() -> g.Grapheme.Inventory:
gr.make_iterable_suppl(
'consonant', *consonants, *(c.upper for c in consonants)
)
gr.import_graphemes(*latn.number, list_alias='number')
return gr.sync_atomics(
[gr.upper, gr.lower, gr.letter, gr.vowel, gr.consonant]
[gr.upper, gr.lower, gr.letter, gr.vowel, gr.consonant, gr.number]
)


Expand Down
3 changes: 2 additions & 1 deletion nisaba/scripts/natural_translit/script/grapheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def _grapheme_features() -> ft.Feature.Inventory:
Script('br', 'Brahmic Parent', 801),
)
),
f.Aspect(f.equidistant('gr_class', f('letter'), f('number'))),
f.Aspect(f.equidistant('case', f('upper'), f('lower'))),
)
return ftr
Expand Down Expand Up @@ -132,7 +133,7 @@ def from_char(
name = unicodedata.name(character)
except ValueError:
name = 'GRAPHEME'
name += ' U+' + code_hex.upper()[2:]
name += ' U+' + code_hex.upper()[2:].rjust(4, '0')
return cls(
alias=alias,
text=character,
Expand Down
10 changes: 8 additions & 2 deletions nisaba/scripts/natural_translit/script/grapheme_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def _test_inventory() -> _G.Inventory:
gr_inv = _G.Inventory(_G.GR_FEATURES.script.latn)
gr_inv.add_graphemes(
# Raw
_G.from_char('a', 'a'),
_G.from_char('a', 'a', {_G.GR_FEATURES.gr_class.letter}),
_G.from_char('1', 'one', {_G.GR_FEATURES.gr_class.number}),
# Abstract with custom text
_G('nasal', '~'),
# Abstract with no text
Expand Down Expand Up @@ -158,7 +159,7 @@ def test_parse(self):
def test_grapheme_description(self):
self.assertEqual(
_TEST_INVENTORY.a.description(),
'alias: a\traw: a\tname: LATIN SMALL LETTER A U+61',
'alias: a\traw: a\tname: LATIN SMALL LETTER A U+0061',
)
self.assertEqual(
_TEST_INVENTORY.nasal.description(),
Expand All @@ -169,6 +170,11 @@ def test_grapheme_description(self):
'alias: ch_1\ttext: ch_1\tname: ch_1',
)

def test_grapheme_class(self):
  """Checks the gr_class feature carried by graphemes in the test inventory.

  The 'a' and 'one' graphemes are expected to have the letter and number
  classes respectively, and the 'nasal' grapheme is expected to have the
  'any' value of the gr_class aspect.
  """
  gr_class = _G.GR_FEATURES.gr_class
  self.AssertHasFeature(_TEST_INVENTORY.a, gr_class.letter)
  self.AssertHasFeature(_TEST_INVENTORY.one, gr_class.number)
  self.AssertHasFeature(_TEST_INVENTORY.nasal, gr_class.any)

def test_import_graphemes(self):
new_inv = _G.Inventory(
_G.GR_FEATURES.script.latn, _G.LANGUAGE.en
Expand Down
Loading

0 comments on commit 5779e46

Please sign in to comment.