Skip to content

Commit

Permalink
Add Expression.ANY constant that accepts, contains, and matches with …
Browse files Browse the repository at this point in the history
…everything other than an empty Or. This will be used as the default value for Alignment arguments.

PiperOrigin-RevId: 630344671
  • Loading branch information
isingoo authored and copybara-github committed May 3, 2024
1 parent 64d73d0 commit e3fefc8
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 7 deletions.
50 changes: 43 additions & 7 deletions nisaba/scripts/natural_translit/utils/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,19 @@ def _symbols_of(
else:
return [[other]]

def is_any(self) -> bool:
  """Returns True if this expression is, or only wraps, Expression.ANY.

  Symbol instances are never ANY. An expression that holds exactly one item
  defers to that item, so a single-item wrapper around ANY is also reported
  as ANY. Otherwise the check is identity with Expression.ANY.

  Returns:
    bool
  """
  if isinstance(self, sym.Symbol):
    return False
  if len(self) == 1:
    return self.item(0).is_any()
  return self is Expression.ANY

def is_eps(self) -> bool:
  """Returns True if this is a Symbol whose underlying symbol is epsilon.

  Expressions that are not Symbol instances always return False.
  """
  return isinstance(self, sym.Symbol) and self.symbol.is_eps()

def is_nor(self) -> bool:
  """Returns True if this is a Symbol whose underlying symbol is nor.

  Expressions that are not Symbol instances always return False.
  """
  return isinstance(self, sym.Symbol) and self.symbol.is_nor()

def accepts(
self, other: 'Expression.OR_SYMBOL', equivalent: bool = False
) -> bool:
Expand All @@ -124,6 +137,10 @@ def accepts(
Returns:
bool
"""
if self.is_any() or other.is_any():
if equivalent:
return self.is_any() and other.is_any()
return True
self_symbols, other_symbols = self.symbols(), self._symbols_of(other)
self_len, other_len = len(self_symbols), len(other_symbols)
if (
Expand Down Expand Up @@ -198,6 +215,8 @@ def contains_symbol_list(
return True
if search_for == [sym.Symbol.CTRL.nor]:
return False
if self.is_any():
return True
# Loop over symbol lists, eg: [[a, b, c, d], [e, f, g]]
for symbol_list in self.symbols():
while symbol_list:
Expand Down Expand Up @@ -251,15 +270,19 @@ def contains(
a.contains(b, head=True): [a, b, c, d] starts with [a, b]
a.contains(b, tail=True): False
"""
if self.is_any() or other.is_any():
return not self.is_nor() and not other.is_nor()
for sym_list in self._symbols_of(other):
if self.contains_symbol_list(sym_list, head, tail):
return True
return False

def _symbol_contains(self, other: sym.Symbol) -> bool:
  """Returns True if this expression contains the single symbol `other`.

  Args:
    other: The symbol to look for.

  Returns:
    True when `other.is_any()` holds, when one of this expression's symbol
    lists is exactly `[eps]`, or when `other` is not nor and `[other]` is one
    of this expression's symbol lists. False otherwise.
  """
  if other.is_any():
    return True
  self_symbols = self.symbols()
  # The nor symbol is only ever "contained" through the [eps] shortcut.
  return [sym.Symbol.CTRL.eps] in self_symbols or (
      not other.is_nor() and [other] in self_symbols
  )

def is_contained(
Expand All @@ -278,13 +301,13 @@ def matches(self, other: 'Expression.OR_SYMBOL') -> bool:
return self.contains(other, head=True, tail=True)

# head_matches and tail_matches require at least one symbol match unless
# both expressions are empty Cats. For example, if a rule requires a vowel as
# following context but there is no following context, the rule shouldn't
# apply.
# both expressions are empty Cats or one of the expressions is Expression.ANY.
# For example, if a rule requires a vowel as following context but there is no
# following context, the rule shouldn't apply.

def head_matches(self, other: 'Expression.OR_SYMBOL') -> bool:
  """Checks whether this expression starts with `other`.

  Args:
    other: The expression or symbol to look for at the head.

  Returns:
    If this expression is non-empty (truthy) and `other` is empty (falsy),
    the result is `other.is_any()`: an empty `other` only matches when it is
    the ANY expression. Otherwise the result of
    `self.contains(other, head=True)`.
  """
  if self and not other:
    return other.is_any()
  return self.contains(other, head=True)

def is_prefix(self, other: 'Expression.OR_SYMBOL') -> bool:
Expand All @@ -294,7 +317,7 @@ def is_prefix(self, other: 'Expression.OR_SYMBOL') -> bool:

def tail_matches(self, other: 'Expression.OR_SYMBOL') -> bool:
  """Checks whether this expression ends with `other`.

  Args:
    other: The expression or symbol to look for at the tail.

  Returns:
    If this expression is non-empty (truthy) and `other` is empty (falsy),
    the result is `other.is_any()`: an empty `other` only matches when it is
    the ANY expression. Otherwise the result of
    `self.contains(other, tail=True)`.
  """
  if self and not other:
    return other.is_any()
  return self.contains(other, tail=True)

def is_suffix(self, other: 'Expression.OR_SYMBOL') -> bool:
Expand All @@ -316,6 +339,9 @@ def repeat(self, n: int = 2) -> 'Cat':
return Cat(*([self] * n))


# Wildcard constant. Per the change description, it accepts, contains, and
# matches with everything other than an empty Or, and is intended as the
# default value for Alignment arguments.
Expression.ANY = Expression('any_expression')


class Atomic(Expression, sym.Symbol):
"""An instance of a single symbol."""

Expand Down Expand Up @@ -392,7 +418,10 @@ def __str__(self):

def add(self, *items: Expression) -> 'Cat':
  """Adds the given items to this Cat, in order, and returns self.

  Args:
    *items: Expressions to add.

  Returns:
    self
  """
  for item in items:
    if item.is_any():
      # ANY items are appended to the item list directly rather than going
      # through _add_item.
      self._items.append(item)
    else:
      self._add_item(item)
  return self

def symbols(self) -> list[list[sym.Symbol]]:
Expand Down Expand Up @@ -462,6 +491,13 @@ def add(self, *items: Expression) -> 'Or':
self
"""
for item in items:
# If the Expression.ANY is in Or, don't add any items.
if Expression.ANY in self:
break
# If the item is any, the other items are irrelevant.
if item.is_any():
self._items = [item]
break
if self.accepts(item):
if item.accepts(self) and item.state_count() < self.state_count():
self._update(item)
Expand Down
23 changes: 23 additions & 0 deletions nisaba/scripts/natural_translit/utils/expression_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def test_atomic_read(self):

def test_control(self):
  """Checks the control predicates on the unk, eps and nor control atomics."""
  self.assertTrue(exp.Atomic.CTRL.unk.is_control())
  self.assertTrue(exp.Atomic.CTRL.eps.is_eps())
  self.assertTrue(exp.Atomic.CTRL.nor.is_nor())

def test_symbol_inventory_lookup(self):
self.assertEqual(_ATM.lookup(_ATM.a, 'atm_sym'), _SYM.a)
Expand Down Expand Up @@ -98,6 +100,7 @@ def test_cat_items(self):
self.AssertStrEqual(cat, '(a b a)')
self.assertIsNot(cat.item(0), cat.item(2))
self.AssertEquivalent(cat.item(0), (cat.item(2)))
self.assertTrue(exp.Cat(exp.Expression.ANY).is_any())

def test_cat_nested(self):
cat1 = exp.Cat(_ATM.a, _ATM.b)
Expand Down Expand Up @@ -132,10 +135,13 @@ def test_or_items(self):
or2 = exp.Or(_ATM.b, _ATM.c)
or3 = exp.Or(or1, or2)
or4 = exp.Or(_ATM.a, _ATM.b, _ATM.a)
or5 = or1.copy().add(exp.Expression.ANY)
self.AssertStrEqual(or2, '(b | c)')
self.AssertStrEqual(or3, '(a | b | c)')
self.AssertStrEqual(or4, '(a | b)')
self.assertLen(or4, 2)
self.assertNotIn(_ATM.a, or5)
self.AssertAccepts(or5, _ATM.a)

def test_or_nested(self):
cat1 = exp.Cat(_ATM.a)
Expand Down Expand Up @@ -214,6 +220,8 @@ def test_state_count(self):
def test_equivalent(self):
or0 = exp.Or()
self.AssertEquivalent(exp.Atomic.CTRL.eps, sym.Symbol.CTRL.eps)
self.AssertEquivalent(exp.Expression.ANY, exp.Expression.ANY)
self.AssertNotEquivalent(exp.Expression.ANY, exp.Atomic.CTRL.eps)
self.AssertEquivalent(exp.Cat(), exp.Atomic.CTRL.eps)
self.AssertEquivalent(exp.Cat(), exp.Cat())
self.AssertNotEquivalent(or0, exp.Atomic.CTRL.nor)
Expand All @@ -229,13 +237,21 @@ def test_equivalent(self):
def test_contains_controls(self):
  """Checks containment between the eps and nor controls and Expression.ANY."""
  eps = exp.Atomic.CTRL.eps
  nor = exp.Atomic.CTRL.nor
  any_exp = exp.Expression.ANY
  self.AssertContains(eps, eps)
  self.AssertContains(nor, eps)
  # ANY and nor never contain each other, in either argument order.
  self.AssertNotContains(nor, any_exp)
  self.AssertNotContains(eps, nor)
  self.AssertNotContains(nor, nor)
  self.AssertNotContains(any_exp, nor)

def test_contains_expressions(self):
cat_abc = _ATM.a + _ATM.b + _ATM.c
any_exp = exp.Expression.ANY
self.AssertContains(any_exp, _ATM.a)
self.AssertContains(_ATM.a, any_exp)
self.AssertContains(any_exp, cat_abc)
self.AssertContains(cat_abc, any_exp)
self.AssertContains(cat_abc, exp.Cat())
self.AssertNotContains(cat_abc, exp.Or())
self.AssertContains(cat_abc, exp.Or(exp.Cat()))
Expand All @@ -250,8 +266,15 @@ def test_contains_expressions(self):
def test_matches(self):
abc_or_cd = (_ATM.a + _ATM.b + _ATM.c) | (_ATM.c + _ATM.d)
a_or_c_b_or_d = (_ATM.a | _ATM.c) + (_ATM.b | _ATM.d)
any_exp = exp.Expression.ANY
self.AssertMatches(any_exp, a_or_c_b_or_d)
self.AssertMatches(a_or_c_b_or_d, any_exp)
self.AssertMatches(abc_or_cd, a_or_c_b_or_d)
self.AssertNotMatches(abc_or_cd, _ATM.a + _ATM.b + _ATM.d)
self.assertTrue(any_exp.is_prefix(abc_or_cd))
self.assertTrue(any_exp.is_suffix(abc_or_cd))
self.assertTrue(abc_or_cd.is_prefix(any_exp))
self.assertTrue(abc_or_cd.is_suffix(any_exp))
self.assertTrue(exp.Cat().is_prefix(exp.Cat()))
self.assertTrue(exp.Cat().is_suffix(exp.Cat()))
self.assertFalse(exp.Cat().is_prefix(abc_or_cd))
Expand Down
9 changes: 9 additions & 0 deletions nisaba/scripts/natural_translit/utils/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ def __str__(self) -> str:
def is_control(self) -> bool:
  """Returns True if this symbol is a member of Symbol.CTRL."""
  return self in Symbol.CTRL

def is_any(self) -> bool:
  """Always returns False at this level."""
  return False

def is_eps(self) -> bool:
  """Returns True if this symbol is the Symbol.CTRL.eps object itself."""
  return self is Symbol.CTRL.eps

def is_nor(self) -> bool:
  """Returns True if this symbol is the Symbol.CTRL.nor object itself."""
  return self is Symbol.CTRL.nor

def description(self, show_features: bool = False) -> str:
"""A string that describes the symbol."""
text = 'alias: %s index: %s' % (self.alias, self.index)
Expand Down

0 comments on commit e3fefc8

Please sign in to comment.