Update layout: -N to -Z and upper S- to Z-
mkrnr committed Nov 7, 2022
1 parent dd7f8c7 commit 94a2f7d
Showing 17 changed files with 88 additions and 62 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/checks.yml
@@ -9,10 +9,10 @@ jobs:

     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.x
+    - name: Set up Python 3.10
      uses: actions/setup-python@v4
      with:
-        python-version: '3.x'
+        python-version: '3.10'
     - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
3 changes: 2 additions & 1 deletion regenpfeifer/assets/patterns/final_patterns.json
@@ -1,3 +1,4 @@
 {
-    "e$": "/[e|E]"
+    "e$": "/[e|E]",
+    "en$": "/[EPB]"
 }
2 changes: 1 addition & 1 deletion regenpfeifer/assets/patterns/left_patterns.json
@@ -25,5 +25,5 @@
     "j": "[SKWR]",
     "x": "[KP]",
     "y": "[KWR]",
-    "z": "[S*]"
+    "z": "[Z]"
 }
7 changes: 2 additions & 5 deletions regenpfeifer/assets/patterns/right_patterns.json
@@ -18,7 +18,7 @@
     "gst": "[-FGT]",
     "mpf": "[-FPL]",
     "nst": "[-FPBT]",
-    "tzt": "[-FT*]",
+    "tzt": "[-FT]",
     "nem": "[-PBL]",
     "sch": "[-RB]",
     "nch": "[-FRPB]",
@@ -32,10 +32,7 @@
     "der": "[-RD]",
     "lst": "[-FLT]",
     "ber": "[-RB]",
-    "ben": "[-PB]*",
-    "en$": "[-N]",
     "ms": "[-FPL]",
-    "en": "[-PB]",
     "es": "[-S]",
     "lb": "[-BL]",
     "ck": "[-BG]",
Expand All @@ -58,7 +55,7 @@
"t": "[-T]",
"s": "[-S]",
"ß": "[-S]",
"z": "[-S]*",
"z": "[-Z]",
"d": "[-D]",
"n": "[-PB]",
"k": "[-BG]",
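
Note: the pattern files map letter sequences to bracketed steno chords, and this commit retires the asterisked fallbacks for z in favor of the dedicated Z keys. A minimal lookup sketch in Python, assuming pattern_util.load_pattern_file (used elsewhere in the package) returns the parsed JSON file as a plain dict:

    from regenpfeifer.util import pattern_util

    # Right-hand patterns now emit the dedicated -Z chord for a trailing z.
    right_patterns = pattern_util.load_pattern_file("right_patterns.json")
    print(right_patterns["z"])    # "[-Z]"  (was "[-S]*" before this commit)
    print(right_patterns["tzt"])  # "[-FT]" (was "[-FT*]")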
4 changes: 2 additions & 2 deletions regenpfeifer/stroke_validator.py
@@ -16,9 +16,9 @@ def __init__(self):
         Constructor
         """

-        left_consonant_keys = ["S", "T", "K", "P", "W", "H", "R"]
+        left_consonant_keys = ["Z", "S", "T", "K", "P", "W", "H", "R"]
         vowel_keys = ["A", "O", "*", "E", "U"]
-        right_consonant_keys = ["-", "F", "R", "P", "B", "L", "G", "T", "S", "D", "N"]
+        right_consonant_keys = ["-", "F", "R", "P", "B", "L", "G", "T", "S", "D", "Z"]

     def validate(self, strokes):

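
With Z added at both ends, the validator's key order now runs Z S T K P W H R on the left bank and -F -R -P -B -L -G -T -S -D -Z on the right. A small usage sketch, mirroring the updated tests (validate takes a matched-stroke string and returns a boolean):

    from regenpfeifer.stroke_validator import StrokeValidator

    validator = StrokeValidator()
    # Bracketed chords must appear in steno order within each stroke.
    print(validator.validate("[TP][e|OU][-R]"))        # True
    print(validator.validate("[TKPW][e|A][-PB][-Z]"))  # True: -Z now closes the right bank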
14 changes: 14 additions & 0 deletions regenpfeifer/util/stroke_util.py
@@ -34,6 +34,20 @@ def remove_markup(strokes):
     return "/".join(stripped_strokes)


+def strip_unmatched_letters(match):
+    strokes = match.split("/")
+    stripped_strokes = []
+    for stroke in strokes:
+        stripped_stroke = ""
+        stroke_parts = split(stroke)
+        for stroke_part in stroke_parts:
+            if stroke_part.startswith("["):
+                stripped_stroke += stroke_part
+        if stripped_stroke:
+            stripped_strokes.append(stripped_stroke)
+    return "/".join(stripped_strokes)
+
+
 def remove_excess_hyphens(stroke):
     # if there's a vowel or *, no hyphens are needed at all
     if "[e|" in stroke or "[*]" in stroke:
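
The new helper keeps only the bracketed chords in each stroke and drops strokes left empty after stripping, so partially matched candidates can be checked against steno order. Its expected behavior, taken from the new test/util/test_stroke_util.py added below:

    from regenpfeifer.util import stroke_util

    # Unmatched letters ("ab", "e", "d") are discarded; chords are kept.
    stroke_util.strip_unmatched_letters("ab[TP][e|OU][-R]e[-PB]d")
    # -> "[TP][e|OU][-R][-PB]"

    # A stroke reduced to nothing (the lone "e") is dropped entirely.
    stroke_util.strip_unmatched_letters("ab[TP][e|OU][-R]/e/[-PB]d")
    # -> "[TP][e|OU][-R]/[-PB]"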
13 changes: 12 additions & 1 deletion regenpfeifer/word_pattern_matcher.py
@@ -1,10 +1,12 @@
 import re

+from regenpfeifer.stroke_validator import StrokeValidator
 from regenpfeifer.util import stroke_util, pattern_util


 class WordPatternMatcher:
     def __init__(self):
+        self.stroke_validator = StrokeValidator()

         self.vowel_patterns = pattern_util.load_pattern_file("vowel_patterns.json")
         self.left_patterns = pattern_util.load_pattern_file("left_patterns.json")
@@ -42,7 +44,16 @@ def generate_matches(self, match):
         word_parts = stroke_util.split(match)
         generated_matches.update(self.generate_left_consonants(word_parts))
         generated_matches.update(self.generate_right_consonants(word_parts))
-        return generated_matches
+
+        validated_matches = set()
+        for generated_match in generated_matches:
+            stripped_generated_match = stroke_util.strip_unmatched_letters(
+                generated_match
+            )
+            if self.stroke_validator.validate(stripped_generated_match):
+                validated_matches.add(generated_match)
+
+        return validated_matches

     def generate_left_consonants(self, word_parts):
         generated_matches = set()
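
generate_matches now strips unmatched letters from each candidate and keeps only those the StrokeValidator accepts; together with the pattern changes above, this trims the multi-candidate expectations in the tests below. A sketch of the visible effect, based on the updated matcher test:

    from regenpfeifer.word_pattern_matcher import WordPatternMatcher

    matcher = WordPatternMatcher()
    # The old candidate "[TKPW][e|A][-PB][-S]*" no longer survives validation;
    # only the steno-order-valid [-Z] form is returned.
    print(matcher.match("g[e|a]nz"))  # ["[TKPW][e|A][-PB][-Z]"]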
4 changes: 2 additions & 2 deletions test/test_stroke_aggregator.py
@@ -23,8 +23,8 @@ def test_split_easy_words(self):
         )
         self.run_test("Zu/sammen", ["Zusammen", "Zu/sammen"])

-    def run_test(self, word, result):
-        self.assertEqual(self.stroke_aggregator.aggregate_strokes(word), result)
+    def run_test(self, word, expected):
+        self.assertEqual(expected, self.stroke_aggregator.aggregate_strokes(word))


 if __name__ == "__main__":
26 changes: 12 additions & 14 deletions test/test_stroke_generator.py
@@ -16,16 +16,15 @@ def setUp(self):

     def test_easy_words(self):
         self.run_test("sein", "inf", ["SAEUPB"])
-        self.run_test("müssten", "1ppl?", ["PHOUFTN", "PHOUS/TEPB"])
+        self.run_test("müssten", "1ppl?", ["PHOUS/TEPB"])
         self.run_test("hattest", "2sgp", ["HAT/TEFT"])
-        self.run_test("zweit", "nn", ["SWA*EUT"])
-        self.run_test("Zeit", "nn", ["SA*EUT"])
-        self.run_test("Abend", "nn", ["A*PBD"])
-        self.run_test("Eiszeit", "nn", ["AEUS/SA*EUT"])
-        self.run_test("ganz", "nn", ["TKPWA*PBS"])
+        self.run_test("zweit", "nn", ["ZWAEUT"])
+        self.run_test("Zeit", "nn", ["ZAEUT"])
+        self.run_test("Eiszeit", "nn", ["AEUS/ZAEUT"])
+        self.run_test("ganz", "nn", ["TKPWAPBZ"])
         self.run_test("Wiese", "nn", ["WAOEU/SE"])
         self.run_test("Gehege", "nn", ["TKPWE/HE/TKPWE"])
-        self.run_test("rennen", "inf", ["REPBN", "REPB/TPHEPB"])
+        self.run_test("rennen", "inf", ["REPB/TPHEPB"])
         self.run_test("regelte", "1sgp", ["RE/TKPWEL/TE"])
         self.run_test("wandernd", "part", ["WAPB/TKERPBD"])
         self.run_test("gingst", "2sgp", ["TKPWEUFPBGT"])
@@ -34,18 +33,17 @@ def test_easy_words(self):
         self.run_test("deutschem", "attrmind", ["TKOEUT/SHEPL"])
         self.run_test("gleichem", "attrmind", ["TKPWHRAEU/KHEPL"])
         self.run_test("erst", "rb", ["EFRT"])
-        self.run_test("habend", "part", ["HA*PBD", "HA/PWEPBD"])
+        self.run_test("habend", "part", ["HA/PWEPBD"])
         self.run_test("allgemein", "part", ["AL/TKPWE/PHAEUPB"])
         self.run_test("Beute", "sg", ["PWOEU/TE"])
-        self.run_test("Beutezug", "sg", ["PWOEU/TE/S*UG"])
+        self.run_test("Beutezug", "sg", ["PWOEU/TE/ZUG"])
         self.run_test("unglück", "sg", ["UPB/TKPWHROUBG"])
         self.run_test("sollend", "part", ["SOL/HREPBD"])
-        self.run_test(
-            "Aufgaben", "pl", ["AUF/TKPWABN", "AUF/TKPWA*PB", "AUF/TKPWA/PWEPB"]
-        )
+        self.run_test("Aufgaben", "pl", ["AUF/TKPWA/PWEPB"])
+        self.run_test("seinen", "prp$", ["SAEU/TPHEPB"])

-    def run_test(self, word, word_type, result):
-        self.assertEqual(self.stroke_generator.generate(word, word_type), result)
+    def run_test(self, word, word_type, expected):
+        self.assertEqual(expected, self.stroke_generator.generate(word, word_type))


 if __name__ == "__main__":
6 changes: 3 additions & 3 deletions test/test_stroke_validator.py
@@ -12,7 +12,7 @@ def setUp(self):
         self.stroke_validator = StrokeValidator()

     def test_simple_words(self):
-        self.run_test("[TP][e|OU][-R][-N]", True)
+        self.run_test("[TP][e|OU][-R]", True)

     def test_multiple_strokes(self):
         self.run_test("[TKPW][HR][e|AEU]/[KH][e|E][-PL]", True)
@@ -45,8 +45,8 @@ def test_wrong_steno_order(self):
     def test_strokes_with_asterisk(self):
         self.run_test("[TKPW][e|A][-PB][-S]*", True)

-    def run_test(self, matched_stroke, result):
-        self.assertEqual(self.stroke_validator.validate(matched_stroke), result)
+    def run_test(self, matched_stroke, expected):
+        self.assertEqual(expected, self.stroke_validator.validate(matched_stroke))


 if __name__ == "__main__":
4 changes: 2 additions & 2 deletions test/test_word_emphasizer.py
@@ -29,8 +29,8 @@ def test_prefixes(self):
         self.run_test("missfallen", "inf", "missf[e|a]llen")
         self.run_test("Missfallen", "other", "M[e|i]ssfallen")

-    def run_test(self, word, word_type, result):
-        self.assertEqual(self.word_emphasizer.emphasize(word, word_type), result)
+    def run_test(self, word, word_type, expected):
+        self.assertEqual(expected, self.word_emphasizer.emphasize(word, word_type))


 if __name__ == "__main__":
4 changes: 2 additions & 2 deletions test/test_word_part_splitter.py
@@ -29,8 +29,8 @@ def test_split(self):
         # self.run_test('Zurückliegen', ['Zu', 'rück', 'lie', 'gen'])
         # self.run_test('Erzbistum', ['Erz', 'bis', 'tum'])

-    def run_test(self, word, result):
-        self.assertEqual(self.word_part_splitter.split(word), result)
+    def run_test(self, word, expected):
+        self.assertEqual(expected, self.word_part_splitter.split(word))


 if __name__ == "__main__":
26 changes: 3 additions & 23 deletions test/test_word_pattern_matcher.py
@@ -16,33 +16,13 @@ def test_easy_words(self):
         self.run_test("t[e|o]r", ["[T][e|O][-R]"])
         self.run_test("s[e|i]nd", ["[S][e|EU][-PB][-D]"])
         self.run_test("st[e|a]rk", ["[S][T][e|A][-R][-BG]"])
-        self.run_test(
-            "br[e|i]ngen",
-            [
-                "[PW][R][e|EU][-PB][-G][-N]",
-                "[PW][R][e|EU][-PB][-G][-PB]",
-                "[PW][R][e|EU][-PB][-G]e[-PB]",
-            ],
-        )
-        self.run_test(
-            "f[e|ü]hren",
-            ["[TP][e|OU][-R][-N]", "[TP][e|OU][-R][-PB]", "[TP][e|OU][-R]e[-PB]"],
-        )
-        self.run_test("g[e|a]nz", ["[TKPW][e|A][-PB][-S]*"])
+        self.run_test("g[e|a]nz", ["[TKPW][e|A][-PB][-Z]"])

     def test_disambiguations(self):
-        self.run_test(
-            "s[e|ei]nen",
-            ["[S][e|AEU][-PB][-N]", "[S][e|AEU][-PB][-PB]", "[S][e|AEU][-PB]e[-PB]"],
-        )
-        self.run_test(
-            "s[e|ei]en", ["[S][e|AEU][-N]", "[S][e|AEU][-PB]", "[S][e|AEU]e[-PB]"]
-        )
         self.run_test("s[e|ei]n", ["[S][e|AEU][-PB]"])

-    def run_test(self, emphasized_word, result):
-        print(self.word_pattern_matcher.match(emphasized_word))
-        self.assertEqual(self.word_pattern_matcher.match(emphasized_word), result)
+    def run_test(self, emphasized_word, expected):
+        self.assertEqual(expected, self.word_pattern_matcher.match(emphasized_word))


 if __name__ == "__main__":
4 changes: 2 additions & 2 deletions test/test_word_splitter.py
@@ -38,8 +38,8 @@ def test_split(self):
         self.run_test("Zurückliegen", ["Zu", "rück", "lie", "gen"])
         self.run_test("Erzbistum", ["Erz", "bis", "tum"])

-    def run_test(self, word, result):
-        self.assertEqual(self.word_splitter.split(word), result)
+    def run_test(self, word, expected):
+        self.assertEqual(expected, self.word_splitter.split(word))


 if __name__ == "__main__":
6 changes: 4 additions & 2 deletions test/test_word_syllable_splitter.py
@@ -48,9 +48,11 @@ def test_split(self):
         self.run_test("neue", ["neu", "e"])
         # self.run_test('andere', ['an', 'de', 're'])
         self.run_test("altes", ["al", "tes"])
+        self.run_test("müssten", ["müss", "ten"])
+        self.run_test("seinen", ["sei", "nen"])

-    def run_test(self, word, result):
-        self.assertEqual(self.syllable_splitter.split(word), result)
+    def run_test(self, word, expected):
+        self.assertEqual(expected, self.syllable_splitter.split(word))


 if __name__ == "__main__":
Empty file added test/util/__init__.py
23 changes: 23 additions & 0 deletions test/util/test_stroke_util.py
@@ -0,0 +1,23 @@
+import unittest
+
+from regenpfeifer.util import stroke_util
+
+
+class TestWordPatternMatcher(unittest.TestCase):
+    def test_strip_unmatched_letters(self):
+        self.assertEqual(
+            "[TP][e|OU][-R][-PB]",
+            stroke_util.strip_unmatched_letters("ab[TP][e|OU][-R]e[-PB]d"),
+        )
+        self.assertEqual(
+            "[TP][e|OU][-R]/[-PB]",
+            stroke_util.strip_unmatched_letters("ab[TP][e|OU][-R]/e[-PB]d"),
+        )
+        self.assertEqual(
+            "[TP][e|OU][-R]/[-PB]",
+            stroke_util.strip_unmatched_letters("ab[TP][e|OU][-R]/e/[-PB]d"),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
