add exceptions

Esukhia · Sep 18, 2022 · eb7c42d · eb7c42d
1 parent 824d9c6
commit eb7c42d
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 19 deletions.
diff --git a/TODO.md b/TODO.md
@@ -1,5 +1,9 @@
 ## TODO
 
+- option (MST 438) : /p/ between two vowels can be pronounced β
+
+- how is nyon mongs pronounced? [ɲø:moŋ] or [ɲønmoŋ]? MST gives phonology nyönmong but doesn't really give a phonetic equivalent
+
 - བཟས་སོང་། (passé) va être prononcé ཟ་སོང་། (mange passé)
 - glottal stop + k-| ?
 - study behavior for ambiguous syllables (probably list some as exceptions)

diff --git a/bophono/PhonStateMST.py b/bophono/PhonStateMST.py
@@ -51,7 +51,9 @@ def __init__(self, options={}, pos=None, endOfSentence=False):
         self.eatR = options['eatR'] if 'eatR' in options else False
         self.eatL = options['eatL'] if 'eatL' in options else False
         self.eatP = options['eatP'] if 'eatP' in options else True
-        self.eatK = options['eatK'] if 'eatK' in options else True
+        # NOTE(review): the two g-suffix options below presumably take over from the former 'eatK' flag; TODO: document their accepted values
+        self.gsuffixstrategy = options['gsuffixstrategy'] if 'gsuffixstrategy' in options else "simple"
+        self.gsuffixchar = options['gsuffixchar'] if 'gsuffixchar' in options else "simple"
         self.aiAffixchar = options['aiAffixchar'] if 'aiAffixchar' in options else 'ːɪ̯'
         self.aiAffixmonochar = options['aiAffixmonochar'] if 'aiAffixmonochar' in options else self.syllablesepchar+'ɪ'
         # does the འི affix in monosyllabic words change the vowel sound (a -> ä) or not (defaults to not)

diff --git a/bophono/data/exceptions.csv b/bophono/data/exceptions.csv
@@ -19,24 +19,30 @@
 ས་གདན,s+ap|tän
 ཁ་གཅོད,kh+ap|cö'
 དབ/Cb,w+
-རྗེས་འཇུག,c-en|cu'
+རྗེས་འཇུག,c-en|cuk
 #
-# numbers, from NT Annex 1, completed by Drupchen
+# NT, spoken Tibetan only
 #
-བཅུ་གཅིག,c+uk|c+ik
-བཅུ་གཉིས,c+uk|ny+i'
+ཡོད་རེད,y-o:|re'
+#
+# numbers, from NT Annex 1 and p. 132, completed by Drupchen
+#
+#བཅུ་གཅིག,c+uk|c+ik # not indicated in NT, skipping this one and the next one
+#བཅུ་གཉིས,c+uk|ny+i'
 བཅུ་གསུམ,c+uk|s+um
 བཅུ་བཞི/Ci,c+up|sh-
+བཅོ་ལྔ/Ca,c+ö|ng+
 བཅུ་བདུན,c+up|t-ün
 བཅོ་བརྒྱད,c+op|ky-ä'
+བཅུ་དགུ/Cu,c+ur|k-
 རྩ་བཞི/Ci,ts+ap|sh-
 རྩ་བདུན,ts+ap|t-ün
 རྩ་བརྒྱད,ts+ap|ky-ä'
 སོ་བརྒྱད,s+op|ky-ä'
 བཞི་བཅུ/Cu,sh-ip|c+
 ལྔ་བཅུ/Cu,ng+ap|c+
 ང་བཞི/Ci,ng-ap|sh-
-དགུ་བཅུ/Cu,k-u|c+
+དགུ་བཅུ/Cu,k-up|c+
 # possible according to accent, not always (source: Drupchen)
 ལྔ་ཁྲི/Ci,ng+ap|thr+
 གོ་བཞི/Ci,k-op|sh-
@@ -49,6 +55,8 @@
 # Sanskrit (TODO: check)
 པདྨ/Ca,p+e|m-
 ཀརྨ/Ca,k+ar|m-
+སེངྒེ/Ce,s+eng|kh-
+བཛྲ/Ca,p-en|ts-# ?
 #
 # NT Annex 1, nasalizer
 #
@@ -92,6 +100,8 @@
 ཕྱག་མཛོད,ch+ang|~ts-ö'
 ལྷ་མཛེས,lh+an|~ts-e'
 ལོ་མཆོད,l-om|~ch+ö'
+#མཆོད་འབུལ,ch+öm|p-ul # TODO: in NT p. 380 pul has a tone...
+སྒྲ་སྙན,tr-ap|ny+än
 #
 # THL nasalizations
 #
@@ -131,7 +141,7 @@
 ཡ་བཞི/Ci,y-ap|sh-
 ས་བཅད,s+ap|c+ä'
 #ཡང་ལེ་བེར,y-ang|l-e|w-er
-#མཆོད་རྟེན,ch+ör|t+en (only in some dialects)
+#མཆོད་རྟེན,ch+ör|t+en (only in some dialects, but also in NT p. 380)
 སྡེ་དགེ/Ce,t-er|k-
 སློ་བཟང,l+op|s-ang
 # not sure about these:

diff --git a/demo.py b/demo.py
@@ -1,16 +1,71 @@
 import bophono
 import sys
 
-options = {
-  'aspirateLowTones': True,
+options_fast = {
+  'weakAspirationChar': '',
+  'aspirateLowTones': False,
   'prefixStrategy': 'always',
   'aiAffixchar': 'ː',
   'hightonechar':'',
   'lowtonechar':'',
+  'nasalchar': '',
+  'stopSDMode': "eow",
+  'useUnreleasedStops': False,
+  'eatR': True,
+  'eatL': True,
+  'syllablesepchar': ''
+}
+
 
+options_fastidious = {
+  'weakAspirationChar': '3',
+  'aspirateLowTones': True,
+  'prefixStrategy': 'always',
+  'aiAffixchar': 'ː',
+  'hightonechar':'̄',
+  'lowtonechar':'̱',
+  'nasalchar': '',
+  'stopSDMode': "eow",
+  'eatP': False,
+  'useUnreleasedStops': True,
+  'eatK': False,
+  'syllablesepchar': ''
 }
+
+def toEnglish(s, mode):  # rewrites phonetic symbols in s into Latin letters/digraphs with diacritics and returns the new string
+    s = s.replace("y", "ü")  # done first: the "y" produced further down ("ny", "gy", "ky", "y") is therefore never turned into "ü"
+    if (mode == "expert"):  # "expert" mode keeps digit markers for the four symbols below
+        s = s.replace("ɔ", "o1")
+        s = s.replace("ɣ", "g2")
+        s = s.replace("̊", "1")  # combining diacritic becomes the marker "1"
+        s = s.replace("̥", "1")  # combining diacritic becomes the marker "1"
+
+    else:  # any other mode: same four symbols, but without markers
+        s = s.replace("ɔ", "o")
+        s = s.replace("ɣ", "g")
+        s = s.replace("̊", "")  # combining diacritic is dropped
+        s = s.replace("̥", "")  # combining diacritic is dropped
+    s = s.replace("ɖ", "ḍ")  # from here on the replacements are the same for every mode
+    s = s.replace("ʈ", "ṭ")
+    s = s.replace("ɲ", "ny")
+    s = s.replace("ø", "ö")
+    s = s.replace("ɟ", "gy")
+    s = s.replace("c", "ky")  # runs before "ch" is produced below, so that "ch" is left untouched
+    s = s.replace("j", "y")  # runs before "dʑ" -> "j" below, so the "j" produced there is kept
+    s = s.replace("ɛ", "è")
+    s = s.replace("e", "é")  # the "è" from the previous line is a different character and is not affected
+    s = s.replace("ŋ", "ṅ")
+    s = s.replace("tɕ", "ch")  # two-character sequence handled before the bare "ɕ" on the next line
+    s = s.replace("ɕ", "sh")
+    s = s.replace("dʑ", "j")
+    s = s.replace("dz", "z")
+
+    return s
+
 filename = 'tests/demo.txt'
-converter = bophono.UnicodeToApi(schema="MST", options = options) # try with CAT for Amdokä
+converter_fastidious = bophono.UnicodeToApi(schema="MST", options = options_fastidious)
+converter_fast = bophono.UnicodeToApi(schema="MST", options = options_fast)
+converter_kvp = bophono.UnicodeToApi(schema="KVP", options = options_fast)
 if (len(sys.argv) > 1):
     filename = sys.argv[1]
 with open(filename, 'r', encoding="utf8") as f:
@@ -24,12 +79,13 @@
         words = line.split()
         res = ""
         for word in words:
-            res += converter.get_api(word)+'  '
+            res += converter_kvp.get_api(word)+'  '
+        #res = toEnglish(res, "expert")    
         print(res)
 
 #Chinese transcription
 sentence = "བཀྲ་ཤིས་"
-api = converter.get_api(sentence)
+api = converter_fast.get_api(sentence)
 zh = bophono.apitochinese.api2chinese(api)
 print("\n" + sentence + " -> " + api + \
       " -> " + zh["zhuyin"] + " -> " + zh["chinese_trad"])
diff --git a/doc/a-variations.md b/doc/a-variations.md