Skip to content

Commit

Permalink
finish PR
Browse files Browse the repository at this point in the history
  • Loading branch information
eroux committed Oct 8, 2024
1 parent 9967324 commit 7f33a0e
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 20 deletions.
22 changes: 16 additions & 6 deletions bophono/PhonStateKVP.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ def doCombineCurEnd(self, endofword, nrc='', nextvowel=''): # nrc = next root co
self.phon += self.end


def combineWithException(self, exception, tibetanSyllable):
def combineWithException(self, exception):
syllables = exception.split('|')
for syl in syllables:
indexplusminus = syl.find('-')
if indexplusminus == -1:
print("invalid exception syllable: "+syl)
continue
self.combineWith(syl[:indexplusminus], syl[indexplusminus+1:], tibetanSyllable)
self.combineWith(syl[:indexplusminus], syl[indexplusminus+1:])

def combineWith(self, nextroot, nextend, tibetanSyllable):
def combineWith(self, nextroot, nextend):
nextrootconsonant = nextroot
nextvowel = ''
self.doCombineCurEnd(False, nextrootconsonant, nextvowel)
Expand All @@ -60,8 +60,18 @@ def combineWith(self, nextroot, nextend, tibetanSyllable):
self.phon += ""
elif nextrootconsonant.startswith("dz") and self.position > 1:
self.phon += "z"
elif "གྲ" in tibetanSyllable and nextrootconsonant.startswith("tr") and self.position == 2:
self.phon += "dr"
elif nextrootconsonant.startswith("tdr"):
# Here the KVP rules have the rather puzzling convention of using different rules
# for syllables that have the exact same phonology in Tibetan. They have:
# བྲ -> always dra
# དྲ -> dra in second position, tra in first position
# which doesn't make sense, as Tibetans make no difference between བྲ and དྲ.
# We thus have to artificially differentiate them at the phonological level recorded in roots.csv
# by having "tdra" for དྲ.
# NOTE(review): the roots.csv entry that carries "tdr" is གྲ -- confirm which syllable is meant here.
if self.position == 1:
self.phon += "tr"
else:
self.phon += "dr"
else:
self.phon += nextrootconsonant
# decompose multi-syllable ends:
Expand All @@ -70,7 +80,7 @@ def combineWith(self, nextroot, nextend, tibetanSyllable):
self.end = ends[0]
for endsyl in ends[1:]:
# we suppose that roots are always null
self.combineWith(endsyl[:1], endsyl[1:], tibetanSyllable)
self.combineWith(endsyl[:1], endsyl[1:])
else:
self.end = nextend

Expand Down
4 changes: 2 additions & 2 deletions bophono/UnicodeToApi.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __combine_next_syll_phon(self, tibstr, bindex, state, eindex):
return -1
if endinfo['i'] < eindex and self.__is_tib_letter(tibstr[endinfo['i']]) and (tibstr[endinfo['i']] not in self.ignored_chars):
return -1
state.combineWith(rootinfo['d'], endinfo['d'], tibstr[bindex:eindex])
state.combineWith(rootinfo['d'], endinfo['d'])
assert(endinfo['i']>bindex)
return endinfo['i']

Expand All @@ -97,7 +97,7 @@ def get_api(self, tibstr, bindex=0, eindex=-1, pos=None, endOfSentence=False):
# if it starts with '2:' and we're in the first syllable, we ignore that '2:' prefix:
if exceptioninfo['d'].startswith('2:'):
exceptioninfo['d'] = exceptioninfo['d'][2:]
state.combineWithException(exceptioninfo['d'], tibstr[bindex:eindex])
state.combineWithException(exceptioninfo['d'])
nextidx = self.__get_next_letter_index(tibstr, exceptioninfo['i']+1, eindex)
if nextidx == -1:
nextidx = eindex
Expand Down
8 changes: 5 additions & 3 deletions bophono/data/exceptions-kvp.csv
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@
ཨ་གསར,ags-ar
ས་གདན,sabd-en
ཁ་གཅོད,khabch-ö
# dba becomes wa except if exactly dba (no vowel, no suffix)
དབ*/Cb,w-
# dba doesn't become wa if it has vowel i, e or u
དབེ/Ce,-
དབུ/Cu,-
དབི/Ci,-
# numbers, from NT Annex 1, completed by Drupchen
བཅུ་གཅིག,chugch-ik
བཅུ་གཉིས,chugny-i
Expand Down Expand Up @@ -159,4 +161,4 @@
འཕྲོ་འདུ/Cu,tront-
སྤྲོ་བསྡུ/Co,tront-
ན་བཟ/Cb,namz-
མ་འགགས,mank-ak
མ་འགགས,mank-ak
6 changes: 3 additions & 3 deletions bophono/data/roots.csv
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
འཁྲ,~thr+,[']tr+,tr,tr
ག,kh-,k,g,g
གྱ,khy-,c,gy,gy
གྲ,thr-,tr,tr,tr
གྲ,thr-,tr,tdr,tr
གླ,l+,l,l,l
དག*,k-,[r]g,g,g
དགྱ,ky-,[r]j,gy,gy
Expand Down Expand Up @@ -136,7 +136,7 @@
བྱ,ch-,sh,j,j
བྲ,thr-,tr,dr,dr
བླ,l+,l,l,l
དབ*,+,R,-,-
དབ*,+,R,w,-
དབྱ,y+,[r]y,y,y
དབྲ,r+,,r,r
འབ*,~p-,[']b,b,b
Expand Down Expand Up @@ -233,4 +233,4 @@
སྟྭ,t+,[s]t,t,t
སྭ,s+,s+,s,s
བསྭ,s+,s+,s,s
ཧྭ,h+,h,h,h
ཧྭ,h+,h,h,h
9 changes: 4 additions & 5 deletions tests/test_KVP_corrections.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,14 @@ def test_ratas():

def test_dao_wa():
assert_equal_phonetics("KVP", "དབ", "dab")
assert_equal_phonetics("KVP", "དབོ", "o")
assert_equal_phonetics("KVP", "དབོས", "ö")
assert_equal_phonetics("KVP", "དབུ", "u")
assert_equal_phonetics("KVP", "དབུས", "ü")
assert_equal_phonetics("KVP", "དབི", "i")
assert_equal_phonetics("KVP", "དབེ", "e")
assert_equal_phonetics("KVP", "དབང", "ang")
assert_equal_phonetics("KVP", "དབྱང", "yang")
assert_equal_phonetics("KVP", "དབོ", "wo")
assert_equal_phonetics("KVP", "དབོས", "wö")
assert_equal_phonetics("KVP", "དབང", "wang")

### Additional Phonetics Instructions:

Expand Down Expand Up @@ -277,7 +277,6 @@ def test_specific_cases():
assert_equal_phonetics("KVP", "བར་ཆད", "barche")
assert_equal_phonetics("KVP", "བར་དོ", "bardo")


### Checking that things work as expected in KVP_corrections.csv

def load_corrections():
Expand All @@ -291,4 +290,4 @@ def load_corrections():

@pytest.mark.parametrize("tibetan, expected", corrections)
def test_phonetics_tool_corrections(tibetan, expected):
assert_equal_phonetics("KVP", tibetan, expected)
assert_equal_phonetics("KVP", tibetan, expected)
2 changes: 1 addition & 1 deletion tests/test_KVP_wasur.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def load_wasur_cases():
def test_cases_without_wasur():
assert_equal_phonetics("KVP", "མངས", "nge")
assert_equal_phonetics("KVP", "མགས", "ge")
assert_equal_phonetics("KVP", "དབས", "e")
assert_equal_phonetics("KVP", "དབས", "we")
assert_equal_phonetics("KVP", "དངས", "nge")
assert_equal_phonetics("KVP", "དགས", "ge")
assert_equal_phonetics("KVP", "དམས", "me")
Expand Down

0 comments on commit 7f33a0e

Please sign in to comment.