Skip to content

Commit

Permalink
Merge branch 'master' into feature/optional-torchaudio
Browse files (browse the repository at this point in the history)
  • Loading branch information
pzelasko authored Dec 7, 2023
2 parents 3a0bda4 + 6c777da commit 84e817a
Showing 1 changed file with 5 additions and 9 deletions.
14 changes: 5 additions & 9 deletions lhotse/workflows/forced_alignment/mms_aligner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -142,19 +142,15 @@ def _word_tokenize(text: str, language: Optional[str] = None) -> List[str]:
return kss.split_morphemes(text, return_pos=False)

elif language == "th":
# `pythainlp` is alive and much better, but it is a huge package bloated with dependencies
if not is_module_available("tltk"):
if not is_module_available("attacut"):
raise ImportError(
"MMSForcedAligner requires the 'tltk' module to be installed to align Thai text."
"Please install it with 'pip install tltk'."
"MMSForcedAligner requires the 'attacut' module to be installed to align Thai text."
"Please install it with 'pip install attacut'."
)

from tltk import nlp
import attacut

pieces = nlp.pos_tag(text)
return [
word if word != "<s/>" else " " for piece in pieces for word, _ in piece
]
return attacut.tokenize(text)

elif language == "my":
if not is_module_available("pyidaungsu"):
Expand Down

0 comments on commit 84e817a

Please sign in to comment.