Skip to content

Commit

Permalink
Allow for using .txt files in aligning
Browse files Browse the repository at this point in the history
Resolves #94
  • Loading branch information
mmcauliffe committed Sep 20, 2018
1 parent 7ddad65 commit 01cce92
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
8 changes: 6 additions & 2 deletions aligner/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def find_lab(filename, files):
    '''
    Finds a .lab file or .txt file that corresponds to a wav file. The .lab
    extension is given priority.

    Parameters
    ----------
    filename : str
        Name of the sound file whose transcription is wanted; only the part
        before the extension is used for matching
    files : iterable of str
        File names to search through. Scanned once, so a one-shot iterator
        is accepted as well as a list

    Returns
    -------
    str or None
        If a corresponding .lab or .txt file is found, returns it, otherwise
        returns None
    '''
    name, _ = os.path.splitext(filename)
    txt_fallback = None
    for f in files:
        fn, fext = os.path.splitext(f)
        if fn != name:
            continue
        fext = fext.lower()  # extensions are matched case-insensitively
        if fext == '.lab':
            # A .lab match always wins, so there is no need to look further.
            return f
        if fext == '.txt' and txt_fallback is None:
            # Keep only the first .txt match; it is used if no .lab turns up.
            txt_fallback = f
    return txt_fallback


Expand Down
5 changes: 4 additions & 1 deletion docs/source/data_format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ text file with the .lab extension).
you can generate .lab files from it using the relabel function of relabel_clean.py.
The relabel_clean.py script is currently in the prosodylab.alignertools repository on GitHub.

If no ``.lab`` file is found for a sound file, the aligner will look for a matching ``.txt`` file and use that instead.

In terms of directory structure, the default configuration assumes that
files are separated into subdirectories based on their speaker (with one
speaker per file).
Expand Down Expand Up @@ -66,7 +68,8 @@ each speaker.
:align: center
:alt: Image cannot be displayed in your browser

.. note ::
.. note::

Intervals in the TextGrid shorter than 100 milliseconds will not be aligned.

Transcription normalization and dictionary lookup
Expand Down
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ def basic_corpus_dir(corpus_root_dir, wav_dir, lab_dir):
return path


@pytest.fixture(scope='session')
def basic_corpus_txt_dir(corpus_root_dir, wav_dir, lab_dir):
    '''
    Session fixture that builds the ``basic_txt`` corpus directory.

    Each listed recording is copied from ``wav_dir`` into a per-name
    subdirectory, and its ``.lab`` transcription from ``lab_dir`` is copied
    next to it under a ``.txt`` extension.

    Returns
    -------
    str
        Path to the ``basic_txt`` directory under ``corpus_root_dir``
    '''
    corpus_path = os.path.join(corpus_root_dir, 'basic_txt')
    os.makedirs(corpus_path, exist_ok=True)
    layout = [
        ('michael', ['acoustic_corpus']),
        ('sickmichael', ['cold_corpus', 'cold_corpus3']),
    ]
    for subdir, stems in layout:
        target_dir = os.path.join(corpus_path, subdir)
        os.makedirs(target_dir, exist_ok=True)
        for stem in stems:
            wav_source = os.path.join(wav_dir, stem + '.wav')
            wav_target = os.path.join(target_dir, stem + '.wav')
            shutil.copyfile(wav_source, wav_target)
            # The transcription keeps its contents but is renamed to .txt.
            lab_source = os.path.join(lab_dir, stem + '.lab')
            txt_target = os.path.join(target_dir, stem + '.txt')
            shutil.copyfile(lab_source, txt_target)
    return corpus_path


@pytest.fixture(scope='session')
def extra_corpus_dir(corpus_root_dir, wav_dir, lab_dir):
path = os.path.join(corpus_root_dir, 'extra')
Expand Down
20 changes: 16 additions & 4 deletions tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,23 @@ def test_basic(basic_dict_path, basic_corpus_dir, generated_dir):
dictionary = Dictionary(basic_dict_path, os.path.join(generated_dir, 'basic'))
dictionary.write()
output_directory = os.path.join(generated_dir, 'basic')
d = Corpus(basic_corpus_dir, output_directory)
d.initialize_corpus(dictionary)
c = Corpus(basic_corpus_dir, output_directory)
c.initialize_corpus(dictionary)
fc = FeatureConfig()
fc.generate_features(d)
assert d.get_feat_dim(fc) == 39
fc.generate_features(c)
assert c.get_feat_dim(fc) == 39


def test_basic_txt(basic_corpus_txt_dir, basic_dict_path, generated_dir):
    '''
    Load a corpus whose transcriptions are .txt files and check that every
    sound file gets a transcription and that the feature dimension is 39.
    '''
    output_directory = os.path.join(generated_dir, 'basic')
    dictionary = Dictionary(basic_dict_path, os.path.join(generated_dir, 'basic'))
    dictionary.write()
    corpus = Corpus(basic_corpus_txt_dir, output_directory)
    # No file may be reported as lacking a transcription.
    assert len(corpus.no_transcription_files) == 0
    corpus.initialize_corpus(dictionary)
    feature_config = FeatureConfig()
    feature_config.generate_features(corpus)
    assert corpus.get_feat_dim(feature_config) == 39


def test_extra(sick_dict, extra_corpus_dir, generated_dir):
Expand Down

0 comments on commit 01cce92

Please sign in to comment.