Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed format of input file to csv with headers (required) #14

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions PyTorch/SpeechSynthesis/FastPitch/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
import warnings
from pathlib import Path
from typing import Optional

import librosa
import numpy as np

import torch
from scipy.io.wavfile import read

from csv import DictReader

class BenchmarkStats:
""" Tracks statistics used for benchmarking. """
Expand Down Expand Up @@ -69,21 +67,16 @@ def load_wav_to_torch(full_path, force_sampling_rate=None):

def load_filepaths_and_text(fnames, dataset_path=None, has_speakers=False,
                            split="|"):
    """Read delimited filelists with a header row into a list of dicts.

    Every file in `fnames` must begin with a header line naming its columns,
    e.g. ``mels|pitch|text`` or ``mels|text`` (optionally followed by a
    ``speaker`` column). Each subsequent line becomes one dict keyed by
    those header names.

    Args:
        fnames: Iterable of filelist paths to read.
        dataset_path: Accepted for backward compatibility. It is not applied
            to the paths read here; they are returned exactly as written in
            the filelists.
        has_speakers: Accepted for backward compatibility. The columns are
            determined by each file's header row, not by this flag.
        split: Single-character column delimiter.

    Returns:
        A list with one dict per data row, covering all files in `fnames`
        in the order given.
    """
    # Function-scope import keeps this block independent of the module's
    # top-level import style.
    from csv import QUOTE_NONE, DictReader

    fpaths_and_text = []
    for fname in fnames:
        # newline='' lets the csv module do its own line-ending handling.
        with open(fname, encoding='utf-8', newline='') as f:
            # QUOTE_NONE: transcripts may start with a literal double quote;
            # default csv quoting would silently strip those characters.
            reader = DictReader(f, delimiter=split, quoting=QUOTE_NONE)
            # Accumulate across files instead of overwriting, so rows from
            # earlier filelists are not lost.
            fpaths_and_text.extend(reader)

    return fpaths_and_text


Expand Down
25 changes: 12 additions & 13 deletions PyTorch/SpeechSynthesis/FastPitch/fastpitch/data_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def __init__(self,
self.dataset_path = dataset_path
self.audiopaths_and_text = load_filepaths_and_text(
audiopaths_and_text, dataset_path,
has_speakers=(n_speakers > 1))
has_speakers=(n_speakers > 1)) #this now returns a list of dicts
self.load_mel_from_disk = load_mel_from_disk
if not load_mel_from_disk:
self.max_wav_value = max_wav_value
Expand Down Expand Up @@ -193,26 +193,25 @@ def __init__(self,

assert not (load_pitch_from_disk and self.pitch_tmp_dir is not None)

if len(self.audiopaths_and_text[0]) < expected_columns:
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we still perform some kind of check on the expected number of columns?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh lol, I commented on this across 2 PRs

raise ValueError(f'Expected {expected_columns} columns in audiopaths file. '
'The format is <mel_or_wav>|[<pitch>|]<text>[|<speaker_id>]')

if len(self.audiopaths_and_text[0]) > expected_columns:
print('WARNING: Audiopaths file has more columns than expected')

to_tensor = lambda x: torch.Tensor([x]) if type(x) is float else x
self.pitch_mean = to_tensor(pitch_mean)
self.pitch_std = to_tensor(pitch_std)

def __getitem__(self, index):
# Separate filename and text

#Indexing items using dictionary entries
if self.n_speakers > 1:
audiopath, *extra, text, speaker = self.audiopaths_and_text[index]
audiopath = self.audiopaths_and_text[index]['mels']
text = self.audiopaths_and_text[index]['text']
speaker = self.audiopaths_and_text[index]['speaker']
speaker = int(speaker)
else:
audiopath, *extra, text = self.audiopaths_and_text[index]
audiopath = self.audiopaths_and_text[index]['mels']
text = self.audiopaths_and_text[index]['text']
speaker = None


mel = self.get_mel(audiopath)
text = self.get_text(text)
pitch = self.get_pitch(index, mel.size(-1))
Expand Down Expand Up @@ -287,15 +286,15 @@ def get_prior(self, index, mel_len, text_len):
return attn_prior

def get_pitch(self, index, mel_len=None):
audiopath, *fields = self.audiopaths_and_text[index]
audiopath = self.audiopaths_and_text[index]['mels']

if self.n_speakers > 1:
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of interest, where is this `spk` value used? As far as I know it is not currently used here, though I imagine the intention is to have a different pitch mean and std per speaker?

spk = int(fields[-1])
spk = int(self.audiopaths_and_text[index]['speaker'])
else:
spk = 0

if self.load_pitch_from_disk:
pitchpath = fields[0]
pitchpath = self.audiopaths_and_text[index]['pitch']
pitch = torch.load(pitchpath)
if self.pitch_mean is not None:
assert self.pitch_std is not None
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting here because I can't comment lower down: what about the TTSCollate `__call__` function?
I guess that depends on the return type of `__getitem__`, which still returns a tuple?

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|pitch|text
wavs/LJ045-0096.wav|pitch/LJ045-0096.pt|Mrs. De Mohrenschildt thought that Oswald,
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These files should then also be updated to use absolute paths.

wavs/LJ049-0022.wav|pitch/LJ049-0022.pt|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.
wavs/LJ033-0042.wav|pitch/LJ033-0042.pt|Between the hours of eight and nine p.m. they were occupied with the children in the bedrooms located at the extreme east end of the house.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|pitch|text
wavs/LJ050-0234.wav|pitch/LJ050-0234.pt|It has used other Treasury law enforcement agents on special experiments in building and route surveys in places to which the President frequently travels.
wavs/LJ019-0373.wav|pitch/LJ019-0373.pt|to avail himself of his powers, as it was difficult to bring home the derelictions of duties and evasion of the acts. Too much was left to the inspectors.
wavs/LJ050-0207.wav|pitch/LJ050-0207.pt|Although Chief Rowley does not complain about the pay scale for Secret Service agents,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|pitch|text
wavs/LJ016-0288.wav|pitch/LJ016-0288.pt|"Müller, Müller, He's the man," till a diversion was created by the appearance of the gallows, which was received with continuous yells.
wavs/LJ028-0275.wav|pitch/LJ028-0275.pt|At last, in the twentieth month,
wavs/LJ019-0273.wav|pitch/LJ019-0273.pt|which Sir Joshua Jebb told the committee he considered the proper elements of penal discipline.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|text
wavs/LJ050-0234.wav|It has used other Treasury law enforcement agents on special experiments in building and route surveys in places to which the President frequently travels.
wavs/LJ019-0373.wav|to avail himself of his powers, as it was difficult to bring home the derelictions of duties and evasion of the acts. Too much was left to the inspectors.
wavs/LJ050-0207.wav|Although Chief Rowley does not complain about the pay scale for Secret Service agents,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|text
wavs/LJ045-0096.wav|Mrs. De Mohrenschildt thought that Oswald,
wavs/LJ049-0022.wav|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.
wavs/LJ033-0042.wav|Between the hours of eight and nine p.m. they were occupied with the children in the bedrooms located at the extreme east end of the house.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|text
wavs/LJ050-0234.wav|It has used other Treasury law enforcement agents on special experiments in building and route surveys in places to which the President frequently travels.
wavs/LJ019-0373.wav|to avail himself of his powers, as it was difficult to bring home the derelictions of duties and evasion of the acts. Too much was left to the inspectors.
wavs/LJ050-0207.wav|Although Chief Rowley does not complain about the pay scale for Secret Service agents,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mels|text
wavs/LJ016-0288.wav|"Müller, Müller, He's the man," till a diversion was created by the appearance of the gallows, which was received with continuous yells.
wavs/LJ028-0275.wav|At last, in the twentieth month,
wavs/LJ019-0273.wav|which Sir Joshua Jebb told the committee he considered the proper elements of penal discipline.
Expand Down