-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changed format of input file to csv with headers (required) #14
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,7 +162,7 @@ def __init__(self, | |
self.dataset_path = dataset_path | ||
self.audiopaths_and_text = load_filepaths_and_text( | ||
audiopaths_and_text, dataset_path, | ||
has_speakers=(n_speakers > 1)) | ||
has_speakers=(n_speakers > 1)) #this now returns a list of dicts | ||
self.load_mel_from_disk = load_mel_from_disk | ||
if not load_mel_from_disk: | ||
self.max_wav_value = max_wav_value | ||
|
@@ -193,26 +193,25 @@ def __init__(self, | |
|
||
assert not (load_pitch_from_disk and self.pitch_tmp_dir is not None) | ||
|
||
if len(self.audiopaths_and_text[0]) < expected_columns: | ||
raise ValueError(f'Expected {expected_columns} columns in audiopaths file. ' | ||
'The format is <mel_or_wav>|[<pitch>|]<text>[|<speaker_id>]') | ||
|
||
if len(self.audiopaths_and_text[0]) > expected_columns: | ||
print('WARNING: Audiopaths file has more columns than expected') | ||
|
||
to_tensor = lambda x: torch.Tensor([x]) if type(x) is float else x | ||
self.pitch_mean = to_tensor(pitch_mean) | ||
self.pitch_std = to_tensor(pitch_std) | ||
|
||
def __getitem__(self, index): | ||
# Separate filename and text | ||
|
||
#Indexing items using dictionary entries | ||
if self.n_speakers > 1: | ||
audiopath, *extra, text, speaker = self.audiopaths_and_text[index] | ||
audiopath = self.audiopaths_and_text[index]['mels'] | ||
text = self.audiopaths_and_text[index]['text'] | ||
speaker = self.audiopaths_and_text[index]['speaker'] | ||
speaker = int(speaker) | ||
else: | ||
audiopath, *extra, text = self.audiopaths_and_text[index] | ||
audiopath = self.audiopaths_and_text[index]['mels'] | ||
text = self.audiopaths_and_text[index]['text'] | ||
speaker = None | ||
|
||
|
||
mel = self.get_mel(audiopath) | ||
text = self.get_text(text) | ||
pitch = self.get_pitch(index, mel.size(-1)) | ||
|
@@ -287,15 +286,15 @@ def get_prior(self, index, mel_len, text_len): | |
return attn_prior | ||
|
||
def get_pitch(self, index, mel_len=None): | ||
audiopath, *fields = self.audiopaths_and_text[index] | ||
audiopath = self.audiopaths_and_text[index]['mels'] | ||
|
||
if self.n_speakers > 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just out of interest, where is this `spk` used? AFAIK it's not currently used here, though I imagine the intention is to have a different mean and std for each speaker? |
||
spk = int(fields[-1]) | ||
spk = int(self.audiopaths_and_text[index]['speaker']) | ||
else: | ||
spk = 0 | ||
|
||
if self.load_pitch_from_disk: | ||
pitchpath = fields[0] | ||
pitchpath = self.audiopaths_and_text[index]['pitch'] | ||
pitch = torch.load(pitchpath) | ||
if self.pitch_mean is not None: | ||
assert self.pitch_std is not None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Commenting here because I can't do it lower down, but what about the `TTSCollate` call function? |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
mels|pitch|text | ||
wavs/LJ045-0096.wav|pitch/LJ045-0096.pt|Mrs. De Mohrenschildt thought that Oswald, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These files should then also be updated to use absolute paths. |
||
wavs/LJ049-0022.wav|pitch/LJ049-0022.pt|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent. | ||
wavs/LJ033-0042.wav|pitch/LJ033-0042.pt|Between the hours of eight and nine p.m. they were occupied with the children in the bedrooms located at the extreme east end of the house. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we still do some kind of check on the expected number of columns?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh lol, I commented on this across two PRs.