Skip to content

Commit

Permalink
refactoring and cleaning up code
Browse files Browse the repository at this point in the history
  • Loading branch information
dhpollack authored and soumith committed Jan 4, 2019
1 parent 0e0d1e5 commit 3bd4db8
Show file tree
Hide file tree
Showing 14 changed files with 213 additions and 93 deletions.
88 changes: 88 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
---
# clang-format style for the C++ sources: 2-space indentation, an 80-column
# limit, attached braces, left-aligned pointers, no bin-packing of arguments
# or parameters, and sorted includes.
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
# Per-construct brace wrapping; every entry is false, i.e. braces are never
# moved onto their own line.
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
# Include grouping: <...> headers ending in .h/.hpp get priority 1, any other
# <...> include gets priority 2, and everything else gets priority 3.
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
# Line-breaking penalties; the two largest values are attached to exceeding
# the column limit and to putting a return type on its own line.
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 2000000
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
33 changes: 33 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# clang-tidy configuration.
# The Checks list first disables everything ('-*'), then enables the bugprone,
# cppcoreguidelines and modernize families plus a few individual hicpp and
# performance checks; entries starting with '-' switch single checks back off.
# Do not place comments inside the quoted Checks value: they would become part
# of the string.
# NOTE there must be no spaces before the '-', so put the comma first.
Checks: '
-*
,bugprone-*
,-bugprone-forward-declaration-namespace
,-bugprone-macro-parentheses
,cppcoreguidelines-*
,-cppcoreguidelines-interfaces-global-init
,-cppcoreguidelines-owning-memory
,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
,-cppcoreguidelines-pro-bounds-constant-array-index
,-cppcoreguidelines-pro-bounds-pointer-arithmetic
,-cppcoreguidelines-pro-type-cstyle-cast
,-cppcoreguidelines-pro-type-reinterpret-cast
,-cppcoreguidelines-pro-type-static-cast-downcast
,-cppcoreguidelines-pro-type-union-access
,-cppcoreguidelines-pro-type-vararg
,-cppcoreguidelines-special-member-functions
,hicpp-exception-baseclass
,hicpp-avoid-goto
,modernize-*
,-modernize-return-braced-init-list
,-modernize-use-auto
,-modernize-use-default-member-init
,-modernize-use-using
,performance-unnecessary-value-param
'
# Every enabled check that fires is reported as an error rather than a warning.
WarningsAsErrors: '*'
# Diagnostics from headers are only shown for paths matching this regex.
HeaderFilterRegex: 'torchaudio/.*'
AnalyzeTemporaryDtors: false
# No per-check options are set.
CheckOptions:
...
4 changes: 4 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# flake8 settings for the Python sources.
[flake8]
# Allow lines of up to 120 characters.
max-line-length = 120
# Error/warning codes listed here are not reported.
ignore = E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504
# Paths listed here are not linted.
exclude = build,docs/source,_ext
22 changes: 21 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
#!/usr/bin/env python
import os
import platform

from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CppExtension


def check_env_flag(name, default=''):
    """Return True when environment variable ``name`` holds a truthy value.

    Args:
        name (str): name of the environment variable to read.
        default (str, optional): value used when the variable is not set.

    Returns:
        bool: True if the upper-cased value is one of ``ON``, ``1``, ``YES``,
        ``TRUE`` or ``Y`` (so the comparison is case-insensitive); False otherwise.
    """
    return os.getenv(name, default).upper() in {'ON', '1', 'YES', 'TRUE', 'Y'}

# Debug builds are opt-in through the DEBUG environment variable.
DEBUG = check_env_flag('DEBUG')
eca = []  # passed to CppExtension as extra_compile_args
ela = []  # passed to CppExtension as extra_link_args
if DEBUG and platform.system() == 'Windows':
    # On Windows only a linker flag is added.
    ela.append('/DEBUG:FULL')
elif DEBUG:
    # Elsewhere the same two flags go to both the compiler and the linker.
    eca.extend(['-O0', '-g'])
    ela.extend(['-O0', '-g'])

setup(
name="torchaudio",
version="0.2",
Expand All @@ -14,6 +30,10 @@
packages=find_packages(exclude=["build"]),
ext_modules=[
CppExtension(
'_torch_sox', ['torchaudio/torch_sox.cpp'], libraries=['sox']),
'_torch_sox',
['torchaudio/torch_sox.cpp'],
libraries=['sox'],
extra_compile_args=eca,
extra_link_args=ela),
],
cmdclass={'build_ext': BuildExtension})
7 changes: 3 additions & 4 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def test_1_save(self):
os.unlink(new_filepath)

# test save 1d tensor
#x = x[:, 0] # get mono signal
x = x[0, :] # get mono signal
x.squeeze_() # remove channel dim
torchaudio.save(new_filepath, x, sr)
Expand Down Expand Up @@ -91,7 +90,7 @@ def test_2_load(self):
offset = 15
x, _ = torchaudio.load(self.test_filepath)
x_offset, _ = torchaudio.load(self.test_filepath, offset=offset)
self.assertTrue(x[:,offset:].allclose(x_offset))
self.assertTrue(x[:, offset:].allclose(x_offset))

# check number of frames
n = 201
Expand Down Expand Up @@ -132,7 +131,7 @@ def test_4_load_partial(self):
input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
x_sine_full, sr_sine = torchaudio.load(input_sine_path)
x_sine_part, _ = torchaudio.load(input_sine_path, num_frames=num_frames, offset=offset)
l1_error = x_sine_full[:, offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
l1_error = x_sine_full[:, offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
# test for the correct number of samples and that the correct portion was loaded
self.assertEqual(x_sine_part.size(1), num_frames)
self.assertEqual(l1_error, 0.)
Expand All @@ -148,7 +147,7 @@ def test_4_load_partial(self):
# test with two channel mp3
x_2ch_full, sr_2ch = torchaudio.load(self.test_filepath, normalization=True)
x_2ch_part, _ = torchaudio.load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
l1_error = x_2ch_full[:, offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
l1_error = x_2ch_full[:, offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
self.assertEqual(x_2ch_part.size(1), num_frames)
self.assertEqual(l1_error, 0.)

Expand Down
3 changes: 2 additions & 1 deletion test/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,14 @@ def __getitem__(self, index):
def __len__(self):
return len(self.data)


class Test_DataLoader(unittest.TestCase):
def test_1(self):
expected_size = (2, 1, 16000)
ds = TORCHAUDIODS()
dl = DataLoader(ds, batch_size=2)
for x in dl:
#print(x.size())
# print(x.size())
continue

self.assertTrue(x.size() == expected_size)
Expand Down
4 changes: 2 additions & 2 deletions test/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_load_partial(self):
input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
x_sine_full, sr_sine = load(input_sine_path)
x_sine_part, _ = load(input_sine_path, num_frames=num_frames, offset=offset)
l1_error = x_sine_full[offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
l1_error = x_sine_full[offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
# test for the correct number of samples and that the correct portion was loaded
self.assertEqual(x_sine_part.size(0), num_frames)
self.assertEqual(l1_error, 0.)
Expand All @@ -137,7 +137,7 @@ def test_load_partial(self):
# test with two channel mp3
x_2ch_full, sr_2ch = load(self.test_filepath, normalization=True)
x_2ch_part, _ = load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
l1_error = x_2ch_full[offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
l1_error = x_2ch_full[offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
self.assertEqual(x_2ch_part.size(0), num_frames)
self.assertEqual(l1_error, 0.)

Expand Down
9 changes: 5 additions & 4 deletions test/test_sox_effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_single_channel(self):
E.append_effect_to_chain("echos", [0.8, 0.7, 40, 0.25, 63, 0.3])
x, sr = E.sox_build_flow_effects()
# check if effects worked
#print(x.size())
# print(x.size())

def test_rate_channels(self):
target_rate = 16000
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_trim(self):
E.append_effect_to_chain("trim", [offset, num_frames])
x, sr = E.sox_build_flow_effects()
# check if effect worked
self.assertTrue(x.allclose(x_orig[:,offset_int:(offset_int+num_frames_int)], rtol=1e-4, atol=1e-4))
self.assertTrue(x.allclose(x_orig[:, offset_int:(offset_int + num_frames_int)], rtol=1e-4, atol=1e-4))

def test_silence_contrast(self):
si, _ = torchaudio.info(self.test_filepath)
Expand Down Expand Up @@ -183,13 +183,14 @@ def test_compand_fade(self):
E.append_effect_to_chain("fade", ["q", "0.25", "0", "0.33"])
x, _ = E.sox_build_flow_effects()
# check if effect worked
#print(x.size())
# print(x.size())

def test_biquad_delay(self):
si, _ = torchaudio.info(self.test_filepath)
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(self.test_filepath)
E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437", "1.0", "-0.17123075", "0.17668821"])
E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437",
"1.0", "-0.17123075", "0.17668821"])
E.append_effect_to_chain("delay", ["15000s"])
x, _ = E.sox_build_flow_effects()
# check if effect worked
Expand Down
6 changes: 4 additions & 2 deletions test/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ def test_pad_trim(self):
length_new = int(length_orig * 1.2)

result = transforms.PadTrim(max_len=length_new, channels_first=False)(audio_orig)

self.assertEqual(result.size(0), length_new)

result = transforms.PadTrim(max_len=length_new, channels_first=True)(audio_orig.transpose(0, 1))
self.assertEqual(result.size(1), length_new)

audio_orig = self.sig.clone()
length_orig = audio_orig.size(0)
length_new = int(length_orig * 0.8)
Expand Down Expand Up @@ -147,7 +149,7 @@ def test_mel2(self):
audio_orig = self.sig.clone() # (16000, 1)
audio_scaled = transforms.Scale()(audio_orig) # (16000, 1)
audio_scaled = transforms.LC2CL()(audio_scaled) # (1, 16000)
spectrogram_torch = transforms.MEL2()(audio_scaled) # (1, 319, 40)
spectrogram_torch = transforms.MEL2(window_fn=torch.hamming_window, pad=10)(audio_scaled) # (1, 319, 40)
self.assertTrue(spectrogram_torch.dim() == 3)
self.assertTrue(spectrogram_torch.max() <= 0.)

Expand Down
6 changes: 3 additions & 3 deletions torchaudio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def load(filepath,
filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
Returns: tuple(Tensor, int)
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
C is the number of channels
- int: the sample rate of the audio (as listed in the metadata of the file)
Example::
Expand Down Expand Up @@ -127,8 +128,7 @@ def save_encinfo(filepath,
>>> torchaudio.save('foo.wav', data, sample_rate)
"""
ch_idx = 0 if channels_first else 1
len_idx = 1 if channels_first else 0
ch_idx, len_idx = (0, 1) if channels_first else (1, 0)

# check if save directory exists
abs_dirpath = os.path.dirname(os.path.abspath(filepath))
Expand Down
3 changes: 2 additions & 1 deletion torchaudio/sox_effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ class SoxEffectsChain(object):
filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
Returns: tuple(Tensor, int)
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
C is the number of channels
- int: the sample rate of the audio (as listed in the metadata of the file)
Example::
Expand Down
15 changes: 4 additions & 11 deletions torchaudio/torch_sox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ int read_audio_file(

void write_audio_file(
const std::string& file_name,
at::Tensor tensor,
const at::Tensor& tensor,
sox_signalinfo_t* si,
sox_encodinginfo_t* ei,
const char* file_type) {
Expand Down Expand Up @@ -332,16 +332,9 @@ int build_flow_effects(const std::string& file_name,
int sr;
// Read the in-memory audio buffer or temp file that we just wrote.
#ifdef __APPLE__
/* certain effects will result in a target signal length of 0.
if (target_signal->length > 0) {
if (target_signal->channels != output->signal.channels) {
std::cout << "output: " << output->signal.channels << "|" << output->signal.length << "\n";
std::cout << "interm: " << interm_signal.channels << "|" << interm_signal.length << "\n";
std::cout << "target: " << target_signal->channels << "|" << target_signal->length << "\n";
unlink(tmp_name);
throw std::runtime_error("unexpected number of audio channels");
}
}
/*
Temporary filetype must have a valid header. Wav seems to work here while
raw does not. Certain effects like chorus caused strange behavior on the mac.
*/
// read_audio_file reads the temporary file and returns the sr and otensor
sr = read_audio_file(tmp_name, otensor, ch_first, 0, 0,
Expand Down
11 changes: 9 additions & 2 deletions torchaudio/torch_sox.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ int read_audio_file(
/// writing, or an error occurred during writing of the audio data.
void write_audio_file(
const std::string& file_name,
at::Tensor tensor,
at::Tensor& tensor,
sox_signalinfo_t* si,
sox_encodinginfo_t* ei,
const char* extension)
const char* file_type)

/// Reads an audio file from the given `path` and returns a tuple of
/// sox_signalinfo_t and sox_encodinginfo_t, which contain information about
Expand All @@ -46,6 +46,13 @@ std::vector<std::string> get_effect_names();
int initialize_sox();
int shutdown_sox();

// Struct for build_flow_effects function
struct SoxEffect {
  /// Default-constructed to the empty string.
  std::string ename;
  /// Default-constructed to a vector holding exactly one empty string.
  std::vector<std::string> eopts;

  /// Builds an effect with an empty `ename` and a single empty entry in
  /// `eopts`.
  SoxEffect() : ename(), eopts(1, std::string()) {}
};

/// Build a SoX chain, flow the effects, and capture the results in a tensor.
/// An audio file from the given `path` flows through an effects chain given
/// by a list of effects and effect options to an output buffer which is encoded
Expand Down
Loading

0 comments on commit 3bd4db8

Please sign in to comment.