refactoring and cleaning up code

pytorch · Jan 4, 2019 · 3bd4db8 · 3bd4db8
1 parent 0e0d1e5
commit 3bd4db8
Show file tree

Hide file tree

Showing 14 changed files with 213 additions and 93 deletions.
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,88 @@
+---
+AccessModifierOffset: -1
+AlignAfterOpenBracket: AlwaysBreak
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
+IncludeCategories:
+  - Regex:           '^<.*\.h(pp)?>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 2000000
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        8
+UseTab:          Never
+...
diff --git a/.clang-tidy b/.clang-tidy
@@ -0,0 +1,33 @@
+---
+# NOTE there must be no spaces before the '-', so put the comma first.
+Checks: '
+  -*
+  ,bugprone-*
+  ,-bugprone-forward-declaration-namespace
+  ,-bugprone-macro-parentheses
+  ,cppcoreguidelines-*
+  ,-cppcoreguidelines-interfaces-global-init
+  ,-cppcoreguidelines-owning-memory
+  ,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
+  ,-cppcoreguidelines-pro-bounds-constant-array-index
+  ,-cppcoreguidelines-pro-bounds-pointer-arithmetic
+  ,-cppcoreguidelines-pro-type-cstyle-cast
+  ,-cppcoreguidelines-pro-type-reinterpret-cast
+  ,-cppcoreguidelines-pro-type-static-cast-downcast
+  ,-cppcoreguidelines-pro-type-union-access
+  ,-cppcoreguidelines-pro-type-vararg
+  ,-cppcoreguidelines-special-member-functions
+  ,hicpp-exception-baseclass
+  ,hicpp-avoid-goto
+  ,modernize-*
+  ,-modernize-return-braced-init-list
+  ,-modernize-use-auto
+  ,-modernize-use-default-member-init
+  ,-modernize-use-using
+  ,performance-unnecessary-value-param
+  '
+WarningsAsErrors: '*'
+HeaderFilterRegex: 'torchaudio/.*'
+AnalyzeTemporaryDtors: false
+CheckOptions:
+...
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 120
+ignore = E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504
+exclude = build,docs/source,_ext
diff --git a/setup.py b/setup.py
@@ -1,8 +1,24 @@
 #!/usr/bin/env python
+import os
+import platform
 
 from setuptools import setup, find_packages
 from torch.utils.cpp_extension import BuildExtension, CppExtension
 
+
+def check_env_flag(name, default=''):  # True if env var `name` is ON/1/YES/TRUE/Y (case-insensitive)
+    return os.getenv(name, default).upper() in set(['ON', '1', 'YES', 'TRUE', 'Y'])
+
+DEBUG = check_env_flag('DEBUG')  # opt-in debug build, read from the DEBUG environment variable
+eca = []  # handed to CppExtension as extra_compile_args
+ela = []  # handed to CppExtension as extra_link_args
+if DEBUG:
+    if platform.system() == 'Windows':
+        ela += ['/DEBUG:FULL']  # Windows: only a linker flag is added, compile args stay empty
+    else:
+        eca += ['-O0', '-g']  # non-Windows: the same -O0/-g pair goes to both compile and link
+        ela += ['-O0', '-g']
+
 setup(
     name="torchaudio",
     version="0.2",
@@ -14,6 +30,10 @@
     packages=find_packages(exclude=["build"]),
     ext_modules=[
         CppExtension(
-            '_torch_sox', ['torchaudio/torch_sox.cpp'], libraries=['sox']),
+            '_torch_sox',
+            ['torchaudio/torch_sox.cpp'],
+            libraries=['sox'],
+            extra_compile_args=eca,
+            extra_link_args=ela),
     ],
     cmdclass={'build_ext': BuildExtension})
diff --git a/test/test.py b/test/test.py
@@ -27,7 +27,6 @@ def test_1_save(self):
         os.unlink(new_filepath)
 
         # test save 1d tensor
-        #x = x[:, 0]  # get mono signal
         x = x[0, :]  # get mono signal
         x.squeeze_()  # remove channel dim
         torchaudio.save(new_filepath, x, sr)
@@ -91,7 +90,7 @@ def test_2_load(self):
         offset = 15
         x, _ = torchaudio.load(self.test_filepath)
         x_offset, _ = torchaudio.load(self.test_filepath, offset=offset)
-        self.assertTrue(x[:,offset:].allclose(x_offset))
+        self.assertTrue(x[:, offset:].allclose(x_offset))
 
         # check number of frames
         n = 201
@@ -132,7 +131,7 @@ def test_4_load_partial(self):
         input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
         x_sine_full, sr_sine = torchaudio.load(input_sine_path)
         x_sine_part, _ = torchaudio.load(input_sine_path, num_frames=num_frames, offset=offset)
-        l1_error = x_sine_full[:, offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
+        l1_error = x_sine_full[:, offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
         # test for the correct number of samples and that the correct portion was loaded
         self.assertEqual(x_sine_part.size(1), num_frames)
         self.assertEqual(l1_error, 0.)
@@ -148,7 +147,7 @@ def test_4_load_partial(self):
         # test with two channel mp3
         x_2ch_full, sr_2ch = torchaudio.load(self.test_filepath, normalization=True)
         x_2ch_part, _ = torchaudio.load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
-        l1_error = x_2ch_full[:, offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
+        l1_error = x_2ch_full[:, offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
         self.assertEqual(x_2ch_part.size(1), num_frames)
         self.assertEqual(l1_error, 0.)
 

diff --git a/test/test_dataloader.py b/test/test_dataloader.py
@@ -30,13 +30,14 @@ def __getitem__(self, index):
     def __len__(self):
         return len(self.data)
 
+
 class Test_DataLoader(unittest.TestCase):
     def test_1(self):
         expected_size = (2, 1, 16000)
         ds = TORCHAUDIODS()
         dl = DataLoader(ds, batch_size=2)
         for x in dl:
-            #print(x.size())
+            # print(x.size())
             continue
 
         self.assertTrue(x.size() == expected_size)

diff --git a/test/test_legacy.py b/test/test_legacy.py
@@ -120,7 +120,7 @@ def test_load_partial(self):
         input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
         x_sine_full, sr_sine = load(input_sine_path)
         x_sine_part, _ = load(input_sine_path, num_frames=num_frames, offset=offset)
-        l1_error = x_sine_full[offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
+        l1_error = x_sine_full[offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
         # test for the correct number of samples and that the correct portion was loaded
         self.assertEqual(x_sine_part.size(0), num_frames)
         self.assertEqual(l1_error, 0.)
@@ -137,7 +137,7 @@ def test_load_partial(self):
         # test with two channel mp3
         x_2ch_full, sr_2ch = load(self.test_filepath, normalization=True)
         x_2ch_part, _ = load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
-        l1_error = x_2ch_full[offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
+        l1_error = x_2ch_full[offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
         self.assertEqual(x_2ch_part.size(0), num_frames)
         self.assertEqual(l1_error, 0.)
 

diff --git a/test/test_sox_effects.py b/test/test_sox_effects.py
@@ -17,7 +17,7 @@ def test_single_channel(self):
         E.append_effect_to_chain("echos", [0.8, 0.7, 40, 0.25, 63, 0.3])
         x, sr = E.sox_build_flow_effects()
         # check if effects worked
-        #print(x.size())
+        # print(x.size())
 
     def test_rate_channels(self):
         target_rate = 16000
@@ -154,7 +154,7 @@ def test_trim(self):
         E.append_effect_to_chain("trim", [offset, num_frames])
         x, sr = E.sox_build_flow_effects()
         # check if effect worked
-        self.assertTrue(x.allclose(x_orig[:,offset_int:(offset_int+num_frames_int)], rtol=1e-4, atol=1e-4))
+        self.assertTrue(x.allclose(x_orig[:, offset_int:(offset_int + num_frames_int)], rtol=1e-4, atol=1e-4))
 
     def test_silence_contrast(self):
         si, _ = torchaudio.info(self.test_filepath)
@@ -183,13 +183,14 @@ def test_compand_fade(self):
         E.append_effect_to_chain("fade", ["q", "0.25", "0", "0.33"])
         x, _ = E.sox_build_flow_effects()
         # check if effect worked
-        #print(x.size())
+        # print(x.size())
 
     def test_biquad_delay(self):
         si, _ = torchaudio.info(self.test_filepath)
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(self.test_filepath)
-        E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437", "1.0", "-0.17123075", "0.17668821"])
+        E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437",
+                                            "1.0", "-0.17123075", "0.17668821"])
         E.append_effect_to_chain("delay", ["15000s"])
         x, _ = E.sox_build_flow_effects()
         # check if effect worked

diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -38,9 +38,11 @@ def test_pad_trim(self):
         length_new = int(length_orig * 1.2)
 
         result = transforms.PadTrim(max_len=length_new, channels_first=False)(audio_orig)
-
         self.assertEqual(result.size(0), length_new)
 
+        result = transforms.PadTrim(max_len=length_new, channels_first=True)(audio_orig.transpose(0, 1))
+        self.assertEqual(result.size(1), length_new)
+
         audio_orig = self.sig.clone()
         length_orig = audio_orig.size(0)
         length_new = int(length_orig * 0.8)
@@ -147,7 +149,7 @@ def test_mel2(self):
         audio_orig = self.sig.clone()  # (16000, 1)
         audio_scaled = transforms.Scale()(audio_orig)  # (16000, 1)
         audio_scaled = transforms.LC2CL()(audio_scaled)  # (1, 16000)
-        spectrogram_torch = transforms.MEL2()(audio_scaled)  # (1, 319, 40)
+        spectrogram_torch = transforms.MEL2(window_fn=torch.hamming_window, pad=10)(audio_scaled)  # (1, 319, 40)
         self.assertTrue(spectrogram_torch.dim() == 3)
         self.assertTrue(spectrogram_torch.max() <= 0.)
 

diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
@@ -44,7 +44,8 @@ def load(filepath,
         filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
 
     Returns: tuple(Tensor, int)
-       - Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
+       - Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
+                 C is the number of channels
        - int: the sample rate of the audio (as listed in the metadata of the file)
 
     Example::
@@ -127,8 +128,7 @@ def save_encinfo(filepath,
         >>> torchaudio.save('foo.wav', data, sample_rate)
 
     """
-    ch_idx = 0 if channels_first else 1
-    len_idx = 1 if channels_first else 0
+    ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
 
     # check if save directory exists
     abs_dirpath = os.path.dirname(os.path.abspath(filepath))

diff --git a/torchaudio/sox_effects.py b/torchaudio/sox_effects.py
@@ -44,7 +44,8 @@ class SoxEffectsChain(object):
         filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
 
     Returns: tuple(Tensor, int)
-       - Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
+       - Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
+                 C is the number of channels
        - int: the sample rate of the audio (as listed in the metadata of the file)
 
     Example::

diff --git a/torchaudio/torch_sox.cpp b/torchaudio/torch_sox.cpp
@@ -158,7 +158,7 @@ int read_audio_file(
 
 void write_audio_file(
     const std::string& file_name,
-    at::Tensor tensor,
+    const at::Tensor& tensor,
     sox_signalinfo_t* si,
     sox_encodinginfo_t* ei,
     const char* file_type) {
@@ -332,16 +332,9 @@ int build_flow_effects(const std::string& file_name,
   int sr;
   // Read the in-memory audio buffer or temp file that we just wrote.
 #ifdef __APPLE__
-  /*  certain effects will result in a target signal length of 0.
-  if (target_signal->length > 0) {
-    if (target_signal->channels != output->signal.channels) {
-      std::cout << "output: " << output->signal.channels << "|" << output->signal.length << "\n";
-      std::cout << "interm: " << interm_signal.channels << "|" << interm_signal.length << "\n";
-      std::cout << "target: " << target_signal->channels << "|" << target_signal->length << "\n";
-      unlink(tmp_name);
-      throw std::runtime_error("unexpected number of audio channels");
-    }
-  }
+  /*
+     Temporary filetype must have a valid header.  Wav seems to work here while
+     raw does not.  Certain effects like chorus caused strange behavior on the mac.
   */
   // read_audio_file reads the temporary file and returns the sr and otensor
   sr = read_audio_file(tmp_name, otensor, ch_first, 0, 0,

diff --git a/torchaudio/torch_sox.h b/torchaudio/torch_sox.h
@@ -26,10 +26,10 @@ int read_audio_file(
 /// writing, or an error ocurred during writing of the audio data.
 void write_audio_file(
     const std::string& file_name,
-    at::Tensor tensor,
+    const at::Tensor& tensor,  // const ref, same as the torch_sox.cpp definition
     sox_signalinfo_t* si,
     sox_encodinginfo_t* ei,
-    const char* extension)
+    const char* file_type);
 
 /// Reads an audio file from the given `path` and returns a tuple of
 /// sox_signalinfo_t and sox_encodinginfo_t, which contain information about
@@ -46,6 +46,13 @@ std::vector<std::string> get_effect_names();
 int initialize_sox();
 int shutdown_sox();
 
+// One effect entry (name plus its options) for the build_flow_effects function
+struct SoxEffect {
+  SoxEffect() : ename(""), eopts({""})  { }  // defaults: empty name, one "" option
+  std::string ename;  // effect name (presumably a SoX effect name - TODO confirm)
+  std::vector<std::string> eopts;  // options for the effect, kept as strings
+};
+
 /// Build a SoX chain, flow the effects, and capture the results in a tensor.
 /// An audio file from the given `path` flows through an effects chain given
 /// by a list of effects and effect options to an output buffer which is encoded