Merge branch 'master' into r0.9
reuben committed Dec 9, 2020
2 parents 504e55b + 3e10163 commit 056f5a4
Showing 56 changed files with 542 additions and 162 deletions.
28 changes: 28 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -72,6 +72,30 @@ jobs:
docker push "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
fi
lint:
docker:
- image: circleci/python:3.7.9
steps:
- checkout
- run:
name: Install dependencies
command: |
pip install --upgrade cardboardlint pylint
- run:
name: Run linter
command: |
set -ex
# Check that the branch can be merged with master (if not, set -e stops the script)
git config user.email "[email protected]"
git config user.name "Your Name"
git merge --no-commit --no-ff origin/master
# Undo merge changes if any
git reset --hard $CIRCLE_BRANCH
# Lint differences against master
cardboardlinter --refspec origin/master -n auto;
workflows:
version: 2
build-deploy:
@@ -87,3 +111,7 @@ workflows:
filters:
tags:
only: /.*/

lint:
jobs:
- lint
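
The CI lint job above can be reproduced locally before pushing. A sketch, assuming a git checkout with an up-to-date `origin/master` ref and the same tools the job installs:

```shell
# Same dependencies the CI job installs
pip install --upgrade cardboardlint pylint
# Lint only the lines that differ from master, using all available cores
cardboardlinter --refspec origin/master -n auto
```

Running this before opening a PR catches the same issues the `lint` workflow would flag.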
1 change: 1 addition & 0 deletions .gitignore
@@ -38,3 +38,4 @@ doc/xml-c
doc/xml-java
doc/xml-dotnet
convert_graphdef_memmapped_format
native_client/swift/deepspeech_ios.framework/deepspeech_ios
22 changes: 0 additions & 22 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion BIBLIOGRAPHY.md
@@ -1,5 +1,5 @@
This file contains a list of papers in chronological order that have been published
using Mozilla's DeepSpeech.
using DeepSpeech.

To appear
==========
108 changes: 108 additions & 0 deletions CODE_OWNERS.rst
@@ -0,0 +1,108 @@
DeepSpeech code owners
======================

This file describes reviewers who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.

The reviewers' areas of expertise overlap, particularly in which files each area covers. Don't worry about getting it exactly right when requesting review; any code owner will be happy to redirect the request to a more appropriate person.

Global reviewers
----------------

These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance are trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other reviewers, global reviewers will make the final decision.

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

Training, feeding
-----------------

- Reuben Morais (@reuben)

Model exporting
---------------

- Alexandre Lissy (@lissyx)

Transfer learning
-----------------

- Josh Meyer (@JRMeyer)
- Reuben Morais (@reuben)

Testing & CI
------------

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

Native inference client
-----------------------

Everything that goes into libdeepspeech.so and is not specifically covered in another area fits here.

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

Streaming decoder
-----------------

- Reuben Morais (@reuben)
- @dabinat

Python bindings
---------------

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

Java Bindings
-------------

- Alexandre Lissy (@lissyx)

JavaScript/NodeJS/ElectronJS bindings
-------------------------------------

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

.NET bindings
-------------

- Carlos Fonseca (@carlfm01)

Swift bindings
--------------

- Reuben Morais (@reuben)

Android support
---------------

- Alexandre Lissy (@lissyx)

Raspberry Pi support
--------------------

- Alexandre Lissy (@lissyx)

Windows support
---------------

- Carlos Fonseca (@carlfm01)

iOS support
-----------

- Reuben Morais (@reuben)

Documentation
-------------

- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)

Third party bindings
--------------------

Hosted externally and owned by the individual authors. See the `list of third-party bindings <https://deepspeech.readthedocs.io/en/master/USING.html#third-party-bindings>`_ for more info.
6 changes: 6 additions & 0 deletions CONTRIBUTING.rst
@@ -33,6 +33,12 @@ Whenever you add a new feature to DeepSpeech and want to contribute that feature
2. You've made changes to the Python code. Make sure you run a linter (described below).
3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the DeepSpeech decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use whichever is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use the sample WAV files provided in the `DeepSpeech/data/` directory for testing.
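
As a concrete starting point, a latency spot-check on Linux could look like the following. This is only a sketch: it assumes the `deepspeech` CLI client and a released model file are installed, and the model filename is a placeholder.

```shell
# Count wall time, cycles, and instructions for one inference run
# over a sample WAV bundled with the repository
perf stat -e task-clock,cycles,instructions \
    deepspeech --model output_graph.pbmm \
               --audio data/smoke_test/LDC93S1.wav
```

Comparing these counters before and after your change gives a rough signal on latency regressions; memory usage can be checked similarly with tools like `valgrind --tool=massif`.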

Requesting review on your PR
----------------------------

Generally, a code owner will be notified of your pull request and will either review it or ask another code owner for a review. If you'd like to proactively request review as you open the PR, see the CODE_OWNERS.rst file, which describes who is an appropriate reviewer depending on which parts of the code you're changing.


Python Linter
-------------

4 changes: 2 additions & 2 deletions bin/compare_samples.py
@@ -15,8 +15,8 @@ def fail(message):


def compare_samples():
sample1 = load_sample(CLI_ARGS.sample1)
sample2 = load_sample(CLI_ARGS.sample2)
sample1 = load_sample(CLI_ARGS.sample1).unpack()
sample2 = load_sample(CLI_ARGS.sample2).unpack()
if sample1.audio_format != sample2.audio_format:
fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
if sample1.duration != sample2.duration:
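
The switch to `.unpack()` in this diff suggests `load_sample` now returns a lazily-loaded wrapper that must be resolved before attributes like `audio_format` and `duration` can be compared. A minimal sketch of that pattern — all class and function names here are illustrative, not the project's actual API:

```python
class ConcreteSample:
    """Fully decoded audio sample with directly comparable attributes."""
    def __init__(self, audio_format, duration):
        self.audio_format = audio_format
        self.duration = duration


class LazySample:
    """Defers decoding until unpack() is called, keeping load_sample cheap."""
    def __init__(self, path):
        self.path = path  # no I/O happens at construction time

    def unpack(self):
        # A real implementation would read and decode the file here;
        # fixed values are used purely for illustration.
        return ConcreteSample(audio_format=("wav", 16000, 1), duration=2.5)


def load_sample(path):
    return LazySample(path)


sample = load_sample("test.wav").unpack()
print(sample.duration)  # 2.5
```

Under this design, callers that only shuffle or count samples avoid decoding costs, while code that inspects audio properties (like `compare_samples`) must call `.unpack()` first — which is exactly what the change above adds.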
8 changes: 4 additions & 4 deletions doc/Java-API.rst
@@ -4,26 +4,26 @@ Java
DeepSpeechModel
---------------

.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::DeepSpeechModel
.. doxygenclass:: org::deepspeech::libdeepspeech::DeepSpeechModel
:project: deepspeech-java
:members:

Metadata
--------

.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::Metadata
.. doxygenclass:: org::deepspeech::libdeepspeech::Metadata
:project: deepspeech-java
:members: getNumTranscripts, getTranscript

CandidateTranscript
-------------------

.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::CandidateTranscript
.. doxygenclass:: org::deepspeech::libdeepspeech::CandidateTranscript
:project: deepspeech-java
:members: getNumTokens, getConfidence, getToken

TokenMetadata
-------------
.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::TokenMetadata
.. doxygenclass:: org::deepspeech::libdeepspeech::TokenMetadata
:project: deepspeech-java
:members: getText, getTimestep, getStartTime
8 changes: 4 additions & 4 deletions doc/Java-Examples.rst
@@ -1,12 +1,12 @@
Java API Usage example
======================

Examples are from `native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java`.
Examples are from `native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java`.

Creating a model instance and loading model
-------------------------------------------

.. literalinclude:: ../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java
:language: java
:linenos:
:lineno-match:
@@ -16,7 +16,7 @@ Creating a model instance and loading model
Performing inference
--------------------

.. literalinclude:: ../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java
:language: java
:linenos:
:lineno-match:
@@ -26,4 +26,4 @@ Performing inference
Full source code
----------------

See :download:`Full source code<../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java>`.
See :download:`Full source code<../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java>`.
2 changes: 1 addition & 1 deletion doc/NodeJS-API.rst
@@ -10,7 +10,7 @@ Model
Stream
------

.. js:autoclass:: Stream
.. js:autoclass:: StreamImpl
:members:

Module exported methods
18 changes: 17 additions & 1 deletion doc/TRAINING.rst
@@ -26,6 +26,8 @@ If you plan on committing code or you want to report bugs, please use the master
Creating a virtual environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Throughout the documentation we assume you are using **virtualenv** to manage your Python environments. This setup is the one used and recommended by the project authors and is the easiest way to make sure you won't run into environment issues. If you're using **Anaconda, Miniconda or Mamba**, first read the instructions at :ref:`training-with-conda` and then continue from the installation step below.

Creating a virtual environment produces a directory containing a ``python3`` binary and everything needed to run DeepSpeech. You can use whatever directory you want; for the purposes of this documentation, we will rely on ``$HOME/tmp/deepspeech-train-venv``. You can create it using this command:

.. code-block::
@@ -245,7 +247,7 @@ N.B. - If you have access to a pre-trained model which uses UTF-8 bytes at the o
Fine-Tuning (same alphabet)
^^^^^^^^^^^^^^^^^^^^^^^^^^^

If you'd like to use one of the pre-trained models released by Mozilla to bootstrap your training process (fine tuning), you can do so by using the ``--checkpoint_dir`` flag in ``DeepSpeech.py``. Specify the path where you downloaded the checkpoint from the release, and training will resume from the pre-trained model.
If you'd like to use one of the pre-trained models to bootstrap your training process (fine tuning), you can do so by using the ``--checkpoint_dir`` flag in ``DeepSpeech.py``. Specify the path where you downloaded the checkpoint from the release, and training will resume from the pre-trained model.

For example, if you want to fine tune the entire graph using your own data in ``my-train.csv``\ , ``my-dev.csv`` and ``my-test.csv``\ , for three epochs, you can run something like the following, tuning the hyperparameters as needed:
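
Such a fine-tuning run could be sketched as follows. Only ``--checkpoint_dir`` is named in the paragraph above; the remaining flags are assumptions based on the CSV files and epoch count it mentions, and should be checked against the project's full flag reference:

```shell
python3 DeepSpeech.py \
    --checkpoint_dir path/to/downloaded/checkpoint \
    --train_files my-train.csv \
    --dev_files my-dev.csv \
    --test_files my-test.csv \
    --epochs 3
```

Training resumes from the downloaded checkpoint rather than from random initialization, which is what makes this fine-tuning rather than training from scratch.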

@@ -527,3 +529,17 @@ Example of creating a pre-augmented test set:
--augment overlay[source=noise.sdb,layers=1,snr=20~10] \
--augment resample[rate=12000:8000~4000] \
test.sdb test-augmented.sdb
.. _training-with-conda:

Training from an Anaconda or Miniconda environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Keep in mind that none of the core authors use Anaconda or Miniconda, so this setup is not guaranteed to work. If you experience problems, try using a non-conda setup first. We're happy to accept pull requests fixing any incompatibilities with conda setups, but we will not offer any support ourselves beyond reviewing pull requests.

To prevent common problems, make sure you **always use a separate environment when setting things up for training**:

.. code-block:: bash

    (base) $ conda create -n deepspeech python=3.7
    (base) $ conda activate deepspeech
6 changes: 3 additions & 3 deletions doc/conf.py
@@ -46,8 +46,8 @@
# -- Project information -----------------------------------------------------

project = u'DeepSpeech'
copyright = '2019-2020, Mozilla Corporation'
author = 'Mozilla Corporation'
copyright = '2019-2020 Mozilla Corporation, 2020 DeepSpeech authors'
author = 'DeepSpeech authors'

with open('../VERSION', 'r') as ver:
v = ver.read().strip()
@@ -175,7 +175,7 @@
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'DeepSpeech.tex', u'DeepSpeech Documentation',
u'Mozilla Research', 'manual'),
u'DeepSpeech authors', 'manual'),
]


2 changes: 1 addition & 1 deletion doc/doxygen-java.conf
@@ -790,7 +790,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.

INPUT = native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/ native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/
INPUT = native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/ native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
2 changes: 1 addition & 1 deletion doc/examples
Submodule examples updated 72 files
+16 −7 README.rst
+4 −5 android_mic_streaming/README.md
+1 −1 android_mic_streaming/app/build.gradle
+62 −82 android_mic_streaming/app/src/main/java/org/deepspeechdemo/MainActivity.kt
+157 −0 batch_processing/Readme.md
+83 −0 batch_processing/driver.py
+61 −0 batch_processing/requirements.txt
+5 −0 batch_processing/setup.ps1
+1 −0 batch_processing/test.ps1
+10 −0 batch_processing/test_tf.py
+95 −0 electron/.gitignore
+71 −0 electron/Readme.md
+16,338 −0 electron/package-lock.json
+98 −0 electron/package.json
+95 −0 electron/public/create-window.js
+22 −0 electron/public/download.js
+24 −0 electron/public/electron.js
+44 −0 electron/public/index.html
+25 −0 electron/public/manifest.json
+2 −0 electron/public/preload.js
+64 −0 electron/public/recognize-wav.js
+60 −0 electron/src/App.js
+5 −0 electron/src/index.js
+36 −0 electron/test.sh
+1 −1 ffmpeg_vad_streaming/README.MD
+1 −1 ffmpeg_vad_streaming/package.json
+6 −6 ffmpeg_vad_streaming/test.sh
+12 −10 mic_vad_streaming/README.rst
+2 −2 mic_vad_streaming/mic_vad_streaming.py
+1 −1 mic_vad_streaming/requirements.txt
+2 −2 mic_vad_streaming/test.sh
+1 −1 net_framework/DeepSpeechWPF/App.xaml.cs
+139 −139 net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
+36 −0 nim_mic_vad_streaming/README.md
+70 −0 nim_mic_vad_streaming/linux_nim_vad_streaming/README.md
+190 −0 nim_mic_vad_streaming/linux_nim_vad_streaming/vad_stream.nim
+62 −0 nim_mic_vad_streaming/win_nim_vad_streaming/README.md
+160 −0 nim_mic_vad_streaming/win_nim_vad_streaming/vad_stream.nim
+4 −4 nodejs_mic_vad_streaming/Readme.md
+0 −731 nodejs_mic_vad_streaming/package-lock.json
+1 −1 nodejs_mic_vad_streaming/package.json
+9 −18 nodejs_mic_vad_streaming/start.js
+6 −6 nodejs_wav/Readme.md
+2 −2 nodejs_wav/index.js
+1 −1 nodejs_wav/package.json
+330 −0 uwp/.gitignore
+51 −0 uwp/DeepSpeechUWP.sln
+1 −0 uwp/DeepSpeechUWP/.gitignore
+7 −0 uwp/DeepSpeechUWP/App.xaml
+103 −0 uwp/DeepSpeechUWP/App.xaml.cs
+ uwp/DeepSpeechUWP/Assets/LockScreenLogo.scale-200.png
+ uwp/DeepSpeechUWP/Assets/SplashScreen.scale-200.png
+ uwp/DeepSpeechUWP/Assets/Square150x150Logo.scale-200.png
+ uwp/DeepSpeechUWP/Assets/Square44x44Logo.scale-200.png
+ uwp/DeepSpeechUWP/Assets/Square44x44Logo.targetsize-24_altform-unplated.png
+ uwp/DeepSpeechUWP/Assets/StoreLogo.png
+ uwp/DeepSpeechUWP/Assets/Wide310x150Logo.scale-200.png
+190 −0 uwp/DeepSpeechUWP/DeepSpeechUWP.csproj
+51 −0 uwp/DeepSpeechUWP/MainPage.xaml
+328 −0 uwp/DeepSpeechUWP/MainPage.xaml.cs
+50 −0 uwp/DeepSpeechUWP/Package.appxmanifest
+29 −0 uwp/DeepSpeechUWP/Properties/AssemblyInfo.cs
+31 −0 uwp/DeepSpeechUWP/Properties/Default.rd.xml
+0 −0 uwp/DeepSpeechUWP/models/.gitkeep
+1 −1 vad_transcriber/README.md
+1 −1 vad_transcriber/requirements.txt
+2 −2 vad_transcriber/wavTranscriber.py
+2 −2 web_microphone_websocket/Readme.md
+2 −2 web_microphone_websocket/package.json
+9 −18 web_microphone_websocket/server.js
+1 −1 web_microphone_websocket/test.sh
+0 −11,345 web_microphone_websocket/yarn.lock

0 comments on commit 056f5a4