From 9a92fa40cae1bc16cb2fabf5d0f513d0fc00f94e Mon Sep 17 00:00:00 2001 From: Catalin Voss Date: Mon, 2 Nov 2020 21:09:35 -0800 Subject: [PATCH 01/62] Make variables consistent --- training/deepspeech_training/train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 5d4d7a0dd..ded45d2d7 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -900,21 +900,21 @@ def do_single_file_inference(input_file_path): features = create_overlapping_windows(features).eval(session=session) features_len = features_len.eval(session=session) - logits = outputs['outputs'].eval(feed_dict={ + probs = outputs['outputs'].eval(feed_dict={ inputs['input']: features, inputs['input_lengths']: features_len, inputs['previous_state_c']: previous_state_c, inputs['previous_state_h']: previous_state_h, }, session=session) - logits = np.squeeze(logits) + probs = np.squeeze(probs) if FLAGS.scorer_path: scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet) else: scorer = None - decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, + decoded = ctc_beam_search_decoder(probs, Config.alphabet, FLAGS.beam_width, scorer=scorer, cutoff_prob=FLAGS.cutoff_prob, cutoff_top_n=FLAGS.cutoff_top_n) # Print highest probability result From 1cd5e44a52c8ef05879fcc1d1b2334d706d39f37 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 3 Nov 2020 13:32:06 +0100 Subject: [PATCH 02/62] Force npm install on RTD and set appropriate PATH value --- doc/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index bb64d77e2..a74fff706 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -30,7 +30,11 @@ # on our CI as in RTD to avoid regressions on RTD that we would not catch on # TaskCluster import subprocess +parent = subprocess.check_output("cd ../ && pwd", shell=True).decode().strip() +os.environ["PATH"] = os.path.join(parent, 'node_modules', '.bin') + ':' + os.environ["PATH"] subprocess.check_call('cd ../ && npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x', shell=True) +subprocess.check_call('env', shell=True) +subprocess.check_call('which typedoc', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-c.conf', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-java.conf', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True) From 98e75c3c0370effbcdff7cdd7560ad2cbec3f105 Mon Sep 17 00:00:00 2001 From: Catalin Voss Date: Tue, 3 Nov 2020 09:49:27 -0800 Subject: [PATCH 03/62] Call the logits probs in `create_inference_graph` after they go thru softmax --- training/deepspeech_training/train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index ded45d2d7..8bf7a3545 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -730,7 +730,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): logits = tf.squeeze(logits, [1]) # Apply softmax for CTC decoder - logits = tf.nn.softmax(logits, name='logits') + probs = tf.nn.softmax(logits, name='logits') if batch_size <= 0: if tflite: @@ -743,7 +743,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): 'input_lengths': seq_length, }, { - 'outputs': logits, + 'outputs': probs, }, layers ) @@ -763,7 +763,7 @@ def 
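Patches 01 and 03 are the same cleanup: the tensor handed to `ctc_beam_search_decoder` has already been through a softmax, so calling it `logits` was misleading. A standalone sketch of that step (NumPy only, not DeepSpeech code; the 29-symbol alphabet is illustrative) showing the per-timestep distributions the decoder expects:

```python
import numpy as np

def softmax(logits, axis=-1):
    # Subtract the row max before exponentiating for numerical stability.
    shifted = logits - logits.max(axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=axis, keepdims=True)

# Fake acoustic-model output: 5 timesteps over a 29-symbol alphabet
# (28 characters + the CTC blank).
logits = np.random.randn(5, 29).astype(np.float32)
probs = softmax(logits)
assert np.allclose(probs.sum(axis=-1), 1.0)  # each timestep is now a distribution
```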
create_inference_graph(batch_size=1, n_steps=16, tflite=False): inputs['input_lengths'] = seq_length outputs = { - 'outputs': logits, + 'outputs': probs, 'new_state_c': new_state_c, 'new_state_h': new_state_h, 'mfccs': mfccs, From 3a2879933f932f4c9bb53d0f0781035f58567652 Mon Sep 17 00:00:00 2001 From: dag7dev <44711271+dag7dev@users.noreply.github.com> Date: Wed, 4 Nov 2020 15:52:43 +0100 Subject: [PATCH 04/62] initial commit for py39 support --- native_client/ctcdecode/Makefile | 4 ++-- native_client/python/Makefile | 2 +- taskcluster/.shared.yml | 12 ++++++------ ...Y38 => examples-mic_vad_streaming-py38.yml} | 1 + ...treaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 | 14 ++++++++++++++ taskcluster/examples-vad_transcriber-py35.yml | 2 +- taskcluster/examples-vad_transcriber-py36.yml | 2 +- taskcluster/examples-vad_transcriber-py37.yml | 2 +- taskcluster/examples-vad_transcriber-py38.yml | 2 +- taskcluster/examples-vad_transcriber-py39.yml | 11 +++++++++++ taskcluster/tc-all-vars.sh | 4 ++-- taskcluster/tc-py-utils.sh | 18 +++++++++++++++--- .../test-python_39-darwin-amd64-opt.yml | 12 ++++++++++++ taskcluster/test-python_39-win-amd64-opt.yml | 14 ++++++++++++++ taskcluster/test-python_39-win-cuda-opt.yml | 14 ++++++++++++++ .../test-python_39_16k-linux-amd64-opt.yml | 12 ++++++++++++ ...hon_39_16k-linux-amd64-prod_pbmodel-opt.yml | 10 ++++++++++ .../test-python_39_8k-linux-amd64-opt.yml | 12 ++++++++++++ ...thon_39_8k-linux-amd64-prod_pbmodel-opt.yml | 10 ++++++++++ ...t-python_39_tflite_16k-darwin-amd64-opt.yml | 12 ++++++++++++ ...hon_39_tflite_16k-darwin-amd64-prod-opt.yml | 12 ++++++++++++ ...st-python_39_tflite_16k-linux-amd64-opt.yml | 12 ++++++++++++ ...thon_39_tflite_16k-linux-amd64-prod-opt.yml | 10 ++++++++++ ...test-python_39_tflite_16k-win-amd64-opt.yml | 14 ++++++++++++++ ...python_39_tflite_16k-win-amd64-prod-opt.yml | 14 ++++++++++++++ ...ython_39_tflite_8k-linux-amd64-prod-opt.yml | 10 ++++++++++ 26 files changed, 224 insertions(+), 18 deletions(-) rename taskcluster/{examples-mic_vad_streaming-py38.yml.DISABLED_UNTIL_SCIPY_PY38 => examples-mic_vad_streaming-py38.yml} (92%) create mode 100644 taskcluster/examples-mic_vad_streaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 create mode 100644 taskcluster/examples-vad_transcriber-py39.yml create mode 100644 taskcluster/test-python_39-darwin-amd64-opt.yml create mode 100644 taskcluster/test-python_39-win-amd64-opt.yml create mode 100644 taskcluster/test-python_39-win-cuda-opt.yml create mode 100644 taskcluster/test-python_39_16k-linux-amd64-opt.yml create mode 100644 taskcluster/test-python_39_16k-linux-amd64-prod_pbmodel-opt.yml create mode 100644 taskcluster/test-python_39_8k-linux-amd64-opt.yml create mode 100644 taskcluster/test-python_39_8k-linux-amd64-prod_pbmodel-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-darwin-amd64-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-darwin-amd64-prod-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-linux-amd64-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-linux-amd64-prod-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-win-amd64-opt.yml create mode 100644 taskcluster/test-python_39_tflite_16k-win-amd64-prod-opt.yml create mode 100644 taskcluster/test-python_39_tflite_8k-linux-amd64-prod-opt.yml diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile index 8bff277b3..87859af14 100644 --- a/native_client/ctcdecode/Makefile +++ b/native_client/ctcdecode/Makefile @@ -44,14 +44,14 @@ 
workspace_status.cc: # Enforce PATH here because swig calls from build_ext looses track of some # variables over several runs bindings: clean-keep-third-party workspace_status.cc ds-swig - pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 + pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0 DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) find temp_build -type f -name "*.o" -delete DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) rm -rf temp_build bindings-debug: clean-keep-third-party workspace_status.cc ds-swig - pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 + pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0 DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) $(GENERATE_DEBUG_SYMS) find temp_build -type f -name "*.o" -delete diff --git a/native_client/python/Makefile b/native_client/python/Makefile index 7f948649a..10924654f 100644 --- a/native_client/python/Makefile +++ b/native_client/python/Makefile @@ -9,7 +9,7 @@ bindings-clean: # Enforce PATH here because swig calls from build_ext looses track of some # variables over several runs bindings-build: ds-swig - pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 + pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0 DISTUTILS_USE_SDK=1 PATH=$(TOOLCHAIN):$(DS_SWIG_BIN_PATH):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME) MANIFEST.in: bindings-build diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index cc8651c04..5663ef717 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -137,14 +137,14 @@ system: namespace: 'project.deepspeech.gradle.7' pyenv: linux: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.9/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.linux.9' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.18/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.linux.18' osx: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.9/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.osx.9' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.18/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.osx.18' win: - url: 
'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.8/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.win.8' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.18/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.win.18' kenlm: android_arm64_cpu: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-arm64/artifacts/public/kenlm.tar.gz" diff --git a/taskcluster/examples-mic_vad_streaming-py38.yml.DISABLED_UNTIL_SCIPY_PY38 b/taskcluster/examples-mic_vad_streaming-py38.yml similarity index 92% rename from taskcluster/examples-mic_vad_streaming-py38.yml.DISABLED_UNTIL_SCIPY_PY38 rename to taskcluster/examples-mic_vad_streaming-py38.yml index 950de3533..e1f53fd05 100644 --- a/taskcluster/examples-mic_vad_streaming-py38.yml.DISABLED_UNTIL_SCIPY_PY38 +++ b/taskcluster/examples-mic_vad_streaming-py38.yml @@ -8,6 +8,7 @@ build: apt-get -qq -y install portaudio19-dev pulseaudio args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/examples/mic_vad_streaming/test.sh 3.8.0:" + workerType: "${docker.dsTests}" metadata: name: "DeepSpeech examples: mic VAD streaming Py3.8" description: "DeepSpeech examples: mic VAD streaming Python 3.8" diff --git a/taskcluster/examples-mic_vad_streaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 b/taskcluster/examples-mic_vad_streaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 new file mode 100644 index 000000000..ff78555b9 --- /dev/null +++ b/taskcluster/examples-mic_vad_streaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 @@ -0,0 +1,14 @@ +build: + template_file: examples-base.tyml + docker_image: "python:3.9" + dependencies: + - "linux-amd64-cpu-opt" + system_setup: + > + apt-get -qq -y install portaudio19-dev pulseaudio + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/examples/mic_vad_streaming/test.sh 3.9.0:" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech examples: mic VAD streaming Py3.9" + description: "DeepSpeech examples: mic VAD streaming Python 3.9" diff --git a/taskcluster/examples-vad_transcriber-py35.yml b/taskcluster/examples-vad_transcriber-py35.yml index ebf891af8..dbd4ef120 100644 --- a/taskcluster/examples-vad_transcriber-py35.yml +++ b/taskcluster/examples-vad_transcriber-py35.yml @@ -8,4 +8,4 @@ build: workerType: "${docker.dsTests}" metadata: name: "DeepSpeech examples: VAD transcriber Py3.5" - description: "DeepSpeech examples: VAD transcriberaming Python 3.5" + description: "DeepSpeech examples: VAD transcriber streaming Python 3.5" diff --git a/taskcluster/examples-vad_transcriber-py36.yml b/taskcluster/examples-vad_transcriber-py36.yml index a7cdfd480..15df5ddeb 100644 --- a/taskcluster/examples-vad_transcriber-py36.yml +++ b/taskcluster/examples-vad_transcriber-py36.yml @@ -8,4 +8,4 @@ build: workerType: "${docker.dsTests}" metadata: name: "DeepSpeech examples: VAD transcriber Py3.6" - description: "DeepSpeech examples: VAD transcriberaming Python 3.6" + description: "DeepSpeech examples: VAD transcriber streaming Python 3.6" diff --git a/taskcluster/examples-vad_transcriber-py37.yml b/taskcluster/examples-vad_transcriber-py37.yml index c5c4eecab..1b2bcc3c1 100644 --- a/taskcluster/examples-vad_transcriber-py37.yml +++ b/taskcluster/examples-vad_transcriber-py37.yml @@ -8,4 +8,4 @@ build: workerType: "${docker.dsTests}" metadata: name: "DeepSpeech examples: VAD transcriber Py3.7" - description: "DeepSpeech examples: VAD 
transcriberaming Python 3.7" + description: "DeepSpeech examples: VAD transcriber streaming Python 3.7" diff --git a/taskcluster/examples-vad_transcriber-py38.yml b/taskcluster/examples-vad_transcriber-py38.yml index 5542ef439..b04358f9a 100644 --- a/taskcluster/examples-vad_transcriber-py38.yml +++ b/taskcluster/examples-vad_transcriber-py38.yml @@ -8,4 +8,4 @@ build: workerType: "${docker.dsTests}" metadata: name: "DeepSpeech examples: VAD transcriber Py3.8" - description: "DeepSpeech examples: VAD transcriberaming Python 3.8" + description: "DeepSpeech examples: VAD transcriber streaming Python 3.8" diff --git a/taskcluster/examples-vad_transcriber-py39.yml b/taskcluster/examples-vad_transcriber-py39.yml new file mode 100644 index 000000000..502629351 --- /dev/null +++ b/taskcluster/examples-vad_transcriber-py39.yml @@ -0,0 +1,11 @@ +build: + template_file: examples-base.tyml + docker_image: "python:3.9" + dependencies: + - "linux-amd64-cpu-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/examples/vad_transcriber/test.sh 3.9.0:" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech examples: VAD transcriber Py3.9" + description: "DeepSpeech examples: VAD transcriber streaming Python 3.9" diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh index dd204dbf5..432bf7e49 100755 --- a/taskcluster/tc-all-vars.sh +++ b/taskcluster/tc-all-vars.sh @@ -16,7 +16,7 @@ if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then export DS_CPU_COUNT=$(nproc) # Those are the versions available on NuGet.org - export SUPPORTED_PYTHON_VERSIONS="3.5.4:ucs2 3.6.8:ucs2 3.7.6:ucs2 3.8.1:ucs2" + export SUPPORTED_PYTHON_VERSIONS="3.5.4:ucs2 3.6.8:ucs2 3.7.6:ucs2 3.8.1:ucs2 3.9.0:ucs2" fi; if [ "${OS}" = "Darwin" ]; then @@ -79,7 +79,7 @@ model_source_mmap="$(dirname "${model_source}")/${model_name_mmap}" ldc93s1_sample_filename='' -SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-3.5.8:ucs2 3.6.10:ucs2 3.7.6:ucs2 3.8.1:ucs2} +SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-3.5.8:ucs2 3.6.10:ucs2 3.7.6:ucs2 3.8.1:ucs2 3.9.0:ucs2} # When updating NodeJS / ElectronJS supported versions, do not forget to increment # deepspeech.node-gyp-cache. in both `system.node_gyp_cache` (taskcluster/.shared.yml) diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh index 6d573c207..13f92ea72 100755 --- a/taskcluster/tc-py-utils.sh +++ b/taskcluster/tc-py-utils.sh @@ -24,7 +24,7 @@ install_pyenv() fi pushd ${PYENV_ROOT} - git checkout --quiet 20a1f0cd7a3d2f95800d8e0d5863b4e98f25f4df + git checkout --quiet 806b30d6ce5b263a765648fbcdd68266833b7289 popd if [ ! -d "${PYENV_ROOT}/plugins/pyenv-alias" ]; then @@ -213,6 +213,10 @@ maybe_numpy_min_version() export NUMPY_BUILD_VERSION="==1.17.3" export NUMPY_DEP_VERSION=">=1.17.3" ;; + 3.9*) + export NUMPY_BUILD_VERSION="==1.19.4" + export NUMPY_DEP_VERSION=">=1.19.4" + ;; esac ;; @@ -230,6 +234,10 @@ maybe_numpy_min_version() export NUMPY_BUILD_VERSION="==1.17.3" export NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3" ;; + 3.9*) + export NUMPY_BUILD_VERSION="==1.19.4" + export NUMPY_DEP_VERSION=">=1.19.4" + ;; esac ;; @@ -251,6 +259,10 @@ maybe_numpy_min_version() export NUMPY_BUILD_VERSION="==1.17.3" export NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3" ;; + 3.9*) + export NUMPY_BUILD_VERSION="==1.19.4" + export NUMPY_DEP_VERSION=">=1.19.4" + ;; esac ;; @@ -310,7 +322,7 @@ extract_python_versions() local _pyver=$(echo "${_pyver_full}" | cut -d':' -f1) - # 3.8.x => 38 + # 3.8.x => 38 / 3.9.x => 39 local _pyver_pkg=$(echo "${_pyver}" | cut -d'.' 
-f1,2 | tr -d '.') # https://www.python.org/dev/peps/pep-3149/#proposal @@ -321,7 +333,7 @@ extract_python_versions() local _pyconf="ucs2" elif [ "${_py_unicode_type}" = "mu" ]; then local _pyconf="ucs4" - elif [ "${_py_unicode_type}" = "" ]; then # valid for Python 3.8 + elif [ "${_py_unicode_type}" = "" ]; then # valid for Python 3.8 and 3.9 local _pyconf="ucs2" fi; diff --git a/taskcluster/test-python_39-darwin-amd64-opt.yml b/taskcluster/test-python_39-darwin-amd64-opt.yml new file mode 100644 index 000000000..8c15e08f7 --- /dev/null +++ b/taskcluster/test-python_39-darwin-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-darwin-opt-base.tyml + dependencies: + - "darwin-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "homebrew_tests-darwin-amd64" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-python-tests.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech OSX AMD64 CPU Python v3.9 tests" + description: "Testing DeepSpeech for OSX/AMD64 on Python v3.9.0:m, CPU only, optimized version" diff --git a/taskcluster/test-python_39-win-amd64-opt.yml b/taskcluster/test-python_39-win-amd64-opt.yml new file mode 100644 index 000000000..81c9c115f --- /dev/null +++ b/taskcluster/test-python_39-win-amd64-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "win-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-python-tests.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech Windows AMD64 CPU Python v3.9 tests" + description: "Testing DeepSpeech for Windows/AMD64 on Python v3.9, CPU only, optimized version" diff --git a/taskcluster/test-python_39-win-cuda-opt.yml b/taskcluster/test-python_39-win-cuda-opt.yml new file mode 100644 index 000000000..9ed092419 --- /dev/null +++ b/taskcluster/test-python_39-win-cuda-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-cuda-opt-base.tyml + dependencies: + - "win-amd64-gpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-python-tests.sh 3.9.0: 16k cuda" + metadata: + name: "DeepSpeech Windows AMD64 CUDA Python v3.9 tests" + description: "Testing DeepSpeech for Windows/AMD64 on Python v3.9, CUDA, optimized version" diff --git a/taskcluster/test-python_39_16k-linux-amd64-opt.yml b/taskcluster/test-python_39_16k-linux-amd64-opt.yml new file mode 100644 index 000000000..65da9b25a --- /dev/null +++ b/taskcluster/test-python_39_16k-linux-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python-tests.sh 3.9.0: 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU Python v3.9 tests (16kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9, CPU only, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_16k-linux-amd64-prod_pbmodel-opt.yml b/taskcluster/test-python_39_16k-linux-amd64-prod_pbmodel-opt.yml new file mode 100644 index 
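The `3.9.0:` strings threaded through these task definitions follow the `version:unicode-type` convention that `extract_python_versions` in tc-py-utils.sh takes apart with `cut` and `tr`. A simplified Python mimic of that parsing (a sketch only; the real script also derives NumPy pins and per-platform ABI suffixes):

```python
def parse_python_version(entry):
    """'3.9.0:' or '3.6.8:ucs2' -> (version, package tag, unicode config)."""
    version, _, unicode_type = entry.partition(':')
    pkg = ''.join(version.split('.')[:2])   # '3.9.x' -> '39'
    if unicode_type == 'mu':                # wide-unicode ABI flag -> ucs4
        pyconf = 'ucs4'
    elif unicode_type in ('m', ''):         # no ABI flag since Python 3.8
        pyconf = 'ucs2'
    else:
        pyconf = unicode_type               # already 'ucs2' / 'ucs4'
    return version, pkg, pyconf

assert parse_python_version('3.9.0:') == ('3.9.0', '39', 'ucs2')
assert parse_python_version('3.6.8:ucs2') == ('3.6.8', '36', 'ucs2')
```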
000000000..31d714cd0 --- /dev/null +++ b/taskcluster/test-python_39_16k-linux-amd64-prod_pbmodel-opt.yml @@ -0,0 +1,10 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-cpu-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python-tests-prod.sh 3.9.0: 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU Python v3.9 prod tests (16kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9 on prod model, CPU only, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_8k-linux-amd64-opt.yml b/taskcluster/test-python_39_8k-linux-amd64-opt.yml new file mode 100644 index 000000000..79fd06ad7 --- /dev/null +++ b/taskcluster/test-python_39_8k-linux-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-cpu-opt" + - "test-training_8k-linux-amd64-py36m-opt" + test_model_task: "test-training_8k-linux-amd64-py36m-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python-tests.sh 3.9.0: 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU Python v3.9 tests (8kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9, CPU only, optimized version (8kHz)" diff --git a/taskcluster/test-python_39_8k-linux-amd64-prod_pbmodel-opt.yml b/taskcluster/test-python_39_8k-linux-amd64-prod_pbmodel-opt.yml new file mode 100644 index 000000000..27ad17b1e --- /dev/null +++ b/taskcluster/test-python_39_8k-linux-amd64-prod_pbmodel-opt.yml @@ -0,0 +1,10 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-cpu-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python-tests-prod.sh 3.9.0: 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU Python v3.9 prod tests (8kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9 on prod model, CPU only, optimized version (8kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-darwin-amd64-opt.yml b/taskcluster/test-python_39_tflite_16k-darwin-amd64-opt.yml new file mode 100644 index 000000000..3b604919e --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-darwin-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-darwin-opt-base.tyml + dependencies: + - "darwin-amd64-tflite-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "homebrew_tests-darwin-amd64" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-python_tflite-tests.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech OSX AMD64 TFLite Python v3.9 tests (16kHz)" + description: "Testing DeepSpeech for OSX/AMD64 on Python v3.9 TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-darwin-amd64-prod-opt.yml b/taskcluster/test-python_39_tflite_16k-darwin-amd64-prod-opt.yml new file mode 100644 index 000000000..fd62eadb3 --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-darwin-amd64-prod-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-darwin-opt-base.tyml + dependencies: + - "darwin-amd64-tflite-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "homebrew_tests-darwin-amd64" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-python_tflite-tests-prod.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech OSX AMD64 TFLite Python v3.9 
prod tests (16kHz)" + description: "Testing DeepSpeech for OSX/AMD64 on Python v3.9 on prod model, TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-linux-amd64-opt.yml b/taskcluster/test-python_39_tflite_16k-linux-amd64-opt.yml new file mode 100644 index 000000000..e26151429 --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-linux-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-tflite-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python_tflite-tests.sh 3.9.0: 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 TFLite Python v3.9 tests (16kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9 TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-linux-amd64-prod-opt.yml b/taskcluster/test-python_39_tflite_16k-linux-amd64-prod-opt.yml new file mode 100644 index 000000000..d33a77f02 --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-linux-amd64-prod-opt.yml @@ -0,0 +1,10 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-tflite-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python_tflite-tests-prod.sh 3.9.0: 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 TFLite Python v3.9 prod tests (16kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9 on prod model, TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-win-amd64-opt.yml b/taskcluster/test-python_39_tflite_16k-win-amd64-opt.yml new file mode 100644 index 000000000..22183b8be --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-win-amd64-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "win-amd64-tflite-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-python_tflite-tests.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech Windows AMD64 TFLite Python v3.9 tests (16kHz)" + description: "Testing DeepSpeech for Windows/AMD64 on Python v3.9 TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_16k-win-amd64-prod-opt.yml b/taskcluster/test-python_39_tflite_16k-win-amd64-prod-opt.yml new file mode 100644 index 000000000..9bdb76f0e --- /dev/null +++ b/taskcluster/test-python_39_tflite_16k-win-amd64-prod-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "win-amd64-tflite-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-python_tflite-tests-prod.sh 3.9.0: 16k" + metadata: + name: "DeepSpeech Windows AMD64 TFLite Python v3.9 prod tests (16kHz)" + description: "Testing DeepSpeech for Windows/AMD64 on Python v3.9 on prod model, TFLite, optimized version (16kHz)" diff --git a/taskcluster/test-python_39_tflite_8k-linux-amd64-prod-opt.yml b/taskcluster/test-python_39_tflite_8k-linux-amd64-prod-opt.yml new file mode 100644 index 000000000..9264eab1a --- /dev/null +++ 
b/taskcluster/test-python_39_tflite_8k-linux-amd64-prod-opt.yml @@ -0,0 +1,10 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-tflite-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-python_tflite-tests-prod.sh 3.9.0: 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 TFLite Python v3.9 prod tests (8kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on Python v3.9 on prod model, TFLite, optimized version (8kHz)" From 53e3f5374fad861585d5877823533af7d024dd22 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:44:19 -0800 Subject: [PATCH 05/62] Add I/O helpers for remote file access --- training/deepspeech_training/util/io.py | 76 +++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 training/deepspeech_training/util/io.py diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py new file mode 100644 index 000000000..4801c0756 --- /dev/null +++ b/training/deepspeech_training/util/io.py @@ -0,0 +1,76 @@ +""" +A set of I/O utils that allow us to open files on remote storage as if they were present locally. +Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets. +""" +import inspect +import os +import sys + +def path_exists_remote(path): + """ + Wrapper that allows existance check of local and remote paths like + `gs://...` + """ + # Conditional import + if path.startswith("gs://"): + from tensorflow.io import gfile + return gfile.exists(path) + return path_exists_remotes(path) + + +def open_remote(path, mode): + """ + Wrapper around open_remote() method that can handle remote paths like `gs://...` + off Google Cloud using Tensorflow's IO helpers. 
+ + This enables us to do: + with open_remote('gs://.....', mode='w+') as f: + do something with the file f, whether or not we have local access to it + """ + # Conditional import + if path.startswith("gs://"): + from tensorflow.io import gfile + return gfile.GFile(path, mode=mode) + return open_remote(path, mode) + + +def isdir_remote(path): + """ + Wrapper to check if remote and local paths are directories + """ + # Conditional import + if path.startswith("gs://"): + from tensorflow.io import gfile + return gfile.isdir(path) + return isdir_remote(path) + + +def listdir_remote(path): + """ + Wrapper to list paths in local dirs (alternative to using a glob, I suppose) + """ + # Conditional import + if path.startswith("gs://"): + from tensorflow.io import gfile + return gfile.listdir(path) + return os.listdir(path) + + +def glob_remote(filename): + """ + Wrapper that provides globs on local and remote paths like `gs://...` + """ + # Conditional import + from tensorflow.io import gfile + + return gfile.glob(filename) + + +def remove_remote(filename): + """ + Wrapper that can remove_remote local and remote files like `gs://...` + """ + # Conditional import + from tensorflow.io import gfile + + return gfile.remove_remote(filename) \ No newline at end of file From 579921cc9250e86e4aee566df8898e10fffad67b Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:45:35 -0800 Subject: [PATCH 06/62] Work remote I/O into train script --- training/deepspeech_training/train.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 8bf7a3545..d94e8a451 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -35,6 +35,7 @@ from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn +from .util.io import open_remote, remove_remote, listdir_remote check_ctcdecoder_version() @@ -514,7 +515,7 @@ def train(): # Save flags next to checkpoints os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) flags_file = os.path.join(FLAGS.save_checkpoint_dir, 'flags.txt') - with open(flags_file, 'w') as fout: + with open_remote(flags_file, 'w') as fout: fout.write(FLAGS.flags_into_string()) with tfv1.Session(config=Config.session_config) as session: @@ -541,7 +542,7 @@ def run_set(set_name, epoch, init_op, dataset=None): feature_cache_index = FLAGS.feature_cache + '.index' if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): log_info('Invalidating feature cache') - os.remove(feature_cache_index) # this will let TF also overwrite the related cache data files + remove_remote(feature_cache_index) # this will let TF also overwrite the related cache data files # Setup progress bar class LossWidget(progressbar.widgets.FormatLabel): @@ -773,7 +774,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): def file_relative_read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() + return open_remote(os.path.join(os.path.dirname(__file__), fname)).read() def export(): @@ -809,14 +810,14 @@ def export(): load_graph_for_evaluation(session) output_filename = FLAGS.export_file_name + '.pb' - if FLAGS.remove_export: - if os.path.isdir(FLAGS.export_dir): + if FLAGS.remove_remote_export: + if isdir_remote(FLAGS.export_dir): log_info('Removing old export') 
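The wrappers above all follow the same shape: sniff the path scheme, route remote paths through `tensorflow.io.gfile`, and fall back to the stdlib for local ones. A usage sketch (the bucket name is a placeholder, TensorFlow must be importable for the `gs://` branch, and the local fallback only works once patch 17 replaces the accidental self-call with plain `open()`):

```python
from deepspeech_training.util.io import open_remote, path_exists_remote

flags_file = 'gs://my-bucket/checkpoints/flags.txt'  # hypothetical bucket

with open_remote(flags_file, 'w') as fout:   # tf.io.gfile.GFile under the hood
    fout.write('--epochs 10\n')
assert path_exists_remote(flags_file)        # tf.io.gfile.exists

# The very same calls degrade to open() / os.path.exists() for local paths.
with open_remote('/tmp/flags.txt', 'w') as fout:
    fout.write('--epochs 10\n')
```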
shutil.rmtree(FLAGS.export_dir) output_graph_path = os.path.join(FLAGS.export_dir, output_filename) - if not os.path.isdir(FLAGS.export_dir): + if not isdir_remote(FLAGS.export_dir): os.makedirs(FLAGS.export_dir) frozen_graph = tfv1.graph_util.convert_variables_to_constants( @@ -829,7 +830,7 @@ def export(): dest_nodes=output_names) if not FLAGS.export_tflite: - with open(output_graph_path, 'wb') as fout: + with open_remote(output_graph_path, 'wb') as fout: fout.write(frozen_graph.SerializeToString()) else: output_tflite_path = os.path.join(FLAGS.export_dir, output_filename.replace('.pb', '.tflite')) @@ -840,7 +841,7 @@ def export(): converter.allow_custom_ops = True tflite_model = converter.convert() - with open(output_tflite_path, 'wb') as fout: + with open_remote(output_tflite_path, 'wb') as fout: fout.write(tflite_model) log_info('Models exported at %s' % (FLAGS.export_dir)) @@ -851,7 +852,7 @@ def export(): FLAGS.export_model_version)) model_runtime = 'tflite' if FLAGS.export_tflite else 'tensorflow' - with open(metadata_fname, 'w') as f: + with open_remote(metadata_fname, 'w') as f: f.write('---\n') f.write('author: {}\n'.format(FLAGS.export_author_id)) f.write('model_name: {}\n'.format(FLAGS.export_model_name)) @@ -959,7 +960,7 @@ def main(_): tfv1.reset_default_graph() FLAGS.export_tflite = True - if os.listdir(FLAGS.export_dir): + if listdir_remote(FLAGS.export_dir): log_error('Directory {} is not empty, please fix this.'.format(FLAGS.export_dir)) sys.exit(1) From 83e5cf0416fdbd51b90ea3aec52042a9374d3c1a Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:46:15 -0800 Subject: [PATCH 07/62] Remote I/O fro check_characters --- training/deepspeech_training/util/check_characters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/deepspeech_training/util/check_characters.py b/training/deepspeech_training/util/check_characters.py index f155b4ac0..b40c5b3af 100644 --- a/training/deepspeech_training/util/check_characters.py +++ b/training/deepspeech_training/util/check_characters.py @@ -19,6 +19,7 @@ import os import sys import unicodedata +from .util.io import open_remote def main(): parser = argparse.ArgumentParser() @@ -34,7 +35,7 @@ def main(): all_text = set() for in_file in in_files: - with open(in_file, "r") as csv_file: + with open_remote(in_file, "r") as csv_file: reader = csv.reader(csv_file) try: next(reader, None) # skip the file header (i.e. 
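Condensed, the loop being patched in check_characters.py amounts to the following (a sketch assuming the standard `wav_filename,wav_filesize,transcript` CSV layout, with `open_remote` letting `gs://` training sets work unchanged):

```python
import csv
from deepspeech_training.util.io import open_remote

def collect_alphabet(csv_paths):
    chars = set()
    for path in csv_paths:
        with open_remote(path, 'r') as csv_file:
            reader = csv.reader(csv_file)
            next(reader, None)        # skip the header row
            for row in reader:
                chars |= set(row[2])  # transcript is the third column
    return sorted(chars)
```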
"transcript") From 42170a57eb4d14120b847cde95998b3c91d9b7d7 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:46:49 -0800 Subject: [PATCH 08/62] Remote I/O for config --- training/deepspeech_training/util/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 0b9929e59..17d8a5a03 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -13,7 +13,7 @@ from .logging import log_error, log_warn from .helpers import parse_file_size from .augmentations import parse_augmentations - +from .util.io import path_exists_remote class ConfigSingleton: _config = None @@ -139,7 +139,7 @@ def initialize_globals(): c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000) if FLAGS.one_shot_infer: - if not os.path.exists(FLAGS.one_shot_infer): + if not path_exists_remote(FLAGS.one_shot_infer): log_error('Path specified in --one_shot_infer is not a valid file.') sys.exit(1) From 933d96dc7435074a3861627c29ee88fecfef773a Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:47:26 -0800 Subject: [PATCH 09/62] Fix relative imports --- training/deepspeech_training/util/check_characters.py | 2 +- training/deepspeech_training/util/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/check_characters.py b/training/deepspeech_training/util/check_characters.py index b40c5b3af..bde69d743 100644 --- a/training/deepspeech_training/util/check_characters.py +++ b/training/deepspeech_training/util/check_characters.py @@ -19,7 +19,7 @@ import os import sys import unicodedata -from .util.io import open_remote +from .io import open_remote def main(): parser = argparse.ArgumentParser() diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 17d8a5a03..18da6eed1 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -13,7 +13,7 @@ from .logging import log_error, log_warn from .helpers import parse_file_size from .augmentations import parse_augmentations -from .util.io import path_exists_remote +from .io import path_exists_remote class ConfigSingleton: _config = None From 396ac7fe4685c9eeeed5e1dd8a9c9d69e56019c7 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:48:49 -0800 Subject: [PATCH 10/62] Remote I/O for downloader --- training/deepspeech_training/util/downloader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/training/deepspeech_training/util/downloader.py b/training/deepspeech_training/util/downloader.py index 9fcbf6744..0a40c4817 100644 --- a/training/deepspeech_training/util/downloader.py +++ b/training/deepspeech_training/util/downloader.py @@ -2,6 +2,7 @@ import progressbar from os import path, makedirs +from .io import open_remote, path_exists_remote SIMPLE_BAR = ['Progress ', progressbar.Bar(), ' ', progressbar.Percentage(), ' completed'] @@ -9,16 +10,16 @@ def maybe_download(archive_name, target_dir, archive_url): # If archive file does not exist, download it... archive_path = path.join(target_dir, archive_name) - if not path.exists(target_dir): + if not path_exists_remote(target_dir): print('No path "%s" - creating ...' % target_dir) makedirs(target_dir) - if not path.exists(archive_path): + if not path_exists_remote(archive_path): print('No archive "%s" - downloading...' 
% archive_path) req = requests.get(archive_url, stream=True) total_size = int(req.headers.get('content-length', 0)) done = 0 - with open(archive_path, 'wb') as f: + with open_remote(archive_path, 'wb') as f: bar = progressbar.ProgressBar(max_value=total_size, widgets=SIMPLE_BAR) for data in req.iter_content(1024*1024): done += len(data) From 7de317cf59289ece0d3cce92f7171d9d68554aa5 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:49:33 -0800 Subject: [PATCH 11/62] Remote I/O for evaluate_tools --- training/deepspeech_training/util/evaluate_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/evaluate_tools.py b/training/deepspeech_training/util/evaluate_tools.py index 66fc82935..68d29f3ee 100644 --- a/training/deepspeech_training/util/evaluate_tools.py +++ b/training/deepspeech_training/util/evaluate_tools.py @@ -10,7 +10,7 @@ from .flags import FLAGS from .text import levenshtein - +from .io import open_remote def pmap(fun, iterable): pool = Pool() @@ -124,5 +124,5 @@ def save_samples_json(samples, output_path): We set ensure_ascii=True to prevent json from escaping non-ASCII chars in the texts. ''' - with open(output_path, 'w') as fout: + with open_remote(output_path, 'w') as fout: json.dump(samples, fout, default=float, ensure_ascii=False, indent=2) From 296b74e01a9409beb593a69ae885b30875031bb2 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 10:54:44 -0800 Subject: [PATCH 12/62] Remote I/O for sample_collections --- .../deepspeech_training/util/sample_collections.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 3f1b55ea2..2467854d9 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -18,6 +18,7 @@ get_audio_type_from_extension, write_wav ) +from .io import open_remote BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -80,7 +81,7 @@ def load_sample(filename, label=None): audio_type = get_audio_type_from_extension(ext) if audio_type is None: raise ValueError('Unknown audio type extension "{}"'.format(ext)) - with open(filename, 'rb') as audio_file: + with open_remote(filename, 'rb') as audio_file: if label is None: return Sample(audio_type, audio_file.read(), sample_id=filename) return LabeledSample(audio_type, audio_file.read(), label, sample_id=filename) @@ -119,7 +120,7 @@ def __init__(self, raise ValueError('Audio type "{}" not supported'.format(audio_type)) self.audio_type = audio_type self.bitrate = bitrate - self.sdb_file = open(sdb_filename, 'wb', buffering=buffering) + self.sdb_file = open_remote(sdb_filename, 'wb', buffering=buffering) self.offsets = [] self.num_samples = 0 @@ -215,7 +216,7 @@ def __init__(self, """ self.sdb_filename = sdb_filename self.id_prefix = sdb_filename if id_prefix is None else id_prefix - self.sdb_file = open(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering) + self.sdb_file = open_remote(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering) self.offsets = [] if self.sdb_file.read(len(MAGIC)) != MAGIC: raise RuntimeError('No Sample Database') @@ -345,7 +346,7 @@ def __init__(self, self.labeled = labeled if labeled: fieldnames.append('transcript') - self.csv_file = open(csv_filename, 'w', encoding='utf-8', newline='') + self.csv_file = open_remote(csv_filename, 'w', encoding='utf-8', 
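Writing goes through the same wrapper, for example in the CSV sample writers being converted here. A sketch (paths hypothetical; as patch 17 later makes explicit, `encoding`/`newline` are only honored for local files, since gfile has no equivalents):

```python
import csv
from deepspeech_training.util.io import open_remote

fieldnames = ['wav_filename', 'wav_filesize', 'transcript']
with open_remote('gs://my-bucket/sets/train.csv', 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({'wav_filename': 'a.wav', 'wav_filesize': 1234,
                     'transcript': 'hello'})
```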
newline='') self.csv_writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) self.csv_writer.writeheader() self.counter = 0 @@ -399,7 +400,7 @@ def __init__(self, include : str[] List of files to include into tar root. """ - self.tar = tarfile.open(tar_filename, 'w:gz' if gz else 'w') + self.tar = tarfile.open_remote(tar_filename, 'w:gz' if gz else 'w') samples_dir = tarfile.TarInfo('samples') samples_dir.type = tarfile.DIRTYPE self.tar.addfile(samples_dir) @@ -499,7 +500,7 @@ def __init__(self, csv_filename, labeled=None, reverse=False): """ rows = [] csv_dir = Path(csv_filename).parent - with open(csv_filename, 'r', encoding='utf8') as csv_file: + with open_remote(csv_filename, 'r', encoding='utf8') as csv_file: reader = csv.DictReader(csv_file) if 'transcript' in reader.fieldnames: if labeled is None: From abe5dd2eb4bbab122c96138f841d9f6a572f0ca9 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 12:49:44 -0800 Subject: [PATCH 13/62] Remote I/O for taskcluster --- training/deepspeech_training/util/taskcluster.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index d0053c7dc..1a5200ab9 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -14,6 +14,7 @@ from pkg_resources import parse_version +from .io import isdir_remote, open_remote DEFAULT_SCHEMES = { 'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s', @@ -48,7 +49,7 @@ def report_progress(count, block_size, total_size): except OSError as e: if e.errno != errno.EEXIST: raise e - assert os.path.isdir(os.path.dirname(target_dir)) + assert isdir_remote(os.path.dirname(target_dir)) tc_filename = os.path.basename(tc_url) target_file = os.path.join(target_dir, tc_filename) @@ -61,7 +62,7 @@ def report_progress(count, block_size, total_size): print('File already exists: %s' % target_file) if is_gzip: - with open(target_file, "r+b") as frw: + with open_remote(target_file, "r+b") as frw: decompressed = gzip.decompress(frw.read()) frw.seek(0) frw.write(decompressed) @@ -75,7 +76,7 @@ def maybe_download_tc_bin(**kwargs): os.chmod(final_file, final_stat.st_mode | stat.S_IEXEC) def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() + return open_remote(os.path.join(os.path.dirname(__file__), fname)).read() def main(): parser = argparse.ArgumentParser(description='Tooling to ease downloading of components from TaskCluster.') From c3dc4c0d5c1a301c661d957bfaf6e2aae36dc20c Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 14:06:22 -0800 Subject: [PATCH 14/62] Fix bad I/O helper fn replace errors --- training/deepspeech_training/util/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py index 4801c0756..eb177b76c 100644 --- a/training/deepspeech_training/util/io.py +++ b/training/deepspeech_training/util/io.py @@ -15,7 +15,7 @@ def path_exists_remote(path): if path.startswith("gs://"): from tensorflow.io import gfile return gfile.exists(path) - return path_exists_remotes(path) + return os.path.exists(path) def open_remote(path, mode): @@ -42,7 +42,7 @@ def isdir_remote(path): if path.startswith("gs://"): from tensorflow.io import gfile return gfile.isdir(path) - 
return isdir_remote(path) + return os.path.isdir(path) def listdir_remote(path): From 3d503bd69ec11e455cad2b39275e033c5642eb32 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 14:16:37 -0800 Subject: [PATCH 15/62] Add universal is_remote_path to I/O helper --- training/deepspeech_training/util/io.py | 28 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py index eb177b76c..3d9b3dc0a 100644 --- a/training/deepspeech_training/util/io.py +++ b/training/deepspeech_training/util/io.py @@ -1,23 +1,41 @@ """ -A set of I/O utils that allow us to open files on remote storage as if they were present locally. +A set of I/O utils that allow us to open files on remote storage as if they were present locally and access +into HDFS storage using Tensorflow's C++ FileStream API. Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets. """ import inspect import os import sys + +def is_remote_path(path): + """ + Returns True iff the path is one of the remote formats that this + module supports + """ + return path.startswith('gs://') or path.starts_with('hdfs://') + + def path_exists_remote(path): """ Wrapper that allows existance check of local and remote paths like `gs://...` """ # Conditional import - if path.startswith("gs://"): + if is_remote_path(path): from tensorflow.io import gfile return gfile.exists(path) return os.path.exists(path) +def copy_remote(src, dst, overwrite=False): + """ + Allows us to copy a file from local to remote or vice versa + """ + from tensorflow.io import gfile + return gfile.copy(src, dst, overwrite) + + def open_remote(path, mode): """ Wrapper around open_remote() method that can handle remote paths like `gs://...` @@ -28,7 +46,7 @@ def open_remote(path, mode): do something with the file f, whether or not we have local access to it """ # Conditional import - if path.startswith("gs://"): + if is_remote_path(path): from tensorflow.io import gfile return gfile.GFile(path, mode=mode) return open_remote(path, mode) @@ -39,7 +57,7 @@ def isdir_remote(path): Wrapper to check if remote and local paths are directories """ # Conditional import - if path.startswith("gs://"): + if is_remote_path(path): from tensorflow.io import gfile return gfile.isdir(path) return os.path.isdir(path) @@ -50,7 +68,7 @@ def listdir_remote(path): Wrapper to list paths in local dirs (alternative to using a glob, I suppose) """ # Conditional import - if path.startswith("gs://"): + if is_remote_path(path): from tensorflow.io import gfile return gfile.listdir(path) return os.listdir(path) From ad0883042126f265bcc6c7180313746beb257535 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 14:17:03 -0800 Subject: [PATCH 16/62] Work remote I/O into audio utils -- a bit more involved --- training/deepspeech_training/util/audio.py | 34 ++++++++++++++++------ 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py index 031f13ed6..da1a9acba 100644 --- a/training/deepspeech_training/util/audio.py +++ b/training/deepspeech_training/util/audio.py @@ -8,6 +8,7 @@ from .helpers import LimitingPool from collections import namedtuple +from .io import open_remote, remove_remote, copy_remote, is_remote_path AudioFormat = namedtuple('AudioFormat', 'rate channels width') @@ -168,29 +169,44 @@ def __init__(self, audio_path, as_path=False, 
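`copy_remote` exists mostly to support the stage-to-local-disk pattern patch 16 introduces in audio.py: sox and `wave` need a real file, so remote audio is copied into a temp file first. The same pattern extracted into a hedged helper (the names here are mine, not DeepSpeech API):

```python
import os
import tempfile
from deepspeech_training.util.io import copy_remote, is_remote_path

def stage_locally(path):
    """Return (usable_path, tmp_path_to_delete_or_None) for `path`."""
    if not is_remote_path(path):
        return path, None
    fd, tmp_path = tempfile.mkstemp(suffix=os.path.splitext(path)[1])
    os.close(fd)
    copy_remote(path, tmp_path, overwrite=True)  # e.g. gs:// -> local disk
    return tmp_path, tmp_path

local_path, to_delete = stage_locally('/tmp/sample.wav')  # remote URIs work too
try:
    pass  # run the sox/wave conversion against local_path here
finally:
    if to_delete:
        os.remove(to_delete)
```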
audio_format=DEFAULT_FORMAT): self.audio_format = audio_format self.as_path = as_path self.open_file = None + self.open_wav = None self.tmp_file_path = None def __enter__(self): if self.audio_path.endswith('.wav'): - self.open_file = wave.open(self.audio_path, 'r') - if read_audio_format_from_wav_file(self.open_file) == self.audio_format: + self.open_file = open_remote(self.audio_path, 'r') + self.open_wav = wave.open(self.open_file) + if read_audio_format_from_wav_file(self.open_wav) == self.audio_format: if self.as_path: + self.open_wav.close() self.open_file.close() return self.audio_path - return self.open_file + return self.open_wav + self.open_wav.close() self.open_file.close() + + # If the format isn't right, copy the file to local tmp dir and do the conversion on disk + if is_remote_path(self.audio_path): + _, self.tmp_src_file_path = tempfile.mkstemp(suffix='.wav') + copy_remote(self.audio_path, self.tmp_src_file_path) + self.audio_path = self.tmp_file_path + _, self.tmp_file_path = tempfile.mkstemp(suffix='.wav') convert_audio(self.audio_path, self.tmp_file_path, file_type='wav', audio_format=self.audio_format) if self.as_path: return self.tmp_file_path - self.open_file = wave.open(self.tmp_file_path, 'r') - return self.open_file + self.open_wav = wave.open(self.tmp_file_path, 'r') + return self.open_wav def __exit__(self, *args): if not self.as_path: - self.open_file.close() + self.open_wav.close() + if self.open_file: + self.open_file.close() if self.tmp_file_path is not None: os.remove(self.tmp_file_path) + if self.tmp_src_file_path is not None: + os.remove(self.tmp_src_file_path) def read_frames(wav_file, frame_duration_ms=30, yield_remainder=False): @@ -320,7 +336,7 @@ def read_opus(opus_file): def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): - with wave.open(wav_file, 'wb') as wav_file_writer: + with wave.open_remote(wav_file, 'wb') as wav_file_writer: wav_file_writer.setframerate(audio_format.rate) wav_file_writer.setnchannels(audio_format.channels) wav_file_writer.setsampwidth(audio_format.width) @@ -329,7 +345,7 @@ def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): def read_wav(wav_file): wav_file.seek(0) - with wave.open(wav_file, 'rb') as wav_file_reader: + with wave.open_remote(wav_file, 'rb') as wav_file_reader: audio_format = read_audio_format_from_wav_file(wav_file_reader) pcm_data = wav_file_reader.readframes(wav_file_reader.getnframes()) return audio_format, pcm_data @@ -353,7 +369,7 @@ def write_audio(audio_type, audio_file, pcm_data, audio_format=DEFAULT_FORMAT, b def read_wav_duration(wav_file): wav_file.seek(0) - with wave.open(wav_file, 'rb') as wav_file_reader: + with wave.open_remote(wav_file, 'rb') as wav_file_reader: return wav_file_reader.getnframes() / wav_file_reader.getframerate() From 90e2e1f7d26cffe603dc47d75b1fda6f330a4799 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 14:45:05 -0800 Subject: [PATCH 17/62] Respect buffering, encoding, newline, closefd, and opener if we're looking at a local file --- training/deepspeech_training/util/io.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py index 3d9b3dc0a..7d72f9103 100644 --- a/training/deepspeech_training/util/io.py +++ b/training/deepspeech_training/util/io.py @@ -36,11 +36,13 @@ def copy_remote(src, dst, overwrite=False): return gfile.copy(src, dst, overwrite) -def open_remote(path, mode): +def open_remote(path, mode='r', buffering=-1, 
encoding=None, newline=None, closefd=True, opener=None): """ Wrapper around open_remote() method that can handle remote paths like `gs://...` off Google Cloud using Tensorflow's IO helpers. + buffering, encoding, newline, closefd, and opener are ignored for remote files + This enables us to do: with open_remote('gs://.....', mode='w+') as f: do something with the file f, whether or not we have local access to it @@ -49,7 +51,7 @@ def open_remote(path, mode): if is_remote_path(path): from tensorflow.io import gfile return gfile.GFile(path, mode=mode) - return open_remote(path, mode) + return open(path, mode, buffering=buffering, encoding=encoding, newline=newline, closefd=closefd, opener=opener) def isdir_remote(path): From 8f310729989db0fd1b9368d258fe89bad352b06b Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 15:09:42 -0800 Subject: [PATCH 18/62] Fix startswith check --- training/deepspeech_training/util/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py index 7d72f9103..885a276db 100644 --- a/training/deepspeech_training/util/io.py +++ b/training/deepspeech_training/util/io.py @@ -13,7 +13,7 @@ def is_remote_path(path): Returns True iff the path is one of the remote formats that this module supports """ - return path.startswith('gs://') or path.starts_with('hdfs://') + return path.startswith('gs://') or path.startswith('hdfs://') def path_exists_remote(path): From a6322b384e9c0c55f72151799ce78e2728117626 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:29:16 -0800 Subject: [PATCH 19/62] Fix remote I/O handling in train --- training/deepspeech_training/train.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index d94e8a451..2e7263b1d 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -35,7 +35,7 @@ from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn -from .util.io import open_remote, remove_remote, listdir_remote +from .util.io import open_remote, remove_remote, listdir_remote, is_remote_path, isdir_remote check_ctcdecoder_version() @@ -513,7 +513,8 @@ def train(): best_dev_path = os.path.join(FLAGS.save_checkpoint_dir, 'best_dev') # Save flags next to checkpoints - os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) + if not is_remote_path(FLAGS.save_checkpoint_dir): + os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) flags_file = os.path.join(FLAGS.save_checkpoint_dir, 'flags.txt') with open_remote(flags_file, 'w') as fout: fout.write(FLAGS.flags_into_string()) @@ -813,11 +814,11 @@ def export(): if FLAGS.remove_remote_export: if isdir_remote(FLAGS.export_dir): log_info('Removing old export') - shutil.rmtree(FLAGS.export_dir) + remove_remote(FLAGS.export_dir) output_graph_path = os.path.join(FLAGS.export_dir, output_filename) - if not isdir_remote(FLAGS.export_dir): + if not is_remote_path(FLAGS.export_dir) and not os.path.isdir(FLAGS.export_dir): os.makedirs(FLAGS.export_dir) frozen_graph = tfv1.graph_util.convert_variables_to_constants( From 0030cab22078592134ce9950c8e6fb603af0679b Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:29:23 -0800 Subject: [PATCH 20/62] Skip remote zipping for now --- 
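Patch 19's `is_remote_path` guards encode one more object-store quirk: GCS has no real directories, so `os.makedirs` must only run for local checkpoint and export dirs. The train() change, condensed into a sketch:

```python
import os
from deepspeech_training.util.io import is_remote_path, open_remote

def save_flags(save_checkpoint_dir, flags_text):
    if not is_remote_path(save_checkpoint_dir):       # no mkdir on gs://
        os.makedirs(save_checkpoint_dir, exist_ok=True)
    with open_remote(os.path.join(save_checkpoint_dir, 'flags.txt'), 'w') as fout:
        fout.write(flags_text)

save_flags('/tmp/ckpt', '--epochs 10\n')  # 'gs://bucket/ckpt' works the same
```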
training/deepspeech_training/train.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 2e7263b1d..6ebe29a63 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -875,8 +875,12 @@ def export(): def package_zip(): # --export_dir path/to/export/LANG_CODE/ => path/to/export/LANG_CODE.zip export_dir = os.path.join(os.path.abspath(FLAGS.export_dir), '') # Force ending '/' - zip_filename = os.path.dirname(export_dir) + if is_remote_path(export_dir): + log_error("Cannot package remote path zip %s. Please do this manually." % export_dir) + return + zip_filename = os.path.dirname(export_dir) + shutil.copy(FLAGS.scorer_path, export_dir) archive = shutil.make_archive(zip_filename, 'zip', export_dir) From 64d278560dc20b6f623bc3770e527e6d9f551829 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:29:43 -0800 Subject: [PATCH 21/62] Why do we need absolute paths everywhere here? --- training/deepspeech_training/util/check_characters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/deepspeech_training/util/check_characters.py b/training/deepspeech_training/util/check_characters.py index bde69d743..7e6cdd0bd 100644 --- a/training/deepspeech_training/util/check_characters.py +++ b/training/deepspeech_training/util/check_characters.py @@ -28,7 +28,7 @@ def main(): parser.add_argument("-alpha", "--alphabet-format", help="Bool. Print in format for alphabet.txt", action="store_true") parser.add_argument("-unicode", "--disable-unicode-variants", help="Bool. DISABLE check for unicode consistency (use with --alphabet-format)", action="store_true") args = parser.parse_args() - in_files = [os.path.abspath(i) for i in args.csv_files.split(",")] + in_files = args.csv_files.split(",") print("### Reading in the following transcript files: ###") print("### {} ###".format(in_files)) From 783cdad8db471cc33c0f9d9fa79b0b1c8d4c198b Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:30:11 -0800 Subject: [PATCH 22/62] Fix downloader and taskcluster directory mgmt with remote I/O --- training/deepspeech_training/util/downloader.py | 4 ++-- training/deepspeech_training/util/taskcluster.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/training/deepspeech_training/util/downloader.py b/training/deepspeech_training/util/downloader.py index 0a40c4817..b8fcdb8d7 100644 --- a/training/deepspeech_training/util/downloader.py +++ b/training/deepspeech_training/util/downloader.py @@ -2,7 +2,7 @@ import progressbar from os import path, makedirs -from .io import open_remote, path_exists_remote +from .io import open_remote, path_exists_remote, is_remote_path SIMPLE_BAR = ['Progress ', progressbar.Bar(), ' ', progressbar.Percentage(), ' completed'] @@ -10,7 +10,7 @@ def maybe_download(archive_name, target_dir, archive_url): # If archive file does not exist, download it... archive_path = path.join(target_dir, archive_name) - if not path_exists_remote(target_dir): + if not is_remote_path(target_dir) and not path.exists(target_dir): print('No path "%s" - creating ...' 
% target_dir) makedirs(target_dir) diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index 1a5200ab9..ba4f2019d 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -14,7 +14,7 @@ from pkg_resources import parse_version -from .io import isdir_remote, open_remote +from .io import isdir_remote, open_remote, is_remote_path DEFAULT_SCHEMES = { 'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s', @@ -43,13 +43,13 @@ def report_progress(count, block_size, total_size): assert target_dir is not None - target_dir = os.path.abspath(target_dir) - try: - os.makedirs(target_dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise e - assert isdir_remote(os.path.dirname(target_dir)) + if not is_remote_path(target_dir): + try: + os.makedirs(target_dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise e + assert os.path.isdir(os.path.dirname(target_dir)) tc_filename = os.path.basename(tc_url) target_file = os.path.join(target_dir, tc_filename) From 8fe972eb6f296f0bb1bbb4b8f51657c7660656b6 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:40:40 -0800 Subject: [PATCH 23/62] Fix wave file reading helpers --- training/deepspeech_training/util/audio.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py index da1a9acba..5e2ed5d9f 100644 --- a/training/deepspeech_training/util/audio.py +++ b/training/deepspeech_training/util/audio.py @@ -336,7 +336,8 @@ def read_opus(opus_file): def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): - with wave.open_remote(wav_file, 'wb') as wav_file_writer: + # wav_file is already a file-pointer here + with wave.open(wav_file, 'wb') as wav_file_writer: wav_file_writer.setframerate(audio_format.rate) wav_file_writer.setnchannels(audio_format.channels) wav_file_writer.setsampwidth(audio_format.width) @@ -345,7 +346,7 @@ def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): def read_wav(wav_file): wav_file.seek(0) - with wave.open_remote(wav_file, 'rb') as wav_file_reader: + with wave.open(wav_file, 'rb') as wav_file_reader: audio_format = read_audio_format_from_wav_file(wav_file_reader) pcm_data = wav_file_reader.readframes(wav_file_reader.getnframes()) return audio_format, pcm_data @@ -369,7 +370,7 @@ def write_audio(audio_type, audio_file, pcm_data, audio_format=DEFAULT_FORMAT, b def read_wav_duration(wav_file): wav_file.seek(0) - with wave.open_remote(wav_file, 'rb') as wav_file_reader: + with wave.open(wav_file, 'rb') as wav_file_reader: return wav_file_reader.getnframes() / wav_file_reader.getframerate() From 86cba458c556227e5c685fce81a7112cb76af7dd Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:40:59 -0800 Subject: [PATCH 24/62] Fix remote path handling for CSV sample reading --- .../deepspeech_training/util/sample_collections.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 2467854d9..e0e6b12b9 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -18,7 +18,7 @@ get_audio_type_from_extension, 
write_wav ) -from .io import open_remote +from .io import open_remote, is_remote_path BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -499,7 +499,6 @@ def __init__(self, csv_filename, labeled=None, reverse=False): If the order of the samples should be reversed """ rows = [] - csv_dir = Path(csv_filename).parent with open_remote(csv_filename, 'r', encoding='utf8') as csv_file: reader = csv.DictReader(csv_file) if 'transcript' in reader.fieldnames: @@ -509,9 +508,12 @@ def __init__(self, csv_filename, labeled=None, reverse=False): raise RuntimeError('No transcript data (missing CSV column)') for row in reader: wav_filename = Path(row['wav_filename']) - if not wav_filename.is_absolute(): - wav_filename = csv_dir / wav_filename - wav_filename = str(wav_filename) + if not wav_filename.is_absolute() and not is_remote_path(row['wav_filename']): + wav_filename = Path(csv_filename).parent / wav_filename + wav_filename = str(wav_filename) + else: + # Pathlib otherwise removes a / from filenames like hdfs:// + wav_filename = row['wav_filename'] wav_filesize = int(row['wav_filesize']) if 'wav_filesize' in row else 0 if labeled: rows.append((wav_filename, wav_filesize, row['transcript'])) From fc0b4956431271f0b7caa834492aaf71fd2768d2 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 16:46:59 -0800 Subject: [PATCH 25/62] TODO: CSVWriter still totally breaks with remote paths --- training/deepspeech_training/util/sample_collections.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index e0e6b12b9..d98564846 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -334,6 +334,8 @@ def __init__(self, If True: Writes labeled samples (util.sample_collections.LabeledSample) only. If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances. 
""" + + # TODO: This all breaks with remote paths self.csv_filename = Path(csv_filename) self.csv_base_dir = self.csv_filename.parent.resolve().absolute() self.set_name = self.csv_filename.stem From be39d3354dc71499b5fa461c8ce2983779b9f262 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Thu, 12 Nov 2020 21:46:39 -0800 Subject: [PATCH 26/62] Perform data loading I/O within worker process rather than main process by wrapping Sample --- .../deepspeech_training/util/augmentations.py | 10 ++++-- .../util/sample_collections.py | 34 +++++++++++++++---- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 941c17f2b..0934fbd5b 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -150,6 +150,12 @@ def _init_augmentation_worker(preparation_context): AUGMENTATION_CONTEXT = preparation_context +def _load_and_augment_sample(timed_sample, context=None): + sample, clock = timed_sample + realized_sample = sample.unpack() + return _augment_sample((realized_sample, clock), context) + + def _augment_sample(timed_sample, context=None): context = AUGMENTATION_CONTEXT if context is None else context sample, clock = timed_sample @@ -213,12 +219,12 @@ def timed_samples(): context = AugmentationContext(audio_type, augmentations) if process_ahead == 0: for timed_sample in timed_samples(): - yield _augment_sample(timed_sample, context=context) + yield _load_and_augment_sample(timed_sample, context=context) else: with LimitingPool(process_ahead=process_ahead, initializer=_init_augmentation_worker, initargs=(context,)) as pool: - yield from pool.imap(_augment_sample, timed_samples()) + yield from pool.imap(_load_and_augment_sample, timed_samples()) finally: for augmentation in augmentations: augmentation.stop() diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index d98564846..23b0422ba 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -60,6 +60,27 @@ def __init__(self, audio_type, raw_data, transcript, audio_format=DEFAULT_FORMAT self.transcript = transcript +class PackedSample: + """ + A wrapper that we can carry around in an iterator and pass to a child process in order to + have the child process do the loading/unpacking of the sample, allowing for parallel file + I/O. + """ + def __init__(self, filename, audio_type, label): + self.filename = filename + self.audio_type = audio_type + self.label = label + + def unpack(self): + print("Unpacking sample: %s" % self.filename) + with open_remote(self.filename, 'rb') as audio_file: + data = audio_file.read() + if self.label is None: + s = Sample(self.audio_type, data, sample_id=self.filename) + s = LabeledSample(self.audio_type, data, self.label, sample_id=self.filename) + print("unpacked!") + return s + def load_sample(filename, label=None): """ Loads audio-file as a (labeled or unlabeled) sample @@ -70,21 +91,20 @@ def load_sample(filename, label=None): Filename of the audio-file to load as sample label : str Label (transcript) of the sample. 
-        If None: return util.audio.Sample instance
-        Otherwise: return util.sample_collections.LabeledSample instance
+        If None: returned result.unpack() will return util.audio.Sample instance
+        Otherwise: returned result.unpack() will return util.sample_collections.LabeledSample instance
 
     Returns
     -------
-    util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance
+    util.sample_collections.PackedSample, a wrapper object, on which calling unpack() will return
+    util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance
     """
+    print("loading sample!")
     ext = os.path.splitext(filename)[1].lower()
     audio_type = get_audio_type_from_extension(ext)
     if audio_type is None:
         raise ValueError('Unknown audio type extension "{}"'.format(ext))
-    with open_remote(filename, 'rb') as audio_file:
-        if label is None:
-            return Sample(audio_type, audio_file.read(), sample_id=filename)
-        return LabeledSample(audio_type, audio_file.read(), label, sample_id=filename)
+    return PackedSample(filename, audio_type, label)

From 2332e7fb76c72dc9d7bc2ca73823ebfa83ec85b9 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Fri, 13 Nov 2020 10:45:53 -0800
Subject: [PATCH 27/62] Linter fix: define self.tmp_src_file_path in init

---
 training/deepspeech_training/util/audio.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 5e2ed5d9f..05ceba385 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -171,6 +171,7 @@ def __init__(self, audio_path, as_path=False, audio_format=DEFAULT_FORMAT):
         self.open_file = None
         self.open_wav = None
         self.tmp_file_path = None
+        self.tmp_src_file_path = None
 
     def __enter__(self):
         if self.audio_path.endswith('.wav'):

From 3d2b09b951241885d773a42d3a5c20188216a2bb Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Fri, 13 Nov 2020 10:47:06 -0800
Subject: [PATCH 28/62] Linter seems unhappy with conditional imports. Make
 gfile a module-level import. I usually do this as a conditional because tf
 takes a while to load and it's nice to skip it when you want to run a script
 that just preps data or something like that, but it doesn't seem like a big
 deal.

---
 training/deepspeech_training/util/io.py | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py
index 885a276db..5f1de4830 100644
--- a/training/deepspeech_training/util/io.py
+++ b/training/deepspeech_training/util/io.py
@@ -3,9 +3,8 @@
 into HDFS storage using Tensorflow's C++ FileStream API.
 Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets.
""" -import inspect import os -import sys +from tensorflow.io import gfile def is_remote_path(path): @@ -21,9 +20,7 @@ def path_exists_remote(path): Wrapper that allows existance check of local and remote paths like `gs://...` """ - # Conditional import if is_remote_path(path): - from tensorflow.io import gfile return gfile.exists(path) return os.path.exists(path) @@ -32,7 +29,6 @@ def copy_remote(src, dst, overwrite=False): """ Allows us to copy a file from local to remote or vice versa """ - from tensorflow.io import gfile return gfile.copy(src, dst, overwrite) @@ -47,9 +43,7 @@ def open_remote(path, mode='r', buffering=-1, encoding=None, newline=None, close with open_remote('gs://.....', mode='w+') as f: do something with the file f, whether or not we have local access to it """ - # Conditional import if is_remote_path(path): - from tensorflow.io import gfile return gfile.GFile(path, mode=mode) return open(path, mode, buffering=buffering, encoding=encoding, newline=newline, closefd=closefd, opener=opener) @@ -58,9 +52,7 @@ def isdir_remote(path): """ Wrapper to check if remote and local paths are directories """ - # Conditional import if is_remote_path(path): - from tensorflow.io import gfile return gfile.isdir(path) return os.path.isdir(path) @@ -69,9 +61,7 @@ def listdir_remote(path): """ Wrapper to list paths in local dirs (alternative to using a glob, I suppose) """ - # Conditional import if is_remote_path(path): - from tensorflow.io import gfile return gfile.listdir(path) return os.listdir(path) @@ -80,9 +70,6 @@ def glob_remote(filename): """ Wrapper that provides globs on local and remote paths like `gs://...` """ - # Conditional import - from tensorflow.io import gfile - return gfile.glob(filename) @@ -91,6 +78,4 @@ def remove_remote(filename): Wrapper that can remove_remote local and remote files like `gs://...` """ # Conditional import - from tensorflow.io import gfile - return gfile.remove_remote(filename) \ No newline at end of file From 47020e4ecbcb30976104e3ad9dbf7af5b9945cd7 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Fri, 13 Nov 2020 19:20:02 -0800 Subject: [PATCH 29/62] Add an imap_unordered helper to LimitPool -- I might experiment with this --- training/deepspeech_training/util/helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py index 195c117e5..8d35e149e 100644 --- a/training/deepspeech_training/util/helpers.py +++ b/training/deepspeech_training/util/helpers.py @@ -103,6 +103,11 @@ def imap(self, fun, it): self.processed -= 1 yield obj + def imap_unordered(self, fun, it): + for obj in self.pool.imap_unordered(fun, self._limit(it)): + self.processed -= 1 + yield obj + def terminate(self): self.pool.terminate() From 8c1a183c671063ae113c2f9d1ac710dc4b8efc76 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Fri, 13 Nov 2020 19:24:09 -0800 Subject: [PATCH 30/62] Clean up print debugging statements --- training/deepspeech_training/util/sample_collections.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 23b0422ba..d075b4409 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -72,13 +72,11 @@ def __init__(self, filename, audio_type, label): self.label = label def unpack(self): - print("Unpacking sample: %s" % self.filename) with open_remote(self.filename, 'rb') 
as audio_file:
             data = audio_file.read()
         if self.label is None:
             s = Sample(self.audio_type, data, sample_id=self.filename)
         else:
             s = LabeledSample(self.audio_type, data, self.label, sample_id=self.filename)
-        print("unpacked!")
         return s
 
@@ -99,7 +97,6 @@ def load_sample(filename, label=None):
         util.sample_collections.PackedSample, a wrapper object, on which calling unpack() will return
         util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance
     """
-    print("loading sample!")
     ext = os.path.splitext(filename)[1].lower()
     audio_type = get_audio_type_from_extension(ext)
     if audio_type is None:

From fb6d4ca361da6283f75ca5e57edea4f55d08bf68 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Fri, 13 Nov 2020 19:36:07 -0800
Subject: [PATCH 31/62] Add disclaimers to CSV and Tar writers

---
 .../deepspeech_training/util/sample_collections.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py
index d075b4409..227d152ce 100644
--- a/training/deepspeech_training/util/sample_collections.py
+++ b/training/deepspeech_training/util/sample_collections.py
@@ -350,9 +350,9 @@ def __init__(self,
         labeled : bool or None
             If True: Writes labeled samples (util.sample_collections.LabeledSample) only.
             If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances.
+
+        Currently only works with local files (not gs:// or hdfs://...)
         """
-
-        # TODO: This all breaks with remote paths
         self.csv_filename = Path(csv_filename)
         self.csv_base_dir = self.csv_filename.parent.resolve().absolute()
         self.set_name = self.csv_filename.stem
@@ -400,7 +400,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
 
 class TarWriter:  # pylint: disable=too-many-instance-attributes
-    """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file"""
+    """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file."""
     def __init__(self,
                  tar_filename,
                  gz=False,
@@ -418,8 +418,10 @@ def __init__(self,
             If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances.
         include : str[]
             List of files to include into tar root.
+
+        Currently only works with local files (not gs:// or hdfs://...)
         """
-        self.tar = tarfile.open_remote(tar_filename, 'w:gz' if gz else 'w')
+        self.tar = tarfile.open(tar_filename, 'w:gz' if gz else 'w')
         samples_dir = tarfile.TarInfo('samples')
         samples_dir.type = tarfile.DIRTYPE
         self.tar.addfile(samples_dir)

From b5b3b2546ca5ba9581b833194921fe9f23daaf3e Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Mon, 16 Nov 2020 13:46:34 -0800
Subject: [PATCH 32/62] Clean up remote I/O docs

---
 training/deepspeech_training/util/io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py
index 5f1de4830..947b43af6 100644
--- a/training/deepspeech_training/util/io.py
+++ b/training/deepspeech_training/util/io.py
@@ -34,7 +34,7 @@ def copy_remote(src, dst, overwrite=False):
 
 def open_remote(path, mode='r', buffering=-1, encoding=None, newline=None, closefd=True, opener=None):
     """
-    Wrapper around open_remote() method that can handle remote paths like `gs://...`
+    Wrapper around open() method that can handle remote paths like `gs://...`
     off Google Cloud using Tensorflow's IO helpers.
buffering, encoding, newline, closefd, and opener are ignored for remote files @@ -75,7 +75,7 @@ def glob_remote(filename): def remove_remote(filename): """ - Wrapper that can remove_remote local and remote files like `gs://...` + Wrapper that can remove local and remote files like `gs://...` """ # Conditional import return gfile.remove_remote(filename) \ No newline at end of file From 611633fcf64faaa168dbf4c50f799499902a2b2a Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Mon, 16 Nov 2020 13:47:06 -0800 Subject: [PATCH 33/62] Remove unnecessary uses of `open_remote()` where we know `__file__` will always be local --- training/deepspeech_training/train.py | 2 +- training/deepspeech_training/util/taskcluster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 6ebe29a63..3428598d2 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -775,7 +775,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): def file_relative_read(fname): - return open_remote(os.path.join(os.path.dirname(__file__), fname)).read() + return open(os.path.join(os.path.dirname(__file__), fname)).read() def export(): diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index ba4f2019d..4471659de 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -76,7 +76,7 @@ def maybe_download_tc_bin(**kwargs): os.chmod(final_file, final_stat.st_mode | stat.S_IEXEC) def read(fname): - return open_remote(os.path.join(os.path.dirname(__file__), fname)).read() + return open(os.path.join(os.path.dirname(__file__), fname)).read() def main(): parser = argparse.ArgumentParser(description='Tooling to ease downloading of components from TaskCluster.') From d0678cd1b70d2207dfb29c29863a31eb255971a7 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Mon, 16 Nov 2020 13:47:21 -0800 Subject: [PATCH 34/62] Remove unused unordered imap from LimitPool --- training/deepspeech_training/util/helpers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py index 8d35e149e..195c117e5 100644 --- a/training/deepspeech_training/util/helpers.py +++ b/training/deepspeech_training/util/helpers.py @@ -103,11 +103,6 @@ def imap(self, fun, it): self.processed -= 1 yield obj - def imap_unordered(self, fun, it): - for obj in self.pool.imap_unordered(fun, self._limit(it)): - self.processed -= 1 - yield obj - def terminate(self): self.pool.terminate() From 7121ca5a2b1456539034dd115401f39032e9dec0 Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Tue, 17 Nov 2020 13:40:35 -0800 Subject: [PATCH 35/62] Add a dockerignore for slightly faster local docker builds --- .dockerignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..7fbf26f3a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +tensorflow/ +data/ From ffe2155733e257df587d78fd70e10df1c512877b Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Tue, 17 Nov 2020 13:47:55 -0800 Subject: [PATCH 36/62] Undo remote edits for taskcluster as this is all local --- training/deepspeech_training/util/taskcluster.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/training/deepspeech_training/util/taskcluster.py 
b/training/deepspeech_training/util/taskcluster.py index 4471659de..13829bdff 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -14,8 +14,6 @@ from pkg_resources import parse_version -from .io import isdir_remote, open_remote, is_remote_path - DEFAULT_SCHEMES = { 'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s', 'tensorflow': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s' @@ -43,7 +41,7 @@ def report_progress(count, block_size, total_size): assert target_dir is not None - if not is_remote_path(target_dir): + if not os.path.isdir(target_dir): try: os.makedirs(target_dir) except OSError as e: @@ -62,7 +60,7 @@ def report_progress(count, block_size, total_size): print('File already exists: %s' % target_file) if is_gzip: - with open_remote(target_file, "r+b") as frw: + with open(target_file, "r+b") as frw: decompressed = gzip.decompress(frw.read()) frw.seek(0) frw.write(decompressed) From 8bf1e9ddb79bc59225d2f9949f6269f76b4cdddf Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Tue, 17 Nov 2020 14:21:31 -0800 Subject: [PATCH 37/62] Fix too aggressive F&R --- training/deepspeech_training/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 3428598d2..94ca7c04d 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -811,7 +811,7 @@ def export(): load_graph_for_evaluation(session) output_filename = FLAGS.export_file_name + '.pb' - if FLAGS.remove_remote_export: + if FLAGS.remove_export: if isdir_remote(FLAGS.export_dir): log_info('Removing old export') remove_remote(FLAGS.export_dir) From 9aaa0e406bd77969a024aaa8f2e4b9ec031059cf Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Tue, 17 Nov 2020 14:31:48 -0800 Subject: [PATCH 38/62] Make sure to unpack samples now --- bin/compare_samples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/compare_samples.py b/bin/compare_samples.py index 94108a7a0..27898cd1e 100755 --- a/bin/compare_samples.py +++ b/bin/compare_samples.py @@ -15,8 +15,8 @@ def fail(message): def compare_samples(): - sample1 = load_sample(CLI_ARGS.sample1) - sample2 = load_sample(CLI_ARGS.sample2) + sample1 = load_sample(CLI_ARGS.sample1).unpack() + sample2 = load_sample(CLI_ARGS.sample2).unpack() if sample1.audio_format != sample2.audio_format: fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format)) if sample1.duration != sample2.duration: From 24e9e6777c112373792bd137e591d1bd1d8626bf Mon Sep 17 00:00:00 2001 From: CatalinVoss Date: Tue, 17 Nov 2020 14:44:26 -0800 Subject: [PATCH 39/62] Make sure we properly unpack samples when changing audio types --- training/deepspeech_training/util/audio.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py index 05ceba385..04e99fbdd 100644 --- a/training/deepspeech_training/util/audio.py +++ b/training/deepspeech_training/util/audio.py @@ -118,15 +118,16 @@ def change_audio_type(self, new_audio_type, bitrate=None): self.audio_type = new_audio_type -def _change_audio_type(sample_and_audio_type): - 
sample, audio_type, bitrate = sample_and_audio_type
+def _unpack_and_change_audio_type(sample_and_audio_type):
+    packed_sample, audio_type, bitrate = sample_and_audio_type
+    sample = packed_sample.unpack()
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample
 
-def change_audio_types(samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
+def change_audio_types(packed_samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
     with LimitingPool(processes=processes, process_ahead=process_ahead) as pool:
-        yield from pool.imap(_change_audio_type, map(lambda s: (s, audio_type, bitrate), samples))
+        yield from pool.imap(_unpack_and_change_audio_type, map(lambda s: (s, audio_type, bitrate), packed_samples))
 
 
 def get_audio_type_from_extension(ext):

From 6cb638211efc604f1285fd800fa481b1680b7c04 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 16:55:49 -0800
Subject: [PATCH 40/62] Only unpack when we need to, to make things work with
 SDBs

---
 training/deepspeech_training/util/audio.py         | 5 ++++-
 training/deepspeech_training/util/augmentations.py | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 04e99fbdd..15c7743c6 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -120,7 +120,10 @@ def change_audio_type(self, new_audio_type, bitrate=None):
 
 def _unpack_and_change_audio_type(sample_and_audio_type):
     packed_sample, audio_type, bitrate = sample_and_audio_type
-    sample = packed_sample.unpack()
+    if hasattr(packed_sample, 'unpack'):
+        sample = packed_sample.unpack()
+    else:
+        sample = packed_sample
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample
 
diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py
index 0934fbd5b..79aa57502 100644
--- a/training/deepspeech_training/util/augmentations.py
+++ b/training/deepspeech_training/util/augmentations.py
@@ -152,7 +152,10 @@ def _init_augmentation_worker(preparation_context):
 
 def _load_and_augment_sample(timed_sample, context=None):
     sample, clock = timed_sample
-    realized_sample = sample.unpack()
+    if hasattr(sample, 'unpack'):
+        realized_sample = sample.unpack()
+    else:
+        realized_sample = sample
     return _augment_sample((realized_sample, clock), context)

From 3caa474cce484d824a43b87d354319a4d4d4b79a Mon Sep 17 00:00:00 2001
From: Alexandre Lissy
Date: Thu, 19 Nov 2020 10:46:59 +0100
Subject: [PATCH 41/62] Fix #3429: TaskCluster behavioral change wrt
 compression of artifacts

---
 taskcluster/tc-all-utils.sh           | 9 +++++++--
 taskcluster/tc-python_tflite-tests.sh | 1 -
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/taskcluster/tc-all-utils.sh b/taskcluster/tc-all-utils.sh
index 3f877c5ac..504954d14 100755
--- a/taskcluster/tc-all-utils.sh
+++ b/taskcluster/tc-all-utils.sh
@@ -94,11 +94,16 @@ download_dependency_file()
 
 download_data()
 {
-  ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
-  ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
+  local _model_source_file=$(basename "${model_source}")
+  ${WGET} "${model_source}" -O - | gunzip --force > "${TASKCLUSTER_TMP_DIR}/${_model_source_file}"
+
+  local _model_source_mmap_file=$(basename "${model_source_mmap}")
+  ${WGET} "${model_source_mmap}" -O - | gunzip --force > "${TASKCLUSTER_TMP_DIR}/${_model_source_mmap_file}"
+
   cp
${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/ cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.bytes.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.bytes.scorer + cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources } diff --git a/taskcluster/tc-python_tflite-tests.sh b/taskcluster/tc-python_tflite-tests.sh index fa087bb0f..712fc4e07 100644 --- a/taskcluster/tc-python_tflite-tests.sh +++ b/taskcluster/tc-python_tflite-tests.sh @@ -12,7 +12,6 @@ set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") -model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite} download_data From f5cbda694a30f2b981d4fc83104343dd9fec6954 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 19 Nov 2020 16:58:01 +0200 Subject: [PATCH 42/62] Revert "Merge pull request #3424 from mozilla/io-fixes" This reverts commit ab1288ffde7118a76e5394e142b789adf3ad1bba, reversing changes made to 08d18d7328c03eb0c65d28ffdc0d3755549585e0. --- .dockerignore | 2 -- bin/compare_samples.py | 4 ++-- training/deepspeech_training/train.py | 2 +- training/deepspeech_training/util/audio.py | 12 ++++-------- training/deepspeech_training/util/augmentations.py | 5 +---- training/deepspeech_training/util/taskcluster.py | 6 ++++-- 6 files changed, 12 insertions(+), 19 deletions(-) delete mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 7fbf26f3a..000000000 --- a/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -tensorflow/ -data/ diff --git a/bin/compare_samples.py b/bin/compare_samples.py index 27898cd1e..94108a7a0 100755 --- a/bin/compare_samples.py +++ b/bin/compare_samples.py @@ -15,8 +15,8 @@ def fail(message): def compare_samples(): - sample1 = load_sample(CLI_ARGS.sample1).unpack() - sample2 = load_sample(CLI_ARGS.sample2).unpack() + sample1 = load_sample(CLI_ARGS.sample1) + sample2 = load_sample(CLI_ARGS.sample2) if sample1.audio_format != sample2.audio_format: fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format)) if sample1.duration != sample2.duration: diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 94ca7c04d..3428598d2 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -811,7 +811,7 @@ def export(): load_graph_for_evaluation(session) output_filename = FLAGS.export_file_name + '.pb' - if FLAGS.remove_export: + if FLAGS.remove_remote_export: if isdir_remote(FLAGS.export_dir): log_info('Removing old export') remove_remote(FLAGS.export_dir) diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py index 15c7743c6..05ceba385 100644 --- a/training/deepspeech_training/util/audio.py +++ b/training/deepspeech_training/util/audio.py @@ -118,19 +118,15 @@ def change_audio_type(self, new_audio_type, bitrate=None): self.audio_type = new_audio_type -def _unpack_and_change_audio_type(sample_and_audio_type): - packed_sample, audio_type, bitrate = sample_and_audio_type - if hasattr(sample, 'unpack'): - sample = packed_sample.unpack() - else: - sample = packed_sample +def _change_audio_type(sample_and_audio_type): + sample, audio_type, bitrate = sample_and_audio_type sample.change_audio_type(audio_type, bitrate=bitrate) return 
sample -def change_audio_types(packed_samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None): +def change_audio_types(samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None): with LimitingPool(processes=processes, process_ahead=process_ahead) as pool: - yield from pool.imap(_unpack_and_change_audio_type, map(lambda s: (s, audio_type, bitrate), packed_samples)) + yield from pool.imap(_change_audio_type, map(lambda s: (s, audio_type, bitrate), samples)) def get_audio_type_from_extension(ext): diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 79aa57502..0934fbd5b 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -152,10 +152,7 @@ def _init_augmentation_worker(preparation_context): def _load_and_augment_sample(timed_sample, context=None): sample, clock = timed_sample - if hasattr(sample, 'unpack'): - realized_sample = sample.unpack() - else: - realized_sample = sample + realized_sample = sample.unpack() return _augment_sample((realized_sample, clock), context) diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index 13829bdff..4471659de 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -14,6 +14,8 @@ from pkg_resources import parse_version +from .io import isdir_remote, open_remote, is_remote_path + DEFAULT_SCHEMES = { 'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s', 'tensorflow': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s' @@ -41,7 +43,7 @@ def report_progress(count, block_size, total_size): assert target_dir is not None - if not os.path.isdir(target_dir): + if not is_remote_path(target_dir): try: os.makedirs(target_dir) except OSError as e: @@ -60,7 +62,7 @@ def report_progress(count, block_size, total_size): print('File already exists: %s' % target_file) if is_gzip: - with open(target_file, "r+b") as frw: + with open_remote(target_file, "r+b") as frw: decompressed = gzip.decompress(frw.read()) frw.seek(0) frw.write(decompressed) From 88f7297215dbbc16b0109d0d754fa9cbc57247e4 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 19 Nov 2020 16:58:21 +0200 Subject: [PATCH 43/62] Revert "Merge pull request #3420 from CatalinVoss/remote-io" This reverts commit 08d18d7328c03eb0c65d28ffdc0d3755549585e0, reversing changes made to 12badcce1ffc820bebc4cd2ed5d9787b248200f6. 
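At its core, what this pair of reverts backs out is the lazy-loading scheme from patches 26, 39, and 40: samples travel through the pipeline as lightweight wrappers, and the actual file read happens inside a pool worker so that I/O against slow remote storage is parallelized. A minimal sketch of that idea, assuming a packed-sample object exposing unpack(); the names _realize and realized_samples are illustrative, the real code used PackedSample together with LimitingPool:

    from multiprocessing import Pool

    def _realize(packed_sample):
        # Illustrative: the file read happens here, inside the worker
        # process, so reads overlap across workers instead of serializing
        # in the parent process.
        return packed_sample.unpack()

    def realized_samples(packed_samples, processes=4):
        # Stream fully-loaded samples back, in order, as workers finish.
        with Pool(processes) as pool:
            yield from pool.imap(_realize, packed_samples)
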
--- training/deepspeech_training/train.py | 30 +++---- training/deepspeech_training/util/audio.py | 30 ++----- .../deepspeech_training/util/augmentations.py | 10 +-- .../util/check_characters.py | 5 +- training/deepspeech_training/util/config.py | 4 +- .../deepspeech_training/util/downloader.py | 7 +- .../util/evaluate_tools.py | 4 +- training/deepspeech_training/util/io.py | 81 ------------------- .../util/sample_collections.py | 56 ++++--------- .../deepspeech_training/util/taskcluster.py | 17 ++-- 10 files changed, 53 insertions(+), 191 deletions(-) delete mode 100644 training/deepspeech_training/util/io.py diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 3428598d2..8bf7a3545 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -35,7 +35,6 @@ from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn -from .util.io import open_remote, remove_remote, listdir_remote, is_remote_path, isdir_remote check_ctcdecoder_version() @@ -513,10 +512,9 @@ def train(): best_dev_path = os.path.join(FLAGS.save_checkpoint_dir, 'best_dev') # Save flags next to checkpoints - if not is_remote_path(FLAGS.save_checkpoint_dir): - os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) + os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) flags_file = os.path.join(FLAGS.save_checkpoint_dir, 'flags.txt') - with open_remote(flags_file, 'w') as fout: + with open(flags_file, 'w') as fout: fout.write(FLAGS.flags_into_string()) with tfv1.Session(config=Config.session_config) as session: @@ -543,7 +541,7 @@ def run_set(set_name, epoch, init_op, dataset=None): feature_cache_index = FLAGS.feature_cache + '.index' if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): log_info('Invalidating feature cache') - remove_remote(feature_cache_index) # this will let TF also overwrite the related cache data files + os.remove(feature_cache_index) # this will let TF also overwrite the related cache data files # Setup progress bar class LossWidget(progressbar.widgets.FormatLabel): @@ -811,14 +809,14 @@ def export(): load_graph_for_evaluation(session) output_filename = FLAGS.export_file_name + '.pb' - if FLAGS.remove_remote_export: - if isdir_remote(FLAGS.export_dir): + if FLAGS.remove_export: + if os.path.isdir(FLAGS.export_dir): log_info('Removing old export') - remove_remote(FLAGS.export_dir) + shutil.rmtree(FLAGS.export_dir) output_graph_path = os.path.join(FLAGS.export_dir, output_filename) - if not is_remote_path(FLAGS.export_dir) and not os.path.isdir(FLAGS.export_dir): + if not os.path.isdir(FLAGS.export_dir): os.makedirs(FLAGS.export_dir) frozen_graph = tfv1.graph_util.convert_variables_to_constants( @@ -831,7 +829,7 @@ def export(): dest_nodes=output_names) if not FLAGS.export_tflite: - with open_remote(output_graph_path, 'wb') as fout: + with open(output_graph_path, 'wb') as fout: fout.write(frozen_graph.SerializeToString()) else: output_tflite_path = os.path.join(FLAGS.export_dir, output_filename.replace('.pb', '.tflite')) @@ -842,7 +840,7 @@ def export(): converter.allow_custom_ops = True tflite_model = converter.convert() - with open_remote(output_tflite_path, 'wb') as fout: + with open(output_tflite_path, 'wb') as fout: fout.write(tflite_model) log_info('Models exported at %s' % (FLAGS.export_dir)) @@ -853,7 +851,7 @@ def export(): 
FLAGS.export_model_version)) model_runtime = 'tflite' if FLAGS.export_tflite else 'tensorflow' - with open_remote(metadata_fname, 'w') as f: + with open(metadata_fname, 'w') as f: f.write('---\n') f.write('author: {}\n'.format(FLAGS.export_author_id)) f.write('model_name: {}\n'.format(FLAGS.export_model_name)) @@ -875,12 +873,8 @@ def export(): def package_zip(): # --export_dir path/to/export/LANG_CODE/ => path/to/export/LANG_CODE.zip export_dir = os.path.join(os.path.abspath(FLAGS.export_dir), '') # Force ending '/' - if is_remote_path(export_dir): - log_error("Cannot package remote path zip %s. Please do this manually." % export_dir) - return - zip_filename = os.path.dirname(export_dir) - + shutil.copy(FLAGS.scorer_path, export_dir) archive = shutil.make_archive(zip_filename, 'zip', export_dir) @@ -965,7 +959,7 @@ def main(_): tfv1.reset_default_graph() FLAGS.export_tflite = True - if listdir_remote(FLAGS.export_dir): + if os.listdir(FLAGS.export_dir): log_error('Directory {} is not empty, please fix this.'.format(FLAGS.export_dir)) sys.exit(1) diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py index 05ceba385..031f13ed6 100644 --- a/training/deepspeech_training/util/audio.py +++ b/training/deepspeech_training/util/audio.py @@ -8,7 +8,6 @@ from .helpers import LimitingPool from collections import namedtuple -from .io import open_remote, remove_remote, copy_remote, is_remote_path AudioFormat = namedtuple('AudioFormat', 'rate channels width') @@ -169,45 +168,29 @@ def __init__(self, audio_path, as_path=False, audio_format=DEFAULT_FORMAT): self.audio_format = audio_format self.as_path = as_path self.open_file = None - self.open_wav = None self.tmp_file_path = None - self.tmp_src_file_path = None def __enter__(self): if self.audio_path.endswith('.wav'): - self.open_file = open_remote(self.audio_path, 'r') - self.open_wav = wave.open(self.open_file) - if read_audio_format_from_wav_file(self.open_wav) == self.audio_format: + self.open_file = wave.open(self.audio_path, 'r') + if read_audio_format_from_wav_file(self.open_file) == self.audio_format: if self.as_path: - self.open_wav.close() self.open_file.close() return self.audio_path - return self.open_wav - self.open_wav.close() + return self.open_file self.open_file.close() - - # If the format isn't right, copy the file to local tmp dir and do the conversion on disk - if is_remote_path(self.audio_path): - _, self.tmp_src_file_path = tempfile.mkstemp(suffix='.wav') - copy_remote(self.audio_path, self.tmp_src_file_path) - self.audio_path = self.tmp_file_path - _, self.tmp_file_path = tempfile.mkstemp(suffix='.wav') convert_audio(self.audio_path, self.tmp_file_path, file_type='wav', audio_format=self.audio_format) if self.as_path: return self.tmp_file_path - self.open_wav = wave.open(self.tmp_file_path, 'r') - return self.open_wav + self.open_file = wave.open(self.tmp_file_path, 'r') + return self.open_file def __exit__(self, *args): if not self.as_path: - self.open_wav.close() - if self.open_file: - self.open_file.close() + self.open_file.close() if self.tmp_file_path is not None: os.remove(self.tmp_file_path) - if self.tmp_src_file_path is not None: - os.remove(self.tmp_src_file_path) def read_frames(wav_file, frame_duration_ms=30, yield_remainder=False): @@ -337,7 +320,6 @@ def read_opus(opus_file): def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): - # wav_file is already a file-pointer here with wave.open(wav_file, 'wb') as wav_file_writer: 
wav_file_writer.setframerate(audio_format.rate) wav_file_writer.setnchannels(audio_format.channels) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 0934fbd5b..941c17f2b 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -150,12 +150,6 @@ def _init_augmentation_worker(preparation_context): AUGMENTATION_CONTEXT = preparation_context -def _load_and_augment_sample(timed_sample, context=None): - sample, clock = timed_sample - realized_sample = sample.unpack() - return _augment_sample((realized_sample, clock), context) - - def _augment_sample(timed_sample, context=None): context = AUGMENTATION_CONTEXT if context is None else context sample, clock = timed_sample @@ -219,12 +213,12 @@ def timed_samples(): context = AugmentationContext(audio_type, augmentations) if process_ahead == 0: for timed_sample in timed_samples(): - yield _load_and_augment_sample(timed_sample, context=context) + yield _augment_sample(timed_sample, context=context) else: with LimitingPool(process_ahead=process_ahead, initializer=_init_augmentation_worker, initargs=(context,)) as pool: - yield from pool.imap(_load_and_augment_sample, timed_samples()) + yield from pool.imap(_augment_sample, timed_samples()) finally: for augmentation in augmentations: augmentation.stop() diff --git a/training/deepspeech_training/util/check_characters.py b/training/deepspeech_training/util/check_characters.py index 7e6cdd0bd..f155b4ac0 100644 --- a/training/deepspeech_training/util/check_characters.py +++ b/training/deepspeech_training/util/check_characters.py @@ -19,7 +19,6 @@ import os import sys import unicodedata -from .io import open_remote def main(): parser = argparse.ArgumentParser() @@ -28,14 +27,14 @@ def main(): parser.add_argument("-alpha", "--alphabet-format", help="Bool. Print in format for alphabet.txt", action="store_true") parser.add_argument("-unicode", "--disable-unicode-variants", help="Bool. DISABLE check for unicode consistency (use with --alphabet-format)", action="store_true") args = parser.parse_args() - in_files = args.csv_files.split(",") + in_files = [os.path.abspath(i) for i in args.csv_files.split(",")] print("### Reading in the following transcript files: ###") print("### {} ###".format(in_files)) all_text = set() for in_file in in_files: - with open_remote(in_file, "r") as csv_file: + with open(in_file, "r") as csv_file: reader = csv.reader(csv_file) try: next(reader, None) # skip the file header (i.e. 
"transcript") diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 18da6eed1..0b9929e59 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -13,7 +13,7 @@ from .logging import log_error, log_warn from .helpers import parse_file_size from .augmentations import parse_augmentations -from .io import path_exists_remote + class ConfigSingleton: _config = None @@ -139,7 +139,7 @@ def initialize_globals(): c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000) if FLAGS.one_shot_infer: - if not path_exists_remote(FLAGS.one_shot_infer): + if not os.path.exists(FLAGS.one_shot_infer): log_error('Path specified in --one_shot_infer is not a valid file.') sys.exit(1) diff --git a/training/deepspeech_training/util/downloader.py b/training/deepspeech_training/util/downloader.py index b8fcdb8d7..9fcbf6744 100644 --- a/training/deepspeech_training/util/downloader.py +++ b/training/deepspeech_training/util/downloader.py @@ -2,7 +2,6 @@ import progressbar from os import path, makedirs -from .io import open_remote, path_exists_remote, is_remote_path SIMPLE_BAR = ['Progress ', progressbar.Bar(), ' ', progressbar.Percentage(), ' completed'] @@ -10,16 +9,16 @@ def maybe_download(archive_name, target_dir, archive_url): # If archive file does not exist, download it... archive_path = path.join(target_dir, archive_name) - if not is_remote_path(target_dir) and not path.exists(target_dir): + if not path.exists(target_dir): print('No path "%s" - creating ...' % target_dir) makedirs(target_dir) - if not path_exists_remote(archive_path): + if not path.exists(archive_path): print('No archive "%s" - downloading...' % archive_path) req = requests.get(archive_url, stream=True) total_size = int(req.headers.get('content-length', 0)) done = 0 - with open_remote(archive_path, 'wb') as f: + with open(archive_path, 'wb') as f: bar = progressbar.ProgressBar(max_value=total_size, widgets=SIMPLE_BAR) for data in req.iter_content(1024*1024): done += len(data) diff --git a/training/deepspeech_training/util/evaluate_tools.py b/training/deepspeech_training/util/evaluate_tools.py index 68d29f3ee..66fc82935 100644 --- a/training/deepspeech_training/util/evaluate_tools.py +++ b/training/deepspeech_training/util/evaluate_tools.py @@ -10,7 +10,7 @@ from .flags import FLAGS from .text import levenshtein -from .io import open_remote + def pmap(fun, iterable): pool = Pool() @@ -124,5 +124,5 @@ def save_samples_json(samples, output_path): We set ensure_ascii=True to prevent json from escaping non-ASCII chars in the texts. ''' - with open_remote(output_path, 'w') as fout: + with open(output_path, 'w') as fout: json.dump(samples, fout, default=float, ensure_ascii=False, indent=2) diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py deleted file mode 100644 index 947b43af6..000000000 --- a/training/deepspeech_training/util/io.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A set of I/O utils that allow us to open files on remote storage as if they were present locally and access -into HDFS storage using Tensorflow's C++ FileStream API. -Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets. 
-""" -import os -from tensorflow.io import gfile - - -def is_remote_path(path): - """ - Returns True iff the path is one of the remote formats that this - module supports - """ - return path.startswith('gs://') or path.startswith('hdfs://') - - -def path_exists_remote(path): - """ - Wrapper that allows existance check of local and remote paths like - `gs://...` - """ - if is_remote_path(path): - return gfile.exists(path) - return os.path.exists(path) - - -def copy_remote(src, dst, overwrite=False): - """ - Allows us to copy a file from local to remote or vice versa - """ - return gfile.copy(src, dst, overwrite) - - -def open_remote(path, mode='r', buffering=-1, encoding=None, newline=None, closefd=True, opener=None): - """ - Wrapper around open() method that can handle remote paths like `gs://...` - off Google Cloud using Tensorflow's IO helpers. - - buffering, encoding, newline, closefd, and opener are ignored for remote files - - This enables us to do: - with open_remote('gs://.....', mode='w+') as f: - do something with the file f, whether or not we have local access to it - """ - if is_remote_path(path): - return gfile.GFile(path, mode=mode) - return open(path, mode, buffering=buffering, encoding=encoding, newline=newline, closefd=closefd, opener=opener) - - -def isdir_remote(path): - """ - Wrapper to check if remote and local paths are directories - """ - if is_remote_path(path): - return gfile.isdir(path) - return os.path.isdir(path) - - -def listdir_remote(path): - """ - Wrapper to list paths in local dirs (alternative to using a glob, I suppose) - """ - if is_remote_path(path): - return gfile.listdir(path) - return os.listdir(path) - - -def glob_remote(filename): - """ - Wrapper that provides globs on local and remote paths like `gs://...` - """ - return gfile.glob(filename) - - -def remove_remote(filename): - """ - Wrapper that can remove local and remote files like `gs://...` - """ - # Conditional import - return gfile.remove_remote(filename) \ No newline at end of file diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 227d152ce..3f1b55ea2 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -18,7 +18,6 @@ get_audio_type_from_extension, write_wav ) -from .io import open_remote, is_remote_path BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -60,25 +59,6 @@ def __init__(self, audio_type, raw_data, transcript, audio_format=DEFAULT_FORMAT self.transcript = transcript -class PackedSample: - """ - A wrapper that we can carry around in an iterator and pass to a child process in order to - have the child process do the loading/unpacking of the sample, allowing for parallel file - I/O. - """ - def __init__(self, filename, audio_type, label): - self.filename = filename - self.audio_type = audio_type - self.label = label - - def unpack(self): - with open_remote(self.filename, 'rb') as audio_file: - data = audio_file.read() - if self.label is None: - s = Sample(self.audio_type, data, sample_id=self.filename) - s = LabeledSample(self.audio_type, data, self.label, sample_id=self.filename) - return s - def load_sample(filename, label=None): """ Loads audio-file as a (labeled or unlabeled) sample @@ -89,19 +69,21 @@ def load_sample(filename, label=None): Filename of the audio-file to load as sample label : str Label (transcript) of the sample. 
- If None: returned result.unpack() will return util.audio.Sample instance - Otherwise: returned result.unpack() util.sample_collections.LabeledSample instance + If None: return util.audio.Sample instance + Otherwise: return util.sample_collections.LabeledSample instance Returns ------- - util.sample_collections.PackedSample, a wrapper object, on which calling unpack() will return - util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance + util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance """ ext = os.path.splitext(filename)[1].lower() audio_type = get_audio_type_from_extension(ext) if audio_type is None: raise ValueError('Unknown audio type extension "{}"'.format(ext)) - return PackedSample(filename, audio_type, label) + with open(filename, 'rb') as audio_file: + if label is None: + return Sample(audio_type, audio_file.read(), sample_id=filename) + return LabeledSample(audio_type, audio_file.read(), label, sample_id=filename) class DirectSDBWriter: @@ -137,7 +119,7 @@ def __init__(self, raise ValueError('Audio type "{}" not supported'.format(audio_type)) self.audio_type = audio_type self.bitrate = bitrate - self.sdb_file = open_remote(sdb_filename, 'wb', buffering=buffering) + self.sdb_file = open(sdb_filename, 'wb', buffering=buffering) self.offsets = [] self.num_samples = 0 @@ -233,7 +215,7 @@ def __init__(self, """ self.sdb_filename = sdb_filename self.id_prefix = sdb_filename if id_prefix is None else id_prefix - self.sdb_file = open_remote(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering) + self.sdb_file = open(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering) self.offsets = [] if self.sdb_file.read(len(MAGIC)) != MAGIC: raise RuntimeError('No Sample Database') @@ -350,8 +332,6 @@ def __init__(self, labeled : bool or None If True: Writes labeled samples (util.sample_collections.LabeledSample) only. If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances. - - Currently only works with local files (not gs:// or hdfs://...) """ self.csv_filename = Path(csv_filename) self.csv_base_dir = self.csv_filename.parent.resolve().absolute() @@ -365,7 +345,7 @@ def __init__(self, self.labeled = labeled if labeled: fieldnames.append('transcript') - self.csv_file = open_remote(csv_filename, 'w', encoding='utf-8', newline='') + self.csv_file = open(csv_filename, 'w', encoding='utf-8', newline='') self.csv_writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) self.csv_writer.writeheader() self.counter = 0 @@ -400,7 +380,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): class TarWriter: # pylint: disable=too-many-instance-attributes - """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file.""" + """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file""" def __init__(self, tar_filename, gz=False, @@ -418,8 +398,6 @@ def __init__(self, If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances. include : str[] List of files to include into tar root. - - Currently only works with local files (not gs:// or hdfs://...) 
""" self.tar = tarfile.open(tar_filename, 'w:gz' if gz else 'w') samples_dir = tarfile.TarInfo('samples') @@ -520,7 +498,8 @@ def __init__(self, csv_filename, labeled=None, reverse=False): If the order of the samples should be reversed """ rows = [] - with open_remote(csv_filename, 'r', encoding='utf8') as csv_file: + csv_dir = Path(csv_filename).parent + with open(csv_filename, 'r', encoding='utf8') as csv_file: reader = csv.DictReader(csv_file) if 'transcript' in reader.fieldnames: if labeled is None: @@ -529,12 +508,9 @@ def __init__(self, csv_filename, labeled=None, reverse=False): raise RuntimeError('No transcript data (missing CSV column)') for row in reader: wav_filename = Path(row['wav_filename']) - if not wav_filename.is_absolute() and not is_remote_path(row['wav_filename']): - wav_filename = Path(csv_filename).parent / wav_filename - wav_filename = str(wav_filename) - else: - # Pathlib otherwise removes a / from filenames like hdfs:// - wav_filename = row['wav_filename'] + if not wav_filename.is_absolute(): + wav_filename = csv_dir / wav_filename + wav_filename = str(wav_filename) wav_filesize = int(row['wav_filesize']) if 'wav_filesize' in row else 0 if labeled: rows.append((wav_filename, wav_filesize, row['transcript'])) diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index 4471659de..d0053c7dc 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -14,7 +14,6 @@ from pkg_resources import parse_version -from .io import isdir_remote, open_remote, is_remote_path DEFAULT_SCHEMES = { 'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s', @@ -43,13 +42,13 @@ def report_progress(count, block_size, total_size): assert target_dir is not None - if not is_remote_path(target_dir): - try: - os.makedirs(target_dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise e - assert os.path.isdir(os.path.dirname(target_dir)) + target_dir = os.path.abspath(target_dir) + try: + os.makedirs(target_dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise e + assert os.path.isdir(os.path.dirname(target_dir)) tc_filename = os.path.basename(tc_url) target_file = os.path.join(target_dir, tc_filename) @@ -62,7 +61,7 @@ def report_progress(count, block_size, total_size): print('File already exists: %s' % target_file) if is_gzip: - with open_remote(target_file, "r+b") as frw: + with open(target_file, "r+b") as frw: decompressed = gzip.decompress(frw.read()) frw.seek(0) frw.write(decompressed) From 3ae77ca75d7cb35285205c0ba98dfc066522f3a2 Mon Sep 17 00:00:00 2001 From: Olaf Thiele Date: Mon, 23 Nov 2020 19:55:27 +0100 Subject: [PATCH 44/62] Conditional msg for missing lm.binary added --- native_client/generate_scorer_package.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/native_client/generate_scorer_package.cpp b/native_client/generate_scorer_package.cpp index 4486b42cb..4513a0f8d 100644 --- a/native_client/generate_scorer_package.cpp +++ b/native_client/generate_scorer_package.cpp @@ -68,7 +68,8 @@ create_package(absl::optional alphabet_path, int err = scorer.load_lm(lm_path); if (err != DS_ERR_SCORER_NO_TRIE) { cerr << "Error loading language model file: " - << DS_ErrorCodeToErrorMessage(err) << "\n"; + << (err == DS_ERR_SCORER_UNREADABLE ? "Can't open binary LM file." 
: DS_ErrorCodeToErrorMessage(err)) + << "\n"; return 1; } scorer.fill_dictionary(words); From c822a6e8753babe12efba71010d3511a1c3d62a4 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Wed, 22 Jan 2020 18:58:33 +0100 Subject: [PATCH 45/62] =?UTF-8?q?Importer=20for=20dataset=20from=20Centre?= =?UTF-8?q?=20de=20Conf=C3=A9rences=20Pierre=20Mend=C3=A8s-France?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Released by Ministère de l'Economie, des Finances, et de la Relance --- bin/import_ccpmf.py | 513 ++++++++++++++++++ .../deepspeech_training/util/downloader.py | 2 +- 2 files changed, 514 insertions(+), 1 deletion(-) create mode 100755 bin/import_ccpmf.py diff --git a/bin/import_ccpmf.py b/bin/import_ccpmf.py new file mode 100755 index 000000000..0895b1442 --- /dev/null +++ b/bin/import_ccpmf.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python +""" +Importer for dataset published from Centre de Conférence Pierre Mendès-France +Ministère de l'Économie, des Finances et de la Relance +""" + +import csv +import sys +import os +import progressbar +import subprocess +import zipfile +from glob import glob +from multiprocessing import Pool + +import hashlib +import decimal +import math +import unicodedata +import re +import sox +import xml.etree.ElementTree as ET + +try: + from num2words import num2words +except ImportError as ex: + print("pip install num2words") + sys.exit(1) + +import requests +import json + +from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download +from deepspeech_training.util.helpers import secs_to_hours +from deepspeech_training.util.importers import ( + get_counter, + get_importers_parser, + get_imported_samples, + get_validate_label, + print_import_report, +) +from ds_ctcdecoder import Alphabet + +FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"] +SAMPLE_RATE = 16000 +CHANNELS = 1 +BIT_DEPTH = 16 +MAX_SECS = 10 +MIN_SECS = 0.85 + +DATASET_RELEASE_CSV = "https://data.economie.gouv.fr/explore/dataset/transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020/download/?format=csv&timezone=Europe/Berlin&lang=fr&use_labels_for_header=true&csv_separator=%3B" +DATASET_RELEASE_SHA = [ + ("863d39a06a388c6491c6ff2f6450b151f38f1b57", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001"), + ("2f3a0305aa04c61220bb00b5a4e553e45dbf12e1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002"), + ("5e55e9f1f844097349188ac875947e5a3d7fe9f1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003"), + ("8bf54842cf07948ca5915e27a8bd5fa5139c06ae", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004"), + ("c8963504aadc015ac48f9af80058a0bb3440b94f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005"), + ("d95e225e908621d83ce4e9795fd108d9d310e244", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006"), + ("de6ed9c2b0ee80ca879aae8ba7923cc93217d811", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007"), + ("234283c47dacfcd4450d836c52c25f3e807fc5f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008"), + ("4e6b67a688639bb72f8cd81782eaba604a8d32a6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009"), + ("4165a51389777c8af8e6253d87bdacb877e8b3b0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010"), + ("34322e7009780d97ef5bd02bf2f2c7a31f00baff", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011"), + ("48c5be3b2ca9d6108d525da6a03e91d93a95dbac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012"), + ("87573172f506a189c2ebc633856fe11a2e9cd213", 
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013"), + ("6ab2c9e508e9278d5129f023e018725c4a7c69e8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014"), + ("4f84df831ef46dce5d3ab3e21817687a2d8c12d0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015"), + ("e69bfb079885c299cb81080ef88b1b8b57158aa6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016"), + ("5f764ba788ee273981cf211b242c29b49ca22c5e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017"), + ("b6aa81a959525363223494830c1e7307d4c4bae6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018"), + ("91ddcf43c7bf113a6f2528b857c7ec22a50a148a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019"), + ("fa1b29273dd77b9a7494983a2f9ae52654b931d7", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020"), + ("1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021"), + ("ce6420d5d0b6b5135ba559f83e1a82d4d615c470", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022"), + ("d0976ed292ac24fcf1590d1ea195077c74b05471", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023"), + ("ec746cd6af066f62d9bf8d3b2f89174783ff4e3c", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024"), + ("570d9e1e84178e32fd867171d4b3aaecda1fd4fb", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025"), + ("c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026"), + ("08406a51146d88e208704ce058c060a1e44efa50", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027"), + ("199aedad733a78ea1e7d47def9c71c6fd5795e02", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028"), + ("db856a068f92fb4f01f410bba42c7271de0f231a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029"), + ("e3c0135f16c6c9d25a09dcb4f99a685438a84740", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030"), + ("e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031"), + ("be5e80cbc49b59b31ae33c30576ef0e1a162d84e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032"), + ("501df58e3ff55fcfd75b93dab57566dc536948b8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033"), + ("1a114875811a8cdcb8d85a9f6dbee78be3e05131", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034"), + ("465d824e7ee46448369182c0c28646d155a2249b", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035"), + ("37f341b1b266d143eb73138c31cfff3201b9d619", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036"), + ("9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037"), + ("54886755630cb080a53098cb1b6c951c6714a143", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038"), + ("4b7cbb0154697be795034f7a49712e882a97197a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039"), + ("c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040"), +] + +def _download_and_preprocess_data(csv_url, target_dir): + dataset_sources = os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "data.txt") + if os.path.exists(dataset_sources): + return dataset_sources + + # Making path absolute + target_dir = os.path.abspath(target_dir) + csv_ref = requests.get(csv_url).text.split('\r\n')[1:-1] + for part in csv_ref: + part_filename = requests.head(part).headers.get("Content-Disposition").split(" ")[1].split("=")[1].replace('"', "") + if not 
os.path.exists(os.path.join(target_dir, part_filename)): + part_path = maybe_download(part_filename, target_dir, part) + + def _big_sha1(fname): + s = hashlib.sha1() + buffer_size = 65536 + with open(fname, "rb") as f: + while True: + data = f.read(buffer_size) + if not data: + break + s.update(data) + return s.hexdigest() + + for (sha1, filename) in DATASET_RELEASE_SHA: + print("Checking {} SHA1:".format(filename)) + csum = _big_sha1(os.path.join(target_dir, filename)) + if csum == sha1: + print("\t{}: OK {}".format(filename, sha1)) + else: + print("\t{}: ERROR: expected {}, computed {}".format(filename, sha1, csum)) + assert csum == sha1 + + # Conditionally extract data + _maybe_extract(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip", "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020.zip") + + # Produce source text for extraction / conversion + return _maybe_create_sources(os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020")) + +def _maybe_extract(target_dir, extracted_data, archive, final): + # If target_dir/extracted_data does not exist, extract archive in target_dir + extracted_path = os.path.join(target_dir, extracted_data) + archive_path = os.path.join(target_dir, archive) + final_archive = os.path.join(extracted_path, final) + + if not os.path.exists(extracted_path): + if not os.path.exists(archive_path): + print('No archive "%s" - building ...' % archive_path) + all_zip_parts = glob(archive_path + ".*") + all_zip_parts.sort() + cmdline = "cat {} > {}".format(" ".join(all_zip_parts), archive_path) + print('Building with "%s"' % cmdline) + subprocess.check_call(cmdline, shell=True, cwd=target_dir) + assert os.path.exists(archive_path) + + print('No directory "%s" - extracting archive %s ...' % (extracted_path, archive_path)) + with zipfile.ZipFile(archive_path) as zip_f: + zip_f.extractall(extracted_path) + + with zipfile.ZipFile(final_archive) as zip_f: + zip_f.extractall(target_dir) + else: + print('Found directory "%s" - not extracting it from archive.' 
% extracted_path) + +def _maybe_create_sources(dir): + dataset_sources = os.path.join(dir, "data.txt") + MP3 = glob(os.path.join(dir, "**", "*.mp3")) + XML = glob(os.path.join(dir, "**", "*.xml")) + + MP3_XML_Scores = [] + MP3_XML_Fin = {} + + for f_mp3 in MP3: + for f_xml in XML: + b_mp3 = os.path.splitext(os.path.basename(f_mp3))[0] + b_xml = os.path.splitext(os.path.basename(f_xml))[0] + a_mp3 = b_mp3.split('_') + a_xml = b_xml.split('_') + score = 0 + date_mp3 = a_mp3[0] + date_xml = a_xml[0] + + if date_mp3 != date_xml: + continue + + for i in range(min(len(a_mp3), len(a_xml))): + if (a_mp3[i] == a_xml[i]): + score += 1 + + if score >= 1: + MP3_XML_Scores.append((f_mp3, f_xml, score)) + + # sort by score + MP3_XML_Scores.sort(key=lambda x: x[2], reverse=True) + for s_mp3, s_xml, score in MP3_XML_Scores: + #print(s_mp3, s_xml, score) + if score not in MP3_XML_Fin: + MP3_XML_Fin[score] = {} + + if s_mp3 not in MP3_XML_Fin[score]: + try: + MP3.index(s_mp3) + MP3.remove(s_mp3) + MP3_XML_Fin[score][s_mp3] = s_xml + except ValueError as ex: + pass + else: + print("here:", MP3_XML_Fin[score][s_mp3], s_xml, file=sys.stderr) + + with open(dataset_sources, "w") as ds: + for score in MP3_XML_Fin: + for mp3 in MP3_XML_Fin[score]: + xml = MP3_XML_Fin[score][mp3] + if os.path.getsize(mp3) > 0 and os.path.getsize(xml) > 0: + mp3 = os.path.relpath(mp3, dir) + xml = os.path.relpath(xml, dir) + ds.write('{},{},{:0.2e}\n'.format(xml, mp3, 2.5e-4)) + else: + print("Empty file {} or {}".format(mp3, xml), file=sys.stderr) + + print("Missing XML pairs:", MP3, file=sys.stderr) + return dataset_sources + +def maybe_normalize_for_digits(label): + # first, try to identify numbers like "50 000", "260 000" + if " " in label: + if any(s.isdigit() for s in label): + thousands = re.compile(r"(\d{1,3}(?:\s*\d{3})*(?:,\d+)?)") + maybe_thousands = thousands.findall(label) + if len(maybe_thousands) > 0: + while True: + (label, r) = re.subn(r"(\d)\s(\d{3})", "\\1\\2", label) + if r == 0: + break + + # this might be a time or duration in the form "hh:mm" or "hh:mm:ss" + if ":" in label: + for s in label.split(" "): + if any(i.isdigit() for i in s): + date_or_time = re.compile(r"(\d{1,2}):(\d{2}):?(\d{2})?") + maybe_date_or_time = date_or_time.findall(s) + if len(maybe_date_or_time) > 0: + maybe_hours = maybe_date_or_time[0][0] + maybe_minutes = maybe_date_or_time[0][1] + maybe_seconds = maybe_date_or_time[0][2] + if len(maybe_seconds) > 0: + label = label.replace("{}:{}:{}".format(maybe_hours, maybe_minutes, maybe_seconds), "{} heures {} minutes et {} secondes".format(maybe_hours, maybe_minutes, maybe_seconds)) + else: + label = label.replace("{}:{}".format(maybe_hours, maybe_minutes), "{} heures et {} minutes".format(maybe_hours, maybe_minutes)) + + new_label = [] + # pylint: disable=too-many-nested-blocks + for s in label.split(" "): + if any(i.isdigit() for i in s): + s = s.replace(",", ".") # num2words requires "." for floats + s = s.replace("\"", "") # clean some data, num2words would choke on 1959" + + last_c = s[-1] + if not last_c.isdigit(): # num2words will choke on "0.6.", "24 ?" 
+                s = s[:-1]
+
+            if any(i.isalpha() for i in s): # So we have any(isdigit()) **and** any(isalpha()), like "3D"
+                ns = []
+                for c in s:
+                    nc = c
+                    if c.isdigit(): # convert "3" to "trois-"
+                        try:
+                            nc = num2words(c, lang="fr") + "-"
+                        except decimal.InvalidOperation as ex:
+                            print("decimal.InvalidOperation: '{}'".format(s))
+                            raise ex
+                    ns.append(nc)
+                s = "".join(ns)
+            else:
+                try:
+                    s = num2words(s, lang="fr")
+                except decimal.InvalidOperation as ex:
+                    print("decimal.InvalidOperation: '{}'".format(s))
+                    raise ex
+        new_label.append(s)
+    return " ".join(new_label)
+
+def maybe_normalize_for_specials_chars(label):
+    label = label.replace("%", "pourcents")
+    label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
+    label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
+    label = label.replace("+", " plus ") # clean + and make it speakable
+    label = label.replace("€", " euros ") # clean euro symbol and make it speakable
+    label = label.replace("., ", ", ") # clean some strange "4.0., " (20181017_Innovation.xml)
+    label = label.replace("°", " degré ") # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
+    label = label.replace("...", ".") # remove ellipsis
+    label = label.replace("..", ".") # remove broken ellipsis
+    label = label.replace("m²", "mètre-carrés") # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
+    label = label.replace("[end]", "") # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
+    label = label.replace(u'\xB8c', " ç") # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
+    label = label.replace("C0²", "CO 2") # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
+    return label
+
+def maybe_normalize_for_anglicisms(label):
+    label = label.replace("B2B", "B to B")
+    label = label.replace("B2C", "B to C")
+    label = label.replace("#", "hashtag ")
+    label = label.replace("@", "at ")
+    return label
+
+def maybe_normalize(label):
+    label = maybe_normalize_for_specials_chars(label)
+    label = maybe_normalize_for_anglicisms(label)
+    label = maybe_normalize_for_digits(label)
+    return label
+
+def one_sample(sample):
+    file_size = -1
+    frames = 0
+
+    audio_source = sample[0]
+    target_dir = sample[1]
+    dataset_basename = sample[2]
+
+    start_time = sample[3]
+    duration = sample[4]
+    label = label_filter_fun(sample[5])
+    sample_id = sample[6]
+
+    _wav_filename = os.path.basename(audio_source.replace(".wav", "_{:06}.wav".format(sample_id)))
+    wav_fullname = os.path.join(target_dir, dataset_basename, _wav_filename)
+
+    if not os.path.exists(wav_fullname):
+        subprocess.check_output(["ffmpeg", "-i", audio_source, "-ss", str(start_time), "-t", str(duration), "-c", "copy", wav_fullname], stdin=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+
+    file_size = os.path.getsize(wav_fullname)
+    frames = int(subprocess.check_output(["soxi", "-s", wav_fullname], stderr=subprocess.STDOUT))
+
+    _counter = get_counter()
+    _rows = []
+
+    if file_size == -1:
+        # Excluding samples that failed upon conversion
+        _counter["failed"] += 1
+    elif label is None:
+        # Excluding samples that failed on label validation
+        _counter["invalid_label"] += 1
+    elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
+        # Excluding samples that are too short to fit the transcript
+        _counter["too_short"] += 1
+    elif frames/SAMPLE_RATE < MIN_SECS:
+        # Excluding samples that are too short
+        _counter["too_short"] += 1
+    elif frames/SAMPLE_RATE > MAX_SECS:
+        # Excluding very long samples to keep a reasonable batch-size
+        _counter["too_long"] += 1
+
else: + # This one is good - keep it for the target CSV + _rows.append((os.path.join(dataset_basename, _wav_filename), file_size, label)) + _counter["imported_time"] += frames + _counter["all"] += 1 + _counter["total_time"] += frames + + return (_counter, _rows) + +def _maybe_import_data(xml_file, audio_source, target_dir, rel_tol=1e-1): + dataset_basename = os.path.splitext(os.path.split(xml_file)[1])[0] + wav_root = os.path.join(target_dir, dataset_basename) + if not os.path.exists(wav_root): + os.makedirs(wav_root) + + source_frames = int(subprocess.check_output(["soxi", "-s", audio_source], stderr=subprocess.STDOUT)) + print("Source audio length: %s" % secs_to_hours(source_frames / SAMPLE_RATE)) + + # Get audiofile path and transcript for each sentence in tsv + samples = [] + tree = ET.parse(xml_file) + root = tree.getroot() + seq_id = 0 + this_time = 0.0 + this_duration = 0.0 + prev_time = 0.0 + prev_duration = 0.0 + this_text = "" + for child in root: + if child.tag == "row": + cur_time = float(child.attrib["timestamp"]) + cur_duration = float(child.attrib["timedur"]) + cur_text = child.text + + if this_time == 0.0: + this_time = cur_time + + delta = cur_time - (prev_time + prev_duration) + # rel_tol value is made from trial/error to try and compromise between: + # - cutting enough to skip missing words + # - not too short, not too long sentences + is_close = math.isclose(cur_time, this_time + this_duration, rel_tol=rel_tol) + is_short = ((this_duration + cur_duration + delta) < MAX_SECS) + + # when the previous element is close enough **and** this does not + # go over MAX_SECS, we append content + if (is_close and is_short): + this_duration += cur_duration + delta + this_text += cur_text + else: + samples.append((audio_source, target_dir, dataset_basename, this_time, this_duration, this_text, seq_id)) + + this_time = cur_time + this_duration = cur_duration + this_text = cur_text + + seq_id += 1 + + prev_time = cur_time + prev_duration = cur_duration + + # Keep track of how many samples are good vs. 
problematic + _counter = get_counter() + num_samples = len(samples) + _rows = [] + + print("Processing XML data: {}".format(xml_file)) + pool = Pool() + bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) + for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1): + _counter += processed[0] + _rows += processed[1] + bar.update(i) + bar.update(num_samples) + pool.close() + pool.join() + + imported_samples = get_imported_samples(_counter) + assert _counter["all"] == num_samples + assert len(_rows) == imported_samples + + print_import_report(_counter, SAMPLE_RATE, MAX_SECS) + print("Import efficiency: %.1f%%" % ((_counter["total_time"] / source_frames)*100)) + print("") + + return _counter, _rows + +def _maybe_convert_wav(mp3_filename, _wav_filename): + if not os.path.exists(_wav_filename): + print("Converting {} to WAV file: {}".format(mp3_filename, _wav_filename)) + transformer = sox.Transformer() + transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS, bitdepth=BIT_DEPTH) + try: + transformer.build(mp3_filename, _wav_filename) + except sox.core.SoxError: + pass + +def write_general_csv(target_dir, _rows, _counter): + target_csv_template = os.path.join(target_dir, "ccpmf_{}.csv") + with open(target_csv_template.format("train"), "w") as train_csv_file: # 80% + with open(target_csv_template.format("dev"), "w") as dev_csv_file: # 10% + with open(target_csv_template.format("test"), "w") as test_csv_file: # 10% + train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES) + train_writer.writeheader() + dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES) + dev_writer.writeheader() + test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES) + test_writer.writeheader() + + bar = progressbar.ProgressBar(max_value=len(_rows), widgets=SIMPLE_BAR) + for i, item in enumerate(bar(_rows)): + i_mod = i % 10 + if i_mod == 0: + writer = test_writer + elif i_mod == 1: + writer = dev_writer + else: + writer = train_writer + writer.writerow({"wav_filename": item[0], "wav_filesize": item[1], "transcript": item[2]}) + + print("") + print("~~~~ FINAL STATISTICS ~~~~") + print_import_report(_counter, SAMPLE_RATE, MAX_SECS) + print("~~~~ (FINAL STATISTICS) ~~~~") + print("") + +if __name__ == "__main__": + PARSER = get_importers_parser(description="Import XML from Conference Centre for Economics, France") + PARSER.add_argument("target_dir", help="Destination directory") + PARSER.add_argument("--filter_alphabet", help="Exclude samples with characters not in provided alphabet") + PARSER.add_argument("--normalize", action="store_true", help="Converts diacritic characters to their base ones") + + PARAMS = PARSER.parse_args() + validate_label = get_validate_label(PARAMS) + ALPHABET = Alphabet(PARAMS.filter_alphabet) if PARAMS.filter_alphabet else None + + def label_filter_fun(label): + if PARAMS.normalize: + label = unicodedata.normalize("NFKD", label.strip()) \ + .encode("ascii", "ignore") \ + .decode("ascii", "ignore") + label = maybe_normalize(label) + label = validate_label(label) + if ALPHABET and label: + try: + ALPHABET.encode(label) + except KeyError: + label = None + return label + + dataset_sources = _download_and_preprocess_data(csv_url=DATASET_RELEASE_CSV, target_dir=PARAMS.target_dir) + sources_root_dir = os.path.dirname(dataset_sources) + all_counter = get_counter() + all_rows = [] + with open(dataset_sources, "r") as sources: + for line in sources.readlines(): + d = line.split(",") + this_xml = 
os.path.join(sources_root_dir, d[0]) + this_mp3 = os.path.join(sources_root_dir, d[1]) + this_rel = float(d[2]) + + wav_filename = os.path.join(sources_root_dir, os.path.splitext(os.path.basename(this_mp3))[0] + ".wav") + _maybe_convert_wav(this_mp3, wav_filename) + counter, rows = _maybe_import_data(this_xml, wav_filename, sources_root_dir, this_rel) + + all_counter += counter + all_rows += rows + write_general_csv(sources_root_dir, _counter=all_counter, _rows=all_rows) diff --git a/training/deepspeech_training/util/downloader.py b/training/deepspeech_training/util/downloader.py index 9fcbf6744..a6d57e3e9 100644 --- a/training/deepspeech_training/util/downloader.py +++ b/training/deepspeech_training/util/downloader.py @@ -19,7 +19,7 @@ def maybe_download(archive_name, target_dir, archive_url): total_size = int(req.headers.get('content-length', 0)) done = 0 with open(archive_path, 'wb') as f: - bar = progressbar.ProgressBar(max_value=total_size, widgets=SIMPLE_BAR) + bar = progressbar.ProgressBar(max_value=total_size if total_size > 0 else progressbar.UnknownLength, widgets=SIMPLE_BAR) for data in req.iter_content(1024*1024): done += len(data) f.write(data) From c0c5e6ade8b78a845fb5ee7cc8e0e48d8cddaa6a Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Thu, 26 Nov 2020 13:28:57 +0100 Subject: [PATCH 46/62] Adding support for ElectronJS v11.0 --- taskcluster/tc-all-vars.sh | 2 +- .../test-electronjs_v11.0-darwin-amd64-opt.yml | 15 +++++++++++++++ .../test-electronjs_v11.0-win-amd64-opt.yml | 14 ++++++++++++++ ...test-electronjs_v11.0_16k-linux-amd64-opt.yml | 16 ++++++++++++++++ .../test-electronjs_v11.0_8k-linux-amd64-opt.yml | 16 ++++++++++++++++ ...ectronjs_v11.0_multiarchpkg-win-amd64-opt.yml | 14 ++++++++++++++ ...lectronjs_v11.0_multiarchpkg-win-cuda-opt.yml | 14 ++++++++++++++ ...ctronjs_v11.0_multiarchpkg-win-tflite-opt.yml | 14 ++++++++++++++ 8 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 taskcluster/test-electronjs_v11.0-darwin-amd64-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0-win-amd64-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0_16k-linux-amd64-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0_8k-linux-amd64-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0_multiarchpkg-win-amd64-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0_multiarchpkg-win-cuda-opt.yml create mode 100644 taskcluster/test-electronjs_v11.0_multiarchpkg-win-tflite-opt.yml diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh index 432bf7e49..ef1cba845 100755 --- a/taskcluster/tc-all-vars.sh +++ b/taskcluster/tc-all-vars.sh @@ -90,4 +90,4 @@ SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-3.5.8:ucs2 3.6.10:ucs2 3. 
SUPPORTED_NODEJS_BUILD_VERSIONS=${SUPPORTED_NODEJS_BUILD_VERSIONS:-10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0} SUPPORTED_NODEJS_TESTS_VERSIONS=${SUPPORTED_NODEJS_TESTS_VERSIONS:-10.20.1 11.15.0 12.17.0 13.14.0 14.3.0 15.0.0} -SUPPORTED_ELECTRONJS_VERSIONS=${SUPPORTED_ELECTRONJS_VERSIONS:-5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0} +SUPPORTED_ELECTRONJS_VERSIONS=${SUPPORTED_ELECTRONJS_VERSIONS:-5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0} diff --git a/taskcluster/test-electronjs_v11.0-darwin-amd64-opt.yml b/taskcluster/test-electronjs_v11.0-darwin-amd64-opt.yml new file mode 100644 index 000000000..f17e47b58 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0-darwin-amd64-opt.yml @@ -0,0 +1,15 @@ +build: + template_file: test-darwin-opt-base.tyml + dependencies: + - "darwin-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "homebrew_tests-darwin-amd64" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${nodejs.brew.prep_12} + args: + tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 16k" + metadata: + name: "DeepSpeech OSX AMD64 CPU ElectronJS v11.0 tests" + description: "Testing DeepSpeech for OSX/AMD64 on ElectronJS v11.0, CPU only, optimized version" diff --git a/taskcluster/test-electronjs_v11.0-win-amd64-opt.yml b/taskcluster/test-electronjs_v11.0-win-amd64-opt.yml new file mode 100644 index 000000000..c824e3488 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0-win-amd64-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "win-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} && ${nodejs.win.prep_12} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 16k" + metadata: + name: "DeepSpeech Windows AMD64 CPU ElectronJS v11.0 tests" + description: "Testing DeepSpeech for Windows/AMD64 on ElectronJS v11.0, CPU only, optimized version" diff --git a/taskcluster/test-electronjs_v11.0_16k-linux-amd64-opt.yml b/taskcluster/test-electronjs_v11.0_16k-linux-amd64-opt.yml new file mode 100644 index 000000000..89e65ac20 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0_16k-linux-amd64-opt.yml @@ -0,0 +1,16 @@ +build: + template_file: test-linux-opt-base.tyml + docker_image: "ubuntu:16.04" + dependencies: + - "linux-amd64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install ${nodejs.packages_xenial.apt} ${electronjs.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU ElectronJS v11.0 tests (16kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on ElectronJS v11.0, CPU only, optimized version (16kHz)" diff --git a/taskcluster/test-electronjs_v11.0_8k-linux-amd64-opt.yml b/taskcluster/test-electronjs_v11.0_8k-linux-amd64-opt.yml new file mode 100644 index 000000000..23be3b9b6 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0_8k-linux-amd64-opt.yml @@ -0,0 +1,16 @@ +build: + template_file: test-linux-opt-base.tyml + docker_image: "ubuntu:16.04" + dependencies: + - 
"linux-amd64-cpu-opt" + - "test-training_8k-linux-amd64-py36m-opt" + test_model_task: "test-training_8k-linux-amd64-py36m-opt" + system_setup: + > + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install ${nodejs.packages_xenial.apt} ${electronjs.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU ElectronJS v11.0 tests (8kHz)" + description: "Testing DeepSpeech for Linux/AMD64 on ElectronJS v11.0, CPU only, optimized version (8kHz)" diff --git a/taskcluster/test-electronjs_v11.0_multiarchpkg-win-amd64-opt.yml b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-amd64-opt.yml new file mode 100644 index 000000000..cbcf59dda --- /dev/null +++ b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-amd64-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "node-package-cpu" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} && ${nodejs.win.prep_12} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 16k" + metadata: + name: "DeepSpeech Windows AMD64 CPU ElectronJS MultiArch Package v11.0 tests" + description: "Testing DeepSpeech for Windows/AMD64 on ElectronJS MultiArch Package v11.0, CPU only, optimized version" diff --git a/taskcluster/test-electronjs_v11.0_multiarchpkg-win-cuda-opt.yml b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-cuda-opt.yml new file mode 100644 index 000000000..bf5640b67 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-cuda-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-cuda-opt-base.tyml + dependencies: + - "node-package-gpu" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} && ${nodejs.win.prep_12} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-electron-tests.sh 12.x 11.0.0 16k cuda" + metadata: + name: "DeepSpeech Windows AMD64 CUDA ElectronJS MultiArch Package v11.0 tests" + description: "Testing DeepSpeech for Windows/AMD64 on ElectronJS MultiArch Package v11.0, CUDA, optimized version" diff --git a/taskcluster/test-electronjs_v11.0_multiarchpkg-win-tflite-opt.yml b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-tflite-opt.yml new file mode 100644 index 000000000..079e2d3b5 --- /dev/null +++ b/taskcluster/test-electronjs_v11.0_multiarchpkg-win-tflite-opt.yml @@ -0,0 +1,14 @@ +build: + template_file: test-win-opt-base.tyml + dependencies: + - "node-package-tflite" + - "test-training_16k-linux-amd64-py36m-opt" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + ${system.sox_win} && ${nodejs.win.prep_12} + args: + tests_cmdline: "${system.homedir.win}/DeepSpeech/ds/taskcluster/tc-electron_tflite-tests.sh 12.x 11.0.0 16k" + metadata: + name: "DeepSpeech Windows AMD64 TFLite ElectronJS MultiArch Package v11.0 tests" + description: "Testing DeepSpeech for Windows/AMD64 on ElectronJS MultiArch Package v11.0, TFLite only, optimized version" From c979e360da4ee55541a6ed84dad857f26c991505 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Fri, 27 Nov 2020 12:14:02 +0100 Subject: [PATCH 47/62] Fix #3443: Link to upstream Dockerfile for lack of correct TensorFlow 
GPU deps doc. --- doc/TRAINING.rst | 7 +++++-- doc/USING.rst | 14 +++++++------- doc/index.rst | 14 +++++++------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index ba38b64b8..7eed6b35a 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -3,11 +3,14 @@ Training Your Own Model ======================= +.. _cuda-training-deps: + Prerequisites for training a model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * `Python 3.6 `_ * Mac or Linux environment +* CUDA 10.0 / CuDNN v7.6 per `Dockerfile `_. Getting the training code ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -69,7 +72,7 @@ If you have a capable (NVIDIA, at least 8GB of VRAM) GPU, it is highly recommend pip3 uninstall tensorflow pip3 install 'tensorflow-gpu==1.15.4' -Please ensure you have the required :ref:`CUDA dependency `. +Please ensure you have the required `CUDA dependency `_ and/or :ref:`Prerequisites `. It has been reported for some people failure at training: @@ -78,7 +81,7 @@ It has been reported for some people failure at training: tensorflow.python.framework.errors_impl.UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [[{{node tower_0/conv1d/Conv2D}}]] -Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation `. +Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation `. Basic Dockerfile for training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/USING.rst b/doc/USING.rst index 12519980a..40cc044f5 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -23,10 +23,10 @@ Running ``deepspeech`` might, see below, require some runtime dependencies to be Please refer to your system's documentation on how to install these dependencies. -.. _cuda-deps: +.. _cuda-inference-deps: -CUDA dependency -^^^^^^^^^^^^^^^ +CUDA dependency (inference) +^^^^^^^^^^^^^^^^^^^^^^^^^^^ The GPU capable builds (Python, NodeJS, C++, etc) depend on CUDA 10.1 and CuDNN v7.6. @@ -37,8 +37,8 @@ If you want to use the pre-trained English model for performing speech-to-text, .. code-block:: bash - wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm - wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer + wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.pbmm + wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.scorer There are several pre-trained model files available in official releases. Files ending in ``.pbmm`` are compatible with clients and language bindings built against the standard TensorFlow runtime. Usually these packages are simply called ``deepspeech``. These files are also compatible with CUDA enabled clients and language bindings. These packages are usually called ``deepspeech-gpu``. Files ending in ``.tflite`` are compatible with clients and language bindings built against the `TensorFlow Lite runtime `_. These models are optimized for size and performance in low power devices. On desktop platforms, the compatible packages are called ``deepspeech-tflite``. 
On Android and Raspberry Pi, we only publish TensorFlow Lite enabled packages, and they are simply called ``deepspeech``. You can see a full list of supported platforms and which TensorFlow runtime is supported at :ref:`supported-platforms-inference`. @@ -136,7 +136,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. code-block:: bash - deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio my_audio_file.wav + deepspeech --model deepspeech-0.9.1-models.pbmm --scorer deepspeech-0.9.1-models.scorer --audio my_audio_file.wav The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio. @@ -200,7 +200,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. code-block:: bash - ./deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio_input.wav + ./deepspeech --model deepspeech-0.9.1-models.pbmm --scorer deepspeech-0.9.1-models.scorer --audio audio_input.wav See the help output with ``./deepspeech -h`` for more details. diff --git a/doc/index.rst b/doc/index.rst index e8991d3f5..40a01670e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -20,15 +20,15 @@ To install and use DeepSpeech all you have to do is: pip3 install deepspeech # Download pre-trained English model files - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.pbmm + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.scorer # Download example audio files - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/audio-0.7.4.tar.gz - tar xvf audio-0.7.4.tar.gz + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/audio-0.9.1.tar.gz + tar xvf audio-0.9.1.tar.gz # Transcribe an audio file - deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.9.1-models.pbmm --scorer deepspeech-0.9.1-models.scorer --audio audio/2830-3980-0043.wav A pre-trained English model is available for use and can be downloaded following the instructions in :ref:`the usage docs `. For the latest release, including pre-trained models and checkpoints, `see the GitHub releases page `_. @@ -44,9 +44,9 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th pip3 install deepspeech-gpu # Transcribe an audio file. - deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.9.1-models.pbmm --scorer deepspeech-0.9.1-models.scorer --audio audio/2830-3980-0043.wav -Please ensure you have the required :ref:`CUDA dependencies `. +Please ensure you have the required :ref:`CUDA dependencies `. See the output of ``deepspeech -h`` for more information on the use of ``deepspeech``. (If you experience problems running ``deepspeech``, please check :ref:`required runtime dependencies `). 
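The commands above use the ``deepspeech`` CLI; the same flow is available from the Python API. Below is a minimal sketch, assuming the ``deepspeech`` 0.9.1 pip package and the model and audio files downloaded as shown above (the pre-trained model expects 16 kHz, mono, 16-bit WAV input):

.. code-block:: python

   import wave

   import numpy as np
   from deepspeech import Model

   # Model and scorer paths follow the download commands in the docs above.
   model = Model("deepspeech-0.9.1-models.pbmm")
   model.enableExternalScorer("deepspeech-0.9.1-models.scorer")

   # Read a 16 kHz mono WAV into the int16 buffer the API expects.
   with wave.open("audio/2830-3980-0043.wav", "rb") as wav:
       audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

   print(model.stt(audio))

As with the ``--scorer`` flag, the external scorer is optional: skipping ``enableExternalScorer`` decodes without the external language model.
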
From 73240a0f1d644483ad129f1cf2d3bc6176731796 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 4 Dec 2020 15:16:15 +0200
Subject: [PATCH 48/62] Add listing of code owners/reviewers and reference
 from contribution guidelines

X-DeepSpeech: NOBUILD
---
 CODE_OWNERS.rst  | 108 +++++++++++++++++++++++++++++++++++++++++++++++
 CONTRIBUTING.rst |   6 +++
 2 files changed, 114 insertions(+)
 create mode 100644 CODE_OWNERS.rst

diff --git a/CODE_OWNERS.rst b/CODE_OWNERS.rst
new file mode 100644
index 000000000..dfa9d0fef
--- /dev/null
+++ b/CODE_OWNERS.rst
@@ -0,0 +1,108 @@
+DeepSpeech code owners
+======================
+
+This file describes reviewers who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
+
+There's overlap in the areas of expertise of each reviewer, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate person.
+
+Global reviewers
+----------------
+
+These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other reviewers, global reviewers will make a final decision.
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+Training, feeding
+-----------------
+
+- Reuben Morais (@reuben)
+
+Model exporting
+---------------
+
+- Alexandre Lissy (@lissyx)
+
+Transfer learning
+-----------------
+
+- Josh Meyer (@JRMeyer)
+- Reuben Morais (@reuben)
+
+Testing & CI
+------------
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+Native inference client
+-----------------------
+
+Everything that goes into libdeepspeech.so and is not specifically covered in another area fits here.
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+Streaming decoder
+-----------------
+
+- Reuben Morais (@reuben)
+- @dabinat
+
+Python bindings
+---------------
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+Java Bindings
+-------------
+
+- Alexandre Lissy (@lissyx)
+
+JavaScript/NodeJS/ElectronJS bindings
+-------------------------------------
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+.NET bindings
+-------------
+
+- Carlos Fonseca (@carlfm01)
+
+Swift bindings
+--------------
+
+- Reuben Morais (@reuben)
+
+Android support
+---------------
+
+- Alexandre Lissy (@lissyx)
+
+Raspberry Pi support
+--------------------
+
+- Alexandre Lissy (@lissyx)
+
+Windows support
+---------------
+
+- Carlos Fonseca (@carlfm01)
+
+iOS support
+-----------
+
+- Reuben Morais (@reuben)
+
+Documentation
+-------------
+
+- Alexandre Lissy (@lissyx)
+- Reuben Morais (@reuben)
+
+Third party bindings
+--------------------
+
+Hosted externally and owned by the individual authors. See the `list of third-party bindings `_ for more info.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 028e4490d..c7970a340 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -33,6 +33,12 @@ Whenever you add a new feature to DeepSpeech and want to contribute that feature back to the project, consider the following checklist:
 
 2. You've made changes to the Python code. Make sure you run a linter (described below).
 3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the DeepSpeech decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use sample WAV files for testing which are provided in the `DeepSpeech/data/` directory. (A rough sketch of such a latency check appears below.)
+
+Requesting review on your PR
+----------------------------
+
+Generally, a code owner will be notified of your pull request and will either review it or ask some other code owner for their review. If you'd like to proactively request review as you open the PR, see the CODE_OWNERS.rst file which describes who's an appropriate reviewer depending on which parts of the code you're changing.
+
 
 Python Linter
 -------------
 
From 32b6067a0116bc5cdf2a24a079295c7d83f4b2f9 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Mon, 23 Nov 2020 18:32:18 -0800
Subject: [PATCH 49/62] Enable static build of DeepSpeech iOS framework

Set up additional `deepspeech_ios` target with static build steps

Xcode config: lock swift version at 5.0, bundle framework rather than dynamic lib, never strip swift symbols, add framework search paths, and bring in lstdc++

Runtime schema config: disable the main thread checker as this causes trouble with the static build

Update model versions to 0.9.1

Remove libdeepspeech.so from example app bundling steps

Swift lib embed settings that are somehow essential

Attempt to adjust taskcluster build steps

Add a basic podspec

Add framework to gitignore

Fix podspec version code

Attempt to fix taskcluster unzip step

Switch deepspeech targets for iOS build

Try doing this unzip in one step

Remove packaging steps for unneeded stuff because libdeepspeech.so is no longer a thing here. I suppose we could add a step to package the iOS static lib instead.

Fix podspec version

Set up podspec relative assuming a clone from the repo root

Remove space in iOS package step

Fix buildfile nit

Link stdc++ in explicitly with iOS build only

Revert "Remove space in iOS package step"

This reverts commit 3e1922ea370c110f9854ae7e97101f2ea00f55c6.
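As a companion to the performance-testing guidance added to CONTRIBUTING.rst above, here is a rough sketch of the kind of local latency check it describes. This is illustrative only: it assumes the ``deepspeech`` pip package (a contributor would instead point it at their locally rebuilt bindings), and the WAV path is one of the sample files under ``DeepSpeech/data/``:

.. code-block:: python

   import time
   import wave

   import numpy as np
   from deepspeech import Model

   model = Model("deepspeech-0.9.1-models.pbmm")

   with wave.open("data/smoke_test/LDC93S1.wav", "rb") as wav:
       audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

   model.stt(audio)  # warm-up run, excluded from timing
   runs = 10
   start = time.perf_counter()
   for _ in range(runs):
       model.stt(audio)
   print("mean latency: %.3f s" % ((time.perf_counter() - start) / runs))

Comparing this number before and after a change gives a quick regression signal; ``perf`` or a memory profiler can then drill into any difference.
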
--- .gitignore | 1 + native_client/BUILD | 18 ++++++++++++-- native_client/swift/deepspeech-ios.podspec | 19 +++++++++++++++ .../deepspeech_ios.xcodeproj/project.pbxproj | 24 +++++++++++++++---- .../xcschemes/deepspeech_ios.xcscheme | 1 + .../project.pbxproj | 4 ---- .../SpeechRecognitionImpl.swift | 4 ++-- taskcluster/ios-build.sh | 2 +- taskcluster/ios-package.sh | 4 ---- taskcluster/tc-build-utils.sh | 2 +- 10 files changed, 61 insertions(+), 18 deletions(-) create mode 100644 native_client/swift/deepspeech-ios.podspec diff --git a/.gitignore b/.gitignore index eab56fe64..b4646cda1 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ doc/xml-c doc/xml-java doc/xml-dotnet convert_graphdef_memmapped_format +native_client/swift/deepspeech_ios.framework/deepspeech_ios diff --git a/native_client/BUILD b/native_client/BUILD index 9119f3926..c98d86a3d 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -3,6 +3,7 @@ load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") +load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework") load( "@org_tensorflow//tensorflow/lite:build_def.bzl", @@ -110,8 +111,8 @@ cc_library( ], ) -tf_cc_shared_object( - name = "libdeepspeech.so", +cc_library( + name = "deepspeech_bundle", srcs = [ "deepspeech.cc", "deepspeech.h", @@ -203,6 +204,19 @@ tf_cc_shared_object( ]) + [":decoder"], ) +tf_cc_shared_object( + name = "libdeepspeech.so", + deps = [":deepspeech_bundle"], +) + +ios_static_framework( + name = "deepspeech_ios", + deps = [":deepspeech_bundle"], + families = ["iphone", "ipad"], + minimum_os_version = "9.0", + linkopts = ["-lstdc++"], +) + genrule( name = "libdeepspeech_so_dsym", srcs = [":libdeepspeech.so"], diff --git a/native_client/swift/deepspeech-ios.podspec b/native_client/swift/deepspeech-ios.podspec new file mode 100644 index 000000000..d56d806aa --- /dev/null +++ b/native_client/swift/deepspeech-ios.podspec @@ -0,0 +1,19 @@ +# Pull in version from outside +version = File.read(File.join(__dir__, "../../training/deepspeech_training/VERSION")).split("\n")[0] + +Pod::Spec.new do |s| + s.name = "deepspeech-ios" + s.version = version + s.summary = "DeepSpeech" + s.homepage = "https://github.com/mozilla/DeepSpeech" + s.license = "Mozilla Public License 2.0" + s.authors = "Mozilla et al." + + s.platforms = { :ios => "9.0" } + s.source = { :git => "https://github.com/mozilla/DeepSpeech.git", :tag => "v#{s.version}" } + + # Assuming taskcluster build location. Depending on your Xcode setup, this might be in + # build/Release-iphoneos/deepspeech_ios.framework instead. 
+ s.vendored_frameworks = "native_client/swift/DerivedData/Build/Products/Release-iphoneos/deepspeech_ios.framework" + s.source_files = "native_client/swift/deepspeech_ios/**/*.{h,m,mm,swift}" +end diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj index 59927e9ed..d400f5918 100644 --- a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj @@ -11,7 +11,7 @@ 505B137224960D550007DADA /* deepspeech_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* deepspeech_ios.h */; settings = {ATTRIBUTES = (Public, ); }; }; 505B137D24961AF20007DADA /* deepspeech.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* deepspeech.h */; settings = {ATTRIBUTES = (Private, ); }; }; 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* DeepSpeech.swift */; }; - 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD39A24B61FA100409BBB /* libdeepspeech.so */; }; + AD2FD0F925678F8800314F2E /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -45,7 +45,7 @@ 505B137B249619C90007DADA /* deepspeech_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = deepspeech_ios.modulemap; sourceTree = ""; }; 505B137C24961AF20007DADA /* deepspeech.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = deepspeech.h; path = ../../deepspeech.h; sourceTree = ""; }; 505B137E24961BA70007DADA /* DeepSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepSpeech.swift; sourceTree = ""; }; - 507CD39A24B61FA100409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = ""; }; + AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -53,7 +53,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */, + AD2FD0F925678F8800314F2E /* deepspeech_ios.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -101,7 +101,7 @@ 505B1380249620C60007DADA /* Frameworks */ = { isa = PBXGroup; children = ( - 507CD39A24B61FA100409BBB /* libdeepspeech.so */, + AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */, ); name = Frameworks; sourceTree = ""; @@ -243,6 +243,7 @@ 505B137324960D550007DADA /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = NO; ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; @@ -296,8 +297,10 @@ MTL_FAST_MATH = YES; ONLY_ACTIVE_ARCH = YES; SDKROOT = iphoneos; + STRIP_SWIFT_SYMBOLS = NO; SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; }; @@ -306,6 +309,7 @@ 505B137424960D550007DADA /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + 
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = NO; ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; @@ -352,8 +356,10 @@ MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; SDKROOT = iphoneos; + STRIP_SWIFT_SYMBOLS = NO; SWIFT_COMPILATION_MODE = wholemodule; SWIFT_OPTIMIZATION_LEVEL = "-O"; + SWIFT_VERSION = 5.0; VALIDATE_PRODUCT = YES; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; @@ -372,6 +378,10 @@ DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); INFOPLIST_FILE = deepspeech_ios/Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; LD_RUNPATH_SEARCH_PATHS = ( @@ -386,6 +396,7 @@ ); MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + OTHER_LDFLAGS = "-lstdc++"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; @@ -406,6 +417,10 @@ DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); INFOPLIST_FILE = deepspeech_ios/Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; LD_RUNPATH_SEARCH_PATHS = ( @@ -420,6 +435,7 @@ ); MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + OTHER_LDFLAGS = "-lstdc++"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; SWIFT_VERSION = 5.0; diff --git a/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme index b3ba37050..b0b1b83f5 100644 --- a/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme +++ b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme @@ -44,6 +44,7 @@ buildConfiguration = "Debug" selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + disableMainThreadChecker = "YES" launchStyle = "0" useCustomWorkingDirectory = "NO" ignoresPersistentStateOnLaunch = "NO" diff --git a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj index a57f983c5..ea1c51f95 100644 --- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -11,8 +11,6 @@ 504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; }; 504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; }; - 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; }; - 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; 
settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 50F787F32497683900D52237 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F22497683900D52237 /* AppDelegate.swift */; }; 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F42497683900D52237 /* SceneDelegate.swift */; }; 50F787F72497683900D52237 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F62497683900D52237 /* ContentView.swift */; }; @@ -48,7 +46,6 @@ dstSubfolderSpec = 10; files = ( 504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */, - 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */, ); name = "Embed Frameworks"; runOnlyForDeploymentPostprocessing = 0; @@ -81,7 +78,6 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */, 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift b/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift index b3a4ac9bf..35dfe0a6d 100644 --- a/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift +++ b/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift @@ -26,8 +26,8 @@ class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDele private var audioData = Data() override init() { - let modelPath = Bundle.main.path(forResource: "deepspeech-0.7.4-models", ofType: "tflite")! - let scorerPath = Bundle.main.path(forResource: "deepspeech-0.7.4-models", ofType: "scorer")! + let modelPath = Bundle.main.path(forResource: "deepspeech-0.9.1-models", ofType: "tflite")! + let scorerPath = Bundle.main.path(forResource: "deepspeech-0.9.1-models", ofType: "scorer")! model = try! DeepSpeechModel(modelPath: modelPath) try! 
model.enableExternalScorer(scorerPath: scorerPath) diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index 282f8c32a..38ace0906 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -9,7 +9,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:deepspeech_ios " if [ "${arch}" = "--arm64" ]; then diff --git a/taskcluster/ios-package.sh b/taskcluster/ios-package.sh index 16cc9f961..9c688a65e 100755 --- a/taskcluster/ios-package.sh +++ b/taskcluster/ios-package.sh @@ -10,10 +10,6 @@ mkdir -p ${TASKCLUSTER_ARTIFACTS} || true cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ -package_native_client "native_client.tar.xz" - -package_libdeepspeech_as_zip "libdeepspeech.zip" - case $arch in "--x86_64") release_folder="Release-iphonesimulator" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index b1077adac..e823175ca 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -340,7 +340,7 @@ do_nuget_build() do_deepspeech_ios_framework_build() { arch=$1 - cp ${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so ${DS_DSDIR}/native_client/swift/libdeepspeech.so + unzip ${DS_TFDIR}/bazel-bin/native_client/deepspeech_ios.zip -d ${DS_DSDIR}/native_client/swift cd ${DS_DSDIR}/native_client/swift case $arch in "--x86_64") From c6318859df210da5c997720ff02ec23b91ae2f77 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Fri, 4 Dec 2020 11:40:48 +0200 Subject: [PATCH 50/62] Re-add missing TF flags to deepspeech_bundle library --- native_client/BUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/native_client/BUILD b/native_client/BUILD index c98d86a3d..d25454a1e 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -1,6 +1,6 @@ # Description: Deepspeech native client library. -load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object", "tf_copts", "lrt_if_needed") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework") @@ -131,7 +131,7 @@ cc_library( "tfmodelstate.cc", ], }), - copts = select({ + copts = tf_copts() + select({ # -fvisibility=hidden is not required on Windows, MSCV hides all declarations by default "//tensorflow:windows": ["/w"], # -Wno-sign-compare to silent a lot of warnings from tensorflow itself, @@ -144,7 +144,7 @@ cc_library( "//native_client:tflite": ["-DUSE_TFLITE"], "//conditions:default": ["-UUSE_TFLITE"], }) + tflite_copts(), - linkopts = select({ + linkopts = lrt_if_needed() + select({ "//tensorflow:macos": [], "//tensorflow:ios": ["-fembed-bitcode"], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, From f822b04e1bdd956fdb5ca4bc797f7646a95b4bb5 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 7 Dec 2020 10:02:31 +0200 Subject: [PATCH 51/62] Branding cleanup Remove Mozilla trademarks. 
--- BIBLIOGRAPHY.md | 2 +- doc/TRAINING.rst | 2 +- doc/conf.py | 6 +++--- .../dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs | 2 +- native_client/dotnet/README.rst | 2 +- native_client/dotnet/nupkg/deepspeech.nuspec.in | 6 +++--- native_client/java/libdeepspeech/build.gradle | 2 +- native_client/javascript/package.json.in | 2 +- native_client/swift/deepspeech-ios.podspec | 2 +- setup.py | 4 ++-- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 19b14d27b..1d392a66a 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -1,5 +1,5 @@ This file contains a list of papers in chronological order that have been published -using Mozilla's DeepSpeech. +using DeepSpeech. To appear ========== diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 7eed6b35a..29671d0c1 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -245,7 +245,7 @@ N.B. - If you have access to a pre-trained model which uses UTF-8 bytes at the o Fine-Tuning (same alphabet) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you'd like to use one of the pre-trained models released by Mozilla to bootstrap your training process (fine tuning), you can do so by using the ``--checkpoint_dir`` flag in ``DeepSpeech.py``. Specify the path where you downloaded the checkpoint from the release, and training will resume from the pre-trained model. +If you'd like to use one of the pre-trained models to bootstrap your training process (fine tuning), you can do so by using the ``--checkpoint_dir`` flag in ``DeepSpeech.py``. Specify the path where you downloaded the checkpoint from the release, and training will resume from the pre-trained model. For example, if you want to fine tune the entire graph using your own data in ``my-train.csv``\ , ``my-dev.csv`` and ``my-test.csv``\ , for three epochs, you can something like the following, tuning the hyperparameters as needed: diff --git a/doc/conf.py b/doc/conf.py index a74fff706..401ba08bc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -46,8 +46,8 @@ # -- Project information ----------------------------------------------------- project = u'DeepSpeech' -copyright = '2019-2020, Mozilla Corporation' -author = 'Mozilla Corporation' +copyright = '2019-2020 Mozilla Corporation, 2020 DeepSpeech authors' +author = 'DeepSpeech authors' with open('../VERSION', 'r') as ver: v = ver.read().strip() @@ -175,7 +175,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'DeepSpeech.tex', u'DeepSpeech Documentation', - u'Mozilla Research', 'manual'), + u'DeepSpeech authors', 'manual'), ] diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs index 344b758e7..fca21a576 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs @@ -5,7 +5,7 @@ namespace DeepSpeechClient.Interfaces { /// - /// Client interface of Mozilla's DeepSpeech implementation. 
+ /// Client interface for DeepSpeech /// public interface IDeepSpeech : IDisposable { diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst index b10255736..7c9010c23 100644 --- a/native_client/dotnet/README.rst +++ b/native_client/dotnet/README.rst @@ -61,7 +61,7 @@ There should already be a symbolic link, for this example let's suppose that we ├── D:\ │ ├── cloned # Contains DeepSpeech and tensorflow side by side │ │ └── DeepSpeech # Root of the cloned DeepSpeech - │ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow + │ │ ├── tensorflow # Root of the cloned mozilla/tensorflow └── ... diff --git a/native_client/dotnet/nupkg/deepspeech.nuspec.in b/native_client/dotnet/nupkg/deepspeech.nuspec.in index a4797177c..68e192c8c 100644 --- a/native_client/dotnet/nupkg/deepspeech.nuspec.in +++ b/native_client/dotnet/nupkg/deepspeech.nuspec.in @@ -4,13 +4,13 @@ $NUPKG_ID $NUPKG_VERSION DeepSpeech - Mozilla - Mozilla + DeepSpeech authors + DeepSpeech authors MPL-2.0 http://github.com/mozilla/DeepSpeech false A library for running inference with a DeepSpeech model - Copyright (c) 2019 Mozilla Corporation + Copyright (c) 2019-2020 Mozilla Corporation, 2020 DeepSpeech authors native speech speech_recognition diff --git a/native_client/java/libdeepspeech/build.gradle b/native_client/java/libdeepspeech/build.gradle index b4740333a..ab5bc3106 100644 --- a/native_client/java/libdeepspeech/build.gradle +++ b/native_client/java/libdeepspeech/build.gradle @@ -96,7 +96,7 @@ uploadArchives { developers { developer { id 'deepspeech' - name 'Mozilla DeepSpeech Team' + name 'DeepSpeech authors' email 'deepspeechs@lists.mozilla.org' } } diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in index 42edc3c16..2494577f3 100644 --- a/native_client/javascript/package.json.in +++ b/native_client/javascript/package.json.in @@ -7,7 +7,7 @@ "bin": { "deepspeech": "./client.js" }, - "author" : "Mozilla", + "author" : "DeepSpeech authors", "license": "MPL-2.0", "homepage": "https://github.com/mozilla/DeepSpeech/tree/v$(PROJECT_VERSION)#project-deepspeech", "files": [ diff --git a/native_client/swift/deepspeech-ios.podspec b/native_client/swift/deepspeech-ios.podspec index d56d806aa..ad9eccf10 100644 --- a/native_client/swift/deepspeech-ios.podspec +++ b/native_client/swift/deepspeech-ios.podspec @@ -7,7 +7,7 @@ Pod::Spec.new do |s| s.summary = "DeepSpeech" s.homepage = "https://github.com/mozilla/DeepSpeech" s.license = "Mozilla Public License 2.0" - s.authors = "Mozilla et al." + s.authors = "DeepSpeech authors" s.platforms = { :ios => "9.0" } s.source = { :git => "https://github.com/mozilla/DeepSpeech.git", :tag => "v#{s.version}" } diff --git a/setup.py b/setup.py index 16c17eb7c..da7052622 100644 --- a/setup.py +++ b/setup.py @@ -98,9 +98,9 @@ def main(): setup( name='deepspeech_training', version=version, - description='Training code for mozilla DeepSpeech', + description='Training code for DeepSpeech', url='https://github.com/mozilla/DeepSpeech', - author='Mozilla', + author='DeepSpeech authors', license='MPL-2.0', # Classifiers help users find your project by categorizing it. 
# From da0209de012e335f244f7248ed7d111fbd7a2bdb Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 7 Dec 2020 10:18:56 +0200 Subject: [PATCH 52/62] Remove trademark from Java binding package names --- native_client/java/Makefile | 2 +- native_client/java/app/build.gradle | 2 +- .../org/mozilla/deepspeech/ExampleInstrumentedTest.java | 4 ++-- native_client/java/app/src/main/AndroidManifest.xml | 2 +- .../java/org/mozilla/deepspeech/DeepSpeechActivity.java | 4 ++-- .../test/java/org/mozilla/deepspeech/ExampleUnitTest.java | 2 +- native_client/java/libdeepspeech/build.gradle | 4 ++-- .../deepspeech/libdeepspeech/test/BasicTest.java | 8 ++++---- .../java/libdeepspeech/src/main/AndroidManifest.xml | 2 +- .../deepspeech/libdeepspeech/DeepSpeechModel.java | 2 +- .../libdeepspeech/DeepSpeechStreamingState.java | 2 +- .../deepspeech/libdeepspeech_doc/CandidateTranscript.java | 2 +- .../libdeepspeech_doc/DeepSpeech_Error_Codes.java | 2 +- .../deepspeech/libdeepspeech_doc/Metadata.java | 2 +- .../{mozilla => }/deepspeech/libdeepspeech_doc/README.rst | 2 +- .../deepspeech/libdeepspeech_doc/TokenMetadata.java | 2 +- .../deepspeech/libdeepspeech/ExampleUnitTest.java | 2 +- 17 files changed, 23 insertions(+), 23 deletions(-) rename native_client/java/libdeepspeech/src/androidTest/java/org/{mozilla => }/deepspeech/libdeepspeech/test/BasicTest.java (95%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech/DeepSpeechModel.java (99%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech/DeepSpeechStreamingState.java (85%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech_doc/CandidateTranscript.java (97%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java (98%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech_doc/Metadata.java (97%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech_doc/README.rst (51%) rename native_client/java/libdeepspeech/src/main/java/org/{mozilla => }/deepspeech/libdeepspeech_doc/TokenMetadata.java (97%) rename native_client/java/libdeepspeech/src/test/java/org/{mozilla => }/deepspeech/libdeepspeech/ExampleUnitTest.java (88%) diff --git a/native_client/java/Makefile b/native_client/java/Makefile index 191b1013e..904936215 100644 --- a/native_client/java/Makefile +++ b/native_client/java/Makefile @@ -28,4 +28,4 @@ maven-bundle: apk $(GRADLE) zipMavenArtifacts bindings: clean ds-swig - $(DS_SWIG_ENV) swig -c++ -java -package org.mozilla.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i + $(DS_SWIG_ENV) swig -c++ -java -package org.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i diff --git a/native_client/java/app/build.gradle b/native_client/java/app/build.gradle index c1aed496a..3b5b124aa 100644 --- a/native_client/java/app/build.gradle +++ b/native_client/java/app/build.gradle @@ -4,7 +4,7 @@ android { compileSdkVersion 27 defaultConfig { - applicationId "org.mozilla.deepspeech" + applicationId "org.deepspeech" minSdkVersion 21 targetSdkVersion 27 versionName androidGitVersion.name() diff --git a/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java 
b/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java index 6c3e7f91f..ea6458a13 100644 --- a/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java +++ b/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech; +package org.deepspeech; import android.content.Context; import android.support.test.InstrumentationRegistry; @@ -21,6 +21,6 @@ public void useAppContext() { // Context of the app under test. Context appContext = InstrumentationRegistry.getTargetContext(); - assertEquals("org.mozilla.deepspeech", appContext.getPackageName()); + assertEquals("org.deepspeech", appContext.getPackageName()); } } diff --git a/native_client/java/app/src/main/AndroidManifest.xml b/native_client/java/app/src/main/AndroidManifest.xml index 0702cc107..668ef13f9 100644 --- a/native_client/java/app/src/main/AndroidManifest.xml +++ b/native_client/java/app/src/main/AndroidManifest.xml @@ -1,6 +1,6 @@ + package="org.deepspeech"> + package="org.deepspeech.libdeepspeech" /> diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechModel.java similarity index 99% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechModel.java index ce313d20d..5fd85b520 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; /** * @brief Exposes a DeepSpeech model in Java diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechStreamingState.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechStreamingState.java similarity index 85% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechStreamingState.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechStreamingState.java index 9bced2547..cd9aafe42 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechStreamingState.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/DeepSpeechStreamingState.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; public final class DeepSpeechStreamingState { private SWIGTYPE_p_StreamingState _sp; diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/CandidateTranscript.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/CandidateTranscript.java similarity index 97% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/CandidateTranscript.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/CandidateTranscript.java index 72dc0f1f4..53a0ef643 100644 --- 
a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/CandidateTranscript.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/CandidateTranscript.java @@ -6,7 +6,7 @@ * the SWIG interface file instead. * ----------------------------------------------------------------------------- */ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; /** * A single transcript computed by the model, including a confidence
diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java similarity index 98% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java index 3fad4553b..0a05439d8 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java @@ -6,7 +6,7 @@ * the SWIG interface file instead. * ----------------------------------------------------------------------------- */ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; public enum DeepSpeech_Error_Codes { ERR_OK(0x0000), diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/Metadata.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/Metadata.java similarity index 97% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/Metadata.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/Metadata.java index 9ef25e1ea..b85fc82e3 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/Metadata.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/Metadata.java @@ -6,7 +6,7 @@ * the SWIG interface file instead. * ----------------------------------------------------------------------------- */ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; /** * An array of CandidateTranscript objects computed by the model. diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/README.rst b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/README.rst similarity index 51% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/README.rst rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/README.rst index bd89f9b82..0181ab312 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/README.rst +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/README.rst @@ -8,4 +8,4 @@ To update, please install SWIG (4.0 at least) and then run from native_client/ja .. 
code-block:: - swig -c++ -java -doxygen -package org.mozilla.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc -o jni/deepspeech_wrap.cpp jni/deepspeech.i + swig -c++ -java -doxygen -package org.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc -o jni/deepspeech_wrap.cpp jni/deepspeech.i diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/TokenMetadata.java b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/TokenMetadata.java similarity index 97% rename from native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/TokenMetadata.java rename to native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/TokenMetadata.java index 6f976f214..45ed90523 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/TokenMetadata.java +++ b/native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/TokenMetadata.java @@ -6,7 +6,7 @@ * the SWIG interface file instead. * ----------------------------------------------------------------------------- */ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; /** * Stores text of an individual token, along with its timing information diff --git a/native_client/java/libdeepspeech/src/test/java/org/mozilla/deepspeech/libdeepspeech/ExampleUnitTest.java b/native_client/java/libdeepspeech/src/test/java/org/deepspeech/libdeepspeech/ExampleUnitTest.java similarity index 88% rename from native_client/java/libdeepspeech/src/test/java/org/mozilla/deepspeech/libdeepspeech/ExampleUnitTest.java rename to native_client/java/libdeepspeech/src/test/java/org/deepspeech/libdeepspeech/ExampleUnitTest.java index 793cbf711..862d6bd0d 100644 --- a/native_client/java/libdeepspeech/src/test/java/org/mozilla/deepspeech/libdeepspeech/ExampleUnitTest.java +++ b/native_client/java/libdeepspeech/src/test/java/org/deepspeech/libdeepspeech/ExampleUnitTest.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech.libdeepspeech; +package org.deepspeech.libdeepspeech; import org.junit.Test; From c7ce999e02aceb0bb7c496be185f3dae2eab62fd Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 7 Dec 2020 10:20:02 +0200 Subject: [PATCH 53/62] Remove trademark from Swift binding project identifier --- .../swift/deepspeech_ios.xcodeproj/project.pbxproj | 8 ++++---- .../deepspeech_ios_test.xcodeproj/project.pbxproj | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj index d400f5918..215a8ae2c 100644 --- a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj @@ -395,7 +395,7 @@ "$(PROJECT_DIR)", ); MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios"; OTHER_LDFLAGS = "-lstdc++"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; @@ -434,7 +434,7 @@ "$(PROJECT_DIR)", ); MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios"; OTHER_LDFLAGS = "-lstdc++"; PRODUCT_NAME = 
"$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; @@ -454,7 +454,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-iosTests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -472,7 +472,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-iosTests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; diff --git a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj index ea1c51f95..524126c86 100644 --- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -486,7 +486,7 @@ "$(inherited)", "$(PROJECT_DIR)", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-test"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -511,7 +511,7 @@ "$(inherited)", "$(PROJECT_DIR)", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-test"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -531,7 +531,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testTests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -552,7 +552,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testTests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -571,7 +571,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testUITests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; @@ -590,7 +590,7 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testUITests"; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; From f6ddc4f72ca3cf5a85d5797b185a4c034ddee9ce Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 7 Dec 2020 10:53:21 +0200 Subject: [PATCH 54/62] Add some guidelines for conda environments for training --- doc/TRAINING.rst | 20 ++++++++++++++++++-- doc/examples | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 7eed6b35a..d5b1c7a95 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -15,17 +15,19 @@ Prerequisites for training a model Getting the training code ^^^^^^^^^^^^^^^^^^^^^^^^^ -Clone the latest released stable branch from Github (e.g. 0.8.2, check `here `_): +Clone the latest released stable branch from Github (e.g. 0.9.2, check `here `_): .. 
code-block:: bash - git clone --branch v0.8.2 https://github.com/mozilla/DeepSpeech + git clone --branch v0.9.2 https://github.com/mozilla/DeepSpeech If you plan on committing code or you want to report bugs, please use the master branch. Creating a virtual environment ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Throughout the documentation we assume you are using **virtualenv** to manage your Python environments. This setup is the one used and recommended by the project authors and is the easiest way to make sure you won't run into environment issues. If you're using **Anaconda, Miniconda or Mamba**, first read the instructions at :ref:`training-with-conda` and then continue from the installation step below. + In creating a virtual environment you will create a directory containing a ``python3`` binary and everything needed to run deepspeech. You can use whatever directory you want. For the purpose of the documentation, we will rely on ``$HOME/tmp/deepspeech-train-venv``. You can create it using this command: .. code-block:: @@ -527,3 +529,17 @@ Example of creating a pre-augmented test set: --augment overlay[source=noise.sdb,layers=1,snr=20~10] \ --augment resample[rate=12000:8000~4000] \ test.sdb test-augmented.sdb + +.. _training-with-conda: + +Training from an Anaconda or miniconda environment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Keep in mind that none of the core authors use Anaconda or miniconda, so this setup is not guaranteed to work. If you experience problems, try using a non-conda setup first. We're happy to accept pull requests fixing any incompatibilities with conda setups, but we will not offer any support ourselves beyond reviewing pull requests. + +To prevent common problems, make sure you **always use a separate environment when setting things up for training**: + +.. code-block:: bash + + (base) $ conda create -n deepspeech python=3.7 + (base) $ conda activate deepspeech diff --git a/doc/examples b/doc/examples index 6f5f501fa..59388e116 160000 --- a/doc/examples +++ b/doc/examples @@ -1 +1 @@ -Subproject commit 6f5f501fa62743f1b78fe162eb1a579a450bd38f +Subproject commit 59388e116b70440a772c32d09977f5295e3d69c4 From 6640cf2341b7fc5252ebd060ff14c3b65f960e28 Mon Sep 17 00:00:00 2001 From: Catalin Voss Date: Mon, 7 Dec 2020 04:07:34 -0800 Subject: [PATCH 55/62] Remote training I/O once more (#3437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Redo remote I/O changes once more; this time without messing with taskcluster * Add bin changes * Fix merge-induced issue? 
* For the interleaved case with multiple collections, unpack audio on the fly To reproduce the previous failure rm data/smoke_test/ldc93s1.csv rm data/smoke_test/ldc93s1.sdb rm -rf /tmp/ldc93s1_cache_sdb_csv rm -rf /tmp/ckpt_sdb_csv rm -rf /tmp/train_sdb_csv ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 16000 python -u DeepSpeech.py --noshow_progressbar --noearly_stop --train_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --train_batch_size 1 --feature_cache /tmp/ldc93s1_cache_sdb_csv --dev_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --dev_batch_size 1 --test_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --test_batch_size 1 --n_hidden 100 --epochs 109 --max_to_keep 1 --checkpoint_dir /tmp/ckpt_sdb_csv --learning_rate 0.001 --dropout_rate 0.05 --export_dir /tmp/train_sdb_csv --scorer_path data/smoke_test/pruned_lm.scorer --audio_sample_rate 16000 * Attempt to preserve length information with a wrapper around `map()`… this gets pretty python-y * Call the right `__next__()` * Properly implement the rest of the map wrappers here…… * Fix trailing whitespace situation and other linter complaints * Remove data accidentally checked in * Fix overlay augmentations * Wavs must be open in rb mode if we're passing in an external file pointer -- this confused me * Lint whitespace * Revert "Fix trailing whitespace situation and other linter complaints" This reverts commit c3c45397a2f98e9b00d00c18c4ced4fc52475032. * Fix linter issue but without such an aggressive diff * Move unpack_maybe into sample_collections * Use unpack_maybe in place of duplicate lambda * Fix confusing comment * Add clarifying comment for on-the-fly unpacking --- bin/compare_samples.py | 4 +- training/deepspeech_training/train.py | 28 +++--- training/deepspeech_training/util/audio.py | 42 ++++++--- .../deepspeech_training/util/augmentations.py | 14 ++- .../util/check_characters.py | 5 +- training/deepspeech_training/util/config.py | 4 +- .../deepspeech_training/util/downloader.py | 8 +- .../util/evaluate_tools.py | 4 +- training/deepspeech_training/util/helpers.py | 26 ++++++ training/deepspeech_training/util/io.py | 81 +++++++++++++++++ .../util/sample_collections.py | 88 ++++++++++++++----- 11 files changed, 249 insertions(+), 55 deletions(-) create mode 100644 training/deepspeech_training/util/io.py diff --git a/bin/compare_samples.py b/bin/compare_samples.py index 94108a7a0..27898cd1e 100755 --- a/bin/compare_samples.py +++ b/bin/compare_samples.py @@ -15,8 +15,8 @@ def fail(message): def compare_samples(): - sample1 = load_sample(CLI_ARGS.sample1) - sample2 = load_sample(CLI_ARGS.sample2) + sample1 = load_sample(CLI_ARGS.sample1).unpack() + sample2 = load_sample(CLI_ARGS.sample2).unpack() if sample1.audio_format != sample2.audio_format: fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format)) if sample1.duration != sample2.duration: diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 8bf7a3545..94ca7c04d 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -35,6 +35,7 @@ from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn +from .util.io import open_remote, remove_remote, listdir_remote, is_remote_path, isdir_remote check_ctcdecoder_version() @@ -512,9 +513,10 @@ def train(): 
best_dev_path = os.path.join(FLAGS.save_checkpoint_dir, 'best_dev') # Save flags next to checkpoints - os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) + if not is_remote_path(FLAGS.save_checkpoint_dir): + os.makedirs(FLAGS.save_checkpoint_dir, exist_ok=True) flags_file = os.path.join(FLAGS.save_checkpoint_dir, 'flags.txt') - with open(flags_file, 'w') as fout: + with open_remote(flags_file, 'w') as fout: fout.write(FLAGS.flags_into_string()) with tfv1.Session(config=Config.session_config) as session: @@ -541,7 +543,7 @@ def run_set(set_name, epoch, init_op, dataset=None): feature_cache_index = FLAGS.feature_cache + '.index' if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): log_info('Invalidating feature cache') - os.remove(feature_cache_index) # this will let TF also overwrite the related cache data files + remove_remote(feature_cache_index) # this will let TF also overwrite the related cache data files # Setup progress bar class LossWidget(progressbar.widgets.FormatLabel): @@ -810,13 +812,13 @@ def export(): output_filename = FLAGS.export_file_name + '.pb' if FLAGS.remove_export: - if os.path.isdir(FLAGS.export_dir): + if isdir_remote(FLAGS.export_dir): log_info('Removing old export') - shutil.rmtree(FLAGS.export_dir) + remove_remote(FLAGS.export_dir) output_graph_path = os.path.join(FLAGS.export_dir, output_filename) - if not os.path.isdir(FLAGS.export_dir): + if not is_remote_path(FLAGS.export_dir) and not os.path.isdir(FLAGS.export_dir): os.makedirs(FLAGS.export_dir) frozen_graph = tfv1.graph_util.convert_variables_to_constants( @@ -829,7 +831,7 @@ def export(): dest_nodes=output_names) if not FLAGS.export_tflite: - with open(output_graph_path, 'wb') as fout: + with open_remote(output_graph_path, 'wb') as fout: fout.write(frozen_graph.SerializeToString()) else: output_tflite_path = os.path.join(FLAGS.export_dir, output_filename.replace('.pb', '.tflite')) @@ -840,7 +842,7 @@ def export(): converter.allow_custom_ops = True tflite_model = converter.convert() - with open(output_tflite_path, 'wb') as fout: + with open_remote(output_tflite_path, 'wb') as fout: fout.write(tflite_model) log_info('Models exported at %s' % (FLAGS.export_dir)) @@ -851,7 +853,7 @@ def export(): FLAGS.export_model_version)) model_runtime = 'tflite' if FLAGS.export_tflite else 'tensorflow' - with open(metadata_fname, 'w') as f: + with open_remote(metadata_fname, 'w') as f: f.write('---\n') f.write('author: {}\n'.format(FLAGS.export_author_id)) f.write('model_name: {}\n'.format(FLAGS.export_model_name)) @@ -873,8 +875,12 @@ def export(): def package_zip(): # --export_dir path/to/export/LANG_CODE/ => path/to/export/LANG_CODE.zip export_dir = os.path.join(os.path.abspath(FLAGS.export_dir), '') # Force ending '/' - zip_filename = os.path.dirname(export_dir) + if is_remote_path(export_dir): + log_error("Cannot package remote path zip %s. Please do this manually." 
% export_dir)
+        return
+
+    zip_filename = os.path.dirname(export_dir)
+
+    shutil.copy(FLAGS.scorer_path, export_dir)

     archive = shutil.make_archive(zip_filename, 'zip', export_dir)
@@ -959,7 +965,7 @@ def main(_):
         tfv1.reset_default_graph()
         FLAGS.export_tflite = True

-        if os.listdir(FLAGS.export_dir):
+        if listdir_remote(FLAGS.export_dir):
             log_error('Directory {} is not empty, please fix this.'.format(FLAGS.export_dir))
             sys.exit(1)
diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 031f13ed6..793182687 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -8,6 +8,7 @@
 from .helpers import LimitingPool
 from collections import namedtuple
+from .io import open_remote, remove_remote, copy_remote, is_remote_path

 AudioFormat = namedtuple('AudioFormat', 'rate channels width')

@@ -117,15 +118,19 @@ def change_audio_type(self, new_audio_type, bitrate=None):
         self.audio_type = new_audio_type


-def _change_audio_type(sample_and_audio_type):
-    sample, audio_type, bitrate = sample_and_audio_type
+def _unpack_and_change_audio_type(sample_and_audio_type):
+    packed_sample, audio_type, bitrate = sample_and_audio_type
+    if hasattr(packed_sample, 'unpack'):
+        sample = packed_sample.unpack()
+    else:
+        sample = packed_sample
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample


-def change_audio_types(samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
+def change_audio_types(packed_samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
     with LimitingPool(processes=processes, process_ahead=process_ahead) as pool:
-        yield from pool.imap(_change_audio_type, map(lambda s: (s, audio_type, bitrate), samples))
+        yield from pool.imap(_unpack_and_change_audio_type, map(lambda s: (s, audio_type, bitrate), packed_samples))


 def get_audio_type_from_extension(ext):
@@ -168,29 +173,45 @@ def __init__(self, audio_path, as_path=False, audio_format=DEFAULT_FORMAT):
         self.audio_format = audio_format
         self.as_path = as_path
         self.open_file = None
+        self.open_wav = None
         self.tmp_file_path = None
+        self.tmp_src_file_path = None

     def __enter__(self):
         if self.audio_path.endswith('.wav'):
-            self.open_file = wave.open(self.audio_path, 'r')
-            if read_audio_format_from_wav_file(self.open_file) == self.audio_format:
+            self.open_file = open_remote(self.audio_path, 'rb')
+            self.open_wav = wave.open(self.open_file)
+            if read_audio_format_from_wav_file(self.open_wav) == self.audio_format:
                 if self.as_path:
+                    self.open_wav.close()
                     self.open_file.close()
                     return self.audio_path
-                return self.open_file
+                return self.open_wav
+            self.open_wav.close()
             self.open_file.close()
+
+        # If the format isn't right, copy the file to local tmp dir and do the conversion on disk
+        if is_remote_path(self.audio_path):
+            _, self.tmp_src_file_path = tempfile.mkstemp(suffix='.wav')
+            copy_remote(self.audio_path, self.tmp_src_file_path)
+            self.audio_path = self.tmp_src_file_path
+
         _, self.tmp_file_path = tempfile.mkstemp(suffix='.wav')
         convert_audio(self.audio_path, self.tmp_file_path, file_type='wav', audio_format=self.audio_format)
         if self.as_path:
             return self.tmp_file_path
-        self.open_file = wave.open(self.tmp_file_path, 'r')
-        return self.open_file
+        self.open_wav = wave.open(self.tmp_file_path, 'rb')
+        return self.open_wav

     def __exit__(self, *args):
         if not self.as_path:
-            self.open_file.close()
+            self.open_wav.close()
+            if self.open_file:
+                self.open_file.close()
         if self.tmp_file_path is not
None: os.remove(self.tmp_file_path) + if self.tmp_src_file_path is not None: + os.remove(self.tmp_src_file_path) def read_frames(wav_file, frame_duration_ms=30, yield_remainder=False): @@ -320,6 +341,7 @@ def read_opus(opus_file): def write_wav(wav_file, pcm_data, audio_format=DEFAULT_FORMAT): + # wav_file is already a file-pointer here with wave.open(wav_file, 'wb') as wav_file_writer: wav_file_writer.setframerate(audio_format.rate) wav_file_writer.setnchannels(audio_format.channels) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 941c17f2b..2422582cd 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -8,7 +8,7 @@ from multiprocessing import Queue, Process from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE -from .sample_collections import samples_from_source +from .sample_collections import samples_from_source, unpack_maybe BUFFER_SIZE = 1 * MEGABYTE SPEC_PARSER = re.compile(r'^(?P[a-z_]+)(\[(?P.*)\])?$') @@ -150,6 +150,12 @@ def _init_augmentation_worker(preparation_context): AUGMENTATION_CONTEXT = preparation_context +def _load_and_augment_sample(timed_sample, context=None): + sample, clock = timed_sample + realized_sample = unpack_maybe(sample) + return _augment_sample((realized_sample, clock), context) + + def _augment_sample(timed_sample, context=None): context = AUGMENTATION_CONTEXT if context is None else context sample, clock = timed_sample @@ -213,12 +219,12 @@ def timed_samples(): context = AugmentationContext(audio_type, augmentations) if process_ahead == 0: for timed_sample in timed_samples(): - yield _augment_sample(timed_sample, context=context) + yield _load_and_augment_sample(timed_sample, context=context) else: with LimitingPool(process_ahead=process_ahead, initializer=_init_augmentation_worker, initargs=(context,)) as pool: - yield from pool.imap(_augment_sample, timed_samples()) + yield from pool.imap(_load_and_augment_sample, timed_samples()) finally: for augmentation in augmentations: augmentation.stop() @@ -256,6 +262,7 @@ def start(self, buffering=BUFFER_SIZE): self.enqueue_process.start() def apply(self, sample, clock=0.0): + sample = unpack_maybe(sample) sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) n_layers = pick_value_from_range(self.layers, clock=clock) audio = sample.audio @@ -265,6 +272,7 @@ def apply(self, sample, clock=0.0): while overlay_offset < len(audio): if self.current_sample is None: next_overlay_sample = self.queue.get() + next_overlay_sample = unpack_maybe(next_overlay_sample) next_overlay_sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) self.current_sample = next_overlay_sample.audio n_required = len(audio) - overlay_offset diff --git a/training/deepspeech_training/util/check_characters.py b/training/deepspeech_training/util/check_characters.py index f155b4ac0..7e6cdd0bd 100644 --- a/training/deepspeech_training/util/check_characters.py +++ b/training/deepspeech_training/util/check_characters.py @@ -19,6 +19,7 @@ import os import sys import unicodedata +from .io import open_remote def main(): parser = argparse.ArgumentParser() @@ -27,14 +28,14 @@ def main(): parser.add_argument("-alpha", "--alphabet-format", help="Bool. 
Print in format for alphabet.txt", action="store_true") parser.add_argument("-unicode", "--disable-unicode-variants", help="Bool. DISABLE check for unicode consistency (use with --alphabet-format)", action="store_true") args = parser.parse_args() - in_files = [os.path.abspath(i) for i in args.csv_files.split(",")] + in_files = args.csv_files.split(",") print("### Reading in the following transcript files: ###") print("### {} ###".format(in_files)) all_text = set() for in_file in in_files: - with open(in_file, "r") as csv_file: + with open_remote(in_file, "r") as csv_file: reader = csv.reader(csv_file) try: next(reader, None) # skip the file header (i.e. "transcript") diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 0b9929e59..18da6eed1 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -13,7 +13,7 @@ from .logging import log_error, log_warn from .helpers import parse_file_size from .augmentations import parse_augmentations - +from .io import path_exists_remote class ConfigSingleton: _config = None @@ -139,7 +139,7 @@ def initialize_globals(): c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000) if FLAGS.one_shot_infer: - if not os.path.exists(FLAGS.one_shot_infer): + if not path_exists_remote(FLAGS.one_shot_infer): log_error('Path specified in --one_shot_infer is not a valid file.') sys.exit(1) diff --git a/training/deepspeech_training/util/downloader.py b/training/deepspeech_training/util/downloader.py index a6d57e3e9..c527eb9b2 100644 --- a/training/deepspeech_training/util/downloader.py +++ b/training/deepspeech_training/util/downloader.py @@ -2,6 +2,7 @@ import progressbar from os import path, makedirs +from .io import open_remote, path_exists_remote, is_remote_path SIMPLE_BAR = ['Progress ', progressbar.Bar(), ' ', progressbar.Percentage(), ' completed'] @@ -9,17 +10,18 @@ def maybe_download(archive_name, target_dir, archive_url): # If archive file does not exist, download it... archive_path = path.join(target_dir, archive_name) - if not path.exists(target_dir): + if not is_remote_path(target_dir) and not path.exists(target_dir): print('No path "%s" - creating ...' % target_dir) makedirs(target_dir) - if not path.exists(archive_path): + if not path_exists_remote(archive_path): print('No archive "%s" - downloading...' % archive_path) req = requests.get(archive_url, stream=True) total_size = int(req.headers.get('content-length', 0)) done = 0 - with open(archive_path, 'wb') as f: + with open_remote(archive_path, 'wb') as f: bar = progressbar.ProgressBar(max_value=total_size if total_size > 0 else progressbar.UnknownLength, widgets=SIMPLE_BAR) + for data in req.iter_content(1024*1024): done += len(data) f.write(data) diff --git a/training/deepspeech_training/util/evaluate_tools.py b/training/deepspeech_training/util/evaluate_tools.py index 66fc82935..68d29f3ee 100644 --- a/training/deepspeech_training/util/evaluate_tools.py +++ b/training/deepspeech_training/util/evaluate_tools.py @@ -10,7 +10,7 @@ from .flags import FLAGS from .text import levenshtein - +from .io import open_remote def pmap(fun, iterable): pool = Pool() @@ -124,5 +124,5 @@ def save_samples_json(samples, output_path): We set ensure_ascii=True to prevent json from escaping non-ASCII chars in the texts. 
diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py
index 195c117e5..7545c8eea 100644
--- a/training/deepspeech_training/util/helpers.py
+++ b/training/deepspeech_training/util/helpers.py
@@ -78,6 +78,32 @@ def __len__(self):
         return self.len

+
+class LenMap:
+    """
+    Wrapper around python map() output object that preserves the original collection length
+    by implementing __len__.
+    """
+    def __init__(self, fn, iterable):
+        try:
+            self.length = len(iterable)
+        except TypeError:
+            self.length = None
+        self.mapobj = map(fn, iterable)
+
+    def __iter__(self):
+        self.mapobj = self.mapobj.__iter__()
+        return self
+
+    def __next__(self):
+        return self.mapobj.__next__()
+
+    def __getitem__(self, key):
+        return self.mapobj.__getitem__(key)
+
+    def __len__(self):
+        return self.length
+
+
 class LimitingPool:
     """Limits unbound ahead-processing of multiprocessing.Pool's imap method
     before items get consumed by the iteration caller.
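For clarity: plain ``map()`` objects do not implement ``len()``, which is what ``LenMap`` adds so the feeding code can still ask a mapped sample collection for its size. A minimal usage sketch (not part of the patch; assumes the ``deepspeech_training`` package layout above):

.. code-block:: python

    from deepspeech_training.util.helpers import LenMap

    durations = [1.0, 2.0, 3.0]

    plain = map(lambda d: d * 2, durations)
    # len(plain) raises TypeError: object of type 'map' has no len()

    wrapped = LenMap(lambda d: d * 2, durations)
    assert len(wrapped) == 3                 # length preserved from the source list
    assert list(wrapped) == [2.0, 4.0, 6.0]  # mapping is still applied lazily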
diff --git a/training/deepspeech_training/util/io.py b/training/deepspeech_training/util/io.py
new file mode 100644
index 000000000..947b43af6
--- /dev/null
+++ b/training/deepspeech_training/util/io.py
@@ -0,0 +1,81 @@
+"""
+A set of I/O utils that allow us to open files on remote storage as if they were present locally and access
+into HDFS storage using Tensorflow's C++ FileStream API.
+Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets.
+"""
+import os
+from tensorflow.io import gfile
+
+
+def is_remote_path(path):
+    """
+    Returns True iff the path is one of the remote formats that this
+    module supports
+    """
+    return path.startswith('gs://') or path.startswith('hdfs://')
+
+
+def path_exists_remote(path):
+    """
+    Wrapper that allows existence check of local and remote paths like
+    `gs://...`
+    """
+    if is_remote_path(path):
+        return gfile.exists(path)
+    return os.path.exists(path)
+
+
+def copy_remote(src, dst, overwrite=False):
+    """
+    Allows us to copy a file from local to remote or vice versa
+    """
+    return gfile.copy(src, dst, overwrite)
+
+
+def open_remote(path, mode='r', buffering=-1, encoding=None, newline=None, closefd=True, opener=None):
+    """
+    Wrapper around open() method that can handle remote paths like `gs://...`
+    off Google Cloud using Tensorflow's IO helpers.
+
+    buffering, encoding, newline, closefd, and opener are ignored for remote files
+
+    This enables us to do:
+    with open_remote('gs://.....', mode='w+') as f:
+        do something with the file f, whether or not we have local access to it
+    """
+    if is_remote_path(path):
+        return gfile.GFile(path, mode=mode)
+    return open(path, mode, buffering=buffering, encoding=encoding, newline=newline, closefd=closefd, opener=opener)
+
+
+def isdir_remote(path):
+    """
+    Wrapper to check if remote and local paths are directories
+    """
+    if is_remote_path(path):
+        return gfile.isdir(path)
+    return os.path.isdir(path)
+
+
+def listdir_remote(path):
+    """
+    Wrapper to list paths in local dirs (alternative to using a glob, I suppose)
+    """
+    if is_remote_path(path):
+        return gfile.listdir(path)
+    return os.listdir(path)
+
+
+def glob_remote(filename):
+    """
+    Wrapper that provides globs on local and remote paths like `gs://...`
+    """
+    return gfile.glob(filename)
+
+
+def remove_remote(filename):
+    """
+    Wrapper that can remove local and remote files like `gs://...`
+    """
+    return gfile.remove(filename)
\ No newline at end of file
diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py
index 3f1b55ea2..085439c98 100644
--- a/training/deepspeech_training/util/sample_collections.py
+++ b/training/deepspeech_training/util/sample_collections.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from functools import partial

-from .helpers import KILOBYTE, MEGABYTE, GIGABYTE, Interleaved
+from .helpers import KILOBYTE, MEGABYTE, GIGABYTE, Interleaved, LenMap
 from .audio import (
     Sample,
     DEFAULT_FORMAT,
@@ -18,6 +18,7 @@
     get_audio_type_from_extension,
     write_wav
 )
+from .io import open_remote, is_remote_path

 BIG_ENDIAN = 'big'
 INT_SIZE = 4
@@ -59,6 +60,38 @@ def __init__(self, audio_type, raw_data, transcript, audio_format=DEFAULT_FORMAT
         self.transcript = transcript

+
+class PackedSample:
+    """
+    A wrapper that we can carry around in an iterator and pass to a child process in order to
+    have the child process do the loading/unpacking of the sample, allowing for parallel file
+    I/O.
+    """
+    def __init__(self, filename, audio_type, label):
+        self.filename = filename
+        self.audio_type = audio_type
+        self.label = label
+
+    def unpack(self):
+        with open_remote(self.filename, 'rb') as audio_file:
+            data = audio_file.read()
+        if self.label is None:
+            s = Sample(self.audio_type, data, sample_id=self.filename)
+        else:
+            s = LabeledSample(self.audio_type, data, self.label, sample_id=self.filename)
+        return s
+
+
+def unpack_maybe(sample):
+    """
+    Loads the supplied sample from disk (or the network) if the audio isn't loaded into memory already.
+    """
+    if hasattr(sample, 'unpack'):
+        realized_sample = sample.unpack()
+    else:
+        realized_sample = sample
+    return realized_sample
+
+
 def load_sample(filename, label=None):
     """
     Loads audio-file as a (labeled or unlabeled) sample

     Parameters
     ----------
     filename : str
         Filename of the audio-file to load as sample
     label : str
         Label (transcript) of the sample.
-        If None: return util.audio.Sample instance
-        Otherwise: return util.sample_collections.LabeledSample instance
+        If None: returned result.unpack() will return util.audio.Sample instance
+        Otherwise: returned result.unpack() will return util.sample_collections.LabeledSample instance

     Returns
     -------
-    util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance
+    util.sample_collections.PackedSample, a wrapper object, on which calling unpack() will return
+    util.audio.Sample instance if label is None, else util.sample_collections.LabeledSample instance
     """
     ext = os.path.splitext(filename)[1].lower()
     audio_type = get_audio_type_from_extension(ext)
     if audio_type is None:
         raise ValueError('Unknown audio type extension "{}"'.format(ext))
-    with open(filename, 'rb') as audio_file:
-        if label is None:
-            return Sample(audio_type, audio_file.read(), sample_id=filename)
-        return LabeledSample(audio_type, audio_file.read(), label, sample_id=filename)
+    return PackedSample(filename, audio_type, label)


 class DirectSDBWriter:
@@ -119,7 +149,7 @@ def __init__(self,
             raise ValueError('Audio type "{}" not supported'.format(audio_type))
         self.audio_type = audio_type
         self.bitrate = bitrate
-        self.sdb_file = open(sdb_filename, 'wb', buffering=buffering)
+        self.sdb_file = open_remote(sdb_filename, 'wb', buffering=buffering)
         self.offsets = []
         self.num_samples = 0
@@ -215,7 +245,7 @@ def __init__(self,
         """
         self.sdb_filename = sdb_filename
         self.id_prefix = sdb_filename if id_prefix is None else id_prefix
-        self.sdb_file = open(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering)
+        self.sdb_file = open_remote(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering)
         self.offsets = []
         if self.sdb_file.read(len(MAGIC)) != MAGIC:
             raise RuntimeError('No Sample Database')
@@ -332,6 +362,8 @@ def __init__(self,
         labeled : bool or None
             If True: Writes labeled samples (util.sample_collections.LabeledSample) only.
             If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances.
+
+        Currently only works with local files (not gs:// or hdfs://...)
         """
         self.csv_filename = Path(csv_filename)
         self.csv_base_dir = self.csv_filename.parent.resolve().absolute()
@@ -345,7 +377,7 @@ def __init__(self,
         self.labeled = labeled
         if labeled:
             fieldnames.append('transcript')
-        self.csv_file = open(csv_filename, 'w', encoding='utf-8', newline='')
+        self.csv_file = open_remote(csv_filename, 'w', encoding='utf-8', newline='')
         self.csv_writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames)
         self.csv_writer.writeheader()
         self.counter = 0
@@ -380,7 +412,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):


 class TarWriter:  # pylint: disable=too-many-instance-attributes
-    """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file"""
+    """Sample collection writer for writing a CSV data-set and all its referenced WAV samples to a tar file."""
     def __init__(self,
                  tar_filename,
                  gz=False,
@@ -398,6 +430,8 @@ def __init__(self,
             If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances.
         include : str[]
             List of files to include into tar root.
+
+        Currently only works with local files (not gs:// or hdfs://...)
         """
         self.tar = tarfile.open(tar_filename, 'w:gz' if gz else 'w')
         samples_dir = tarfile.TarInfo('samples')
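Since ``load_sample`` now returns a ``PackedSample`` wrapper instead of a fully loaded sample, call sites need an explicit ``unpack()`` step (which is what the ``bin/compare_samples.py`` change above adds). A minimal sketch of the new calling convention; the bucket path and transcript are placeholders:

.. code-block:: python

    from deepspeech_training.util.sample_collections import load_sample, unpack_maybe

    packed = load_sample('gs://my-bucket/clips/sample-000001.wav', label='hello world')
    sample = packed.unpack()  # file I/O happens here, e.g. inside a worker process
    print(sample.duration, sample.transcript)

    # unpack_maybe() accepts both packed and already-loaded samples:
    same_sample = unpack_maybe(packed)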
""" self.tar = tarfile.open(tar_filename, 'w:gz' if gz else 'w') samples_dir = tarfile.TarInfo('samples') @@ -498,8 +532,7 @@ def __init__(self, csv_filename, labeled=None, reverse=False): If the order of the samples should be reversed """ rows = [] - csv_dir = Path(csv_filename).parent - with open(csv_filename, 'r', encoding='utf8') as csv_file: + with open_remote(csv_filename, 'r', encoding='utf8') as csv_file: reader = csv.DictReader(csv_file) if 'transcript' in reader.fieldnames: if labeled is None: @@ -508,9 +541,12 @@ def __init__(self, csv_filename, labeled=None, reverse=False): raise RuntimeError('No transcript data (missing CSV column)') for row in reader: wav_filename = Path(row['wav_filename']) - if not wav_filename.is_absolute(): - wav_filename = csv_dir / wav_filename - wav_filename = str(wav_filename) + if not wav_filename.is_absolute() and not is_remote_path(row['wav_filename']): + wav_filename = Path(csv_filename).parent / wav_filename + wav_filename = str(wav_filename) + else: + # Pathlib otherwise removes a / from filenames like hdfs:// + wav_filename = row['wav_filename'] wav_filesize = int(row['wav_filesize']) if 'wav_filesize' in row else 0 if labeled: rows.append((wav_filename, wav_filesize, row['transcript'])) @@ -554,6 +590,11 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None, re Loads and combines samples from a list of source files. Sources are combined in an interleaving way to keep default sample order from shortest to longest. + Note that when using distributed training, it is much faster to call this function with single pre- + sorted sample source, because this allows for parallelization of the file I/O. (If this function is + called with multiple sources, the samples have to be unpacked on a single parent process to allow + for reading their durations.) + Parameters ---------- sample_sources : list of str @@ -570,13 +611,20 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None, re Returns ------- - iterable of util.sample_collections.LabeledSample (labeled=True) or util.audio.Sample (labeled=False) supporting len + iterable of util.sample_collections.PackedSample if a single collection is provided, wrapping + LabeledSample (labeled=True) or util.audio.Sample (labeled=False) supporting len + or LabeledSample / util.audio.Sample directly, if multiple collections are provided """ sample_sources = list(sample_sources) if len(sample_sources) == 0: raise ValueError('No files') if len(sample_sources) == 1: return samples_from_source(sample_sources[0], buffering=buffering, labeled=labeled, reverse=reverse) - cols = [samples_from_source(source, buffering=buffering, labeled=labeled, reverse=reverse) - for source in sample_sources] + + # If we wish to interleave based on duration, we have to unpack the audio. Note that this unpacking should + # be done lazily onn the fly so that it respects the LimitingPool logic used in the feeding code. 
+    cols = [LenMap(
+        unpack_maybe, samples_from_source(source, buffering=buffering, labeled=labeled, reverse=reverse))
+        for source in sample_sources]
+
     return Interleaved(*cols, key=lambda s: s.duration, reverse=reverse)

From 4e55d63351b4d6d80b4057c258e2d129c428095b Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 8 Dec 2020 11:44:31 +0200
Subject: [PATCH 56/62] Fix package name reference in Java API docs (#3458)

---
 doc/Java-API.rst | 8 ++++----
 doc/examples     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/Java-API.rst b/doc/Java-API.rst
index e0c6a7dd9..a61bd1b16 100644
--- a/doc/Java-API.rst
+++ b/doc/Java-API.rst
@@ -4,26 +4,26 @@ Java
 DeepSpeechModel
 ---------------
 
-.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::DeepSpeechModel
+.. doxygenclass:: org::deepspeech::libdeepspeech::DeepSpeechModel
    :project: deepspeech-java
    :members:
 
 Metadata
 --------
 
-.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::Metadata
+.. doxygenclass:: org::deepspeech::libdeepspeech::Metadata
    :project: deepspeech-java
    :members: getNumTranscripts, getTranscript
 
 CandidateTranscript
 -------------------
 
-.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::CandidateTranscript
+.. doxygenclass:: org::deepspeech::libdeepspeech::CandidateTranscript
    :project: deepspeech-java
    :members: getNumTokens, getConfidence, getToken
 
 TokenMetadata
 -------------
 
-.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::TokenMetadata
+.. doxygenclass:: org::deepspeech::libdeepspeech::TokenMetadata
    :project: deepspeech-java
    :members: getText, getTimestep, getStartTime

diff --git a/doc/examples b/doc/examples
index 59388e116..df643fca4 160000
--- a/doc/examples
+++ b/doc/examples
@@ -1 +1 @@
-Subproject commit 59388e116b70440a772c32d09977f5295e3d69c4
+Subproject commit df643fca46b4d8ab654f547ef2e36132f1154412

From 8c8387c45a12f54f8d9ea4d7f917cf76c21aa136 Mon Sep 17 00:00:00 2001
From: Sjors Holtrop <39123298+sholtrop@users.noreply.github.com>
Date: Tue, 8 Dec 2020 12:19:21 +0100
Subject: [PATCH 57/62] Rename Stream class to StreamImpl, export its type as Stream (#3456)

---
 native_client/javascript/index.ts | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/native_client/javascript/index.ts b/native_client/javascript/index.ts
index cbc943334..eb60fb73c 100644
--- a/native_client/javascript/index.ts
+++ b/native_client/javascript/index.ts
@@ -65,7 +65,7 @@ export interface Metadata {
  * Provides an interface to a DeepSpeech stream. The constructor cannot be called
  * directly, use :js:func:`Model.createStream`.
  */
-class Stream {
+class StreamImpl {
     /** @internal */
     _impl: any;
 
@@ -134,6 +134,12 @@ class Stream {
         return result;
     }
 }
+/**
+ * Exposes the type of Stream without exposing the class itself, because
+ * StreamImpl instances should not be constructed directly; they are
+ * created via :js:func:`Model.createStream`.
+ */
+export type Stream = StreamImpl;
 
 /**
 * An object providing an interface to a trained DeepSpeech model.
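
A quick aside before the remaining hunks of this rename: the lazy-loading flow introduced by the sample_collections.py changes above is easier to see outside of diff context. The following is a minimal editorial sketch rather than part of any patch; it assumes only load_sample and unpack_maybe as defined above plus the standard library, and the WAV paths and worker count are hypothetical:

    from multiprocessing import Pool

    from deepspeech_training.util.sample_collections import load_sample, unpack_maybe

    if __name__ == '__main__':
        # Cheap: load_sample() no longer reads audio, it only wraps the
        # filename, audio type and label in a PackedSample.
        packed = [load_sample('data/clip_{}.wav'.format(i)) for i in range(100)]

        # Expensive: each worker runs PackedSample.unpack(), so the file I/O
        # (local or remote, via open_remote) happens in parallel.
        with Pool(processes=4) as pool:
            samples = pool.map(unpack_maybe, packed)

Because a PackedSample carries only a filename, an audio type, and a label, it pickles cheaply across process boundaries; the costly read is deferred until unpack() runs inside a worker, which is what enables the parallel file I/O.
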
@@ -306,12 +312,12 @@ export class Model { * * @throws on error */ - createStream(): Stream { + createStream(): StreamImpl { const [status, ctx] = binding.CreateStream(this._impl); if (status !== 0) { throw `CreateStream failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; } - return new Stream(ctx); + return new StreamImpl(ctx); } } @@ -341,7 +347,7 @@ export function FreeMetadata(metadata: Metadata): void { * * @param stream A streaming state pointer returned by :js:func:`Model.createStream`. */ -export function FreeStream(stream: Stream): void { +export function FreeStream(stream: StreamImpl): void { binding.FreeStream(stream._impl); } From 25c4f97aa74d091ed132d467676f47c1c115b67f Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 8 Dec 2020 12:10:51 +0200 Subject: [PATCH 58/62] Move linting job to CircleCI --- .circleci/config.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9e7f804d8..6727e3c27 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -72,6 +72,30 @@ jobs: docker push "${DOCKERHUB_REPO}:${CIRCLE_TAG}" fi + lint: + docker: + - image: circleci/python:3.7.9 + steps: + - checkout + - run: + name: Install dependencies + command: | + pip install --upgrade cardboardlint pylint + - run: + name: Run linter + command: | + set -ex + # Check if branch can be merged with master (if failing script will stop due to set -e) + git config user.email "you@example.com" + git config user.name "Your Name" + git merge --no-commit --no-ff origin/master + + # Undo merge changes if any + git reset --hard $CIRCLE_BRANCH + + # Lint differences against master + cardboardlinter --refspec origin/master -n auto; + workflows: version: 2 build-deploy: @@ -87,3 +111,7 @@ workflows: filters: tags: only: /.*/ + + lint: + jobs: + - lint From 0e2209e2b32ea48d9a6cd6d2906ffb04ffbf597e Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 8 Dec 2020 12:34:00 +0200 Subject: [PATCH 59/62] Remove Travis --- .travis.yml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f64b1a349..000000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: python - -cache: pip -before_cache: - - rm ~/.cache/pip/log/debug.log - -python: - - "3.6" - -jobs: - include: - - name: cardboard linter - install: - - pip install --upgrade cardboardlint pylint - script: | - # Run cardboardlinter, in case of pull requests - if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - if [ "$TRAVIS_BRANCH" != "master" ]; then - git fetch origin $TRAVIS_BRANCH:$TRAVIS_BRANCH - fi - cardboardlinter --refspec $TRAVIS_BRANCH -n auto; - fi From 1102185abfec9f3b4a870abb1f8e2d60f2ad4caa Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 8 Dec 2020 13:36:28 +0200 Subject: [PATCH 60/62] More branding fixes for docs & Java bindings --- doc/Java-Examples.rst | 8 ++++---- doc/NodeJS-API.rst | 2 +- doc/doxygen-java.conf | 2 +- .../{mozilla => }/deepspeech/ExampleInstrumentedTest.java | 0 .../org/{mozilla => }/deepspeech/DeepSpeechActivity.java | 0 .../org/{mozilla => }/deepspeech/ExampleUnitTest.java | 0 6 files changed, 6 insertions(+), 6 deletions(-) rename native_client/java/app/src/androidTest/java/org/{mozilla => }/deepspeech/ExampleInstrumentedTest.java (100%) rename native_client/java/app/src/main/java/org/{mozilla => }/deepspeech/DeepSpeechActivity.java (100%) rename 
native_client/java/app/src/test/java/org/{mozilla => }/deepspeech/ExampleUnitTest.java (100%) diff --git a/doc/Java-Examples.rst b/doc/Java-Examples.rst index 46ffa1751..04836ed52 100644 --- a/doc/Java-Examples.rst +++ b/doc/Java-Examples.rst @@ -1,12 +1,12 @@ Java API Usage example ====================== -Examples are from `native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java`. +Examples are from `native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java`. Creating a model instance and loading model ------------------------------------------- -.. literalinclude:: ../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java +.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java :language: java :linenos: :lineno-match: @@ -16,7 +16,7 @@ Creating a model instance and loading model Performing inference -------------------- -.. literalinclude:: ../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java +.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java :language: java :linenos: :lineno-match: @@ -26,4 +26,4 @@ Performing inference Full source code ---------------- -See :download:`Full source code<../native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java>`. +See :download:`Full source code<../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java>`. diff --git a/doc/NodeJS-API.rst b/doc/NodeJS-API.rst index b6170b5b7..1901e513d 100644 --- a/doc/NodeJS-API.rst +++ b/doc/NodeJS-API.rst @@ -10,7 +10,7 @@ Model Stream ------ -.. js:autoclass:: Stream +.. js:autoclass:: StreamImpl :members: Module exported methods diff --git a/doc/doxygen-java.conf b/doc/doxygen-java.conf index a8d65c693..9516d6eca 100644 --- a/doc/doxygen-java.conf +++ b/doc/doxygen-java.conf @@ -790,7 +790,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/ native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/ +INPUT = native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech/ native_client/java/libdeepspeech/src/main/java/org/deepspeech/libdeepspeech_doc/ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. 
Doxygen uses diff --git a/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java b/native_client/java/app/src/androidTest/java/org/deepspeech/ExampleInstrumentedTest.java similarity index 100% rename from native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java rename to native_client/java/app/src/androidTest/java/org/deepspeech/ExampleInstrumentedTest.java diff --git a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java similarity index 100% rename from native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java rename to native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java diff --git a/native_client/java/app/src/test/java/org/mozilla/deepspeech/ExampleUnitTest.java b/native_client/java/app/src/test/java/org/deepspeech/ExampleUnitTest.java similarity index 100% rename from native_client/java/app/src/test/java/org/mozilla/deepspeech/ExampleUnitTest.java rename to native_client/java/app/src/test/java/org/deepspeech/ExampleUnitTest.java From d422955c4abdcfadf04e08755a4588c3d6a8b786 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 8 Dec 2020 13:52:04 +0200 Subject: [PATCH 61/62] Fix doc references to renamed StreamImpl class --- native_client/javascript/index.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/native_client/javascript/index.ts b/native_client/javascript/index.ts index eb60fb73c..e98c3012b 100644 --- a/native_client/javascript/index.ts +++ b/native_client/javascript/index.ts @@ -306,9 +306,9 @@ export class Model { } /** - * Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. + * Create a new streaming inference state. One can then call :js:func:`StreamImpl.feedAudioContent` and :js:func:`StreamImpl.finishStream` on the returned stream object. * - * @return a :js:func:`Stream` object that represents the streaming state. + * @return a :js:func:`StreamImpl` object that represents the streaming state. * * @throws on error */ @@ -334,7 +334,7 @@ export function FreeModel(model: Model): void { /** * Free memory allocated for metadata information. 
* - * @param metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Stream.finishStreamWithMetadata` + * @param metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`StreamImpl.finishStreamWithMetadata` */ export function FreeMetadata(metadata: Metadata): void { binding.FreeMetadata(metadata); From 1be44c63fc1b241cc41fdcfb70b59fc4274cfc3d Mon Sep 17 00:00:00 2001 From: imrahul3610 Date: Sun, 8 Nov 2020 03:19:16 +0530 Subject: [PATCH 62/62] Hotword support for .NET client tests --- native_client/dotnet/DeepSpeechConsole/Program.cs | 15 +++++++++++++++ taskcluster/tc-asserts.sh | 5 +++-- taskcluster/tc-netframework-ds-tests.sh | 2 ++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/DeepSpeechConsole/Program.cs index 68f3fc54b..55bd8fd57 100644 --- a/native_client/dotnet/DeepSpeechConsole/Program.cs +++ b/native_client/dotnet/DeepSpeechConsole/Program.cs @@ -37,12 +37,14 @@ static void Main(string[] args) string model = null; string scorer = null; string audio = null; + string hotwords = null; bool extended = false; if (args.Length > 0) { model = GetArgument(args, "--model"); scorer = GetArgument(args, "--scorer"); audio = GetArgument(args, "--audio"); + hotwords = GetArgument(args, "--hot_words"); extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended")); } @@ -65,6 +67,19 @@ static void Main(string[] args) sttClient.EnableExternalScorer(scorer ?? "kenlm.scorer"); } + if(hotwords != null) + { + Console.WriteLine($"Adding hot-words {hotwords}"); + char[] sep = {','}; + string[] word_boosts = hotwords.Split(sep); + foreach(string word_boost in word_boosts) + { + char[] sep1 = {':'}; + string[] word = word_boost.Split(sep1); + sttClient.AddHotWord(word[0], float.Parse(word[1])); + } + } + string audioFile = audio ?? "arctic_a0024.wav"; var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile)); using (var waveInfo = new WaveFileReader(audioFile)) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index d485846e9..3b23be1cb 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -526,11 +526,12 @@ run_multi_inference_tests() run_hotword_tests() { + DS_BINARY_FILE=${DS_BINARY_FILE:-"deepspeech"} set +e - hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + hotwords_decode=$(${DS_BINARY_PREFIX}${DS_BINARY_FILE} --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e - assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" + assert_working_ldc93s1_lm "${hotwords_decode}" "$status" } run_android_hotword_tests() diff --git a/taskcluster/tc-netframework-ds-tests.sh b/taskcluster/tc-netframework-ds-tests.sh index e74d0cb86..d86598273 100644 --- a/taskcluster/tc-netframework-ds-tests.sh +++ b/taskcluster/tc-netframework-ds-tests.sh @@ -29,3 +29,5 @@ DS_BINARY_FILE="DeepSpeechConsole.exe" ensure_cuda_usage "$2" run_netframework_inference_tests + +run_hotword_tests \ No newline at end of file
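
The --hot_words value exercised above is a comma-separated list of word:boost pairs, with negative boosts allowed, as in "foo:0.0,bar:-0.1". For comparison with the C# parsing loop, a rough Python equivalent is sketched below; it assumes the Python binding's Model.addHotWord(word, boost) method, and the model path is hypothetical:

    from deepspeech import Model

    def apply_hot_words(model, hot_words_arg):
        # hot_words_arg looks like "foo:0.0,bar:-0.1": each entry is a
        # word:boost pair, and the boost may be negative.
        for entry in hot_words_arg.split(','):
            word, boost = entry.split(':', 1)
            model.addHotWord(word, float(boost))

    model = Model('output_graph.pbmm')  # hypothetical model path
    apply_hot_words(model, 'foo:0.0,bar:-0.1')

Like the C# version, this treats everything after the first colon as the boost, so the float parse still succeeds for negative values.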