Replies: 2 comments
-
Beta Was this translation helpful? Give feedback.
-
>>> opensorceror |
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
>>> opensorceror |
Beta Was this translation helpful? Give feedback.
-
>>> tuttlebr
[August 9, 2019, 3:52pm]
Has anyone created an implementation of their (completely trained) model
that operates as an API, either with Flask or TensorFlow Serving? I am
only able to do single-shot implementations, and loading the model each
time takes a long time.
# Single-shot DeepSpeech inference script: parses the path of one WAV file
# from the command line and defines the settings used by evaluate().
import warnings

# Installed before the remaining imports run so the filter is already active
# while they execute.
warnings.filterwarnings('ignore')

import argparse
from datetime import datetime
from multiprocessing import cpu_count

import tensorflow as tf
from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer

from DeepSpeech import create_model, try_loading
from util.config import Config, initialize_globals
from util.feeding import audiofile_to_features
from util.flags import create_flags, FLAGS

# Timestamped progress markers are printed throughout the script to show
# where the start-up time is spent.
print('request received {} \n'.format(datetime.utcnow()))

parser = argparse.ArgumentParser()
parser.add_argument('-a', '--audio', required=True, help='Required 8kHz 16bit PCM wav audio sample path.')
args = parser.parse_args()

# Language-model weights and files handed to the Scorer in evaluate().
lm_alpha = 0.75
lm_beta = 1.85
alphabet_config_path = '../alphabet.txt'
lm_binary_path = '../lm3.binary'
lm_trie_path = '../trie3'

# NOTE(review): the settings below (and alphabet_config_path above) are not
# referenced by the code visible in this script - confirm whether they are
# consumed elsewhere or can be removed.
audio_window_samples = 256
audio_step_samples = 160
n_input = 26
n_context = 9
beam_width = 500

# Path of the audio file to transcribe, read by main().
wav_filename = args.audio
def evaluate(wav_filename):
    """Transcribe one audio file with the trained checkpoint.

    Builds the language-model scorer, the feature pipeline and the model
    graph, restores the 'best_dev_checkpoint' weights, runs a single forward
    pass and decodes it with the batch CTC beam-search decoder. A timestamped
    progress line is printed before each stage.

    Args:
        wav_filename: Path of the audio file passed to
            audiofile_to_features().

    Returns:
        The second field of the first decoder candidate for the only
        utterance in the batch, i.e. ``decoded[0][0][1]``.

    Raises:
        RuntimeError: If try_loading() reports that no checkpoint was
            restored.
    """
    print('scorer initialized {} \n'.format(datetime.utcnow()))
    scorer = Scorer(lm_alpha, lm_beta,
                    lm_binary_path, lm_trie_path,
                    Config.alphabet)

    print('prediction start {} \n'.format(datetime.utcnow()))
    features, features_len = audiofile_to_features(wav_filename)
    print('features initialized {} \n'.format(datetime.utcnow()))

    # Add batch dimension
    batch_x = tf.expand_dims(features, 0)
    batch_x_len = tf.expand_dims(features_len, 0)

    # One rate per layer; None disables dropout for inference.
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
                             seq_length=batch_x_len,
                             dropout=no_dropout)
    print('model initialized {} \n'.format(datetime.utcnow()))

    # Transpose to batch major and apply softmax for decoder
    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))

    # Created before the Saver so the global step is part of the variables
    # it restores.
    tf.train.get_or_create_global_step()

    # Get number of accessible CPU cores for this process
    try:
        num_processes = cpu_count()
    except NotImplementedError:
        num_processes = 1

    # Create a saver using variables from the above newly created graph
    saver = tf.train.Saver()

    with tf.Session(config=Config.session_config) as session:
        # Restore variables from training checkpoint
        loaded = try_loading(session, saver, 'best_dev_checkpoint', 'best validation')
        if not loaded:
            # Fail here with a clear message instead of letting session.run
            # trip over uninitialized variables.
            raise RuntimeError('Could not restore the best_dev_checkpoint checkpoint.')
        print('session initialized {} \n'.format(datetime.utcnow()))

        # Single forward pass: softmax outputs and the sequence length.
        batch_logits, batch_lengths = session.run([transposed, batch_x_len])

    # NOTE(review): the beam width is hard-coded to 1024 here while the
    # module-level beam_width constant (500) is unused - confirm which value
    # is intended.
    decoded = ctc_beam_search_decoder_batch(batch_logits, batch_lengths, Config.alphabet, 1024,
                                            num_processes=num_processes, scorer=scorer)

    # Batch of one: take the top candidate of the only utterance.
    predictions = decoded[0][0][1]
    print('prediction end {} \n'.format(datetime.utcnow()))
    return predictions
def main(_):
    """Entry point passed to tf.app.run(); the positional argument is unused.

    Calls initialize_globals(), prints a timestamped progress line, then
    transcribes the module-level ``wav_filename``.

    Returns:
        Whatever evaluate() returns for ``wav_filename``.
    """
    initialize_globals()
    print('globals initialized {} \n'.format(datetime.utcnow()))
    return evaluate(wav_filename)
# Run only when executed as a script: create_flags() is called first, then
# tf.app.run() invokes main().
if __name__ == '__main__':
    create_flags()
    tf.app.run(main)
[This is an archived TTS discussion thread from discourse.mozilla.org/t/deepspeech-restful-api]
Beta Was this translation helpful? Give feedback.
All reactions