forked from coqui-ai/STT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.build
186 lines (152 loc) · 4.69 KB
/
Dockerfile.build
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# Please refer to the USING documentation, "Dockerfile for building from source"

# The devel variant of the image is needed because the build requires
# /usr/include/cudnn.h
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

# >> START Install base software

# Get basic packages.
# The apt package lists are removed in the same RUN that downloads them: a
# layer keeps every file it wrote, so deleting them in a later RUN would not
# make the image any smaller.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libboost-all-dev \
        libbz2-dev \
        libgsm1-dev \
        libltdl-dev \
        liblzma-dev \
        libmagic-dev \
        libopusfile0 \
        libpng-dev \
        libsox-dev \
        libsox-fmt-mp3 \
        locales \
        openjdk-8-jdk \
        pkg-config \
        python3 \
        python3-dev \
        python3-numpy \
        python3-pip \
        python3-wheel \
        sox \
        unzip \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Make the unversioned `pip` and `python` commands resolve to their Python 3
# counterparts.
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3 1

# Install Bazel: the pinned bazelisk release binary is saved as /usr/bin/bazel.
# --fail makes the step abort on an HTTP error instead of silently storing the
# error page as the executable.
RUN curl --fail --silent --show-error --location \
        --output /usr/bin/bazel \
        "https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-amd64" && \
    chmod +x /usr/bin/bazel

# << END Install base software
# >> START Configure Tensorflow Build

# All variables use the key=value form of ENV (the space-separated form is
# deprecated) and each variable is declared exactly once.

# GPU Environment Setup
ENV TF_NEED_ROCM=0 \
    TF_NEED_OPENCL_SYCL=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=1 \
    TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
    TF_CUDA_VERSION=10.1 \
    TF_CUDNN_VERSION=7.6 \
    TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
    TF_NCCL_VERSION=2.8

# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE=GPU \
    TF_BUILD_OPTIONS=OPT \
    TF_BUILD_DISABLE_GCP=1 \
    TF_BUILD_ENABLE_XLA=0 \
    TF_BUILD_PYTHON_VERSION=PYTHON3 \
    TF_BUILD_IS_OPT=OPT \
    TF_BUILD_IS_PIP=PIP

# Other Parameters
# CC_OPT_FLAGS holds several space-separated compiler flags, so it has to be
# quoted in key=value form.
ENV CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
    TF_NEED_GCP=0 \
    TF_NEED_HDFS=0 \
    TF_NEED_JEMALLOC=1 \
    TF_CUDA_CLANG=0 \
    TF_NEED_MKL=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_AWS=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_NGRAPH=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_NEED_TENSORRT=0 \
    TF_NEED_GDR=0 \
    TF_NEED_VERBS=0

# Python interpreter and package directory used for the build
ENV PYTHON_BIN_PATH=/usr/bin/python3.6 \
    PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages

# << END Configure Tensorflow Build
# >> START Configure Bazel

# Append two settings to the system-wide bazelrc, one per line:
#  * `startup --batch`: running bazel inside a `docker build` command causes
#    trouble (https://github.com/bazelbuild/bazel/issues/134) and forcing
#    --batch is the easiest solution.
#  * standalone spawn/genrule strategies: workaround for sandboxing issues
#    (https://github.com/bazelbuild/bazel/issues/418).
RUN printf '%s\n' \
        'startup --batch' \
        'build --spawn_strategy=standalone --genrule_strategy=standalone' \
    >>/etc/bazel.bazelrc

# << END Configure Bazel
# Copy the entire build context into the image at /STT.
WORKDIR /
COPY . /STT/
# >> START Build and bind
# The steps below run from /STT/tensorflow, so the build context is expected
# to contain a `tensorflow` directory.
WORKDIR /STT/tensorflow
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
# Presumably ./configure reads the TF_* / PYTHON_* environment variables set
# earlier in this file instead of prompting -- TODO confirm.
RUN ./configure
# Using CPU optimizations:
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
# NOTE(review): older notes here said to add --config=cuda for CUDA builds and
# to pass LD_LIBRARY_PATH because Bazel does not pick it up from the
# environment. The command below passes neither explicitly; presumably CUDA is
# enabled through the ./configure step above -- confirm before relying on it.
# Build STT: produces the //native_client:libstt.so target in optimized mode,
# with the aws, gcp, hdfs and nccl configs switched off.
RUN bazel build \
--verbose_failures \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
-c opt \
--copt=-mtune=generic \
--copt=-march=x86-64 \
--copt=-msse \
--copt=-msse2 \
--copt=-msse3 \
--copt=-msse4.1 \
--copt=-msse4.2 \
--copt=-mavx \
--config=noaws \
--config=nogcp \
--config=nohdfs \
--config=nonccl \
//native_client:libstt.so
# Copy built libs to /STT/native_client
# (both libstt.so and libkenlm.so are taken from bazel-bin/native_client)
RUN cp bazel-bin/native_client/libstt.so bazel-bin/native_client/libkenlm.so /STT/native_client/
# Build client.cc and install Python client and decoder bindings
ENV TFDIR=/STT/tensorflow

# Print the number of available cores to the build log; the same value is
# handed to make as NUM_PROCESSES below.
RUN nproc

# Native client binary
WORKDIR /STT/native_client
RUN make NUM_PROCESSES="$(nproc)" stt

# Python client bindings. WORKDIR is used instead of `cd` inside RUN so the
# directory each step runs in is explicit.
WORKDIR /STT/native_client/python
RUN make NUM_PROCESSES="$(nproc)" bindings

# Upgrade the packaging tools before installing the freshly built wheels.
# pip is upgraded in its own RUN so that the following steps start with the
# new pip. --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir --upgrade setuptools wheel
RUN pip3 install --no-cache-dir --upgrade dist/*.whl

# CTC decoder bindings
WORKDIR /STT/native_client/ctcdecode
RUN make NUM_PROCESSES="$(nproc)" bindings
RUN pip3 install --no-cache-dir --upgrade dist/*.whl

# Return to the repository root for the steps that follow
WORKDIR /STT
# << END Build and bind
# Allow Python printing utf-8
ENV PYTHONIOENCODING=UTF-8

# Build KenLM in /STT/native_client/kenlm folder
WORKDIR /STT/native_client

# Replace any existing kenlm directory with a fresh clone checked out at a
# fixed commit, so the build always uses the same KenLM sources.
RUN rm -rf kenlm && \
    git clone https://github.com/kpu/kenlm && \
    git -C kenlm checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec

# WORKDIR creates the build directory when it does not exist yet, so no
# separate mkdir / cd is needed.
WORKDIR /STT/native_client/kenlm/build
RUN cmake .. && \
    make -j "$(nproc)"

# Done
WORKDIR /STT