# Multi-stage Docker build with three stages: base, builder and tscan.
# A base container which is used for both the builder and the final container.
# This way rebuilds are sped up because the common dependencies are re-used.
# The final container is also smaller because it doesn't contain build dependencies.
FROM ubuntu:22.04 AS base

# Declared with ENV, so it is inherited by every stage built FROM base and is
# also present in the environment of the final image.
ENV DEBIAN_FRONTEND=noninteractive

# Alpino location and the executable search path (Alpino directories first).
ENV ALPINO_HOME=/Alpino \
    PATH=/Alpino/bin:/Alpino/Tokenization:/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Locale settings; the en_US locales themselves are generated further down in
# this stage.
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8

# Shared-library search path for Alpino.
# The value deliberately does not start with ':': an empty entry makes the
# dynamic loader search the current working directory. /Alpino/util is listed
# once; a second occurrence would never be reached.
ENV LD_LIBRARY_PATH=/Alpino/util:/Alpino/Tokenization:/Alpino/fadd:/Alpino/TreebankTools/IndexedCorpus:/Alpino/create_bin:/Alpino/create_bin/extralibs:/Alpino/create_bin/extralibs/boost:/usr/lib:/usr/local/lib

# Tcl/Tk runtime directories inside the Alpino tree.
ENV TCL_LIBRARY=/Alpino/create_bin/extralibs/tcl8.5 \
    TK_LIBRARY=/Alpino/create_bin/extralibs/tk8.5
# Add libraries to standard path
# NOTE(review): no earlier instruction in this stage creates /Alpino (it is
# copied in from the builder in the tscan stage), so these directories
# presumably do not exist yet when this runs - confirm whether this step
# should run after /Alpino has been put in place instead.
RUN ldconfig /Alpino/boost /Alpino/fadd /Alpino/unix /Alpino/TreebankTools/IndexedCorpus
# Install locale support (plus gettext) and generate the en_US locales named by
# LANG, LANGUAGE and LC_ALL.
RUN apt-get update && apt-get install -y locales gettext
# printf is used rather than `echo "...\n..."`: whether echo expands "\n"
# depends on which shell /bin/sh is, while printf always writes one entry per
# line. The resulting /etc/locale.gen holds exactly these two lines.
RUN printf '%s\n' 'en_US UTF-8' 'en_US.UTF-8 UTF-8' > /etc/locale.gen && locale-gen
# Library dependencies, by consumer:
#   T-Scan: libicu70, libxml2, libgomp1, libexttextcat-2.0-0
#   Alpino: libxft2, libxss1
RUN apt-get update \
    && apt-get install -y \
        libexttextcat-2.0-0 \
        libgomp1 \
        libicu70 \
        libxft2 \
        libxml2 \
        libxss1
# Packages Clam depends on (installed without recommended extras).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libmariadb-dev \
        nginx \
        pkg-config \
        python3-dev \
        python3-lxml \
        python3-pip \
        python3-requests \
        python3-yaml \
        runit \
        uwsgi \
        uwsgi-plugin-python3
# Support authentication through MariaDB/MySQL.
# --no-cache-dir stops pip from leaving its download/wheel cache behind in
# this image layer.
RUN pip install --no-cache-dir mysqlclient
# Further packages, by consumer:
#   T-Scan webservice:        antiword, libmagic1
#   MCS (compound splitter):  default-jre
#   Frog:                     netbase
#     (found out the hard way after getting the following error message and
#      being lost as to why)
#     failure in getaddrinfo: Servname not supported for ai_socktype
#   sudo: needed to be able to restart projects using the right user
RUN apt-get update \
    && apt-get install -y \
        antiword \
        default-jre \
        libmagic1 \
        netbase \
        sudo
# Build stage: starts from base and adds the packages used while building.
FROM base AS builder
RUN apt-get update \
    && apt-get install -y \
        autoconf \
        autoconf-archive \
        automake \
        autotools-dev \
        bash \
        bzip2 \
        ca-certificates \
        ccache \
        checkinstall \
        clang-tools \
        cppcheck \
        curl \
        expect \
        ffmpeg \
        flac \
        g++ \
        git \
        lame \
        libbz2-dev \
        libexttextcat-dev \
        libicu-dev \
        libjpeg-dev \
        libmad0 \
        libsm6 \
        libsox-fmt-mp3 \
        libtar-dev \
        libtcl8.6 \
        libtk8.6 \
        libtool \
        libwww-perl \
        libxml2-dev \
        libxslt1-dev \
        libxslt1.1 \
        make \
        pkg-config \
        poppler-utils \
        pstotext \
        python3-dev \
        python3-minimal \
        sox \
        subversion \
        swig \
        tesseract-ocr \
        tk \
        unrtf \
        unzip \
        wget \
        zip \
        zlib1g-dev
# Deployment directory; the ./*.sh steps of this stage are run from here
# (presumably these are the scripts being copied in - verify against
# docker/deployment/).
COPY docker/deployment/ /deployment/
# this might contain pre-downloaded Alpino
COPY data/ /src/tscan/data
# Pre-built packages and dependencies
# Actual project files are excluded using dockerignore
COPY docker/data/ /src/tscan/docker/data/
# All following relative script paths in this stage resolve against /deployment
WORKDIR /deployment
# Prepare and install all the dependencies
RUN ./add-alpino.sh
# These will create .deb packages which can be re-used by the final container
# or during a rebuild
# Each dependency has its own RUN instruction and therefore its own cached
# layer: a change to one step only invalidates that step and the ones after it.
# NOTE(review): the order presumably follows the dependency chain between
# these libraries - confirm before reordering.
COPY dependencies.cfg /src/tscan/
RUN ./prep-dep.sh ticcutils https://github.com/LanguageMachines/ticcutils
RUN ./prep-dep.sh libfolia https://github.com/LanguageMachines/libfolia
RUN ./prep-dep.sh uctodata https://github.com/LanguageMachines/uctodata
RUN ./prep-dep.sh ucto https://github.com/LanguageMachines/ucto
RUN ./prep-dep.sh timbl https://github.com/LanguageMachines/timbl
RUN ./prep-dep.sh mbt https://github.com/LanguageMachines/mbt
RUN ./prep-dep.sh mbtserver https://github.com/LanguageMachines/mbtserver
RUN ./prep-dep.sh frogdata https://github.com/LanguageMachines/frogdata
RUN ./prep-dep.sh frog https://github.com/LanguageMachines/frog
RUN ./prep-dep.sh wopr https://github.com/LanguageMachines/wopr
# The compound splitter is built by its own script, after the steps above
RUN ./build-compound-splitter.sh
# Have the copying and build as the very last item,
# this way only the source itself will have to rebuild during a change
# Copies the build context (minus whatever .dockerignore excludes) to /src/tscan
COPY . /src/tscan
# Runs from /deployment, the working directory set earlier in this stage
RUN ./build.sh
# Final stage: starts again from base and only takes over selected paths from
# the builder stage, so the builder's extra packages are not part of this image.
# NOTE(review): this stage declares no USER, CMD or ENTRYPOINT - presumably the
# start command and user are supplied elsewhere (install.sh or the runtime
# configuration); confirm.
FROM base AS tscan
# .deb packages found in /src of the builder stage
COPY --from=builder /src/*.deb /src/
COPY --from=builder /src/compound-splitter/ /src/compound-splitter/
# Build output (for caching)
COPY --from=builder /src/tscan/src/*.o /src/tscan/src/
COPY --from=builder /src/tscan/webservice/ /src/tscan/webservice/
COPY --from=builder /src/tscan/view/ /src/tscan/view/
COPY --from=builder /Alpino/ /Alpino/
COPY --from=builder /deployment/ /deployment/
# install.sh is run from /deployment
WORKDIR /deployment
RUN ./install.sh