# Dockerfile.dali-cassandra
# Base image: the NVIDIA PyTorch container from the NGC catalog, pinned to the
# 25.01-py3 tag. Every later instruction in this file builds on top of it.
# Catalog page: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch
FROM nvcr.io/nvidia/pytorch:25.01-py3
# Developer tooling layer: editors, shells, compilers and build tools, network
# diagnostics, a JDK and the OpenCV development package.
# The apt package index is refreshed and removed again inside this one layer,
# so no index files are left behind in the image.
# NOTE(review): `psutils` is the PostScript utilities package on Debian/Ubuntu;
# if process tools were intended, confirm and switch to procps/psmisc.
RUN set -e; \
    export DEBIAN_FRONTEND=noninteractive; \
    apt-get update -y -q; \
    apt-get install -y \
        aptitude \
        automake \
        bash-completion \
        bison \
        build-essential \
        cmake \
        dnsutils \
        elinks \
        emacs-goodies-el \
        emacs-nox \
        fish \
        flex \
        git \
        htop \
        iperf3 \
        iproute2 \
        iputils-ping \
        less \
        libopencv-dev \
        libtool \
        mc \
        nload \
        nmon \
        openjdk-11-jdk \
        psutils \
        source-highlight \
        ssh \
        sudo \
        tmux \
        vim \
        wget; \
    rm -rf /var/lib/apt/lists/*
########################################################################
# Cassandra C++ and Python drivers
########################################################################
# Cassandra C++ driver, step 1: install the libuv and OpenSSL development
# packages ahead of the driver build. The apt index is dropped in the same
# layer that fetched it.
RUN set -e; \
    export DEBIAN_FRONTEND=noninteractive; \
    apt-get update -y -q; \
    apt-get install -y \
        libssl-dev \
        libuv1-dev; \
    rm -rf /var/lib/apt/lists/*
# Cassandra C++ driver: download the release tarball for CASS_DRIVER_VER from
# GitHub, configure it with CMake in a fresh build/ directory, compile and run
# `make install`. Override the version with
#   --build-arg CASS_DRIVER_VER=<tag>
ARG CASS_DRIVER_VER=2.17.0
# - `make -j` without a number places no limit on parallel jobs and can exhaust
#   memory on the build host, so the job count is capped at the CPU count.
# - The downloaded tarball is deleted in this same layer so it is not baked
#   into the image. The extracted source tree is intentionally left in place;
#   remove it here as well if nothing else needs it.
RUN \
wget -nv "https://github.com/datastax/cpp-driver/archive/$CASS_DRIVER_VER.tar.gz" \
&& tar xfz "$CASS_DRIVER_VER.tar.gz" \
&& rm "$CASS_DRIVER_VER.tar.gz" \
&& cd "cpp-driver-$CASS_DRIVER_VER" \
&& mkdir build \
&& cd build \
&& cmake .. \
&& make -j"$(nproc)" \
&& make install
# Cassandra Python driver plus general-purpose Python libraries.
# `--no-cache-dir` is pip's documented option for disabling its download/wheel
# cache; the shorter `--no-cache` spelling relies on option-prefix matching and
# would stop working if pip ever gained another option with the same prefix.
RUN \
pip3 install --upgrade --no-cache-dir \
matplotlib \
pandas \
clize \
boto3 \
opencv-python \
cassandra-driver \
pybind11 \
tqdm \
tifffile \
pyyaml
# Lightning is installed in its own layer, pinned to an exact version.
RUN pip3 install --upgrade --no-cache-dir lightning==2.3.1
########################################################################
# SPARK installation, to test examples
########################################################################
# Spark: look up the newest patch release of the SPARK_V line in the Apache
# archive index, download the matching hadoop3 binary tarball, unpack it under
# / and expose it as /spark through a symlink.
# NOTE: the patch version is resolved at build time, so a later rebuild may
# pick up a newer release than an earlier one did.
ARG SPARK_V=3.5
# - The index is saved to a file with `curl -f` so an HTTP or network failure
#   aborts the build instead of being hidden behind a pipeline.
# - `sort -V` orders versions numerically (3.5.10 after 3.5.9) before the last
#   one is taken.
# - An empty result stops the build with a clear message instead of a
#   confusing download error further on.
# - The tarball is deleted in this layer so it is not kept in the image.
RUN set -e; \
    curl -fsSL -o /tmp/spark-index.html 'https://archive.apache.org/dist/spark/'; \
    SPARK_VER="$(grep -o "$SPARK_V\.[[:digit:]]\+" /tmp/spark-index.html | sort -u -V | tail -n 1)"; \
    rm /tmp/spark-index.html; \
    test -n "$SPARK_VER" || { echo "no Spark $SPARK_V.x release found in the archive index" >&2; exit 1; }; \
    wget -nv -P /tmp "https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop3.tgz"; \
    tar -xzf "/tmp/spark-$SPARK_VER-bin-hadoop3.tgz" -C /; \
    rm "/tmp/spark-$SPARK_VER-bin-hadoop3.tgz"; \
    ln -s "spark-$SPARK_VER-bin-hadoop3" /spark
# PySpark: both the driver-side and the worker-side interpreter variables are
# set to python3.
ENV PYSPARK_DRIVER_PYTHON=python3 \
    PYSPARK_PYTHON=python3
# Ports declared for this image (EXPOSE is documentation only; it does not
# publish anything). Presumably Spark's application UI (4040), master endpoint
# (7077) and master web UI (8080) -- confirm against the Spark configuration.
EXPOSE 4040 7077 8080
########################################################################
# Download the Imagenette dataset
########################################################################
# Fetch the imagenette2-320 archive into /tmp, unpack it there and delete the
# archive in the same layer so only the extracted files remain in the image.
WORKDIR /tmp
RUN set -e; \
    archive='imagenette2-320.tgz'; \
    wget -nv 'https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-320.tgz'; \
    tar xfz "$archive"; \
    rm "$archive"
########################################################################
# Upgrade DALI, install plugin and run as ubuntu user
########################################################################
# Workaround for the error raised by
#   from nvidia.dali.plugin.pytorch import DALIGenericIterator
# The HPC-X Open MPI, UCX and UCC directories are appended to PATH and
# LD_LIBRARY_PATH. Background:
# - https://forums.developer.nvidia.com/t/issues-building-docker-image-from-ngc-container-nvcr-io-nvidia-pytorch-22-py3/209034
ENV PATH="${PATH}:/opt/hpcx/ompi/bin"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/hpcx/ompi/lib:/opt/hpcx/ucx/lib:/opt/hpcx/ucc/lib"
# Install the pinned DALI build (CUDA 12.0 wheel) from NVIDIA's package index.
# `--no-cache-dir` keeps pip's download cache out of the layer, matching the
# other pip installs in this file.
RUN pip install --no-cache-dir \
--extra-index-url https://developer.download.nvidia.com/compute/redist \
--upgrade nvidia-dali-cuda120==1.45
# Passwordless sudo: every line of /etc/sudoers that ends in "ALL" is rewritten
# to end in "NOPASSWD:ALL".
RUN sed -i 's/ALL$/NOPASSWD:ALL/' /etc/sudoers
# Plugin sources, owned by the ubuntu user. Ownership is set by COPY itself:
# a separate `RUN chown -R` over the tree would store every file a second time
# in an extra layer.
COPY --chown=ubuntu:ubuntu . /home/ubuntu/cassandra-dali-plugin
# The private-data template is installed under the name private_data.py.
COPY --chown=ubuntu:ubuntu ./examples/common/private_data.template.py /home/ubuntu/cassandra-dali-plugin/examples/common/private_data.py
# SSH keys used to access the cassandra container.
# NOTE(review): this bakes a private key into the image layers; anyone who can
# pull the image can read it. Acceptable only for throw-away test keys.
COPY varia/ssh/ /home/ubuntu/.ssh/
COPY varia/entrypoint-dali-cassandra.sh /tmp
# One layer for the remaining ownership and permission fixes:
# - create the data dir and hand it to ubuntu
# - make sure the plugin directory itself is owned by ubuntu
# - hand the Spark tree (through the /spark symlink) and ~/.ssh to ubuntu
# - restrict the private key to its owner
RUN mkdir /data \
    && chown ubuntu:ubuntu '/data' '/home/ubuntu/cassandra-dali-plugin' \
    && chown -R ubuntu:ubuntu "/spark/" '/home/ubuntu/.ssh' \
    && chmod 600 /home/ubuntu/.ssh/id_rsa
# Install IPython and then the plugin from the copied source tree. Both
# installs run before the USER switch, i.e. not as the ubuntu user.
# `--no-cache-dir` keeps pip's cache out of the layers, matching the other pip
# installs in this file.
WORKDIR /home/ubuntu/cassandra-dali-plugin
RUN pip3 install --no-cache-dir IPython
RUN pip3 install --no-cache-dir .
# From here on the image runs as the unprivileged ubuntu user.
USER ubuntu
# Exec-form entrypoint: the script is started directly, without a wrapping shell.
ENTRYPOINT ["/tmp/entrypoint-dali-cassandra.sh"]