# Dockerfile.tmpl

# Build arguments declared before the first FROM so they can be used in the
# FROM instructions below. ARGs declared here are not visible inside a build
# stage, which is why the version ARGs are declared again after FROM.
ARG BASE_IMAGE_REPO
ARG BASE_IMAGE_TAG
ARG CPU_BASE_IMAGE_NAME
ARG GPU_BASE_IMAGE_NAME
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION

# Select the base image for the requested accelerator. The GPU variant first
# declares the `lightgbm_whl` and `torch_whl` stages, whose /tmp/whl/*.whl files
# are copied into the final image further down.
{{ if eq .Accelerator "gpu" }}
FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
{{ else }}
FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
{{ end }}

# Append the conda lib directory to both library search paths so that shared
# libraries installed with conda can be found by the dynamic link loader.
ENV LIBRARY_PATH="${LIBRARY_PATH}:/opt/conda/lib" \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/conda/lib"

{{ if eq .Accelerator "gpu" }}
# GPU image only: expose the CUDA version build arguments as environment variables.
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
# NVIDIA binaries from the host are mounted to /opt/bin.
ENV PATH=/opt/bin:${PATH}
# Add CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
# The stub-free value is saved first so that it can be restored at the end of this file.
ENV LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
# Make the stub library resolvable under the libcuda.so.1 name as well.
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
{{ end }}

# Keep this variable in sync with the base image whenever the base image is updated.
# It is used below to pin the tensorflow packages and to label the image.
ENV TENSORFLOW_VERSION=2.11.0

# We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION

# Silence noisy KMP log lines such as:
#   KMP_AFFINITY: pid 6121 tid 6121 thread 0 bound to OS proc set 0
# KMP_WARNINGS=0 disables the warnings, KMP_SETTINGS=false makes the KMP logs non-verbose.
# See: https://stackoverflow.com/questions/57385766/disable-tensorflow-log-information
#      https://stackoverflow.com/questions/70250304/stop-tensorflow-from-printing-warning-message
ENV KMP_WARNINGS=0 \
    KMP_SETTINGS=false

# Copy the layer clean-up script (invoked at the end of most RUN steps below) and
# the nbconvert template/configuration into the image.
# COPY is used instead of ADD: these are plain local files, so none of ADD's extra
# behaviors (archive extraction, remote URLs) are wanted.
COPY clean-layer.sh /tmp/clean-layer.sh
COPY patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
COPY patches/template_conf.json /opt/kaggle/conf.json

{{ if eq .Accelerator "gpu" }}
# GPU image only: remove the horovod package that is already installed.
# b/200968891 Keeps horovod once torch is upgraded.
RUN pip uninstall -y horovod && \
    /tmp/clean-layer.sh
{{ end }}

# Update GPG key per documentation at https://cloud.google.com/compute/docs/troubleshooting/known-issues
# The key is imported twice: once into the default apt keyring and once into the
# dedicated /usr/share/keyrings/cloud.google.gpg keyring.
# `sudo` is not used: the other RUN steps in this file already run apt-get without it,
# so the build executes as root. `curl -f` makes an HTTP error fail the download
# instead of piping an error page into apt-key.
RUN curl -fsS https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
RUN curl -fsS https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -

# Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections,
# as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346
# This step also refreshes the apt package index; the later `apt-get install` calls in
# this file do not run `apt-get update` themselves.
RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \
    apt-get update && \
    # Needed by lightGBM (GPU build)
    # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm
    apt-get install -y build-essential unzip cmake libboost-dev libboost-system-dev libboost-filesystem-dev p7zip-full && \
    # b/182601974: ssh client was removed from the base image but is required for packages such as stable-baselines.
    apt-get install -y openssh-client && \
    /tmp/clean-layer.sh

# b/128333086: Set PROJ_LIB to point to the proj4 cartographic library data directory.
ENV PROJ_LIB=/opt/conda/share/proj

# Install conda packages not available on pip.
# When using pip in a conda environment, conda commands should be run first and then
# the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
# `-y` is passed to every install so the build never depends on an interactive
# confirmation prompt, consistent with the other mamba invocations in this file.
RUN conda config --add channels nvidia && \
    conda config --add channels rapidsai && \
    conda install -y -c conda-forge mamba && \
    # Base image channel order: conda-forge (highest priority), defaults.
    # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
    mamba install -y mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
    /tmp/clean-layer.sh

{{ if eq .Accelerator "gpu" }}

# GPU image only: install cudf and cuml with mamba.
# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
RUN pip uninstall -y pyarrow && \
    mamba install -y cudf cuml && \
    /tmp/clean-layer.sh
{{ end }}

# Install implicit
# The GPU image additionally selects the `gpu` build of implicit-proc. That spec is
# quoted so the shell never treats the `*` as a filename pattern, and `-y` keeps the
# install non-interactive, consistent with the other mamba invocations in this file.
{{ if eq .Accelerator "gpu" }}
RUN mamba install -y implicit "implicit-proc=*=gpu" && \
    /tmp/clean-layer.sh
{{ else }}
RUN mamba install -y implicit && \
    /tmp/clean-layer.sh
{{ end }}

# Install PyTorch
# GPU image: install the wheels copied from the `torch_whl` stage.
# CPU image: install the pinned `+cpu` builds from the PyTorch wheel index.
{{ if eq .Accelerator "gpu" }}
COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
# `-y` keeps the mamba installs non-interactive, consistent with the rest of this file.
RUN mamba install -y -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
    pip install /tmp/torch/*.whl && \
    # b/255757999 openmp (libomp.so) is a dependency of libtorchtext and libtorchaudio,
    # so it is installed explicitly. The patchelf alternative below is kept disabled.
    mamba install -y openmp && \
    #pip install patchelf && \
    #patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchtext/lib/libtorchtext.so && \
    #patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchaudio/lib/libtorchaudio.so && \
    # The wheels are removed in the same layer that installed them.
    rm -rf /tmp/torch && \
    /tmp/clean-layer.sh
{{ else }}
RUN pip install \
        torch==$TORCH_VERSION+cpu \
        torchvision==$TORCHVISION_VERSION+cpu \
        torchaudio==$TORCHAUDIO_VERSION+cpu \
        torchtext==$TORCHTEXT_VERSION \
    -f https://download.pytorch.org/whl/torch_stable.html && \
    /tmp/clean-layer.sh
{{ end }}

# Install LightGBM
# GPU image: install the wheel copied from the `lightgbm_whl` stage.
# CPU image: install the pinned release from pip.
{{ if eq .Accelerator "gpu" }}
COPY --from=lightgbm_whl /tmp/whl/*.whl /tmp/lightgbm/
# Install OpenCL (required by LightGBM GPU version)
# The nvidia.icd file registers libnvidia-opencl.so.1 as an OpenCL vendor library.
RUN apt-get install -y ocl-icd-libopencl1 clinfo && \
    mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
    pip install /tmp/lightgbm/*.whl && \
    rm -rf /tmp/lightgbm && \
    /tmp/clean-layer.sh
{{ else }}
RUN pip install lightgbm==$LIGHTGBM_VERSION && \
    /tmp/clean-layer.sh
{{ end }}

# Install JAX
# The requirement is quoted because the square brackets of an extras specifier are
# shell glob characters; quoting guarantees pip receives the text unchanged.
{{ if eq .Accelerator "gpu" }}
RUN pip install "jax[cuda11_cudnn805]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html && \
    /tmp/clean-layer.sh
{{ else }}
RUN pip install "jax[cpu]" && \
    /tmp/clean-layer.sh
{{ end }}

# Install mxnet
{{ if eq .Accelerator "gpu" }}
# No mxnet package targets the CUDA 11.3 minor version; the 11.2 build is used instead.
RUN pip install mxnet-cu112 \
    && /tmp/clean-layer.sh
{{ else }}
RUN pip install mxnet \
    && /tmp/clean-layer.sh
{{ end }}

# Install spacy
# GPU image: request the extra matching the CUDA version (e.g. `cuda113` for 11.3).
# The requirement is quoted because the square brackets of an extras specifier are
# shell glob characters; the variables are still expanded inside double quotes.
{{ if eq .Accelerator "gpu" }}
RUN pip install "spacy[cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}]" && \
    /tmp/clean-layer.sh
{{ else }}
RUN pip install spacy && \
    /tmp/clean-layer.sh
{{ end }}

# Install GPU specific packages
{{ if eq .Accelerator "gpu" }}
# Install GPU-only packages
# No specific package for nnabla-ext-cuda 11.x minor versions.
# NOTE(review): nnabla-ext-cuda is not installed by this step; the note above
# presumably explains its absence - confirm.
RUN pip install pycuda \
        pynvrtc \
        pynvml && \
    /tmp/clean-layer.sh
{{ end }}

# Install general data-science packages, then h2o and the TensorFlow family pinned
# through TENSORFLOW_VERSION.
RUN pip install pysal \
        seaborn python-dateutil dask python-igraph \
        pyyaml joblib husl geopy mne pyshp \
        pandas \
        polars \
        flax && \
    # Install h2o from source.
    # Use `conda install -c h2oai h2o` once Python 3.7 version is released to conda.
    # NOTE(review): h2o is installed by pip through the find-links page below;
    # default-jre-headless is presumably the Java runtime it needs - confirm.
    apt-get install -y default-jre-headless && \
    pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o \
        "tensorflow-gcs-config<=${TENSORFLOW_VERSION}" \
        "tensorflow==${TENSORFLOW_VERSION}" \
        tensorflow-addons \
        tensorflow_decision_forests \
        tensorflow_text && \
    /tmp/clean-layer.sh

# Install system libraries and NLP-related packages, then download the NLTK
# datasets into /usr/share/nltk_data.
RUN apt-get install -y libfreetype6-dev && \
    apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
    # b/198300835 kornia 4.1.0 is not compatible with our version of numpy.
    # NOTE(review): the pin below is on gensim, so this note presumably refers to gensim 4.1.0 - confirm.
    pip install gensim==4.0.1 \
        textblob \
        wordcloud \
        xgboost \
        pydot \
        # Pinned because it breaks theano test with the latest version (b/178107003).
        theano-pymc==1.0.11 \
        python-Levenshtein \
        hep_ml && \
    # NLTK Project datasets
    mkdir -p /usr/share/nltk_data && \
    # NLTK Downloader no longer continues smoothly after an error, so we explicitly list
    # the corpora that work
    # "yes | ..." answers yes to the retry prompt in case of an error. See b/133762095.
    yes | python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \
    basque_grammars biocreative_ppi bllip_wsj_no_aux \
    book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \
    comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \
    europarl_raw floresta gazetteers genesis gutenberg \
    ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \
    masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \
    mte_teip5 names nps_chat omw opinion_lexicon paradigms \
    pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \
    pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \
    sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \
    state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \
    twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
    vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
    # Stop-words
    pip install stop-words \
        scikit-image && \
    /tmp/clean-layer.sh

# Install ibis, the gluon toolkits and pinned OpenCV builds. Each package is installed
# by its own pip invocation, in the order listed.
RUN pip install ibis-framework && \
    pip install gluonnlp && \
    # b/212703016 4.5.4.62 segfault with readtext.
    pip install opencv-contrib-python==4.5.4.60 opencv-python==4.5.4.60 && \
    pip install gluoncv && \
    /tmp/clean-layer.sh

# Install the scientific Python stack plus graphviz, pandoc, py-earth, essentia and git-lfs.
RUN pip install scipy \
        scikit-learn \
        # Scikit-learn accelerated library for x86
        # The specifier must be quoted: unquoted, the shell parses `>=2023.0.1` as an
        # output redirection to a file named "=2023.0.1", so pip never sees the
        # version constraint and its output is written to that stray file.
        "scikit-learn-intelex>=2023.0.1" \
        # HDF5 support
        h5py \
        biopython \
        # PUDB, for local debugging convenience
        pudb \
        imbalanced-learn \
        # Profiling and other utilities
        line_profiler \
        orderedmultidict \
        smhasher \
        bokeh \
        numba \
        datashader \
        # Boruta (python implementation)
        Boruta && \
    apt-get install -y graphviz && pip install graphviz && \
    # Pandoc is a dependency of deap
    apt-get install -y pandoc && \
    pip install git+https://github.com/scikit-learn-contrib/py-earth.git@issue191 \
        essentia && \
    apt-get install -y git-lfs && \
    /tmp/clean-layer.sh

# vtk with dependencies
# Each pip package is installed right after the apt package listed as its dependency.
RUN apt-get install -y libgl1-mesa-glx && \
    pip install vtk && \
    # xvfbwrapper with dependencies
    apt-get install -y xvfb && \
    pip install xvfbwrapper && \
    /tmp/clean-layer.sh

# Install a large set of analysis, visualization and machine-learning libraries.
# The install is split in two so that the Geohash package can be patched in between.
RUN pip install mpld3 \
        gpxpy \
        arrow \
        nilearn \
        nibabel \
        pronouncing \
        markovify \
        imgaug \
        preprocessing \
        path.py \
        Geohash && \
    # https://github.com/vinsci/geohash/issues/4
    # Rewrites `geohash` to `.geohash` in the installed package's __init__.py.
    sed -i -- 's/geohash/.geohash/g' /opt/conda/lib/python3.7/site-packages/Geohash/__init__.py && \
    pip install deap \
        tpot \
        scikit-optimize \
        haversine \
        toolz cytoolz \
        plotly \
        hyperopt \
        fitter \
        langid \
        # Delorean. Useful for dealing with datetime
        delorean \
        trueskill \
        # Useful data exploration libraries (for missing data and generating reports)
        missingno \
        pandas-profiling \
        s2sphere \
        bayesian-optimization \
        matplotlib-venn \
        # b/184083722 pyldavis >= 3.3 requires numpy >= 1.20.0 but TensorFlow 2.4.1 / 2.5.0 requires 1.19.2
        pyldavis==3.2.2 \
        mlxtend \
        altair \
        ImageHash \
        ecos \
        CVXcanon \
        pymc3 \
        imagecodecs \
        tifffile \
        spectral \
        descartes \
        geojson \
        pydicom \
        wavio \
        SimpleITK \
        hmmlearn \
        bayespy \
        gplearn \
        PyAstronomy \
        squarify \
        fuzzywuzzy \
        python-louvain \
        pyexcel-ods \
        sklearn-pandas \
        stemming \
        # b/266272046 prophet 1.1.2 breaks the test
        prophet==1.1.1 \
        holoviews \
        geoviews \
        hypertools \
        py_stringsimjoin \
        mlens \
        scikit-multilearn \
        cleverhans \
        leven \
        catboost \
        lightfm \
        folium \
        scikit-plot \
        fury dipy \
        plotnine \
        scikit-surprise \
        pymongo \
        geoplot \
        eli5 \
        kaggle \
        mock && \
    /tmp/clean-layer.sh

# Install tensorpack, the PAIR-code Facets notebook extension and further pip packages.
RUN pip install tensorpack && \
    # Add google PAIR-code Facets
    # The `cd` persists for the rest of this RUN, so the pip commands below run from /opt/facets.
    cd /opt/ && git clone https://github.com/PAIR-code/facets && cd facets/ && jupyter nbextension install facets-dist/ --user && \
    # This export only lasts for the remainder of this RUN step; the persistent
    # setting is the `ENV PYTHONPATH` instruction further down ("For Facets").
    export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ && \
    pip install kmodes --no-dependencies && \
    pip install librosa \
        polyglot \
        mmh3 \
        fbpca \
        sentencepiece \
        cufflinks \
        lime \
        memory_profiler && \
    /tmp/clean-layer.sh


# Delete the REQUESTED and direct_url.json metadata files of the google* packages;
# these files can't be uninstalled normally. The build fails if either removal fails.
RUN rm /opt/conda/lib/python3.7/site-packages/google*/REQUESTED \
    && rm /opt/conda/lib/python3.7/site-packages/google*/direct_url.json
# install cython & cysignals before pyfasttext
# hunspell is installed after its libhunspell-dev system package.
# The layer ends with /tmp/clean-layer.sh, like the other pip/apt layers in this
# file, so that the clean-up happens in the same layer that ran the installs.
RUN pip install cython \
        cysignals \
        pyfasttext \
        fasttext && \
    apt-get install -y libhunspell-dev && pip install hunspell && \
    /tmp/clean-layer.sh
# Install annoy and category_encoders, remove google-cloud-bigquery-storage, then
# install the Google Cloud client libraries (several of them pinned) and other packages.
RUN pip install annoy \
        category_encoders && \
    # b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data.
    pip uninstall -y google-cloud-bigquery-storage && \
    # google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1
    # After launch this should be installed from pip
    # NOTE(review): "this" presumably refers to the python-aiplatform git install on the next line - confirm.
    pip install git+https://github.com/googleapis/python-aiplatform.git@mb-release \
        google-cloud-automl==1.0.1 \
        google-api-core==1.33.2 \
        google-cloud-bigquery==2.2.0 \
        google-cloud-storage \
        google-cloud-translate==3.* \
        google-cloud-language==2.* \
        google-cloud-videointelligence==2.* \
        google-cloud-vision==2.* \
        protobuf==3.20.3 \
        ortools \
        scattertext \
        # Pandas data reader
        pandas-datareader \
        wordsegment \
        wordbatch \
        emoji \
        # Add Japanese morphological analysis engine
        janome \
        wfdb \
        vecstack \
        # yellowbrick machine learning visualization library
        yellowbrick \
        mlcrate && \
    /tmp/clean-layer.sh


# Install the Jupyter/IPython stack and related utilities.
# Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
# allennlp is currently disabled and not part of the list below:
#        allennlp \
# The layer ends with /tmp/clean-layer.sh, like the other pip/apt layers in this
# file, so that the clean-up happens in the same layer that ran the installs.
RUN pip install bleach \
        certifi \
        cycler \
        decorator \
        entrypoints \
        html5lib \
        ipykernel \
        ipython \
        ipython-genutils \
        ipywidgets==7.7.1 \
        isoweek \
        jedi \
        jsonschema \
        jupyter-client \
        jupyter-console \
        jupyter-core \
        jupyterlab-lsp \
        MarkupSafe \
        mistune \
        nbformat \
        notebook \
        papermill \
        # Quoted because the square brackets of an extras specifier are shell glob characters.
        "python-lsp-server[all]" \
        olefile \
        # b/198300835 kornia 0.5.10 is not compatible with our version of numpy.
        kornia==0.5.8 \
        pandas_summary \
        pandocfilters \
        pexpect \
        pickleshare \
        Pillow && \
    # Install openslide and its python binding
    apt-get install -y openslide-tools && \
    pip install openslide-python \
        ptyprocess \
        Pygments \
        pyparsing \
        pytz \
        PyYAML \
        pyzmq \
        qtconsole \
        six \
        terminado \
        tornado \
        tqdm \
        traitlets \
        wcwidth \
        webencodings \
        widgetsnbextension \
        pyarrow \
        feather-format \
        fastai \
        importlib-metadata && \
    /tmp/clean-layer.sh
# Download the small and large English spaCy models, then install ffmpeg.
RUN python -m spacy download en_core_web_sm && python -m spacy download en_core_web_lg && \
    apt-get install -y ffmpeg && \
    /tmp/clean-layer.sh

    ###########
    #
    #      NEW CONTRIBUTORS:
    # Please add new pip/apt installs in this block. Don't forget a "&& \" at the end
    # of all non-final lines. Thanks!
    #
    ###########

# Neither dlib nor nnabla is part of the list below; the notes that follow record
# known problems with them (presumably the reason they are left out - confirm).
# dlib has a libmkl incompatibility:
# test_dlib_face_detector (test_dlib.TestDLib) ... INTEL MKL ERROR: /opt/conda/bin/../lib/libmkl_avx512.so.2: undefined symbol: mkl_sparse_optimize_bsr_trsm_i8.
# Intel MKL FATAL ERROR: Cannot load libmkl_avx512.so.2 or libmkl_def.so.2.
# nnabla breaks protobuf compatibility.
# The layer ends with /tmp/clean-layer.sh, like the other pip/apt layers in this
# file, so that the clean-up happens in the same layer that ran the installs.
RUN pip install flashtext \
        wandb \
        # b/214080882 blake3 0.3.0 is not compatible with vaex.
        blake3==0.2.1 \
        vaex \
        marisa-trie \
        pyemd \
        pyupset \
        pympler \
        s3fs \
        featuretools \
        #-e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper \
        hpsklearn \
        git+https://github.com/Kaggle/learntools \
        kmapper \
        shap \
        ray \
        gym \
        pyarabic \
        pandasql \
        tensorflow_hub \
        tensorflowjs \
        jieba  \
        # ggplot is broken and main repo does not merge and release https://github.com/yhat/ggpy/pull/668
        https://github.com/hbasria/ggpy/archive/0.11.5.zip \
        cesium \
        rgf_python \
        # b/205704651 remove install cmd for matrixprofile after version > 1.1.10 is released.
        git+https://github.com/matrix-profile-foundation/matrixprofile.git@6bea7d4445284dbd9700a097974ef6d4613fbca7 \
        tsfresh \
        pykalman \
        optuna \
        plotly_express \
        albumentations \
        # b/254245259 catalyst requires accelerate but it breaks with the version 0.13.1
        accelerate==0.12.0 \
        catalyst \
        # b/206990323 osmnx 1.1.2 requires numpy >= 1.21 which we don't want.
        osmnx==1.1.1 && \
    apt-get -y install libspatialindex-dev && \
    /tmp/clean-layer.sh
# Install PyTorch ecosystem packages, geospatial libraries and assorted utilities.
# Several entries are pinned; the reason is given in the comment above each pin where known.
RUN pip install pytorch-ignite \
        qgrid \
        bqplot \
        earthengine-api \
        transformers \
        # b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
        datasets==2.1.0 \
        # b/271870650 1.13.0 installs pickle5 which breaks rgf_pythons tests
        kaggle-environments==1.12.0 \
        geopandas \
        # b/270162926 newer versions of shapely seem to break polygon data in geopandas
        shapely==1.8 \
        vowpalwabbit \
        pydub \
        pydegensac \
        torchmetrics \
        pytorch-lightning \
        datatable \
        sympy \
        # flask is used by agents in the simulation competitions.
        flask \
        # pycrypto is used by competitions team.
        pycryptodome \
        easyocr \
        # ipympl adds interactive widget support for matplotlib
        ipympl==0.7.0 \
        pandarallel \
        onnx \
        tables \
        openpyxl \
        timm \
        pycolmap \
        torchinfo && \
    /tmp/clean-layer.sh

# Download base easyocr models.
# https://github.com/JaidedAI/EasyOCR#usage
# Each archive is downloaded into /root/.EasyOCR/model, extracted there, and then deleted.
RUN mkdir -p /root/.EasyOCR/model && \
    wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip" -O /root/.EasyOCR/model/latin.zip && \
    unzip /root/.EasyOCR/model/latin.zip -d /root/.EasyOCR/model/ && \
    rm /root/.EasyOCR/model/latin.zip && \
    wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip" -O /root/.EasyOCR/model/english.zip && \
    unzip /root/.EasyOCR/model/english.zip -d /root/.EasyOCR/model/ && \
    rm /root/.EasyOCR/model/english.zip && \
    wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip" -O /root/.EasyOCR/model/craft_mlt_25k.zip && \
    unzip /root/.EasyOCR/model/craft_mlt_25k.zip -d /root/.EasyOCR/model/ && \
    rm /root/.EasyOCR/model/craft_mlt_25k.zip && \
    /tmp/clean-layer.sh

# Tesseract and some associated utility packages
RUN apt-get install tesseract-ocr -y && \
    pip install pytesseract \
        wand \
        pdf2image \
        PyPDF \
        pyocr && \
    /tmp/clean-layer.sh
# Location of the tesseract binary, exported as an environment variable.
ENV TESSERACT_PATH=/usr/bin/tesseract

# PYTHONPATH: append the Facets overview Python sources (For Facets).
# MKL_THREADING_LAYER: For Theano with MKL.
ENV PYTHONPATH="${PYTHONPATH}:/opt/facets/facets_overview/python/" \
    MKL_THREADING_LAYER=GNU

# Temporary fixes and patches
# Temporary patch for Dask getting downgraded, which breaks Keras
RUN pip install --upgrade dask && \
    # Stop jupyter nbconvert trying to rewrite its folder hierarchy
    # The marker files are created under both /root/.jupyter and /.jupyter.
    mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \
    mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \
    # Stop Matplotlib printing junk to the console on first load
    sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.7/site-packages/matplotlib/font_manager.py && \
    # Make matplotlib output in Jupyter notebooks display correctly
    mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
    # Temporary patch for broken libpixman 0.38 in conda-forge: symlink to the system libpixman 0.34 until the conda package gets updated to 0.38.5 or higher.
    ln -sf /usr/lib/x86_64-linux-gnu/libpixman-1.so.0.34.0 /opt/conda/lib/libpixman-1.so.0.38.0 && \
    /tmp/clean-layer.sh

# Pin setuptools, then install BigQuery_Helper (bq_helper) from GitHub in editable mode.
# NOTE(review): the setuptools pin is presumably required by the editable install - confirm.
RUN pip install setuptools==59.8.0 && pip install -e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper

# Add BigQuery client proxy settings
# The Kaggle patch modules are placed in the user site-packages directory under
# PYTHONUSERBASE. ENV uses the `key=value` form (the space-separated form is legacy),
# and COPY replaces ADD because these are plain local files.
ENV PYTHONUSERBASE="/root/.local"
COPY patches/kaggle_gcp.py /root/.local/lib/python3.7/site-packages/kaggle_gcp.py
COPY patches/kaggle_secrets.py /root/.local/lib/python3.7/site-packages/kaggle_secrets.py
COPY patches/kaggle_session.py /root/.local/lib/python3.7/site-packages/kaggle_session.py
COPY patches/kaggle_web_client.py /root/.local/lib/python3.7/site-packages/kaggle_web_client.py
COPY patches/kaggle_datasets.py /root/.local/lib/python3.7/site-packages/kaggle_datasets.py
COPY patches/log.py /root/.local/lib/python3.7/site-packages/log.py
COPY patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustomize.py
# Override default imagemagick policies
COPY patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml

# Add Kaggle module resolver
# The resolver module is copied into the installed tensorflow_hub package (COPY rather
# than ADD, since it is a plain local file). tensorflow_hub/config.py is then patched to
# import it and to register KaggleFileResolver after the `_install_default_resolvers()` line.
COPY patches/kaggle_module_resolver.py /opt/conda/lib/python3.7/site-packages/tensorflow_hub/kaggle_module_resolver.py
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py && \
    sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py

# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
# worker tunneling support in place.
# b/139212522 re-enable TensorBoard once solution for slowdown is implemented.
# ENV JUPYTER_CONFIG_DIR "/root/.jupyter/"
# RUN pip install jupyter_tensorboard && \
#     jupyter serverextension enable jupyter_tensorboard && \
#     jupyter tensorboard enable
# ADD patches/tensorboard/notebook.py /opt/conda/lib/python3.7/site-packages/tensorboard/notebook.py

# Disable unnecessary jupyter extensions
# nb_conda is disabled as both a notebook extension and a server extension, and the
# nb_conda_kernels integration is disabled through its installer module.
RUN jupyter-nbextension disable nb_conda --py --sys-prefix && \
    jupyter-serverextension disable nb_conda --py --sys-prefix && \
    python -m nb_conda_kernels.install --disable

# Force only one libcusolver
# The conda copy is replaced by a symlink to the library under /usr/local/cuda.
# NOTE(review): this step is not wrapped in the GPU conditional; `rm` fails if the conda
# copy is absent, so confirm it is also intended to run for the CPU image.
RUN rm /opt/conda/bin/../lib/libcusolver.so.11 && ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11

# b/270147159 conda ships with a version of libtinfo which is missing version info causing warnings, replace it with a good version.
RUN rm /opt/conda/lib/libtinfo.so.6 && ln -s /usr/lib/x86_64-linux-gnu/libtinfo.so.6 /opt/conda/lib/libtinfo.so.6

# Set backend for matplotlib
# Written in the `key=value` form; the space-separated `ENV key value` form is legacy.
ENV MPLBACKEND="agg"

# Build provenance passed in as build arguments; both default to "unknown".
ARG GIT_COMMIT=unknown
ARG BUILD_DATE=unknown

# Record the provenance as image labels and as environment variables.
LABEL git-commit="${GIT_COMMIT}" \
      build-date="${BUILD_DATE}"
ENV GIT_COMMIT="${GIT_COMMIT}" \
    BUILD_DATE="${BUILD_DATE}"

LABEL tensorflow-version="${TENSORFLOW_VERSION}"
# Used in the Jenkins `Docker GPU Build` step to restrict the images being pruned.
LABEL kaggle-lang=python

# Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`.
RUN echo "$GIT_COMMIT" > /etc/git_commit \
    && echo "$BUILD_DATE" > /etc/build_date

{{ if eq .Accelerator "gpu" }}
# Remove the CUDA stubs.
# Restores the LD_LIBRARY_PATH value saved in LD_LIBRARY_PATH_NO_STUBS before the
# stubs directory was appended for the build.
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH_NO_STUBS"
{{ end }}