Slim down the DigiPres Toolbox image and document the size trade-offs.

Dockerfile: base on python:3.11-slim-bullseye, drop Jupyter and sudo, install
curl/unzip/file, a headless JRE and RClone, and comment out Linguist, ffmpeg,
Tika and DROID to keep the image small.
README.md: add build/run instructions, explain how layer sizes were measured,
and regroup the tool lists to match what the Dockerfile now installs.

NOTE(review): the indentation of continuation lines is assumed to be four
spaces; if a context line fails to match, apply with
`git apply --ignore-whitespace`.

diff --git a/Dockerfile b/Dockerfile
index 211ada1..5f50412 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,21 @@
 # DigiPres Toolbox
-# A Docker image with some tools pre-installed
-FROM python:3.10-bullseye
+# ----------------
+#
+# A Docker image with some DigiPres tools pre-installed
+#
 
-# Core Jupyter support:
-RUN pip install --no-cache notebook jupyterlab bash_kernel
-RUN python -m bash_kernel.install
+# Start from as small an image as possible:
+FROM python:3.11-slim-bullseye
 
-# Some lightweight tools and support for installing more:
-RUN apt-get update && apt-get install -y sudo mediainfo cloc && \
+# Some (smallish) tools and support for installing more (69MB)
+RUN apt-get update && apt-get install -y curl mediainfo cloc unzip file && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Install Siegfried:
+# Install JRE for Java programs (headless 250MB, headful is 431MB)
+RUN apt-get update && apt-get install -y default-jre-headless && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Siegfried (71MB)
 ENV SF_VERSION=1.11.1
 ENV SF_DEB=siegfried_${SF_VERSION}-1_amd64.deb
 RUN curl -s -L -O https://github.com/richardlehane/siegfried/releases/download/v${SF_VERSION}/${SF_DEB} && \
@@ -18,33 +23,44 @@ RUN curl -s -L -O https://github.com/richardlehane/siegfried/releases/download/v
     rm -f ${SF_DEB} && \
     sf -update
 
-# Install TRiD:
+# Install TRiD (9MB)
 RUN curl -s -L -O http://mark0.net/download/trid_linux_64.zip && \
-curl -s -L -O http://mark0.net/download/triddefs.zip && \
-unzip trid_linux_64.zip && unzip triddefs.zip && chmod +x ./trid && \
-mv ./trid /usr/local/bin/trid && mv triddefs.trd /usr/local/bin/ && \
-rm -f trid_linux_64.zip triddefs.zip
+    curl -s -L -O http://mark0.net/download/triddefs.zip && \
+    unzip trid_linux_64.zip && unzip triddefs.zip && chmod +x ./trid && \
+    mv ./trid /usr/local/bin/trid && mv triddefs.trd /usr/local/bin/ && \
+    rm -f trid_linux_64.zip triddefs.zip
 
-# Install Fido:
-RUN pip install --no-cache opf-fido
+# Install Fido (10MB)
+RUN pip install --no-cache-dir opf-fido
 
-# Install JRE for Java programs and ffmpeg for a/v formats (c. 0.6GB!):
-RUN apt-get update && apt-get install -y default-jre ffmpeg && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
+# Install RClone (62MB) as per https://rclone.org/install/#linux-precompiled
+RUN curl -fsSL -O https://downloads.rclone.org/rclone-current-linux-amd64.zip && \
+    unzip rclone-current-linux-amd64.zip && \
+    cp rclone-*-linux-amd64/rclone /usr/bin/ && \
+    chown root:root /usr/bin/rclone && \
+    chmod 755 /usr/bin/rclone && \
+    rm -fr rclone-*-linux-*
 
-# Install GitHub Linguist and it's build dependencies (c. 0.2GB):
-RUN apt-get update && \
-    apt-get install -y cmake pkg-config libicu-dev zlib1g-dev libcurl4-openssl-dev libssl-dev ruby-dev && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN gem install github-linguist
+#
+# Skipping the following to try to keep the image size down...
+#
 
-# Install Apache Tika (needs Java):
-ENV TIKA_VERSION=2.9.2
-RUN curl -s -L -o /usr/share/java/tika-app-${TIKA_VERSION}.jar https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-app-${TIKA_VERSION}.jar && \
-    ln -s /usr/share/java/tika-app-${TIKA_VERSION}.jar /usr/share/java/tika-app.jar
-COPY tika.sh /usr/local/bin/tika.sh
+# Install GitHub Linguist (~200MB at least, depending on shared deps)
+#RUN apt-get update && \
+#    apt-get install -y build-essential cmake pkg-config libicu-dev zlib1g-dev libcurl4-openssl-dev libssl-dev ruby-dev && \
+#    gem install github-linguist && \
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Install DROID (needs Java)
-COPY droid /usr/share/java/droid
-RUN ln -s /usr/share/java/droid/droid.sh /usr/local/bin/droid.sh
+# Install ffmpeg for a/v formats (484MB!)
+# RUN apt-get update && apt-get install -y ffmpeg && \
+#     apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Apache Tika (needs Java)
+#ENV TIKA_VERSION=2.9.2
+#RUN curl -s -L -o /usr/share/java/tika-app-${TIKA_VERSION}.jar https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-app-${TIKA_VERSION}.jar && \
+#    ln -s /usr/share/java/tika-app-${TIKA_VERSION}.jar /usr/share/java/tika-app.jar
+#COPY tika.sh /usr/local/bin/tika.sh
 
+# Install DROID (needs Java)
+#COPY droid /usr/share/java/droid
+#RUN ln -s /usr/share/java/droid/droid.sh /usr/local/bin/droid.sh
diff --git a/README.md b/README.md
index f478af9..495c6f4 100644
--- a/README.md
+++ b/README.md
@@ -3,27 +3,53 @@ DigiPres Toolbox
 
 A Docker image designed to make it easy to experiment with tools for Digital Preservation. Designed to be used via the [DigiPres Sandbox](https://github.com/digipres/sandbox) and the [DigiPres Workbench](https://github.com/digipres/workbench).
 
+Build locally with e.g.
+
+```
+docker build . -t toolbox
+```
+
+Then run with
+
+```
+docker run -it toolbox bash
+```
+
 ## Supported Tools
 
+Large (>>1GB) images don't seem to run well on Binder, so we can't install everything we'd like to. e.g. `ffmpeg` takes up 0.5GB!
+
+These sizes can be determined by using separate installation lines in the `Dockerfile` and then using commands like this to see what happened and what size the additional layer is:
+
+```
+docker history --no-trunc toolbox | grep ffmpeg
+```
+
 ### Pre-installed
 
- - [Apache Tika](https://tika.apache.org/)
  - [CLOC](https://github.com/AlDanial/cloc)
- - [DROID](http://digital-preservation.github.io/droid/)
- - [ffmpeg](https://ffmpeg.org) including [ffprobe](https://ffmpeg.org/ffprobe.html)
  - [Fido](https://github.com/openpreserve/fido)
  - [File](https://www.darwinsys.com/file/)
- - [GitHub Linguist](https://github.com/github/linguist)
  - [MediaInfo](https://github.com/MediaArea/MediaInfo)
+ - [RClone](https://rclone.org/)
  - [Siegfried](https://www.itforarchivists.com/siegfried)
  - [TrID](http://mark0.net/soft-trid-e.html)
 
 ### Verified Installable
 
-These aren't installed by default, but the [Sandbox](https://github.com/digipres/sandbox) shows how to install them.
+These aren't installed by default because of their size, but the [Sandbox](https://github.com/digipres/sandbox) indicates how to download and install them.
 
+ - [Apache Tika](https://tika.apache.org/)
+ - [DROID](http://digital-preservation.github.io/droid/)
+ - [ffmpeg](https://ffmpeg.org) including [ffprobe](https://ffmpeg.org/ffprobe.html)
  - [pdfcpu](https://pdfcpu.io)
 
+### Cannot Be Installed
+
+These require root access to install (so they can't be added afterwards), but take up too much space to pre-install:
+
+ - [GitHub Linguist](https://github.com/github/linguist) (200-400MB in size depending on base image, mostly down to requiring a full build environment)
+
 ### To Consider
 
  - RClone