diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..fb918cc84 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +python/python/bystro/*.so +python/target +python/build +python/python/bystro/examples +python/python/bystro/ancestry/data +**/*.ipynb +**/.mypy_cache +**/.coverage +**/.ipynb_checkpoints +**/*.log +**/__pycache__ +perl/.build +perl/.build_backup/ +perl/*tar.gz +perl/.vscode +perl/.tidyall.d +perl/Bystro-* diff --git a/.github/workflows/perl-build-docker.yml b/.github/workflows/perl-build-docker.yml new file mode 100644 index 000000000..3b10459fa --- /dev/null +++ b/.github/workflows/perl-build-docker.yml @@ -0,0 +1,40 @@ +name: (Annotator) Build and Run Annotator Perl Dockerfile + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build Docker Image from Local Context + run: | + docker build -t bystro-annotator -f Dockerfile.perl . + + - name: Run Docker Image and Capture Help Output + continue-on-error: true # This is necessary because the help output returns a 255 + id: help-output + run: | + docker run --rm bystro-annotator &> help_output.txt + + - name: Assert Help Output Starts with Expected String + run: | + first_line=$(head -n 1 help_output.txt) + if [[ "$first_line" == "usage: bystro-annotate.pl"* ]]; then + echo "Help output assertion passed!" + else + echo "Help output assertion failed. 
Got: $first_line" + exit 1 + fi + + - name: Upload Help Output as Artifact + uses: actions/upload-artifact@v3 + with: + name: help-output + path: help_output.txt diff --git a/.github/workflows/perl-code-tidy.yml b/.github/workflows/perl-code-tidy.yml index e20795b38..7c5c437a4 100644 --- a/.github/workflows/perl-code-tidy.yml +++ b/.github/workflows/perl-code-tidy.yml @@ -3,14 +3,14 @@ name: Perl Tidy & Critic Check on: push: paths: - - '**.pl' - - '**.pm' - - '**.t' + - "**.pl" + - "**.pm" + - "**.t" pull_request: paths: - - '**.pl' - - '**.pm' - - '**.t' + - "**.pl" + - "**.pm" + - "**.t" jobs: tidy-critic-check: diff --git a/.github/workflows/perl-package-build.yml b/.github/workflows/perl-package-build.yml index 780a3f345..a429dfd0f 100644 --- a/.github/workflows/perl-package-build.yml +++ b/.github/workflows/perl-package-build.yml @@ -22,77 +22,77 @@ jobs: - name: Install Go shell: bash run: | - set -eux; \ - arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \ - url=; \ - case "$arch" in \ - 'amd64') \ - url='https://dl.google.com/go/go1.21.3.linux-amd64.tar.gz'; \ - sha256='1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8'; \ - ;; \ - 'armel') \ - export GOARCH='arm' GOARM='5' GOOS='linux'; \ - ;; \ - 'armhf') \ - url='https://dl.google.com/go/go1.21.3.linux-armv6l.tar.gz'; \ - sha256='a1ddcaaf0821a12a800884c14cb4268ce1c1f5a0301e9060646f1e15e611c6c7'; \ - ;; \ - 'arm64') \ - url='https://dl.google.com/go/go1.21.3.linux-arm64.tar.gz'; \ - sha256='fc90fa48ae97ba6368eecb914343590bbb61b388089510d0c56c2dde52987ef3'; \ - ;; \ - 'i386') \ - url='https://dl.google.com/go/go1.21.3.linux-386.tar.gz'; \ - sha256='fb209fd070db500a84291c5a95251cceeb1723e8f6142de9baca5af70a927c0e'; \ - ;; \ - 'mips64el') \ - url='https://dl.google.com/go/go1.21.3.linux-mips64le.tar.gz'; \ - sha256='a569ffbc88b4e14cf2682f65cec950460665e4392b0d78b8868b4718c979bda8'; \ - ;; \ - 'ppc64el') \ - url='https://dl.google.com/go/go1.21.3.linux-ppc64le.tar.gz'; \ - 
sha256='3b0e10a3704f164a6e85e0377728ec5fd21524fabe4c925610e34076586d5826'; \ - ;; \ - 'riscv64') \ - url='https://dl.google.com/go/go1.21.3.linux-riscv64.tar.gz'; \ - sha256='67d14d3e513e505d1ec3ea34b55641c6c29556603c7899af94045c170c1c0f94'; \ - ;; \ - 's390x') \ - url='https://dl.google.com/go/go1.21.3.linux-s390x.tar.gz'; \ - sha256='4c78e2e6f4c684a3d5a9bdc97202729053f44eb7be188206f0627ef3e18716b6'; \ - ;; \ - *) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \ - esac; \ - build=; \ - if [ -z "$url" ]; then \ - # https://github.com/golang/go/issues/38536#issuecomment-616897960 - build=1; \ - url='https://dl.google.com/go/go1.21.3.src.tar.gz'; \ - sha256='186f2b6f8c8b704e696821b09ab2041a5c1ee13dcbc3156a13adcf75931ee488'; \ - echo >&2; \ - echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \ - echo >&2; \ - fi; \ - \ - wget -O go.tgz.asc "$url.asc"; \ - wget -O go.tgz "$url" --progress=dot:giga; \ - echo "$sha256 *go.tgz" | sha256sum -c -; \ - \ - # https://github.com/golang/go/issues/14739#issuecomment-324767697 - GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \ - # https://www.google.com/linuxrepositories/ - gpg --batch --keyserver keyserver.ubuntu.com --recv-keys 'EB4C 1BFD 4F04 2F6D DDCC EC91 7721 F63B D38B 4796'; \ - # let's also fetch the specific subkey of that key explicitly that we expect "go.tgz.asc" to be signed by, just to make sure we definitely have it - gpg --batch --keyserver keyserver.ubuntu.com --recv-keys '2F52 8D36 D67B 69ED F998 D857 78BD 6547 3CB3 BD13'; \ - gpg --batch --verify go.tgz.asc go.tgz; \ - gpgconf --kill all; \ - rm -rf "$GNUPGHOME" go.tgz.asc; \ - \ - tar -C /usr/local -xzf go.tgz; \ - rm go.tgz; - /usr/local/go/bin/go version - echo "/usr/local/go/bin" >> $GITHUB_PATH - echo "${HOME}/go/bin" >> $GITHUB_PATH + set -eux; \ + arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \ + url=; \ + case "$arch" in \ + 
'amd64') \ + url='https://dl.google.com/go/go1.21.3.linux-amd64.tar.gz'; \ + sha256='1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8'; \ + ;; \ + 'armel') \ + export GOARCH='arm' GOARM='5' GOOS='linux'; \ + ;; \ + 'armhf') \ + url='https://dl.google.com/go/go1.21.3.linux-armv6l.tar.gz'; \ + sha256='a1ddcaaf0821a12a800884c14cb4268ce1c1f5a0301e9060646f1e15e611c6c7'; \ + ;; \ + 'arm64') \ + url='https://dl.google.com/go/go1.21.3.linux-arm64.tar.gz'; \ + sha256='fc90fa48ae97ba6368eecb914343590bbb61b388089510d0c56c2dde52987ef3'; \ + ;; \ + 'i386') \ + url='https://dl.google.com/go/go1.21.3.linux-386.tar.gz'; \ + sha256='fb209fd070db500a84291c5a95251cceeb1723e8f6142de9baca5af70a927c0e'; \ + ;; \ + 'mips64el') \ + url='https://dl.google.com/go/go1.21.3.linux-mips64le.tar.gz'; \ + sha256='a569ffbc88b4e14cf2682f65cec950460665e4392b0d78b8868b4718c979bda8'; \ + ;; \ + 'ppc64el') \ + url='https://dl.google.com/go/go1.21.3.linux-ppc64le.tar.gz'; \ + sha256='3b0e10a3704f164a6e85e0377728ec5fd21524fabe4c925610e34076586d5826'; \ + ;; \ + 'riscv64') \ + url='https://dl.google.com/go/go1.21.3.linux-riscv64.tar.gz'; \ + sha256='67d14d3e513e505d1ec3ea34b55641c6c29556603c7899af94045c170c1c0f94'; \ + ;; \ + 's390x') \ + url='https://dl.google.com/go/go1.21.3.linux-s390x.tar.gz'; \ + sha256='4c78e2e6f4c684a3d5a9bdc97202729053f44eb7be188206f0627ef3e18716b6'; \ + ;; \ + *) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \ + esac; \ + build=; \ + if [ -z "$url" ]; then \ + # https://github.com/golang/go/issues/38536#issuecomment-616897960 + build=1; \ + url='https://dl.google.com/go/go1.21.3.src.tar.gz'; \ + sha256='186f2b6f8c8b704e696821b09ab2041a5c1ee13dcbc3156a13adcf75931ee488'; \ + echo >&2; \ + echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \ + echo >&2; \ + fi; \ + \ + wget -O go.tgz.asc "$url.asc"; \ + wget -O go.tgz "$url" --progress=dot:giga; 
\ + echo "$sha256 *go.tgz" | sha256sum -c -; \ + \ + # https://github.com/golang/go/issues/14739#issuecomment-324767697 + GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \ + # https://www.google.com/linuxrepositories/ + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys 'EB4C 1BFD 4F04 2F6D DDCC EC91 7721 F63B D38B 4796'; \ + # let's also fetch the specific subkey of that key explicitly that we expect "go.tgz.asc" to be signed by, just to make sure we definitely have it + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys '2F52 8D36 D67B 69ED F998 D857 78BD 6547 3CB3 BD13'; \ + gpg --batch --verify go.tgz.asc go.tgz; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" go.tgz.asc; \ + \ + tar -C /usr/local -xzf go.tgz; \ + rm go.tgz; + /usr/local/go/bin/go version + echo "/usr/local/go/bin" >> $GITHUB_PATH + echo "${HOME}/go/bin" >> $GITHUB_PATH - name: Install one-off Bystro dependencies shell: bash @@ -100,7 +100,6 @@ jobs: go install github.com/bystrogenomics/bystro-vcf@2.2.2 cpm install -g https://github.com/bystrogenomics/msgpack-perl.git cpm install -g --no-test MouseX::Getopt - cpm install -g DBD::mysql@4.051 git clone --depth 1 --recurse-submodules https://github.com/salortiz/LMDB_File.git \ && cd LMDB_File \ && cpanm --quiet . 
diff --git a/.github/workflows/perl-test-install-apt.yml b/.github/workflows/perl-test-install-apt.yml new file mode 100644 index 000000000..a77321e43 --- /dev/null +++ b/.github/workflows/perl-test-install-apt.yml @@ -0,0 +1,118 @@ +name: Install and Test Bystro (APT) + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + # Checkout the repository + - name: Checkout repository + uses: actions/checkout@v4 + + # Run the install script using APT + - name: Install Bystro using APT script + run: | + touch ~/.profile + ./install-apt.sh --profile-file ~/.profile + + # Test that bystro-annotate.pl works + # When it runs with --help it exits with 255 + - name: Verify installation by running bystro-annotate.pl --help + continue-on-error: true + run: | + source ~/.profile + output=$(bystro-annotate.pl --help 2>&1) + exit_code=$? + first_line=$(echo "$output" | head -n 1) + + if [ "$exit_code" -eq 255 ]; then + echo "Exit code 255 confirmed." + else + echo "Unexpected exit code: $exit_code" >&2 + exit 1 + fi + + if [[ "$first_line" == "usage: bystro-annotate.pl"* ]]; then + echo "First line matches the expected 'usage: bystro-annotate.pl'." + else + echo "First line does not match the expected 'usage: bystro-annotate.pl'." 
>&2 + echo "Actual first line: $first_line" >&2 + exit 1 + fi + + # Test that bystro-build.pl works + # When it runs with --help it exits with 1 + - name: Verify installation by running bystro-build.pl --help + continue-on-error: true + run: | + source ~/.profile + output=$(bystro-build.pl --help 2>&1) + first_line=$(echo "$output" | head -n 1) + + if [[ "$first_line" == "Usage:"* ]]; then + echo "First line matches the expected 'Usage:'" + else + echo "First line does not match the expected 'Usage:'" >&2 + echo "Actual first line: $first_line" >&2 + exit 1 + fi + + - name: Run tests + run: | + cd perl + source ~/.profile + prove -r -j$(nproc) t + + - name: Check that expected programs are in the PATH + run: | + source ~/.profile + if command -v bystro-annotate.pl > /dev/null; then + echo "bystro-annotate.pl is in the PATH" + else + echo "bystro-annotate.pl is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-build.pl > /dev/null; then + echo "bystro-build.pl is in the PATH" + else + echo "bystro-build.pl is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-vcf > /dev/null; then + echo "bystro-vcf is in the PATH" + else + echo "bystro-vcf is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-snp > /dev/null; then + echo "bystro-snp is in the PATH" + else + echo "bystro-snp is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-stats > /dev/null; then + echo "bystro-stats is in the PATH" + else + echo "bystro-stats is not in the PATH" >&2 + exit 1 + fi + + if command -v bgzip > /dev/null; then + echo "bgzip is in the PATH" + else + echo "bgzip is not in the PATH" >&2 + exit 1 + fi + + if command -v yq > /dev/null; then + echo "yq is in the PATH" + else + echo "yq is not in the PATH" >&2 + exit 1 + fi diff --git a/.github/workflows/perl-test-install-rpm.yml b/.github/workflows/perl-test-install-rpm.yml new file mode 100644 index 000000000..a9a0b6005 --- /dev/null +++ b/.github/workflows/perl-test-install-rpm.yml @@ -0,0 +1,124 @@ 
+name: Install and Test Bystro (RPM) + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + container: + image: amazonlinux:2023 + + steps: + - name: Install dependencies + run: | + dnf install -y sudo tar + + # Checkout the repository + - name: Checkout repository + uses: actions/checkout@v4 + + # Run the install script + - name: Install Bystro using RPM script + run: | + touch ~/.profile + ./install-rpm.sh --profile-file ~/.profile + + # Test that bystro-annotate.pl works + # When it runs with --help it exits with 255 + - name: Verify installation by running bystro-annotate.pl --help + continue-on-error: true + run: | + source ~/.profile + output=$(bystro-annotate.pl --help 2>&1) + exit_code=$? + first_line=$(echo "$output" | head -n 1) + + if [ "$exit_code" -eq 255 ]; then + echo "Exit code 255 confirmed." + else + echo "Unexpected exit code: $exit_code" >&2 + exit 1 + fi + + if [[ "$first_line" == "usage: bystro-annotate.pl"* ]]; then + echo "First line matches the expected 'usage: bystro-annotate.pl'." + else + echo "First line does not match the expected 'usage: bystro-annotate.pl'." 
>&2 + echo "Actual first line: $first_line" >&2 + exit 1 + fi + + # Test that bystro-build.pl works + # when it runs with --help it exits with 1 + - name: Verify installation by running bystro-build.pl --help + continue-on-error: true + run: | + source ~/.profile + output=$(bystro-build.pl --help 2>&1) + first_line=$(echo "$output" | head -n 1) + + if [[ "$first_line" == "Usage:"* ]]; then + echo "First line matches the expected 'Usage:'" + else + echo "First line does not match the expected 'Usage:'" >&2 + echo "Actual first line: $first_line" >&2 + exit 1 + fi + + - name: Run tests + run: | + cd perl + source ~/.profile + prove -r -j$(nproc) t + + - name: Check that expected programs are in the PATH + run: | + source ~/.profile + if command -v bystro-annotate.pl > /dev/null; then + echo "bystro-annotate.pl is in the PATH" + else + echo "bystro-annotate.pl is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-build.pl > /dev/null; then + echo "bystro-build.pl is in the PATH" + else + echo "bystro-build.pl is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-vcf > /dev/null; then + echo "bystro-vcf is in the PATH" + else + echo "bystro-vcf is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-snp > /dev/null; then + echo "bystro-snp is in the PATH" + else + echo "bystro-snp is not in the PATH" >&2 + exit 1 + fi + + if command -v bystro-stats > /dev/null; then + echo "bystro-stats is in the PATH" + else + echo "bystro-stats is not in the PATH" >&2 + exit 1 + fi + + if command -v bgzip > /dev/null; then + echo "bgzip is in the PATH" + else + echo "bgzip is not in the PATH" >&2 + exit 1 + fi + + if command -v yq > /dev/null; then + echo "yq is in the PATH" + else + echo "yq is not in the PATH" >&2 + exit 1 + fi diff --git a/.github/workflows/python-build-docker.yml b/.github/workflows/python-build-docker.yml new file mode 100644 index 000000000..786ce7555 --- /dev/null +++ b/.github/workflows/python-build-docker.yml @@ -0,0 +1,39 @@ +name: 
(ML/Bioinformatics) Build and Run Bystro Python Dockerfile + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build Docker Image from Local Context + run: | + docker build -t bystro-python -f Dockerfile.python . + + - name: Run Docker Image and Capture Help Output + id: help-output + run: | + docker run --rm bystro-python > help_output_python.txt + + - name: Assert Help Output Starts with Expected String + run: | + first_line=$(head -n 1 help_output_python.txt) + if [[ "$first_line" == "usage: bystro-api"* ]]; then + echo "Help output assertion passed!" + else + echo "Help output assertion failed. Got: $first_line" + exit 1 + fi + + - name: Upload Help Output as Artifact + uses: actions/upload-artifact@v3 + with: + name: help-output-python + path: help_output_python.txt diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 3d6a48976..000000000 --- a/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM perl:5.36.1 - -ENV PATH="/bystro/bin:/usr/local/go/bin:/go/bin/:${PATH}" \ - PERL5LIB="/bystro/lib:${PERL5LIB}" \ - GOPATH="/go" - -RUN cpanm --local-lib=/root/perl5 local::lib && eval $(perl -I /root/perl5/lib -Mlocal::lib) -ADD ./ /root/bystro/ -RUN apt-get update && apt-get install sudo -RUN git config --global url."https://".insteadOf git:// - -WORKDIR /root/bystro -RUN . install/install-lmdb-linux.sh -RUN wget https://dl.google.com/go/go1.13.6.linux-amd64.tar.gz \ - && tar -xf go1.13.6.linux-amd64.tar.gz \ - && mv go /usr/local - -ADD . 
/bystro -WORKDIR /bystro - -RUN git config --global url."https://".insteadOf git:// -RUN bash install/install-go-linux.sh -RUN bash install/install-go-packages.sh -RUN bash install/install-lmdb-linux.sh -RUN bash install/install-perl-libs.sh - - -WORKDIR /bystro/bin diff --git a/Dockerfile.perl b/Dockerfile.perl new file mode 100644 index 000000000..1949e4ad3 --- /dev/null +++ b/Dockerfile.perl @@ -0,0 +1,31 @@ +# Use an official Ubuntu base image +FROM ubuntu:24.04 + +# Set environment variables +ENV PERL_VERSION=5.34.0 + +# Suppress interactive prompts from apt/dpkg during the image build +ENV DEBIAN_FRONTEND=noninteractive + +# Install git and sudo +RUN apt update && apt install -y git sudo + +# needed for minimal installs for Perl to compile +RUN apt-get update && apt-get install -y --no-install-recommends \ + perl \ + man-db \ + groff \ + libperl-dev + +COPY perl /bystro/perl +COPY go /bystro/go +COPY install /bystro/install + +# Copy your install-apt.sh script into the container +COPY install-apt.sh /bystro/install-apt.sh + +# Install dependencies +RUN cd /bystro && ./install-apt.sh + +# Source ~/.profile, then print the bystro-annotate.pl help when no arguments are given; otherwise exec the given command +ENTRYPOINT ["/bin/bash", "-c", "source ~/.profile && if [ \"$#\" -eq 0 ]; then bystro-annotate.pl --help; else exec \"$@\"; fi", "--"] diff --git a/Dockerfile.python b/Dockerfile.python index 3995e0601..ef072c6b9 100644 --- a/Dockerfile.python +++ b/Dockerfile.python @@ -1,13 +1,13 @@ # ---- Build Golang Binaries ---- -FROM golang:1.21.3 AS go-builder +FROM golang:1.21.4 AS go-builder # Set the environment variable for Go binaries. This makes sure the binaries are saved to a defined path. 
ENV GOBIN=/app/bin # Install the specific versions of the Go programs -RUN go install github.com/akotlar/bystro-stats@1.0.0 -RUN go install github.com/bystrogenomics/bystro-vcf@2.2.2; -RUN go install github.com/akotlar/bystro-snp@1.0.0 +RUN go install github.com/bystrogenomics/bystro-stats@1.0.1; +RUN go install github.com/bystrogenomics/bystro-vcf@2.2.3; +RUN go install github.com/bystrogenomics/bystro-snp@1.0.1; RUN go install github.com/mikefarah/yq@2.4.1 COPY ./go /app/bystro-go-tools @@ -45,8 +45,9 @@ ENV PATH="/usr/local/go/bin:${PATH}" # Set up the workspace WORKDIR /workspace -# Copy the entire monorepo +COPY ./startup.yml ./startup.yml COPY ./python ./python +COPY ./config ./config # Install Python and dependencies RUN pip install --upgrade pip && pip install python/ -r python/requirements-dev.txt @@ -55,6 +56,10 @@ COPY ./Makefile ./ RUN make install-python +# Run the Python test suite after installation, stopping at the first failure +RUN pytest python/ --maxfail=1 --disable-warnings + + ENTRYPOINT ["bystro-api"] CMD ["--help"] \ No newline at end of file diff --git a/INSTALL.md b/INSTALL.md index c32ffa776..76a7be1c4 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,32 +1,6 @@ -# Table of Contents - -1. [Installing Bystro Python Libraries and CLI Tools](#installing-the-bystro-python-libraries-and-cli-tools) -2. [Installing Bystro Using Docker](#installing-bystro-using-docker) - - [Building the Latest Version of Bystro in Docker](#building-the-latest-version-of-bystro-in-docker) -3. [Direct (Non-Docker) Installation](#direct-non-docker-installation) - - [Installing the Bystro Annotator (Perl/CLI)](#installing-the-bystro-annotator-perlcli) - - [For RPM-based Distros (Fedora, Red Hat, CentOS, etc.)](#fedora-redhat-centos-opensuse-mandriva) - - [For MacOS (Tested on High Sierra)](#macos-tested-on-highsierra-interactive) - - [For Ubuntu](#ubuntu) -4. [Configuring the Bystro Annotator](#configuring-the-bystro-annotator) -5. [Databases](#databases) -6. 
[Running Your First Annotation](#running-your-first-annotation) - -For most users, we recommend not installing the software, and using https://bystro.io, where the software is hosted - -The web app provides full functionality for any size experiment, a convenient search interface, and excellent performance - # Installing the Bystro Python libraries and cli tools -Bystro consists of 2 main components: - -1. The Bystro annotator (Perl) which is a command line tool for building new Bystro annotation databases, and for annotating VCF files with those databases. -2. The `bystro` Python package, which contains: - 1. The `bystro` library, which contains general purpose machine learning / statistical methods as well as applications of these methods in biology, with methods like global ancestry, polygenic risk score calculation, and proteomic analysis (data cleaning, pQTL, joining/filtering on genetic data). - 2. The `bystro-api` command line tool, which is a command line interface for the Bystro API server. This is used to login to Bystro cluster, submit jobs, and check job status. It has most of the functionality of the web application, but is more convenient for batch processing. - 3. For enterprise users that have their own Bystro cluster, the Bystro Python package also gives the ability to launch workers to handle Bystro API server requests (`bystro-save-worker`, `bystro-index-worker`). 
- -To install the Bystro Python package, run: +To install the Bystro Python package, which contains our machine learning library and some applications in genetics and proteomics, run: ```sh pip install --pre bystro @@ -85,100 +59,10 @@ To start the workers, update `config/beanstalk.yml` to point to the correct bean make serve-dev ``` -# Installing Bystro using Docker - -###### The recommended way to use Bystro on the command line - -Make sure you have [Docker installed](https://store.docker.com/search?type=edition&offering=community) - -#### Building the latest version of Bystro in Docker - -``` -git clone https://github.com/bystrogenomics/bystro.git && cd bystro -docker build -t bystro . -docker run bystro bystro-annotate.pl #Annotate -docker run bystro bystro-build.pl #Build -``` - -# Direct (non-Docker) installation - -There are 2 components to Bystro: - -1. The Bystro annotator: a Perl program accessed through the command line (via bin/bystro-\*) -2. The Bystro Python package: where the rest of Bystro's functionality lives (statistics, proteomics, etc). - -## Installing the Bystro annotator (Perl/cli) - -##### (Fedora, Redhat, Centos, openSUSE, Mandriva) - -1. `git clone https://github.com/bystrogenomics/bystro.git && cd bystro && source ./install-rpm.sh` - -##### MacOS (tested on HighSierra, interactive) - -1. `git clone https://github.com/bystrogenomics/bystro.git && cd bystro && source ./install-mac.sh` - -##### Ubuntu +## Installing the Bystro Annotator -1. Ensure that packages are up to date (`sudo apt update`), or that you are satisified with the state of package versions. -2. `git clone https://github.com/bystrogenomics/bystro.git && cd bystro && source ./install-apt.sh` - - Please not that this installation script will ask you for the root password in order to install system dependencies - -## Configuring the Bystro annotator - -Once Bystro is installed, it needs to be configured. The easiest step is choosing the species/assemblies to annotate. - -1. 
Download the Bystro database for your species/assembly - -- **Example:** hg38 (human reference GRCh38): `wget https://s3.amazonaws.com/bystro-db/hg38_v11.tar.gz` - - You need ~691GB of free space for hg38 and ~376GB of free space for hg19, including the space for the tar.gz archives - - The unpacked databases are ~517GB for hg38 and ~283GB for hg19 - -2. To install the database: - - **Example:** - - ```shell - cd /mnt/annotator/ - wget https://s3.amazonaws.com/bystro-db/hg38_v11.tar.gz - bgzip -d -c --threads 32 hg38_v11.tar.gz | tar xvf - - ``` - - In this example the hg38 database would located in `/mnt/annotator/hg38` - -3. Update the YAML configuration for the species/assembly to point to the database. - - For human genome assemblies, we provide pre-configured hg19.yml and hg38.yml, which assume `/mnt/annotator/hg19_v10` and `/mnt/annotator/hg38_v11` database directories respectively. - - If using a different mount point, different database folder name, or a different (or custom-built) database altogether, - you will need to update the `database_dir` property of the yaml config. - - - Note for a custom database, you would also need to ensure the track `outputOrder` lists all tracks, and that each track has all desired `features` listed - - For instance, using `yq` to can configure the `database_dir` and set `temp_dir` to have in-progress annotations written to local disk - - ```shell - yq write -i config/hg38.yml database_dir /mnt/my_fast_local_storage/hg38_v11 - yq write -i config/hg38.yml temp_dir /mnt/my_fast_local_storage/tmp - ``` - -## Databases: - -1. Human (hg38): https://s3.amazonaws.com/bystro-db/hg38_v11.tar.gz -2. Human (hg19): https://s3.amazonaws.com/bystro-db/hg19_v10.tar.gz -3. There are no restrictions on species support, but we currently only build human genomes. Please create a GitHub issue if you would like us to support others. 
- -## Running your first annotation - -Ex: Runing hg38 annotation - -```sh -bin/bystro-annotate.pl --config config/hg38.yml --in /path/in.vcf.gz --out /path/outPrefix --run_statistics [0,1] --compress -``` +Besides the Bystro ML library, which lives in bystro/python, we also have a Perl library that is used to annotate genetic data, providing necessary information for the Bystro ML library bioinformatics modules. -The outputs will be: +The Bystro Annotator, which handles processing genetic data (VCF files), performing quality control, feature labeling (annotation) of variants and samples, and generating an annotation output and genotype dosage matrices, is written in Perl. -- Annotation (compressed, due to --compress flag): `outPrefix.annotation.tsv.gz` -- Annotation log: `outPrefix.log.txt` -- Statistics JSON file `outPrefix.statistics.json` -- Statistics tab-separated file: `outPrefix.statistics.tsv` - - Removing the `--run_statistics` flag will skip the generation of `outPrefix.statistics.*` files +To install and configure the Bystro Annotator, follow the instructions in [perl/INSTALL.md](perl/INSTALL.md). diff --git a/Makefile b/Makefile index 1d823ef93..9f512a6cf 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ install-python: build-python fi install-go: - go install github.com/bystrogenomics/bystro-vcf@2.2.2 + go install github.com/bystrogenomics/bystro-vcf@2.2.3 (cd ./go && go install bystro/cmd/dosage) (cd ./go && go install bystro/cmd/dosage-filter) (cd ./go && go install bystro/cmd/opensearch) diff --git a/README.md b/README.md index 5bbf0fa88..01a48bbfd 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ The Bystro ancestry CLI `score` tool (`bystro-api ancestry score`) parses VCF fi ```sh # Requires Go: install from https://golang.org/doc/install -go install github.com/bystrogenomics/bystro-vcf@2.2.2 +go install github.com/bystrogenomics/bystro-vcf@2.2.3 ``` Bystro is compatible with Linux and MacOS. Windows support is experimental. 
If you are installing on MacOS as a native binary (Arm), you will need to install the following additional dependencies: @@ -55,8 +55,8 @@ Please refer to [INSTALL.md](INSTALL.md) for instructions on how to install the Bystro relies on pluggable (via Bystro's YAML config) pre-processors to normalize variant inputs (**dealing with VCF issues such as padding**), calculate whether a site is a transition or transversion, calculate sample maf, identify hets/homozygotes/missing samples, calculate heterozygosity, homozygosity, missingness, and more. -1. VCF format: [Bystro-Vcf](https://github.com/bystrogenomics/bystro-vcf/tree/2.2.2) -2. SNP format: [Bystro-SNP](https://github.com/akotlar/bystro-snp) +1. VCF format: [Bystro-Vcf](https://github.com/bystrogenomics/bystro-vcf/tree/2.2.3) +2. SNP format: [Bystro-SNP](https://github.com/bystrogenomics/bystro-snp/tree/1.0.1) 3. Create your own to support other formats! ## Annotation (Output) Field Descriptions diff --git a/TUTORIAL.md b/TUTORIAL.md index 7848820e1..d2909a033 100644 --- a/TUTORIAL.md +++ b/TUTORIAL.md @@ -12,7 +12,7 @@ analyses and handles large (Tb) sized experiments. Features include: ## Open Source -The open source command line annotator that powers bystro is located at the [bystro](https://github.com/akotlar/bystro). +The open source command line annotator that powers Bystro is located in the [bystro](https://github.com/bystrogenomics/bystro) repository. 
## ACM-BCB 2018 Presentation diff --git a/config/hg19.yml b/config/hg19.yml index e45922d3c..8c53c358c 100644 --- a/config/hg19.yml +++ b/config/hg19.yml @@ -47,7 +47,7 @@ statistics: programPath: bystro-stats refTrackField: ref siteTypeField: refSeq.siteType -temp_dir: /mnt/annotator/tmp +temp_dir: ~ tracks: outputOrder: - ref diff --git a/config/hg38.yml b/config/hg38.yml index 55344e7d1..eae6108a7 100644 --- a/config/hg38.yml +++ b/config/hg38.yml @@ -47,7 +47,7 @@ statistics: programPath: bystro-stats refTrackField: ref siteTypeField: refSeq.siteType -temp_dir: /mnt/annotator/tmp +temp_dir: ~ tracks: outputOrder: - ref diff --git a/go/go.mod b/go/go.mod index 7d60f22d6..c8da52e1e 100644 --- a/go/go.mod +++ b/go/go.mod @@ -8,26 +8,20 @@ require ( github.com/biogo/hts v1.4.4 github.com/bystrogenomics/bystro-vcf v0.0.0-20240425204515-a3bed256638d github.com/bytedance/sonic v1.10.2 - github.com/dolthub/swiss v0.2.1 github.com/klauspost/compress v1.17.4 github.com/klauspost/pgzip v1.2.6 github.com/opensearch-project/opensearch-go/v2 v2.3.0 + github.com/tidwall/btree v1.7.0 gopkg.in/yaml.v3 v3.0.1 ) require ( - github.com/akotlar/bystro-utils v0.0.0-20180921004542-b5183a523f20 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect github.com/chenzhuoyu/iasm v0.9.1 // indirect - github.com/dolthub/maphash v0.1.0 // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/google/flatbuffers v23.5.26+incompatible // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect - github.com/mhmtszr/concurrent-swiss-map v1.0.8 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect - github.com/raviqqe/hamt/v2 v2.0.0 // indirect - github.com/scylladb/go-set v1.0.2 // indirect - github.com/tidwall/btree v1.7.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/arch v0.6.0 // indirect diff --git a/go/go.sum b/go/go.sum index f21a0e355..fedf5d26b 100644 --- 
a/go/go.sum +++ b/go/go.sum @@ -1,5 +1,3 @@ -github.com/akotlar/bystro-utils v0.0.0-20180921004542-b5183a523f20 h1:DTPJA8O7qs7Dc+YRg9wZusDy/SoVHqn9udRQlr+TSFA= -github.com/akotlar/bystro-utils v0.0.0-20180921004542-b5183a523f20/go.mod h1:BHUTiQAFM7OSW8+09I/OwWMhgbsonqQ6J8jTlnpOPnk= github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= github.com/aws/aws-sdk-go v1.44.263/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= @@ -20,8 +18,6 @@ github.com/beanstalkd/go-beanstalk v0.2.0/go.mod h1:/G8YTyChOtpOArwLTQPY1CHB+i21 github.com/biogo/boom v0.0.0-20150317015657-28119bc1ffc1/go.mod h1:fwtxkutinkQcME9Zlywh66T0jZLLjgrwSLY2WxH2N3U= github.com/biogo/hts v1.4.4 h1:Z+TminqAKRE/t6nyy5PwI/DL90kdew4GpghB+QdjjFk= github.com/biogo/hts v1.4.4/go.mod h1:AfPn4uJQ2zxi04Q/4vccdmCX16W+IsHXVguPsdh4HE4= -github.com/bystrogenomics/bystro-vcf v0.0.0-20240406002905-a9a245f1ffb0 h1:iAbMGYoCpvqcPLuolyHMH6HKdiYIg8EVzb/lbUjCZwI= -github.com/bystrogenomics/bystro-vcf v0.0.0-20240406002905-a9a245f1ffb0/go.mod h1:ssLIZJL1hUm8xIFZO33X2Mp0b3Ju7oJ2liBW0Qov0KQ= github.com/bystrogenomics/bystro-vcf v0.0.0-20240425204515-a3bed256638d h1:Y3qdBlf9Q1DJfGFP47GNwU7ytJklAqpuxgoapAs4U80= github.com/bystrogenomics/bystro-vcf v0.0.0-20240425204515-a3bed256638d/go.mod h1:ssLIZJL1hUm8xIFZO33X2Mp0b3Ju7oJ2liBW0Qov0KQ= github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= @@ -38,20 +34,15 @@ github.com/chenzhuoyu/iasm v0.9.1/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLI github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ= -github.com/dolthub/maphash v0.1.0/go.mod 
h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4= -github.com/dolthub/swiss v0.2.1 h1:gs2osYs5SJkAaH5/ggVJqXQxRXtWshF6uE0lgR/Y3Gw= -github.com/dolthub/swiss v0.2.1/go.mod h1:8AhKZZ1HK7g18j7v7k6c5cYIGEZJcPn0ARsai8cUrh0= -github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -64,23 +55,16 @@ github.com/kortschak/utter v0.0.0-20190412033250-50fe362e6560/go.mod h1:oDr41C7k github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 
h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/mhmtszr/concurrent-swiss-map v1.0.8 h1:GDSxgVrXsPFsraUJaPMm7ptYulj8qnWPgnwXcWbJNxo= -github.com/mhmtszr/concurrent-swiss-map v1.0.8/go.mod h1:F6QETL48Qn7jEJ3ZPt7EqRZjAAZu7lRQeQGIzXuUIDc= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/opensearch-project/opensearch-go/v2 v2.3.0 h1:nQIEMr+A92CkhHrZgUhcfsrZjibvB3APXf2a1VwCmMQ= github.com/opensearch-project/opensearch-go/v2 v2.3.0/go.mod h1:8LDr9FCgUTVoT+5ESjc2+iaZuldqE+23Iq0r1XeNue8= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/raviqqe/hamt/v2 v2.0.0 h1:7HlIX7A/mTVq7l03YzQ/33TzeOhwmvEVWeSFKjHzSvY= -github.com/raviqqe/hamt/v2 v2.0.0/go.mod h1:Rrh7mIjnwBME0QV5G3ZVgJJXpStBAM6DT0C3n53mKp0= -github.com/scylladb/go-set v1.0.2 h1:SkvlMCKhP0wyyct6j+0IHJkBkSZL+TDzZ4E7f7BCcRE= -github.com/scylladb/go-set v1.0.2/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQwsHycqfs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -88,14 +72,17 @@ github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tidwall/btree v1.7.0 h1:L1fkJH/AuEh5zBnnBbmTwQ5Lt+bRJ5A8EWecslvo9iI= github.com/tidwall/btree v1.7.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= @@ -103,9 +90,9 @@ golang.org/x/arch v0.6.0 h1:S0JTfE48HbRj80+4tbvZDYsJ3tGv6BUU3XxyZ7CirAc= golang.org/x/arch v0.6.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod 
h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -115,6 +102,8 @@ golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -122,8 +111,6 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -138,15 +125,13 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= +gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/install-apt.sh b/install-apt.sh index d58c4341e..b1b193de8 100755 --- a/install-apt.sh +++ b/install-apt.sh @@ -1,38 +1,107 @@ #!/usr/bin/env bash +set -e +set -o pipefail -if [[ -n "$1" ]] -then - INSTALL_DIR=$1 +# Default values +DEFAULT_GO_PLATFORM="linux-amd64" +DEFAULT_GO_VERSION="1.21.4" +DEFAULT_PROFILE_FILE=$(./install/detect-shell-profile.sh "$HOME") + +# Function to display usage information +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --profile-file PROFILE_FILE Specify the shell profile file to update (default: auto-detected, e.g., ~/.bash_profile)" + echo " --go-platform GO_PLATFORM Specify the Go platform (default: linux-amd64)" + echo " --go-version GO_VERSION Specify the Go version (default: 1.21.4)" + echo " --help Show this help message and exit" + echo "" + exit 0 +} + +# Parse command-line arguments +PROFILE_FILE="$DEFAULT_PROFILE_FILE" +GO_PLATFORM="$DEFAULT_GO_PLATFORM" +GO_VERSION="$DEFAULT_GO_VERSION" +while [[ $# -gt 0 ]]; do + case $1 in + --profile-file) + PROFILE_FILE="$2" + shift 2 + ;; + --go-platform) + GO_PLATFORM="$2" + shift 2 + ;; + --go-version) + GO_VERSION="$2" + shift 2 + ;; + --help) + show_help + ;; + *) + echo "Unknown option: $1" + show_help + ;; + esac +done + +# Use the home directory of the invoking user, not root +if [[ -n "$SUDO_USER" ]]; then + HOME_DIR="$(getent passwd "$SUDO_USER" | cut -d: -f6)" else - INSTALL_DIR=~ + HOME_DIR="$HOME" fi -# # LiftOver is used for the LiftOverCadd.pm package, to liftOver cadd to hg38 -# and cadd's GRCh37.p13 MT to hg19 -. install/install-liftover-linux.sh; -. install/install-apt-deps.sh; -. 
install/install-lmdb-linux.sh; +echo "Home directory is $HOME_DIR" + +BYSTRO_INSTALL_DIR=$(pwd) +LOCAL_INSTALL_DIR="$HOME_DIR/.local" +BINARY_INSTALL_DIR="$HOME_DIR/.local/bin" + +echo "Install directory is $BYSTRO_INSTALL_DIR" +echo "PROFILE is $PROFILE_FILE" +echo "Go platform is $GO_PLATFORM" -. ~/.profile; +# Install APT dependencies +sudo ./install/install-apt-deps.sh -# Perlbrew simplifies version management -. ./install/install-perlbrew-linux.sh $INSTALL_DIR perl-5.30.1 ~/.profile; -. ./install/install-perl-libs.sh; +# Install HTSlib +./install/install-htslib.sh "$PROFILE_FILE" "$LOCAL_INSTALL_DIR" -. ~/.profile; +# Install LiftOver +./install/install-liftover-linux.sh "$PROFILE_FILE" "$BINARY_INSTALL_DIR" -# # Bystro is increasingly a golang progrma. Perl currently handles db fetching, -. install/install-go-linux.sh $INSTALL_DIR ~/.profile; +# Install LMDB +sudo ./install/install-lmdb-linux.sh -. ~/.profile; +# Install Perlbrew +./install/install-perlbrew-linux.sh "$PROFILE_FILE" "$HOME_DIR" perl-5.34.0 -. install/install-go-packages.sh; -. install/update-packages.sh; +# Install Go +./install/install-go.sh "$PROFILE_FILE" "$HOME_DIR" "$LOCAL_INSTALL_DIR" "$BYSTRO_INSTALL_DIR" "$GO_PLATFORM" "$GO_VERSION" -. ./install/export-bystro-libs.sh ~/.profile +# Export Bystro libraries to shell profile +./install/export-bystro-libs.sh "$PROFILE_FILE" "$BYSTRO_INSTALL_DIR" -. ~/.profile; +# Create logs directory +mkdir -p logs + +echo "\nTesting Bystro installation" + +bash -c ". $PROFILE_FILE && cd perl && prove -r ./t -j$(nproc)" +if [ $? -eq 0 ]; then + echo "\nBystro installation succeeded!" 
+else + echo "\nBystro installation failed" + exit 1 +fi -mkdir -p logs; +echo -e "\n\nREMEMBER TO INCREASE ULIMIT ABOVE 1024 IF RUNNING MANY FORKS\n\n" -printf "\n\nREMEMBER TO INCREASE ULIMIT ABOVE 1024 IF RUNNING MANY FORKS\n\nIF RUNNING 1st TIME RUN: `source $PERLBREW_ROOT/etc/bashrc`"; +echo -e "To get started with Bystro, for instance to run Bystro Annotator: \n" +echo "Update your shell to reflect the newly installed programs: 'source $PROFILE_FILE'" +echo "Run Bystro Annotator: 'bystro-annotate.pl --help'" +echo -e "\n\n" diff --git a/install-mac.sh b/install-mac.sh deleted file mode 100755 index ab3bfd610..000000000 --- a/install-mac.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -if [[ -n "$1" ]] -then - INSTALL_DIR=$1 -else - INSTALL_DIR=~ -fi - -# LiftOver is used for the LiftOverCadd.pm package, to liftOver cadd to hg38 -# and cadd's GRCh37.p13 MT to hg19 -. ./install/install-liftover-linux.sh; -. ./install/install-mac-deps.sh; -. ./install/install-lmdb-linux.sh; - -. ~/.bash_profile; - -# Perlbrew simplifies version management -# Mac doesn't like/build 5.28.0 without failing some tests -# However, even 5.26.0 and .2 will fail test 21 "wrapped lines longer than" -# Which seems to be of little consequence -. ./install/install-perlbrew-linux.sh $INSTALL_DIR perl-5.26.2 ~/.bash_profile 1 -. ./install/install-perl-libs.sh; - -# Bystro is increasingly a golang progrma. Perl currently handles db fetching, -. ./install/install-go-mac.sh $INSTALL_DIR; - -. ~/.bash_profile; - -. ./install/install-go-packages.sh; -# Not necessary for first install, but allows us to have a single entry point -# for installation and updates -. ./install/update-packages.sh; - -. ./install/export-bystro-libs.sh ~/.bash_profile - -. 
~/.bash_profile; - -mkdir -p logs; - -printf "\n\nREMEMBER TO INCREASE ULIMIT ABOVE 1024 IF RUNNING MANY FORKS\n\nIF RUNNING 1st TIME RUN: `source $PERLBREW_ROOT/etc/bashrc`"; diff --git a/install-rpm.sh b/install-rpm.sh index 110030eb6..8a6acac60 100755 --- a/install-rpm.sh +++ b/install-rpm.sh @@ -1,39 +1,107 @@ #!/usr/bin/env bash +set -e +set -o pipefail -if [[ -n "$1" ]] -then - INSTALL_DIR=$1 +# Default values +DEFAULT_GO_PLATFORM="linux-amd64" +DEFAULT_GO_VERSION="1.21.4" +DEFAULT_PROFILE_FILE=$(./install/detect-shell-profile.sh "$HOME") + +# Function to display usage information +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --profile-file PROFILE_FILE Specify the shell profile file to update (default: auto-detected, e.g., ~/.bash_profile)" + echo " --go-platform GO_PLATFORM Specify the Go platform (default: linux-amd64)" + echo " --go-version GO_VERSION Specify the Go version (default: 1.21.4)" + echo " --help Show this help message and exit" + echo "" + exit 0 +} + +# Parse command-line arguments +PROFILE_FILE="$DEFAULT_PROFILE_FILE" +GO_PLATFORM="$DEFAULT_GO_PLATFORM" +GO_VERSION="$DEFAULT_GO_VERSION" +while [[ $# -gt 0 ]]; do + case $1 in + --profile-file) + PROFILE_FILE="$2" + shift 2 + ;; + --go-platform) + GO_PLATFORM="$2" + shift 2 + ;; + --go-version) + GO_VERSION="$2" + shift 2 + ;; + --help) + show_help + ;; + *) + echo "Unknown option: $1" + show_help + ;; + esac +done + +# Use the home directory of the invoking user, not root +if [[ -n "$SUDO_USER" ]]; then + HOME_DIR="$(getent passwd "$SUDO_USER" | cut -d: -f6)" else - INSTALL_DIR=~ + HOME_DIR="$HOME" fi -# # LiftOver is used for the LiftOverCadd.pm package, to liftOver cadd to hg38 -# and cadd's GRCh37.p13 MT to hg19 -. install/install-liftover-linux.sh; -. install/install-rpm-deps.sh; -. 
install/install-lmdb-linux.sh; +echo "Home directory is $HOME_DIR" + +BYSTRO_INSTALL_DIR=$(pwd) +LOCAL_INSTALL_DIR="$HOME_DIR/.local" +BINARY_INSTALL_DIR="$HOME_DIR/.local/bin" -. ~/.bash_profile; +echo "Install directory is $BYSTRO_INSTALL_DIR" +echo "PROFILE is $PROFILE_FILE" +echo "Go platform is $GO_PLATFORM" -# Perlbrew simplifies version management -. ./install/install-perlbrew-linux.sh $INSTALL_DIR perl-5.30.1; -. ./install/install-perl-libs.sh; +# Install RPM dependencies +sudo ./install/install-rpm-deps.sh -. ~/.bash_profile; +# Install HTSlib +./install/install-htslib.sh "$PROFILE_FILE" "$LOCAL_INSTALL_DIR" -# # Bystro is increasingly a golang progrma. Perl currently handles db fetching, -. install/install-go-linux.sh $INSTALL_DIR; +# Install LiftOver +./install/install-liftover-linux.sh "$PROFILE_FILE" "$BINARY_INSTALL_DIR" -. ~/.bash_profile; +# Install LMDB +sudo ./install/install-lmdb-linux.sh -. install/install-go-packages.sh; -. install/update-packages.sh; +# Install Perlbrew +./install/install-perlbrew-linux.sh "$PROFILE_FILE" "$HOME_DIR" perl-5.34.0 -. ./install/export-bystro-libs.sh ~/.bash_profile +# Install Go +./install/install-go.sh "$PROFILE_FILE" "$HOME_DIR" "$LOCAL_INSTALL_DIR" "$BYSTRO_INSTALL_DIR" "$GO_PLATFORM" "$GO_VERSION" -. ~/.bash_profile; +# Export Bystro libraries to shell profile +./install/export-bystro-libs.sh "$PROFILE_FILE" "$BYSTRO_INSTALL_DIR" -mkdir -p logs; +# Create logs directory +mkdir -p logs + +echo "\nTesting Bystro installation" + +bash -c ". $PROFILE_FILE && cd perl && prove -r ./t -j$(nproc)" +if [ $? -eq 0 ]; then + echo "\nBystro installation succeeded!" 
+else + echo "\nBystro installation failed" + exit 1 +fi +echo -e "\n\nREMEMBER TO INCREASE ULIMIT ABOVE 1024 IF RUNNING MANY FORKS\n\n" -printf "\n\nREMEMBER TO INCREASE ULIMIT ABOVE 1024 IF RUNNING MANY FORKS\n\nF RUNNING 1st TIME RUN: `source $PERLBREW_ROOT/etc/bashrc`"; +echo -e "To get started with Bystro, for instance to run Bystro Annotator: \n" +echo "Update your shell to reflect the newly installed programs: 'source $PROFILE_FILE'" +echo "Run Bystro Annotator: 'bystro-annotate.pl --help'" +echo -e "\n\n" diff --git a/install/detect-shell-profile.sh b/install/detect-shell-profile.sh new file mode 100755 index 000000000..69f273a66 --- /dev/null +++ b/install/detect-shell-profile.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# detect-shell-profile.sh + +# Detect the shell profile file +USER_SHELL=$(getent passwd "$USER" | cut -d: -f7) + +if [[ -n "$SUDO_USER" ]]; then + HOME_DIR="$(getent passwd "$SUDO_USER" | cut -d: -f6)" +else + HOME_DIR="$HOME" +fi + +# Use ~/.profile unless ~/.bash_profile already exists +PROFILE_FILE="$HOME_DIR/.profile" + +if [ -f "$HOME_DIR/.bash_profile" ]; then + PROFILE_FILE="$HOME_DIR/.bash_profile" +fi + +# Ensure the profile file exists +if [ ! -f "$PROFILE_FILE" ]; then + touch "$PROFILE_FILE" + echo "# Created $PROFILE_FILE" >> "$PROFILE_FILE" + echo "$PROFILE_FILE has been created." 
>&2 +fi + +echo "$PROFILE_FILE" \ No newline at end of file diff --git a/install/export-bystro-libs.sh b/install/export-bystro-libs.sh old mode 100644 new mode 100755 index 260cbc912..48574a580 --- a/install/export-bystro-libs.sh +++ b/install/export-bystro-libs.sh @@ -1,18 +1,77 @@ #!/usr/bin/env bash +set -e -if [[ -n "$1" ]] -then - PROFILE=$1; -else - PROFILE=~/.bash_profile; +# Ensure that INSTALL_DIR is provided +if [ -z "$1" ]; then + echo "Usage: $0 " + echo "Install directory is the directory where Bystro source files are located" + exit 1 +fi + +PROFILE_FILE="$1" +BYSTRO_INSTALL_DIR="$2" + +#Strip BYSTRO_INSTALL_DIR of trailing slashes +BYSTRO_INSTALL_DIR="${BYSTRO_INSTALL_DIR%/}" + +echo "BYSTRO_INSTALL_DIR: $BYSTRO_INSTALL_DIR" + +# Verify that BYSTRO_INSTALL_DIR has perl, perl/bin, and perl/bin/bystro-annotate.pl +if [ ! -d "$BYSTRO_INSTALL_DIR/perl" ]; then + echo "Error: Directory $BYSTRO_INSTALL_DIR/perl does not exist." + exit 1 fi -DIR=$(pwd) +if [ ! -d "$BYSTRO_INSTALL_DIR/perl/bin" ]; then + echo "Error: Directory $BYSTRO_INSTALL_DIR/perl/bin does not exist." + exit 1 +fi + +if [ ! -f "$BYSTRO_INSTALL_DIR/perl/bin/bystro-annotate.pl" ]; then + echo "Error: File $BYSTRO_INSTALL_DIR/perl/bin/bystro-annotate.pl does not exist." + exit 1 +fi -echo -e "\n\nExporting $DIR/lib and $DIR/bin to $PROFILE\n"; +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" +# exports append_if_missing +source "$SCRIPT_DIR/utils.sh" -if ! cat $PROFILE | grep "$DIR/bin"; then - (echo "" ; echo 'export PERL5LIB=$PERL5LIB:'$DIR'/lib') | sudo tee -a $PROFILE - # Not 100% sure why this is necessary; something still wonky during perlbrew install - (echo "" ; echo 'export PATH=$PATH:'$DIR'/bin') | sudo tee -a $PROFILE -fi \ No newline at end of file +echo -e "\n\nExporting paths from $BYSTRO_INSTALL_DIR to $PROFILE_FILE\n" + +# Verify that $INSTALL_DIR/bystro/lib exists and contains at least one .pm file +LIB_DIR="$BYSTRO_INSTALL_DIR/perl/lib" + +if [ ! 
-d "$LIB_DIR" ]; then + echo "Error: Directory $LIB_DIR does not exist." + exit 1 +fi + +# Check if there is at least one .pm file in LIB_DIR +if ! ls "$LIB_DIR"/*.pm >/dev/null 2>&1; then + echo "Error: No .pm files found in $LIB_DIR." + exit 1 +fi + +# Verify that $INSTALL_DIR/perl/bin exists and contains at least one .pl file +PERL_BIN_DIR="$BYSTRO_INSTALL_DIR/perl/bin" + +if [ ! -d "$PERL_BIN_DIR" ]; then + echo "Error: Directory $PERL_BIN_DIR does not exist." + exit 1 +fi + +# Check if there is at least one .pl file in PERL_BIN_DIR +if ! ls "$PERL_BIN_DIR"/*.pl >/dev/null 2>&1; then + echo "Error: No .pl files found in $PERL_BIN_DIR." + exit 1 +fi + +# Append entries only if they are missing +append_if_missing 'export PERL5LIB=$PERL5LIB:'"$LIB_DIR" "$PROFILE_FILE" + +# Check if $PERL_BIN_DIR is in the PATH and if not, add it +if [[ ":$PATH:" != *":$PERL_BIN_DIR:"* ]]; then + append_if_missing 'export PATH=$PATH:'"$PERL_BIN_DIR" "$PROFILE_FILE" +else + echo "$PERL_BIN_DIR is in PATH" +fi diff --git a/install/export-go-path-linux.sh b/install/export-go-path-linux.sh deleted file mode 100755 index 2d097ef1c..000000000 --- a/install/export-go-path-linux.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -if [[ -n "$1" ]] -then - DIR=$1; -else - DIR=~; -fi - -if [[ -n "$2" ]] -then - PROFILE=$2; -else - PROFILE=~/.bash_profile; -fi - -echo -e "\n\nStoring GOPATH in $PROFILE\n"; - -(echo ""; echo 'export PATH=$PATH:/usr/local/go/bin') >> $PROFILE; -(echo ""; echo "export GOPATH=$DIR/go") >> $PROFILE; -(echo ""; echo 'export PATH=$PATH:'$DIR'/go/bin/') >> $PROFILE; - -source $PROFILE; - -mkdir -p $GOPATH/src/github.com; \ No newline at end of file diff --git a/install/install-apt-deps.sh b/install/install-apt-deps.sh index dea05239b..80a170dd9 100755 --- a/install/install-apt-deps.sh +++ b/install/install-apt-deps.sh @@ -1,36 +1,57 @@ #!/usr/bin/env bash - -echo -e "\n\nInstalling Ubuntu/Debian (apt-get) dependencies\n"; - -# Installs gcc, and more; may be too 
much -sudo apt install -y build-essential; - -# Not strictly necessary, useful however for much of what we do -sudo apt install -y git-all; -# pigz for Bystro, used to speed up decompression primarily - -sudo apt install -y pigz; -sudo apt install -y unzip; -sudo apt install -y wget; -# For Search::Elasticsearch::Client::5_0::Direct -sudo apt install -y openssl libcurl4-openssl-dev libssl-dev; -# For tests involving querying ucsc directly -sudo apt install -y libmysqlclient-dev; - -# for perlbrew, in case you want to install a different perl version -#https://www.digitalocean.com/community/tutorials/how-to-install-perlbrew-and-manage-multiple-versions-of-perl-5-on-centos-7 -# centos 7 doesn't include bzip2 -sudo apt install -y bzip2; -sudo apt install -y lz4; -sudo apt install -y patch; - -sudo apt install -y cpan; - -sudo apt install -y nodejs; -sudo apt install -y npm; -sudo npm install -g pm2; - -sudo apt install -y awscli; - -# pkg-config is required for building the wheel -sudo apt install -y pkg-config; +set -e +set -o pipefail + +# Ensure the script is run with root privileges +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root. Use sudo." + exit 1 +fi + +echo -e "\n\nInstalling development tools and dependencies\n" + +apt update + +# Install build-essential and other required packages +apt install -y \ + build-essential \ + autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev \ + libmariadb-dev \ + cmake \ + git \ + pigz \ + unzip \ + wget \ + tar \ + bzip2 \ + lz4 \ + patch \ + pkg-config \ + grep + +# check whether curl is installed, because in some containers it is installed and then we get conflicts +if ! command -v curl &> /dev/null +then + echo "curl is not installed. Installing now..." + sudo apt install curl -y +else + echo "curl is already installed." 
+fi + +# Create a temporary directory +mkdir -p /tmp/awscli-install +cd /tmp/awscli-install +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +sudo ./aws/install --update +cd - +rm -rf /tmp/awscli-install + +# Install Node.js 20.x +curl -fsSL https://deb.nodesource.com/setup_20.x | bash - +apt install -y nodejs + +# Install pm2 globally using npm +npm install -g pm2 + +echo -e "\n\nAll dependencies have been installed successfully.\n" diff --git a/install/install-go-linux.sh b/install/install-go-linux.sh deleted file mode 100755 index 0201cfdce..000000000 --- a/install/install-go-linux.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -if [[ -n "$1" ]] -then - DIR=$1 -else - DIR=$HOME -fi - -if [[ -n "$2" ]] -then - PROFILE=$2; -else - PROFILE=~/.bash_profile; -fi - -echo -e "\n\nInstalling Go in /usr/local\n" - -# Clean in case somethign left over from old installation -GOFILE=go1.21.4.linux-amd64.tar.gz -wget https://dl.google.com/go/$GOFILE; -tar -xf $GOFILE; -echo "Deleting go in /usr/local" -sudo rm -rf /usr/local/go -sudo mv go /usr/local; -rm $GOFILE; - -. 
install/export-go-path-linux.sh $DIR $PROFILE diff --git a/install/install-go-mac.sh b/install/install-go-mac.sh deleted file mode 100755 index bea214ea2..000000000 --- a/install/install-go-mac.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -if [[ -n "$1" ]] -then - DIR=$1 -else - DIR=$HOME -fi - -echo -e "\n\nInstalling Go\n" - -GOFILE=go1.21.4.darwin-amd64.pkg -wget https://dl.google.com/go/$GOFILE; -tar -xf $GOFILE; -echo "Deleting go in /usr/local" -sudo rm -rf /usr/local/go -sudo mv go /usr/local; -rm $GOFILE; - -./install/export-go-path-linux.sh $DIR diff --git a/install/install-go-packages.sh b/install/install-go-packages.sh deleted file mode 100755 index 30805bbe9..000000000 --- a/install/install-go-packages.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -echo -e "\n\nInstalling go packages (bystro-vcf, stats, snp)\n" - -mkdir -p $GOPATH/src/github.com; - -go mod init bystro - -go install github.com/akotlar/bystro-stats@1.0.0; - -go install github.com/bystrogenomics/bystro-vcf@2.2.2; - -go install github.com/akotlar/bystro-snp@1.0.0; - -# allows us to modify our config files in place -go install github.com/mikefarah/yq@2.4.1; - -# install local go packages -(cd ./go && go install bystro/cmd/dosage); diff --git a/install/install-go.sh b/install/install-go.sh new file mode 100755 index 000000000..d2dd1f702 --- /dev/null +++ b/install/install-go.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +set -e + +########## This script installs Go and configures the Go environment. 
+ +# Ensure that all six required arguments are provided +if [ "$#" -ne 6 ]; then + echo "Usage: $0 " + exit 1 +fi + +PROFILE_FILE="$1" +INSTALL_DIR="$2" +GOPATH_PARENT_DIR="$3" +BYSTRO_INSTALL_DIR="$4" +GO_PLATFORM="$5" +GO_VERSION="$6" +GOFILE="go${GO_VERSION}.${GO_PLATFORM}.tar.gz" + +# Derive GOPATH and the Go install and binary directories +GOPATH="$GOPATH_PARENT_DIR/go" +BYSTRO_GO_PROGRAM_INSTALL_PATH="$GOPATH/bin" +GO_INSTALL_DIR="$INSTALL_DIR/go" +GO_BIN="$GO_INSTALL_DIR/bin" + +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" +# exports append_if_missing +source "$SCRIPT_DIR/utils.sh" + +echo -e "\n\nInstalling Go\n" + +# Check if Go is already installed +if command -v go >/dev/null 2>&1; then + INSTALLED_GO_VERSION=$(go version | awk '{print $3}') + echo "Go is already installed: $INSTALLED_GO_VERSION" + echo "Skipping Go installation." +else + echo "Go is not installed. Proceeding with installation..." + + # Create temporary directory for download + TEMP_DIR=$(mktemp -d) + cd "$TEMP_DIR" + + # Download Go + echo "Downloading Go $GO_VERSION for $GO_PLATFORM..." + wget -q "https://dl.google.com/go/$GOFILE" + + # Verify download succeeded + if [[ ! -f "$GOFILE" ]]; then + echo "Error: Failed to download Go tarball." + exit 1 + fi + + # Remove existing Go installation + if [ -d "$GO_INSTALL_DIR" ]; then + echo "Removing existing Go installation at $GO_INSTALL_DIR..." + rm -rf "$GO_INSTALL_DIR" + fi + + # Extract and install Go + echo "Installing Go to $INSTALL_DIR..." + tar -C "$INSTALL_DIR" -xzf "$GOFILE" + + # Clean up + cd - + rm -rf "$TEMP_DIR" + + # Set up environment variables + echo -e "\n\nConfiguring Go environment in $PROFILE_FILE\n" + + # Ensure the profile file exists + touch "$PROFILE_FILE" + + # Check if $GO_BIN is in the PATH and if not, add it + if [[ ":$PATH:" != *":$GO_BIN:"* ]]; then + export PATH="$PATH:$GO_BIN" + append_if_missing "export PATH=\$PATH:$GO_BIN" "$PROFILE_FILE" + fi + + echo -e "\nGo installation complete in $GO_INSTALL_DIR. 
Installing Bystro Go dependencies...\n" +fi + +# Set GOPATH if it is not already in the profile file +export GOPATH=$GOPATH +append_if_missing "export GOPATH=$GOPATH" "$PROFILE_FILE" + +if [[ ":$PATH:" != *":$BYSTRO_GO_PROGRAM_INSTALL_PATH:"* ]]; then + export PATH="$PATH:$BYSTRO_GO_PROGRAM_INSTALL_PATH" + append_if_missing "export PATH=\$PATH:$BYSTRO_GO_PROGRAM_INSTALL_PATH" "$PROFILE_FILE" +else + echo "$BYSTRO_GO_PROGRAM_INSTALL_PATH in PATH, skipping" +fi +# Create GOPATH directories +mkdir -p "$GOPATH/src/github.com" + +#### Install go packages + +cd "$BYSTRO_INSTALL_DIR/go" +# Initialize the Go module if it doesn't exist +if [ ! -f "go.mod" ]; then + echo "Initializing Go module..." + go mod init bystro +fi + +# Ensure dependencies are up to date +echo "Tidying up module dependencies..." +go mod tidy + +# Install the local 'dosage' command +echo "Installing dosage command..." +go install ./cmd/dosage + +# Install external packages +echo "Installing bystro-stats..." +go install github.com/bystrogenomics/bystro-stats@1.0.1 + +echo "Installing bystro-vcf..." +go install github.com/bystrogenomics/bystro-vcf@2.2.3 + +echo "Installing bystro-snp..." +go install github.com/bystrogenomics/bystro-snp@1.0.1 + +# Install yq for modifying config files +echo "Installing yq..." +go install github.com/mikefarah/yq/v2@2.4.1 + +# Return to the previous directory +cd - + +echo -e "\nBystro Go dependency installation complete." + +#### + +echo "Please start a new shell session or run 'source $PROFILE_FILE' to apply the changes." 
diff --git a/install/install-htslib.sh b/install/install-htslib.sh new file mode 100755 index 000000000..1f565dc68 --- /dev/null +++ b/install/install-htslib.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +set -e +set -o pipefail + +# Ensure that both required arguments are provided +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +PROFILE_FILE="$1" +INSTALL_DIR="$2" + +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" +# exports append_if_missing +source "$SCRIPT_DIR/utils.sh" + + +# Check if bgzip is already installed and skip installation if found +if command -v bgzip > /dev/null; then + echo "bgzip is already installed. Skipping installation." + exit 0 +fi + +echo "Installing libdeflate" + +# Set the libdeflate version +LIBDEFLATE_VERSION="1.21" +LIBDEFLATE_URL="https://github.com/ebiggers/libdeflate/archive/refs/tags/v${LIBDEFLATE_VERSION}.tar.gz" + +# Create a temporary directory +TEMP_DIR=$(mktemp -d) +echo "Created temporary directory: $TEMP_DIR" + +# Change to the temporary directory +cd $TEMP_DIR + +# Download libdeflate source code +echo "Downloading libdeflate version $LIBDEFLATE_VERSION from $LIBDEFLATE_URL..." +wget $LIBDEFLATE_URL + +# Extract the downloaded tar.gz file +echo "Extracting libdeflate-${LIBDEFLATE_VERSION}.tar.gz..." +tar -xvf v${LIBDEFLATE_VERSION}.tar.gz +cd libdeflate-${LIBDEFLATE_VERSION} + +# Configure and build libdeflate using CMake +echo "Building libdeflate..." +cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR -DCMAKE_INSTALL_LIBDIR=$INSTALL_DIR/lib -B build + +# Change to the build directory and run make +cd build +echo "Running make..." +make + +# Install libdeflate +echo "Installing libdeflate..." +make install + +# Clean up by removing the temporary directory +echo "Cleaning up temporary files..." 
+rm -rf $TEMP_DIR + +# Add the installation directory to the PATH if it's not already in PATH +append_if_missing "export PATH=$INSTALL_DIR/bin:\$PATH" "$PROFILE_FILE" +export PATH=$INSTALL_DIR/bin:$PATH + +# ensure that the shared libraries are available +append_if_missing "export LD_LIBRARY_PATH=$INSTALL_DIR/lib:\$LD_LIBRARY_PATH" "$PROFILE_FILE" +export LD_LIBRARY_PATH=$INSTALL_DIR/lib:$LD_LIBRARY_PATH + +# Set the local installation directory +echo "Installing HTSlib to $INSTALL_DIR" + +# Create a temporary directory +TEMP_DIR=$(mktemp -d) +echo "Created temporary directory: $TEMP_DIR" + +# Change to the temporary directory +cd $TEMP_DIR + +# Download htslib 1.21 source code from the official release +HTSLIB_VERSION="1.21" +HTSLIB_URL="https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2" +echo "Downloading HTSlib version $HTSLIB_VERSION from $HTSLIB_URL..." +wget $HTSLIB_URL + +# Extract the downloaded tar.bz2 file +echo "Extracting htslib-${HTSLIB_VERSION}.tar.bz2..." +tar -xvjf htslib-${HTSLIB_VERSION}.tar.bz2 + +# Change to the extracted htslib directory +cd htslib-${HTSLIB_VERSION} + +# Run autoreconf to build the configure script (if needed) +echo "Running autoreconf to generate configure script..." +autoreconf -i + +# Configure the build environment with the local installation directory +echo "Running ./configure with prefix=$INSTALL_DIR..." +./configure --prefix=$INSTALL_DIR --libdir=$INSTALL_DIR/lib + +# Compile and install htslib +echo "Compiling HTSlib..." +make + +echo "Installing HTSlib locally to $INSTALL_DIR..." +make install + +# Clean up by removing the temporary directory +echo "Cleaning up temporary files..." +rm -rf $TEMP_DIR + +# Verify the installation of bgzip and tabix +echo "Verifying installation..." +bgzip_output=$(bgzip --version) +exit_code=$? 
+ +# if exit code was 0 and bgzip output contains the expected version string +if [[ $exit_code -eq 0 ]] && [[ $bgzip_output == *"bgzip"* ]]; then + echo "bgzip installed successfully." +else + echo "bgzip installation failed." + exit 1 +fi \ No newline at end of file diff --git a/install/install-liftover-linux.sh b/install/install-liftover-linux.sh index 2ab805b38..6312b722d 100755 --- a/install/install-liftover-linux.sh +++ b/install/install-liftover-linux.sh @@ -1,8 +1,45 @@ #!/usr/bin/env bash +set -e -echo -e "\n\nInstalling liftover into /usr/local/bin/\n" +echo -e "\n\nInstalling liftOver\n" -# LiftOver is used for the LiftOverCadd.pm package, to liftOver cadd to hg38 -# and cadd's GRCh37.p13 MT to hg19 +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi -wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver && chmod +x liftOver && sudo mv $_ /usr/local/bin/; \ No newline at end of file +PROFILE_FILE=$1 +INSTALL_DIR=$2 + +# LiftOver is used for the LiftOverCadd.pm package to lift over CADD scores to hg38 +# and CADD's GRCh37.p13 MT to hg19. + +# Function to check if liftOver is installed +check_liftover_installed() { + if command -v liftOver >/dev/null 2>&1; then + echo "liftOver is already installed at $(command -v liftOver)" + return 0 + else + return 1 + fi +} + +# Install liftOver if not installed +if check_liftover_installed; then + echo "Skipping installation of liftOver." +else + echo "liftOver not found. Proceeding with installation..." 
+ + mkdir -p "$INSTALL_DIR" + + wget -q http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver -O "$INSTALL_DIR/liftOver" + chmod +x "$INSTALL_DIR/liftOver" + + if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then + echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$PROFILE_FILE" + export PATH="$INSTALL_DIR:$PATH" + echo "Added $INSTALL_DIR to PATH in $PROFILE_FILE" + fi + + echo "liftOver installed successfully to $INSTALL_DIR" +fi diff --git a/install/install-lmdb-linux.sh b/install/install-lmdb-linux.sh index 14cd552d3..c33d5108c 100755 --- a/install/install-lmdb-linux.sh +++ b/install/install-lmdb-linux.sh @@ -1,10 +1,27 @@ #!/usr/bin/env bash +set -e echo -e "\n\nInstalling LMDB\n" -# Bystro uses LMDB as its db engine. Fast, great use of cache -rm -rf lmdb -git clone git://github.com/LMDB/lmdb.git; -make -C lmdb/libraries/liblmdb; -sudo make install -C lmdb/libraries/liblmdb; -rm -rf lmdb \ No newline at end of file +# Bystro uses LMDB as its database engine. Fast, great use of cache. + +# Create a temporary directory for cloning +TEMP_DIR=$(mktemp -d) +echo "Cloning LMDB into temporary directory: $TEMP_DIR" + +# Clone the LMDB repository into the temporary directory +git clone https://github.com/LMDB/lmdb.git "$TEMP_DIR/lmdb" + +# Build LMDB +echo "Building LMDB..." +make -C "$TEMP_DIR/lmdb/libraries/liblmdb" + +# Install LMDB +echo "Installing LMDB..." +sudo make install -C "$TEMP_DIR/lmdb/libraries/liblmdb" + +# Clean up the temporary directory +echo "Cleaning up..." +rm -rf "$TEMP_DIR" + +echo "LMDB installation completed successfully."
diff --git a/install/install-perl-libs.sh b/install/install-perl-libs.sh index 2acf216fd..cb5f602de 100755 --- a/install/install-perl-libs.sh +++ b/install/install-perl-libs.sh @@ -1,67 +1,39 @@ #!/usr/bin/env bash +set -e -echo -e "\n\nInstalling perl libs\n" +echo -e "\n\nInstalling Perl libraries\n" echo "PERL ROOT IN install/install-perl-libs.sh: $PERLBREW_ROOT" -cpanm install Capture::Tiny -cpanm install Mouse -cpanm install Path::Tiny -cpanm install namespace::autoclean -cpanm install DDP -cpanm install YAML::XS -cpanm install JSON::XS -cpanm install Getopt::Long::Descriptive -cpanm install Types::Path::Tiny -cpanm install Sereal # extra MCE performance -cpanm install MCE::Shared -cpanm install List::MoreUtils -cpanm install Log::Fast -cpanm install Parallel::ForkManager -cpanm install Cpanel::JSON::XS -cpanm install Mouse::Meta::Attribute::Custom::Trait::Array -cpanm install Net::HTTP -cpanm install Math::SigFigs -# For now we use our own library -# Avoid issues with system liblmdb -env ALIEN_INSTALL_TYPE=share cpanm Alien::LMDB -cpanm install LMDB_File -cpanm install PerlIO::utf8_strict -cpanm install PerlIO::gzip -cpanm install MouseX::SimpleConfig -cpanm install MouseX::ConfigFromFile -# May fail installation on 5.28.0 due to minor output formatting issues -cpanm install MouseX::Getopt --force -cpanm install Archive::Extract -cpanm install DBI -cpanm install String::Strip -# Needed for fetching SQL (Utils::SqlWriter::Connection) -cpanm install DBD::mysql -cpanm install IO/FDPass.pm -cpanm install Beanstalk::Client -cpanm install Sys::CpuAffinity - -cpanm install File::Which - -# Needed for bin/annotate.pl -cpanm install Hash::Merge::Simple - -# Custom branch of msgpack-perl that uses latest msgpack-c and -# allows prefer_float32 flag for 5-byte float storage -cpanm install Module::Build::XSUtil -cpanm install Test::LeakTrace -cpanm install Test::Pod -cpanm install Test::Exception - -cpanm install Log::Any::Adapter - -# A dependency of Data::MessagePack 
installation -cpanm install File::Copy::Recursive - -cpanm --uninstall -f Data::MessagePack -rm -rf msgpack-perl -git clone --recursive https://github.com/bystrogenomics/msgpack-perl.git && cd msgpack-perl && git checkout 6fe098dd91e705b12c68d63bcb1f31c369c81e01 -perl Build.PL -perl Build test -perl Build install -cd ../ && rm -rf msgpack-perl +BASEDIR=$(dirname $0) +echo "install-perl-libs.sh basedir: $BASEDIR" + +# Install modules that require special handling +ALIEN_INSTALL_TYPE=share cpm install -g --test Alien::LMDB +cpm install -g --test LMDB_File +cpm install -g MouseX::Getopt # fails due to differences from expected output; unimportant +cpm install -g --test IO::FDPass +cpm install -g --test Beanstalk::Client +cpm install -g --test Sys::CpuAffinity +cpm install -g --test DBD::MariaDB + +# Custom installation of Data::MessagePack +install_custom_msgpack() { + rm -rf msgpack-perl + git clone --recursive https://github.com/bystrogenomics/msgpack-perl.git + cd msgpack-perl + git checkout 6fe098dd91e705b12c68d63bcb1f31c369c81e01 + cpm install -g --test . 
+ cd - + rm -rf msgpack-perl +} + +install_custom_msgpack + +# Install the rest of the modules +( + cd "$BASEDIR" || exit 1 + cd ../ || exit 1 + echo "Attempting to install remaining requirements from bystro/perl/cpanfile" + cpm install -g --test --cpanfile perl/cpanfile +) \ No newline at end of file diff --git a/install/install-perlbrew-linux.sh b/install/install-perlbrew-linux.sh index 65517c3a6..551c08ce2 100755 --- a/install/install-perlbrew-linux.sh +++ b/install/install-perlbrew-linux.sh @@ -1,62 +1,106 @@ #!/usr/bin/env bash -if [[ -n "$1" ]] -then - DIR=$1; -else - DIR=$HOME; -fi -if [[ -n "$2" ]] -then - VERSION=$2; -else - VERSION=perl-5.28.0; -fi +set -e +set -o pipefail -if [[ -n "$3" ]] -then - PROFILE=$3; -else - PROFILE=~/.bash_profile; +if [[ "$#" -lt 3 ]]; then + echo "Usage: $0 <profile_file> <install_dir> <perl_version> [skip_tests]" + exit 1 fi -if [[ -n "$4" ]] -then - NOTEST=$4; -else - NOTEST=0; -fi +# Parse command-line arguments or set default values +PROFILE=$1 +DIR=$2 +VERSION=$3 +NOTEST="${4:-0}" -export PERLBREW_ROOT=$DIR/perl5/perlbrew; -export PERLBREW_HOME=$DIR/.perlbrew; +# If $DIR ends in / strip it +DIR="${DIR%/}" -LOCAL_LIB="$DIR/perl5/lib/perl5" +echo "DIR IS $DIR" + +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" +source "$SCRIPT_DIR/utils.sh" + +echo "Installation directory: $DIR" +echo "Perl version: $VERSION" +echo "Profile file: $PROFILE" +echo "SCRIPT_DIR: $SCRIPT_DIR" +echo "Skip tests during Perl installation: $NOTEST" -echo -e "\n\nInstalling local perl via perlbrew into $DIR\n"; +export PERLBREW_ROOT="$DIR/perl5/perlbrew" +export PERLBREW_HOME="$DIR/.perlbrew" +LOCAL_LIB="$DIR/perl5/lib/perl5" -(\curl -L https://install.perlbrew.pl || \wget -O - https://install.perlbrew.pl) | bash +echo -e "\nInstalling Perl via perlbrew into $DIR\n" -if !
cat $PROFILE | grep "perl5\/perlbrew\/etc\/bashrc"; then - (echo "" ; echo "export PERLBREW_HOME=$PERLBREW_HOME") | sudo tee -a $PROFILE; - # Not sure why this is necessary sometimes; something still wonky during perlbrew install - (echo "" ; echo 'export PATH='$DIR'/perl5/bin/:$PATH') | sudo tee -a $PROFILE; - (echo "" ; echo 'export PERL5LIB=$PERL5LIB:'$LOCAL_LIB) | sudo tee -a $PROFILE; - (echo "" ; echo "source $PERLBREW_ROOT/etc/bashrc") | sudo tee -a $PROFILE; +# Install perlbrew if not already installed +if ! command -v perlbrew >/dev/null 2>&1; then + if command -v curl >/dev/null 2>&1; then + curl -L https://install.perlbrew.pl | bash + elif command -v wget >/dev/null 2>&1; then + wget -O - https://install.perlbrew.pl | bash + else + echo "Error: Neither 'curl' nor 'wget' is installed. Please install one to proceed." + exit 1 + fi fi -source $PERLBREW_ROOT/etc/bashrc; +append_if_missing "export PERLBREW_ROOT=\"$PERLBREW_ROOT\"" "$PROFILE" +append_if_missing "export PERLBREW_HOME=\"$PERLBREW_HOME\"" "$PROFILE" +append_if_missing "export PERL5LIB=\"\${PERL5LIB:+\$PERL5LIB:}$LOCAL_LIB\"" "$PROFILE" +append_if_missing "source \"$PERLBREW_ROOT/etc/bashrc\"" "$PROFILE" -cnt=$(perlbrew list | grep $VERSION | wc -l); -nCores=$(getconf _NPROCESSORS_ONLN); +# Check if $DIR/perl5/bin: is in path and if not, add it +if [[ ":$PATH:" != *":$DIR/perl5/bin:"* ]]; then + append_if_missing "export PATH=\"$DIR/perl5/bin:\$PATH\"" "$PROFILE" +else + echo "$DIR/perl5/bin is in PATH, skipping addition" +fi -if [ $cnt == 0 ]; then - if [ $NOTEST == 0 ]; then - perlbrew install $VERSION -j $nCores; +# Source the perlbrew environment +if [[ -f "$PERLBREW_ROOT/etc/bashrc" ]]; then + set +e + source "$PERLBREW_ROOT/etc/bashrc" + set -e +else + echo "Error: perlbrew bashrc file not found at $PERLBREW_ROOT/etc/bashrc" + exit 1 +fi + +# Check if the desired Perl version is already installed +if ! 
perlbrew list | grep -q "$VERSION"; then + nCores=$(getconf _NPROCESSORS_ONLN) + if [[ "$NOTEST" -eq 0 ]]; then + echo "Installing Perl $VERSION with $nCores cores (running tests)" + perlbrew install "$VERSION" -j "$nCores" else - perlbrew install $VERSION -j $nCores -n; + echo "Installing Perl $VERSION with $nCores cores (skipping tests)" + perlbrew install "$VERSION" -n -j "$nCores" fi +else + echo "Perl version $VERSION is already installed." fi -perlbrew switch $VERSION; -perlbrew install-cpanm; -cpanm --local-lib=$DIR/perl5 local::lib && eval $(perl -I $LOCAL_LIB -Mlocal::lib); +# Switch to the installed Perl version +perlbrew switch "$VERSION" + +echo "PERLBREW_ROOT is set to: $PERLBREW_ROOT" +# Install cpm +if [[ ! -x "$PERLBREW_ROOT/bin/cpm" ]]; then + echo "Installing cpm for Perl version $VERSION" + perlbrew install-cpm +else + echo "cpm is already installed for Perl version $VERSION." +fi + +# Install local::lib and set up environment using cpm +cpm install -g --local-lib-contained="$DIR/perl5" local::lib +eval "$(perl -I"$DIR/perl5/lib/perl5" -Mlocal::lib="$DIR/perl5")" + +echo -e "\nPerlbrew installation and setup complete." + +source "$SCRIPT_DIR/install-perl-libs.sh" + +# Reminder to source the profile +echo -e "\nPlease run 'source \"$PROFILE\"' or start a new shell session to apply the changes." diff --git a/install/install-rpm-deps.sh b/install/install-rpm-deps.sh index d5d329c09..ce7b90b5c 100755 --- a/install/install-rpm-deps.sh +++ b/install/install-rpm-deps.sh @@ -1,37 +1,115 @@ #!/usr/bin/env bash +set -e +set -o pipefail -echo -e "\n\nInstalling Debian (rpm) dependencies\n"; +# Ensure the script is run with root privileges +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root. Use sudo."
+ exit 1 +fi -sudo yum install gcc -y; -sudo yum install openssl -y; -sudo yum install openssl-devel -y; -# Not strictly necessary, useful however for much of what we do -sudo yum install git-all -y; -# pigz for Bystro, used to speed up decompression primarily -sudo yum install pigz -y; -sudo yum install unzip -y; -sudo yum install wget -y; -# For tests involving querying ucsc directly -sudo yum install mysql-devel -y; -# For Search::Elasticsearch::Client::5_0::Direct -sudo yum install libcurl-devel -y +# Import the MariaDB GPG key +rpm --import https://yum.mariadb.org/RPM-GPG-KEY-MariaDB -# for perlbrew, in case you want to install a different perl version -#https://www.digitalocean.com/community/tutorials/how-to-install-perlbrew-and-manage-multiple-versions-of-perl-5-on-centos-7 -# centos 7 doesn't include bzip2 -sudo yum install bzip2 -y; -sudo yum install lz4 --enable-repo=epel; -sudo yum install patch -y; +# Source OS release information +. /etc/os-release -sudo yum install cpan -y; +# Determine the appropriate base URL for your OS +case "$ID" in + "fedora") + OS_NAME="fedora" + OS_VERSION="${VERSION_ID}" + ;; -curl --silent --location https://rpm.nodesource.com/setup_12.x | sudo bash -; + "centos"|"rhel") + OS_NAME="centos" + OS_VERSION="${VERSION_ID%%.*}" + ;; -sudo yum install nodejs -y; + "amzn") + if [[ "$VERSION_ID" == "2" ]]; then + OS_NAME="centos" + OS_VERSION="7" + elif [[ "$VERSION_ID" == "2023" ]]; then + OS_NAME="rhel" + OS_VERSION="9" + else + echo "Unsupported Amazon Linux version." + exit 1 + fi + ;; -sudo npm install -g pm2; + *) + echo "Unsupported OS. This script supports Fedora, CentOS, RHEL, and Amazon Linux." 
+ exit 1 + ;; +esac -sudo yum install awscli -y; +# Set the MariaDB version you wish to install +MARIADB_VERSION="11.4" -# pkg-config is required for building the wheel -sudo yum install -y pkg-config; +# Create the MariaDB.repo file +cat <<EOF >/etc/yum.repos.d/MariaDB.repo +# MariaDB $MARIADB_VERSION repository list - created $(date +"%F %T") +# https://mariadb.org/download/ +[mariadb] +name = MariaDB +baseurl = https://yum.mariadb.org/$MARIADB_VERSION/$OS_NAME$OS_VERSION-amd64 +gpgkey = https://yum.mariadb.org/RPM-GPG-KEY-MariaDB +gpgcheck = 1 +EOF + +# Clean the dnf cache +dnf clean all + +# Install MariaDB-devel +dnf install -y MariaDB-devel + +# Check if mariadb_config is installed +if command -v mariadb_config > /dev/null; then + echo "MariaDB development libraries installed successfully." +else + echo "Failed to install MariaDB development libraries. Please check the repository configuration." + exit 1 +fi + +echo -e "\n\nInstalling RPM dependencies\n" + +dnf groupinstall -y "Development Tools" + +# Install all required packages +# autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel libdeflate-devel are required to build htslib +# cmake required to build libdeflate-devel, which is not available on amazonlinux 2023 +dnf install -y \ + autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel cmake \ + openssl \ + git \ + pigz \ + unzip \ + wget \ + tar \ + libcurl-devel \ + lz4 \ + patch \ + perl \ + perl-core \ + pkgconf-pkg-config \ + grep + +# Install Node.js 20.x +curl --silent --location https://rpm.nodesource.com/setup_20.x | bash - +dnf install -y nodejs + +# Create a temporary directory +mkdir -p /tmp/awscli-install +cd /tmp/awscli-install +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install --update +cd - +rm -rf /tmp/awscli-install + +# Install pm2 globally using npm +npm install -g
pm2 + +echo -e "\n\nAll dependencies have been installed successfully.\n" \ No newline at end of file diff --git a/install/update-packages.sh b/install/update-packages.sh deleted file mode 100755 index da86990fe..000000000 --- a/install/update-packages.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -echo -e "\n\nUpdating go packages\n" - -go get -u github.com/akotlar/... diff --git a/install/utils.sh b/install/utils.sh new file mode 100644 index 000000000..03b084c88 --- /dev/null +++ b/install/utils.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Function to append a line to PROFILE_FILE if it doesn't already exist +append_if_missing() { + local line="$1" + local profile_file="$2" + local found=0 + + # Read the profile file line by line and check if the line exists + while IFS= read -r current_line; do + if [ "$current_line" = "$line" ]; then + found=1 + break + fi + done < "$profile_file" + + # If the line was not found, append it to the file + if [ $found -eq 0 ]; then + echo -e "\n$line" >> "$profile_file" + echo "Added to $profile_file: $line" + else + echo "Already present in $profile_file: $line" + fi +} diff --git a/perl/.dockerignore b/perl/.dockerignore deleted file mode 100644 index 2a41cd376..000000000 --- a/perl/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -/.build -/.tidyall.d -Bytro-* -cpanfile diff --git a/perl/Dockerfile b/perl/Dockerfile deleted file mode 100644 index 0a561785e..000000000 --- a/perl/Dockerfile +++ /dev/null @@ -1,110 +0,0 @@ -# ---- Build Golang Binaries ---- -FROM golang:1.21.3 AS go-builder - -# Set the environment variable for Go binaries. This makes sure the binaries are saved to a defined path. 
-ENV GOBIN=/app/bin - -# Install the specific versions of the Go programs -RUN go install github.com/akotlar/bystro-stats@1.0.0 -RUN go install github.com/bystrogenomics/bystro-vcf@2.2.2 -RUN go install github.com/akotlar/bystro-snp@1.0.0 -RUN go install github.com/mikefarah/yq@2.4.1 - -COPY ./go /app/bystro-go-tools - -RUN cd /app/bystro-go-tools && go install bystro/cmd/dosage - -# ---- Perl Stage ---- -# see https://hub.docker.com/_/perl/ -# comes with cpanm -FROM perl:5.38 - -# Copy the compiled Go binaries from the builder stage -COPY --from=go-builder /app/bin/ /app/bin/ - -# Add app/bin to PATH -ENV PATH="/app/bin:${PATH}" - -# Update package lists and install dependencies -# Note: installing libmariadb-dev rather than libmysql-dev since the latter was removed -RUN apt-get update \ - && apt-get install -y \ - build-essential \ - bzip2 \ - git \ - libcurl4-openssl-dev \ - liblmdb-dev \ - libmariadb-dev \ - libssl-dev \ - lz4 \ - openssl \ - patch \ - pigz \ - pkg-config \ - unzip \ - wget \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app/liftOver - -# Install liftOver into a common system directory, /usr/local/bin -RUN wget --quiet http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver \ - && chmod +x liftOver \ - && mv liftOver /usr/local/bin - -RUN rm -rf /app/liftOver - -# Change working directory to perl-deps -WORKDIR /app/perl-deps - -# install cpm for faster installation of perl packages -RUN curl -fsSL https://raw.githubusercontent.com/skaji/cpm/main/cpm | perl - install -g App::cpm - -# Install Dist::Zilla -RUN cpm install -g Dist::Zilla Archive::Tar::Wrapper - -# Install one-off Bystro dependencies that require special installation -RUN cpm install -g https://github.com/bystrogenomics/msgpack-perl.git -RUN cpm install -g --no-test MouseX::Getopt -RUN git clone --depth 1 --recurse-submodules https://github.com/salortiz/LMDB_File.git \ - && cd LMDB_File \ - && cpanm --quiet . 
- -# Install workaround because default DBD::mysql, i.e. version >5.x, will not automatically install on ubuntu -RUN cpm install -g DBD::mysql@4.051 - -# Remove perl-deps directory -RUN rm -rf /app/perl-deps - -# Setup bystro-perl directory to build and install distribution -WORKDIR /app/bystro-perl - -# Copy Bystro Package -COPY . /app/bystro-perl/ - -# Install build dependencies -RUN cpm install -g --show-build-log-on-failure $(dzil authordeps --missing) - -# install Bystro package dependencies -RUN cpm install -g --show-build-log-on-failure $(dzil listdeps --missing) - -# Test and Install Bystro -RUN dzil install - -RUN rm -rf /app/perl-deps - -WORKDIR /app - -# Copy entry point script that launches Bystro perl scripts -COPY entrypoint.sh /app/bin/ - -# Copy perl scripts -COPY ./bin/ /app/bin/ - -# Remove perl installation -RUN rm -rf /app/bystro-perl - -RUN chmod +x /app/bin/entrypoint.sh - -ENTRYPOINT ["/app/bin/entrypoint.sh"] diff --git a/perl/INSTALL.md b/perl/INSTALL.md new file mode 100644 index 000000000..b6c729f1c --- /dev/null +++ b/perl/INSTALL.md @@ -0,0 +1,321 @@ +# Bystro Annotator Package Installation and Configuration + +## Installation + +These instructions assume that you are in the `perl` directory of the Bystro repository, e.g. `~/bystro/perl`. + +### Installing Bystro Annotator using Docker + +To build a Docker image using the `Dockerfile`, run the following: + +```bash +cd ../ && docker build -t bystro-annotator -f Dockerfile.perl . 
+# Run Bystro Annotator from the new Docker container; replace with the desired command +# If no command provided, will automatically run bystro-annotate.pl --help +docker run bystro-annotator +``` + +- Commands: + - Run the annotator: `docker run bystro-annotator bystro-annotate.pl --help` + - Build a new Bystro database: `docker run bystro-annotator bystro-build.pl --help` + - Fetch dependencies, before building: `docker run bystro-annotator bystro-utils.pl --help` + +### Installing Bystro Annotator on Bare Metal / Directly on Host Operating System + +The easiest way to install Bystro directly on your machine is to run: + +- Debian/Ubuntu: `../install-apt.sh` +- Centos/Fedora/Amazon Linux: `../install-rpm.sh` + +You will be prompted for "sudo" access to install the necessary system level dependencies. + +### Manual/Custom Install + +The previous instructions configured a local copy of Perl for you, using Perlbrew. If you want to use your system's Perl, +or otherwise control the installation process, follow the "Manual/Custom Install" instructions to give you greater control over installation. + +Else, just skip to the next section [Configure Bystro Annotator](#configuring-the-bystro-annotator). 
+ +First you'll need to install some prerequisites: + +- Debian/Ubuntu: `sudo ../install/install-apt-deps.sh` +- CentOS/Fedora/Amazon Linux: `sudo ../install/install-rpm-deps.sh` +- bgzip: `../install/install-htslib.sh ~/.profile ~/.local` + +Bystro relies on a few `Go` programs, which can be installed with the following: + +```bash +# Where to install the Bystro Go programs (will go into ~/.local/go in this case) +BYSTRO_GO_PROGRAMS_INSTALL_DIR=~/.local +# Where to install Go itself (will go into ~/go in this case) +GOLANG_BINARY_INSTALL_DIR=~/ +# Where to add the Go binaries to your PATH +PROFILE_PATH=~/.profile +# Where Bystro is installed +BYSTRO_INSTALL_DIR=~/bystro +# The platform to install Go for +GO_PLATFORM=linux-amd64 +# The version of Go to install +GO_VERSION=1.21.4 + +# BYSTRO_GO_PROGRAMS_INSTALL_DIR and GOLANG_BINARY_INSTALL_DIR directories must exist +mkdir -p $BYSTRO_GO_PROGRAMS_INSTALL_DIR +mkdir -p $GOLANG_BINARY_INSTALL_DIR + +# Assuming we are installing this on linux, on an x86 processor +# and that our login shell environment is stored in ~/.profile (another common one is ~/.bash_profile) +../install/install-go.sh $PROFILE_PATH $GOLANG_BINARY_INSTALL_DIR $BYSTRO_GO_PROGRAMS_INSTALL_DIR $BYSTRO_INSTALL_DIR $GO_PLATFORM $GO_VERSION + +source ~/.profile +``` + +Next, we need to install the Bystro Perl library and its Perl dependencies. The instructions for installing the Bystro Perl library use [`cpm`](https://metacpan.org/pod/App::cpm). + +- Alternatively you can use [cpanm](https://metacpan.org/dist/App-cpanminus/view/bin/cpanm), which can be installed with the following: `curl -fsSL https://cpanmin.us | perl - App::cpanminus` +- Just replace every `cpm install --test` and `cpm install` command with `cpanm` + +
+ +To install `cpm`, run the following: + +```bash +# Install cpm +curl -fsSL https://raw.githubusercontent.com/skaji/cpm/main/cpm | perl - install App::cpm +``` + +You will need to configure where Perl stores its libraries. By default, `cpm` will install libraries in `./local` in the current directory. + +- You will need to make sure that this path is in your `PERL5LIB` environment variable: + + ```bash + # Assuming you were in the ~/bystro/perl directory when you ran `cpm install`, you would get a folder `~/bystro/perl/local` with the libraries and binaries cpm installed + # We need to add this to our PERL5LIB and PATH environment variables + # You would put these commands in your ~/.profile or ~/.bash_profile + export PERL5LIB=~/bystro/perl/local/lib/perl5:$PERL5LIB + export PATH=~/bystro/perl/local/bin:$PATH + ``` + +- If you want to install libraries and binaries into a different local directory, replace `cpm install` with `cpm install -L=/path/to`, which will cause libraries to be installed in `/path/to/lib/perl5` and binaries into `/path/to/bin`. You will need to make sure that these paths are in your `PERL5LIB` and `PATH` environment variables respectively: + + ```bash + # Assuming you ran `cpm install -L=/path/to` for all of your cpm install commands + # Put this in your ~/.profile or ~/.bash_profile + export PERL5LIB=/path/to/lib/perl5:$PERL5LIB + export PATH=/path/to/bin:$PATH + ``` + +- If you want to install libraries in the default Perl library path, as specified by Perl's @INC, replace the `cpm install` commands with `cpm install -g` + +
+ +A few dependencies must be installed separately: + +```bash +cpm install --test https://github.com/bystrogenomics/msgpack-perl.git + +ALIEN_INSTALL_TYPE=share cpm install --test Alien::LMDB +cpm install --test LMDB_File + +# no --test option because it has a trivial failure related to formatting of cli help strings +cpm install MouseX::Getopt +``` + +However, if you are using Perl > 5.36.0, you will need to manually install LMDB_File 0.14, which will require `make`: + +```bash +ALIEN_INSTALL_TYPE=share cpm install --test Alien::LMDB +git clone --depth 1 --recurse-submodules https://github.com/salortiz/LMDB_File.git \ + && cd LMDB_File \ + && git checkout 34acb71d7d86575fe7abb3f7ad95e8653019b282 \ + && perl Makefile.PL && make distmeta \ + && ln -s MYMETA.json META.json && ln -s MYMETA.yml META.yml \ + && cpm install --show-build-log-on-failure --test . \ + && cd .. \ + && rm -rf LMDB_File +``` + +Now you can install the rest of the dependencies: + +```bash + cpm install +``` + +
+ +Now you're ready to try Bystro: + +```bash +# First let's run our test suite +cd ~/bystro/perl +prove -r ./t -j$(nproc) + +# Then let's try running bystro-annotate.pl +bystro-annotate.pl --help + +# Expected output +# usage: bystro-annotate.pl [-?cio] [long options...] +# --[no-]help (or -?) Prints this usage information. +# aka --usage +# --input STR... (or -i) Input files. Supports mulitiple files: +# --in file1 --in file2 --in file3 +# aka --in +# --output STR (or -o) Base path for output files: /path/to/output +# aka --out +# --[no-]json Do you want to output JSON instead? +# Incompatible with run_statistics +# --config STR (or -c) Yaml config file path. +# aka --configuration +# --overwrite INT Overwrite existing output file. +# --[no-]read_ahead For dense datasets, use system read-ahead +# --debug NUM +# --verbose INT +# --compress STR Enable compression. Specify the type of +# compression: lz4 gz bgz. `bgz` is an alias +# for gz (gzip); when bgzip is available, it +# will be used and will generate a block +# gzipped file with index +# --[no-]archive Place all outputs into a tarball? +# --run_statistics INT Create per-sample feature statistics (like +# transition:transversions)? +# --delete_temp INT Delete the temporary directory made during +# annotation +# --wantedChr STR Annotate a single chromosome +# aka --chr, --wanted_chr +# --maxThreads INT Number of CPU threads to use (optional) +# aka --threads +# --publisher STR Tell Bystro how to send messages to a +# plugged-in interface (such as a web +# interface) +# --[no-]ignore_unknown_chr Don't quit if we find a non-reference +# chromosome (like ChrUn) +# --json_config STR JSON config file path. Use this if you +# wish to invoke the annotator by file +# passing. +# --result_summary_path STR Where to output the result summary. 
+# Defaults to STDOUT +``` + +## Configuring the Bystro Annotator + +Once Bystro is installed, we need to download a database for the species/assembly we're going to be analyzing and then configure the Bystro Annotator to use it. + +Database configurations are stored in YAML files in the `config` directory. By default Bystro ships with configurations for human genome assemblies hg19 (~/bystro/config/hg19.yml) and hg38 (~/bystro/config/hg38.yml), though you can create your own configurations for other species/assemblies. + +### Example Configuration + +1. Download and unpack the human hg38 Bystro database + + ```bash + MY_DATABASE_DIR=/mnt/annotator + sudo mkdir -p $MY_DATABASE_DIR + sudo chown -R $USER:$USER $MY_DATABASE_DIR + cd $MY_DATABASE_DIR + wget https://s3.amazonaws.com/bystro-db/hg38_v11.tar.gz + bgzip -d -c --threads 32 hg38_v11.tar.gz | tar xvf - + ``` + + - You can choose a directory other than `/mnt/annotator/`; that is just the default expected by ~/bystro/config/hg38.yml. If you choose something else, just update the `database_dir` property in the configuration file + + - with `yq`: + + ```bash + MY_DATABASE_DIR=/path/somewhere/else + # Assuming we downloaded and unpacked the database to /path/somewhere/else/hg38_v11 + # Update the database_dir property in the configuration file using `yq` + # You can also do this manually by editing the configuration file (in this example ~/bystro/config/hg38.yml) + yq write -i ~/bystro/config/hg38.yml database_dir $MY_DATABASE_DIR/hg38_v11 + ``` + + - `tar` is required to unpack the database, which is stored as a compressed tarball, but you can unzip the tarball using `gzip -d -c` instead of `bgzip -d -c --threads 32` if you don't have `bgzip` installed. It will work, just slower. + + - You need ~691GB of free space for hg38 and ~376GB of free space for hg19, including the space for the tar.gz archives. + + - The unpacked databases are ~517GB for hg38 and ~283GB for hg19. + +2.
(optional) Configure your Bystro Annotator to use a temporary directory with fast local storage, by editing the configuration file's `temp_dir` property to a directory on your fast local storage. This directory must be writable by the user running bystro-annotate.pl. + + If you've installed `yq` this is easy: + + ```bash + MY_FAST_LOCAL_TEMP_STORAGE_FOLDER=/mnt/annotator/tmp + mkdir -p $MY_FAST_LOCAL_TEMP_STORAGE_FOLDER + + # Or edit ~/bystro/config/hg38.yml file manually + yq write -i ~/bystro/config/hg38.yml temp_dir $MY_FAST_LOCAL_TEMP_STORAGE_FOLDER + ``` + + If temp_dir is not set, the files will be written directly to the output directory (see `--output` option in `bystro-annotate.pl`). + +## Databases + +1. [Human (hg38) database](https://s3.amazonaws.com/bystro-db/hg38_v11.tar.gz) +2. [Human (hg19) database](https://s3.amazonaws.com/bystro-db/hg19_v10.tar.gz) +3. There are no restrictions on species support, but we currently only build human genomes. Please create a GitHub issue if you would like us to support others + +## Running your first annotation + +Example: Annotate an hg38 VCF file: + +```sh +bystro-annotate.pl --config ~/bystro/config/hg38.yml --threads 32 --input gnomad.genomes.v4.0.sites.chr22.vcf.bgz --output test/my_annotation --compress gz +``` + +The above command will annotate the `gnomad.genomes.v4.0.sites.chr22.vcf.bgz` file with the hg38 database, using 32 threads, and output the results to `test`, and will use `my_annotation` as the prefix for output files.
+ +The result of this command will be: + +```sh +Created completion file +{ + "error" : null, + "totalProgress" : 8599234, + "totalSkipped" : 0, + "results" : { + "header" : "my_annotation.annotation.header.json", + "sampleList" : "my_annotation.sample_list", + "annotation" : "my_annotation.annotation.tsv.gz", + "dosageMatrixOutPath" : "my_annotation.dosage.feather", + "config" : "hg38.yml", + "log" : "my_annotation.annotation.log.txt", + "statistics" : { + "qc" : "my_annotation.statistics.qc.tsv", + "json" : "my_annotation.statistics.json", + "tab" : "my_annotation.statistics.tsv" + } + } +} +``` + +Explanation of the output: + +- `my_annotation.annotation.header.json`: The header of the annotated dataset +- `my_annotation.sample_list`: The list of samples in the annotated dataset +- `my_annotation.annotation.tsv.gz`: A gzipped TSV file with one row per variant and one column per annotation +- `my_annotation.dosage.feather`: The dosage matrix file, where the first column is the `locus` column in the format "chr:pos:ref:alt", and columns following that are sample columns, with the dosage of the variant for that sample (0 for homozygous reference, 1 for 1 copy of the alternate allele, 2 for 2, and so on). -1 indicates missing genotypes. The dosage is the expected number of alternate alleles, given the genotype. This is useful for downstream analyses like imputation, or for calculating polygenic risk scores + - This file is in the [Arrow feather format](https://arrow.apache.org/docs/python/feather.html), also known as the "IPC" format. 
This is an ultra-efficient format for machine learning, and is widely supported, in Python libraries like [Pandas](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html), [Polars](https://docs.pola.rs/api/python/stable/reference/api/polars.read_ipc.html), [PyArrow](https://arrow.apache.org/docs/python/generated/pyarrow.feather.read_feather.html), as well as languages like [R](https://arrow.apache.org/docs/r/reference/read_feather.html) and [Julia](https://github.com/apache/arrow-julia) +- `hg38.yml`: The configuration file used for the annotation. You can use this to either re-build the Bystro database from scratch, or to re-run the annotation with the same configuration +- `my_annotation.annotation.log.txt`: The log file for the annotation +- `my_annotation.statistics.tsv`: A TSV file with sample-wise statistics on the annotation +- `my_annotation.statistics.qc.tsv`: A TSV file that lists any samples that failed quality control checks, currently defined as being outside 3 standard deviations from the mean on any of the sample-wise statistics +- `my_annotation.statistics.json`: A JSON file with the same sample-wise statistics on the annotation +- 'totalProgress': The number of variants processed; this is the number of variants passed to the Bystro annotator by the bystro-vcf pre-processor, which performs primary quality control checks, such as excluding sites that have no samples with non-missing genotypes, or which are not FILTER=PASS in the input VCF. We also exclude sites that are not in the Bystro database, and sites that are not in the Bystro database that are not in the input VCF. In more detail: + - Variants must have FILTER value of PASS or " . 
" + - Variants and ref must be ACTG (no structural variants retained) + - Multiallelics are split into separate records, and annotated separately + - MNPs are split into separate SNPs and annotated separately + - Indels are left-aligned + - The first base of an indel must be the reference base after multiallelic decomposition and left-alignment + - If genotypes are provided, entirely missing sites are dropped + +## Developer Resources + +### Coding style and tidying + +The `.perltidyrc` gives the coding style and `tidyall` from [Code::TidyAll](https://metacpan.org/dist/Code-TidyAll) can be used to tidy all files with `tidyall -a`. +Please tidy all files before submitting patches. + +Install tidyall, perltidy, and perlcritic like so: + +```bash +cpanm Code::TidyAll Perl::Tidy Perl::Critic +``` diff --git a/perl/README.md b/perl/README.md deleted file mode 100644 index f9d860c99..000000000 --- a/perl/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# Bystro Perl package - - -## Installing Bystro using docker - -To build a docker image using the `Dockerfile`, run the following: - -```bash -docker build --tag bystro-cli . -``` - -## Installing Bystro using `cpam` - -The instructions for installing Bystro locally uses [`cpanm`](https://metacpan.org/pod/App::cpanminus). - -Assuming that you've cloned the repository and are working on it locally, then the dependencies can mostly be installed with cpanm. -But there are a few one-off dependencies that require a slightly modified approach. - -One-off dependencies can be installed as follows: - -```bash -cpanm --quiet https://github.com/bystrogenomics/msgpack-perl.git -cpanm --quiet --notest MouseX::Getopt -git clone --depth 1 --recurse-submodules https://github.com/salortiz/LMDB_File.git \ - && cd LMDB_File \ - && cpanm --quiet . \ - && cd .. 
\ - && rm -rf LMDB_File -# NOTE: you will need mysql_config to install this -# ubuntu 22.04 LTS => sudo apt install -y libmariadb-dev libmariadb-dev-compat -# amazon 2023 => sudo yum install -y -cpanm --quiet DBD::mysql@4.051 -``` - -The remaining dependencies are installed like this: - -```bash -cpanm --installdeps . -``` - -After installing dependencies, use `prove -lr t` to run tests. - -## Installing Bystro locally using dzil - -Bystro uses [`Dist::Zilla`](https://github.com/rjbs/dist-zilla) for packaging and is configured with `dist.ini`. -This approach requires installing `Dist::Zilla` and author dependencies and one off-dependencies described in the above. - -```bash -# Install Dist::Zilla and Archive::Tar::Wrapper (to slightly speed up building) -cpanm --quiet Dist::Zilla Archive::Tar::Wrapper - -# Install build dependencies -dzil authordeps --missing | cpanm --quiet - -# Install Bystro dependencies -dzil listdeps --missing | cpanm --quiet - -# Install Bystro -dzil install -``` - -## Install Bystro using `cpm` - -Install [cpm](https://metacpan.org/pod/App::cpm) with `curl -fsSL https://raw.githubusercontent.com/skaji/cpm/main/cpm | perl - install -g App::cpm`. - -```bash -# install msgpack fork -cpm install -g https://github.com/bystrogenomics/msgpack-perl.git - -# install MouseX::Getopt despite some tests failing -cpm install -g --no-test MouseX::Getopt - -# install LMDB_File that comes with latest LMDB -# NOTE: you will need mysql_config to install this -# ubuntu 22.04 LTS => sudo apt install -y libmariadb-dev libmariadb-dev-compat -# amazon 2023 => sudo yum install -y -git clone --depth 1 --recurse-submodules https://github.com/salortiz/LMDB_File.git \ - && cd LMDB_File \ - && cpanm . \ - && cd .. 
\ - && rm -rf LMDB_File - -# install mysql driver -# NOTE: you will need mysql_config to install this -# ubuntu 22.04 LTS => sudo apt install -y libmariadb-dev libmariadb-dev-compat -# amazon 2023 => sudo yum install -y -cpm install -g DBD::mysql@4.051 - -# clone bystro and change into perl package -git clone git@github.com:bystrogenomics/bystro.git && cd bystro/perl - -# install dependencies -cpm install -g --with-develop - -# Install dzil build dependencies -cpm install -g --show-build-log-on-failure $(dzil authordeps --missing) -``` - -## Coding style and tidying - -The `.perltidyrc` gives the coding style and `tidyall` from [Code::TidyAll](https://metacpan.org/dist/Code-TidyAll) can be used to tidy all files with `tidyall -a`. -Please tidy all files before submitting patches. - -Install tidyall, perltidy, and perlcritic like so - -```bash -cpanm Code::TidyAll Perl::Tidy Perl::Critic -``` - -## Specifying libraries in Perl - -To specify specific libraries for the Perl codebase, use `use ` (see [use documentation](https://perldoc.perl.org/functions/use)). -Packaging with `Dist::Zilla` will specify them in the `Makefile.PL` that it creates. 
\ No newline at end of file diff --git a/perl/cpanfile b/perl/cpanfile index ee422da2a..b27922470 100644 --- a/perl/cpanfile +++ b/perl/cpanfile @@ -6,11 +6,12 @@ requires "Carp" => "0"; requires "Clone" => "0"; requires "Cpanel::JSON::XS" => "0"; requires "Cwd" => "0"; -requires "DBD::mysql" => "4.051"; +requires "DBD::MariaDB" => "1.23"; requires "DBI" => "0"; requires "DDP" => "0"; requires "Data::MessagePack" => "0"; requires "Digest::MD5" => "0"; +requires "Fcntl" => "0"; requires "File::Basename" => "0"; requires "File::Glob" => "0"; requires "File::Which" => "0"; @@ -39,6 +40,7 @@ requires "Pod::Usage" => "0"; requires "Scalar::Util" => "0"; requires "String::Strip" => "0"; requires "Sys::CpuAffinity" => "0"; +requires "Time::HiRes" => "0"; requires "Time::localtime" => "0"; requires "Try::Tiny" => "0"; requires "Type::Params" => "0"; @@ -54,9 +56,13 @@ on 'test' => sub { requires "Exporter" => "0"; requires "ExtUtils::MakeMaker" => "0"; requires "File::Spec" => "0"; + requires "File::Temp" => "0"; + requires "IO::Compress::Gzip" => "0"; + requires "IO::Uncompress::Gunzip" => "0"; requires "Test::Exception" => "0"; requires "Test::More" => "0"; requires "Types::Common::String" => "0"; + requires "YAML::Tiny" => "0"; }; on 'test' => sub { diff --git a/perl/dist.ini b/perl/dist.ini index 1c7cdfb0b..d758afdfe 100644 --- a/perl/dist.ini +++ b/perl/dist.ini @@ -1,13 +1,13 @@ -name = Bytro -author = The Bytro Authors +name = Bystro +author = The Bystro Authors license = Apache_2_0 copyright_holder = The Bystro Authors copyright_year = 2023 -version = 0.001 +version = 2.0.0 ; install author dependencies with `dzil authordeps --missing | cpanm` -; install package depencies with `dzil listdeps --missing | cpanm` +; install package dependencies with `dzil listdeps --missing | cpanm` [GatherDir] exclude_match = Dockerfile @@ -66,9 +66,11 @@ strict = 1 [TestRelease] -; [Test::UnusedVars] - [RunExtraTests] [CopyFilesFromBuild::Filtered] copy = cpanfile + +; Releaser 
Plugin +[UploadToCPAN] + diff --git a/perl/entrypoint.sh b/perl/entrypoint.sh deleted file mode 100644 index 26c3da845..000000000 --- a/perl/entrypoint.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -# Determine which script to run based on the argument - -case "$1" in -bystro-annotate.pl) - exec /usr/local/bin/perl /app/bin/bystro-annotate.pl "${@:2}" # Run bystro-annotate.pl and pass any additional arguments - ;; -bystro-build.pl) - exec /usr/local/bin/perl /app/bin/bystro-build.pl "${@:2}" # Run bystro-build.pl and pass any additional arguments - ;; -bystro-server.pl) - exec /usr/local/bin/perl /app/bin/bystro-server.pl "${@:2}" # Run bystro-server.pl and pass any additional arguments - ;; -bystro-utils.pl) - exec /usr/local/bin/perl /app/bin/bystro-utils.pl "${@:2}" # Run bystro-utils.pl and pass any additional arguments - ;; -read_db_util.pl) - exec /usr/local/bin/perl /app/bin/read_db_util.pl "${@:2}" # Run read_db_util.pl and pass any additional arguments - ;; -*) - echo "Usage: docker run bystro {bystro-annotate.pl|bystro-build.pl|bystro-server.pl|bystro-utils.pl|read_db_util.pl} [args]" - exit 1 - ;; -esac diff --git a/perl/lib/Interface.pm b/perl/lib/Interface.pm index 50756b6a9..085ca4ef6 100644 --- a/perl/lib/Interface.pm +++ b/perl/lib/Interface.pm @@ -96,8 +96,9 @@ has compress => ( is => 'ro', isa => 'Str', metaclass => 'Getopt', - documentation => 'Compress the output?', - default => 0, + documentation => + 'Enable compression. Specify the type of compression: lz4 gz bgz. `bgz` is an alias for gz (gzip); when bgzip is available, it will be used and will generate a block gzipped file with index', + default => 0, ); has archive => ( diff --git a/perl/lib/Seq/Definition.pm b/perl/lib/Seq/Definition.pm index c99cb5da3..a49708243 100644 --- a/perl/lib/Seq/Definition.pm +++ b/perl/lib/Seq/Definition.pm @@ -46,8 +46,7 @@ has temp_dir => ( is => 'ro', isa => 'Maybe[Str]' ); # Do we want to compress? 
has compress => ( is => 'ro', isa => 'Str', default => 1 ); -has compressType => - ( is => 'ro', isa => enum( [qw/lz4 gz bgz zip/] ), default => 'gz' ); +has compressType => ( is => 'ro', isa => enum( [qw/lz4 gz bgz/] ), default => 'gz' ); # Do we want to tarball our results has archive => ( is => 'ro', isa => 'Bool', default => 0 ); diff --git a/perl/lib/Seq/Role/IO.pm b/perl/lib/Seq/Role/IO.pm index 833892989..5017d9646 100644 --- a/perl/lib/Seq/Role/IO.pm +++ b/perl/lib/Seq/Role/IO.pm @@ -160,7 +160,6 @@ sub getInnerFileCommand { my $compressed = $innerFile =~ /[.]gz$/ || $innerFile =~ /[.]bgz$/ - || $innerFile =~ /[.]zip$/ || $filePath =~ /[.]lz4$/; my $innerCommand; @@ -204,7 +203,7 @@ sub isCompressedSingle { return 0; } - if ( $basename =~ /[.]gz$/ || $basename =~ /[.]bgz$/ || $basename =~ /[.]zip$/ ) { + if ( $basename =~ /[.]gz$/ || $basename =~ /[.]bgz$/ ) { return "gzip"; } @@ -251,7 +250,7 @@ sub getWriteFh { } my $fh; - my $hasGz = $file =~ /[.]gz$/ || $file =~ /[.]bgz$/ || $file =~ /[.]zip$/; + my $hasGz = $file =~ /[.]gz$/ || $file =~ /[.]bgz$/; my $hasLz4 = $file =~ /[.]lz4$/; if ( $hasGz || $hasLz4 || $compress ) { if ( $hasLz4 || ( $compress && $compress =~ /[.]lz4$/ ) ) { diff --git a/perl/lib/Utils/SqlWriter/Connection.pm b/perl/lib/Utils/SqlWriter/Connection.pm index 49833b5cb..4bbaf350c 100644 --- a/perl/lib/Utils/SqlWriter/Connection.pm +++ b/perl/lib/Utils/SqlWriter/Connection.pm @@ -5,7 +5,7 @@ use warnings; package Utils::SqlWriter::Connection; use DBI; -use DBD::mysql 4.051; +use DBD::MariaDB 1.23; # Specify the version of DBD::MariaDB our $VERSION = '0.001'; @@ -15,7 +15,8 @@ use Mouse 2; use namespace::autoclean; # The actual configuration -has driver => ( is => 'ro', isa => 'Str', default => "DBI:mysql" ); +has driver => ( is => 'ro', isa => 'Str', default => "DBI:MariaDB" ) + ; # Use MariaDB driver has host => ( is => 'ro', isa => 'Str', default => "genome-mysql.soe.ucsc.edu" ); has user => ( is => 'ro', isa => 'Str', default => 
"genome" ); has password => ( is => 'ro', isa => 'Str', ); @@ -23,19 +24,6 @@ has port => ( is => 'ro', isa => 'Int', ); has socket => ( is => 'ro', isa => 'Str', ); has database => ( is => 'ro', isa => 'Maybe[Str]' ); -=method @public sub connect - - Build database object, and return a handle object - -Called in: none - -@params: - -@return {DBI} - A connection object - -=cut - around BUILDARGS => sub { my ( $orig, $self, $data ) = @_; @@ -57,8 +45,8 @@ sub connect { my $connection = $self->driver; $connection .= ":database=$databaseName;host=" . $self->host if $self->host; $connection .= ";port=" . $self->port if $self->port; - $connection .= ";mysql_socket=" . $self->port_num if $self->socket; - $connection .= ";mysql_read_default_group=client"; + $connection .= ";mariadb_socket=" . $self->port_num if $self->socket; + $connection .= ";mariadb_read_default_group=client"; # Change to MariaDB option return DBI->connect( $connection, diff --git a/python/python/bystro/covariance/tests/test_hypothesis_hd.py b/python/python/bystro/covariance/tests/test_hypothesis_hd.py index ea10b3850..57b9d4b0a 100644 --- a/python/python/bystro/covariance/tests/test_hypothesis_hd.py +++ b/python/python/bystro/covariance/tests/test_hypothesis_hd.py @@ -1,5 +1,8 @@ -import numpy as np import pickle +import os + +import numpy as np + from bystro.covariance.hypothesis_hd import ( sy2010, clx2013, @@ -9,11 +12,13 @@ schott2007, ) +def get_pickle_file_path(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + return os.path.join(script_dir, "hypothesis_hd_data.pkl") + def test_sy2010(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = data["X"] Y = data["Y"] @@ -30,9 +35,7 @@ def test_sy2010(): def test_clx2013(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = 
data["X"] Y = data["Y"] @@ -49,9 +52,7 @@ def test_clx2013(): def test_hc2018(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = data["X"] Y = data["Y"] @@ -119,9 +120,7 @@ def test_hc2018(): def test_two_sample_test(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = data["X"] Y = data["Y"] @@ -153,9 +152,7 @@ def test_two_sample_test(): def test_hd2017(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = data["X"] Y = data["Y"] @@ -172,9 +169,7 @@ def test_hd2017(): def test_schott2007(): - with open( - "python/bystro/covariance/tests/hypothesis_hd_data.pkl", "rb" - ) as f: + with open(get_pickle_file_path(), "rb") as f: data = pickle.load(f) X = data["X"] Y = data["Y"] diff --git a/python/python/bystro/parent_of_origin/tests/test_poirot.py b/python/python/bystro/parent_of_origin/tests/test_poirot.py index f6197637f..2af0501c7 100644 --- a/python/python/bystro/parent_of_origin/tests/test_poirot.py +++ b/python/python/bystro/parent_of_origin/tests/test_poirot.py @@ -1,13 +1,20 @@ +import pickle +import os + import pandas as pd from pandas.testing import assert_frame_equal -import pickle + from bystro.parent_of_origin.poirot import extract_residuals, do_poirot_by_snp pd.options.future.infer_string = True # type: ignore +def get_data_path(): + # get data from script path + script_path = os.path.dirname(os.path.realpath(__file__)) + return f"{script_path}/poirot_data.pkl" def test_do_poirot_by_snp(): - with open("python/bystro/parent_of_origin/tests/poirot_data.pkl", "rb") as f: + with open(get_data_path(), "rb") as f: loaded_data_dict = pickle.load(f) PHENO = loaded_data_dict["phenotypes"] GENO = loaded_data_dict["variants"]