Skip to content

Commit

Permalink
Cleaned up short read workflows, minor feature additions (#461)
Browse files Browse the repository at this point in the history
- Added docker image and WDL to backup workspaces.
- Added `util-malaria-coi` docker image.
- Fixed bug in `SRFlowcell` that preferentially pulled the existing aligned bam rather than the merged aligned bam.
- Cleaned up redundant / unnecessary inputs and outputs from `SRWholeGenome.wdl`
- Enabled `SRWholeGenome.wdl` and `HaplotypeCaller.wdl` to use an `interval_list` to call variants in subsets of the genome.
- Renamed `ONTPfTypeDrugResistanceMarkers.wdl` to `PfalciparumTypeDrugResistanceMarkers.wdl`.
- Renamed `ONTPfHrp2Hrp3Status.wdl` to `PfalciparumHrp2Hrp3Status.wdl`
- `PfalciparumDrugResistanceSummary.wdl` now produces individual drug resistances/sensitivities as additional outputs.
- `PfalciparumDrugResistanceSummary.wdl` is now deprecated.
- `PfalciparumDrugResistanceMarkers.wdl` now produces drug resistances/sensitivities as well as the summary file.  This deprecates `PfalciparumDrugResistanceSummary.wdl`.
- Added `PfalciparumPolygenomicityBarcodeEstimate.wdl` to estimate CoI based on the 24 SNP molecular barcode.
- Cleaned up some outputs in `SRJointCallGVCFsWithGenomicsDB.wdl`.
- Added flag for dangling end recovery to `HaplotypeCaller.wdl`.
- Minor fixes for MiniWDL style suggestions (`TrainCnnFilters.wdl`, `LRCNVs.wdl`, `AlignedMetrics.wdl`, `FastQC.wdl`, `SRUtils.wdl`, `Utils.wdl`, `VariantUtils.wdl`, `SRJointGenotyping.wdl`, `NanoPlot.wdl`, `Pf_Niare_HaplotypeCaller.wdl`)
  • Loading branch information
jonn-smith authored Aug 6, 2024
1 parent 11c576a commit 78fbe5d
Show file tree
Hide file tree
Showing 26 changed files with 1,437 additions and 564 deletions.
14 changes: 10 additions & 4 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ workflows:
- name: ONTMethylation
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/ONT/Epigenomics/ONTMethylation.wdl
- name: ONTPfHrp2Hrp3Status
- name: PfalciparumHrp2Hrp3Status
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/ONT/MultiAnalysis/ONTPfHrp2Hrp3Status.wdl
- name: ONTPfTypeDrugResistanceMarkers
primaryDescriptorPath: /wdl/pipelines/TechAgnostic/TertiaryAnalysis/PfalciparumHrp2Hrp3Status.wdl
- name: PfalciparumTypeDrugResistanceMarkers
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/ONT/MultiAnalysis/ONTPfTypeDrugResistanceMarkers.wdl
primaryDescriptorPath: /wdl/pipelines/TechAgnostic/TertiaryAnalysis/PfalciparumTypeDrugResistanceMarkers.wdl
- name: PBMASIsoSeqQuantify
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/PacBio/Utility/PBMASIsoSeqQuantify.wdl
Expand Down Expand Up @@ -153,3 +153,9 @@ workflows:
- name: SvQCPlots
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/TechAgnostic/Visualization/SvQCPlots.wdl
- name: PfalciparumPolygenomicityBarcodeEstimate
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/TechAgnostic/TertiaryAnalysis/PfalciparumPolygenomicityBarcodeEstimate.wdl
- name: BackupWorkspace
subclass: wdl
primaryDescriptorPath: /wdl/pipelines/TechAgnostic/Utility/BackupWorkspace.wdl
51 changes: 51 additions & 0 deletions docker/lr-backup-workspace/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
############### stage 0
# Build stage: creates the conda environment described by environment.yml and
# repackages it with conda-pack into a standalone tree under /venv.
FROM continuumio/miniconda3:24.4.0-0 AS build
# PKM: executable used for every package-manager command below.
ARG PKM='conda'
# CONDA_ENV_NAME: name given to the environment created from environment.yml.
ARG CONDA_ENV_NAME='env'
# INSTALL_CMD / CLEAN_CMD: command prefixes reused by the RUN steps below.
ARG INSTALL_CMD="${PKM} env create -n ${CONDA_ENV_NAME}"
ARG CLEAN_CMD="${PKM} clean --all --yes"

# conda-pack is needed to create a standalone conda environment that can be moved between systems.
# It allows you to package all the dependencies of this project into a single tarball.
# This tarball can then be unpacked on any system that has conda installed,
# allowing you to quickly and easily recreate your environment without transferring over
# the large conda cache, which can be several GB in size. This is especially useful for
# saving space on a docker image.
# Install conda-pack (and select the libmamba solver for later environment creation):
RUN ${PKM} update -y -n base conda && \
${PKM} install -y -c conda-forge conda-pack libmamba && \
${PKM} config --set solver libmamba && \
${CLEAN_CMD}

# Copy environment.yml to the container:
COPY environment.yml .
# Create the environment named ${CONDA_ENV_NAME} from the copied file, then clean the package cache:
RUN ${INSTALL_CMD} \
--file environment.yml && \
${CLEAN_CMD}

# Use conda-pack to create a standalone environment
# in /venv:
RUN conda-pack \
-n ${CONDA_ENV_NAME} \
-o /tmp/env.tar && \
mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
rm /tmp/env.tar
# We've put venv in same path it'll be in final image,
# so now fix up paths:
RUN /venv/bin/conda-unpack

# Final cache clean-up for this stage.
RUN ${CLEAN_CMD}

############### stage 1
# Runtime stage: a plain Ubuntu image that carries only the packed conda
# environment from the build stage plus the pip-installed tooling below.
FROM ubuntu:20.04 AS runtime
# Copy /venv from the previous stage:
COPY --from=build /venv /venv
# Put the environment's bin directory first on PATH so `python3` below
# resolves to the interpreter inside /venv.
ENV VIRTUAL_ENV=/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Install git (pip needs it for the git+https requirement below).
# `apt-get update` and `apt-get install` run in ONE layer so that a cached,
# stale package index is never paired with a fresh install step; the package
# lists are removed afterwards so they do not bloat the image.
# DEBIAN_FRONTEND=noninteractive prevents any package from prompting during the build.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y git && \
    rm -rf /var/lib/apt/lists/*

# Install our python packages (--no-cache-dir keeps pip's download cache out of the image):
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools
# NOTE(review): the requirement below contains the literal text "[email protected]",
# which looks like an e-mail-obfuscation artifact replacing the original
# `<repository>@<ref>` — restore the real repository/ref before building.
RUN python3 -m pip install --no-cache-dir --upgrade git+https://github.com/broadinstitute/[email protected]

12 changes: 12 additions & 0 deletions docker/lr-backup-workspace/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Builds and pushes the lr-backup-workspace docker image.
#   make / make build  - build the image, tagging it with both $(VERSION) and `latest`
#   make push          - push both tags to the registry
VERSION = 0.0.1
TAG1 = us.gcr.io/broad-dsp-lrma/lr-backup-workspace:$(VERSION)
TAG2 = us.gcr.io/broad-dsp-lrma/lr-backup-workspace:latest

# None of these targets produce a file of the same name; declaring them phony
# keeps a stray file or directory called `build`/`push`/`all` from making
# `make` skip the recipe.
.PHONY: all build push

all: build

# The platform is pinned to linux/amd64 for the build.
build:
	docker build --platform linux/amd64 -t $(TAG1) -t $(TAG2) .

push:
	docker push $(TAG1)
	docker push $(TAG2)
17 changes: 17 additions & 0 deletions docker/lr-backup-workspace/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Conda environment specification consumed by the Dockerfile in this directory
# (`COPY environment.yml .` followed by `env create -n ... --file environment.yml`).
# NOTE(review): this file lives in docker/lr-backup-workspace but the name below
# refers to "lr-papermill-base" — looks like a copy-paste leftover. The Dockerfile
# passes its own `-n` name, which presumably takes precedence — confirm before renaming.
name: lr-papermill-base-base

# Channels to search for the packages listed under `dependencies`.
channels:
- bioconda
- defaults
- conda-forge

# Every dependency is pinned to an exact version.
dependencies:
- tree=2.1.1
- python=3.11.6
- jupyter=1.0.0
- papermill=2.4.0
- pandas=2.1.1
- numpy=1.26.0
- scipy=1.11.3
- matplotlib=3.8.0
- seaborn=0.12.2
40 changes: 40 additions & 0 deletions docker/util-malaria-coi/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Image bundling tools for malaria complexity-of-infection (CoI) estimation:
# DEploid and THE REAL McCOIL, on top of an R + Ubuntu base, with the Google
# Cloud SDK installer run for gsutil.
FROM rocker/r-ubuntu:22.04

# The MAINTAINER instruction is deprecated; a label carries the same metadata.
LABEL maintainer="Jonn Smith"

########################################################################################################################

# Setup python3 and crcmod for gsutil:
RUN apt-get update && \
    apt-get install -y gcc python3-dev python3-setuptools python3-pip && \
    pip3 uninstall -y crcmod && \
    pip3 install --no-cache-dir -U crcmod

# Install system packages: download/VCS utilities, the build toolchain needed
# to compile the tools below, and an editor.
# The index refresh and the installs share ONE layer so that a cached, stale
# package index is never paired with a later install step, and `apt-get` is
# used throughout (`apt` is meant for interactive use and has no stable CLI).
RUN apt-get --allow-releaseinfo-change update && \
    apt-get install -y \
        curl git git-lfs time datamash \
        build-essential autoconf autoconf-archive libcppunit-dev \
        vim

# install gsutil
RUN curl https://sdk.cloud.google.com | bash

# Install DEploid:
RUN git clone https://github.com/DEploid-dev/DEploid.git && \
    cd DEploid && \
    ./bootstrap && \
    make install

# Install RealMcCOIL:
# Any object files / shared libraries already present in the checkout are
# removed so they are rebuilt in this image; `-f` keeps the build from failing
# if a directory happens to contain none.
RUN git clone https://github.com/EPPIcenter/THEREALMcCOIL.git && \
    cd THEREALMcCOIL/categorical_method && \
    rm -f *.o *.so && \
    R CMD SHLIB McCOIL_categorical_code.c llfunction_het.c && \
    cd ../proportional_method && \
    rm -f *.o *.so && \
    R CMD SHLIB McCOIL_prop_code.c llfunction.c


18 changes: 18 additions & 0 deletions docker/util-malaria-coi/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Builds and pushes the util-malaria-coi docker image.
#   make                 - build, then push
#   make build           - build the image, tagging it with both $(VERSION) and `latest`
#   make build_no_cache  - same as build, but ignoring the docker layer cache
#   make push            - push both tags to the registry
IMAGE_NAME = util-malaria-coi
VERSION = 0.0.1

TAG1 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):$(VERSION)
TAG2 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):latest

# None of these targets produce a file of the same name; declaring them phony
# keeps a stray file or directory called `build`/`push`/`all` from making
# `make` skip the recipe.
.PHONY: all build build_no_cache push

all: | build push

build:
	docker build -t $(TAG1) -t $(TAG2) .

build_no_cache:
	docker build --no-cache -t $(TAG1) -t $(TAG2) .

push:
	docker push $(TAG1)
	docker push $(TAG2)

11 changes: 7 additions & 4 deletions wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ workflow SRFlowcell {
}
}

File merged_bam = select_first([t_005_AlignReads.bam, t_006_MergeBamAlignment.bam])
File merged_bam = select_first([t_006_MergeBamAlignment.bam, t_005_AlignReads.bam])

# Mark Duplicates
call SRUTIL.MarkDuplicates as t_007_MarkDuplicates {
Expand Down Expand Up @@ -388,13 +388,16 @@ workflow SRFlowcell {
File? unaligned_bam = unaligned_bam_o
File? unaligned_bai = unaligned_bai_o

# Aligned BAM file
File aligned_bam = select_first([t_023_FinalizeAlignedBam.gcs_path, final_bam])
File aligned_bai = select_first([t_024_FinalizeAlignedBai.gcs_path, final_bai])

# Contaminated BAM file:
# TODO: This will need to be fixed for optional finalization:
File? contaminated_bam = DecontaminateSample.contaminated_bam

# Aligned BAM file
File aligned_bam = select_first([t_023_FinalizeAlignedBam.gcs_path, final_bam])
File aligned_bai = select_first([t_024_FinalizeAlignedBai.gcs_path, final_bai])
##############################
# Statistics:
# Unaligned read stats
Float num_reads = t_014_ComputeBamStats.results['reads']
Expand Down
Loading

0 comments on commit 78fbe5d

Please sign in to comment.