From 197d5b308649fcb25b5f5eb3fb7e9a7632a0001e Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Wed, 24 Apr 2024 11:03:00 -0400 Subject: [PATCH] Several updates for making Malaria Joint Calling easier. (#449) * Reintroduced `ReblockGVCF` step. - Updated HaplotypeCaller::ReblockGVCF to point to latest Docker image with fix for ReblockGVCF annotations. - Updated HaplotypeCaller::ReblockGVCF to use 2 cores. - Disabled DeepVariant/Pepper calling in `SRWholeGenome` by default. - Removed `SRJointGenotyping::ReblockGVCF` - it should only be defined in one place. * Removed hard filtered output file. * Updating all GATK 4.3 tasks to GATK 4.5 * Disabled QC when running on a single bam file input. * Added a runtime_attr override for HaplotypeCaller subworkflow. * Update Utils.wdl Modified the disks from " LOCAL" to " SSD" * Updating tasks to use `SSD` rather than `LOCAL` disk. * Moved HaplotypeCaller and ReblockGVCF to SSD from HDD * Fixed a bug in `compute_sr_stats.py` that allowed `nan`s * Updated `sr-utils` docker to use `mamba` conda env solver. * Updates to `sr-utils` docker image. - Updated conda solver in `sr-utils` to `mamba`. - Fixed minor deprecation warning in `compute_sr_stats.py` - Fixed issue in `compute_sr_stats.py` that caused certain inputs to fail due to missing base qualities or `nan` values. - Updated version of `sr-utils` docker image to `0.2.2`. * Updated `sr-utils` docker image to version `0.2.2` * Added note for updating nightly GATK build. 
--------- Co-authored-by: Shadi Zaheri <74751641+shadizaheri@users.noreply.github.com> --- docker/sr-utils/Dockerfile | 6 + docker/sr-utils/Makefile | 2 +- docker/sr-utils/environment.yml | 1 - docker/sr-utils/python/compute_sr_stats.py | 20 +++- .../ILMN/VariantCalling/SRWholeGenome.wdl | 103 +++++++++--------- .../Preprocessing/CollectParentsKmerStats.wdl | 4 +- wdl/tasks/QC/Fingerprinting.wdl | 8 +- wdl/tasks/Utility/PBUtils.wdl | 5 +- wdl/tasks/Utility/SRUtils.wdl | 12 +- wdl/tasks/Utility/Utils.wdl | 6 +- wdl/tasks/Utility/VariantUtils.wdl | 55 +++++----- wdl/tasks/VariantCalling/CCSPepper.wdl | 8 +- wdl/tasks/VariantCalling/DeepVariant.wdl | 5 +- wdl/tasks/VariantCalling/HaplotypeCaller.wdl | 55 ++++++---- wdl/tasks/VariantCalling/ONTPepper.wdl | 5 +- .../VariantCalling/SRJointGenotyping.wdl | 70 +----------- wdl/tasks/Visualization/NanoPlot.wdl | 4 +- 17 files changed, 159 insertions(+), 210 deletions(-) diff --git a/docker/sr-utils/Dockerfile b/docker/sr-utils/Dockerfile index cc0adc19c..23b98a0ae 100644 --- a/docker/sr-utils/Dockerfile +++ b/docker/sr-utils/Dockerfile @@ -5,8 +5,14 @@ MAINTAINER Kiran V Garimella # copy other resources COPY ./environment.yml / +# Set new conda solver so we don't have to wait forever: +RUN conda update -n base conda +RUN conda install -n base conda-libmamba-solver +RUN conda config --set solver libmamba + # install conda packages RUN conda env create -f /environment.yml && conda clean -a +RUN echo "source activate sr-utils" > ~/.bashrc ENV PATH=/opt/conda/envs/sr-utils/bin/:/root/google-cloud-sdk/bin/:${PATH} # Install BWA-MEM2: diff --git a/docker/sr-utils/Makefile b/docker/sr-utils/Makefile index cbfd359fd..5e37106c0 100644 --- a/docker/sr-utils/Makefile +++ b/docker/sr-utils/Makefile @@ -1,5 +1,5 @@ IMAGE_NAME = sr-utils -VERSION = 0.2.1 +VERSION = 0.2.2 TAG1 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):$(VERSION) TAG2 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):latest diff --git a/docker/sr-utils/environment.yml 
b/docker/sr-utils/environment.yml index ebbc6f0bd..8b85d3464 100644 --- a/docker/sr-utils/environment.yml +++ b/docker/sr-utils/environment.yml @@ -9,4 +9,3 @@ dependencies: - pysam - numpy - tqdm - diff --git a/docker/sr-utils/python/compute_sr_stats.py b/docker/sr-utils/python/compute_sr_stats.py index aeb2e9fe9..f1546259a 100644 --- a/docker/sr-utils/python/compute_sr_stats.py +++ b/docker/sr-utils/python/compute_sr_stats.py @@ -10,7 +10,7 @@ def n50(lengths): csum = np.cumsum(all_len) n2 = int(sum(lengths) / 2) csumn2 = min(csum[csum >= n2]) - ind = np.where(csum == csumn2) + ind = np.where(csum == csumn2)[0] return all_len[int(ind[0])] @@ -36,7 +36,10 @@ def get_bam_stats(bam_file_path, qual_thresh=None): for read in tqdm(bam_file, desc=f"Collecting Bam Stats" + (f" (rq >= {qual_thresh})" if qual_thresh else ""), total=total_reads, unit=" read"): l = len(read.query_sequence) - q = np.mean(read.query_qualities) + if read.query_qualities is not None: + q = np.mean(read.query_qualities) + else: + q = 0 if qual_thresh and q < qual_thresh: continue @@ -51,6 +54,11 @@ def get_bam_stats(bam_file_path, qual_thresh=None): return n_reads, total_bases, np.mean(quals), np.median(quals), np.array(read_lengths) +def nan_zero_wrap(float_val: float) -> float: + """Return 0 if the value is NaN, otherwise return the value.""" + return float_val if not np.isnan(float_val) else 0 + + def main(): parser = argparse.ArgumentParser(description='Compute short read bam file stats', prog='compute_sr_stats') parser.add_argument('-q', '--qual-threshold', type=int, default=0, help="Phred-scale quality threshold") @@ -59,10 +67,10 @@ def main(): n_reads, n_bases, mean_qual, median_qual, read_lengths = get_bam_stats(args.bam_file_path, args.qual_threshold) - print(f"reads\t{n_reads}") - print(f"bases\t{n_bases}") - print(f"mean_qual\t{mean_qual}") - print(f"median_qual\t{median_qual}") + print(f"reads\t{nan_zero_wrap(n_reads)}") + print(f"bases\t{nan_zero_wrap(n_bases)}") + 
print(f"mean_qual\t{nan_zero_wrap(mean_qual)}") + print(f"median_qual\t{nan_zero_wrap(median_qual)}") print(f"read_mean\t{int(np.mean(read_lengths)) if len(read_lengths) > 0 else 0}") print(f"read_median\t{int(np.median(read_lengths)) if len(read_lengths) > 0 else 0}") diff --git a/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl b/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl index 003061a51..a644abdb6 100644 --- a/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl +++ b/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl @@ -138,39 +138,51 @@ workflow SRWholeGenome { File bam = select_first([MergeAllReads.merged_bam, aligned_bams[0]]) File bai = select_first([MergeAllReads.merged_bai, aligned_bais[0]]) - # Collect sample-level metrics: - call AM.SamStatsMap as SamStats { input: bam = bam } - call FastQC.FastQC as FastQC { input: bam = bam, bai = bai } - call Utils.ComputeGenomeLength as ComputeGenomeLength { input: fasta = ref_map['fasta'] } - call SRUTIL.ComputeBamStats as ComputeBamStats { input: bam_file = bam } - - if (defined(bed_to_compute_coverage)) { - call AM.MosDepthOverBed as MosDepth { - input: - bam = bam, - bai = bai, - bed = select_first([bed_to_compute_coverage]) - } + # Only collect metrics if we have multiple input bam files + if (length(aligned_bams) > 1) { + # Collect sample-level metrics: + call AM.SamStatsMap as SamStats { input: bam = bam } + call FastQC.FastQC as FastQC { input: bam = bam, bai = bai } + call Utils.ComputeGenomeLength as ComputeGenomeLength { input: fasta = ref_map['fasta'] } + call SRUTIL.ComputeBamStats as ComputeBamStats { input: bam_file = bam } + + if (defined(bed_to_compute_coverage)) { + call AM.MosDepthOverBed as MosDepth { + input: + bam = bam, + bai = bai, + bed = select_first([bed_to_compute_coverage]) + } - call COV.SummarizeDepthOverWholeBed as RegionalCoverage { - input: - mosdepth_output = MosDepth.regions + call COV.SummarizeDepthOverWholeBed as RegionalCoverage { + input: + mosdepth_output = 
MosDepth.regions + } } - } - call FF.FinalizeToFile as FinalizeBam { input: outdir = bam_dir, file = bam, name = "~{participant_name}.bam" } - call FF.FinalizeToFile as FinalizeBai { input: outdir = bam_dir, file = bai, name = "~{participant_name}.bam.bai" } + call FF.FinalizeToFile as FinalizeBam { input: outdir = bam_dir, file = bam, name = "~{participant_name}.bam" } + call FF.FinalizeToFile as FinalizeBai { input: outdir = bam_dir, file = bai, name = "~{participant_name}.bam.bai" } - if (defined(bed_to_compute_coverage)) { call FF.FinalizeToFile as FinalizeRegionalCoverage { input: outdir = bam_dir, file = select_first([RegionalCoverage.cov_summary]) } } + if (defined(bed_to_compute_coverage)) { call FF.FinalizeToFile as FinalizeRegionalCoverage { input: outdir = bam_dir, file = select_first([RegionalCoverage.cov_summary]) } } + call FF.FinalizeToFile as FinalizeFastQCReport { + input: + outdir = metrics_dir, + file = FastQC.report + } - call FF.FinalizeToFile as FinalizeFastQCReport { - input: - outdir = metrics_dir, - file = FastQC.report + # Calculate some final metrics that we need temporary variables for: + Float tmp_average_identity = 100.0 - (100.0*SamStats.stats_map['mismatches']/SamStats.stats_map['bases_mapped']) + Float tmp_aligned_frac_bases = SamStats.stats_map['bases_mapped']/SamStats.stats_map['total_length'] + Float tmp_aligned_est_fold_cov = SamStats.stats_map['bases_mapped']/ComputeGenomeLength.length + Float tmp_aligned_num_reads = FastQC.stats_map['number_of_reads'] + Float tmp_aligned_num_bases = SamStats.stats_map['bases_mapped'] + Float tmp_aligned_read_length_mean = FastQC.stats_map['read_length'] + Float tmp_insert_size_average = SamStats.stats_map['insert_size_average'] + Float tmp_insert_size_standard_deviation = SamStats.stats_map['insert_size_standard_deviation'] + Float tmp_pct_properly_paired_reads = SamStats.stats_map['percentage_of_properly_paired_reads_%'] } - 
#################################################################################################### # Some input handling: @@ -385,15 +397,8 @@ workflow SRWholeGenome { } } - call VARUTIL.SelectVariants as RemoveFilteredVariants { - input: - vcf = ScoreIndelVariantAnnotations.scored_vcf, - vcf_index = ScoreIndelVariantAnnotations.scored_vcf_index, - prefix = participant_name + ".filtered" - } - # Create a Keyfile for finalization: - File keyfile = RemoveFilteredVariants.vcf_out_index + File keyfile = select_first([FingerprintAndBarcodeVcf.barcode_file, ScoreIndelVariantAnnotations.scored_vcf_index]) # Finalize the raw Joint Calls: call FF.FinalizeToFile as FinalizeHCVcf { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcVcf.new_sample_name_vcf } @@ -406,8 +411,6 @@ workflow SRWholeGenome { # Finalize the reclibrated / filtered variants: call FF.FinalizeToFile as FinalizeHCRescoredVcf { input: outdir = smalldir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf } call FF.FinalizeToFile as FinalizeHCRescoredTbi { input: outdir = smalldir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf_index } - call FF.FinalizeToFile as FinalizeHCRescoredFilteredVcf { input: outdir = smalldir, keyfile = keyfile, file = RemoveFilteredVariants.vcf_out } - call FF.FinalizeToFile as FinalizeHCRescoredFilteredTbi { input: outdir = smalldir, keyfile = keyfile, file = RemoveFilteredVariants.vcf_out_index } # Finalize other outputs: if (defined(fingerprint_haploytpe_db_file)) { @@ -478,31 +481,31 @@ workflow SRWholeGenome { } output { - File aligned_bam = FinalizeBam.gcs_path - File aligned_bai = FinalizeBai.gcs_path - - Float aligned_num_reads = FastQC.stats_map['number_of_reads'] - Float aligned_num_bases = SamStats.stats_map['bases_mapped'] - Float aligned_frac_bases = SamStats.stats_map['bases_mapped']/SamStats.stats_map['total_length'] - Float aligned_est_fold_cov = SamStats.stats_map['bases_mapped']/ComputeGenomeLength.length + File? 
aligned_bam = FinalizeBam.gcs_path + File? aligned_bai = FinalizeBai.gcs_path - Float aligned_read_length_mean = FastQC.stats_map['read_length'] + Float? aligned_num_reads = tmp_aligned_num_reads + Float? aligned_num_bases = tmp_aligned_num_bases + Float? aligned_frac_bases = tmp_aligned_frac_bases + Float? aligned_est_fold_cov = tmp_aligned_est_fold_cov - Float insert_size_average = SamStats.stats_map['insert_size_average'] - Float insert_size_standard_deviation = SamStats.stats_map['insert_size_standard_deviation'] - Float pct_properly_paired_reads = SamStats.stats_map['percentage_of_properly_paired_reads_%'] + Float? aligned_read_length_mean = tmp_aligned_read_length_mean - Float average_identity = 100.0 - (100.0*SamStats.stats_map['mismatches']/SamStats.stats_map['bases_mapped']) + Float? insert_size_average = tmp_insert_size_average + Float? insert_size_standard_deviation = tmp_insert_size_standard_deviation + Float? pct_properly_paired_reads = tmp_pct_properly_paired_reads - File fastqc_report = FinalizeFastQCReport.gcs_path + Float? average_identity = tmp_average_identity - Boolean successfully_processed = true + File? fastqc_report = FinalizeFastQCReport.gcs_path File? bed_cov_summary = FinalizeRegionalCoverage.gcs_path File? fingerprint_vcf = FinalizeFingerprintVcf.gcs_path String? barcode = FingerprintAndBarcodeVcf.barcode + Boolean successfully_processed = true + ######################################## File? dvp_vcf = FinalizeDVPepperVcf.gcs_path @@ -520,7 +523,5 @@ workflow SRWholeGenome { File? hc_raw_tbi = FinalizeHCTbi.gcs_path File? hc_rescored_vcf = FinalizeHCRescoredVcf.gcs_path File? hc_rescored_tbi = FinalizeHCRescoredTbi.gcs_path - File? hc_rescored_filtered_vcf = FinalizeHCRescoredFilteredVcf.gcs_path - File? 
hc_rescored_filtered_tbi = FinalizeHCRescoredFilteredTbi.gcs_path } } diff --git a/wdl/tasks/Preprocessing/CollectParentsKmerStats.wdl b/wdl/tasks/Preprocessing/CollectParentsKmerStats.wdl index 281ca2f52..014066f98 100644 --- a/wdl/tasks/Preprocessing/CollectParentsKmerStats.wdl +++ b/wdl/tasks/Preprocessing/CollectParentsKmerStats.wdl @@ -219,7 +219,7 @@ task ParentalReadsRepartitionAndMerylConfigure { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" # LOCAL because this task is mostly IO operation + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" # If this is too slow, revert to LOCAL (LOCAL because this task is mostly IO operation) bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -484,7 +484,7 @@ task MerylMergeAndSubtract { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" # If this is too slow, revert to LOCAL bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/QC/Fingerprinting.wdl b/wdl/tasks/QC/Fingerprinting.wdl index 6c4cccdea..3c5a4d543 100644 --- a/wdl/tasks/QC/Fingerprinting.wdl +++ 
b/wdl/tasks/QC/Fingerprinting.wdl @@ -222,13 +222,15 @@ task ExtractRelevantGenotypingReads { RuntimeAttr? runtime_attr_override } + Int disk_size = 50 + 2*ceil(size(aligned_bam, "GB")) + 2*ceil(size(genotyping_sites_bed, "GB")) + 2*ceil(size(aligned_bai, "GB")) + command <<< set -eux export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - samtools view -h -@ 1 \ + samtools view -h -@ 2 \ --write-index \ -o "relevant_reads.bam##idx##relevant_reads.bam.bai" \ -M -L ~{genotyping_sites_bed} \ @@ -244,7 +246,7 @@ task ExtractRelevantGenotypingReads { RuntimeAttr default_attr = object { cpu_cores: 4, mem_gb: 8, - disk_gb: 375, # will use LOCAL SSD for speeding things up + disk_gb: disk_size, boot_disk_gb: 10, preemptible_tries: 0, max_retries: 1, @@ -254,7 +256,7 @@ task ExtractRelevantGenotypingReads { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" # if SSD is too slow, revert to LOCAL bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/Utility/PBUtils.wdl b/wdl/tasks/Utility/PBUtils.wdl index 635b721fe..cb68d3f00 100644 --- a/wdl/tasks/Utility/PBUtils.wdl +++ b/wdl/tasks/Utility/PBUtils.wdl @@ -79,6 +79,7 @@ task ShardLongReads { # when running large scale workflows, we sometimes see errors like the following # A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: # no available zones: 2763 LOCAL_SSD_TOTAL_GB (738/30000 available) usage too high + # NOTE: 
Changed disk type to SSD to prevent the above issue -JTS zones: "select which zone (GCP) to run this task" num_ssds: "number of SSDs to use" runtime_attr_override: "Override default runtime attributes." @@ -136,7 +137,7 @@ task ShardLongReads { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" # If SSD is too slow, revert to LOCAL zones: zones bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) @@ -303,7 +304,7 @@ task ExtractHifiReads { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" # If SSD is too slow, revert to LOCAL bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/Utility/SRUtils.wdl b/wdl/tasks/Utility/SRUtils.wdl index 48b61a0fd..317d42037 100644 --- a/wdl/tasks/Utility/SRUtils.wdl +++ b/wdl/tasks/Utility/SRUtils.wdl @@ -219,7 +219,7 @@ task BwaMem2 { boot_disk_gb: 10, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.1" + docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.2" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -438,7 +438,7 @@ task BaseRecalibrator 
{ boot_disk_gb: 10, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -521,7 +521,7 @@ task ApplyBQSR { boot_disk_gb: 10, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -637,7 +637,7 @@ task ComputeBamStats { boot_disk_gb: 10, preemptible_tries: 1, max_retries: 2, - docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.1" + docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.2" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -735,7 +735,7 @@ task IndexFeatureFile { boot_disk_gb: 10, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -807,7 +807,7 @@ task RevertBaseQualities { boot_disk_gb: 10, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { diff --git a/wdl/tasks/Utility/Utils.wdl b/wdl/tasks/Utility/Utils.wdl index 375a92472..5d2df34c3 100644 --- a/wdl/tasks/Utility/Utils.wdl +++ b/wdl/tasks/Utility/Utils.wdl @@ -735,7 +735,7 @@ task MergeBams { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, 
default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -789,7 +789,7 @@ task Index { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1074,7 +1074,7 @@ task DeduplicateBam { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/Utility/VariantUtils.wdl b/wdl/tasks/Utility/VariantUtils.wdl index 492888531..b41476a44 100644 --- a/wdl/tasks/Utility/VariantUtils.wdl +++ b/wdl/tasks/Utility/VariantUtils.wdl @@ -193,7 +193,7 @@ task MergeAndSortVCFs { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + 
select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -258,7 +258,7 @@ task CollectDefinitions { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -707,13 +707,13 @@ task HardFilterVcf { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -761,13 +761,13 @@ task MakeSitesOnlyVcf { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: 
"us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -857,13 +857,13 @@ task AnnotateVcfWithBedRegions { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -985,13 +985,13 @@ task IndelsVariantRecalibrator { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, 
default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1112,13 +1112,13 @@ task SNPsVariantRecalibratorCreateModel { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1197,13 +1197,13 @@ task ApplyVqsr { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: 
select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1252,13 +1252,13 @@ task SelectVariants { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1313,13 +1313,13 @@ task RenameSingleSampleVcf { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1380,7 
+1380,7 @@ task GatherVcfs { boot_disk_gb: 10, preemptible_tries: 2, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -1440,7 +1440,7 @@ task ExtractFingerprint { boot_disk_gb: 10, preemptible_tries: 2, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -1597,6 +1597,7 @@ CODE output { File output_vcf = "~{prefix}.fingerprint.vcf" String barcode = read_string("~{prefix}.barcode.txt") + File barcode_file = "~{prefix}.barcode.txt" } ######################### @@ -1607,7 +1608,7 @@ CODE boot_disk_gb: 10, preemptible_tries: 2, max_retries: 1, - docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.1" + docker: "us.gcr.io/broad-dsp-lrma/sr-utils:0.2.2" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -1722,13 +1723,13 @@ task ExtractVariantAnnotations { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1800,13 +1801,13 @@ task TrainVariantAnnotationsModel { boot_disk_gb: 
15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -1935,13 +1936,13 @@ task ScoreVariantAnnotations { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/VariantCalling/CCSPepper.wdl b/wdl/tasks/VariantCalling/CCSPepper.wdl index 6e59e6d56..b74052c9a 100644 --- a/wdl/tasks/VariantCalling/CCSPepper.wdl +++ b/wdl/tasks/VariantCalling/CCSPepper.wdl @@ -18,9 +18,6 @@ workflow CCSPepper { pepper_memory: "Memory for Pepper" dv_threads: 
"Number of threads for DeepVariant" dv_memory: "Memory for DeepVariant" - # when running large scale workflows, we sometimes see errors like the following - # A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: - # no available zones: 2763 LOCAL_SSD_TOTAL_GB (738/30000 available) usage too high zones: "select which zone (GCP) to run this task" } @@ -89,8 +86,7 @@ task Pepper { RuntimeAttr? runtime_attr_override } - Int bam_sz = ceil(size(bam, "GB")) - Int disk_size = if bam_sz > 200 then 2*bam_sz else bam_sz + 200 + Int disk_size = 100 + 2*ceil(size(bam, "GB")) + 2*ceil(size(bai, "GB")) + 2*ceil(size(ref_fasta, "GB")) + 2*ceil(size(ref_fasta_fai, "GB")) String output_root = "/cromwell_root/pepper_output" @@ -153,7 +149,7 @@ task Pepper { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" zones: zones bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) diff --git a/wdl/tasks/VariantCalling/DeepVariant.wdl b/wdl/tasks/VariantCalling/DeepVariant.wdl index 57ba1d384..5a57ebcc3 100644 --- a/wdl/tasks/VariantCalling/DeepVariant.wdl +++ b/wdl/tasks/VariantCalling/DeepVariant.wdl @@ -26,9 +26,6 @@ workflow DeepVariant { } parameter_meta { - # when running large scale workflows, we sometimes see errors like the following - # A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: - # no available zones: 2763 LOCAL_SSD_TOTAL_GB (738/30000 available) usage too high zones: "select which zone (GCP) to run this task" } 
@@ -122,7 +119,7 @@ task DV { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" zones: zones bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) diff --git a/wdl/tasks/VariantCalling/HaplotypeCaller.wdl b/wdl/tasks/VariantCalling/HaplotypeCaller.wdl index 6e49ebb75..6cbee8c2d 100644 --- a/wdl/tasks/VariantCalling/HaplotypeCaller.wdl +++ b/wdl/tasks/VariantCalling/HaplotypeCaller.wdl @@ -36,6 +36,8 @@ workflow CallVariantsWithHaplotypeCaller { String mito_contig = "chrM" Array[String] contigs_names_to_ignore = ["RANDOM_PLACEHOLDER_VALUE"] ## Required for ignoring any filtering - this is kind of a hack - TODO: fix the task! + + RuntimeAttr? haplotype_caller_runtime_attr_override } # Scatter by chromosome: @@ -67,7 +69,8 @@ workflow CallVariantsWithHaplotypeCaller { heterozygosity = heterozygosity, heterozygosity_stdev = heterozygosity_stdev, indel_heterozygosity = indel_heterozygosity, - use_spanning_event_genotyping = true + use_spanning_event_genotyping = true, + runtime_attr_override = haplotype_caller_runtime_attr_override } } @@ -92,25 +95,22 @@ workflow CallVariantsWithHaplotypeCaller { bam = MergeVariantCalledBamOuts.output_bam } -# We're disabling ReblockGVCF for now. -# It's removing some annotations we may need later. 
- -# # Now reblock the GVCF to combine hom ref blocks and save $ / storage: -# call ReblockGVCF { -# input: -# gvcf = MergeGVCFs.output_vcf, -# gvcf_index = IndexGVCF.index, -# ref_fasta = ref_fasta, -# ref_fasta_fai = ref_fasta_fai, -# ref_dict = ref_dict, -# prefix = prefix -# } - - # Collapse the GVCF into a regular VCF: + # Now reblock the GVCF to combine hom ref blocks and save $ / storage: + call ReblockGVCF as ReblockHcGVCF { + input: + gvcf = MergeGVCFs.output_vcf, + gvcf_index = MergeGVCFs.output_vcf_index, + ref_fasta = ref_fasta, + ref_fasta_fai = ref_fasta_fai, + ref_dict = ref_dict, + prefix = prefix + } + + # Collapse the Reblocked GVCF into a regular VCF: call SRJOINT.GenotypeGVCFs as CollapseGVCFtoVCF { input: - input_gvcf_data = MergeGVCFs.output_vcf, - input_gvcf_index = MergeGVCFs.output_vcf_index, + input_gvcf_data = ReblockHcGVCF.output_gvcf, + input_gvcf_index = ReblockHcGVCF.output_gvcf_index, interval_list = SmallVariantsScatterPrep.interval_list, ref_fasta = ref_fasta, ref_fasta_fai = ref_fasta_fai, @@ -237,14 +237,14 @@ task HaplotypeCaller_GATK4_VCF { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) @@ -324,6 +324,9 @@ task ReblockGVCF { File ref_dict String prefix + + Array[Int] 
gq_blocks = [20, 30, 40] + Float? tree_score_cutoff Array[String]? annotations_to_keep @@ -343,7 +346,10 @@ task ReblockGVCF { -R ~{ref_fasta} \ -V ~{gvcf} \ -do-qual-approx \ - --floor-blocks -GQB 20 -GQB 30 -GQB 40 \ + -A AssemblyComplexity \ + --annotate-with-num-discovered-alleles \ + --floor-blocks \ + -GQB ~{sep=" -GQB " gq_blocks} \ ~{"--tree-score-threshold-to-no-call " + tree_score_cutoff} \ ~{annotations_to_keep_arg} ~{sep=" --annotations-to-keep " annotations_to_keep} \ -O ~{prefix}.rb.g.vcf.gz @@ -351,20 +357,21 @@ task ReblockGVCF { ######################### RuntimeAttr default_attr = object { - cpu_cores: 1, + cpu_cores: 2, mem_gb: 4, disk_gb: disk_size, boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "broadinstitute/gatk-nightly:2024-04-16-4.5.0.0-25-g986cb1549-NIGHTLY-SNAPSHOT" } + # TODO: Fix this docker image to a stable version after the next GATK release! RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) diff --git a/wdl/tasks/VariantCalling/ONTPepper.wdl b/wdl/tasks/VariantCalling/ONTPepper.wdl index a1d147b05..e17387dfc 100644 --- a/wdl/tasks/VariantCalling/ONTPepper.wdl +++ b/wdl/tasks/VariantCalling/ONTPepper.wdl @@ -15,9 +15,6 @@ task Pepper { ref_fasta_fai: "The reference fasta index file." threads: "The number of threads to use."
memory: "The amount of memory to use." - # when running large scale workflows, we sometimes see errors like the following - # A resource limit has delayed the operation: generic::resource_exhausted: allocating: selecting resources: selecting region and zone: - # no available zones: 2763 LOCAL_SSD_TOTAL_GB (738/30000 available) usage too high zones: "select which zone (GCP) to run this task" runtime_attr_override: "override the default runtime attributes" } @@ -116,7 +113,7 @@ task Pepper { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" zones: zones bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) diff --git a/wdl/tasks/VariantCalling/SRJointGenotyping.wdl b/wdl/tasks/VariantCalling/SRJointGenotyping.wdl index 0c8646962..ddb2109fc 100644 --- a/wdl/tasks/VariantCalling/SRJointGenotyping.wdl +++ b/wdl/tasks/VariantCalling/SRJointGenotyping.wdl @@ -169,73 +169,7 @@ task ImportGVCFs { boot_disk_gb: 15, preemptible_tries: 0, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, 
default_attr.max_retries]) - docker: select_first([runtime_attr.docker, default_attr.docker]) - } -} - -task ReblockGVCF { - - input { - File input_gvcf - File input_gvcf_index - - File ref_fasta - File ref_fasta_fai - File ref_dict - - String prefix - - Array[Int] gq_blocks = [20, 30, 40] - - String? annotations_to_keep_command - Float? tree_score_cutoff - - RuntimeAttr? runtime_attr_override - } - - Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_fai, "GB") + size(ref_dict, "GB")) - - Int disk_size = 1 + 4*ceil(size(input_gvcf, "GB")) + 4*ceil(size(input_gvcf_index, "GB")) + ref_size - - command <<< - set -euxo pipefail - - gatk --java-options "-Xms3000m -Xmx3000m" \ - ReblockGVCF \ - -R ~{ref_fasta} \ - -V ~{input_gvcf} \ - -do-qual-approx \ - --floor-blocks \ - -GQB ~{sep=" -GQB " gq_blocks} \ - ~{annotations_to_keep_command} \ - ~{"--tree-score-threshold-to-no-call " + tree_score_cutoff} \ - -O ~{prefix}.reblocked.g.vcf.gz - >>> - - output { - File output_gvcf = "~{prefix}.reblocked.g.vcf.gz" - File output_gvcf_index = "~{prefix}.reblocked.g.vcf.gz.tbi" - } - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 2, - mem_gb: 4, - disk_gb: disk_size, - boot_disk_gb: 15, - preemptible_tries: 2, - max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { @@ -332,7 +266,7 @@ task GenotypeGVCFs { boot_disk_gb: 15, preemptible_tries: 1, max_retries: 1, - docker: "us.gcr.io/broad-gatk/gatk:4.3.0.0" + docker: "us.gcr.io/broad-gatk/gatk:4.5.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { diff --git a/wdl/tasks/Visualization/NanoPlot.wdl b/wdl/tasks/Visualization/NanoPlot.wdl index c0563d5bd..4c487baf9 100644 --- a/wdl/tasks/Visualization/NanoPlot.wdl +++ b/wdl/tasks/Visualization/NanoPlot.wdl @@ -198,7 +198,7 @@ task NanoPlotFromBam { RuntimeAttr? 
runtime_attr_override } - Int disk_size = 2*ceil(size(bam, "GB")) + 10 + Int disk_size = 10 + 2*ceil(size(bam, "GB")) + 2*ceil(size(bai, "GB")) command <<< set -euxo pipefail @@ -280,7 +280,7 @@ task NanoPlotFromBam { runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " LOCAL" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD" bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])