diff --git a/fc_germline_single_sample_workflow.wdl b/fc_germline_single_sample_workflow.wdl index de638f6..d332b10 100644 --- a/fc_germline_single_sample_workflow.wdl +++ b/fc_germline_single_sample_workflow.wdl @@ -73,29 +73,12 @@ workflow germline_single_sample_workflow { Int preemptible_tries Int agg_preemptible_tries - # Optional input to increase all disk sizes in case of outlier sample with strange size behavior - Int? increase_disk_size - Boolean skip_QC Boolean make_gatk4_single_sample_vcf Boolean use_gatk4_haplotype_caller Float cutoff_for_large_rg_in_gb = 20.0 - # Some tasks need wiggle room, and we also need to add a small amount of disk to prevent getting a - # Cromwell error from asking for 0 disk when the input is less than 1GB - Int additional_disk = select_first([increase_disk_size, 20]) - # Sometimes the output is larger than the input, or a task can spill to disk. In these cases we need to account for the - # input (1) and the output (1.5) or the input(1), the output(1), and spillage (.5). - Float bwa_disk_multiplier = 2.5 - # SortSam spills to disk a lot more because we are only store 300000 records in RAM now because its faster for our data - # so it needs more disk space. Also it spills to disk in an uncompressed format so we need to account for that with a - # larger multiplier - Float sort_sam_disk_multiplier = 3.25 - - # Mark Duplicates takes in as input readgroup bams and outputs a slightly smaller aggregated bam. Giving .25 as wiggleroom - Float md_disk_multiplier = 2.25 - String bwa_commandline="bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta" String recalibrated_bam_basename = base_file_name + ".aligned.duplicates_marked.recalibrated" @@ -106,25 +89,19 @@ workflow germline_single_sample_workflow { # by MergeBamAlignment. call Alignment.GetBwaVersion - # Get the size of the standard reference files as well as the additional reference files needed for BWA - Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") - Float bwa_ref_size = ref_size + size(ref_alt, "GB") + size(ref_amb, "GB") + size(ref_ann, "GB") + size(ref_bwt, "GB") + size(ref_pac, "GB") + size(ref_sa, "GB") - Float dbsnp_size = size(dbSNP_vcf, "GB") - # Align flowcell-level unmapped input bams in parallel scatter (unmapped_bam in flowcell_unmapped_bams) { Float unmapped_bam_size = size(unmapped_bam, "GB") String unmapped_bam_basename = basename(unmapped_bam, unmapped_bam_suffix) - + if (!skip_QC) { # QC the unmapped BAM call QC.CollectQualityYieldMetrics as CollectQualityYieldMetrics { input: input_bam = unmapped_bam, metrics_filename = unmapped_bam_basename + ".unmapped.quality_yield_metrics", - disk_size = unmapped_bam_size + additional_disk, preemptible_tries = preemptible_tries } } @@ -147,12 +124,8 @@ workflow germline_single_sample_workflow { ref_bwt = ref_bwt, ref_pac = ref_pac, ref_sa = ref_sa, - additional_disk = additional_disk, compression_level = compression_level, - preemptible_tries = preemptible_tries, - bwa_ref_size = bwa_ref_size, - disk_multiplier = bwa_disk_multiplier, - unmapped_bam_size = unmapped_bam_size + preemptible_tries = preemptible_tries } } @@ -173,9 +146,6 @@ workflow germline_single_sample_workflow { ref_pac = ref_pac, ref_sa = ref_sa, bwa_version = GetBwaVersion.version, - # The merged bam can be bigger than only the aligned bam, - # so account for the output size by multiplying the input size by 2.75. 
- disk_size = unmapped_bam_size + bwa_ref_size + (bwa_disk_multiplier * unmapped_bam_size) + additional_disk, compression_level = compression_level, preemptible_tries = preemptible_tries } @@ -192,7 +162,6 @@ workflow germline_single_sample_workflow { input: input_bam = output_aligned_bam, output_bam_prefix = unmapped_bam_basename + ".readgroup", - disk_size = mapped_bam_size + additional_disk, preemptible_tries = preemptible_tries } } @@ -213,22 +182,16 @@ workflow germline_single_sample_workflow { input_bams = output_aligned_bam, output_bam_basename = base_file_name + ".aligned.unsorted.duplicates_marked", metrics_filename = base_file_name + ".duplicate_metrics", - # The merged bam will be smaller than the sum of the parts so we need to account for the unmerged inputs - # and the merged output. - disk_size = (md_disk_multiplier * SumFloats.total_size) + additional_disk, + total_input_size = SumFloats.total_size, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } - Float agg_bam_size = size(MarkDuplicates.output_bam, "GB") - # Sort aggregated+deduped BAM file call Processing.SortSam as SortSampleBam { input: input_bam = MarkDuplicates.output_bam, output_bam_basename = base_file_name + ".aligned.duplicate_marked.sorted", - # This task spills to disk so we need space for the input bam, the output bam, and any spillage. - disk_size = (sort_sam_disk_multiplier * agg_bam_size) + additional_disk, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } @@ -251,7 +214,6 @@ workflow germline_single_sample_workflow { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, output_prefix = base_file_name + ".preBqsr", - disk_size = agg_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries, contamination_underestimation_factor = 0.75 } @@ -278,8 +240,7 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # We need disk to localize the sharded bam due to the scatter. - disk_size = (agg_bam_size / bqsr_divisor) + ref_size + dbsnp_size + additional_disk, + bqsr_scatter = bqsr_divisor, preemptible_tries = agg_preemptible_tries } } @@ -290,7 +251,6 @@ workflow germline_single_sample_workflow { input: input_bqsr_reports = BaseRecalibrator.recalibration_report, output_report_filename = base_file_name + ".recal_data.csv", - disk_size = additional_disk, preemptible_tries = preemptible_tries } @@ -305,20 +265,20 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # We need disk to localize the sharded bam and the sharded output due to the scatter. 
- disk_size = ((agg_bam_size * 3) / bqsr_divisor) + ref_size + additional_disk, + bqsr_scatter = bqsr_divisor, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } } + Float agg_bam_size = size(SortSampleBam.output_bam, "GB") + # Merge the recalibrated BAM files resulting from by-interval recalibration - call Processing.GatherBamFiles as GatherBamFiles { + call Processing.GatherSortedBamFiles as GatherBamFiles { input: input_bams = ApplyBQSR.recalibrated_bam, output_bam_basename = base_file_name, - # Multiply the input bam size by two to account for the input and output - disk_size = (2 * agg_bam_size) + additional_disk, + total_input_size = agg_bam_size, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } @@ -338,7 +298,6 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -351,7 +310,6 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -365,7 +323,6 @@ workflow germline_single_sample_workflow { ref_fasta_index = ref_fasta_index, wgs_coverage_interval_list = wgs_coverage_interval_list, read_length = read_length, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -379,7 +336,6 @@ workflow germline_single_sample_workflow { ref_fasta_index = ref_fasta_index, wgs_coverage_interval_list = wgs_coverage_interval_list, read_length = read_length, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -389,14 +345,10 @@ workflow germline_single_sample_workflow { input_bam = GatherBamFiles.output_bam, input_bam_index = GatherBamFiles.output_bam_index, read_group_md5_filename = recalibrated_bam_basename + ".bam.read_group_md5", - disk_size = binned_qual_bam_size + additional_disk, preemptible_tries = agg_preemptible_tries } } - # Germline single sample GVCFs shouldn't get bigger even when the input bam is bigger (after a certain size) - Int GVCF_disk_size = select_first([increase_disk_size, 30]) - # ValidateSamFile runs out of memory in mate validation on crazy edge case data, so we want to skip the mate validation # in those cases. These values set the thresholds for what is considered outside the normal realm of "reasonable" data. 
Float max_duplication_in_reasonable_sample = 0.30 @@ -409,12 +361,9 @@ workflow germline_single_sample_workflow { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, output_basename = base_file_name, - disk_size = (2 * binned_qual_bam_size) + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } - Float cram_size = size(ConvertToCram.output_cram, "GB") - if (!skip_QC) { # Check whether the data has massively high duplication or chimerism rates call QC.CheckPreValidation as CheckPreValidation { @@ -441,7 +390,6 @@ workflow germline_single_sample_workflow { ignore = ["MISSING_TAG_NM"], max_output = 1000000000, is_outlier_data = is_outlier_data, - disk_size = cram_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -474,8 +422,7 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # Divide the total output GVCF size and the input bam size to account for the smaller scattered input and output. - disk_size = ((binned_qual_bam_size + GVCF_disk_size) / hc_divisor) + ref_size + additional_disk, + hc_scatter = hc_divisor, preemptible_tries = agg_preemptible_tries } @@ -486,7 +433,6 @@ workflow germline_single_sample_workflow { input_vcf_index = HaplotypeCaller4.output_vcf_index, vcf_basename = base_file_name, interval_list = ScatterIntervalList.out[index], - disk_size = GVCF_disk_size + GVCF_disk_size + additional_disk, preemptible_tries = preemptible_tries } } @@ -501,8 +447,7 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # Divide the total output GVCF size and the input bam size to account for the smaller scattered input and output. - disk_size = ((binned_qual_bam_size + GVCF_disk_size) / hc_divisor) + ref_size + additional_disk, + hc_scatter = hc_divisor, preemptible_tries = agg_preemptible_tries } } @@ -519,7 +464,6 @@ workflow germline_single_sample_workflow { input_vcfs = merge_input, input_vcfs_indexes = merge_input_index, output_vcf_name = final_vcf_base_name + name_token + ".vcf.gz", - disk_size = GVCF_disk_size, preemptible_tries = agg_preemptible_tries } diff --git a/germline_single_sample_workflow.wdl b/germline_single_sample_workflow.wdl index 154072c..04905b8 100644 --- a/germline_single_sample_workflow.wdl +++ b/germline_single_sample_workflow.wdl @@ -72,9 +72,6 @@ workflow germline_single_sample_workflow { Int preemptible_tries Int agg_preemptible_tries - # Optional input to increase all disk sizes in case of outlier sample with strange size behavior - Int? 
increase_disk_size - call ToBam.to_bam_workflow { input: contamination_sites_ud = contamination_sites_ud, @@ -103,21 +100,9 @@ workflow germline_single_sample_workflow { known_indels_sites_VCFs = known_indels_sites_VCFs, known_indels_sites_indices = known_indels_sites_indices, preemptible_tries = preemptible_tries, - agg_preemptible_tries = agg_preemptible_tries, - increase_disk_size = increase_disk_size + agg_preemptible_tries = agg_preemptible_tries } - # Some tasks need wiggle room, and we also need to add a small amount of disk to prevent getting a - # Cromwell error from asking for 0 disk when the input is less than 1GB - Int additional_disk = select_first([increase_disk_size, 20]) - # Germline single sample GVCFs shouldn't get bigger even when the input bam is bigger (after a certain size) - Int GVCF_disk_size = select_first([increase_disk_size, 30]) - #BQSR bins the qualities which makes a significantly smaller bam - Float binned_qual_bam_size = size(to_bam_workflow.output_bam, "GB") - - Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") - Float dbsnp_size = size(dbSNP_vcf, "GB") - # ValidateSamFile runs out of memory in mate validation on crazy edge case data, so we want to skip the mate validation # in those cases. These values set the thresholds for what is considered outside the normal realm of "reasonable" data. Float max_duplication_in_reasonable_sample = 0.30 @@ -130,12 +115,9 @@ workflow germline_single_sample_workflow { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, output_basename = base_file_name, - disk_size = (2 * binned_qual_bam_size) + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } - Float cram_size = size(ConvertToCram.output_cram, "GB") - # Check whether the data has massively high duplication or chimerism rates call QC.CheckPreValidation as CheckPreValidation { input: @@ -158,7 +140,6 @@ workflow germline_single_sample_workflow { ignore = ["MISSING_TAG_NM"], max_output = 1000000000, is_outlier_data = CheckPreValidation.is_outlier_data, - disk_size = cram_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -189,8 +170,7 @@ workflow germline_single_sample_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # Divide the total output GVCF size and the input bam size to account for the smaller scattered input and output. 
- disk_size = ((binned_qual_bam_size + GVCF_disk_size) / hc_divisor) + ref_size + additional_disk, + hc_scatter = hc_divisor, preemptible_tries = agg_preemptible_tries } } @@ -201,7 +181,6 @@ workflow germline_single_sample_workflow { input_vcfs = HaplotypeCaller.output_gvcf, input_vcfs_indexes = HaplotypeCaller.output_gvcf_index, output_vcf_name = final_gvcf_base_name + ".g.vcf.gz", - disk_size = GVCF_disk_size, preemptible_tries = agg_preemptible_tries } @@ -218,7 +197,6 @@ workflow germline_single_sample_workflow { ref_fasta_index = ref_fasta_index, ref_dict = ref_dict, wgs_calling_interval_list = wgs_calling_interval_list, - disk_size = gvcf_size + ref_size + dbsnp_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -232,7 +210,6 @@ workflow germline_single_sample_workflow { dbSNP_vcf_index = dbSNP_vcf_index, ref_dict = ref_dict, wgs_evaluation_interval_list = wgs_evaluation_interval_list, - disk_size = gvcf_size + dbsnp_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -296,4 +273,3 @@ workflow germline_single_sample_workflow { File output_vcf_index = MergeVCFs.output_vcf_index } } - diff --git a/tasks_pipelines/alignment.wdl b/tasks_pipelines/alignment.wdl index bf57374..0ba0334 100644 --- a/tasks_pipelines/alignment.wdl +++ b/tasks_pipelines/alignment.wdl @@ -23,7 +23,7 @@ task GetBwaVersion { sed 's/Version: //' } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" memory: "1 GB" } output { @@ -50,10 +50,17 @@ task SamToFastqAndBwaMemAndMba { File ref_bwt File ref_pac File ref_sa - Float disk_size Int compression_level Int preemptible_tries + Float unmapped_bam_size = size(input_bam, "GB") + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Float bwa_ref_size = ref_size + size(ref_alt, "GB") + size(ref_amb, "GB") + size(ref_ann, "GB") + size(ref_bwt, "GB") + size(ref_pac, "GB") + size(ref_sa, "GB") + # Sometimes the output is larger than the input, or a task can spill to disk. + # In these cases we need to account for the input (1) and the output (1.5) or the input(1), the output(1), and spillage (.5). + Float disk_multiplier = 2.5 + Int disk_size = ceil(unmapped_bam_size + bwa_ref_size + (disk_multiplier * unmapped_bam_size) + 20) + command <<< set -o pipefail set -e @@ -107,11 +114,11 @@ task SamToFastqAndBwaMemAndMba { fi >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "14 GB" cpu: "16" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File output_bam = "${output_bam_basename}.bam" @@ -122,10 +129,14 @@ task SamToFastqAndBwaMemAndMba { task SamSplitter { File input_bam Int n_reads - Int disk_size Int preemptible_tries Int compression_level + Float unmapped_bam_size = size(input_bam, "GB") + # Since the output bams are less compressed than the input bam we need a disk multiplier that's larger than 2. 
+  Float disk_multiplier = 2.5
+  Int disk_size = ceil(disk_multiplier * unmapped_bam_size + 20)
+
   command {
     set -e
     mkdir output_dir
diff --git a/tasks_pipelines/bam_processing.wdl b/tasks_pipelines/bam_processing.wdl
index de7c6b3..b97c4d5 100644
--- a/tasks_pipelines/bam_processing.wdl
+++ b/tasks_pipelines/bam_processing.wdl
@@ -13,13 +13,17 @@
 ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
 ## licensing information pertaining to the included programs.
-# Sort BAM file by coordinate order 
+# Sort BAM file by coordinate order
 task SortSam {
   File input_bam
   String output_bam_basename
   Int preemptible_tries
   Int compression_level
-  Float disk_size
+
+  # SortSam spills to disk a lot more because we now store only 300000 records in RAM, which is faster for our data, so it needs
+  # more disk space. It also spills to disk in an uncompressed format, so we need to account for that with a larger multiplier
+  Float sort_sam_disk_multiplier = 3.25
+  Int disk_size = ceil(sort_sam_disk_multiplier * size(input_bam, "GB")) + 20
   command {
     java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \
@@ -33,8 +37,8 @@
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
-    disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
+    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135"
+    disks: "local-disk " + disk_size + " HDD"
     cpu: "1"
     memory: "5000 MB"
     preemptible: preemptible_tries
@@ -52,26 +56,30 @@
   String output_bam_basename
   Int preemptible_tries
   Int compression_level
-  Float disk_size
+
+  # SortSam spills to disk a lot more because we now store only 300000 records in RAM, which is faster for our data, so it needs
+  # more disk space. It also spills to disk in an uncompressed format, so we need to account for that with a larger multiplier
+  Float sort_sam_disk_multiplier = 3.25
+  Int disk_size = ceil(sort_sam_disk_multiplier * size(input_bam, "GB")) + 20
   command {
     set -e
     export GATK_LOCAL_JAR=/root/gatk.jar
-    gatk --java-options "-Dsamjdk.compression_level=${compression_level} -Xms13g -Xmx13g" \
+    gatk --java-options "-Dsamjdk.compression_level=${compression_level} -Xms100g -Xmx100g" \
     SortSamSpark \
     -I ${input_bam} \
     -O ${output_bam_basename}.bam \
     -- --conf spark.local.dir=. --spark-master 'local[16]' --conf 'spark.kryo.referenceTracking=false'
-    samtools index ${output_bam_basename}.bam ${output_bam_basename}.bai 
+    samtools index ${output_bam_basename}.bam ${output_bam_basename}.bai
   }
   runtime {
-    docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:4.0.1.2-18-g78fbcd88a-ericSortSamEval"
-    disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
+    docker: "us.gcr.io/broad-gatk/gatk:4.0.2.1"
+    disks: "local-disk " + disk_size + " HDD"
     bootDiskSizeGb: "15"
     cpu: "16"
-    memory: "14 GB"
+    memory: "102 GB"
     preemptible: preemptible_tries
   }
   output {
@@ -85,10 +93,15 @@ task MarkDuplicates {
   Array[File] input_bams
   String output_bam_basename
   String metrics_filename
-  Float disk_size
+  Float total_input_size
   Int compression_level
   Int preemptible_tries
+  # The merged bam will be smaller than the sum of the parts so we need to account for the unmerged inputs and the merged output.
+  # Mark Duplicates takes in as input readgroup bams and outputs a slightly smaller aggregated bam.
Giving .25 as wiggleroom + Float md_disk_multiplier = 2.25 + Int disk_size = ceil(md_disk_multiplier * total_input_size) + 20 + # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing @@ -111,7 +124,7 @@ task MarkDuplicates { ADD_PG_TAG_TO_READS=false } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "7 GB" disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" @@ -134,9 +147,13 @@ task BaseRecalibrator { File ref_dict File ref_fasta File ref_fasta_index - Float disk_size + Int bqsr_scatter Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Float dbsnp_size = size(dbSNP_vcf, "GB") + Int disk_size = ceil((size(input_bam, "GB") / bqsr_scatter) + ref_size + dbsnp_size) + 20 + command { /usr/gitc/gatk4/gatk-launch --javaOptions "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -XX:+PrintFlagsFinal \ -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintGCDetails \ @@ -151,10 +168,10 @@ task BaseRecalibrator { -L ${sep=" -L " sequence_group_interval} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "6 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File recalibration_report = "${recalibration_report_filename}" @@ -170,10 +187,13 @@ task ApplyBQSR { File ref_dict File ref_fasta File ref_fasta_index - Float disk_size Int compression_level + Int bqsr_scatter Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil((size(input_bam, "GB") * 3 / bqsr_scatter) + ref_size) + 20 + command { /usr/gitc/gatk4/gatk-launch --javaOptions "-XX:+PrintFlagsFinal -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps \ -XX:+PrintGCDetails -Xloggc:gc_log.log \ @@ -190,10 +210,10 @@ task ApplyBQSR { -L ${sep=" -L " sequence_group_interval} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3500 MB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File recalibrated_bam = "${output_bam_basename}.bam" @@ -205,7 +225,6 @@ task ApplyBQSR { task GatherBqsrReports { Array[File] input_bqsr_reports String output_report_filename - Int disk_size Int preemptible_tries command { @@ -215,24 +234,27 @@ task GatherBqsrReports { -O ${output_report_filename} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3500 MB" - disks: "local-disk " + disk_size + " HDD" + disks: "local-disk 20 HDD" } output { File output_bqsr_report = "${output_report_filename}" } } -# Combine multiple recalibrated BAM files -task GatherBamFiles { +# Combine multiple *sorted* BAM files +task GatherSortedBamFiles { Array[File] 
input_bams String output_bam_basename - Float disk_size + Float total_input_size Int compression_level Int preemptible_tries + # Multiply the input bam size by two to account for the input and output + Int disk_size = ceil(2 * total_input_size) + 20 + command { java -Dsamjdk.compression_level=${compression_level} -Xms2000m -jar /usr/gitc/picard.jar \ GatherBamFiles \ @@ -242,10 +264,10 @@ task GatherBamFiles { CREATE_MD5_FILE=true } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File output_bam = "${output_bam_basename}.bam" @@ -254,6 +276,37 @@ task GatherBamFiles { } } +# Combine multiple *unsorted* BAM files +# Note that if/when WDL supports optional outputs, we should merge this task with the sorted version +task GatherUnsortedBamFiles { + Array[File] input_bams + String output_bam_basename + Float total_input_size + Int compression_level + Int preemptible_tries + + # Multiply the input bam size by two to account for the input and output + Int disk_size = ceil(2 * total_input_size) + 20 + + command { + java -Dsamjdk.compression_level=${compression_level} -Xms2000m -jar /usr/gitc/picard.jar \ + GatherBamFiles \ + INPUT=${sep=' INPUT=' input_bams} \ + OUTPUT=${output_bam_basename}.bam \ + CREATE_INDEX=false \ + CREATE_MD5_FILE=false + } + runtime { + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" + preemptible: preemptible_tries + memory: "3 GB" + disks: "local-disk " + disk_size + " HDD" + } + output { + File output_bam = "${output_bam_basename}.bam" + } +} + # Notes on the contamination estimate: # The contamination value is read from the FREEMIX field of the selfSM file output by verifyBamId # @@ -276,10 +329,11 @@ task CheckContamination { File ref_fasta File ref_fasta_index String output_prefix - Float disk_size Int preemptible_tries Float contamination_underestimation_factor + Int disk_size = ceil(size(input_bam, "GB") + size(ref_fasta, "GB")) + 30 + command <<< set -e @@ -322,7 +376,7 @@ task CheckContamination { runtime { preemptible: preemptible_tries memory: "2 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" docker: "us.gcr.io/broad-gotc-prod/verify-bam-id:c8a66425c312e5f8be46ab0c41f8d7a1942b6e16-1500298351" } output { diff --git a/tasks_pipelines/germline_variant_discovery.wdl b/tasks_pipelines/germline_variant_discovery.wdl index 53f86ae..1a5ce3a 100644 --- a/tasks_pipelines/germline_variant_discovery.wdl +++ b/tasks_pipelines/germline_variant_discovery.wdl @@ -22,8 +22,11 @@ task HaplotypeCaller_GATK35_GVCF { File ref_fasta File ref_fasta_index Float? contamination - Float disk_size Int preemptible_tries + Int hc_scatter + + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(((size(input_bam, "GB") + 30) / hc_scatter) + ref_size) + 20 # We use interval_padding 500 below to make sure that the HaplotypeCaller has context on both sides around # the interval because the assembly uses them. 
@@ -53,11 +56,11 @@ task HaplotypeCaller_GATK35_GVCF { --read_filter OverclippedRead } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "10 GB" cpu: "1" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File output_gvcf = "${gvcf_basename}.vcf.gz" @@ -65,9 +68,6 @@ task HaplotypeCaller_GATK35_GVCF { } } -# TODO -- -# -O ${vcf_basename}.vcf.gz \ -# -contamination ${default=0 contamination} ${true="-ERC GVCF" false="" make_gvcf} task HaplotypeCaller_GATK4_VCF { String input_bam File interval_list @@ -77,8 +77,11 @@ task HaplotypeCaller_GATK4_VCF { File ref_fasta_index Float contamination Boolean make_gvcf - Float disk_size Int preemptible_tries + Int hc_scatter + + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(((size(input_bam, "GB") + 30) / hc_scatter) + ref_size) + 20 command <<< @@ -90,14 +93,15 @@ task HaplotypeCaller_GATK4_VCF { -R ${ref_fasta} \ -I ${input_bam} \ -L ${interval_list} \ - -O ${vcf_basename}.vcf.gz ${true="-ERC GVCF" false="" make_gvcf} + -O ${vcf_basename}.vcf.gz \ + -contamination ${default=0 contamination} ${true="-ERC GVCF" false="" make_gvcf} >>> runtime { - docker: "broadinstitute/gatk-nightly:2018-02-08-4.0.1.1-11-g9b93440-SNAPSHOT" + docker: "us.gcr.io/broad-gatk/gatk:4.0.2.1" preemptible: preemptible_tries memory: "6.5 GB" cpu: "1" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File output_vcf = "${vcf_basename}.vcf.gz" @@ -110,7 +114,6 @@ task MergeVCFs { Array[File] input_vcfs Array[File] input_vcfs_indexes String output_vcf_name - Int disk_size Int preemptible_tries # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -122,10 +125,10 @@ task MergeVCFs { OUTPUT=${output_vcf_name} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" - disks: "local-disk " + disk_size + " HDD" + disks: "local-disk 30 HDD" } output { File output_vcf = "${output_vcf_name}" @@ -138,9 +141,9 @@ task HardFilterVcf { File input_vcf_index String vcf_basename File interval_list - Int disk_size Int preemptible_tries + Int disk_size = ceil(2 * size(input_vcf, "GB")) + 20 String output_vcf_name = vcf_basename + ".filtered.vcf.gz" command { @@ -163,4 +166,3 @@ task HardFilterVcf { disks: "local-disk " + disk_size + " HDD" } } - diff --git a/tasks_pipelines/qc.wdl b/tasks_pipelines/qc.wdl index 4f54099..62225c7 100644 --- a/tasks_pipelines/qc.wdl +++ b/tasks_pipelines/qc.wdl @@ -17,9 +17,10 @@ task CollectQualityYieldMetrics { File input_bam String metrics_filename - Float disk_size Int preemptible_tries + Int disk_size = ceil(size(input_bam, "GB")) + 20 + command { java -Xms2000m -jar /usr/gitc/picard.jar \ CollectQualityYieldMetrics \ @@ -28,8 +29,8 @@ task CollectQualityYieldMetrics { OUTPUT=${metrics_filename} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" + disks: "local-disk " + disk_size + " HDD" memory: "3 GB" preemptible: preemptible_tries } @@ -43,7 +44,8 @@ task 
CollectUnsortedReadgroupBamQualityMetrics { File input_bam String output_bam_prefix Int preemptible_tries - Float disk_size + + Int disk_size = ceil(size(input_bam, "GB")) + 20 command { java -Xms5000m -jar /usr/gitc/picard.jar \ @@ -63,9 +65,9 @@ task CollectUnsortedReadgroupBamQualityMetrics { touch ${output_bam_prefix}.insert_size_histogram.pdf } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" memory: "7 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" preemptible: preemptible_tries } output { @@ -89,7 +91,9 @@ task CollectReadgroupBamQualityMetrics { File ref_fasta File ref_fasta_index Int preemptible_tries - Float disk_size + + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 command { java -Xms5000m -jar /usr/gitc/picard.jar \ @@ -105,9 +109,9 @@ task CollectReadgroupBamQualityMetrics { METRIC_ACCUMULATION_LEVEL="READ_GROUP" } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" memory: "7 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" preemptible: preemptible_tries } output { @@ -127,7 +131,9 @@ task CollectAggregationMetrics { File ref_fasta File ref_fasta_index Int preemptible_tries - Float disk_size + + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 command { java -Xms5000m -jar /usr/gitc/picard.jar \ @@ -150,9 +156,9 @@ task CollectAggregationMetrics { touch ${output_bam_prefix}.insert_size_histogram.pdf } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" memory: "7 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" preemptible: preemptible_tries } output { @@ -177,9 +183,11 @@ task CrossCheckFingerprints { Array[File] input_bam_indexes File? haplotype_database_file String metrics_filename - Float disk_size + Float total_input_size Int preemptible_tries + Int disk_size = ceil(total_input_size) + 20 + command <<< java -Dsamjdk.buffer_size=131072 \ -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms2000m \ @@ -192,10 +200,10 @@ task CrossCheckFingerprints { LOD_THRESHOLD=-20.0 >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "2 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File cross_check_fingerprints_metrics = "${metrics_filename}" @@ -211,9 +219,10 @@ task CheckFingerprint { File? genotypes File? 
genotypes_index String sample - Float disk_size Int preemptible_tries + Int disk_size = ceil(size(input_bam, "GB")) + 20 + command <<< java -Dsamjdk.buffer_size=131072 \ -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms1024m \ @@ -228,10 +237,10 @@ task CheckFingerprint { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "1 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File summary_metrics = "${output_basename}.fingerprinting_summary_metrics" @@ -274,7 +283,7 @@ task CheckPreValidation { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries docker: "python:2.7" memory: "2 GB" @@ -296,9 +305,11 @@ task ValidateSamFile { Int? max_output Array[String]? ignore Boolean? is_outlier_data - Float disk_size Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 + command { java -Xms6000m -jar /usr/gitc/picard.jar \ ValidateSamFile \ @@ -312,10 +323,10 @@ task ValidateSamFile { IS_BISULFITE_SEQUENCED=false } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "7 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File report = "${report_filename}" @@ -331,9 +342,11 @@ task CollectWgsMetrics { File ref_fasta File ref_fasta_index Int read_length - Float disk_size Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 + command { java -Xms2000m -jar /usr/gitc/picard.jar \ CollectWgsMetrics \ @@ -347,10 +360,10 @@ task CollectWgsMetrics { READ_LENGTH=${read_length} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File metrics = "${metrics_filename}" @@ -366,9 +379,11 @@ task CollectRawWgsMetrics { File ref_fasta File ref_fasta_index Int read_length - Float disk_size Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + Int disk_size = ceil(size(input_bam, "GB") + ref_size) + 20 + command { java -Xms2000m -jar /usr/gitc/picard.jar \ CollectRawWgsMetrics \ @@ -382,10 +397,10 @@ task CollectRawWgsMetrics { READ_LENGTH=${read_length} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File metrics = "${metrics_filename}" @@ -397,9 +412,10 @@ task CalculateReadGroupChecksum { File input_bam File input_bam_index String read_group_md5_filename - Float disk_size Int preemptible_tries + Int disk_size = ceil(size(input_bam, "GB")) + 20 
+ command { java -Xms1000m -jar /usr/gitc/picard.jar \ CalculateReadGroupChecksum \ @@ -407,10 +423,10 @@ task CalculateReadGroupChecksum { OUTPUT=${read_group_md5_filename} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "2 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File md5_file = "${read_group_md5_filename}" @@ -427,9 +443,11 @@ task ValidateGVCF { File dbSNP_vcf File dbSNP_vcf_index File wgs_calling_interval_list - Float disk_size Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") + Int disk_size = ceil(size(input_vcf, "GB") + size(dbSNP_vcf, "GB") + ref_size) + 20 + command { /usr/gitc/gatk4/gatk-launch --javaOptions "-Xms3000m" \ ValidateVariants \ @@ -441,10 +459,10 @@ task ValidateGVCF { --dbsnp ${dbSNP_vcf} } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3500 MB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } } @@ -457,9 +475,10 @@ task CollectGvcfCallingMetrics { File dbSNP_vcf_index File ref_dict File wgs_evaluation_interval_list - Float disk_size Int preemptible_tries + Int disk_size = ceil(size(input_vcf, "GB") + size(dbSNP_vcf, "GB")) + 20 + command { java -Xms2000m -jar /usr/gitc/picard.jar \ CollectVariantCallingMetrics \ @@ -471,14 +490,13 @@ task CollectGvcfCallingMetrics { GVCF_INPUT=true } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File summary_metrics = "${metrics_basename}.variant_calling_summary_metrics" File detail_metrics = "${metrics_basename}.variant_calling_detail_metrics" } } - diff --git a/tasks_pipelines/split_large_readgroup.wdl b/tasks_pipelines/split_large_readgroup.wdl index 1eee076..a3044d2 100644 --- a/tasks_pipelines/split_large_readgroup.wdl +++ b/tasks_pipelines/split_large_readgroup.wdl @@ -35,23 +35,14 @@ workflow split_large_readgroup { File ref_bwt File ref_pac File ref_sa - Int additional_disk Int compression_level Int preemptible_tries Int reads_per_file = 48000000 - Float bwa_ref_size - Float disk_multiplier - - Float unmapped_bam_size - call Alignment.SamSplitter as SamSplitter { input : input_bam = input_bam, n_reads = reads_per_file, - # Since the output bams are less compressed than the input bam we need a disk multiplier - # that's larger than 2. - disk_size = ceil(disk_multiplier * unmapped_bam_size + additional_disk), preemptible_tries = preemptible_tries, compression_level = compression_level } @@ -75,9 +66,6 @@ workflow split_large_readgroup { ref_pac = ref_pac, ref_sa = ref_sa, bwa_version = bwa_version, - # The merged bam can be bigger than only the aligned bam, - # so account for the output size by multiplying the input size by 2.75. 
- disk_size = current_unmapped_bam_size + bwa_ref_size + (disk_multiplier * current_unmapped_bam_size) + additional_disk, compression_level = compression_level, preemptible_tries = preemptible_tries } @@ -91,10 +79,10 @@ workflow split_large_readgroup { preemptible_tries = preemptible_tries } - call Processing.GatherBamFiles as GatherMonolithicBamFile { + call Processing.GatherUnsortedBamFiles as GatherMonolithicBamFile { input: input_bams = SamToFastqAndBwaMemAndMba.output_bam, - disk_size = ceil((2 * SumSplitAlignedSizes.total_size) + additional_disk), + total_input_size = SumSplitAlignedSizes.total_size, output_bam_basename = output_bam_basename, preemptible_tries = preemptible_tries, compression_level = compression_level diff --git a/tasks_pipelines/unmapped_bam_to_aligned_bam.wdl b/tasks_pipelines/unmapped_bam_to_aligned_bam.wdl index c1fcc1f..6bb25cd 100644 --- a/tasks_pipelines/unmapped_bam_to_aligned_bam.wdl +++ b/tasks_pipelines/unmapped_bam_to_aligned_bam.wdl @@ -58,24 +58,7 @@ workflow to_bam_workflow { Float cutoff_for_large_rg_in_gb = 20.0 - # Optional input to increase all disk sizes in case of outlier sample with strange size behavior - Int? increase_disk_size - - # Some tasks need wiggle room, and we also need to add a small amount of disk to prevent getting a - # Cromwell error from asking for 0 disk when the input is less than 1GB - Int additional_disk = select_first([increase_disk_size, 20]) - # Sometimes the output is larger than the input, or a task can spill to disk. In these cases we need to account for the - # input (1) and the output (1.5) or the input(1), the output(1), and spillage (.5). - Float bwa_disk_multiplier = 2.5 - # SortSam spills to disk a lot more because we are only store 300000 records in RAM now because its faster for our data - # so it needs more disk space. Also it spills to disk in an uncompressed format so we need to account for that with a - # larger multiplier - Float sort_sam_disk_multiplier = 3.25 - - # Mark Duplicates takes in as input readgroup bams and outputs a slightly smaller aggregated bam. 
Giving .25 as wiggleroom - Float md_disk_multiplier = 2.25 - - String bwa_commandline="bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta" + String bwa_commandline = "bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta" String recalibrated_bam_basename = base_file_name + ".aligned.duplicates_marked.recalibrated" @@ -86,9 +69,6 @@ workflow to_bam_workflow { call Alignment.GetBwaVersion # Get the size of the standard reference files as well as the additional reference files needed for BWA - Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB") - Float bwa_ref_size = ref_size + size(ref_alt, "GB") + size(ref_amb, "GB") + size(ref_ann, "GB") + size(ref_bwt, "GB") + size(ref_pac, "GB") + size(ref_sa, "GB") - Float dbsnp_size = size(dbSNP_vcf, "GB") # Align flowcell-level unmapped input bams in parallel scatter (unmapped_bam in flowcell_unmapped_bams) { @@ -102,7 +82,6 @@ workflow to_bam_workflow { input: input_bam = unmapped_bam, metrics_filename = unmapped_bam_basename + ".unmapped.quality_yield_metrics", - disk_size = unmapped_bam_size + additional_disk, preemptible_tries = preemptible_tries } @@ -124,12 +103,8 @@ workflow to_bam_workflow { ref_bwt = ref_bwt, ref_pac = ref_pac, ref_sa = ref_sa, - additional_disk = additional_disk, compression_level = compression_level, - preemptible_tries = preemptible_tries, - bwa_ref_size = bwa_ref_size, - disk_multiplier = bwa_disk_multiplier, - unmapped_bam_size = unmapped_bam_size + preemptible_tries = preemptible_tries } } @@ -150,9 +125,6 @@ workflow to_bam_workflow { ref_pac = ref_pac, ref_sa = ref_sa, bwa_version = GetBwaVersion.version, - # The merged bam can be bigger than only the aligned bam, - # so account for the output size by multiplying the input size by 2.75. - disk_size = unmapped_bam_size + bwa_ref_size + (bwa_disk_multiplier * unmapped_bam_size) + additional_disk, compression_level = compression_level, preemptible_tries = preemptible_tries } @@ -168,7 +140,6 @@ workflow to_bam_workflow { input: input_bam = output_aligned_bam, output_bam_prefix = unmapped_bam_basename + ".readgroup", - disk_size = mapped_bam_size + additional_disk, preemptible_tries = preemptible_tries } } @@ -188,26 +159,22 @@ workflow to_bam_workflow { input_bams = output_aligned_bam, output_bam_basename = base_file_name + ".aligned.unsorted.duplicates_marked", metrics_filename = base_file_name + ".duplicate_metrics", - # The merged bam will be smaller than the sum of the parts so we need to account for the unmerged inputs - # and the merged output. - disk_size = (md_disk_multiplier * SumFloats.total_size) + additional_disk, + total_input_size = SumFloats.total_size, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } - Float agg_bam_size = size(MarkDuplicates.output_bam, "GB") - # Sort aggregated+deduped BAM file and fix tags call Processing.SortSam as SortSampleBam { input: input_bam = MarkDuplicates.output_bam, output_bam_basename = base_file_name + ".aligned.duplicate_marked.sorted", - # This task spills to disk so we need space for the input bam, the output bam, and any spillage. 
- disk_size = (sort_sam_disk_multiplier * agg_bam_size) + additional_disk, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } + Float agg_bam_size = size(SortSampleBam.output_bam, "GB") + if (defined(haplotype_database_file)) { # Check identity of fingerprints across readgroups call QC.CrossCheckFingerprints as CrossCheckFingerprints { @@ -216,7 +183,7 @@ workflow to_bam_workflow { input_bam_indexes = SortSampleBam.output_bam_index, haplotype_database_file = haplotype_database_file, metrics_filename = base_file_name + ".crosscheck", - disk_size = agg_bam_size + additional_disk, + total_input_size = agg_bam_size, preemptible_tries = agg_preemptible_tries } } @@ -239,7 +206,6 @@ workflow to_bam_workflow { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, output_prefix = base_file_name + ".preBqsr", - disk_size = agg_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries, contamination_underestimation_factor = 0.75 } @@ -266,8 +232,7 @@ workflow to_bam_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # We need disk to localize the sharded bam due to the scatter. - disk_size = (agg_bam_size / bqsr_divisor) + ref_size + dbsnp_size + additional_disk, + bqsr_scatter = bqsr_divisor, preemptible_tries = agg_preemptible_tries } } @@ -278,7 +243,6 @@ workflow to_bam_workflow { input: input_bqsr_reports = BaseRecalibrator.recalibration_report, output_report_filename = base_file_name + ".recal_data.csv", - disk_size = additional_disk, preemptible_tries = preemptible_tries } @@ -293,27 +257,22 @@ workflow to_bam_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - # We need disk to localize the sharded bam and the sharded output due to the scatter. 
- disk_size = ((agg_bam_size * 3) / bqsr_divisor) + ref_size + additional_disk, + bqsr_scatter = bqsr_divisor, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } } # Merge the recalibrated BAM files resulting from by-interval recalibration - call Processing.GatherBamFiles as GatherBamFiles { + call Processing.GatherSortedBamFiles as GatherBamFiles { input: input_bams = ApplyBQSR.recalibrated_bam, output_bam_basename = base_file_name, - # Multiply the input bam size by two to account for the input and output - disk_size = (2 * agg_bam_size) + additional_disk, + total_input_size = agg_bam_size, compression_level = compression_level, preemptible_tries = agg_preemptible_tries } - #BQSR bins the qualities which makes a significantly smaller bam - Float binned_qual_bam_size = size(GatherBamFiles.output_bam, "GB") - # QC the final BAM (consolidated after scattered BQSR) call QC.CollectReadgroupBamQualityMetrics as CollectReadgroupBamQualityMetrics { input: @@ -323,7 +282,6 @@ workflow to_bam_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -336,7 +294,6 @@ workflow to_bam_workflow { ref_dict = ref_dict, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -351,7 +308,6 @@ workflow to_bam_workflow { genotypes_index = fingerprint_genotypes_index, output_basename = base_file_name, sample = sample_name, - disk_size = binned_qual_bam_size + additional_disk, preemptible_tries = agg_preemptible_tries } } @@ -366,7 +322,6 @@ workflow to_bam_workflow { ref_fasta_index = ref_fasta_index, wgs_coverage_interval_list = wgs_coverage_interval_list, read_length = read_length, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -380,7 +335,6 @@ workflow to_bam_workflow { ref_fasta_index = ref_fasta_index, wgs_coverage_interval_list = wgs_coverage_interval_list, read_length = read_length, - disk_size = binned_qual_bam_size + ref_size + additional_disk, preemptible_tries = agg_preemptible_tries } @@ -390,7 +344,6 @@ workflow to_bam_workflow { input_bam = GatherBamFiles.output_bam, input_bam_index = GatherBamFiles.output_bam_index, read_group_md5_filename = recalibrated_bam_basename + ".bam.read_group_md5", - disk_size = binned_qual_bam_size + additional_disk, preemptible_tries = agg_preemptible_tries } diff --git a/tasks_pipelines/utilities.wdl b/tasks_pipelines/utilities.wdl index d2136aa..7845821 100644 --- a/tasks_pipelines/utilities.wdl +++ b/tasks_pipelines/utilities.wdl @@ -105,7 +105,7 @@ task ScatterIntervalList { Int interval_count = read_int(stdout()) } runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" memory: "2 GB" } } @@ -117,9 +117,11 @@ task ConvertToCram { File ref_fasta File ref_fasta_index String output_basename - Float disk_size Int preemptible_tries + Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + Int disk_size = ceil(2 * size(input_bam, "GB") + ref_size) + 20 + command <<< set -e set -o pipefail @@ -136,11 +138,11 @@ task ConvertToCram { samtools index ${output_basename}.cram >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: 
"us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: preemptible_tries memory: "3 GB" cpu: "1" - disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD" + disks: "local-disk " + disk_size + " HDD" } output { File output_cram = "${output_basename}.cram" @@ -165,7 +167,7 @@ task ConvertToBam { samtools index ${output_basename}.bam >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135" preemptible: 3 memory: "3 GB" cpu: "1" @@ -193,4 +195,3 @@ task SumFloats { preemptible: preemptible_tries } } -