Skip to content

Commit

Permalink
Support phasing with a reference panel (#93)
Browse files Browse the repository at this point in the history
* add haplotype_reference_panel and its index file to ShapeIt4 call
  • Loading branch information
jessicaway authored Jul 28, 2021
1 parent f846808 commit 9e10e8e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
14 changes: 10 additions & 4 deletions pipelines/phasing-vector/gcp/Phasing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,20 @@ workflow Phasing {
Array[File] genetic_maps
Array[File] interval_lists

File? haplotype_reference_panel
Array[File]? haplotype_reference_panels
Array[File]? haplotype_reference_panel_indices

ReferenceSequence reference
RunTimeSettings runTimeSettings
}

# TODO: extract sample_id, sample_bam, and sample_vcf information from the manifest file (or inputs)
# This is a WDL hack to create a pseudo None: the `if (false)` body never runs, so `none` stays an undefined optional File
if (false) {
File? none = "None"
}

# Step 1: Read-backed phasing
# Scatter over chromosomes
Expand All @@ -54,7 +60,7 @@ workflow Phasing {
input_bam = input_bams[idx],
input_bam_index = input_bam_indices[idx],
sample_zarr = sample_zarrs[idx],
# sample_vcf = sample_vcfs[idx],
# sample_vcf = sample_vcfs[idx],
called_sites_zarr = called_sites_zarr,
phased_sites_zarr = phased_sites_zarr,
contig = chromosome,
Expand All @@ -72,9 +78,9 @@ workflow Phasing {
phased_sample_vcf_indices = ReadBackedPhasing.phased_sample_vcf_index,
contig = chromosome,
genetic_map = genetic_map,
haplotype_reference_panel = haplotype_reference_panel,
haplotype_reference_panel = if defined(haplotype_reference_panels) then select_first([haplotype_reference_panels])[chr_idx] else none,
haplotype_reference_panel_index = if defined(haplotype_reference_panel_indices) then select_first([haplotype_reference_panel_indices])[chr_idx] else none,
interval_list = interval_list,
reference = reference,
runTimeSettings = runTimeSettings
}
}
Expand Down
7 changes: 3 additions & 4 deletions pipelines/phasing-vector/gcp/StatisticalPhasing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ version 1.0
##
import "../../../structs/gcp/RunTimeSettings.wdl"
import "../../../structs/ReferenceSequence.wdl"
import "../../../tasks/gcp/Tasks.wdl" as Tasks
import "../../../tasks/gcp/StatisticalPhasingTasks.wdl" as StatisticalPhasingTasks

Expand All @@ -22,10 +21,9 @@ workflow StatisticalPhasing {
File genetic_map
File interval_list

#TODO - plug in haplotype_reference_panel
File? haplotype_reference_panel
File? haplotype_reference_panel_index

ReferenceSequence reference
RunTimeSettings runTimeSettings
}

Expand Down Expand Up @@ -55,7 +53,8 @@ workflow StatisticalPhasing {
project_id = project_id,
region = region,
genetic_map = genetic_map,
reference = reference,
haplotype_reference_panel = haplotype_reference_panel,
haplotype_reference_panel_index = haplotype_reference_panel_index,
runTimeSettings = runTimeSettings
}
call Tasks.Tabix as Tabix {
Expand Down
7 changes: 4 additions & 3 deletions tasks/gcp/StatisticalPhasingTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ task ShapeIt4 {
Int num_threads = num_cpu
String mcmc_iterations = "5b,1p,1b,1p,1b,1p,5m"
Int pbwt_depth = 4
# TODO - how to handle reference as an option
ReferenceSequence? reference
File? haplotype_reference_panel
File? haplotype_reference_panel_index

String docker_tag = "us.gcr.io/broad-gotc-prod/malariagen/shapeit4:4.2.1"
# Compute Engine always stops preemptible instances after they run for 24 hours
Expand Down Expand Up @@ -77,7 +77,8 @@ task ShapeIt4 {
--sequencing \
--use-PS 0.0001 \
--log phased.log \
--output ~{output_filename}
--output ~{output_filename} \
~{"--reference " + haplotype_reference_panel}
}

runtime {
Expand Down

0 comments on commit 9e10e8e

Please sign in to comment.