-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_association_test_on_subsets_workflow.wdl
154 lines (117 loc) · 5.17 KB
/
run_association_test_on_subsets_workflow.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
version development
# import "https://raw.githubusercontent.com/shukwong/gwas-pipelines/master/tasks/preprocess_workflow.wdl" as preprocess
# import "https://raw.githubusercontent.com/shukwong/gwas-pipelines/master/tasks/saige_workflow.wdl" as saige
# import "https://raw.githubusercontent.com/shukwong/gwas-pipelines/master/tasks/bolt_workflow.wdl" as bolt
import "run_meta_analysis_workflow.wdl" as meta_analysis
workflow gwas_subsets {
    # Splits the covariate table with get_covar_subsets, then calls the imported
    # run_meta_analysis workflow once per resulting "*_covars.tsv" file and
    # gathers the per-subset BOLT / SAIGE METAL outputs into arrays.

    input {
        #File genotype_bed
        #File genotype_bim
        #File genotype_fam
        #File genotype_samples_to_keep_file
        #File imputed_samples_to_keep_file

        # Forwarded unchanged to every run_meta_analysis call.
        File batch_tsv_file
        # Inputs of get_covar_subsets (variable_info_tsv_file is also forwarded
        # to run_meta_analysis).
        File covariate_tsv_file
        File variable_info_tsv_file
        File sample_sets_json_file

        # String phenoCol
        # String covar_sampleID_colname

        # Forwarded as-is to run_meta_analysis; presumably select which
        # association tools run -- confirm against run_meta_analysis_workflow.wdl.
        Boolean? useBOLT
        Boolean? useSAIGE

        #for bolt
        File? genetic_map_file
        File? ld_scores_file

        Float? minMAF=0.001
        Float? minMAC=1

        File? chain_file

        String? id_delim #delim character for vcf file, if not defined, double ID is assumed
        String? dosageField
    }

    #read subset information
    call get_covar_subsets {
        input:
            covariate_tsv_file = covariate_tsv_file,
            variable_info_tsv_file = variable_info_tsv_file,
            sample_sets_json_file = sample_sets_json_file
    }

    # Each of the files below is consumed through its first line only.
    #
    # The two covariate list files may legitimately be empty (no covariates of
    # that kind); read_lines() then returns an empty array and a bare [0] would
    # abort the workflow with an out-of-bounds error, so fall back to "".
    Array[String] binary_covar_list_lines = read_lines(get_covar_subsets.binary_covar_list_file)
    String binary_covar_list = if length(binary_covar_list_lines) > 0 then binary_covar_list_lines[0] else ""

    Array[String] continuous_covar_list_lines = read_lines(get_covar_subsets.continuous_covar_list_file)
    String continuous_covar_list = if length(continuous_covar_list_lines) > 0 then continuous_covar_list_lines[0] else ""

    # The phenotype column, sample ID column and phenotype type are indexed
    # without a guard on purpose: an empty file here should still fail.
    Array[String] phenoCol_lines = read_lines(get_covar_subsets.phenotype_line_file)
    String phenoCol = phenoCol_lines[0]

    Array[String] covar_sampleID_colname_lines = read_lines(get_covar_subsets.sampleid_line_file)
    String covar_sampleID_colname = covar_sampleID_colname_lines[0]

    Array[String] phenotype_type_lines = read_lines(get_covar_subsets.phenotype_type_file)
    String phenotype_type = phenotype_type_lines[0]

    scatter (covar_subset_file in get_covar_subsets.covar_subsets_files) {
        # Subset name = file name with the "_covars.tsv" suffix removed.
        String setname = basename(covar_subset_file, "_covars.tsv")

        call meta_analysis.run_meta_analysis {
            input:
                batch_tsv_file = batch_tsv_file,
                covariate_tsv_file = covar_subset_file,
                variable_info_tsv_file = variable_info_tsv_file,
                binary_covar_list = binary_covar_list,
                continuous_covar_list = continuous_covar_list,
                phenoCol = phenoCol,
                covar_sampleID_colname = covar_sampleID_colname,
                phenotype_type = phenotype_type,
                setname = setname,
                useBOLT = useBOLT,
                useSAIGE = useSAIGE,
                genetic_map_file = genetic_map_file,
                ld_scores_file = ld_scores_file,
                minMAF = minMAF,
                minMAC = minMAC,
                chain_file = chain_file,
                id_delim = id_delim,
                dosageField = dosageField
        }
    }

    output {
        # One entry per scattered subset; entries are optional (File?) because
        # the corresponding run_meta_analysis outputs are optional.
        Array[File?] bolt_output_file_list = run_meta_analysis.bolt_metal_output_file
        Array[File?] bolt_metal_manhattan_file_list = run_meta_analysis.bolt_metal_manhattan_file
        Array[File?] bolt_metal_qqplot_file_list = run_meta_analysis.bolt_metal_qqplot_file

        Array[File?] saige_output_file_list = run_meta_analysis.saige_metal_output_file
        Array[File?] saige_metal_manhattan_file_list = run_meta_analysis.saige_metal_manhattan_file
        Array[File?] saige_metal_qqplot_file_list = run_meta_analysis.saige_metal_qqplot_file
    }

    meta {
        author : "Wendy Wong"
        email : "[email protected]"
        description : "Biobank scale association study with multiple subsets (sample target variable)."
    }
}
task get_covar_subsets {
    # Downloads create_covar_files_by_set.R and runs it on the covariate table,
    # the variable info table and the sample-set JSON. The script is expected to
    # write, into the working directory, the "*_covars.tsv" files plus the
    # single-purpose text files collected in the output section below.

    input {
        File covariate_tsv_file
        File variable_info_tsv_file
        File sample_sets_json_file

        # Runtime knobs: memory in GiB, local disk size (unit is interpreted by
        # the backend), CPU count, and number of preemptible attempts.
        Int? memory = 4
        Int? disk = 200
        Int? threads = 1
        Int? preemptible_tries = 3
    }

    # The runtime inputs are declared optional, so a caller may pass None.
    # Resolve them to concrete Ints before they are concatenated into runtime
    # strings; the fallbacks mirror the input defaults above.
    Int memory_gib = select_first([memory, 4])
    Int disk_size = select_first([disk, 200])
    Int cpu_count = select_first([threads, 1])
    Int preemptible_attempts = select_first([preemptible_tries, 3])

    #TODO, change this to git clone a release version when the pipeline is finalized
    command <<<
        # Stop at the first failing step (e.g. a failed download) instead of
        # carrying on and failing later with a less obvious error.
        set -euo pipefail

        wget https://github.com/shukwong/gwas-pipelines/raw/master/scripts/create_covar_files_by_set.R

        R --vanilla -e 'install.packages("fastDummies",repos = "https://cloud.r-project.org/")'

        # Paths are quoted so localized files containing spaces are passed as
        # single arguments.
        Rscript create_covar_files_by_set.R "~{covariate_tsv_file}" "~{variable_info_tsv_file}" "~{sample_sets_json_file}"
    >>>

    runtime {
        docker: "rocker/tidyverse:4.0.0"
        memory: memory_gib + " GiB"
        disks: "local-disk " + disk_size + " HDD"
        cpu: cpu_count
        preemptible: preemptible_attempts
    }

    output {
        # Every "*_covars.tsv" / "*.log" file found in the working directory.
        Array[File] covar_subsets_files = glob("*_covars.tsv")
        Array[File] covar_subsets_log_files = glob("*.log")
        # Fixed-name text files written by the script.
        File binary_covar_list_file = "binary_covar_list.txt"
        File continuous_covar_list_file = "continuous_covar_list.txt"
        File phenotype_line_file = "phenotype_line.txt"
        File phenotype_type_file = "phenotype_type.txt"
        File sampleid_line_file = "sampleid_line.txt"
    }
}