-
Notifications
You must be signed in to change notification settings - Fork 2
/
build-celltype-ref.nf
172 lines (154 loc) · 4.62 KB
/
build-celltype-ref.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env nextflow
// use Nextflow DSL2 syntax for the processes and workflows defined in this script
nextflow.enable.dsl=2
// Save a SingleR reference as an RDS file named <ref_name>_<ref_source>_<suffix>.rds
process save_singler_refs {
  container params.SCPCATOOLS_CONTAINER
  publishDir "${params.singler_references_dir}"
  label 'mem_8'

  input:
    tuple val(ref_name), val(ref_source)

  output:
    // the script is only given the file prefix; the rest of the name is matched with a glob
    tuple val(ref_name), path("${ref_name}_${ref_source}_*.rds")

  script:
    """
    save_singler_refs.R \
      --ref_name ${ref_name} \
      --ref_file_prefix "${ref_name}_${ref_source}"
    """

  stub:
    // the real script adds a version to the file name, so use a placeholder version here
    """
    touch "${ref_name}_${ref_source}_0-0-0.rds"
    """
}
// Train a SingleR model from a saved reference file.
// The model file is named after the reference file: <reference base name>_model.rds
process train_singler_models {
  container params.SCPCATOOLS_CONTAINER
  publishDir "${params.singler_models_dir}"
  label 'cpus_4'
  label 'mem_16'

  input:
    tuple val(ref_name), path(ref_file)
    path t2g_3col_path

  output:
    path celltype_model

  script:
    // `celltype_model` is intentionally not declared with `def`:
    // the output block needs to see it
    def model_prefix = file("${ref_file}").baseName
    celltype_model = "${model_prefix}_model.rds"
    """
    train_SingleR.R \
      --ref_file ${ref_file} \
      --output_file ${celltype_model} \
      --fry_tx2gene ${t2g_3col_path} \
      --label_name ${params.singler_label_name} \
      --seed ${params.seed} \
      --threads ${task.cpus}
    """

  stub:
    // create an empty file with the same name the real script would write
    def model_prefix = file("${ref_file}").baseName
    celltype_model = "${model_prefix}_model.rds"
    """
    touch ${celltype_model}
    """
}
// Write singler_models.tsv from the list of SingleR model files.
process catalog_singler_models {
  container params.TIDYVERSE_CONTAINER
  publishDir "${params.singler_models_dir}"

  input:
    // passed as a value (not staged as files) and handed to the script as one quoted argument
    val celltype_references

  output:
    path "singler_models.tsv"

  script:
    """
    make_celltype_ref_table.R "${celltype_references}" singler_models.tsv
    """

  stub:
    """
    touch singler_models.tsv
    """
}
// Get the reference version from the name of the marker gene file.
// The version is the date stored in the file name directly before the `.tsv` extension,
// which must be in ISO8601 format (20YY-MM-DD).
// Stops the run with an explanatory message if no such date is found.
def get_cellassign_ref_version(marker_gene_file) {
  // the dot before `tsv` is escaped so that only a literal `.tsv` extension is accepted
  def date_match = (marker_gene_file =~ /.+(20[0-9]{2}\-[0-9]{2}\-[0-9]{2})\.tsv/)
  if (!date_match.find()) {
    error("Could not get a reference version from '${marker_gene_file}': the file name must contain an ISO8601 date (20YY-MM-DD) directly before the .tsv extension")
  }
  return date_match.group(1)
}

// Build a CellAssign reference file for the given organs from the marker gene file and GTF.
// The output file is named <ref_name>_<ref_source>_<ref_version>.tsv
process generate_cellassign_refs {
  container params.SCPCATOOLS_CONTAINER
  publishDir "${params.cellassign_ref_dir}"
  label 'mem_8'

  input:
    tuple val(ref_name), val(ref_source), val(organs)
    path ref_gtf
    path marker_gene_file

  output:
    path ref_file

  script:
    // `ref_file` is intentionally not declared with `def`: the output block needs to see it
    ref_version = get_cellassign_ref_version(marker_gene_file)
    ref_file = "${ref_name}_${ref_source}_${ref_version}.tsv"
    """
    generate_cellassign_refs.R \
      --organs "${organs}" \
      --marker_gene_file ${marker_gene_file} \
      --gtf_file ${ref_gtf} \
      --ref_mtx_file ${ref_file}
    """

  stub:
    // use the same naming as the real script so downstream steps see the same file name
    ref_version = get_cellassign_ref_version(marker_gene_file)
    ref_file = "${ref_name}_${ref_source}_${ref_version}.tsv"
    """
    touch ${ref_file}
    """
}
// Write cellassign_references.tsv from the list of CellAssign reference files.
process catalog_cellassign_refs {
  container params.TIDYVERSE_CONTAINER
  publishDir "${params.cellassign_ref_dir}"

  input:
    // passed as a value (not staged as files) and handed to the script as one quoted argument
    val celltype_references

  output:
    path "cellassign_references.tsv"

  script:
    """
    make_celltype_ref_table.R "${celltype_references}" cellassign_references.tsv
    """

  stub:
    """
    touch cellassign_references.tsv
    """
}
// Build all cell type references listed in the cell type reference metadata file:
// - SingleR: save each reference, train a model from it, then catalog the model files
// - CellAssign: generate each reference file, then catalog the reference files
workflow build_celltype_ref {
  // read in json file with all reference paths for the requested organism
  ref_paths = Utils.getMetaVal(file(params.ref_json), params.celltype_organism)

  // get path to tx2gene and gtf
  t2g_3col_path = file("${params.ref_rootdir}/${ref_paths["t2g_3col_path"]}")
  ref_gtf = file("${params.ref_rootdir}/${ref_paths["ref_gtf"]}")

  // create channel of cell type ref files and names, split by cell typing method
  celltype_refs_ch = Channel.fromPath(params.celltype_ref_metadata)
    .splitCsv(header: true, sep: '\t')
    .branch{ ref ->
      singler: ref.celltype_method == "SingleR"
      cellassign: ref.celltype_method == "CellAssign"
    }

  // singler refs to download and train
  // values are consumed by the process in the order given here: ref_name, ref_source
  singler_refs_ch = celltype_refs_ch.singler
    .map{ ref -> [
      ref_name: ref.celltype_ref_name,
      ref_source: ref.celltype_ref_source
    ]}

  // download and save reference files
  save_singler_refs(singler_refs_ch)

  // train cell type references using SingleR
  train_singler_models(save_singler_refs.out, t2g_3col_path)

  // join model file names into a comma separated string
  // files are sorted by name first: tasks finish in an arbitrary order, and without sorting
  // the joined string (and so the catalog task input) would change from run to run
  singler_models = train_singler_models.out
    .toSortedList{ a, b -> a.name <=> b.name }
    .flatMap{ sorted_models -> sorted_models }
    .reduce{ a, b -> "$a,$b" }
  catalog_singler_models(singler_models)

  // cellassign refs
  // values are consumed by the process in the order given here: ref_name, ref_source, organs
  cellassign_refs_ch = celltype_refs_ch.cellassign
    .map{ ref -> [
      ref_name: ref.celltype_ref_name,
      ref_source: ref.celltype_ref_source,
      organs: ref.organs
    ]}

  generate_cellassign_refs(cellassign_refs_ch, ref_gtf, params.panglao_marker_genes_file)

  // join reference file names into a comma separated string, sorted by name as above
  cellassign_refs = generate_cellassign_refs.out
    .toSortedList{ a, b -> a.name <=> b.name }
    .flatMap{ sorted_refs -> sorted_refs }
    .reduce{ a, b -> "$a,$b" }
  catalog_cellassign_refs(cellassign_refs)
}
// entry point: build the cell type references when this script is run directly
workflow {
  build_celltype_ref()
}