-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsbx_coassembly.smk
144 lines (124 loc) · 4.34 KB
/
sbx_coassembly.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import yaml
COASSEMBLY_FP = ASSEMBLY_FP / "coassembly"
try:
BENCHMARK_FP
except NameError:
BENCHMARK_FP = Cfg["all"]["output_fp"] / "benchmarks"
try:
LOG_FP
except NameError:
LOG_FP = Cfg["all"]["output_fp"] / "logs"
def get_coassembly_ext_path() -> Path:
ext_path = Path(sunbeam_dir) / "extensions" / "sbx_coassembly"
if ext_path.exists():
return ext_path
raise Error(
"Filepath for assembly not found, are you sure it's installed under extensions/sbx_coassembly?"
)
SBX_COASSEMBLY_VERSION = open(get_coassembly_ext_path() / "VERSION").read().strip()
def zip3l(l1, l2, l3):
return list(zip(l1, l2, l3))
def coassembly_groups(fp, sample_list):
if fp == "":
K = ["all"] * (len(sample_list) * 2)
V = list(sorted(sample_list)) * 2
R = [1] * len(sample_list) + [2] * len(sample_list)
return [K, V, R]
groups = yaml.safe_load(open(str(fp)).read())
sorted_keys = sorted(groups.keys())
K = [] # group
V = [] # sample
for k in sorted_keys:
K += [k] * len(groups[k])
V += groups[k]
R = [1] * len(V) + [2] * len(V)
return [K + K, V + V, R]
localrules:
all_coassemble,
rule all_coassemble:
input:
a=expand(
str(COASSEMBLY_FP / "{group}_final_contigs.fa"),
group=list(
set(
coassembly_groups(
Cfg["sbx_coassembly"]["group_file"], Samples.keys()
)[0]
)
),
),
b=expand(
str(COASSEMBLY_FP / "agglomerate" / "{sample}_{group}_{rp}.fastq"),
zip3l,
group=coassembly_groups(
Cfg["sbx_coassembly"]["group_file"], Samples.keys()
)[0],
sample=coassembly_groups(
Cfg["sbx_coassembly"]["group_file"], Samples.keys()
)[1],
rp=coassembly_groups(Cfg["sbx_coassembly"]["group_file"], Samples.keys())[
2
],
),
rule prep_samples_for_concatenation_paired:
input:
r1=str(QC_FP / "decontam" / "{sample}_1.fastq.gz"),
r2=str(QC_FP / "decontam" / "{sample}_2.fastq.gz"),
output:
r1=temp(str(COASSEMBLY_FP / "agglomerate" / "{sample}_{group}_1.fastq")),
r2=temp(str(COASSEMBLY_FP / "agglomerate" / "{sample}_{group}_2.fastq")),
benchmark:
BENCHMARK_FP / "prep_samples_for_concatenation_paired_{sample}_{group}.tsv"
log:
LOG_FP / "prep_samples_for_concatenation_paired_{sample}_{group}.log",
threads: Cfg["sbx_coassembly"]["threads"]
conda:
"envs/sbx_coassembly_env.yml"
container:
f"docker://sunbeamlabs/sbx_coassembly:{SBX_COASSEMBLY_VERSION}"
shell:
"""
pigz -d -p {threads} -c {input.r1} > {output.r1}
pigz -d -p {threads} -c {input.r2} > {output.r2}
"""
rule combine_groups_paired:
input:
rules.all_coassemble.input.b,
output:
r1=str(COASSEMBLY_FP / "fastq" / "{group}_1.fastq.gz"),
r2=str(COASSEMBLY_FP / "fastq" / "{group}_2.fastq.gz"),
params:
w1=str(str(COASSEMBLY_FP / "agglomerate") + str("/*{group}_1.fastq")),
w2=str(str(COASSEMBLY_FP / "agglomerate") + str("/*{group}_2.fastq")),
threads: Cfg["sbx_coassembly"]["threads"]
conda:
"envs/sbx_coassembly_env.yml"
container:
f"docker://sunbeamlabs/sbx_coassembly:{SBX_COASSEMBLY_VERSION}"
shell:
"""
cat {params.w1} | pigz -p {threads} > {output.r1}
cat {params.w2} | pigz -p {threads} > {output.r2}
"""
rule coassemble_paired:
input:
r1=str(COASSEMBLY_FP / "fastq" / "{group}_1.fastq.gz"),
r2=str(COASSEMBLY_FP / "fastq" / "{group}_2.fastq.gz"),
output:
str(COASSEMBLY_FP / "{group}_final_contigs.fa"),
benchmark:
BENCHMARK_FP / "coassemble_paired_{group}.tsv"
log:
LOG_FP / "coassemble_paired_{group}.log",
params:
assembly_dir=str(COASSEMBLY_FP / "{group}"),
threads: Cfg["sbx_coassembly"]["threads"]
conda:
"envs/sbx_coassembly_env.yml"
container:
f"docker://sunbeamlabs/sbx_coassembly:{SBX_COASSEMBLY_VERSION}"
shell:
"""
megahit -1 {input.r1} -2 {input.r2} -t {threads} -o {params.assembly_dir} 2>&1 | tee {log}
mv {params.assembly_dir}/final.contigs.fa {output}
"""