-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy patheval_variant_custom.smk
92 lines (80 loc) · 3.24 KB
/
eval_variant_custom.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
include: "rules/load_config_custom.smk"
try:
vcfs = list(map(str.strip, config["vcfs"].split(",")))
except AttributeError as e:
raise PathNotGiven(
"The VCF files from SNP calling are not specified.")
snp_dir = results_dir + "/snp"
snpcall_dir = snp_dir + "/callers"
g1_name, g2_name = (os.path.splitext(os.path.basename(ref))[0] for ref in refs)
gdiff_name = g1_name + "_" + g2_name
labels = config['labels']
novenn = config['novenn']
callers = [os.path.splitext(os.path.basename(vcf))[0] for vcf in vcfs] if labels is None else [label for label in labels.split(',')]
caller_vcf_dict = dict(zip(callers, vcfs))
# def get_vcf(wc):
# return caller_vcf_dict[wc.snpcaller]
# for vcf_file in vcfs:
# if vcf_file.endswith(wc.snpcaller + ".vcf"):
# return vcf_file
rule all:
input:
snp_benchmark_figure = results_dir + "/final_figures/snpcall_benchmark.pdf",
# snp_venn_figure = results_dir + "/final_figures/snpcall_venn.pdf",
snp_benchmark_table = results_dir + "/final_tables/snpcall_benchmark.txt"
# The first given ref should be the ref used to generate the VCFs
rule gdiff:
input:
refs
output:
delta = snp_dir + "/nucmer/" + gdiff_name + ".delta",
snps = snp_dir + "/nucmer/" + gdiff_name + ".maskrepeat.snps"
# mask_repeat_variants = snp_dir + "/nucmer/" + gdiff_name + ".maskrepeat.variants",
# variants_vcf = snp_dir + "/nucmer/" + gdiff_name + ".maskrepeat.variants.vcf",
# variants_vcf_bgz = snp_dir + "/nucmer/" + gdiff_name + ".maskrepeat.variants.vcf.gz"
conda:
"config/conda_env.yaml"
params:
genome_diff_prefix = snp_dir + "/nucmer/" + gdiff_name
shell:
"""
nucmer --prefix={params.genome_diff_prefix} {input}
show-snps -CTHIlr <(delta-filter -r -q {output.delta}) > {output.snps}
"""
# python3 program/mummer2vcf.py -s {output.mask_repeat_variants} --output-header -n -g {input[0]} > \
# {output.variants_vcf}
# bgzip -c {output.variants_vcf} > {output.variants_vcf_bgz}
# tabix -p vcf {output.variants_vcf_bgz}
rule extract_TP:
input:
vcf = lambda wc: caller_vcf_dict[wc.snpcaller],
genome_diff = rules.gdiff.output.snps
output:
filtered = snpcall_dir + "/{snpcaller}.filtered.vcf",
fp = snpcall_dir + "/fp/{snpcaller}.fp.vcf"
params:
data = "custom",
outdir = snpcall_dir
conda:
"config/conda_env.yaml"
threads: threads
shell:
"""
python program/extract_TP_FP_SNPs.py {input.vcf} {input.genome_diff} {params.data} {params.outdir} {wildcards.snpcaller}
"""
rule snp_benchmark:
input:
vcfs = expand(snpcall_dir + "/{snpcaller}.filtered.vcf",
snpcaller=callers),
genome_diff = rules.gdiff.output.snps
output:
snp_benchmark_table = results_dir + "/final_tables/snpcall_benchmark.txt",
snp_benchmark_figure = results_dir + "/final_figures/snpcall_benchmark.pdf"
params:
callers = callers,
novenn = novenn,
snp_venn_figure = results_dir + "/final_figures/snpcall_venn.pdf"
conda:
"config/conda_env.yaml"
script:
"scripts/custom_snp_benchmark.R"