-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile_report
109 lines (96 loc) · 4.71 KB
/
Snakefile_report
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
include: "Snakefile"
def mutations_to_plot(v):
mutations = {('h3n2', 'ha'):["HA1:135K", "HA1:135N", "HA1:212T", "HA1:128A", "HA1:214T"],
('h3n2', 'na'):["NA:329S", "NA:386S", "NA:126L"],
('h1n1pdm', 'ha'):["HA1:164T","HA1:183P", "HA1:235D", "HA1:233I"],
('h1n1pdm', 'na'):["NA:77A","NA:93H", "NA:416N"],
}
return mutations[(v.lineage, v.segment)]
def region_translations(w):
genes = gene_names(w)
return ["results/full-aaseq-seasonal-%s_%s_%s_%s_%s.fasta"%(g, w.region, w.lineage, w.segment, w.resolution)
for g in genes]
regions_to_graph = ['north_america', 'europe', 'china']
for seg, genes in genes_to_translate.items():
rule:
input:
metadata = rules.parse.output.metadata,
sequences = 'results/sequences_{lineage}_{segment}.fasta', #.format(seg=seg),
exclude = files.outliers,
reference = "config/{lineage}_{segment}_outgroup.gb" #.format(seg=seg)
params:
genes=genes,
region="{region}"
output:
alignments = expand("results/full-aaseq-seasonal-{gene}_{{region}}_{{lineage}}_{{segment}}_{{resolution}}.fasta",
gene=genes)
shell:
"""
python scripts/full_region_alignments.py --sequences {input.sequences}\
--metadata {input.metadata} \
--exclude {input.exclude} \
--genes {params.genes} \
--region {params.region} \
--reference {input.reference} \
--output {output.alignments}
"""
rule complete_mutation_frequencies:
input:
metadata = rules.parse.output.metadata,
alignment = region_translations
params:
genes = gene_names,
min_date = min_date,
max_date = max_date,
min_freq = 0.01,
pivots_per_year = pivots_per_year
output:
mut_freq = "results/mutation_frequencies_{region}_{lineage}_{segment}_{resolution}.json"
shell:
"""
augur frequencies --alignments {input.alignment} \
--metadata {input.metadata} \
--gene-names {params.genes} \
--pivots-per-year {params.pivots_per_year} \
--min-date {params.min_date} \
--max-date {params.max_date} \
--minimal-frequency {params.min_freq} \
--output {output.mut_freq}
"""
rule frequency_graphs:
input:
mutations = expand("results/mutation_frequencies_{region}_{{lineage}}_{{segment}}_{{resolution}}.json",
region=regions_to_graph),
tree = "results/tree_frequencies_{lineage}_{segment}_{resolution}.json"
params:
mutations = mutations_to_plot,
regions = regions_to_graph
output:
mutations = "figures/mutation_frequencies_{lineage}_{segment}_{resolution}.png",
total_counts = "figures/total-sample-count_{lineage}_{segment}_{resolution}.png",
tree_counts = "figures/tree-sample-count_{lineage}_{segment}_{resolution}.png"
shell:
"""
python scripts/graph_frequencies.py --mutation-frequencies {input.mutations} \
--tree-frequencies {input.tree} \
--mutations {params.mutations} \
--regions {params.regions} \
--output-mutations {output.mutations} \
--output-total-counts {output.total_counts} \
--output-tree-counts {output.tree_counts}
"""
rule mutation_statistics:
input:
mutations = "results/mutation_frequencies_{region}_{lineage}_{segment}_{resolution}.json",
node_data = "results/aamuts_seasonal_{lineage}_{segment}_{resolution}.json"
params:
offset = 4,
n_out=20
output:
rising = "results/rising_mutations_{region}_{lineage}_{segment}_{resolution}.txt",
recurring_mut = "results/recurring_mutations_{region}_{lineage}_{segment}_{resolution}.txt",
recurring_pos = "results/recurring_positions_{region}_{lineage}_{segment}_{resolution}.txt"
run:
from scripts.mutation_statistics import rising_mutations, recurring_mutations
rising_mutations(input.mutations, offset=params.offset, fname=output.rising, n_out=params.n_out)
recurring_mutations(input.node_data, fname_by_position=output.recurring_pos, fname_by_mutation=output.recurring_mut, n_out=params.n_out)