Skip to content

Commit

Permalink
feat: add further plots to compare across models and missingness
Browse files Browse the repository at this point in the history
  • Loading branch information
dlaehnemann committed Nov 20, 2024
1 parent 70cd27d commit e8cebab
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 1 deletion.
3 changes: 2 additions & 1 deletion workflow/envs/ggtree.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ channels:
- nodefaults
dependencies:
- bioconductor-ggtree =3.8
- r-tidyverse =2.0
- r-tidyverse =2.0
- r-hexbin =1.28
3 changes: 3 additions & 0 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def get_final_output():
expand(
[
"results/trees/{ind}.max_missing_stable_topology_selection.pdf",
"results/trees/{ind}.raxml.support_across_missingness.pdf",
"results/trees/{ind}.raxml.support_vs_branch_length.full_data.pdf",
"results/trees/{ind}.raxml.support_vs_branch_length.summary.pdf",
"results/trees/{ind}/{model}/max_{max_missing}_missing/{ind}.{model}.max_{max_missing}_missing.raxml.support.pdf",
],
ind=individual,
Expand Down
35 changes: 35 additions & 0 deletions workflow/rules/phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,41 @@ rule plot_support_tree:
"../scripts/plot_support_tree.R"


rule plot_support_values_across_missingness:
input:
support_trees=expand(
"results/raxml_ng/{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.raxml.support",
model=lookup("raxml_ng/models", within=config),
n_missing_cells=lookup(dpath="raxml_ng/max_missing", within=config)
)
output:
support_plot="results/trees/{individual}.raxml.support_across_missingness.pdf",
log:
"logs/trees/{individual}.raxml.support_across_missingness_plot.log",
conda:
"../envs/ggtree.yaml"
script:
"../scripts/plot_bootstrap_support_values_across_missingness.R"


rule plot_support_values_vs_branch_lengths:
input:
support_trees=expand(
"results/raxml_ng/{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.raxml.support",
model=lookup("raxml_ng/models", within=config),
n_missing_cells=lookup(dpath="raxml_ng/max_missing", within=config)
)
output:
data_plot="results/trees/{individual}.raxml.support_vs_branch_length.full_data.pdf",
summary_plot="results/trees/{individual}.raxml.support_vs_branch_length.summary.pdf",
log:
"logs/trees/{individual}.raxml.support_values_vs_branch_lengths.log",
conda:
"../envs/ggtree.yaml"
script:
"../scripts/plot_bootstrap_support_values_vs_branch_lengths.R"


rule raxml_ng_ancestral:
input:
msa="results/raxml_ng/input/{individual}.ml_gt_and_likelihoods.catg",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")


library("treeio")
library("tidyverse")

best_trees_support_values <- snakemake@input[["support_trees"]] |>
map(
\(filename)
read.newick(
filename,
node.label = "support"
) |>
as_tibble() |>
drop_na(support) |>
add_column(
max_missing = str_extract(
filename,
"max_(\\d+)_missing\\.support",
group = 1
),
model = str_extract(
filename,
"results/([^/]+)/max_",
group = 1
)
) |>
mutate(max_missing = as.integer(max_missing)) |>
select(support, max_missing, model)) |>
list_rbind()

support_across_missingness <- ggplot(
best_trees_support_values,
aes(x=factor(max_missing), y=support)
) +
geom_violin() +
geom_jitter(width=0.25) +
stat_summary(color="red") +
facet_grid(
cols = vars(model)
)

number_of_models <- best_trees_support_values |> distinct(model) |> count() |> pull(n)
plot_width = 2 + number_of_models * 5

ggsave(
filename = snakemake@output[["support_plot"]],
plot = support_across_missingness,
width = plot_width
)
148 changes: 148 additions & 0 deletions workflow/scripts/plot_bootstrap_support_values_vs_branch_lengths.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")


library("treeio")
library("tidyverse")
library("hexbin")

best_trees_support_vs_branch_length <- snakemake@input[["support_trees"]] |>
map(
\(filename)
read.newick(
filename,
node.label = "support"
) |>
as_tibble() |>
drop_na(
branch.length,
support
) |>
add_column(
max_missing = str_extract(
filename,
"max_(\\d+)_missing\\.support",
group = 1
),
model = str_extract(
filename,
"results/([^/]+)/max_",
group = 1
)
) |>
mutate(max_missing = as.integer(max_missing)) |>
select(
branch.length,
support,
max_missing,
model
)
) |>
list_rbind()

number_of_models <- best_trees_support_vs_branch_length |> distinct(model) |> count() |> pull(n)
plot_height = number_of_models * 3
plot_width = 2 + plot_height

data_plot <- ggplot(
data = best_trees_support_vs_branch_length,
mapping = aes(
x=branch.length,
y=support
)
) +
geom_hex(
bins = 10
) +
scale_fill_viridis_c() +
geom_point(
color = "orange",
alpha = .6
) +
facet_grid(
cols=vars(model),
rows=vars(max_missing)
) +
theme_bw() +
theme(
text = element_text(size=rel(5.5)),
# x-axis facet labels do not seem to inherit from text above
strip.text.x = element_text(size=rel(5.5)),
axis.text.x = element_text(
angle=45,
vjust=0.9,
hjust=0.9
)
)


ggsave(
filename = snakemake@output[["data_plot"]],
plot = data_plot,
width = plot_width,
height = plot_height
)


best_trees_2d_summary <- best_trees_support_vs_branch_length |>
group_by(model, max_missing) |>
summarise(
branch_length_summary = mean_se(branch.length),
support_summary = mean_se(support),
.groups = "drop"
) |>
unnest(
cols = c(
branch_length_summary,
support_summary
),
names_sep="_"
)

summary_plot <- ggplot(
data = best_trees_2d_summary,
mapping = aes(
x=branch_length_summary_y,
y=support_summary_y
)
) +
geom_point(
color="red"
) +
geom_errorbarh(
mapping = aes(
xmin=branch_length_summary_ymin,
xmax=branch_length_summary_ymax
),
color="red"
) +
geom_errorbar(
mapping = aes(
ymin=support_summary_ymin,
ymax=support_summary_ymax
),
color="red"
) +
facet_grid(
cols=vars(model),
rows=vars(max_missing)
) +
theme_bw() +
theme(
text = element_text(size=rel(5.5)),
# x-axis facet labels do not seem to inherit from text above
strip.text.x = element_text(size=rel(5.5)),
axis.text.x = element_text(
angle=45,
vjust=0.9,
hjust=0.9
)
)

ggsave(
filename = snakemake@output[["summary_plot"]],
plot = summary_plot,
width = plot_width,
height = plot_height
)

0 comments on commit e8cebab

Please sign in to comment.