diff --git a/experimental/GraphicalModels/README.md b/experimental/GraphicalModels/README.md new file mode 100644 index 000000000000..91cd0ee3dba2 --- /dev/null +++ b/experimental/GraphicalModels/README.md @@ -0,0 +1,13 @@ +# An example template for the experimental section + +## Aims + +This is an example for a file structure to set up a new package +in the experimental section. All files you find here are part of the +minimum requirements. See also the official Oscar documentation. + +## Status + +We plan to also provide a function to automatically copy this template +for you to start your own package. + diff --git a/experimental/GraphicalModels/docs/doc.main b/experimental/GraphicalModels/docs/doc.main new file mode 100644 index 000000000000..270b68367810 --- /dev/null +++ b/experimental/GraphicalModels/docs/doc.main @@ -0,0 +1,5 @@ +[ + "GraphicalModels" => [ + "graphicalmodels.md", + ] +] diff --git a/experimental/GraphicalModels/docs/src/graphicalmodels.md b/experimental/GraphicalModels/docs/src/graphicalmodels.md new file mode 100644 index 000000000000..43d59e63fc33 --- /dev/null +++ b/experimental/GraphicalModels/docs/src/graphicalmodels.md @@ -0,0 +1,67 @@ +```@meta +CurrentModule = Oscar +DocTestSetup = quote + using Oscar +end +``` +# Algebraic Phylogenetics + +The Algebraic Phylogenetics part of OSCAR provides functionality for +- specifying phylogenetic models +- parametrizing such models +- calculating their algebraic invariants + +## Models + +The five most common models in algebraic phylogenetics can automatically be specified by calling the below functions, each taking a tree as input and attaching transition matrices to its edges as defined by Jukes Cantor, Kimura, etc., respectively, returning a structure `PhylogeneticModel` or `GroupBasedPhylogeneticModel`. + +```@docs +cavender_farris_neyman_model(graph::Graph{Directed}) +jukes_cantor_model(graph::Graph{Directed}) +kimura2_model(graph::Graph{Directed}) +kimura3_model(graph::Graph{Directed}) +general_markov_model(graph::Graph{Directed}) +``` + +The models are by default in projective space. For their affine versions, call + +```@docs +affine_phylogenetic_model!(pm::PhylogeneticModel) +``` + +## Components of a model + +`PhylogeneticModel` specifies any phylogenetic tree model in probability coordinates and `GroupBasedPhylogeneticModel` can specify group-based, e.g. Fourier, coordinates. For any model, we can call its graph, transition matrices attached to the graph's edges, the number of states of each vertex random variable, and the corresponding polynomial rings. For instance for Jukes Cantor on the star with three leaves: + +```@docs +graph(pm::PhylogeneticModel) +transition_matrices(pm::PhylogeneticModel) +number_states(pm::PhylogeneticModel) +probability_ring(pm::PhylogeneticModel) +fourier_ring(pm::GroupBasedPhylogeneticModel) +fourier_parameters(pm::GroupBasedPhylogeneticModel) +group_of_model(pm::GroupBasedPhylogeneticModel) +``` + + +## Parametrization + +For each phylogenetic model, we can calculate the parametrization, a map from transition matrices to probabilities, parametrized in probability or Fourier coordinates. For group-based models, we can reparametrize between these and return the transformation matrix, and we can calculate equivalence classes of probabilities with the same parametrization. + +```@docs +probability_map(pm::PhylogeneticModel) +fourier_map(pm::GroupBasedPhylogeneticModel) +compute_equivalent_classes(parametrization::Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem}) +sum_equivalent_classes(equivalent_classes::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) +specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, f_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) +inverse_specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, f_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) +``` + +## Contact + +Please direct questions about this part of OSCAR to the following people: +* [Marina Garrote López](https://sites.google.com/view/marinagarrotelopez), +* possibly others. + +You can ask questions in the [OSCAR Slack](https://www.oscar-system.org/community/#slack). +Alternatively, you can [raise an issue on github](https://www.oscar-system.org/community/#how-to-report-issues). diff --git a/experimental/GraphicalModels/src/GraphicalModels.jl b/experimental/GraphicalModels/src/GraphicalModels.jl new file mode 100644 index 000000000000..6fff1d0400b1 --- /dev/null +++ b/experimental/GraphicalModels/src/GraphicalModels.jl @@ -0,0 +1,38 @@ +include("PhylogeneticModels.jl") +include("PhylogeneticAuxiliary.jl") +include("PhylogeneticParametrization.jl") + +#export models +export cavender_farris_neyman_model +export jukes_cantor_model +export kimura2_model +export kimura3_model +export general_markov_model +export affine_phylogenetic_model! + +#export phylogenetic models attributes +export phylogenetic_model +export graph +export number_states +export transition_matrices +export probability_ring +export root_distribution +export fourier_parameters +export fourier_ring +export group_of_model + +#export probability and fourier map +export probability_map +export fourier_map + +# export functions to calculate equivalent classes +export compute_equivalent_classes +export sum_equivalent_classes + +# export transformation matrices +export specialized_fourier_transform +export inverse_specialized_fourier_transform + +# export structs for GroupBasedPhylogeneticModel,PhylogeneticModel +export PhylogeneticModel +export GroupBasedPhylogeneticModel diff --git a/experimental/GraphicalModels/src/PhylogeneticAuxiliary.jl b/experimental/GraphicalModels/src/PhylogeneticAuxiliary.jl new file mode 100644 index 000000000000..9cd9904c3be7 --- /dev/null +++ b/experimental/GraphicalModels/src/PhylogeneticAuxiliary.jl @@ -0,0 +1,89 @@ +########################## +#### GROUP OPERATIONS #### +########################## +# This function implements the group operation on Z2 x Z2 and is subsequently +# used to check the condition given in the computation of the Fourier +# coordinates and to calculate the sum of the leaves beneath a given edge. + +function group_sum(pm::GroupBasedPhylogeneticModel, states::Vector{Int}) + group = group_of_model(pm) + return sum(group[states]).%2 +end + +function is_zero_group_sum(pm::GroupBasedPhylogeneticModel, states::Vector{Int}) + ng = length(states) + return group_sum(pm, [states[1]]) == group_sum(pm, states[2:ng]) +end + +function which_group_element(pm::GroupBasedPhylogeneticModel, elem::Vector{Int64}) + group = group_of_model(pm) + return findall([all(group[i].==elem) for i in 1:length(group)])[1] +end + + +######################################## +#### AUXILIARY FUNCTIONS FOR GRAPHS #### +######################################## + +function interior_nodes(graph::Graph) + big_graph = Polymake.graph.Graph(ADJACENCY = pm_object(graph)) + degrees = big_graph.NODE_DEGREES + return findall(x -> x > 1, degrees) +end + +function leaves(graph::Graph) + big_graph = Polymake.graph.Graph(ADJACENCY = pm_object(graph)) + degrees = big_graph.NODE_DEGREES + return findall(x -> x == 1, degrees) +end + +function vertex_descendants(v::Int, gr::Graph, desc::Vector{Any}) + lvs = leaves(gr) + outn = outneighbors(gr, v) + + if v in lvs + return [v] + end + + innodes = setdiff(outn, lvs) + d = unique(append!(desc, intersect(outn, lvs))) + + if length(innodes) > 0 + for i in innodes + d = vertex_descendants(i, gr, d) + end + return d + end + + return d +end + +function cherries(graph::Graph) + lvs = leaves(graph) + cherry = [] + for l in lvs + in_node = inneighbors(graph,l)[1] + lvs_cherr = outneighbors(graph, inneighbors(graph,l)[1]) + if issubset(lvs_cherr, lvs) == 2 + cherry = append!(cherry, [[Edge(in_node, lvs_cherr[1]), Edge(in_node, lvs_cherr[2])]]) + end + end + + return unique(cherry) +end + +function root(graph::Graph) + n_parents = [length(inneighbors(graph, v)) for v in 1:n_vertices(graph)] + return findall(x -> x == 0, n_parents)[1] +end + + +######################## +#### SORT FUNCTIONS #### +######################## + +function sort_edges(graph::Graph) + edgs = collect(edges(graph)) + leaves_idx = findall(edge -> dst(edge) in Oscar.leaves(graph), edgs) + return edgs[vcat(leaves_idx, setdiff(1:length(edgs), leaves_idx))] +end diff --git a/experimental/GraphicalModels/src/PhylogeneticModels.jl b/experimental/GraphicalModels/src/PhylogeneticModels.jl new file mode 100644 index 000000000000..a490ef1119ab --- /dev/null +++ b/experimental/GraphicalModels/src/PhylogeneticModels.jl @@ -0,0 +1,473 @@ +###################################### +#### PHYLOGENETIC DATA STRUCTURES #### +###################################### + +struct PhylogeneticModel + graph::Graph{Directed} + n_states::Int + prob_ring::MPolyRing{QQFieldElem} + root_distr::Vector{Any} + trans_matrices::Dict{Edge, MatElem{QQMPolyRingElem}} +end + +function Base.show(io::IO, pm::PhylogeneticModel) + gr = graph(pm) + ns = number_states(pm) + nl = length(leaves(gr)) + ne = length(collect(edges(gr))) + root_dist = join(Oscar.root_distribution(pm), ", " ) + M = collect(values(transition_matrices(pm)))[1] + + print(io, "Phylogenetic model on a tree with $(nl) leaves and $(ne) edges \n with distribution at the root [$(root_dist)] \n") + print(io, " and transition matrix associated to edge i of the form \n ") + idx = string(split(string(M[1,1]), "[")[2][1]) + print(io, replace(replace(string(M), "["*idx => "[i"), ";" => ";\n ")) + print(io, ". ") +end + +struct GroupBasedPhylogeneticModel + phylo_model::PhylogeneticModel + fourier_ring::MPolyRing{QQFieldElem} + fourier_params::Dict{Edge, Vector{QQMPolyRingElem}} + group::Vector{Vector{Int64}} +end + +function Base.show(io::IO, pm::GroupBasedPhylogeneticModel) + gr = graph(pm) + nl = length(leaves(gr)) + ne = length(collect(edges(gr))) + root_dist = join(Oscar.root_distribution(pm), ", " ) + c_edg = 2 + p_edg = inneighbors(gr, c_edg)[1] + findall(x-> x==2, dst.(edges(gr))) + M = transition_matrices(pm)[Edge(p_edg, c_edg)] + idx = string(split(string(M[1,1]), "[")[2][1]) + + print(io, "Group-based phylogenetic model on a tree with $(nl) leaves and $(ne) edges \n with distribution at the root [$(root_dist)]. \n") + print(io, " The transition matrix associated to edge i is of the form \n ") + print(io, replace(replace(string(M), "["*idx => "[i"), ";" => ";\n ")) + print(io, ", \n and the Fourier parameters are ") + fp = transpose(fourier_parameters(pm)[Edge(p_edg, c_edg)]) + fp = replace(string(fp), "QQMPolyRingElem" => "") + print(io, replace(replace(replace(string(fp), "["*idx => "[i"), ";" => ";\n "), "]]" => "]].")) + +end + + +######################################################################### +#### ATTRIBUTES OF PhylogeneticModel AND GroupBasedPhylogeneticModel #### +######################################################################### + +@doc raw""" + phylogenetic_model(pm::GroupBasedPhylogeneticModel) + +Return the complete information of a `PhylogeneticModel` or `GroupBasedPhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> phylogenetic_model(pm) +Phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//4, 1//4, 1//4, 1//4] + and transition matrix associated to edge i of the form + [a[i] b[i] b[i] b[i]; + b[i] a[i] b[i] b[i]; + b[i] b[i] a[i] b[i]; + b[i] b[i] b[i] a[i]]. +``` +""" +phylogenetic_model(pm::GroupBasedPhylogeneticModel) = pm.phylo_model + +@doc raw""" + graph(pm::PhylogeneticModel) + +Return the graph of a `PhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> graph(pm) +Directed graph with 4 nodes and the following edges: +(4, 1)(4, 2)(4, 3) +``` +""" +graph(pm::PhylogeneticModel) = pm.graph +graph(pm::GroupBasedPhylogeneticModel) = pm.phylo_model.graph + +@doc raw""" + number_states(pm::PhylogeneticModel) + +Return the number of states of the `PhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> number_states(pm) +4 +``` +""" +number_states(pm::PhylogeneticModel) = pm.n_states +number_states(pm::GroupBasedPhylogeneticModel) = pm.phylo_model.n_states + +@doc raw""" + transition_matrices(pm::PhylogeneticModel) + +Return a dictionary between the edges of the tree specifying the `PhylogeneticModel` `pm` and their attached transition matrices. + +# Examples +```jldoctest; filter = r".*" +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> transition_matrices(pm) +Dict{Edge, MatElem{QQMPolyRingElem}} with 3 entries: + Edge(4, 1) => [a[1] b[1] b[1] b[1]; b[1] a[1] b[1] b[1]; b[1] b[1] a[1] b[1];… + Edge(4, 2) => [a[2] b[2] b[2] b[2]; b[2] a[2] b[2] b[2]; b[2] b[2] a[2] b[2];… + Edge(4, 3) => [a[3] b[3] b[3] b[3]; b[3] a[3] b[3] b[3]; b[3] b[3] a[3] b[3];… +``` +""" +transition_matrices(pm::PhylogeneticModel) = pm.trans_matrices +transition_matrices(pm::GroupBasedPhylogeneticModel) = pm.phylo_model.trans_matrices + +@doc raw""" + probability_ring(pm::PhylogeneticModel) + +Return the ring of probability coordinates of the `PhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> probability_ring(pm) +Multivariate polynomial ring in 6 variables a[1], a[2], a[3], b[1], ..., b[3] + over rational field +``` +""" +probability_ring(pm::PhylogeneticModel) = pm.prob_ring +probability_ring(pm::GroupBasedPhylogeneticModel) = pm.phylo_model.prob_ring + +@doc raw""" + root_distribution(pm::PhylogeneticModel) + +Return the distribution of the random variable at the root of the tree specifying the `PhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> root_distribution(pm) +4-element Vector{Any}: + 1//4 + 1//4 + 1//4 + 1//4 +``` +""" +root_distribution(pm::PhylogeneticModel) = pm.root_distr +root_distribution(pm::GroupBasedPhylogeneticModel) = pm.phylo_model.root_distr + +@doc raw""" + fourier_parameters(pm::GroupBasedPhylogeneticModel) + +Return the Fourier parameters of the `GroupBasedPhylogeneticModel` `pm` as a vector of eigenvalues of the transition matrices. + +# Examples +```jldoctest; filter = r".*" +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> fourier_parameters(pm) +Dict{Edge, Vector{QQMPolyRingElem}} with 3 entries: + Edge(4, 1) => [x[1, 1], x[1, 2], x[1, 2], x[1, 2]] + Edge(4, 2) => [x[2, 1], x[2, 2], x[2, 2], x[2, 2]] + Edge(4, 3) => [x[3, 1], x[3, 2], x[3, 2], x[3, 2]] +``` +""" +fourier_parameters(pm::GroupBasedPhylogeneticModel) = pm.fourier_params + +@doc raw""" + fourier_ring(pm::GroupBasedPhylogeneticModel) + +Return the ring of Fourier coordinates of the `PhylogeneticModel` `pm`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> fourier_ring(pm) +Multivariate polynomial ring in 6 variables x[1, 1], x[2, 1], x[3, 1], x[1, 2], ..., x[3, 2] + over rational field +``` +""" +fourier_ring(pm::GroupBasedPhylogeneticModel) = pm.fourier_ring + +@doc raw""" + group_of_model(pm::GroupBasedPhylogeneticModel) + +Returns the group the `GroupBasedPhylogeneticModel` `pm` is based on. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> group_of_model(pm) +4-element Vector{Vector{Int64}}: + [0, 0] + [0, 1] + [1, 0] + [1, 1] +``` +""" +group_of_model(pm::GroupBasedPhylogeneticModel) = pm.group + + +############################ +#### GROUP-BASED MODELS #### +############################ + +@doc raw""" + cavender_farris_neyman_model(graph::Graph{Directed}) + +Creates a `PhylogeneticModel` based on `graph` whose transition matrices are of type Cavender-Farris-Neyman. + +# Examples +```jldoctest +julia> cavender_farris_neyman_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) +Group-based phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//2, 1//2]. + The transition matrix associated to edge i is of the form + [a[i] b[i]; + b[i] a[i]], + and the Fourier parameters are [x[i, 1] x[i, 2]]. +``` +""" +function cavender_farris_neyman_model(graph::Graph{Directed}) + ns = 2 + ne = n_edges(graph) + R, list_a, list_b = polynomial_ring(QQ, :a => 1:ne, :b => 1:ne; cached=false) + + root_distr = repeat([1//ns], outer = ns) + edgs = sort_edges(graph) + matrices = Dict{Edge, MatElem}(e => matrix(R, [ + a b + b a]) for (a,b,e) in zip(list_a, list_b, edgs) + ) + + S, list_x = polynomial_ring(QQ, :x => (1:ne, 1:2); cached=false) + fourier_param = Dict{Edge, Vector{QQMPolyRingElem}}(e => + [list_x[i,1], list_x[i,2]] for (i, e) in zip(1:ne, edgs)) + + group = [[0],[1]] + + pm = PhylogeneticModel(graph, ns, R, root_distr, matrices) + return GroupBasedPhylogeneticModel(pm, S, fourier_param, group) +end + +@doc raw""" + jukes_cantor_model(graph::Graph{Directed}) + +Creates a `PhylogeneticModel` based on `graph` whose transition matrices are Jukes Cantor matrices. + +# Examples +```jldoctest +julia> jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) +Group-based phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//4, 1//4, 1//4, 1//4]. + The transition matrix associated to edge i is of the form + [a[i] b[i] b[i] b[i]; + b[i] a[i] b[i] b[i]; + b[i] b[i] a[i] b[i]; + b[i] b[i] b[i] a[i]], + and the Fourier parameters are [x[i, 1] x[i, 2] x[i, 2] x[i, 2]]. +``` +""" +function jukes_cantor_model(graph::Graph{Directed}) + ns = 4 + ne = n_edges(graph) + R, list_a, list_b = polynomial_ring(QQ, :a => 1:ne, :b => 1:ne; cached=false) + + root_distr = repeat([1//ns], outer = ns) + edgs = sort_edges(graph) + matrices = Dict{Edge, MatElem}(e => matrix(R, [ + a b b b + b a b b + b b a b + b b b a]) for (a,b,e) in zip(list_a, list_b, edgs) + ) + + S, list_x = polynomial_ring(QQ, :x => (1:ne, 1:2); cached=false) + fourier_param = Dict{Edge, Vector{QQMPolyRingElem}}(e => + [list_x[i,1], list_x[i,2], list_x[i,2], list_x[i,2]] for (i, e) in zip(1:ne, edgs)) + + group = [[0,0], [0,1], [1,0], [1,1]] + + pm = PhylogeneticModel(graph, ns, R, root_distr, matrices) + return GroupBasedPhylogeneticModel(pm, S, fourier_param, group) +end + +@doc raw""" + kimura2_model(graph::Graph{Directed}) + +Creates a `PhylogeneticModel` based on `graph` whose transition matrices are Kimura 2-parameter matrices. + +# Examples +```jldoctest +julia> kimura2_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) +Group-based phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//4, 1//4, 1//4, 1//4]. + The transition matrix associated to edge i is of the form + [a[i] b[i] c[i] b[i]; + b[i] a[i] b[i] c[i]; + c[i] b[i] a[i] b[i]; + b[i] c[i] b[i] a[i]], + and the Fourier parameters are [x[i, 1] x[i, 3] x[i, 2] x[i, 2]]. +``` +""" +function kimura2_model(graph::Graph{Directed}) + ns = 4 + ne = n_edges(graph) + R, list_a, list_b, list_c = polynomial_ring(QQ, :a => 1:ne, :b => 1:ne, :c => 1:ne; cached=false) + + root_distr = repeat([1//ns], outer = ns) + edgs = sort_edges(graph) + matrices = Dict{Edge, MatElem}(e => matrix(R, [ + a b c b + b a b c + c b a b + b c b a]) for (a,b,c,e) in zip(list_a, list_b, list_c, edgs) + ) + + S, list_x = polynomial_ring(QQ, :x => (1:ne, 1:3); cached=false) + fourier_param = Dict{Edge, Vector{QQMPolyRingElem}}(e => + [list_x[i,1], list_x[i,3], list_x[i,2], list_x[i,2]] for (i, e) in zip(1:ne, edgs)) + + group = [[0,0], [0,1], [1,0], [1,1]] + + pm = PhylogeneticModel(graph, ns, R, root_distr, matrices) + return GroupBasedPhylogeneticModel(pm, S, fourier_param, group) +end + +@doc raw""" + kimura3_model(graph::Graph{Directed}) + +Creates a `PhylogeneticModel` based on `graph` whose transition matrices are Kimura 3-parameter matrices. + +# Examples +```jldoctest +julia> kimura3_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) +Group-based phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//4, 1//4, 1//4, 1//4]. + The transition matrix associated to edge i is of the form + [a[i] b[i] c[i] d[i]; + b[i] a[i] d[i] c[i]; + c[i] d[i] a[i] b[i]; + d[i] c[i] b[i] a[i]], + and the Fourier parameters are [x[i, 1] x[i, 2] x[i, 3] x[i, 4]]. +``` +""" +function kimura3_model(graph::Graph{Directed}) + ns = 4 + ne = n_edges(graph) + R, list_a, list_b , list_c, list_d= polynomial_ring(QQ, :a => 1:ne, :b => 1:ne, :c => 1:ne, :d => 1:ne; cached=false) + + root_distr = repeat([1//ns], outer = ns) + edgs = sort_edges(graph) + matrices = Dict{Edge, MatElem}(e => matrix(R, [ + a b c d + b a d c + c d a b + d c b a]) for (a,b,c,d,e) in zip(list_a, list_b, list_c, list_d, edgs) + ) + + S, list_x = polynomial_ring(QQ, :x => (1:ne, 1:4); cached=false) + fourier_param = Dict{Edge, Vector{QQMPolyRingElem}}(e => + [list_x[i,1], list_x[i,2], list_x[i,3], list_x[i,4]] for (i, e) in zip(1:ne, edgs)) + + group = [[0,0], [0,1], [1,0], [1,1]] + + pm = PhylogeneticModel(graph, ns, R, root_distr, matrices) + return GroupBasedPhylogeneticModel(pm, S, fourier_param, group) +end + + +############################## +#### GENERAL MARKOV MODEL #### +############################## + +@doc raw""" + general_markov_model(graph::Graph{Directed}) + +Creates a `PhylogeneticModel` based on `graph` whose transition matrices are stochastic with no further constraints. + +# Examples +```jldoctest +julia> general_markov_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) +Phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [π[1], π[2], π[3], π[4]] + and transition matrix associated to edge i of the form + [m[i, 1, 1] m[i, 1, 2] m[i, 1, 3] m[i, 1, 4]; + m[i, 2, 1] m[i, 2, 2] m[i, 2, 3] m[i, 2, 4]; + m[i, 3, 1] m[i, 3, 2] m[i, 3, 3] m[i, 3, 4]; + m[i, 4, 1] m[i, 4, 2] m[i, 4, 3] m[i, 4, 4]]. +``` +""" +function general_markov_model(graph::Graph{Directed}; number_states = 4) + ns = number_states + ne = n_edges(graph) + R, root_distr, list_m = polynomial_ring(QQ, :π => 1:ns, :m => (1:(ne^2),1:(ns),1:(ns)); cached=false) + + edgs = sort_edges(graph) + matrices = Dict{Edge, MatElem}(e => matrix(R, reshape(list_m[i,:,:], ns, ns)) for (i,e) in zip(1:ne, edgs)) + + return PhylogeneticModel(graph, ns, R, root_distr, matrices) +end + + +############################## +#### AFFINE PHYLO MODELS ##### +############################## + +@doc raw""" + affine_phylogenetic_model!(pm::PhylogeneticModel) + +Moves a `PhylogeneticModel` or `GroupBasedPhylogeneticModel` from projective into affine space. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> affine_phylogenetic_model!(pm) +Group-based phylogenetic model on a tree with 3 leaves and 3 edges + with distribution at the root [1//4, 1//4, 1//4, 1//4]. + The transition matrix associated to edge i is of the form + [-3*b[i]+1 b[i] b[i] b[i]; + b[i] -3*b[i]+1 b[i] b[i]; + b[i] b[i] -3*b[i]+1 b[i]; + b[i] b[i] b[i] -3*b[i]+1], + and the Fourier parameters are [1 x[i, 2] x[i, 2] x[i, 2]]. +``` +""" +function affine_phylogenetic_model!(pm::PhylogeneticModel) + gr = graph(pm) + ns = number_states(pm) + trans_mat = transition_matrices(pm) + for e in edges(gr) + [trans_mat[e][i,i] = 1 - (sum(trans_mat[e][i,:]) - trans_mat[e][i,i]) for i in 1:ns] + end + r = root_distribution(pm) + r[1] = 1 - sum(r[2:ns]) + return PhylogeneticModel(gr, ns, probability_ring(pm), root_distribution(pm), trans_mat) +end + +function affine_phylogenetic_model!(pm::GroupBasedPhylogeneticModel) + gr = graph(pm) + affine_pm = affine_phylogenetic_model!(pm.phylo_model) + fourier_param = fourier_parameters(pm) + S = fourier_ring(pm) + for e in edges(gr) + fourier_param[e][1] = S(1) + end + return GroupBasedPhylogeneticModel(affine_pm, S, fourier_param, group_of_model(pm)) +end + diff --git a/experimental/GraphicalModels/src/PhylogeneticParametrization.jl b/experimental/GraphicalModels/src/PhylogeneticParametrization.jl new file mode 100644 index 000000000000..2a4a0826609b --- /dev/null +++ b/experimental/GraphicalModels/src/PhylogeneticParametrization.jl @@ -0,0 +1,363 @@ +################################################### +#### PARAMETRIZATION IN PROBABLITY COORDINATES #### +################################################### + +function monomial_parametrization(pm::PhylogeneticModel, states::Dict{Int, Int}) + gr = graph(pm) + tr_mat = transition_matrices(pm) + root_dist = root_distribution(pm) + + r = root(gr) + monomial = root_dist[states[r]] + for edge in edges(gr) + stateParent = states[src(edge)] + stateChild = states[dst(edge)] + monomial = monomial * tr_mat[edge][stateParent, stateChild] + end + + return monomial +end +function monomial_parametrization(pm::GroupBasedPhylogeneticModel, states::Dict{Int, Int}) + monomial_parametrization(phylogenetic_model(pm), states) +end + +function probability_parametrization(pm::PhylogeneticModel, leaves_states::Vector{Int}) + gr = graph(pm) + int_nodes = interior_nodes(gr) + lvs_nodes = leaves(gr) + n_states = number_states(pm) + + interior_indices = collect.(Iterators.product([collect(1:n_states) for _ in int_nodes]...)) + nodes_states = Dict(lvs_nodes[i] => leaves_states[i] for i in 1:length(lvs_nodes)) + + poly = 0 + # Might be useful in the future to use a polynomial ring context + for labels in interior_indices + for (int_node, label) in zip(int_nodes, labels) + nodes_states[int_node] = label + end + poly = poly + monomial_parametrization(pm, nodes_states) + end + return poly +end +function probability_parametrization(pm::GroupBasedPhylogeneticModel, leaves_states::Vector{Int}) + probability_parametrization(phylogenetic_model(pm), leaves_states) +end + +@doc raw""" + probability_map(pm::PhylogeneticModel) + +Create a parametrization for a `PhylogeneticModel` of type `Dictionary`. + +Iterate through all possible states of the leaf random variables and calculates their corresponding probabilities using the root distribution and laws of conditional independence. Return a dictionary of polynomials indexed by the states. Use auxiliary function `monomial_parametrization(pm::PhylogeneticModel, states::Dict{Int, Int})` and `probability_parametrization(pm::PhylogeneticModel, leaves_states::Vector{Int})`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> p = probability_map(pm) +Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem} with 64 entries: + (1, 2, 1) => 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2*b[1]*b[2]*b[3] + (3, 1, 1) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*a[3]*b[1] + 1//2*b[1]*b[2]*b[3] + (4, 4, 2) => 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3] + (1, 2, 3) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (3, 1, 3) => 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2*b[1]*b[2]*b[3] + (3, 2, 4) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (3, 2, 1) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (2, 1, 4) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (3, 2, 3) => 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2*b[1]*b[2]*b[3] + (2, 1, 1) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*a[3]*b[1] + 1//2*b[1]*b[2]*b[3] + (1, 3, 2) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (1, 4, 2) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (2, 1, 3) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (2, 2, 4) => 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3] + (4, 3, 4) => 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2*b[1]*b[2]*b[3] + (2, 2, 1) => 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3] + (4, 4, 4) => 1//4*a[1]*a[2]*a[3] + 3//4*b[1]*b[2]*b[3] + (4, 3, 1) => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] … + (3, 3, 2) => 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3] + ⋮ => ⋮ +``` +""" +function probability_map(pm::PhylogeneticModel) + lvs_nodes = leaves(graph(pm)) + n_states = number_states(pm) + + leaves_indices = collect.(Iterators.product([collect(1:n_states) for _ in lvs_nodes]...)) + probability_coordinates = Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem}(Tuple(leaves_states) => probability_parametrization(pm, leaves_states) for leaves_states in leaves_indices) + return probability_coordinates +end +function probability_map(pm::GroupBasedPhylogeneticModel) + probability_map(phylogenetic_model(pm)) +end + + +################################################ +#### PARAMETRIZATION IN FOURIER COORDINATES #### +################################################ + +function monomial_fourier(pm::GroupBasedPhylogeneticModel, leaves_states::Vector{Int}) + gr = graph(pm) + param = fourier_parameters(pm) + monomial = 1 + for edge in edges(gr) + dsc = vertex_descendants(dst(edge), gr, []) + elem = group_sum(pm, leaves_states[dsc]) + monomial = monomial * param[edge][which_group_element(pm, elem)] + end + return monomial +end + +function fourier_parametrization(pm::GroupBasedPhylogeneticModel, leaves_states::Vector{Int}) + S = fourier_ring(pm) + if is_zero_group_sum(pm, leaves_states) + poly = monomial_fourier(pm, leaves_states) + else + poly = S(0) + end + + return poly +end + + +@doc raw""" + fourier_map(pm::GroupBasedPhylogeneticModel) + +Create a parametrization for a `GroupBasedPhylogeneticModel` of type `Dictionary`. + +Iterate through all possible states of the leaf random variables and calculates their corresponding probabilities using group actions and laws of conditional independence. Return a dictionary of polynomials indexed by the states. Use auxiliary function `monomial_fourier(pm::GroupBasedPhylogeneticModel, leaves_states::Vector{Int})` and `fourier_parametrization(pm::GroupBasedPhylogeneticModel, leaves_states::Vector{Int})`. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> q = fourier_map(pm) +Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem} with 64 entries: + (1, 2, 1) => 0 + (3, 1, 1) => 0 + (4, 4, 2) => 0 + (1, 2, 3) => 0 + (3, 1, 3) => x[2, 1]*x[1, 2]*x[3, 2] + (3, 2, 4) => x[1, 2]*x[2, 2]*x[3, 2] + (3, 2, 1) => 0 + (2, 1, 4) => 0 + (3, 2, 3) => 0 + (2, 1, 1) => 0 + (1, 3, 2) => 0 + (1, 4, 2) => 0 + (2, 1, 3) => 0 + (2, 2, 4) => 0 + (4, 3, 4) => 0 + (2, 2, 1) => x[3, 1]*x[1, 2]*x[2, 2] + (4, 4, 4) => 0 + (4, 3, 1) => 0 + (3, 3, 2) => 0 + ⋮ => ⋮ +``` +""" +function fourier_map(pm::GroupBasedPhylogeneticModel) + lvs_nodes = leaves(graph(pm)) + n_states = number_states(pm) + + leaves_indices = collect.(Iterators.product([collect(1:n_states) for _ in lvs_nodes]...)) + fourier_coordinates = Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem}(Tuple(leaves_states) => fourier_parametrization(pm, leaves_states) for leaves_states in leaves_indices) + return fourier_coordinates +end + + +##################################### +#### COMPUTE EQUIVALENCE CLASSES #### +##################################### + +@doc raw""" + compute_equivalent_classes(parametrization::Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem}) + +Given the parametrization of a `PhylogeneticModel`, cancel all duplicate entries and return equivalence classes of states which are attached the same probabilities. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> p = probability_map(pm); + +julia> q = fourier_map(pm); + +julia> p_equivclasses = compute_equivalent_classes(p) +Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem} with 5 entries: + [(1, 2, 2), (1, 3, 3), (… => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*a[3]*b[1] + 1//2… + [(1, 2, 3), (1, 2, 4), (… => 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4… + [(1, 2, 1), (1, 3, 1), (… => 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2… + [(1, 1, 2), (1, 1, 3), (… => 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2… + [(1, 1, 1), (2, 2, 2), (… => 1//4*a[1]*a[2]*a[3] + 3//4*b[1]*b[2]*b[3] + +julia> q_equivclasses = compute_equivalent_classes(q) +Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem} with 6 entries: + [(2, 1, 2), (3, 1, 3), (4, 1, 4)] => x[2, 1]*x[1, 2]*x[3, 2] + [(2, 2, 1), (3, 3, 1), (4, 4, 1)] => x[3, 1]*x[1, 2]*x[2, 2] + [(1, 2, 2), (1, 3, 3), (1, 4, 4)] => x[1, 1]*x[2, 2]*x[3, 2] + [(2, 3, 4), (2, 4, 3), (3, 2, 4), (3, 4, 2), (4, 2… => x[1, 2]*x[2, 2]*x[3, 2] + [(1, 1, 2), (1, 1, 3), (1, 1, 4), (1, 2, 1), (1, 2… => 0 + [(1, 1, 1)] => x[1, 1]*x[2, 1]*x[3, 1] +``` +""" +function compute_equivalent_classes(parametrization::Dict{Tuple{Vararg{Int64}}, QQMPolyRingElem}) + polys = unique(collect(values(parametrization))) + + equivalent_keys = [] + for value in polys + eqv_class = [key for key in keys(parametrization) if parametrization[key] == value] + #sort!(eqv_class) + append!(equivalent_keys, [eqv_class]) + end + equivalenceclass_dictionary = Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}(sort(equivalent_keys[i]) => parametrization[equivalent_keys[i][1]] for i in 1:length(equivalent_keys)) + return equivalenceclass_dictionary +end + +@doc raw""" + sum_equivalent_classes(equivalent_classes::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + +Take the output of the function `compute_equivalent_classes` for `PhylogeneticModel` and multiply by a factor to obtain probabilities as specified on the original small trees database. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> q = fourier_map(pm); + +julia> q_equivclasses = compute_equivalent_classes(q); + +julia> sum_equivalent_classes(q_equivclasses) +Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem} with 6 entries: + [(2, 1, 2), (3, 1, 3), (4, 1, 4)] => 3*x[2, 1]*x[1, 2]*x[3, 2] + [(2, 2, 1), (3, 3, 1), (4, 4, 1)] => 3*x[3, 1]*x[1, 2]*x[2, 2] + [(1, 2, 2), (1, 3, 3), (1, 4, 4)] => 3*x[1, 1]*x[2, 2]*x[3, 2] + [(2, 3, 4), (2, 4, 3), (3, 2, 4), (3, 4, 2), (4,… => 6*x[1, 2]*x[2, 2]*x[3, 2] + [(1, 1, 2), (1, 1, 3), (1, 1, 4), (1, 2, 1), (1,… => 0 + [(1, 1, 1)] => x[1, 1]*x[2, 1]*x[3, 1] +``` +""" +function sum_equivalent_classes(equivalent_classes::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + return Dict(key => equivalent_classes[key]*length(vcat([key]...)) for key in keys(equivalent_classes)) +end + + +############################################## +#### SPECIALIZED FOURIER TRANSFORM MATRIX #### +############################################## + +@doc raw""" +specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, q_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + +Reparametrize between a model specification in terms of probability and Fourier cooordinates. The input of equivalent classes is optional, if they are not entered they will be computed. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> p_equivclasses = compute_equivalent_classes(probability_map(pm)); + +julia> q_equivclasses = compute_equivalent_classes(fourier_map(pm)); + +julia> specialized_fourier_transform(pm, p_equivclasses, q_equivclasses) +5×5 Matrix{QQMPolyRingElem}: + 1 1 1 1 1 + 1 -1//3 -1//3 1 -1//3 + 1 -1//3 1 -1//3 -1//3 + 1 1 -1//3 -1//3 -1//3 + 1 -1//3 -1//3 -1//3 1//3 +``` +""" +function specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, q_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + R = probability_ring(pm) + ns = number_states(pm) + + np = length(p_equivclasses) + nq = length(q_equivclasses) - 1 + + ## We need to sort the equivalence classes: both inside each class as well as the collection of classes. + p_equivclasses_sorted = collect(keys(p_equivclasses)) + [sort!(p_eqclass) for p_eqclass in p_equivclasses_sorted] + sort!(p_equivclasses_sorted) + + q_equivclasses = collect(keys(filter(x -> !is_zero(x.second), q_equivclasses))) + [sort!(f_eqclass) for f_eqclass in q_equivclasses] + sort!(q_equivclasses) + + H = R.(hadamard(matrix_space(ZZ, ns, ns))) + + specialized_ft_matrix = R.(Int.(zeros(nq, np))) + for i in 1:nq + current_fourier_classes = q_equivclasses[i] + for j in 1:np + current_prob_classes = p_equivclasses_sorted[j] + current_entriesin_M = [prod([H[y,x] for (x,y) in zip(p,q)]) for p in current_prob_classes, q in current_fourier_classes] + specialized_ft_matrix[i,j] = R.(1//(length(current_prob_classes)*length(current_fourier_classes))*sum(current_entriesin_M)) + end + end + return specialized_ft_matrix +end + +function specialized_fourier_transform(pm::GroupBasedPhylogeneticModel) + p_equivclasses = compute_equivalent_classes(probability_map(pm)) + q_equivclasses = compute_equivalent_classes(fourier_map(pm)) + specialized_fourier_transform(pm, p_equivclasses,q_equivclasses) +end + +@doc raw""" + inverse_specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, q_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + +Reparametrize between a model specification in terms of Fourier and probability cooordinates. + +# Examples +```jldoctest +julia> pm = jukes_cantor_model(graph_from_edges(Directed,[[4,1],[4,2],[4,3]])); + +julia> p_equivclasses = compute_equivalent_classes(probability_map(pm)); + +julia> q_equivclasses = compute_equivalent_classes(fourier_map(pm)); + +julia> inverse_specialized_fourier_transform(pm, p_equivclasses, q_equivclasses) +5×5 Matrix{QQMPolyRingElem}: + 1//16 3//16 3//16 3//16 3//8 + 3//16 -3//16 -3//16 9//16 -3//8 + 3//16 -3//16 9//16 -3//16 -3//8 + 3//16 9//16 -3//16 -3//16 -3//8 + 3//8 -3//8 -3//8 -3//8 3//4 +``` +""" +function inverse_specialized_fourier_transform(pm::GroupBasedPhylogeneticModel, p_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}, q_equivclasses::Dict{Vector{Tuple{Vararg{Int64}}}, QQMPolyRingElem}) + R = probability_ring(pm) + ns = number_states(pm) + + np = length(p_equivclasses) + nq = length(q_equivclasses) - 1 + + ## We need to sort the equivalence classes: both inside each class as well as the collection of classes. + p_equivclasses_sorted = collect(keys(p_equivclasses)) + [sort!(p_eqclass) for p_eqclass in p_equivclasses_sorted] + sort!(p_equivclasses_sorted) + + q_equivclasses_sorted = collect(keys(filter(x -> !is_zero(x.second), q_equivclasses))) + [sort!(f_eqclass) for f_eqclass in q_equivclasses_sorted] + sort!(q_equivclasses_sorted) + + H = R.(hadamard(matrix_space(ZZ, ns, ns))) + Hinv = 1//ns * H + + inverse_spec_ft_matrix = R.(Int.(zeros(np, nq))) + for i in 1:np + current_prob_class = p_equivclasses_sorted[i] + for j in 1:nq + current_fourier_class = q_equivclasses_sorted[j] + current_entriesin_Minv = [prod([Hinv[x,y] for (x,y) in zip(p,q)]) for p in current_prob_class, q in current_fourier_class] + inverse_spec_ft_matrix[i,j] = R.(sum(current_entriesin_Minv)) + end + end + return inverse_spec_ft_matrix +end + +function inverse_specialized_fourier_transform(pm::GroupBasedPhylogeneticModel) + p_equivclasses = compute_equivalent_classes(probability_map(pm)) + q_equivclasses = compute_equivalent_classes(fourier_map(pm)) + inverse_specialized_fourier_transform(pm, p_equivclasses,q_equivclasses) +end diff --git a/experimental/GraphicalModels/test/runtests.jl b/experimental/GraphicalModels/test/runtests.jl new file mode 100644 index 000000000000..e2b0a1c16f11 --- /dev/null +++ b/experimental/GraphicalModels/test/runtests.jl @@ -0,0 +1,256 @@ +# exported items: experimental/GraphicalModels/src/GraphicalModels.jl +# TODOs: +# * how to check e.g. collect(values(compute_equivalent_classes(probability_map(model)))) against something hardcoded? (Marina) +# * specialized (inverse) fourier transform, two fuctions still as comments (Christiane) + +@testset "Graphical Models tests" begin + + tree = graph_from_edges(Directed,[[4,1],[4,2],[4,3]]) + + @testset "cavender_farris_neyman_model" begin + model = cavender_farris_neyman_model(tree) + @test is_isomorphic(graph(model), graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) + @test model isa GroupBasedPhylogeneticModel + #number of states + @test Oscar.number_states(model) == 2 + #root distribution + @test model.phylo_model.root_distr == [1//2,1//2] + #transition matricies + tr_mat = Oscar.transition_matrices(model) + @test tr_mat[Edge(4, 2)][1,1] == tr_mat[Edge(4, 2)][2,2] + @test tr_mat[Edge(4, 2)][1,2] == tr_mat[Edge(4, 2)][2,1] + # generators of the polynomial ring + @test length(gens(model.phylo_model.prob_ring)) == 2(length(collect(edges(graph(model))))) + @test length(gens(model.fourier_ring)) == 2(length(collect(edges(graph(model))))) + # fourier parameters + fp = model.fourier_params + for i in 1:3 + @test fp[Edge(4, i)][1] != fp[Edge(4, i)][2] + end + # group of the model + @test model.group == [[0],[1]] + end + + @testset "Jukes Cantor" begin + model = jukes_cantor_model(tree) + @test is_isomorphic(graph(model), graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) + @test model isa GroupBasedPhylogeneticModel + #number of states + @test Oscar.number_states(model) == 4 + #root distribution + model.phylo_model.root_distr == [1//4,1//4,1//4,1//4] + #transition matricies + tr_mat = Oscar.transition_matrices(model) + for i in 1:4, j in 1:4 + @test tr_mat[Edge(4, 2)][1, i == j ? 1 : 2] == tr_mat[Edge(4, 2)][i, j] # + end + # generators of the polynomial ring + @test length(gens(model.phylo_model.prob_ring)) == 2(length(collect(edges(graph(model))))) + @test length(gens(model.fourier_ring)) == 2(length(collect(edges(graph(model))))) + # fourier parameters + fp = model.fourier_params + for i in 1:3 + @test fp[Edge(4, i)][2] == fp[Edge(4, i)][3] == fp[Edge(4, i)][4] + end + #group of the model + @test model.group == [[0,0],[0,1],[1,0],[1,1]] + end + + @testset "kimura2_model" begin + model = kimura2_model(tree) + @test is_isomorphic(graph(model), graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) + @test model isa GroupBasedPhylogeneticModel + # number of states + @test Oscar.number_states(model) == 4 + # root distribution + @test model.phylo_model.root_distr == [1//4,1//4,1//4,1//4] + # transition matricies + tr_mat = Oscar.transition_matrices(model) + for i in 1:4, j in 1:4 + @test tr_mat[Edge(4, 2)][1, (i == j ? 1 : (isodd(i+j) ? 2 : 3))] == tr_mat[Edge(4, 2)][i, j] + end + # generators of the polynomial ring + @test length(gens(model.phylo_model.prob_ring)) == 3(length(collect(edges(graph(model))))) + @test length(gens(model.fourier_ring)) == 3(length(collect(edges(graph(model))))) + # fourier parameters + fp = model.fourier_params + for i in 1:3 + @test fp[Edge(4, i)][3] == fp[Edge(4, i)][4] + end + # group of the model + @test model.group == [[0,0],[0,1],[1,0],[1,1]] + end + + @testset "kimura3_model" begin + model = kimura3_model(tree) + @test is_isomorphic(graph(model), graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) + # number of states + @test Oscar.number_states(model) == 4 + # root distribution + @test model.phylo_model.root_distr == [1//4,1//4,1//4,1//4] + # transition matrices + tr_mat = Oscar.transition_matrices(model) + @test tr_mat[Edge(4, 2)][1,1] == tr_mat[Edge(4, 2)][2,2] == tr_mat[Edge(4, 2)][3,3] == tr_mat[Edge(4, 2)][4,4] + @test tr_mat[Edge(4, 2)][2,1] == tr_mat[Edge(4, 2)][1,2] == tr_mat[Edge(4, 2)][4,3] == tr_mat[Edge(4, 2)][3,4] + @test tr_mat[Edge(4, 2)][1,3] == tr_mat[Edge(4, 2)][3,1] == tr_mat[Edge(4, 2)][2,4] == tr_mat[Edge(4, 2)][4,2] + @test tr_mat[Edge(4, 2)][1,4] == tr_mat[Edge(4, 2)][4,1] == tr_mat[Edge(4, 2)][2,3] == tr_mat[Edge(4, 2)][3,2] + # generators of the polynomial ring + @test length(gens(model.phylo_model.prob_ring)) == 4(length(collect(edges(graph(model))))) + @test length(gens(model.fourier_ring)) == 4(length(collect(edges(graph(model))))) + # fourier parameters + fp = model.fourier_params + for i in 1:3 + @test length(fp[Edge(4, i)]) == length(unique(fp[Edge(4, i)])) + end + # group of the model + @test model.group == [[0,0],[0,1],[1,0],[1,1]] + end + + @testset "general_markov_model" begin + model = general_markov_model(tree) + @test is_isomorphic(graph(model), graph_from_edges(Directed,[[4,1],[4,2],[4,3]])) + @test model isa PhylogeneticModel + # number of states + @test Oscar.number_states(model) == 4 + # testing with specified states (n=20) + model = general_markov_model(tree; number_states = 20) + @test Oscar.number_states(model) == 20 + model = general_markov_model(tree) + # root distribution + @test model.root_distr[1] isa QQMPolyRingElem + # transition matrices + tr_mat = Oscar.transition_matrices(model) + @test tr_mat[Edge(4,1)] isa AbstractAlgebra.Generic.MatSpaceElem{QQMPolyRingElem} + # generators of the polynomial ring + @test length(gens(model.prob_ring)) == 148 + end + + @testset "affine models" begin + model = jukes_cantor_model(tree) + @test Oscar.affine_phylogenetic_model!(model) isa GroupBasedPhylogeneticModel + #sum of each row of transition matrices should equal one + tr_mat = Oscar.transition_matrices(Oscar.affine_phylogenetic_model!(model)) + for j in 1:4 + rowsum = 0 + for i in 1:4 + rowsum = rowsum+tr_mat[Edge(4, 2)][j,i] + end + @test rowsum == 1 + end + #sum of root distribution should equal one + @test sum(Oscar.affine_phylogenetic_model!(model).phylo_model.root_distr) == 1 + end + + # Test parametrizations for a specific tree and model + model = jukes_cantor_model(tree) + + @testset "Probability parametrization" begin + p = probability_map(model) + @test length(p) == number_states(model)^length(Oscar.leaves(tree)) + + R, a, b = polynomial_ring(QQ, :a => 1:n_edges(tree), :b => 1:n_edges(tree); cached=false) + H = Oscar.hom(probability_ring(model), R, gens(R)) + + # Test some entries of the joint distribution vector + @test H(p[1,1,1]) == 1//4*a[1]*a[2]*a[3] + 3//4*b[1]*b[2]*b[3] + @test H(p[1,2,3]) == 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//4*b[1]*b[2]*b[3] + @test H(p[4,4,3]) == 1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3] + + @testset "Equivalent classes probabilities" begin + p_eqclasses = compute_equivalent_classes(p) + + classes = [1//4*a[1]*a[2]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//2*b[1]*b[2]*b[3], + 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*b[1]*b[3] + 1//4*a[3]*b[1]*b[2] + 1//4*b[1]*b[2]*b[3], + 1//4*a[1]*a[3]*b[2] + 1//4*a[2]*b[1]*b[3] + 1//2*b[1]*b[2]*b[3], + 1//4*a[1]*b[2]*b[3] + 1//4*a[2]*a[3]*b[1] + 1//2*b[1]*b[2]*b[3], + 1//4*a[1]*a[2]*a[3] + 3//4*b[1]*b[2]*b[3]] + + # Test that we get the expected number and polynomials in equivalent classes + @test length(unique(vcat(H.(collect(values(p_eqclasses))), classes))) == length(p_eqclasses) + + # Test the keys for a specific class + i = findall(x -> x == classes[5], H.(collect(values(p_eqclasses))))[1] + @test setdiff(collect(keys(p_eqclasses))[i], [Tuple([1,1,1]), Tuple([2,2,2]), Tuple([3,3,3]), Tuple([4,4,4])]) == [] + end + end + + @testset "Fourier parametrization" begin + q = fourier_map(model) + @test length(q) == number_states(model)^length(Oscar.leaves(tree)) + + S, x = polynomial_ring(QQ, :x => (1:n_edges(tree), 1:2); cached=false) + H = Oscar.hom(fourier_ring(model), S, gens(S)) + + # Test zero entries of the fourier vector + zerokeys = [(1,2,1), (3,1,1), (4,4,2), (1,2,3), (3,2,1), (2,1,4), (3,2,3), (2,1,1), + (1,3,2), (1,4,2), (2,1,3), (2,2,4), (4,3,4), (4,4,4), (4,3,1), (3,3,2), + (4,1,2), (2,2,3), (4,3,3), (4,4,3), (4,2,2), (1,3,4), (2,3,2), (1,3,1), + (2,4,2), (1,1,2), (1,4,1), (3,3,4), (1,4,3), (3,4,4), (4,1,1), (3,1,2), + (3,4,1), (3,3,3), (4,1,3), (4,2,4), (3,4,3), (4,2,1), (3,2,2), (2,3,1), + (2,4,4), (1,1,4), (2,4,1), (2,3,3), (1,1,3), (1,2,4), (2,2,2), (3,1,4)] + sum([q[i] for i in zerokeys]) == 0 + + # Test some entries of the joint distribution vector + @test H(q[3,2,4]) == x[1, 2]*x[2, 2]*x[3, 2] + @test H(q[2,1,2]) == x[2, 1]*x[1, 2]*x[3, 2] + @test H(q[1,1,1]) == x[1, 1]*x[2, 1]*x[3, 1] + + @testset "Equivalent classes Fourier" begin + q_eqclasses = compute_equivalent_classes(q) + + classes = [x[2, 1]*x[1, 2]*x[3, 2], x[3, 1]*x[1, 2]*x[2, 2], + x[1, 1]*x[2, 2]*x[3, 2], x[1, 2]*x[2, 2]*x[3, 2], + x[1, 1]*x[2, 1]*x[3, 1]] + + # Test that we get the expected number and polynomials in equivalent classes + @test length(unique(vcat(H.(collect(values(q_eqclasses))), classes))) == length(q_eqclasses) + + # Test the keys for a specific class + i = findall(x -> x == classes[1], H.(collect(values(q_eqclasses))))[1] + @test setdiff(collect(keys(q_eqclasses))[i], [Tuple([3,1,3]), Tuple([4,1,4]), Tuple([2,1,2])]) == [] + + # Thest that the keys for the zero coordinates are in the same class + i = findall(x -> x == 0, H.(collect(values(q_eqclasses))))[1] + @test setdiff(collect(keys(q_eqclasses))[i], zerokeys) == [] + end + end + + @testset "specialized (inverse) fourier transform" begin + # model = jukes_cantor_model(tree) + + FT = probability_ring(model).([1 1 1 1 1 + 1 -1//3 -1//3 1 -1//3 + 1 -1//3 1 -1//3 -1//3 + 1 1 -1//3 -1//3 -1//3 + 1 -1//3 -1//3 -1//3 1//3]) + + IFT = probability_ring(model).([1//16 3//16 3//16 3//16 3//8 + 3//16 -3//16 -3//16 9//16 -3//8 + 3//16 -3//16 9//16 -3//16 -3//8 + 3//16 9//16 -3//16 -3//16 -3//8 + 3//8 -3//8 -3//8 -3//8 3//4]) + + @test specialized_fourier_transform(model) == FT + @test inverse_specialized_fourier_transform(model) == IFT + + p_equivclasses = sum_equivalent_classes(compute_equivalent_classes(probability_map(model))) + f_equivclasses = sum_equivalent_classes(compute_equivalent_classes(fourier_map(model))) + @test specialized_fourier_transform(model,p_equivclasses,f_equivclasses) == FT + @test inverse_specialized_fourier_transform(model,p_equivclasses,f_equivclasses) == IFT + end + + @testset "Affine parametrization" begin + #Probability map + p = probability_map(affine_phylogenetic_model!(model)) + @test sum(collect(values(p))) == 1 + + #Fourier map + q = fourier_map(affine_phylogenetic_model!(model)) + @test q[1,1,1] == 1 + + # GMM model + p = probability_map(affine_phylogenetic_model!(general_markov_model(tree))) + @test sum(collect(values(p))) == 1 + end + +end