Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update main BMC struct #17

Merged
merged 7 commits into from
Nov 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
name = "BioMarkovChains"
uuid = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
authors = ["Camilo García<[email protected]>"]
authors = ["Camilo García-Botero<[email protected]>"]
version = "0.6.0"

[weakdeps]
DiscreteMarkovChains = "8abcb7ef-b365-4f7b-ac38-56893fb62f9f"
MarkovChainHammer = "38c40fd0-bccb-4723-b30d-b2caea0ad8d9"

[extensions]
DiscreteMarkovChainsExt = "DiscreteMarkovChains"

[deps]
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
VectorizedKmers = "2ef45bd6-4c2b-43d8-88b3-40597287359a"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"

[weakdeps]
DiscreteMarkovChains = "8abcb7ef-b365-4f7b-ac38-56893fb62f9f"
MarkovChainHammer = "38c40fd0-bccb-4723-b30d-b2caea0ad8d9"

[extensions]
DiscreteMarkovChainsExt = "DiscreteMarkovChains"
MarkovChainHammerExt = "MarkovChainHammer"

[compat]
BioSequences = "3"
DiscreteMarkovChains = "0.2"
MarkovChainHammer = "0.0.11"
PrecompileTools = "1"
StatsAPI = "1.6"
StatsBase = "0.34.0"
TestItemRunner = "0.2"
TestItems = "0.1"
VectorizedKmers = "0.7"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"

[targets]
Expand Down
40 changes: 38 additions & 2 deletions ext/DiscreteMarkovChainsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ module DiscreteMarkovChainsExt
using BioMarkovChains
using DiscreteMarkovChains:
DiscreteMarkovChains, DiscreteMarkovChain,
is_absorbing, is_ergodic, is_regular, is_reversible
is_absorbing, is_ergodic, is_regular, is_reversible,
stationary_distribution, first_passage_probabilities, exit_probabilities,
fundamental_matrix, mean_first_passage_time, mean_recurrence_time, mean_time_to_absorption

## Boolean functions ##

Expand All @@ -25,6 +27,40 @@ function DiscreteMarkovChains.is_reversible(bmc::BioMarkovChain)
return is_reversible(DiscreteMarkovChain(bmc.tpm))
end

##
## Probability functions

import DiscreteMarkovChains: stationary_distribution, first_passage_probabilities, exit_probabilities

function DiscreteMarkovChains.exit_probabilities(bmc::BioMarkovChain)
return exit_probabilities(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.stationary_distribution(bmc::BioMarkovChain)
return stationary_distribution(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.first_passage_probabilities(bmc::BioMarkovChain, t) # it recieves other args...
return first_passage_probabilities(DiscreteMarkovChain(bmc.tpm), t)
end

## Mean time functions

import DiscreteMarkovChains: fundamental_matrix, mean_first_passage_time, mean_recurrence_time, mean_time_to_absorption

function DiscreteMarkovChains.fundamental_matrix(bmc::BioMarkovChain)
return fundamental_matrix(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_first_passage_time(bmc::BioMarkovChain)
return mean_first_passage_time(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_recurrence_time(bmc::BioMarkovChain)
return mean_recurrence_time(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_time_to_absorption(bmc::BioMarkovChain)
return mean_time_to_absorption(DiscreteMarkovChain(bmc.tpm))
end

end
14 changes: 10 additions & 4 deletions ext/MarkovChainHammerExt.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# module MarkovChainHammerExt
module MarkovChainHammerExt

# using BioMarkovChains
# using MarkovChainHammer.Trajectory: generate
using BioMarkovChains
using MarkovChainHammer: generate

# end
# function MarkovChainHammer.generate(pf::Matrix{Float64}, steps::Int64)
# return generate(pf, steps)
# end

export generate

end
21 changes: 6 additions & 15 deletions src/BioMarkovChains.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ using BioSequences:
NucleicAcidAlphabet,
DNA,
DNAAlphabet,
DNA_Gap, DNA_A, DNA_C, DNA_M, DNA_G, DNA_R, DNA_S, DNA_V, DNA_T, DNA_W, DNA_Y, DNA_H, DNA_K, DNA_D, DNA_B, DNA_N,

#RNA
RNA,
RNA_Gap, RNA_A, RNA_C, RNA_M, RNA_G, RNA_R, RNA_S, RNA_V, RNA_U, RNA_W, RNA_Y, RNA_H, RNA_K, RNA_D, RNA_B, RNA_N,

#AminoAcids
AminoAcid,
Expand All @@ -24,32 +22,27 @@ using BioSequences:
AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V, AA_O, AA_U, AA_B, AA_J, AA_Z, AA_X, AA_Term, AA_Gap,

# Other functions
SeqOrView,
alphabet,
@biore_str

SeqOrView, NucleicSeqOrView

using PrecompileTools: @setup_workload, @compile_workload
using TestItems: @testitem
using StatsAPI: StatsAPI, fit, fit!
using StatsBase: countmap
using VectorizedKmers: count_kmers

include("types.jl")
export BioMarkovChain, BMC

include("utils.jl")
export transitions, hasprematurestop, randbmc
export randbmc

include("transitions.jl")
export initials, transition_count_matrix, transition_probability_matrix, dnaseqprobability, logoddsratio, logoddsratioscore
export initials, transition_count_matrix, transition_probability_matrix, logoddsratio, logoddsratioscore

include("models.jl")
export ECOLICDS, ECOLINOCDS

include("perronfrobenius.jl")
export perronfrobenius, generatedna

include("constants.jl")
export perronfrobenius # generatedna

include("extended.jl")

Expand All @@ -61,12 +54,10 @@ include("extended.jl")
@compile_workload begin
# all calls in this block will be precompiled, regardless of whether
# they belong to your package or not (on Julia 1.8 and higher)
transitions(seq)
transition_count_matrix(seq)
transition_probability_matrix(seq)
dnaseqprobability(seq, ECOLICDS)
BioMarkovChain(seq)
perronfrobenius(seq)
BioMarkovChain(seq)
end
end

Expand Down
22 changes: 1 addition & 21 deletions src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,5 @@

# const LongAminoAcidOrView = Union{LongSequence{<:AminoAcidAlphabet},LongSubSeq{<:AminoAcidAlphabet}}

const NUCLEICINDEXES = Dict(DNA_A => 1, DNA_C => 2, DNA_G => 3, DNA_T => 4)

const DINUCLEICINDEXES = Dict(
LongDNA{4}("AA") => [1, 1],
LongDNA{4}("AC") => [1, 2],
LongDNA{4}("AG") => [1, 3],
LongDNA{4}("AT") => [1, 4],
LongDNA{4}("CA") => [2, 1],
LongDNA{4}("CC") => [2, 2],
LongDNA{4}("CG") => [2, 3],
LongDNA{4}("CT") => [2, 4],
LongDNA{4}("GA") => [3, 1],
LongDNA{4}("GC") => [3, 2],
LongDNA{4}("GG") => [3, 3],
LongDNA{4}("GT") => [3, 4],
LongDNA{4}("TA") => [4, 1],
LongDNA{4}("TC") => [4, 2],
LongDNA{4}("TG") => [4, 3],
LongDNA{4}("TT") => [4, 4],
)

const AA20 = (AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V)
# const AA20 = (AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V)
5 changes: 3 additions & 2 deletions src/extended.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ function Base.show(io::IO, model::BioMarkovChain)
# println(io, "BioMarkovChain:")

# Determine the alphabet type
alphabet_type = length(model) == 20 ? "AminoAcids" : "DNA/RNA"
# alphabet_type = length(model) == 20 ? "AminoAcids" : "DNA/RNA"
alphabet_type = eltype(model)

# Print the type name with inferred alphabet type
println(io, "BioMarkovChain with $alphabet_type Alphabet:")
Expand Down Expand Up @@ -47,7 +48,7 @@ end
Base.length(bmc::BioMarkovChain) = length(bmc.inits)

function Base.eltype(bmc::BioMarkovChain)
return length(bmc) == 4 ? DNA : AminoAcid
return bmc.statespace
end

"""
Expand Down
8 changes: 4 additions & 4 deletions src/models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const ECOLICDS = begin

inits = [0.245, 0.243, 0.273, 0.239]

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end

const ECOLINOCDS = begin
Expand All @@ -21,7 +21,7 @@ const ECOLINOCDS = begin

inits = [0.262, 0.239, 0.240, 0.259]

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end


Expand All @@ -35,7 +35,7 @@ const CPGPOS = begin

inits = [0.262, 0.239, 0.240, 0.259] # not stablished

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end


Expand All @@ -49,5 +49,5 @@ const CPGNEG = begin

inits = [0.262, 0.239, 0.240, 0.259] # not stablished

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end
24 changes: 12 additions & 12 deletions src/perronfrobenius.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
### MarkdovChainHammer.jl ###

"""
perronfrobenius(sequence::LongNucOrView{4}, n::Int64=1)
perronfrobenius(sequence::NucleicSeqOrView{A}, n::Int64=1) where A

Compute the Perron-Frobenius matrix, a column-wise version of the transition probability matrix (TPM), for a given nucleotide sequence.

The Perron-Frobenius matrix captures the asymptotic probabilities of transitioning between nucleotides in the sequence over a specified number of steps `n`. It provides insight into the long-term behavior of a Markov chain or a dynamical system associated with the sequence.

# Arguments
- `sequence::LongNucOrView{4}`: A nucleotide sequence represented as a `LongNucOrView{4}` object.
- `sequence::NucleicSeqOrView{A}`: A nucleotide sequence represented as a `NucleicSeqOrView{A}` object.
- `n::Int64=1`: The number of steps to consider for the transition probability matrix. Default is 1.

# Returns
Expand All @@ -20,7 +20,7 @@ sequence = LongSequence{DNAAlphabet{4}}("ACGTCGTCCACTACGACATCAGC") # Replace wi
n = 2
pf = perronfrobenius(sequence, n)
"""
function perronfrobenius(sequence::LongNucOrView{4}; n::Int64=1)
function perronfrobenius(sequence::NucleicSeqOrView{A}; n::Int64=1) where A
tpm = transition_probability_matrix(sequence, n)
return copy(tpm')
end
Expand Down Expand Up @@ -66,12 +66,12 @@ pf = perronfrobenius(orfdna)
newdna = generatedna(pf, 100)
```
"""
function generatedna(pf::Matrix{Float64}, steps::Int64; extended_alphabet::Bool = false)
newseq = LongDNA{4}()
# pf = transpose(tpm) # The Perron-Frobenius matrix
trajectory = MarkovChainHammer.generate(pf, steps)
for i in trajectory
newseq = append!(newseq, _int_to_dna(i; extended_alphabet))
end
return newseq
end
# function generatedna(pf::Matrix{Float64}, steps::Int64; extended_alphabet::Bool = false)
# newseq = LongDNA{4}()
# # pf = transpose(tpm) # The Perron-Frobenius matrix
# trajectory = generate(pf, steps)
# for i in trajectory
# newseq = append!(newseq, _int_to_dna(i; extended_alphabet))
# end
# return newseq
# end
Loading
Loading