Skip to content

Commit

Permalink
Merge pull request #17 from camilogarciabotero/dev
Browse files Browse the repository at this point in the history
Update main BMC struct
  • Loading branch information
camilogarciabotero authored Nov 4, 2023
2 parents d52f136 + ee0b103 commit cf417f5
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 307 deletions.
22 changes: 11 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
name = "BioMarkovChains"
uuid = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
authors = ["Camilo García<[email protected]>"]
authors = ["Camilo García-Botero<[email protected]>"]
version = "0.6.0"

[weakdeps]
DiscreteMarkovChains = "8abcb7ef-b365-4f7b-ac38-56893fb62f9f"
MarkovChainHammer = "38c40fd0-bccb-4723-b30d-b2caea0ad8d9"

[extensions]
DiscreteMarkovChainsExt = "DiscreteMarkovChains"

[deps]
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
VectorizedKmers = "2ef45bd6-4c2b-43d8-88b3-40597287359a"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"

[weakdeps]
DiscreteMarkovChains = "8abcb7ef-b365-4f7b-ac38-56893fb62f9f"
MarkovChainHammer = "38c40fd0-bccb-4723-b30d-b2caea0ad8d9"

[extensions]
DiscreteMarkovChainsExt = "DiscreteMarkovChains"
MarkovChainHammerExt = "MarkovChainHammer"

[compat]
BioSequences = "3"
DiscreteMarkovChains = "0.2"
MarkovChainHammer = "0.0.11"
PrecompileTools = "1"
StatsAPI = "1.6"
StatsBase = "0.34.0"
TestItemRunner = "0.2"
TestItems = "0.1"
VectorizedKmers = "0.7"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"

[targets]
Expand Down
40 changes: 38 additions & 2 deletions ext/DiscreteMarkovChainsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ module DiscreteMarkovChainsExt
using BioMarkovChains
using DiscreteMarkovChains:
DiscreteMarkovChains, DiscreteMarkovChain,
is_absorbing, is_ergodic, is_regular, is_reversible
is_absorbing, is_ergodic, is_regular, is_reversible,
stationary_distribution, first_passage_probabilities, exit_probabilities,
fundamental_matrix, mean_first_passage_time, mean_recurrence_time, mean_time_to_absorption

## Boolean functions ##

Expand All @@ -25,6 +27,40 @@ function DiscreteMarkovChains.is_reversible(bmc::BioMarkovChain)
return is_reversible(DiscreteMarkovChain(bmc.tpm))
end

##
## Probability functions

import DiscreteMarkovChains: stationary_distribution, first_passage_probabilities, exit_probabilities

function DiscreteMarkovChains.exit_probabilities(bmc::BioMarkovChain)
return exit_probabilities(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.stationary_distribution(bmc::BioMarkovChain)
return stationary_distribution(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.first_passage_probabilities(bmc::BioMarkovChain, t) # it recieves other args...
return first_passage_probabilities(DiscreteMarkovChain(bmc.tpm), t)
end

## Mean time functions

import DiscreteMarkovChains: fundamental_matrix, mean_first_passage_time, mean_recurrence_time, mean_time_to_absorption

function DiscreteMarkovChains.fundamental_matrix(bmc::BioMarkovChain)
return fundamental_matrix(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_first_passage_time(bmc::BioMarkovChain)
return mean_first_passage_time(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_recurrence_time(bmc::BioMarkovChain)
return mean_recurrence_time(DiscreteMarkovChain(bmc.tpm))
end

function DiscreteMarkovChains.mean_time_to_absorption(bmc::BioMarkovChain)
return mean_time_to_absorption(DiscreteMarkovChain(bmc.tpm))
end

end
14 changes: 10 additions & 4 deletions ext/MarkovChainHammerExt.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# module MarkovChainHammerExt
module MarkovChainHammerExt

# using BioMarkovChains
# using MarkovChainHammer.Trajectory: generate
using BioMarkovChains
using MarkovChainHammer: generate

# end
# function MarkovChainHammer.generate(pf::Matrix{Float64}, steps::Int64)
# return generate(pf, steps)
# end

export generate

end
21 changes: 6 additions & 15 deletions src/BioMarkovChains.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ using BioSequences:
NucleicAcidAlphabet,
DNA,
DNAAlphabet,
DNA_Gap, DNA_A, DNA_C, DNA_M, DNA_G, DNA_R, DNA_S, DNA_V, DNA_T, DNA_W, DNA_Y, DNA_H, DNA_K, DNA_D, DNA_B, DNA_N,

#RNA
RNA,
RNA_Gap, RNA_A, RNA_C, RNA_M, RNA_G, RNA_R, RNA_S, RNA_V, RNA_U, RNA_W, RNA_Y, RNA_H, RNA_K, RNA_D, RNA_B, RNA_N,

#AminoAcids
AminoAcid,
Expand All @@ -24,32 +22,27 @@ using BioSequences:
AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V, AA_O, AA_U, AA_B, AA_J, AA_Z, AA_X, AA_Term, AA_Gap,

# Other functions
SeqOrView,
alphabet,
@biore_str

SeqOrView, NucleicSeqOrView

using PrecompileTools: @setup_workload, @compile_workload
using TestItems: @testitem
using StatsAPI: StatsAPI, fit, fit!
using StatsBase: countmap
using VectorizedKmers: count_kmers

include("types.jl")
export BioMarkovChain, BMC

include("utils.jl")
export transitions, hasprematurestop, randbmc
export randbmc

include("transitions.jl")
export initials, transition_count_matrix, transition_probability_matrix, dnaseqprobability, logoddsratio, logoddsratioscore
export initials, transition_count_matrix, transition_probability_matrix, logoddsratio, logoddsratioscore

include("models.jl")
export ECOLICDS, ECOLINOCDS

include("perronfrobenius.jl")
export perronfrobenius, generatedna

include("constants.jl")
export perronfrobenius # generatedna

include("extended.jl")

Expand All @@ -61,12 +54,10 @@ include("extended.jl")
@compile_workload begin
# all calls in this block will be precompiled, regardless of whether
# they belong to your package or not (on Julia 1.8 and higher)
transitions(seq)
transition_count_matrix(seq)
transition_probability_matrix(seq)
dnaseqprobability(seq, ECOLICDS)
BioMarkovChain(seq)
perronfrobenius(seq)
BioMarkovChain(seq)
end
end

Expand Down
22 changes: 1 addition & 21 deletions src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,5 @@

# const LongAminoAcidOrView = Union{LongSequence{<:AminoAcidAlphabet},LongSubSeq{<:AminoAcidAlphabet}}

const NUCLEICINDEXES = Dict(DNA_A => 1, DNA_C => 2, DNA_G => 3, DNA_T => 4)

const DINUCLEICINDEXES = Dict(
LongDNA{4}("AA") => [1, 1],
LongDNA{4}("AC") => [1, 2],
LongDNA{4}("AG") => [1, 3],
LongDNA{4}("AT") => [1, 4],
LongDNA{4}("CA") => [2, 1],
LongDNA{4}("CC") => [2, 2],
LongDNA{4}("CG") => [2, 3],
LongDNA{4}("CT") => [2, 4],
LongDNA{4}("GA") => [3, 1],
LongDNA{4}("GC") => [3, 2],
LongDNA{4}("GG") => [3, 3],
LongDNA{4}("GT") => [3, 4],
LongDNA{4}("TA") => [4, 1],
LongDNA{4}("TC") => [4, 2],
LongDNA{4}("TG") => [4, 3],
LongDNA{4}("TT") => [4, 4],
)

const AA20 = (AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V)
# const AA20 = (AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K, AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V)
5 changes: 3 additions & 2 deletions src/extended.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ function Base.show(io::IO, model::BioMarkovChain)
# println(io, "BioMarkovChain:")

# Determine the alphabet type
alphabet_type = length(model) == 20 ? "AminoAcids" : "DNA/RNA"
# alphabet_type = length(model) == 20 ? "AminoAcids" : "DNA/RNA"
alphabet_type = eltype(model)

# Print the type name with inferred alphabet type
println(io, "BioMarkovChain with $alphabet_type Alphabet:")
Expand Down Expand Up @@ -47,7 +48,7 @@ end
Base.length(bmc::BioMarkovChain) = length(bmc.inits)

function Base.eltype(bmc::BioMarkovChain)
return length(bmc) == 4 ? DNA : AminoAcid
return bmc.statespace
end

"""
Expand Down
8 changes: 4 additions & 4 deletions src/models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const ECOLICDS = begin

inits = [0.245, 0.243, 0.273, 0.239]

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end

const ECOLINOCDS = begin
Expand All @@ -21,7 +21,7 @@ const ECOLINOCDS = begin

inits = [0.262, 0.239, 0.240, 0.259]

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end


Expand All @@ -35,7 +35,7 @@ const CPGPOS = begin

inits = [0.262, 0.239, 0.240, 0.259] # not stablished

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end


Expand All @@ -49,5 +49,5 @@ const CPGNEG = begin

inits = [0.262, 0.239, 0.240, 0.259] # not stablished

BMC(tpm, inits)
BMC(DNA, tpm, inits)
end
24 changes: 12 additions & 12 deletions src/perronfrobenius.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
### MarkdovChainHammer.jl ###

"""
perronfrobenius(sequence::LongNucOrView{4}, n::Int64=1)
perronfrobenius(sequence::NucleicSeqOrView{A}, n::Int64=1) where A
Compute the Perron-Frobenius matrix, a column-wise version of the transition probability matrix (TPM), for a given nucleotide sequence.
The Perron-Frobenius matrix captures the asymptotic probabilities of transitioning between nucleotides in the sequence over a specified number of steps `n`. It provides insight into the long-term behavior of a Markov chain or a dynamical system associated with the sequence.
# Arguments
- `sequence::LongNucOrView{4}`: A nucleotide sequence represented as a `LongNucOrView{4}` object.
- `sequence::NucleicSeqOrView{A}`: A nucleotide sequence represented as a `NucleicSeqOrView{A}` object.
- `n::Int64=1`: The number of steps to consider for the transition probability matrix. Default is 1.
# Returns
Expand All @@ -20,7 +20,7 @@ sequence = LongSequence{DNAAlphabet{4}}("ACGTCGTCCACTACGACATCAGC") # Replace wi
n = 2
pf = perronfrobenius(sequence, n)
"""
function perronfrobenius(sequence::LongNucOrView{4}; n::Int64=1)
function perronfrobenius(sequence::NucleicSeqOrView{A}; n::Int64=1) where A
tpm = transition_probability_matrix(sequence, n)
return copy(tpm')
end
Expand Down Expand Up @@ -66,12 +66,12 @@ pf = perronfrobenius(orfdna)
newdna = generatedna(pf, 100)
```
"""
function generatedna(pf::Matrix{Float64}, steps::Int64; extended_alphabet::Bool = false)
newseq = LongDNA{4}()
# pf = transpose(tpm) # The Perron-Frobenius matrix
trajectory = MarkovChainHammer.generate(pf, steps)
for i in trajectory
newseq = append!(newseq, _int_to_dna(i; extended_alphabet))
end
return newseq
end
# function generatedna(pf::Matrix{Float64}, steps::Int64; extended_alphabet::Bool = false)
# newseq = LongDNA{4}()
# # pf = transpose(tpm) # The Perron-Frobenius matrix
# trajectory = generate(pf, steps)
# for i in trajectory
# newseq = append!(newseq, _int_to_dna(i; extended_alphabet))
# end
# return newseq
# end
Loading

0 comments on commit cf417f5

Please sign in to comment.