Skip to content

Commit

Permalink
Update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
camilogarciabotero committed Jun 9, 2024
1 parent 09491f3 commit d30f9ee
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 76 deletions.
78 changes: 14 additions & 64 deletions src/transitions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,41 +145,6 @@ function odds_ratio_matrix(
return tpm ./ model.tpm
end

"""
log_odds_ratio_matrix(sequence::NucleicSeqOrView{A}, model::BioMarkovChain) where A
Calculates the log-odds ratio between the transition probability matrix of a given DNA sequence and a reference model.
# Arguments
- `sequence::NucleicSeqOrView{A}`: A DNA, RNA sequence or view with a length of 4 nucleotides.
- `model::BioMarkovChain`: A reference BioMarkovChain model.
# Examples
```julia
sequence = LongNucOrView{4}("ACGT")
model = BioMarkovChain(sequence) # Provide appropriate initialization for BioMarkovChain
result = log_odds_ratio_matrix(sequence, model)
```
"""
function log_odds_ratio_matrix(
sequence::SeqOrView{A},
model::BioMarkovChain;
b::Number =
) where {A <: Alphabet}
@assert model.alphabet == Alphabet(sequence) "Sequence and model state space are inconsistent."
@assert round.(sum(model.tpm, dims=2)') == [1.0 1.0 1.0 1.0] "Model transition probability matrix must be row-stochastic. That is, their row sums must be equal to 1."

tpm = transition_probability_matrix(sequence)
@assert round.(sum(tpm, dims=2)') == [1.0 1.0 1.0 1.0] "Sequence transition probability matrix must be row-stochastic. That is, their row sums must be equal to 1."

lorm = log.(b, tpm./model.tpm)
lorm[isnan.(lorm) .| isinf.(lorm)] .= 0.0
return lorm
end

@doc raw"""
log_odds_ratio_matrix(model1::BioMarkovChain, model2::BioMarkovChain)
Expand Down Expand Up @@ -239,63 +204,48 @@ function log_odds_ratio_score(
@assert modela.alphabet == Alphabet(sequence) "Sequence and model state space are inconsistent."
@assert modelb.alphabet == Alphabet(sequence) "Sequence and model state space are inconsistent."
# @assert round.(sum(model.tpm, dims=2)') == [1.0 1.0 1.0 1.0] "Model transition probability matrix must be row-stochastic. That is, their row sums must be equal to 1."
# tpm = transition_probability_matrix(sequence)

# @assert round.(sum(tpm, dims=2)') == [1.0 1.0 1.0 1.0] "Sequence transition probability matrix must be row-stochastic. That is, their row sums must be equal to 1."
# lorm[isnan.(lorm) .| isinf.(lorm)] .= 0.0

score = 0.0
lorm = log.(b, modela.tpm ./ modelb.tpm)
@inbounds for t in 1:length(sequence)-1
score += lorm[_dna_to_int(sequence[t]), _dna_to_int(sequence[t+1])]
end
# lorm[isnan.(lorm) .| isinf.(lorm)] .= 0.0

return score
end

@doc raw"""
dnaseqprobability(sequence::LongNucOrView{4}, model::BioMarkovChain)
markovprobability(sequence::LongNucOrView{4}, model::BioMarkovChain)
Compute the probability of a given sequence using a transition probability matrix and the initial probabilities distributions of a `BioMarkovModel`.
```math
P(X_1 = i_1, \ldots, X_T = i_T) = \pi_{i_1}^{T-1} \prod_{t=1}^{T-1} a_{i_t, i_{t+1}}
```
# Arguments
## Arguments
- `sequence::LongNucOrView{4}`: The input sequence of nucleotides.
- `model::BioMarkovChain`: A given `BioMarkovChain`.
## Keywords
- `model::BioMarkovChain=ECOLICDS`: A given `BioMarkovChain` model.
- `logscale::Bool=false`: If true, the function will return the log2 of the probability.
- `b::Number=2`: The base of the logarithm used to compute the log odds ratio.
# Returns
- `probability::Float64`: The probability of the input sequence given the model.
# Example
```julia
mainseq = LongDNA{4}("CCTCCCGGACCCTGGGCTCGGGAC")
seq = LongDNA{4}("CGCGCGCGCGCGCGCGCGCGCGCGCG")
bmc = BioMarkovChain(mainseq)
BioMarkovChain of DNAAlphabet{4}() and order 1:
- Transition Probability Matrix -> Matrix{Float64}(4 × 4):
0.0 1.0 0.0 0.0
0.0 0.5 0.2 0.3
0.25 0.125 0.625 0.0
0.0 0.6667 0.3333 0.0
- Initial Probabilities -> Vector{Float64}(4 × 1):
0.087
0.4348
0.3478
0.1304
markovprobability(seq, model=CPGPOS, logscale=true)
-45.073409957110556
newseq = LongDNA{4}("CCTG")
4nt DNA Sequence:
CCTG
dnaseqprobability(newseq, bmc)
0.021739130434782608
markovprobability(seq, model=CPGNEG, logscale=true)
-74.18912168395339
```
"""
function markovprobability(
Expand Down
22 changes: 10 additions & 12 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,31 @@ abstract type AbstractBioMarkovChain end
A BioMarkovChain represents a Markov chain used in biological sequence analysis. It contains a transition probability matrix (tpm) and an initial distribution of probabilities (inits) and also the order of the Markov chain.
# Fields
- `alphabet::A`: Is the state space of the sequence whether DNA, RNA AminoAcid `DataType`s.
## Fields
- `alphabet::A`: The state space of the sequence whether DNA, RNA AminoAcid `DataType`s.
- `tpm::M`: The transition probability matrix.
- `inits::I`: The initial distribution of probabilities.
- `n::N`: The order of the Markov chain.
# Constructors
## Constructors
- `BioMarkovChain(tpm::M, inits::I, n::N=1) where {M<:AbstractMatrix, I<:AbstractVector, N<:Integer}`: Constructs a BioMarkovChain object with the provided transition probability matrix, initial distribution, and order.
- `BioMarkovChain(sequence::LongNucOrView{4}, n::Int64=1)`: Constructs a BioMarkovChain object based on the DNA sequence and transition order.
# Example
## Example
```julia
sequence = LongDNA{4}("ACTACATCTA")
model = BioMarkovChain(sequence, 2)
BioMarkovChain of DNAAlphabet{4}() and order 1:
BioMarkovChain of DNAAlphabet{4}() and order 2:
- Transition Probability Matrix -> Matrix{Float64}(4 × 4):
0.0 0.6667 0.0 0.3333
0.3333 0.0 0.0 0.6667
0.4444 0.1111 0.0 0.4444
0.4444 0.4444 0.0 0.1111
0.0 0.0 0.0 0.0
0.6667 0.3333 0.0 0.0
0.1111 0.4444 0.0 0.4444
- Initial Probabilities -> Vector{Float64}(4 × 1):
0.3333
0.3333
0.0
0.3333
0.3333 0.3333 0.0 0.3333
```
"""
struct BioMarkovChain{A<:Alphabet, M<:AbstractMatrix, I<:AbstractVector, N<:Integer} <: AbstractBioMarkovChain
Expand Down

0 comments on commit d30f9ee

Please sign in to comment.