From 8cf5a0c835d183b76d1fcdd2726b7a42567350e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Sat, 6 Jan 2024 13:55:10 -0500 Subject: [PATCH 1/5] Add size function to BioMarkovChain and update show function formatting --- src/extended.jl | 17 +++++++++-------- src/models.jl | 1 - src/perronfrobenius.jl | 13 ++++++++++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/extended.jl b/src/extended.jl index 51f2079..7c0c435 100644 --- a/src/extended.jl +++ b/src/extended.jl @@ -1,7 +1,10 @@ -import Base: show, length, eltype +import Base: show, length, eltype, size # import StatsAPI: fit! -function Base.show(io::IO, model::BioMarkovChain) +function Base.show( + io::IO, + model::BioMarkovChain +) # # Print the type name # println(io, "BioMarkovChain:") @@ -10,7 +13,7 @@ function Base.show(io::IO, model::BioMarkovChain) alphabet_type = eltype(model) # Print the type name with inferred alphabet type - println(io, "BioMarkovChain with $alphabet_type Alphabet:") + println(io, "BioMarkovChain of $alphabet_type:") # Print the transition probability matrix println(io, " - Transition Probability Matrix -> Matrix{Float64}($(size(model.tpm, 1)) × $(size(model.tpm, 2))):") @@ -45,11 +48,9 @@ function Base.show(io::IO, model::BioMarkovChain) println(io, " ", "$(model.n)") end -Base.length(bmc::BioMarkovChain) = length(bmc.inits) - -function Base.eltype(bmc::BioMarkovChain) - return bmc.alphabet -end +@inline Base.length(bmc::BioMarkovChain) = length(bmc.inits) +@inline Base.size(bmc::BioMarkovChain) = size(bmc.tpm) +@inline Base.eltype(bmc::BioMarkovChain) = bmc.alphabet """ fit!(bmc::BMC, inits:Vector{Float64}, tpm::Matrix{Float64}) diff --git a/src/models.jl b/src/models.jl index a202bce..e20807a 100644 --- a/src/models.jl +++ b/src/models.jl @@ -38,7 +38,6 @@ const CPGPOS = begin BMC(DNAAlphabet{4}(), tpm, inits) end - const CPGNEG = begin tpm = [ 0.300 0.205 0.285 0.210 diff --git a/src/perronfrobenius.jl b/src/perronfrobenius.jl index 39aeff4..5979797 100644 --- a/src/perronfrobenius.jl +++ b/src/perronfrobenius.jl @@ -21,16 +21,23 @@ n = 2 pf = perronfrobenius(sequence, n) ``` """ -function perronfrobenius(sequence::SeqOrView{A}; n::Int64=1) where A +function perronfrobenius( + sequence::SeqOrView{A}; + n::Int64=1 +) where {A} tpm = transition_probability_matrix(sequence, n) return copy(tpm') end -function perronfrobenius(bmc::BioMarkovChain) +function perronfrobenius( + bmc::BioMarkovChain +) return copy(bmc.tpm') end -function perronfrobenius(tpm::Matrix{Float64}) +function perronfrobenius( + tpm::Matrix{Float64} +) return copy(tpm') end From e08694163ce83cabd5addb1c37e5f4094168727e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Thu, 18 Jan 2024 10:17:23 -0500 Subject: [PATCH 2/5] Add Aqua package and tests --- Project.toml | 3 ++- README.md | 1 + test/aquatests.jl | 10 ++++++++++ test/oddstests.jl | 6 ++++++ test/runtests.jl | 19 ++++--------------- test/tpmtests.jl | 11 +++++++++++ 6 files changed, 34 insertions(+), 16 deletions(-) create mode 100644 test/aquatests.jl create mode 100644 test/oddstests.jl create mode 100644 test/tpmtests.jl diff --git a/Project.toml b/Project.toml index 3406cb2..1eb4636 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" [targets] -test = ["Test"] +test = ["Test", "Aqua"] diff --git a/README.md b/README.md index 01a6c00..8f74470 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/camilogarciabotero/BioMarkovChains.jl/blob/main/LICENSE) [![Work in Progress](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) [![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/BioMarkovChains&label=downloads)](https://pkgs.genieframework.com?packages=BioMarkovChains) +[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) diff --git a/test/aquatests.jl b/test/aquatests.jl new file mode 100644 index 0000000..a6ee0ae --- /dev/null +++ b/test/aquatests.jl @@ -0,0 +1,10 @@ +@testset "Aqua" begin + + Aqua.test_ambiguities(BioMarkovChains) + Aqua.test_persistent_tasks(BioMarkovChains) + Aqua.test_piracies(BioMarkovChains) + Aqua.test_stale_deps(BioMarkovChains) + Aqua.test_unbound_args(BioMarkovChains) + Aqua.test_undefined_exports(BioMarkovChains) + +end \ No newline at end of file diff --git a/test/oddstests.jl b/test/oddstests.jl new file mode 100644 index 0000000..2a1cfd5 --- /dev/null +++ b/test/oddstests.jl @@ -0,0 +1,6 @@ +@testset "lorm" begin + + lorm01 = log_odds_ratio_matrix(CPGPOS, CPGNEG, b=2) # Extracted from the Biological Sequence Analysis: probabilistic models of proteins and nucleic acids, Durbin et al. 1998 + @test lorm01 == [-0.7369655941662062 0.4185519834550808 0.5798915111737342 -0.8073549220576043; -0.913064363228719 0.3043934355948513 1.8126298640982785 -0.6838158876474414; -0.6232794322722583 0.4626269577971041 0.3315782649210818 -0.7346554334790053; -1.1638248019058943 0.5708084064112958 0.3951379418411391 -0.6820299186813209] + +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d52cd8e..af2a417 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,23 +1,12 @@ module BioMarkovChainsTests using Test +using Aqua using BioMarkovChains using BioSequences -@testset "lorm" begin - - lorm01 = log_odds_ratio_matrix(CPGPOS, CPGNEG, b=2) # Extracted from the Biological Sequence Analysis: probabilistic models of proteins and nucleic acids, Durbin et al. 1998 - lorm01 == [-0.7369655941662062 0.4185519834550808 0.5798915111737342 -0.8073549220576043; -0.913064363228719 0.3043934355948513 1.8126298640982785 -0.6838158876474414; -0.6232794322722583 0.4626269577971041 0.3315782649210818 -0.7346554334790053; -1.1638248019058943 0.5708084064112958 0.3951379418411391 -0.6820299186813209] -end - -@testset "tpm" begin - seq01 = dna"CCTCCCGGACCCTGGGCTCGGGAC" - tpm01 = transition_probability_matrix(seq01) - @test round.(tpm01, digits = 3) == [0.0 1.0 0.0 0.0; 0.0 0.5 0.2 0.3; 0.25 0.125 0.625 0.0; 0.0 0.667 0.333 0.0] - - seq02 = aa"ACDEFGHIKLMNPQRSTVWY" - tpm02 = transition_probability_matrix(seq02) - @test round.(tpm02, digits = 3) == [0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0] -end +include("tpmtests.jl") +include("oddstests.jl") +include("aquatests.jl") end \ No newline at end of file diff --git a/test/tpmtests.jl b/test/tpmtests.jl new file mode 100644 index 0000000..14b6d97 --- /dev/null +++ b/test/tpmtests.jl @@ -0,0 +1,11 @@ +@testset "tpm" begin + + seq01 = dna"CCTCCCGGACCCTGGGCTCGGGAC" + tpm01 = transition_probability_matrix(seq01) + @test round.(tpm01, digits = 3) == [0.0 1.0 0.0 0.0; 0.0 0.5 0.2 0.3; 0.25 0.125 0.625 0.0; 0.0 0.667 0.333 0.0] + + seq02 = aa"ACDEFGHIKLMNPQRSTVWY" + tpm02 = transition_probability_matrix(seq02) + @test round.(tpm02, digits = 3) == [0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0] + +end \ No newline at end of file From ca1d9e59b6f5aa90c8f308779504f23d67110e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Thu, 18 Jan 2024 10:17:31 -0500 Subject: [PATCH 3/5] Add Aqua QA badge --- docs/src/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/src/index.md b/docs/src/index.md index e5669d9..04df7af 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -20,6 +20,9 @@ License Work in Progress Downloads + + Aqua QA + From 4e8eda3a1240ddb85757eb1046be1fe5a850c97f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Thu, 18 Jan 2024 12:35:45 -0500 Subject: [PATCH 4/5] Update BioMarkovChain output --- README.md | 4 ++-- docs/src/biomarkovchains.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8f74470..b0b4f48 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ BioMarkovChain(orfdna, 2) ``` ``` -BioMarkovChain with DNAAlphabet{4}() Alphabet: +BioMarkovChain of DNAAlphabet{4}(): - Transition Probability Matrix -> Matrix{Float64}(4 × 4): 0.2123 0.2731 0.278 0.2366 0.2017 0.3072 0.2687 0.2224 @@ -95,7 +95,7 @@ ECOLICDS ``` ``` -BioMarkovChain with DNAAlphabet{4}() Alphabet: +BioMarkovChain of DNAAlphabet{4}(): - Transition Probability Matrix -> Matrix{Float64}(4 × 4): 0.31 0.224 0.199 0.268 0.251 0.215 0.313 0.221 diff --git a/docs/src/biomarkovchains.md b/docs/src/biomarkovchains.md index 78431a5..e1b6800 100644 --- a/docs/src/biomarkovchains.md +++ b/docs/src/biomarkovchains.md @@ -122,7 +122,7 @@ build a transition model (`BioMarkovChain`): BioMarkovChain(sequence) ``` - BioMarkovChain with DNA Alphabet: + BioMarkovChain of DNAAlphabet{4}(): - Transition Probability Matrix --> Matrix{Float64}(4 × 4): 0.0 1.0 0.0 0.0 0.0 0.5 0.2 0.3 @@ -149,7 +149,7 @@ the *n-step transition probability matrix* ``\mathscr{M}^{n} = (\mathscr{m}_{ij} BioMarkovChain(sequence, 2) ``` - BioMarkovChain with DNA Alphabet: + BioMarkovChain of DNAAlphabet{4}(): - Transition Probability Matrix --> Matrix{Float64}(4 × 4): 0.0 0.5 0.2 0.3 0.05 0.475 0.325 0.15 From 33143f2bd6377ab3f84aa35be9719809fca3c73f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Fri, 19 Jan 2024 15:44:11 -0500 Subject: [PATCH 5/5] Update method name in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b0b4f48..4961785 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Let find one ORF in a random `LongDNA` : using BioSequences, GeneFinder, BioMarkovChains sequence = randdnaseq(10^3) -orfdna = getorfdna(sequence, min_len=75)[1] +orfdna = get_orfs_dna(sequence, min_len=75)[1] ``` If we translate it, we get a 69aa sequence: