From 2f4b82cde5e67c20f23c694e6786f040d29e59cb Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 29 Dec 2023 11:06:15 +0100 Subject: [PATCH] Optimise complementation of nucleotides (#62) BioSequences complements nucleotides in bulk, so the speed of single-nucleotide complementation matters little there. However, the upcoming Kmers.jl relies on efficient complementation of single nucleotides, so this optimisation is important for it. --- src/nucleicacid.jl | 12 ++++++------ test/runtests.jl | 24 +++++++++++------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/nucleicacid.jl b/src/nucleicacid.jl index 1467ec4..7e2e423 100644 --- a/src/nucleicacid.jl +++ b/src/nucleicacid.jl @@ -427,12 +427,12 @@ RNA_A ``` """ -function complement(nt::NucleicAcid) - bits = compatbits(nt) - return encode( - typeof(nt), - (bits & 0x01) << 3 | (bits & 0x08) >> 3 | - (bits & 0x02) << 1 | (bits & 0x04) >> 1) +function complement(nt::Union{DNA, RNA}) + # This is essentially a lookup table of 16 x 4 bits. + # It's the concatenation of the bitpatterns of the nucleotides, + # in order, complemented. 
+ u64 = 0xf7b3d591e6a2c480 >>> ((4 * encoded_data(nt)) & 63) + reinterpret(typeof(nt), (u64 % UInt8) & 0x0f) end function Base.isvalid(::Type{T}, x::Integer) where T <: NucleicAcid diff --git a/test/runtests.jl b/test/runtests.jl index a80ef5d..706ab9c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -239,19 +239,17 @@ end end @testset "complement" begin - @test complement(DNA_A) === DNA_T - @test complement(DNA_C) === DNA_G - @test complement(DNA_G) === DNA_C - @test complement(DNA_T) === DNA_A - @test complement(DNA_Gap) === DNA_Gap - @test complement(DNA_N) === DNA_N - - @test complement(RNA_A) === RNA_U - @test complement(RNA_C) === RNA_G - @test complement(RNA_G) === RNA_C - @test complement(RNA_U) === RNA_A - @test complement(RNA_Gap) === RNA_Gap - @test complement(RNA_N) === RNA_N + for (a, b) in zip( + "-ACMGRSVTWYHKDBN", + "-TGKCYSBAWRDMHVN" + ) + da, db = DNA(a), DNA(b) + for (i, j) in ((da, db), (RNA(da), RNA(db))) + + @test complement(i) === j + @test complement(j) === i + end + end end @testset "Logic operations and Order" begin