diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml new file mode 100644 index 00000000..26131cad --- /dev/null +++ b/.github/workflows/Benchmark.yml @@ -0,0 +1,27 @@ +name: Run Benchmarks + +on: + - push + - pull_request + +jobs: + Benchmark: + runs-on: ubuntu-latest + env: + JULIA_DEBUG: BenchmarkCI + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Install Dependencies + run: julia -e 'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"' + - name: Run Benchmarks + run: julia -e "using BenchmarkCI; BenchmarkCI.judge()" + - name: Post Results + if: ${{ github.event_name == 'pull_request'}} + run: julia -e "using BenchmarkCI; BenchmarkCI.postjudge()" + env: # postjudge() posts the PR comment, so the token must be set on this step + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Print Judgement + run: julia -e 'using BenchmarkCI; BenchmarkCI.displayjudgement()' diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 8f165e50..369f736d 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -9,22 +9,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Add CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")' - name: Run CompatHelper env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e ' - using CompatHelper, Pkg; - my_registries = [ - Pkg.RegistrySpec( - name = "BioJuliaRegistry", - uuid = "ccbd2cc2-2954-11e9-1ccf-f3e7900901ca", - url = "https://github.com/BioJulia/BioJuliaRegistry.git" - ), - Pkg.RegistrySpec( - name = "General", - uuid = "23338594-aafe-5451-b93e-139f81909106", - url = "https://github.com/JuliaRegistries/General.git" - ) - ]; - CompatHelper.main(; registries = my_registries, master_branch = "master");' + COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} + run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")' diff --git 
a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 9c6d41d5..98ec516d 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -1,4 +1,4 @@ -name: Documentation +name: Build Documentation on: push: @@ -14,14 +14,12 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest + - uses: julia-actions/setup-julia@v1 with: - version: '1.4' - - name: Install dependencies - run: | - julia ci_prep.jl; - julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and deploy + version: '1' + - name: Install Dependencies + run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and Deploy env: # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index e65374b0..32f5b036 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,7 +1,7 @@ name: TagBot on: schedule: - - cron: '0 * * * *' + - cron: 0 0 * * * jobs: TagBot: runs-on: ubuntu-latest @@ -10,4 +10,3 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} ssh: ${{ secrets.TAGBOT_KEY }} - registry: BioJulia/BioJuliaRegistry \ No newline at end of file diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 618c87e3..99ca6c23 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -1,4 +1,4 @@ -name: Unit tests +name: Unit Tests on: - push @@ -7,20 +7,30 @@ on: jobs: test: runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.experimental }} strategy: + fail-fast: false matrix: - julia-version: ['1.1', '1.2', '1.3', '1.4'] + julia-version: + - '1.0' # LTS + - '1' julia-arch: [x86] os: [ubuntu-latest, 
windows-latest, macOS-latest] + experimental: [false] + include: + - julia-version: nightly + julia-arch: x86 + os: ubuntu-latest + experimental: true steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 + - name: Checkout Repository + uses: actions/checkout@v2 + - name: Setup Julia + uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} - - name: Install dependencies - run: julia ci_prep.jl - - name: Run tests + - name: Run Tests uses: julia-actions/julia-runtest@latest - name: Create CodeCov uses: julia-actions/julia-processcoverage@v1 diff --git a/.gitignore b/.gitignore index 61654834..ac9256cb 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,7 @@ docs/site/ Manifest.toml .DS_Store + +.benchmarkci +benchmark/*.json +benchmark/results diff --git a/CHANGELOG.md b/CHANGELOG.md index 0179c5fa..73d94a4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [2.0.3] - 2020-06-13 + +### Added +- Julia LTS Support +- Benchmarks + +### Changed +- Documentation. +- Updated CI for General Repository. + ## [2.0.2] - 2020-05-21 ### Fixed diff --git a/Project.toml b/Project.toml index 32537bc7..a6003430 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GenomicFeatures" uuid = "899a7d2d-5c61-547b-bef9-6698a8d05446" authors = ["Kenta Sato ", "Ben J. 
Ward ", "Ciarán O’Mara "] -version = "2.0.2" +version = "2.0.3" [deps] BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" @@ -12,7 +12,7 @@ IntervalTrees = "524e6230-43b7-53ae-be76-1e9e4d08d11b" BioGenerics = "0.1" DataStructures = "0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17" IntervalTrees = "1.0" -julia = "1.1" +julia = "1" [extras] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/README.md b/README.md index 4213f5a7..7b536131 100644 --- a/README.md +++ b/README.md @@ -12,15 +12,8 @@ The GenomicFeatures package provides utilities for working with interval based g It builds on [IntervalTrees](https://github.com/biojulia/intervaltrees.jl) to provide a data-structures and algorithms for various formats such as [BED](https://github.com/biojulia/bed.jl), [GFF3](https://github.com/biojulia/gff3.jl), [bigWig](https://github.com/biojulia/bigwig.jl) and [bigBed](https://github.com/biojulia/bigbed.jl). ## Installation -Releases of GenomicFeatures version 2.0.0 and above are registered and made available to install through BioJulia's package registry. -By default, Julia's package manager only uses the "General" package registry. - -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -After adding the registry to your configuration, you can install GenomicFeatures while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the GenomicFeatures package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). 
+Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add GenomicFeatures ``` diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 00000000..cb67d234 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,9 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" +PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[compat] +GenomicFeatures = "2" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 00000000..b3aaa0ca --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,55 @@ +using BenchmarkTools +# using BenchmarkTools: @benchmarkable, BenchmarkGroup + +using GenomicFeatures + +include(joinpath(@__DIR__, "..", "test", "Utilities.jl")) + +import ..Utilities: random_intervals + +N = 1000 +SEED = 1234 +SEQNAMES = "chr" .* string.(1:3) + +intervals = random_intervals(SEQNAMES, 1000, N, SEED) +intervals_sorted = sort(intervals) + +SUITE = BenchmarkGroup() + +let suite = SUITE["accessors"] = BenchmarkGroup() + s0 = suite["$(typeof(intervals))"] = BenchmarkGroup() + s0["seqname"] = @benchmarkable(seqname.($intervals)) + s0["leftposition"] = @benchmarkable(leftposition.($intervals)) + s0["rightposition"] = @benchmarkable(rightposition.($intervals)) + s0["strand"] = @benchmarkable(strand.($intervals)) + s0["metadata"] = @benchmarkable(metadata.($intervals)) +end + +let suite = SUITE["sort"] = BenchmarkGroup() + suite["$(typeof(intervals))"] = @benchmarkable(sort(i), setup=(i = copy($intervals))) +end + +let suite = SUITE["insert"] = BenchmarkGroup() + suite["shorthand"] = @benchmarkable(IntervalCollection($intervals_sorted)) + suite["type"] = @benchmarkable(IntervalCollection{Int}($intervals_sorted)) +end + +let suite = SUITE["push"] = BenchmarkGroup() + 
suite["$(typeof(intervals))"] = @benchmarkable([push!(col, i) for i in $intervals], setup=(col=IntervalCollection{Int}())) +end + +let suite = SUITE["eachoverlap"] = BenchmarkGroup() + intervals_a = intervals_sorted + intervals_b = sort(random_intervals(SEQNAMES, 1000, N, SEED+1)) + + col_a = IntervalCollection(intervals_a) + col_b = IntervalCollection(intervals_b) + + As = [intervals_a, col_a] + Bs = [intervals_b, col_b] + + for (A, B) in Iterators.product(As,Bs) + str = "$(typeof(A)), $(typeof(B))" + suite[str] = @benchmarkable(collect(eachoverlap($A,$B))) + end +end diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl new file mode 100644 index 00000000..a9fa569b --- /dev/null +++ b/benchmark/runbenchmarks.jl @@ -0,0 +1,24 @@ +using Pkg + +Pkg.activate(@__DIR__) +Pkg.instantiate() + +Pkg.status() + +using PkgBenchmark + +results = benchmarkpkg( + dirname(@__DIR__), + BenchmarkConfig( + env = Dict( + "JULIA_NUM_THREADS" => "1", + "OMP_NUM_THREADS" => "1", + ), + ) +) + +dir_results = joinpath(@__DIR__, "results") +mkpath(dir_results) + +writeresults(joinpath(dir_results, "$(results.date).json"), results) +export_markdown(joinpath(dir_results, "$(results.date).md"), results) diff --git a/ci_prep.jl b/ci_prep.jl deleted file mode 100644 index f3a7535a..00000000 --- a/ci_prep.jl +++ /dev/null @@ -1,3 +0,0 @@ -using Pkg.Registry -Registry.add(Registry.RegistrySpec(url = "https://github.com/BioJulia/BioJuliaRegistry.git")) -Registry.add(Registry.RegistrySpec(url = "https://github.com/JuliaRegistries/General.git")) diff --git a/docs/make.jl b/docs/make.jl index 3fc235d4..d4576921 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,8 @@ Pkg.instantiate() using Documenter, GenomicFeatures +DocMeta.setdocmeta!(GenomicFeatures, :DocTestSetup, :(using GenomicFeatures); recursive=true) + makedocs( format = Documenter.HTML( edit_link = :commit diff --git a/docs/src/index.md b/docs/src/index.md index b3ff905a..fa19db36 100644 --- a/docs/src/index.md +++ 
b/docs/src/index.md @@ -12,15 +12,8 @@ The GenomicFeatures package provides utilities for working with interval based g It builds on [IntervalTrees](https://github.com/biojulia/intervaltrees.jl) to provide a data-structures and algorithms for various formats such as [BED](https://github.com/biojulia/bed.jl), [GFF3](https://github.com/biojulia/gff3.jl), [bigWig](https://github.com/biojulia/bigwig.jl) and [bigBed](https://github.com/biojulia/bigbed.jl). ## Installation -Releases of GenomicFeatures version 2.0.0 and above are registered and made available to install through BioJulia's package registry. -By default, Julia's package manager only uses the "General" package registry. - -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -After adding the registry to your configuration, you can install GenomicFeatures while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the GenomicFeatures package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). +Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add GenomicFeatures ``` diff --git a/docs/src/man/intervals.md b/docs/src/man/intervals.md index 3cb7888c..de03a8c7 100644 --- a/docs/src/man/intervals.md +++ b/docs/src/man/intervals.md @@ -8,7 +8,7 @@ Intervals in `GenomicFeatures` are consistent with ranges in Julia: *1-based and When data is read from formats with different representations (i.e. 0-based and/or end-exclusive) they are always converted automatically. Similarly when writing data, you should not have to reason about off-by-one errors due to format differences while using functionality provided in `GenomicFeatures`. 
-The `Interval` type is defined as +The [`Interval`](@ref Interval) type is defined as ```julia struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} seqname::String @@ -19,9 +19,9 @@ struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} end ``` -The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the `Interval` object. +The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the [`Interval`](@ref Interval) object. The `seqname` field holds the sequence name associated with the interval. -The `first` and `last` fields are the leftmost and rightmost positions of the interval, which can be accessed with `leftposition` and `rightposition` functions, respectively. +The `first` and `last` fields are the leftmost and rightmost positions of the interval, which can be accessed with [`leftposition`](@ref leftposition) and [`rightposition`](@ref rightposition) functions, respectively. The `strand` field can take four kinds of values listed in the next table: @@ -32,12 +32,12 @@ The `strand` field can take four kinds of values listed in the next table: | `'-'` | `STRAND_NEG` | negative strand | | `'.'` | `STRAND_BOTH` | non-strand-specific feature | -`Interval` is parameterized on metadata type, which lets it efficiently and precisely be specialized to represent intervals from a variety of formats. +[`Interval`](@ref Interval) is parameterized on metadata type, which lets it efficiently and precisely be specialized to represent intervals from a variety of formats. 
The default strand and metadata value are `STRAND_BOTH` and `nothing`: -```jlcon +```jldoctest; setup = :(using GenomicFeatures) julia> Interval("chr1", 10000, 20000) -GenomicFeatures.Interval{Nothing}: +Interval{Nothing}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 @@ -45,19 +45,18 @@ GenomicFeatures.Interval{Nothing}: metadata: nothing julia> Interval("chr1", 10000, 20000, '+') -GenomicFeatures.Interval{Nothing}: +Interval{Nothing}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 strand: + metadata: nothing - ``` The following example shows all accessor functions for the five fields: -```jlcon +```jldoctest; setup = :(using GenomicFeatures) julia> i = Interval("chr1", 10000, 20000, '+', "some annotation") -GenomicFeatures.Interval{String}: +Interval{String}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 @@ -78,18 +77,18 @@ STRAND_POS julia> metadata(i) "some annotation" - ``` ## Collections of Intervals -Collections of intervals are represented using the `IntervalCollection` type, which is a general purpose indexed container for intervals. +Collections of intervals are represented using the [`IntervalCollection`](@ref IntervalCollection) type, which is a general purpose indexed container for intervals. It supports fast intersection operations as well as insertion, deletion, and sorted iteration. -Interval collections can be initialized by inserting elements one by one using `push!`. +Empty interval collections can be initialized, and interval elements can be added to the collection one-by-one using `push!`. -```julia +```@example +using GenomicFeatures # hide # The type parameter (Nothing here) indicates the interval metadata type. 
col = IntervalCollection{Nothing}() @@ -98,18 +97,32 @@ for i in 1:100:10000 end ``` -Incrementally building an interval collection like this works, but `IntervalCollection` also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from an array of intervals. +Incrementally building an interval collection like this works, but [`IntervalCollection`](@ref IntervalCollection) also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from a sorted vector of intervals. -```julia +```jldoctest; setup = :(using GenomicFeatures), output = false col = IntervalCollection([Interval("chr1", i, i + 99) for i in 1:100:10000]) + +# output + +IntervalCollection{Nothing} with 100 intervals: + chr1:1-100 . nothing + chr1:101-200 . nothing + chr1:201-300 . nothing + chr1:301-400 . nothing + chr1:401-500 . nothing + chr1:501-600 . nothing + chr1:601-700 . nothing + chr1:701-800 . nothing + ⋮ + ``` -Building `IntervalCollections` in one shot like this should be preferred when it's convenient or speed is an issue. +Building [`IntervalCollection`](@ref IntervalCollection)s in one shot like this should be preferred when it's convenient or speed is an issue. ## Overlap Query -There are number of `eachoverlap` functions in the `GenomicFeatures` module. +There are a number of [`eachoverlap`](@ref eachoverlap) functions in the `GenomicFeatures` module. They follow two patterns: - interval versus collection queries which return an iterator over intervals in the collection that overlap the query, and - collection versus collection queries which iterate over all pairs of overlapping intervals. 
@@ -118,7 +131,7 @@ They follow two patterns: eachoverlap ``` -The order of interval pairs is the same as the following nested loop but `eachoverlap` is often much faster: +The order of interval pairs is the same as the following nested loop but [`eachoverlap`](@ref eachoverlap) is often much faster: ```julia for a in intervals_a, b in intervals_b if isoverlapping(a, b) diff --git a/src/coverage.jl b/src/coverage.jl index 820a2fc0..814138cc 100644 --- a/src/coverage.jl +++ b/src/coverage.jl @@ -19,6 +19,23 @@ For example, given intervals like: This function would return a new set of disjoint intervals with annotated coverage like: [1][-2-][-1-][--2--][--1--] + +# Example + +```jldoctest +julia> intervals = [ + Interval("chr1", 1, 8), + Interval("chr1", 4, 20), + Interval("chr1", 14, 27)]; + +julia> coverage(intervals) +IntervalCollection{UInt32} with 5 intervals: + chr1:1-3 . 1 + chr1:4-8 . 2 + chr1:9-13 . 1 + chr1:14-20 . 2 + chr1:21-27 . 1 +``` """ function coverage(stream, seqname_isless::Function=isless) cov = IntervalCollection{UInt32}() diff --git a/src/interval.jl b/src/interval.jl index 41625773..f7c120c5 100644 --- a/src/interval.jl +++ b/src/interval.jl @@ -7,7 +7,18 @@ # License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md # Note, just to be clear: this shadows IntervalTrees.Interval -"A genomic interval specifies interval with some associated metadata" +""" + struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} + +The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the [`Interval`](@ref Interval) object. + +# Fields +- `seqname::String`: the sequence name associated with the interval. +- `first::Int64`: the leftmost position. +- `last::Int64`: the rightmost position. +- `strand::Strand`: the [`strand`](@ref Strand). 
+- `metadata::T` +""" struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} seqname::String first::Int64 diff --git a/src/intervalcollection.jl b/src/intervalcollection.jl index 5ff7e55b..b862db5c 100644 --- a/src/intervalcollection.jl +++ b/src/intervalcollection.jl @@ -39,6 +39,7 @@ const ICTreeIntersection{T} = IntervalTrees.Intersection{Int64 const ICTreeIntersectionIterator{F,S,T} = IntervalTrees.IntersectionIterator{F,Int64,Interval{S},64,Interval{T},64} const ICTreeIntervalIntersectionIterator{F,T} = IntervalTrees.IntervalIntersectionIterator{F, Int64,Interval{T},64} +"An IntervalCollection is an efficiently stored and indexed set of annotated genomic intervals." mutable struct IntervalCollection{T} # Sequence name mapped to IntervalTree, which in turn maps intervals to a list of metadata. trees::Dict{String,ICTree{T}} @@ -51,11 +52,12 @@ mutable struct IntervalCollection{T} ordered_trees::Vector{ICTree{T}} ordered_trees_outdated::Bool + "Empty initialization." function IntervalCollection{T}() where T return new{T}(Dict{String,ICTree{T}}(), 0, ICTree{T}[], false) end - # Bulk insertion. + "Bulk insertion." function IntervalCollection{T}(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T if sort sort!(intervals) @@ -80,17 +82,26 @@ mutable struct IntervalCollection{T} end end -# Shorthand constructor. +""" + IntervalCollection(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T +Shorthand constructor. +""" function IntervalCollection(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T return IntervalCollection{T}(intervals, sort) end -# Constructor that offers conversion through collection. +""" + IntervalCollection{T}(data, sort::Bool=false) where T +Constructor that offers conversion through collection. 
+""" function IntervalCollection{T}(data, sort::Bool=false) where T return IntervalCollection(collect(Interval{T}, data), sort) end -# Constructor that guesses metadatatype, and offers conversion through collection. +""" + IntervalCollection(data, sort::Bool=false) +Constructor that guesses metadatatype, and offers conversion through collection. +""" function IntervalCollection(data, sort::Bool=false) return IntervalCollection(collect(Interval{metadatatype(data)}, data), sort) end diff --git a/src/strand.jl b/src/strand.jl index a3b63523..65648e23 100644 --- a/src/strand.jl +++ b/src/strand.jl @@ -6,9 +6,27 @@ # This file is a part of BioJulia. # License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md +""" +# Outer constructors +* [`Strand(strand::Char)`](@ref) +* [`Strand(strand::UInt8)`](@ref) + +[`Strand`](@ref) can take four kinds of values listed in the next table: + +| Symbol | Constant | Meaning | +| :----- | :-------------------- | :-------------------------------- | +| `'?'` | [`STRAND_NA`](@ref) | strand is unknown or inapplicable | +| `'+'` | [`STRAND_POS`](@ref) | positive strand | +| `'-'` | [`STRAND_NEG`](@ref) | negative strand | +| `'.'` | [`STRAND_BOTH`](@ref) | non-strand-specific feature | +""" primitive type Strand 8 end Base.convert(::Type{Strand}, strand::UInt8) = reinterpret(Strand, strand) + +""" + Strand(strand::UInt8) +""" Strand(strand::UInt8) = convert(Strand, strand) Base.convert(::Type{UInt8}, strand::Strand) = reinterpret(UInt8, strand) @@ -45,6 +63,10 @@ function Base.convert(::Type{Strand}, strand::Char) error("'$(strand)' is not a valid strand") end + +""" + Strand(strand::Char) +""" Strand(strand::Char) = convert(Strand, strand) function Base.convert(::Type{Char}, strand::Strand) diff --git a/test/Utilities.jl b/test/Utilities.jl new file mode 100644 index 00000000..ce68913a --- /dev/null +++ b/test/Utilities.jl @@ -0,0 +1,32 @@ +module Utilities + +using Distributions +using GenomicFeatures +using Random + +# 
Generate an array of n random Interval{Int} objects, with sequence names +# sampled from seqnames, and intervals drawn to lie in [1, maxpos]. +function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int) + seq_dist = Categorical(length(seqnames)) + strand_dist = Categorical(2) + length_dist = Normal(1000, 1000) + intervals = Vector{Interval{Int}}(undef, n) + for i in 1:n + intlen = maxpos + while intlen >= maxpos || intlen <= 0 + intlen = ceil(Int, rand(length_dist)) + end + first = rand(1:maxpos-intlen) + last = first + intlen - 1 + strand = rand(strand_dist) == 1 ? STRAND_POS : STRAND_NEG + intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], first, last, strand, i) + end + return intervals +end + +function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int, seed::Int) + Random.seed!(seed) + return random_intervals(seqnames, maxpos, n) +end + +end # module Utilities diff --git a/test/runtests.jl b/test/runtests.jl index be6d8bcb..a6e387df 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,9 +1,11 @@ using GenomicFeatures using Test -using Distributions import Random +include(joinpath(@__DIR__, "Utilities.jl")) +import .Utilities: random_intervals + # Test that an array of intervals is well ordered function is_all_ordered(intervals::Vector{I}) where I <: Interval @@ -15,27 +17,6 @@ function is_all_ordered(intervals::Vector{I}) where I <: Interval return true end -# Generate an array of n random Interval{Int} object. With sequence names -# samples from seqnames, and intervals drawn to lie in [1, maxpos]. -function random_intervals(seqnames, maxpos::Int, n::Int) - seq_dist = Categorical(length(seqnames)) - strand_dist = Categorical(2) - length_dist = Normal(1000, 1000) - intervals = Vector{Interval{Int}}(undef, n) - for i in 1:n - intlen = maxpos - while intlen >= maxpos || intlen <= 0 - intlen = ceil(Int, rand(length_dist)) - end - first = rand(1:maxpos-intlen) - last = first + intlen - 1 - strand = rand(strand_dist) == 1 ? 
STRAND_POS : STRAND_NEG - intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], - first, last, strand, i) - end - return intervals -end - # A simple interval intersection implementation to test against. function simple_intersection(intervals_a, intervals_b; filter=(a,b)->true) sort!(intervals_a)