From 101e7927329fb075cfe7fcb91f97694036a73509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 25 May 2020 12:21:01 +1000 Subject: [PATCH 1/9] Update CI and documentation for the general registry - Julia LTS - Name tweaks --- .github/workflows/CompatHelper.yml | 19 +++---------------- .github/workflows/Documentation.yml | 14 ++++++-------- .github/workflows/TagBot.yml | 3 +-- .github/workflows/UnitTests.yml | 24 +++++++++++++++++------- Project.toml | 2 +- README.md | 11 ++--------- ci_prep.jl | 3 --- docs/src/index.md | 11 ++--------- 8 files changed, 32 insertions(+), 55 deletions(-) delete mode 100644 ci_prep.jl diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 8f165e50..369f736d 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -9,22 +9,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Add CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")' - name: Run CompatHelper env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e ' - using CompatHelper, Pkg; - my_registries = [ - Pkg.RegistrySpec( - name = "BioJuliaRegistry", - uuid = "ccbd2cc2-2954-11e9-1ccf-f3e7900901ca", - url = "https://github.com/BioJulia/BioJuliaRegistry.git" - ), - Pkg.RegistrySpec( - name = "General", - uuid = "23338594-aafe-5451-b93e-139f81909106", - url = "https://github.com/JuliaRegistries/General.git" - ) - ]; - CompatHelper.main(; registries = my_registries, master_branch = "master");' + COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} + run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")' diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 9c6d41d5..98ec516d 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -1,4 +1,4 @@ -name: Documentation +name: Build Documentation on: 
push: @@ -14,14 +14,12 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@latest + - uses: julia-actions/setup-julia@v1 with: - version: '1.4' - - name: Install dependencies - run: | - julia ci_prep.jl; - julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and deploy + version: '1' + - name: Install Dependencies + run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and Deploy env: # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index e65374b0..32f5b036 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,7 +1,7 @@ name: TagBot on: schedule: - - cron: '0 * * * *' + - cron: 0 0 * * * jobs: TagBot: runs-on: ubuntu-latest @@ -10,4 +10,3 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} ssh: ${{ secrets.TAGBOT_KEY }} - registry: BioJulia/BioJuliaRegistry \ No newline at end of file diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 618c87e3..99ca6c23 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -1,4 +1,4 @@ -name: Unit tests +name: Unit Tests on: - push @@ -7,20 +7,30 @@ on: jobs: test: runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.experimental }} strategy: + fail-fast: false matrix: - julia-version: ['1.1', '1.2', '1.3', '1.4'] + julia-version: + - '1.0' # LTS + - '1' julia-arch: [x86] os: [ubuntu-latest, windows-latest, macOS-latest] + experimental: [false] + include: + - julia-version: nightly + julia-arch: x86 + os: ubuntu-latest + experimental: true steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 + - name: Checkout Repository + 
uses: actions/checkout@v2 + - name: Setup Julia + uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} - - name: Install dependencies - run: julia ci_prep.jl - - name: Run tests + - name: Run Tests uses: julia-actions/julia-runtest@latest - name: Create CodeCov uses: julia-actions/julia-processcoverage@v1 diff --git a/Project.toml b/Project.toml index 32537bc7..089df463 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ IntervalTrees = "524e6230-43b7-53ae-be76-1e9e4d08d11b" BioGenerics = "0.1" DataStructures = "0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17" IntervalTrees = "1.0" -julia = "1.1" +julia = "1" [extras] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/README.md b/README.md index 4213f5a7..7b536131 100644 --- a/README.md +++ b/README.md @@ -12,15 +12,8 @@ The GenomicFeatures package provides utilities for working with interval based g It builds on [IntervalTrees](https://github.com/biojulia/intervaltrees.jl) to provide a data-structures and algorithms for various formats such as [BED](https://github.com/biojulia/bed.jl), [GFF3](https://github.com/biojulia/gff3.jl), [bigWig](https://github.com/biojulia/bigwig.jl) and [bigBed](https://github.com/biojulia/bigbed.jl). ## Installation -Releases of GenomicFeatures version 2.0.0 and above are registered and made available to install through BioJulia's package registry. -By default, Julia's package manager only uses the "General" package registry. 
- -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -After adding the registry to your configuration, you can install GenomicFeatures while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the GenomicFeatures package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). +Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add GenomicFeatures ``` diff --git a/ci_prep.jl b/ci_prep.jl deleted file mode 100644 index f3a7535a..00000000 --- a/ci_prep.jl +++ /dev/null @@ -1,3 +0,0 @@ -using Pkg.Registry -Registry.add(Registry.RegistrySpec(url = "https://github.com/BioJulia/BioJuliaRegistry.git")) -Registry.add(Registry.RegistrySpec(url = "https://github.com/JuliaRegistries/General.git")) diff --git a/docs/src/index.md b/docs/src/index.md index b3ff905a..fa19db36 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,15 +12,8 @@ The GenomicFeatures package provides utilities for working with interval based g It builds on [IntervalTrees](https://github.com/biojulia/intervaltrees.jl) to provide a data-structures and algorithms for various formats such as [BED](https://github.com/biojulia/bed.jl), [GFF3](https://github.com/biojulia/gff3.jl), [bigWig](https://github.com/biojulia/bigwig.jl) and [bigBed](https://github.com/biojulia/bigbed.jl). ## Installation -Releases of GenomicFeatures version 2.0.0 and above are registered and made available to install through BioJulia's package registry. -By default, Julia's package manager only uses the "General" package registry. 
- -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -After adding the registry to your configuration, you can install GenomicFeatures while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the GenomicFeatures package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). +Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add GenomicFeatures ``` From 20f418019af2e0d5af65c918a9dc291e6a9c3d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 10 Jun 2020 02:56:38 +1000 Subject: [PATCH 2/9] Add Benchmarks - Local benchmark script --- .gitignore | 4 ++ benchmark/Project.toml | 9 +++++ benchmark/benchmarks.jl | 80 ++++++++++++++++++++++++++++++++++++++ benchmark/runbenchmarks.jl | 24 ++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 benchmark/Project.toml create mode 100644 benchmark/benchmarks.jl create mode 100644 benchmark/runbenchmarks.jl diff --git a/.gitignore b/.gitignore index 61654834..ac9256cb 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,7 @@ docs/site/ Manifest.toml .DS_Store + +.benchmarkci +benchmark/*.json +benchmark/results diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 00000000..cb67d234 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,9 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" +PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[compat] +GenomicFeatures = "2" diff --git 
a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 00000000..5df8fb0d --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,80 @@ +using BenchmarkTools +# using BenchmarkTools: @benchmarkable, BenchmarkGroup + +using GenomicFeatures + +include(joinpath(@__DIR__, "..", "test", "Utilities.jl")) + +import ..Utilities: random_intervals + +N = 1000 +SEED = 1234 +SEQNAMES = "chr" .* string.(1:3) + +# Generate an array of n random Interval{Int} object. With sequence names +# samples from seqnames, and intervals drawn to lie in [1, maxpos]. +function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int) + seq_dist = Categorical(length(seqnames)) + strand_dist = Categorical(2) + length_dist = Normal(1000, 1000) + intervals = Vector{Interval{Int}}(undef, n) + for i in 1:n + intlen = maxpos + while intlen >= maxpos || intlen <= 0 + intlen = ceil(Int, rand(length_dist)) + end + first = rand(1:maxpos-intlen) + last = first + intlen - 1 + strand = rand(strand_dist) == 1 ? 
STRAND_POS : STRAND_NEG + intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], first, last, strand, i) + end + return intervals +end + +function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int, seed::Int) + Random.seed!(seed) + return random_intervals(seqnames, maxpos, n) +end + +intervals = random_intervals(SEQNAMES, 1000, N, SEED) +intervals_sorted = sort(intervals) + +SUITE = BenchmarkGroup() + +let suite = SUITE["accessors"] = BenchmarkGroup() + s0 = suite["$(typeof(intervals))"] = BenchmarkGroup() + s0["seqname"] = @benchmarkable(seqname.($intervals)) + s0["leftposition"] = @benchmarkable(leftposition.($intervals)) + s0["rightposition"] = @benchmarkable(rightposition.($intervals)) + s0["strand"] = @benchmarkable(strand.($intervals)) + s0["metadata"] = @benchmarkable(metadata.($intervals)) +end + +let suite = SUITE["sort"] = BenchmarkGroup() + suite["$(typeof(intervals))"] = @benchmarkable(sort(i), setup=(i = copy($intervals))) +end + +let suite = SUITE["insert"] = BenchmarkGroup() + suite["shorthand"] = @benchmarkable(IntervalCollection($intervals_sorted)) + suite["type"] = @benchmarkable(IntervalCollection{Int}($intervals_sorted)) +end + +let suite = SUITE["push"] = BenchmarkGroup() + suite["$(typeof(intervals))"] = @benchmarkable([push!(col, i) for i in $intervals], setup=(col=IntervalCollection{Int}())) +end + +let suite = SUITE["eachoverlap"] = BenchmarkGroup() + intervals_a = intervals_sorted + intervals_b = sort(random_intervals(SEQNAMES, 1000, N, SEED+1)) + + col_a = IntervalCollection(intervals_a) + col_b = IntervalCollection(intervals_b) + + As = [intervals_a, col_a] + Bs = [intervals_b, col_b] + + for (A, B) in Iterators.product(As,Bs) + str = "$(typeof(A)), $(typeof(B))" + suite[str] = @benchmarkable(collect(eachoverlap($A,$B))) + end +end diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl new file mode 100644 index 00000000..a9fa569b --- /dev/null +++ b/benchmark/runbenchmarks.jl @@ -0,0 +1,24 @@ +using Pkg + 
+Pkg.activate(@__DIR__) +Pkg.instantiate() + +Pkg.status() + +using PkgBenchmark + +results = benchmarkpkg( + dirname(@__DIR__), + BenchmarkConfig( + env = Dict( + "JULIA_NUM_THREADS" => "1", + "OMP_NUM_THREADS" => "1", + ), + ) +) + +dir_results = joinpath(@__DIR__, "results") +mkpath(dir_results) + +writeresults(joinpath(dir_results, "$(results.date).json"), results) +export_markdown(joinpath(dir_results, "$(results.date).md"), results) From 481fe0d10669e168379b11d156cdca621e48aa4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 10 Jun 2020 03:00:14 +1000 Subject: [PATCH 3/9] Benchmark workflow Conditionally posts benchmark results. --- .github/workflows/Benchmark.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/Benchmark.yml diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml new file mode 100644 index 00000000..26131cad --- /dev/null +++ b/.github/workflows/Benchmark.yml @@ -0,0 +1,27 @@ +name: Run Benchmarks + +on: + - push + - pull_request + +jobs: + Benchmark: + runs-on: ubuntu-latest + env: + JULIA_DEBUG: BenchmarkCI + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Install Dependencies + run: julia -e 'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"' + - name: Run Benchmarks + run: julia -e "using BenchmarkCI; BenchmarkCI.judge()" + - name: Post Results + if: ${{ github.event_name == 'pull_request'}} + run: julia -e "using BenchmarkCI; BenchmarkCI.postjudge()" + - name: Print Judgement + run: julia -e 'using BenchmarkCI; BenchmarkCI.displayjudgement()' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From c10623f6a97f89e964bdc1d19c8f388dbdf362f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 10 Jun 2020 03:54:56 +1000 Subject: [PATCH 4/9] Move code common used for benchmarks and tests into Utilities.jl --- benchmark/benchmarks.jl | 25 
------------------------- test/Utilities.jl | 32 ++++++++++++++++++++++++++++++++ test/runtests.jl | 25 +++---------------------- 3 files changed, 35 insertions(+), 47 deletions(-) create mode 100644 test/Utilities.jl diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 5df8fb0d..b3aaa0ca 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -11,31 +11,6 @@ N = 1000 SEED = 1234 SEQNAMES = "chr" .* string.(1:3) -# Generate an array of n random Interval{Int} object. With sequence names -# samples from seqnames, and intervals drawn to lie in [1, maxpos]. -function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int) - seq_dist = Categorical(length(seqnames)) - strand_dist = Categorical(2) - length_dist = Normal(1000, 1000) - intervals = Vector{Interval{Int}}(undef, n) - for i in 1:n - intlen = maxpos - while intlen >= maxpos || intlen <= 0 - intlen = ceil(Int, rand(length_dist)) - end - first = rand(1:maxpos-intlen) - last = first + intlen - 1 - strand = rand(strand_dist) == 1 ? STRAND_POS : STRAND_NEG - intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], first, last, strand, i) - end - return intervals -end - -function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int, seed::Int) - Random.seed!(seed) - return random_intervals(seqnames, maxpos, n) -end - intervals = random_intervals(SEQNAMES, 1000, N, SEED) intervals_sorted = sort(intervals) diff --git a/test/Utilities.jl b/test/Utilities.jl new file mode 100644 index 00000000..ce68913a --- /dev/null +++ b/test/Utilities.jl @@ -0,0 +1,32 @@ +module Utilities + +using Distributions +using GenomicFeatures +using Random + +# Generate an array of n random Interval{Int} object. With sequence names +# samples from seqnames, and intervals drawn to lie in [1, maxpos]. 
+function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int) + seq_dist = Categorical(length(seqnames)) + strand_dist = Categorical(2) + length_dist = Normal(1000, 1000) + intervals = Vector{Interval{Int}}(undef, n) + for i in 1:n + intlen = maxpos + while intlen >= maxpos || intlen <= 0 + intlen = ceil(Int, rand(length_dist)) + end + first = rand(1:maxpos-intlen) + last = first + intlen - 1 + strand = rand(strand_dist) == 1 ? STRAND_POS : STRAND_NEG + intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], first, last, strand, i) + end + return intervals +end + +function random_intervals(seqnames::Vector{String}, maxpos::Int, n::Int, seed::Int) + Random.seed!(seed) + return random_intervals(seqnames, maxpos, n) +end + +end # module Utilities diff --git a/test/runtests.jl b/test/runtests.jl index be6d8bcb..a6e387df 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,9 +1,11 @@ using GenomicFeatures using Test -using Distributions import Random +include(joinpath(@__DIR__, "Utilities.jl")) +import .Utilities: random_intervals + # Test that an array of intervals is well ordered function is_all_ordered(intervals::Vector{I}) where I <: Interval @@ -15,27 +17,6 @@ function is_all_ordered(intervals::Vector{I}) where I <: Interval return true end -# Generate an array of n random Interval{Int} object. With sequence names -# samples from seqnames, and intervals drawn to lie in [1, maxpos]. -function random_intervals(seqnames, maxpos::Int, n::Int) - seq_dist = Categorical(length(seqnames)) - strand_dist = Categorical(2) - length_dist = Normal(1000, 1000) - intervals = Vector{Interval{Int}}(undef, n) - for i in 1:n - intlen = maxpos - while intlen >= maxpos || intlen <= 0 - intlen = ceil(Int, rand(length_dist)) - end - first = rand(1:maxpos-intlen) - last = first + intlen - 1 - strand = rand(strand_dist) == 1 ? 
STRAND_POS : STRAND_NEG - intervals[i] = Interval{Int}(seqnames[rand(seq_dist)], - first, last, strand, i) - end - return intervals -end - # A simple interval intersection implementation to test against. function simple_intersection(intervals_a, intervals_b; filter=(a,b)->true) sort!(intervals_a) From 4729ada47d2568d13c96d9402ba998a308e290d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Sat, 13 Jun 2020 22:26:33 +1000 Subject: [PATCH 5/9] Increment version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 089df463..a6003430 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GenomicFeatures" uuid = "899a7d2d-5c61-547b-bef9-6698a8d05446" authors = ["Kenta Sato ", "Ben J. Ward ", "Ciarán O’Mara "] -version = "2.0.2" +version = "2.0.3" [deps] BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" From b95741edb7271fb568391ee1163b2b3e34fd636f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 10 Jun 2020 15:16:59 +1000 Subject: [PATCH 6/9] Documentation tweaks --- docs/src/man/intervals.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/man/intervals.md b/docs/src/man/intervals.md index 3cb7888c..2e7b1052 100644 --- a/docs/src/man/intervals.md +++ b/docs/src/man/intervals.md @@ -87,7 +87,7 @@ julia> metadata(i) Collections of intervals are represented using the `IntervalCollection` type, which is a general purpose indexed container for intervals. It supports fast intersection operations as well as insertion, deletion, and sorted iteration. -Interval collections can be initialized by inserting elements one by one using `push!`. +Empty interval collections can be initialized, and intervals elements can be added to the collection one-by-one using `push!`. ```julia # The type parameter (Nothing here) indicates the interval metadata type. 
@@ -98,7 +98,7 @@ for i in 1:100:10000 end ``` -Incrementally building an interval collection like this works, but `IntervalCollection` also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from an array of intervals. +Incrementally building an interval collection like this works, but `IntervalCollection` also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from a sorted vector of intervals. ```julia col = IntervalCollection([Interval("chr1", i, i + 99) for i in 1:100:10000]) From 8a6c44c2ecbba7595f00b841cb0cf3d47bfdb9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Sat, 13 Jun 2020 22:20:38 +1000 Subject: [PATCH 7/9] Add docstrings, jdoctests, and references - Strand docstrings - IntervalCollection docstrings - Interval docstring --- docs/make.jl | 2 ++ docs/src/man/intervals.md | 49 +++++++++++++++++++++++++-------------- src/interval.jl | 13 ++++++++++- src/intervalcollection.jl | 19 +++++++++++---- src/strand.jl | 22 ++++++++++++++++++ 5 files changed, 82 insertions(+), 23 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 3fc235d4..d4576921 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,8 @@ Pkg.instantiate() using Documenter, GenomicFeatures +DocMeta.setdocmeta!(GenomicFeatures, :DocTestSetup, :(using GenomicFeatures); recursive=true) + makedocs( format = Documenter.HTML( edit_link = :commit diff --git a/docs/src/man/intervals.md b/docs/src/man/intervals.md index 2e7b1052..de03a8c7 100644 --- a/docs/src/man/intervals.md +++ b/docs/src/man/intervals.md @@ -8,7 +8,7 @@ Intervals in `GenomicFeatures` are consistent with ranges in Julia: *1-based and When data is read from formats with different representations (i.e. 0-based and/or end-exclusive) they are always converted automatically. 
Similarly when writing data, you should not have to reason about off-by-one errors due to format differences while using functionality provided in `GenomicFeatures`. -The `Interval` type is defined as +The [`Interval`](@ref Interval) type is defined as ```julia struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} seqname::String @@ -19,9 +19,9 @@ struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} end ``` -The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the `Interval` object. +The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the [`Interval`](@ref Interval) object. The `seqname` field holds the sequence name associated with the interval. -The `first` and `last` fields are the leftmost and rightmost positions of the interval, which can be accessed with `leftposition` and `rightposition` functions, respectively. +The `first` and `last` fields are the leftmost and rightmost positions of the interval, which can be accessed with [`leftposition`](@ref leftposition) and [`rightposition`](@ref rightposition) functions, respectively. The `strand` field can take four kinds of values listed in the next table: @@ -32,12 +32,12 @@ The `strand` field can take four kinds of values listed in the next table: | `'-'` | `STRAND_NEG` | negative strand | | `'.'` | `STRAND_BOTH` | non-strand-specific feature | -`Interval` is parameterized on metadata type, which lets it efficiently and precisely be specialized to represent intervals from a variety of formats. +[`Interval`](@ref Interval) is parameterized on metadata type, which lets it efficiently and precisely be specialized to represent intervals from a variety of formats. 
The default strand and metadata value are `STRAND_BOTH` and `nothing`: -```jlcon +```jldoctest; setup = :(using GenomicFeatures) julia> Interval("chr1", 10000, 20000) -GenomicFeatures.Interval{Nothing}: +Interval{Nothing}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 @@ -45,19 +45,18 @@ GenomicFeatures.Interval{Nothing}: metadata: nothing julia> Interval("chr1", 10000, 20000, '+') -GenomicFeatures.Interval{Nothing}: +Interval{Nothing}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 strand: + metadata: nothing - ``` The following example shows all accessor functions for the five fields: -```jlcon +```jldoctest; setup = :(using GenomicFeatures) julia> i = Interval("chr1", 10000, 20000, '+', "some annotation") -GenomicFeatures.Interval{String}: +Interval{String}: sequence name: chr1 leftmost position: 10000 rightmost position: 20000 @@ -78,18 +77,18 @@ STRAND_POS julia> metadata(i) "some annotation" - ``` ## Collections of Intervals -Collections of intervals are represented using the `IntervalCollection` type, which is a general purpose indexed container for intervals. +Collections of intervals are represented using the [`IntervalCollection`](@ref IntervalCollection) type, which is a general purpose indexed container for intervals. It supports fast intersection operations as well as insertion, deletion, and sorted iteration. Empty interval collections can be initialized, and intervals elements can be added to the collection one-by-one using `push!`. -```julia +```@example +using GenomicFeatures # hide # The type parameter (Nothing here) indicates the interval metadata type. col = IntervalCollection{Nothing}() @@ -98,18 +97,32 @@ for i in 1:100:10000 end ``` -Incrementally building an interval collection like this works, but `IntervalCollection` also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from a sorted vector of intervals. 
+Incrementally building an interval collection like this works, but [`IntervalCollection`](@ref IntervalCollection) also has a bulk insertion constructor that is able to build the indexed data structure extremely efficiently from a sorted vector of intervals. -```julia +```jldoctest; setup = :(using GenomicFeatures), output = false col = IntervalCollection([Interval("chr1", i, i + 99) for i in 1:100:10000]) + +# output + +IntervalCollection{Nothing} with 100 intervals: + chr1:1-100 . nothing + chr1:101-200 . nothing + chr1:201-300 . nothing + chr1:301-400 . nothing + chr1:401-500 . nothing + chr1:501-600 . nothing + chr1:601-700 . nothing + chr1:701-800 . nothing + ⋮ + ``` -Building `IntervalCollections` in one shot like this should be preferred when it's convenient or speed is an issue. +Building [`IntervalCollection`](@ref IntervalCollection)s in one shot like this should be preferred when it's convenient or speed is an issue. ## Overlap Query -There are number of `eachoverlap` functions in the `GenomicFeatures` module. +There are a number of [`eachoverlap`](@ref eachoverlap) functions in the `GenomicFeatures` module. They follow two patterns: - interval versus collection queries which return an iterator over intervals in the collection that overlap the query, and - collection versus collection queries which iterate over all pairs of overlapping intervals. 
@@ -118,7 +131,7 @@ They follow two patterns: eachoverlap ``` -The order of interval pairs is the same as the following nested loop but `eachoverlap` is often much faster: +The order of interval pairs is the same as the following nested loop but [`eachoverlap`](@ref eachoverlap) is often much faster: ```julia for a in intervals_a, b in intervals_b if isoverlapping(a, b) diff --git a/src/interval.jl b/src/interval.jl index 41625773..f7c120c5 100644 --- a/src/interval.jl +++ b/src/interval.jl @@ -7,7 +7,18 @@ # License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md # Note, just to be clear: this shadows IntervalTrees.Interval -"A genomic interval specifies interval with some associated metadata" +""" + struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} + +The first three fields (`seqname`, `first`, and `last`) are mandatory arguments when constructing the [`Interval`](@ref Interval) object. + +# Fields +- `seqname::String`: the sequence name associated with the interval. +- `first::Int64`: the leftmost position. +- `last::Int64`: the rightmost position. +- `strand::Strand`: the [`strand`](@ref Strand). +- `metadata::T` +""" struct Interval{T} <: IntervalTrees.AbstractInterval{Int64} seqname::String first::Int64 diff --git a/src/intervalcollection.jl b/src/intervalcollection.jl index 5ff7e55b..b862db5c 100644 --- a/src/intervalcollection.jl +++ b/src/intervalcollection.jl @@ -39,6 +39,7 @@ const ICTreeIntersection{T} = IntervalTrees.Intersection{Int64 const ICTreeIntersectionIterator{F,S,T} = IntervalTrees.IntersectionIterator{F,Int64,Interval{S},64,Interval{T},64} const ICTreeIntervalIntersectionIterator{F,T} = IntervalTrees.IntervalIntersectionIterator{F, Int64,Interval{T},64} +"An IntervalCollection is an efficiently stored and indexed set of annotated genomic intervals." mutable struct IntervalCollection{T} # Sequence name mapped to IntervalTree, which in turn maps intervals to a list of metadata. 
trees::Dict{String,ICTree{T}} @@ -51,11 +52,12 @@ mutable struct IntervalCollection{T} ordered_trees::Vector{ICTree{T}} ordered_trees_outdated::Bool + "Empty initialization." function IntervalCollection{T}() where T return new{T}(Dict{String,ICTree{T}}(), 0, ICTree{T}[], false) end - # Bulk insertion. + "Bulk insertion." function IntervalCollection{T}(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T if sort sort!(intervals) @@ -80,17 +82,26 @@ mutable struct IntervalCollection{T} end end -# Shorthand constructor. +""" + IntervalCollection(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T +Shorthand constructor. +""" function IntervalCollection(intervals::AbstractVector{Interval{T}}, sort::Bool=false) where T return IntervalCollection{T}(intervals, sort) end -# Constructor that offers conversion through collection. +""" + IntervalCollection{T}(data, sort::Bool=false) where T +Constructor that offers conversion through collection. +""" function IntervalCollection{T}(data, sort::Bool=false) where T return IntervalCollection(collect(Interval{T}, data), sort) end -# Constructor that guesses metadatatype, and offers conversion through collection. +""" + IntervalCollection(data, sort::Bool=false) +Constructor that guesses metadatatype, and offers conversion through collection. +""" function IntervalCollection(data, sort::Bool=false) return IntervalCollection(collect(Interval{metadatatype(data)}, data), sort) end diff --git a/src/strand.jl b/src/strand.jl index a3b63523..65648e23 100644 --- a/src/strand.jl +++ b/src/strand.jl @@ -6,9 +6,27 @@ # This file is a part of BioJulia. 
# License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md +""" +# Outer constructors +* [`Strand(strand::Char)`](@ref) +* [`Strand(strand::UInt8)`](@ref) + +[`Strand`](@ref) can take four kinds of values listed in the next table: + +| Symbol | Constant | Meaning | +| :----- | :-------------------- | :-------------------------------- | +| `'?'` | [`STRAND_NA`](@ref) | strand is unknown or inapplicable | +| `'+'` | [`STRAND_POS`](@ref) | positive strand | +| `'-'` | [`STRAND_NEG`](@ref) | negative strand | +| `'.'` | [`STRAND_BOTH`](@ref) | non-strand-specific feature | +""" primitive type Strand 8 end Base.convert(::Type{Strand}, strand::UInt8) = reinterpret(Strand, strand) + +""" + Strand(strand::UInt8) +""" Strand(strand::UInt8) = convert(Strand, strand) Base.convert(::Type{UInt8}, strand::Strand) = reinterpret(UInt8, strand) @@ -45,6 +63,10 @@ function Base.convert(::Type{Strand}, strand::Char) error("'$(strand)' is not a valid strand") end + +""" + Strand(strand::Char) +""" Strand(strand::Char) = convert(Strand, strand) function Base.convert(::Type{Char}, strand::Strand) From 997132f94e654077a38c29f9c97570010cb2bfcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 10 Jun 2020 15:40:54 +1000 Subject: [PATCH 8/9] Coverage example --- src/coverage.jl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/coverage.jl b/src/coverage.jl index 820a2fc0..814138cc 100644 --- a/src/coverage.jl +++ b/src/coverage.jl @@ -19,6 +19,23 @@ For example, given intervals like: This function would return a new set of disjoint intervals with annotated coverage like: [1][-2-][-1-][--2--][--1--] + +# Example + +```jldoctest +julia> intervals = [ + Interval("chr1", 1, 8), + Interval("chr1", 4, 20), + Interval("chr1", 14, 27)]; + +julia> coverage(intervals) +IntervalCollection{UInt32} with 5 intervals: + chr1:1-3 . 1 + chr1:4-8 . 2 + chr1:9-13 . 1 + chr1:14-20 . 2 + chr1:21-27 . 
1 +``` """ function coverage(stream, seqname_isless::Function=isless) cov = IntervalCollection{UInt32}() From d63fea96ef95da39038b95f7979d83bb403b637d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Sat, 13 Jun 2020 22:27:00 +1000 Subject: [PATCH 9/9] Add changelog entries --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0179c5fa..73d94a4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [2.0.3] - 2020-06-13 + +### Added +- Julia LTS Support +- Benchmarks + +### Changed +- Documentation. +- Updated CI for the General registry. + ## [2.0.2] - 2020-05-21 ### Fixed