Skip to content

Commit

Permalink
Add reader and writer string macros (#14)
Browse files Browse the repository at this point in the history
This commit implements the `@rdr_str` and `@wtr_str` macros, which autodetect
the correct readers, writers and de/compressors to open a biological file
based on the extensions of the path.

The system is extensible to arbitrary biological formats, but the extensions
of compression formats are hardcoded in this package.

I also add a dubious overload to `Base.open`, such that the reader and writer
macros can be used like so:
```julia
open(rdr"foo.fna", wtr"bar.fq") do reader, writer
    ...
end
```
  • Loading branch information
jakobnissen authored Sep 27, 2023
1 parent 3c20b77 commit 28e7237
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BioGenerics"
uuid = "47718e42-2ac5-11e9-14af-e5595289c2ea"
authors = ["Ben J. Ward <[email protected]>"]
version = "0.1.2"
version = "0.1.3"

[deps]
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
Expand Down
2 changes: 2 additions & 0 deletions src/BioGenerics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ include("IO.jl")
#include("RecordHelper.jl")
include("Testing.jl")

using .IO: readertype, writertype, @rdr_str, @wtr_str

end # module BioGenerics
111 changes: 110 additions & 1 deletion src/IO.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,115 @@ function Base.open(::Type{T}, filepath::AbstractString, args...; kwargs_...) whe
return T(open(filepath, append ? "a" : "w"), args...; kwargs...)
end

#
# This function is deliberately not extensible: we do not expect to be able to control
# compression-related code, whereas we might be able to get PRs merged into the packages
# that implement biological readers.
# That is also why we return code here instead of objects - BioGenerics does not need
# to know what GzipDecompressorStream is, so we just return a quoted name that could be
# anything, and let the module that used the macro resolve it.
# Map a file extension (without the leading dot) to a quoted expression naming the
# matching compressor or decompressor stream type. `read = true` selects the
# decompressor, `read = false` the compressor. Returns `nothing` when `ending` is not
# one of the recognized compression extensions.
function de_compressor_code(ending::Union{String, SubString{String}}, read::Bool)
    # TODO: It would be nice to have a good specialized BGZIP implementation...
    family = if ending == "gzip" || ending == "gz" || ending == "bgzip"
        "Gzip"
    elseif ending == "xz"
        "Xz"
    elseif ending == "zst"
        "Zstd"
    else
        return nothing
    end
    direction = read ? "Decompressor" : "Compressor"
    stream = Symbol(family, direction, "Stream")
    # Only the name is emitted; the module that expands the macro resolves it.
    return quote $(stream) end
end

"""
    readertype(::Val{S}, arg)::T

Return the reader type that opens files whose extension is named by the `Symbol` `S`.

Developers of new biological file format readers should extend this function, for
example `readertype(::Val{:fa}, arg) = FASTA.Reader`.

The extra argument `arg` is the suffix given to the string macro, as in
`rdr"path.ext"arg`, and defaults to the empty string. Its meaning is left to the
person implementing the reader.

The fallback method defined here throws an error naming the unknown extension.
"""
function readertype(@nospecialize(v::Val{S}), arg) where S
    error("Unknown biological file extension: \"$(string(S))\"")
end

"""
    writertype(::Val{S}, arg)::T

Return the writer type that can write a file whose extension is named by the
`Symbol` `S`.

Developers of new biological file format writers should extend this function, for
example `writertype(::Val{:fa}, arg) = FASTA.Writer`.

The extra argument `arg` is the suffix given to the string macro, as in
`wtr"path.ext"arg`, and defaults to the empty string. Its meaning is left to the
person implementing the writer.

The fallback method defined here throws an error naming the unknown extension.
"""
function writertype(@nospecialize(v::Val{S}), arg) where S
    error("Unknown biological file extension: \"$(string(S))\"")
end

# Like `splitext`, but the returned extension has no leading dot.
# Returns a `(stem, extension)` tuple of `String`s; the extension is empty when
# `splitext` finds none.
function pure_ext(path::Union{String, SubString{String}})
    stem, dotted = splitext(path)
    bare = startswith(dotted, '.') ? SubString(dotted, 2) : dotted
    return (String(stem), String(bare))
end

# Build the expression that opens `path` for reading. The innermost call is
# `open(path; lock=false)`; each trailing compression extension recognized by
# `de_compressor_code` wraps it in the matching decompressor, and the first
# non-compression extension selects the reader type through `readertype`.
# `arg` is forwarded to `readertype` unchanged.
function resolve_reader(path::Union{String, SubString{String}}, arg::String)
    expr = quote open($(path); lock=false) end
    (stem, ext) = pure_ext(path)
    while true
        wrapper = de_compressor_code(ext, true)
        wrapper === nothing && break
        expr = quote $(wrapper)($expr) end
        # Peel off the next extension, moving right to left.
        (stem, ext) = pure_ext(stem)
    end
    # The reader type is looked up here, when the expression is built.
    reader = readertype(Val(Symbol(ext)), arg)
    return quote $(reader)($expr) end
end

# Build the expression that opens `path` for writing. The innermost call is
# `open(path, "w"; lock=false)`; each trailing compression extension recognized by
# `de_compressor_code` wraps it in the matching compressor, and the first
# non-compression extension selects the writer type through `writertype`.
# `arg` is forwarded to `writertype` unchanged.
function resolve_writer(path::Union{String, SubString{String}}, arg::String)
    expr = quote open($(path), "w"; lock=false) end
    (stem, ext) = pure_ext(path)
    while true
        wrapper = de_compressor_code(ext, false)
        wrapper === nothing && break
        expr = quote $(wrapper)($expr) end
        # Peel off the next extension, moving right to left.
        (stem, ext) = pure_ext(stem)
    end
    # The writer type is looked up here, when the expression is built.
    writer = writertype(Val(Symbol(ext)), arg)
    return quote $(writer)($expr) end
end

# `rdr"path.ext"arg`: expands to the reader-opening expression built by
# `resolve_reader`, escaped so that the names in it resolve in the calling module.
macro rdr_str(path, arg)
    opener = resolve_reader(path, arg)
    return esc(opener)
end

# `rdr"path.ext"`: same as above with the extra argument set to the empty string.
macro rdr_str(path)
    opener = resolve_reader(path, "")
    return esc(opener)
end

# `wtr"path.ext"arg`: expands to the writer-opening expression built by
# `resolve_writer`, escaped so that the names in it resolve in the calling module.
macro wtr_str(path, arg)
    opener = resolve_writer(path, arg)
    return esc(opener)
end

# `wtr"path.ext"`: same as above with the extra argument set to the empty string.
macro wtr_str(path)
    opener = resolve_writer(path, "")
    return esc(opener)
end

"""
    Base.open(f, ios::Vararg{AbstractFormattedIO})

Execute `f(ios...)`, then `close` each io, and return the value of `f(ios...)`.

`close` is run even if `f(ios...)` throws an exception. Every io is closed even if
closing an earlier one throws; the exception is propagated once all of them have
been attempted.

# Examples
```julia
julia> open(rdr"path/to/seqs.fna") do reader
           # do something with reader
       end
```
"""
function Base.open(f::Function, first::AbstractFormattedIO, rest::Vararg{AbstractFormattedIO})
    ios = (first, rest...)
    try
        return f(ios...)
    finally
        _close_all(ios)
    end
end

# Close every element of `ios` in order. Each `close` sits in its own `try`/`finally`
# so that a failing `close` cannot leave the remaining ios open.
_close_all(::Tuple{}) = nothing
function _close_all(ios::Tuple)
    try
        close(ios[1])
    finally
        _close_all(Base.tail(ios))
    end
    return nothing
end

end # module BioGenerics.IO

2 comments on commit 28e7237

@jakobnissen
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/92343

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.3 -m "<description of version>" 28e72370614d32e7e729676a5e911bb5cad6eee0
git push origin v0.1.3

Please sign in to comment.