Skip to content

Commit

Permalink
Support GRanges RDS objects (#32).
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Jan 22, 2024
1 parent c3013da commit da939fd
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ install_requires =
scipy
singlecellexperiment>=0.4.1
summarizedexperiment>=0.4.1
genomicranges>=0.4.9

[options.packages.find]
where = src
Expand Down
111 changes: 111 additions & 0 deletions src/rds2py/granges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from genomicranges import GenomicRanges, SeqInfo
from iranges import IRanges
from biocframe import BiocFrame

from .parser import get_class
from .pdf import as_pandas_from_dframe

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def as_granges(robj):
"""Parse an R object as a :py:class:`~genomicranges.GenomicRanges.GenomicRanges`.
Args:
robj:
Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
Returns:
A ``GenomicRanges`` object.
"""
_cls = get_class(robj)

if _cls not in ["GenomicRanges", "GRanges"]:
raise TypeError(f"obj is not genomic ranges, but is `{_cls}`.")

_range_start = robj["attributes"]["ranges"]["attributes"]["start"]["data"]
_range_width = robj["attributes"]["ranges"]["attributes"]["width"]["data"]
_range_names = None
if "NAMES" in robj["attributes"]["ranges"]["attributes"]:
_range_names = robj["attributes"]["ranges"]["attributes"]["NAMES"]["data"]
_ranges = IRanges(_range_start, _range_width, names=_range_names)

_seqnames = _as_list(robj["attributes"]["seqnames"])

_strand_obj = robj["attributes"]["strand"]["attributes"]["values"]
_strands = _strand_obj["data"]
if "attributes" in _strands:
if "levels" in _strands["attributes"]:
_levels_data = _strands["attributes"]["levels"]["data"]
_strands = [_levels_data[x] for x in _strands]

_seqinfo_seqnames = robj["attributes"]["seqinfo"]["attributes"]["seqnames"]["data"]
_seqinfo_seqlengths = robj["attributes"]["seqinfo"]["attributes"]["seqlengths"][
"data"
]
_seqinfo_is_circular = robj["attributes"]["seqinfo"]["attributes"]["is_circular"][
"data"
]
_seqinfo_genome = robj["attributes"]["seqinfo"]["attributes"]["genome"]["data"]
_seqinfo = SeqInfo(
seqnames=_seqinfo_seqnames,
seqlengths=[None if x == -2147483648 else x for x in _seqinfo_seqlengths],
is_circular=[None if x == -2147483648 else x for x in _seqinfo_is_circular],
genome=_seqinfo_genome,
)

_mcols = BiocFrame.from_pandas(
as_pandas_from_dframe(robj["attributes"]["elementMetadata"])
)

_gr_names = None
if "NAMES" in robj["attributes"]:
_gr_names = robj["attributes"]["NAMES"]["data"]

return GenomicRanges(
seqnames=_seqnames,
ranges=_ranges,
names=_gr_names,
mcols=_mcols,
seqinfo=_seqinfo,
)


def _as_list(robj):
"""Parse an R object as a :py:class:`~list`.
Args:
robj:
Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
Returns:
A ``list`` of the Rle class.
"""
_cls = get_class(robj)

if _cls not in ["Rle"]:
raise TypeError(f"obj is not Rle, but is `{_cls}`.")

_attr_vals = robj["attributes"]
_data = _attr_vals["values"]["data"].tolist()
if "attributes" in _attr_vals["values"]:
if "levels" in _attr_vals["values"]["attributes"]:
_levels_data = _attr_vals["values"]["attributes"]["levels"]["data"]
_data = [_levels_data[x - 1] for x in _data]

if "lengths" in _attr_vals:
_final = []
_lengths = _attr_vals["lengths"]["data"]

for idx, lg in enumerate(_lengths.tolist()):
_final.extend([_data[idx]] * lg)

_data = _final

return _data
11 changes: 11 additions & 0 deletions tests/data/generate_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,14 @@ saveRDS(y, file="s4_matrix.rds")
setClass("FOO", slots=c(bar="integer"))
y <- new("FOO", bar=2L)
saveRDS(y, file="s4_class.rds")

# GenomicRanges

gr <- GRanges(
seqnames = Rle(c("chr1", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)),
ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)),
strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
score = 1:10,
GC = seq(1, 0, length=10))

saveRDS(gr, file="granges.rds")
Binary file added tests/data/granges.rds
Binary file not shown.
18 changes: 18 additions & 0 deletions tests/test_granges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest

from rds2py.granges import as_granges
from rds2py.parser import read_rds

from genomicranges import GenomicRanges

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def test_granges():
robj = read_rds("tests/data/granges.rds")

gr = as_granges(robj=robj)

assert isinstance(gr, GenomicRanges)

0 comments on commit da939fd

Please sign in to comment.