Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate changes #27

Merged
merged 2 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ If you do not have an RDS object handy, feel free to download one from [single-c
```python
from rds2py import as_summarized_experiment, read_rds

rObj = read_rds(<path_to_file>)
r_obj = read_rds(<path_to_file>)
```

Once we have a dictionary representation of the RDS file, we can now build useful Python representations from these objects.
This `r_obj` holds a dictionary representation of the RDS file; we can now transform this object into Python representations.

This `rObj` contains two keys
`r_obj` always contains two keys

- `data`: If atomic entities, contains the numpy view of the memory space.
- `data`: If atomic entities, contains the NumPy view of the array.
- `attributes`: Additional properties available for the object.

The package provides friendly functions to easily convert a few R representations to Python.
In addition, the package provides functions to convert parsed R objects into Python representations.

```python
from rds2py import as_sparse_matrix, as_summarized_experiment
Expand Down
12 changes: 7 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,8 @@ install_requires =
numpy
pandas
scipy
biocframe==0.3.20
singlecellexperiment==0.3.3
summarizedexperiment==0.3.7
anndata
mudata
singlecellexperiment>=0.4.1
summarizedexperiment>=0.4.1

[options.packages.find]
where = src
Expand All @@ -74,6 +71,11 @@ testing =
setuptools
pytest
pytest-cov
numpy
pandas
scipy
singlecellexperiment
summarizedexperiment

[options.entry_points]
# Add here console scripts like:
Expand Down
89 changes: 44 additions & 45 deletions src/rds2py/interface.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from typing import Literal, MutableMapping, Union
from typing import Literal

from numpy import ndarray
from pandas import DataFrame
from scipy.sparse import csc_matrix, csr_matrix
from singlecellexperiment import SingleCellExperiment
from summarizedexperiment import SummarizedExperiment
from biocframe import BiocFrame

from .parser import get_class
from .pdf import as_pandas_from_data_frame, as_pandas_from_dframe
Expand All @@ -14,20 +13,19 @@
__license__ = "MIT"


def as_pandas(robj: MutableMapping) -> DataFrame:
"""Read an R object as a :py:class:`~pandas.DataFrame`.
def as_pandas(robj):
"""Parse an R object as a :py:class:`~pandas.DataFrame`.

Currently supports ``DFrame`` or ``data.frame`` class objects from R.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
robj:
Object parsed from the `RDS` file.

Raises:
TypeError: Is not a valid class.
Usually the result of :py:func:`~rds2py.parser.read_rds`.

Returns:
DataFrame: A `DataFrame` containing the data from the R Object.
A :py:class:`~pandas.DataFrame` containing the data from the R Object.
"""
_cls = get_class(robj)

Expand All @@ -37,25 +35,26 @@ def as_pandas(robj: MutableMapping) -> DataFrame:
return as_pandas_from_data_frame(robj)
else:
raise TypeError(
f"`robj` must be either a 'DFrame' or 'data.frame' but is {_cls}"
f"`robj` must be either a 'DFrame' or 'data.frame' but is {_cls}."
)


def as_sparse_matrix(robj: MutableMapping) -> Union[csc_matrix, csc_matrix]:
"""Read an R object as a sparse matrix.
def as_sparse_matrix(robj):
"""Parse an R object as a sparse matrix.

Only supports reading of `dgCMatrix`, `dgRMatrix`, `dgTMatrix` matrices.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
robj:
Object parsed from the `RDS` file.

Raises:
TypeError: If sparse representation in ``robj`` is not a supported class.
Usually the result of :py:func:`~rds2py.parser.read_rds`.

Returns:
Union[csc_matrix, csc_matrix]: A sparse matrix of the R object.
A sparse matrix of the R object.
"""
from scipy.sparse import csc_matrix, csr_matrix

_cls = get_class(robj)

if _cls not in ["dgCMatrix", "dgRMatrix", "dgTMatrix"]:
Expand Down Expand Up @@ -96,29 +95,31 @@ def as_sparse_matrix(robj: MutableMapping) -> Union[csc_matrix, csc_matrix]:
)


def as_dense_matrix(robj: MutableMapping, order: Literal["C", "F"] = "F") -> ndarray:
"""Read an R object as a :py:class:`~numpy.ndarray`.
def as_dense_matrix(robj, order: Literal["C", "F"] = "F") -> ndarray:
"""Parse an R object as a :py:class:`~numpy.ndarray`.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
robj:
Object parsed from the `RDS` file.

Usually the result of :py:func:`~rds2py.parser.read_rds`.

order (Literal["C", "F"]): Row-major (**C**-style) or Column-major (**F**ortran-style)
order. Defaults to "F".
order:
Row-major (**C**-style) or Column-major (**F**ortran-style)
order.

Raises:
TypeError: If ``robj`` does not contain a dense matrix.
Defaults to "F".

Returns:
ndarray: A dense ndarray of the R object.
An ``ndarray`` of the R object.
"""
_cls = get_class(robj)

if order not in ["C", "F"]:
raise ValueError("order must be either 'C' or 'F'.")

if _cls not in ["densematrix"]:
raise TypeError(f"obj is not a supported dense matrix format, but is `{_cls}`")
raise TypeError(f"obj is not a supported dense matrix format, but is `{_cls}`.")

return ndarray(
shape=tuple(robj["attributes"]["dim"]["data"].tolist()),
Expand All @@ -128,32 +129,30 @@ def as_dense_matrix(robj: MutableMapping, order: Literal["C", "F"] = "F") -> nda
)


def as_summarized_experiment(
robj: MutableMapping,
) -> Union[SummarizedExperiment, SingleCellExperiment]:
"""Read an R object as a :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment` or
def as_summarized_experiment(robj):
"""Parse an R object as a :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment` or
:py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.

Note: This function demonstrates how to parse a complex RDS object.
Note: This function demonstrates how to parse complex RDS objects in Python and may not work for all
scenarios.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
robj:
Object parsed from the `RDS` file.

order (Literal["C", "F"]): Row-major (**C**-style) or Column-major (**F**ortran-style)
order.
Usually the result of :py:func:`~rds2py.parser.read_rds`.

Only used if the ``robj`` contains a :py:class:`~numpy.ndarray`.
order:
Row-major (**C**-style) or Column-major (**F**ortran-style)
order. Only used if the ``robj`` contains a :py:class:`~numpy.ndarray`.

Defaults to "F".

Raises:
TypeError: If ``robj`` is not a supported class.

Returns:
Union[SummarizedExperiment, SingleCellExperiment]: A `SummarizedExperiment` or
A `SummarizedExperiment` or
`SingleCellExperiment` from the R object.
"""

_cls = get_class(robj)

if _cls not in ["SingleCellExperiment", "SummarizedExperiment"]:
Expand Down Expand Up @@ -188,7 +187,7 @@ def as_summarized_experiment(
# parse coldata
robj_coldata = as_pandas_from_dframe(robj["attributes"]["colData"])
if robj_coldata.empty:
robj_coldata = DataFrame({"_cols": range(assay_dims[1])})
robj_coldata = BiocFrame({"_cols": range(assay_dims[1])})

# parse rowRanges
robj_rowdata = None
Expand All @@ -197,7 +196,7 @@ def as_summarized_experiment(
robj["attributes"]["rowRanges"]["attributes"]["elementMetadata"]
)
else:
robj_rowdata = DataFrame({"_rows": range(assay_dims[0])})
robj_rowdata = BiocFrame({"_rows": range(assay_dims[0])})

# check red. dims, alternative expts
robj_reduced_dims = None
Expand Down Expand Up @@ -243,13 +242,13 @@ def as_summarized_experiment(

if _cls == "SummarizedExperiment":
return SummarizedExperiment(
assays=robj_asys, row_data=robj_rowdata, col_data=robj_coldata
assays=robj_asys, row_data=robj_rowdata, column_data=robj_coldata
)
elif _cls == "SingleCellExperiment":
return SingleCellExperiment(
assays=robj_asys,
row_data=robj_rowdata,
col_data=robj_coldata,
column_data=robj_coldata,
alternative_experiments=robj_altExps,
reduced_dims=robj_reduced_dims,
)
Expand Down
28 changes: 12 additions & 16 deletions src/rds2py/pdf.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
from typing import MutableMapping

from pandas import DataFrame

from .parser import get_class

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


def as_pandas_from_data_frame(robj: MutableMapping) -> DataFrame:
def as_pandas_from_data_frame(robj):
"""Read an R object to a :py:class:`~pandas.DataFrame`.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
robj:
Object parsed from the `RDS` file.

Raises:
TypeError: If ``robj`` is not a compatible class.
Usually the result of :py:func:`~rds2py.parser.read_rds`.

Returns:
DataFrame: A `DataFrame` from the R Object.
A `DataFrame` from the R Object.
"""
from pandas import DataFrame

cls = get_class(robj)

Expand All @@ -37,19 +33,19 @@ def as_pandas_from_data_frame(robj: MutableMapping) -> DataFrame:
return df


def as_pandas_from_dframe(robj: MutableMapping) -> DataFrame:
def as_pandas_from_dframe(robj):
"""Convert a realized R object to a pandas data frame representation.

Args:
robj (MutableMapping): Object parsed from the `RDS` file.
Usually the result of :py:func:`~rds2py.parser.read_rds`.
robj:
Object parsed from the `RDS` file.

Raises:
Exception: If ``robj`` does not contain any dataframe.
Usually the result of :py:func:`~rds2py.parser.read_rds`.

Returns:
DataFrame: A `DataFrame` from the R Object.
A `DataFrame` from the R Object.
"""
from pandas import DataFrame

cls = get_class(robj)

Expand Down
Loading