Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handle sparse matrix reading for big-endian byte order #163

Merged
merged 4 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/elisa/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
significance_lima,
)
from elisa.plot.misc import get_colors
from elisa.util.misc import to_native_byteorder

if TYPE_CHECKING:
NDArray = np.ndarray
Expand Down Expand Up @@ -1138,8 +1139,11 @@ def __init__(
photon_egrid = np.array(photon_egrid, dtype=np.float64, order='C')
channel_emin = np.array(channel_emin, dtype=np.float64, order='C')
channel_emax = np.array(channel_emax, dtype=np.float64, order='C')
response_matrix = coo_array(response_matrix)
response_matrix.eliminate_zeros()
if not isinstance(response_matrix, sparray):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question (bug_risk): Revisit the conditional conversion for response_matrix.

Currently, the conversion to native byte order is skipped if response_matrix is already an instance of sparray. Ensure that sparse matrices are always natively ordered or consider if they might also need conversion to avoid potential incompatibility issues with scipy.sparse.

# convert to native byteorder to be compatible with scipy.sparse
response_matrix = to_native_byteorder(response_matrix)
response_matrix = coo_array(response_matrix)
response_matrix.eliminate_zeros()
channel = np.array(channel, dtype=str, order='C')

photon_egrid_shape = np.shape(photon_egrid)
Expand Down
4 changes: 4 additions & 0 deletions src/elisa/data/ogip.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from scipy.sparse import coo_array

from elisa.data.base import ObservationData, ResponseData, SpectrumData
from elisa.util.misc import to_native_byteorder

if TYPE_CHECKING:
NDArray = np.ndarray
Expand Down Expand Up @@ -629,6 +630,9 @@ def _read_response(self, file: str, response_id: int) -> dict:
else:
reduced_matrix = np.hstack(matrix)

# convert to native byteorder to be compatible with scipy.sparse
reduced_matrix = to_native_byteorder(reduced_matrix)

sparse_matrix = coo_array(
(reduced_matrix, (rows, cols)),
shape=(len(response_data), channel_number),
Expand Down
10 changes: 10 additions & 0 deletions src/elisa/util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from collections.abc import Sequence
from typing import Callable, Literal, TypeVar

from numpy import ndarray as NDArray

from elisa.util.typing import CompEval

T = TypeVar('T')
Expand Down Expand Up @@ -571,3 +573,11 @@ def _wrapper_progress_bar(i, vals):
return _wrapper_progress_bar

return progress_bar_fori_loop


def to_native_byteorder(arr: NDArray) -> NDArray:
    """Convert an array to native byte order.

    Parameters
    ----------
    arr : ndarray
        The array to convert.

    Returns
    -------
    ndarray
        The input array itself when its dtype already has native (or
        byte-order-irrelevant) byte order, otherwise a byte-swapped copy
        with a native-order dtype.
    """
    # ``dtype.isnative`` is True both for native-order dtypes ('=') and for
    # byte-order-irrelevant single-byte dtypes ('|', e.g. uint8, bool).
    # Checking ``byteorder != '='`` alone would needlessly copy the latter.
    if arr.dtype.isnative:
        return arr
    return arr.astype(arr.dtype.newbyteorder('='))
41 changes: 41 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import pytest
from jax.experimental.sparse import BCSR

from elisa.data.grouping import significance_gv, significance_lima
from elisa.data.ogip import Response, ResponseData
from elisa.models import PowerLaw


Expand Down Expand Up @@ -117,3 +120,41 @@ def test_data_grouping():
data.spec_counts, data.back_counts, data.back_errors, data.back_ratio
)
assert np.all(sig >= scale)


@pytest.mark.parametrize(
    'file',
    [
        # HXMT response files, which store the matrix in big-endian order
        'docs/notebooks/data/P011160500104_LE.rsp',
        'docs/notebooks/data/P011160500104_ME.rsp',
        'docs/notebooks/data/P011160500104_HE.rsp',
    ],
)
def test_load_response(file):
    """Test that Response loads big-endian response files correctly."""
    # test Response against big-endian files
    rsp = Response(file)
    # channel FWHM must be positive for a sanely loaded energy grid
    assert np.all(rsp.channel_fwhm > 0)
    # test if the response matrix can be converted to a BCSR matrix in JAX;
    # this fails if the sparse matrix carries a non-native byte order
    assert np.array_equal(
        rsp.matrix,
        BCSR.from_scipy_sparse(rsp.sparse_matrix).todense(),
    )


def test_response():
    """Test ResponseData against matrices of different endianness."""
    photon_egrid = np.linspace(1.0, 100.0, 101)
    channel = np.arange(100)
    channel_emin = photon_egrid[:-1]
    channel_emax = photon_egrid[1:]
    # use a non-trivial (non-identity) matrix so byte-swapping of every
    # element is actually exercised, not just the ones on the diagonal
    rng = np.random.default_rng(42)
    base = rng.random((100, 100))
    mat1 = base.astype('<f4')  # little-endian
    mat2 = base.astype('>f4')  # big-endian
    r1 = ResponseData(photon_egrid, channel_emin, channel_emax, mat1, channel)
    r2 = ResponseData(photon_egrid, channel_emin, channel_emax, mat2, channel)
    # test if the response matrix can be converted to a BCSR matrix in JAX
    for r in [r1, r2]:
        assert np.array_equal(
            r.matrix, BCSR.from_scipy_sparse(r.sparse_matrix).todense()
        )
    # endianness of the input must not affect the resulting data
    assert np.array_equal(r1.matrix, r2.matrix)
    assert np.all(r1.channel == r2.channel)
    assert np.all(r1.channel_fwhm == r2.channel_fwhm)