From 801dc0ab3e785082deeaf1633c90ab7f6ab75a04 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 13:22:11 -0800 Subject: [PATCH 1/6] TST: unit test for #983, wrap np.allclose --- biom/tests/test_table.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py index f22283f6..93065d24 100644 --- a/biom/tests/test_table.py +++ b/biom/tests/test_table.py @@ -2561,6 +2561,16 @@ def test_sort(self): with self.assertRaises(UnknownAxisError): t.sort(axis='foo') + def test_allclose(self): + self.assertTrue(self.st1.allclose(self.st1)) + self.assertTrue(self.st1.allclose(self.st2)) + self.assertFalse(self.st1.allclose(self.st3)) + + st4 = self.st1.copy() + st4._data.data += 0.0001 + self.assertFalse(self.st1.allclose(st4)) + self.assertTrue(self.st1.allclose(st4, atol=1e-1)) + def test_eq(self): """sparse equality""" self.assertTrue(self.st1 == self.st2) @@ -2573,9 +2583,9 @@ def test_eq(self): def test_data_equality(self): """check equality between tables""" - self.assertTrue(self.st1._data_equality(self.st2._data)) - self.assertTrue(self.st1._data_equality(self.st1._data)) - self.assertFalse(self.st1._data_equality(self.st3._data)) + self.assertTrue(self.st1._data_equality(self.st2)) + self.assertTrue(self.st1._data_equality(self.st1)) + self.assertFalse(self.st1._data_equality(self.st3)) def test_nonzero(self): """Return a list of nonzero positions""" From 7a0b1966e935164485ec8f963185ef37e944b23d Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 13:22:34 -0800 Subject: [PATCH 2/6] API: support #983 by providing Table.allclose --- biom/table.py | 63 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/biom/table.py b/biom/table.py index 4b56ab56..ed8327ec 100644 --- a/biom/table.py +++ b/biom/table.py @@ -1839,28 +1839,72 @@ def descriptive_equality(self, other): return "Observation metadata are not the same" if not np.array_equal(self.metadata(), other.metadata()): return "Sample metadata are not the same" - if not self._data_equality(other._data): + if not self._data_equality(other): return "Data elements are not the same" return "Tables appear equal" def __eq__(self, other): """Equality is determined by the data matrix, metadata, and IDs""" + if not self._data_equality_meta(other): + return False + + if not self._data_equality(other): + return False + + return True + + def allclose(self, other, **allclose_kwargs): + """Allow for almost equality testing using np.allclose + + Parameters + ---------- + other : biom.Table + The table to compare against. + allclose_kwargs : dict + Any keyword arguments to provide to np.allclose + + Returns + ------- + bool + Whether the two tables are equal within tolerance. + """ + if not self._data_equality_meta(other): + return False + + self_data = self._data.tocsr().data + other_data = other._data.tocsr().data + + return np.allclose(self_data, other_data, **allclose_kwargs) + + def _data_equality_meta(self, other): if not isinstance(other, self.__class__): return False + if self.type != other.type: return False + if not np.array_equal(self.ids(axis='observation'), other.ids(axis='observation')): return False + if not np.array_equal(self.ids(), other.ids()): return False + if not np.array_equal(self.metadata(axis='observation'), other.metadata(axis='observation')): return False + if not np.array_equal(self.metadata(), other.metadata()): return False - if not self._data_equality(other._data): + + if self._data.shape != other._data.shape: + return False + + if self._data.dtype != other._data.dtype: + return False + + if self._data.nnz != other._data.nnz: return False return True @@ -1879,19 +1923,10 @@ def _data_equality(self, other): necessary before performing the final comparison. """ - if self._data.shape != other.shape: - return False - - if self._data.dtype != other.dtype: - return False - - if self._data.nnz != other.nnz: - return False - - self._data = self._data.tocsr() - other = other.tocsr() + self_data = self._data.tocsr() + other_data = other._data.tocsr() - if (self._data != other).nnz > 0: + if (self_data != other_data).nnz > 0: return False return True From 1f884a1e3cfc334064b92960a18087f9dde1e076 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 13:29:22 -0800 Subject: [PATCH 3/6] TST: unit test for #982, verify nan equality support --- biom/tests/test_table.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py index 93065d24..bc074454 100644 --- a/biom/tests/test_table.py +++ b/biom/tests/test_table.py @@ -2571,6 +2571,16 @@ def test_allclose(self): self.assertFalse(self.st1.allclose(st4)) self.assertTrue(self.st1.allclose(st4, atol=1e-1)) + st5 = self.st1.copy() + st6 = self.st1.copy() + + st5._data.data[0] = np.nan + st6._data.data[0] = np.nan + + self.assertFalse(st5.allclose(st6)) + self.assertFalse(st5.allclose(st6, atol=1e-1)) + self.assertTrue(st5.allclose(st6, equal_nan=True)) + def test_eq(self): """sparse equality""" self.assertTrue(self.st1 == self.st2) From ac0b93576f4ccf841014849a817a9bd2b1b56169 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 13:29:56 -0800 Subject: [PATCH 4/6] DOC: note how #982 is provided by Table.allclose --- biom/table.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/biom/table.py b/biom/table.py index ed8327ec..7d028f3d 100644 --- a/biom/table.py +++ b/biom/table.py @@ -1864,6 +1864,10 @@ def allclose(self, other, **allclose_kwargs): allclose_kwargs : dict Any keyword arguments to provide to np.allclose + Notes + ----- + Specify `equal_nan=True` to allow Nan to test equal. + Returns ------- bool From e98ebb9acae9c51713cdd75da935fab34e858934 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 13:31:56 -0800 Subject: [PATCH 5/6] DOC: note API addition in changelog --- ChangeLog.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 3e28019d..7eb229e1 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,6 +4,10 @@ BIOM-Format ChangeLog biom-2.1.16-dev --------------- +New Features: + +* `Table.allclose` is now available to provide almost equality support including equality of `nan` by wrapping NumPy's `allclose`. See issues [#982](https://github.com/biocore/biom-format/issues/982) and [#983](https://github.com/biocore/biom-format/issues/983). + Maintenance: * Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR[#986](https://github.com/biocore/biom-format/pull/986). From f1670059fa19047d883b5e373a575dd81b175a74 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 11 Dec 2024 08:44:48 -0800 Subject: [PATCH 6/6] DOC: shift docs slightly, thanks @amandabirmingham! --- biom/table.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/biom/table.py b/biom/table.py index 7d028f3d..18d9787f 100644 --- a/biom/table.py +++ b/biom/table.py @@ -1845,7 +1845,19 @@ def descriptive_equality(self, other): return "Tables appear equal" def __eq__(self, other): - """Equality is determined by the data matrix, metadata, and IDs""" + """Equality is determined by the data matrix, metadata, and IDs + + Matrices are equal iff the following items are equal: + - shape + - dtype + - size (nnz) + - matrix data (more expensive, so checked last) + + The sparse format does not need to be the same between the two + matrices. ``self`` and ``other`` will be converted to csr format if + necessary before performing the final comparison. + + """ if not self._data_equality_meta(other): return False @@ -1857,6 +1869,14 @@ def __eq__(self, other): def allclose(self, other, **allclose_kwargs): """Allow for almost equality testing using np.allclose + Matrices must have identical: + - shape + - dtype + - size (nnz) + + Assuming those properties are identical, the matrix data are then + tested for equality within tolerance using `np.allclose`. + Parameters ---------- other : biom.Table @@ -1914,19 +1934,6 @@ def _data_equality_meta(self, other): return True def _data_equality(self, other): - """Return ``True`` if both matrices are equal. - - Matrices are equal iff the following items are equal: - - shape - - dtype - - size (nnz) - - matrix data (more expensive, so checked last) - - The sparse format does not need to be the same between the two - matrices. ``self`` and ``other`` will be converted to csr format if - necessary before performing the final comparison. - - """ self_data = self._data.tocsr() other_data = other._data.tocsr()