Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IMP: adds distance-matrix-to-pd-series-transformer #354

Merged
merged 2 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions q2_types/distance_matrix/_deferred_setup/_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------

import skbio
import pandas as pd

from .. import LSMatFormat

Expand All @@ -24,3 +25,34 @@ def _1(data: skbio.DistanceMatrix) -> LSMatFormat:
@plugin.register_transformer
def _2(ff: LSMatFormat) -> skbio.DistanceMatrix:
    """Load an LSMat-formatted file into a scikit-bio distance matrix.

    The file is read with ``verify=False``, i.e. scikit-bio's format
    sniffer check is not run on the file before parsing.
    """
    lsmat_path = str(ff)
    return skbio.DistanceMatrix.read(
        lsmat_path, format='lsmat', verify=False)


@plugin.register_transformer
def _3(ff: LSMatFormat) -> pd.Series:
    """Load an LSMat-formatted file as a ``pd.Series`` of distances.

    The file is parsed into a ``skbio.DistanceMatrix`` and converted with
    its ``to_series()`` method.

    Raises
    ------
    AssertionError
        If the resulting series is empty (the message states that the
        distance matrix must contain more than one sample).
    """
    dm = skbio.DistanceMatrix.read(str(ff), format='lsmat', verify=False)
    series = dm.to_series()
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently disable this
    # input check. The exception type and message are kept unchanged so
    # existing callers and tests keep working.
    if series.size == 0:
        raise AssertionError("Distance Matrix must contain more "
                             "than one sample")
    return series


@plugin.register_transformer
def _4(data: pd.Series) -> LSMatFormat:
    """Write a pairwise-distance ``pd.Series`` out as an LSMat file.

    Parameters
    ----------
    data : pd.Series
        Distances indexed by a two-level index of (id, id) pairs. Each
        unordered pair of distinct ids needs to appear in at least one
        orientation; entries pairing an id with itself are ignored because
        the diagonal is always written as 0.

    Returns
    -------
    LSMatFormat
        File holding the symmetric square matrix. Ids are ordered as the
        unique first-level ids followed by any second-level ids not already
        seen.

    Raises
    ------
    KeyError
        If some pair of distinct ids is absent from ``data`` in both
        orientations.
    """
    # Local import keeps this block self-contained; numpy is not among the
    # module-level imports visible for this file.
    import numpy as np

    first = data.index.get_level_values(0)
    second = data.index.get_level_values(1)
    ids = first.unique().union(second.unique(), sort=False)

    # Translate id labels to matrix positions once, instead of doing a
    # label-based lookup (with KeyError fallback) for every cell.
    rows = ids.get_indexer(first)
    cols = ids.get_indexer(second)
    n_ids = len(ids)
    values = data.to_numpy(dtype=float)

    matrix = np.zeros((n_ids, n_ids), dtype=float)
    matrix[rows, cols] = values
    matrix[cols, rows] = values
    np.fill_diagonal(matrix, 0.0)

    # Track which cells were supplied so that a missing pair is still
    # reported as a KeyError rather than silently written as 0.
    seen = np.eye(n_ids, dtype=bool)
    seen[rows, cols] = True
    seen[cols, rows] = True
    if not seen.all():
        miss_row, miss_col = np.argwhere(~seen)[0]
        raise KeyError((ids[miss_row], ids[miss_col]))

    # NOTE(review): if a pair is given in both orientations with different
    # values the matrix ends up asymmetric; presumably DistanceMatrix's
    # validation rejects that -- confirm against scikit-bio.
    dm = skbio.DistanceMatrix(matrix, ids=ids)
    ff = LSMatFormat()
    with ff.open() as fh:
        dm.write(fh, format='lsmat')
    return ff
31 changes: 31 additions & 0 deletions q2_types/distance_matrix/tests/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import unittest

import skbio
import pandas as pd

from q2_types.distance_matrix import LSMatFormat
from qiime2.plugin.testing import TestPluginBase
Expand Down Expand Up @@ -40,6 +41,36 @@ def test_lsmat_format_to_skbio_distance_matrix(self):
exp = skbio.DistanceMatrix.read(str(input))
self.assertEqual(obs, exp)

def test_lsmat_format_to_pd_series(self):
    """LSMatFormat -> pd.Series matches skbio's own ``to_series()``."""
    for filename in ('distance-matrix-2x2.tsv',
                     'distance-matrix-NxN.tsv'):
        source_ff, obs = self.transform_format(
            LSMatFormat, pd.Series, filename=filename)
        # Expected value: read the same file directly with scikit-bio.
        matrix = skbio.DistanceMatrix.read(str(source_ff))
        exp = matrix.to_series()
        pd.testing.assert_series_equal(obs, exp)

def test_lsmat_format_to_pd_series_1x1(self):
    """A 1x1 distance matrix is rejected when converting to pd.Series."""
    filename = 'distance-matrix-1x1.tsv'
    # Match the whole message. The previous pattern "Distance Matrix *"
    # applied ``*`` to the trailing space, so it only checked the prefix.
    with self.assertRaisesRegex(
            AssertionError,
            "Distance Matrix must contain more than one sample"):
        self.transform_format(LSMatFormat,
                              pd.Series,
                              filename=filename)

def test_pd_series_to_skbio_distance_matrix(self):
    """pd.Series -> LSMatFormat round-trips back to the source matrix."""
    transformer = self.get_transformer(pd.Series, LSMatFormat)

    for filename in ('distance-matrix-NxN.tsv', 'distance-matrix-2x2.tsv'):
        exp = skbio.DistanceMatrix.read(self.get_data_path(filename))
        # Transform the series form, then read the written file back.
        written_ff = transformer(exp.to_series())
        obs = skbio.DistanceMatrix.read(str(written_ff))

        self.assertEqual(obs, exp)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
unittest.main()
Loading