Skip to content

Commit

Permalink
Merge branch 'main' into version-bump
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG authored Aug 14, 2024
2 parents 48058c3 + ecff59b commit 3b9a7f2
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 8 deletions.
15 changes: 12 additions & 3 deletions psm_utils/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@
from pydantic import ValidationError

from psm_utils.io._base_classes import ReaderBase, WriterBase
from psm_utils.io._utils import set_csv_field_size_limit
from psm_utils.io.exceptions import PSMUtilsIOException
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io._utils import set_csv_field_size_limit

set_csv_field_size_limit()

Expand All @@ -74,12 +74,21 @@ def __iter__(self):
"""Iterate over file and return PSMs one-by-one."""
with open(self.filename, "rt") as open_file:
reader = csv.DictReader(open_file, delimiter="\t")
failed_rows = 0
for row in reader:
try:
yield PSM(**self._parse_entry(row))
except ValidationError:
except ValidationError as e:
failed_rows += 1
logger.warning(f"Could not parse PSM from row: `{row}`")
continue
if failed_rows >= 3:
raise PSMUtilsIOException(
"Could not parse PSM from three consecutive rows. Verify that the "
"file is formatted correctly as a psm_utils TSV file or that the "
"correct file type reader is used."
) from e
else:
failed_rows = 0

@staticmethod
def _parse_entry(entry: dict) -> dict:
Expand Down
13 changes: 9 additions & 4 deletions psm_utils/peptidoform.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,23 @@ def __str__(self) -> str:
def __hash__(self) -> int:
return hash(self.proforma)

def __eq__(self, __o: Peptidoform) -> bool:
try:
def __eq__(self, __o: Union[Peptidoform, str]) -> bool:
if isinstance(__o, str):
return self.proforma == __o
elif isinstance(__o, Peptidoform):
return self.proforma == __o.proforma
except AttributeError:
raise NotImplementedError("Object is not a Peptidoform")
else:
raise TypeError(f"Cannot compare {type(__o)} with Peptidoform.")

def __iter__(self) -> Iterable[Tuple[str, Union[None, List[proforma.TagBase]]]]:
    """Iterate over the entries of ``parsed_sequence``.

    Per the return annotation, each item is a ``(str, modifications)`` tuple in
    which ``modifications`` is either ``None`` or a list of
    ``proforma.TagBase`` objects.
    """
    # Go through the builtin iteration protocol instead of calling the dunder
    # method by hand.
    return iter(self.parsed_sequence)

def __len__(self) -> int:
    """Return the number of entries in ``parsed_sequence``."""
    # ``len()`` is the idiomatic spelling of the sequence-length protocol.
    return len(self.parsed_sequence)

def __getitem__(self, key: int) -> Tuple[str, Union[None, List[proforma.TagBase]]]:
    """Return the entry of ``parsed_sequence`` at position ``key``.

    Per the return annotation, the entry is a ``(str, modifications)`` tuple
    in which ``modifications`` is either ``None`` or a list of
    ``proforma.TagBase`` objects. Any lookup error raised by
    ``parsed_sequence`` propagates unchanged.
    """
    # Plain subscription replaces the explicit dunder call.
    return self.parsed_sequence[key]

@property
def proforma(self) -> str:
"""
Expand Down
4 changes: 4 additions & 0 deletions tests/test_data/test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spectrum_id peptidoform
peptide1 ACDEK/2
peptide2 AC[Carbamidomethyl]DEFGR/3
peptide3 [Acetyl]-AC[Carbamidomethyl]DEFGHIK/2
22 changes: 21 additions & 1 deletion tests/test_io/test_tsv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Tests for psm_utils.io.tsv."""

from psm_utils.io.tsv import TSVReader, TSVWriter # noqa: F401
import pytest

from psm_utils.io.exceptions import PSMUtilsIOException
from psm_utils.io.tsv import TSVReader
from psm_utils.peptidoform import Peptidoform

test_cases = [
(
Expand Down Expand Up @@ -30,3 +34,19 @@ class TestTSVReader:
def test__parse_entry(self):
for test_in, expected_out in test_cases:
assert TSVReader._parse_entry(test_in) == expected_out

def test_iter(self):
    """The first PSM read from the test TSV file carries the expected fields."""
    reader = TSVReader("tests/test_data/test.tsv")
    # Fetch the first PSM explicitly. A ``for ... break`` loop would pass
    # without running a single assertion if the reader yielded nothing;
    # ``next`` makes that case fail loudly with ``StopIteration``.
    psm = next(iter(reader))
    assert psm.peptidoform == Peptidoform("ACDEK/2")
    assert psm.spectrum_id == "peptide1"
    assert psm.provenance_data == {}
    assert psm.metadata == {}
    assert psm.rescoring_features == {}

def test_iter_raises(self):
    """Iterating over ``peprec.tsv`` with ``TSVReader`` raises ``PSMUtilsIOException``."""
    # NOTE(review): presumably peprec.tsv is not laid out as a psm_utils TSV
    # file, which is what triggers the exception; confirm against the fixture.
    with TSVReader("tests/test_data/peprec.tsv") as reader, pytest.raises(
        PSMUtilsIOException
    ):
        # Exhaust the reader; the yielded PSMs themselves are irrelevant.
        for _ in reader:
            pass
33 changes: 33 additions & 0 deletions tests/test_peptidoform.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
from pyteomics import proforma

from psm_utils.peptidoform import Peptidoform, format_number_as_string
Expand All @@ -18,6 +19,38 @@ def test__len__(self):
peptidoform = Peptidoform(test_case_in)
assert len(peptidoform) == expected_out

def test__eq__(self):
    """Peptidoforms compare against ProForma strings and other Peptidoforms."""
    # Each case: (left ProForma string, right ProForma string, expected equality).
    # Exact duplicates of the last case were dropped; they added no coverage.
    test_cases = [
        ("ACDEFGHIK", "ACDEFGHIK", True),
        ("ACDEFGHIK", "ACDEFGHI", False),
        ("ACDEFGHIK/2", "ACDEFGHIK/2", True),
        ("ACDEFGHIK/2", "ACDEFGHIK/3", False),
        ("[ac]-AC[cm]DEFGHIK", "[ac]-AC[cm]DEFGHIK", True),
        ("[ac]-AC[cm]DEFGHIK", "[ac]-AC[cm]DEFGH", False),
    ]

    for test_case_in_1, test_case_in_2, expected_out in test_cases:
        # Compare against the raw string as well as a parsed Peptidoform.
        assert (Peptidoform(test_case_in_1) == test_case_in_2) == expected_out
        assert (Peptidoform(test_case_in_1) == Peptidoform(test_case_in_2)) == expected_out

    # Comparing against an unsupported type is expected to raise. The result is
    # bound to ``_`` so the comparison is not a discarded bare expression.
    with pytest.raises(TypeError):
        _ = Peptidoform("ACDEFGHIK") == 1

def test__getitem__(self):
    """Indexing a Peptidoform returns the expected (residue, modifications) tuple."""
    unmodified = "ACDEFGHIK"
    modified = "[ac]-AC[cm]DEFGHIK"

    # Each expectation: (ProForma string, index, expected item at that index).
    expectations = (
        (unmodified, 0, ("A", None)),
        (unmodified, 8, ("K", None)),
        (modified, 0, ("A", None)),
        (modified, 1, ("C", [proforma.GenericModification("cm")])),
        (modified, 8, ("K", None)),
    )

    for sequence, position, expected_item in expectations:
        assert Peptidoform(sequence)[position] == expected_item

def test__iter__(self):
for aa, mods in Peptidoform("ACDEM[U:35]K"):
assert isinstance(aa, str)
Expand Down

0 comments on commit 3b9a7f2

Please sign in to comment.