From 099fd806d8884324106d0b85692f26abd56c075f Mon Sep 17 00:00:00 2001 From: Yuta Nagano <52748151+yutanagano@users.noreply.github.com> Date: Sun, 12 Jan 2025 11:19:36 +0000 Subject: [PATCH 1/2] junction standardisation operates on C and F ends independently --- src/tidytcells/junction/_standardize.py | 9 ++++++++- tests/test_junction.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/tidytcells/junction/_standardize.py b/src/tidytcells/junction/_standardize.py index d36fd2c..9cb21bb 100644 --- a/src/tidytcells/junction/_standardize.py +++ b/src/tidytcells/junction/_standardize.py @@ -150,10 +150,17 @@ def standardize( logger.warning( f"Failed to standardize {original_input}: not a valid junction sequence." ) + if on_fail == "reject": return None + return original_input - seq = "C" + seq + "F" + + if not seq.startswith("C"): + seq = "C" + seq + + if not JUNCTION_MATCHING_REGEX.match(seq): + seq = seq + "F" return seq diff --git a/tests/test_junction.py b/tests/test_junction.py index 52735fc..85ce845 100644 --- a/tests/test_junction.py +++ b/tests/test_junction.py @@ -30,8 +30,8 @@ def test_various_rejections(self, seq, caplog): ( ("casqyf", "CASQYF"), ("ASQY", "CASQYF"), - ("CASQY", "CCASQYF"), - ("ASQYF", "CASQYFF"), + ("CASQY", "CASQYF"), + ("ASQYF", "CASQYF"), ), ) def test_various_corrections(self, seq, expected): From f91170a5ef07a9af73a2478e6d866a127c452990 Mon Sep 17 00:00:00 2001 From: Yuta Nagano <52748151+yutanagano@users.noreply.github.com> Date: Sun, 12 Jan 2025 11:21:34 +0000 Subject: [PATCH 2/2] update junction standardisation documentation --- src/tidytcells/junction/_standardize.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/tidytcells/junction/_standardize.py b/src/tidytcells/junction/_standardize.py index 9cb21bb..016d2b8 100644 --- a/src/tidytcells/junction/_standardize.py +++ b/src/tidytcells/junction/_standardize.py @@ -59,7 +59,7 @@ def standardize( :return: If 
possible, a standardized version of the input string is returned. - If the input string cannot be standardized, the function follows the behaviour as set by ``on_fail``. + If the input string cannot be standardized, the function follows the behaviour as set by `on_fail`. :rtype: Union[str, None] @@ -67,17 +67,17 @@ def standardize( Strings that look like junction sequences will be accepted, and returned in capitalised form. - >>> tt.junction.standardize("csadaff") - 'CSADAFF' + >>> tt.junction.standardize("csadaf") + 'CSADAF' - Strings that are valid amino acid sequences but do not stard and end with the appropriate residues will have a C and an F appended to its beginning and end respectively. + Strings that are valid amino acid sequences but do not start and end with the appropriate residues will have a C and an F appended to its beginning and end as required. - >>> tt.junction.standardize("sadaf") - 'CSADAFF' + >>> tt.junction.standardize("sada") + 'CSADAF' - However, setting ``strict`` to ``True`` will cause these cases to be rejected. + However, setting `strict` to ``True`` will cause these cases to be rejected. - >>> result = tt.junction.standardize("sadaf", strict=True) + >>> result = tt.junction.standardize("sada", strict=True) Input sadaf was rejected as it is not a valid junction sequence. >>> print(result) None @@ -92,11 +92,11 @@ def standardize( IF input sequence contains non-amino acid symbols: set standardization status to failed - IF input sequence does not start with C and end with F: + IF input sequence does not start with C and end with W / F: IF strict is set to True: set standardization status to failed ELSE: - add C to the beginning and F to the end of the input sequence + add C to the beginning and F to the end of the input sequence as required set standardization status to successful ELSE: set standardization status to successful