Skip to content

Commit

Permalink
feat(g_to_c): this adds support for g_to_c of uncertain coordinates. …
Browse files Browse the repository at this point in the history
…Since the behavior is a bit unspecified, we fall back to the inner (confident) interval of the uncertain range for this projection.
  • Loading branch information
andreasprlic committed Dec 12, 2023
1 parent c15c552 commit 8a4341a
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 30 deletions.
15 changes: 11 additions & 4 deletions src/hgvs/alignmentmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,17 @@ def g_to_n(self, g_interval, strict_bounds=None):
if strict_bounds is None:
strict_bounds = global_config.mapping.strict_bounds

grs, gre = (
g_interval.start.base - 1 - self.gc_offset,
g_interval.end.base - 1 - self.gc_offset,
)
# For uncertain ranges, fall back to the inner (more confident) interval:
# the end of an uncertain start position and the start of an uncertain end position.
if g_interval.start.uncertain:
grs = g_interval.start.end.base - 1 - self.gc_offset
else:
grs = g_interval.start.base - 1 - self.gc_offset

if g_interval.end.uncertain:
gre = g_interval.end.start.base - 1 - self.gc_offset
else:
gre = g_interval.end.base - 1 - self.gc_offset

# frs, fre = (f)orward (r)na (s)tart & (e)nd; forward w.r.t. genome
frs, frs_offset, frs_cigar = self.cigarmapper.map_ref_to_tgt(
pos=grs, end="start", strict_bounds=strict_bounds
Expand Down
12 changes: 12 additions & 0 deletions src/hgvs/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ def __lt__(lhs, rhs):
assert type(lhs) == type(rhs), "Cannot compare coordinates of different representations"
if lhs.uncertain or rhs.uncertain:
raise HGVSUnsupportedOperationError("Cannot compare coordinates of uncertain positions")

if lhs.base is None and rhs.base is None:
raise HGVSUnsupportedOperationError("Cannot compare two positions without bases")

# Imprecise positions (base is None, written as "?") can appear on either side of an interval.
# An unknown breakpoint cannot be ordered against a known position, so the comparison
# is treated as true in both directions whenever exactly one side has no base.
if lhs.base is None or rhs.base is None:
return True

return lhs.base < rhs.base


Expand Down Expand Up @@ -326,9 +336,11 @@ def validate(self):
(res, msg) = self.end.validate()
if res != ValidationLevel.VALID:
return (res, msg)

# Check that start is less than or equal to end
if not self.start or not self.end:
return (ValidationLevel.VALID, None)

try:
if self.start <= self.end:
return (ValidationLevel.VALID, None)
Expand Down
11 changes: 8 additions & 3 deletions src/hgvs/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,14 @@ def _ref_is_valid(self, var):
else:
var_ref_seq = getattr(var.posedit.edit, "ref", None) or None
var_n = self.vm.c_to_n(var) if var.type == "c" else var
ref_checks.append(
(var_n.ac, var_n.posedit.pos.start.base, var_n.posedit.pos.end.base, var_ref_seq)
)
if var_n.posedit.pos.start.uncertain or var_n.posedit.pos.end.uncertain:
ref_checks.append(
(var_n.ac, None, None, var_ref_seq)
)
else:
ref_checks.append(
(var_n.ac, var_n.posedit.pos.start.base, var_n.posedit.pos.end.base, var_ref_seq)
)

for ac, var_ref_start, var_ref_end, var_ref_seq in ref_checks:
if var_ref_start is None or var_ref_end is None or not var_ref_seq:
Expand Down
11 changes: 9 additions & 2 deletions src/hgvs/variantmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,15 @@ def _replace_reference(self, var):
else:
pos = var.posedit.pos

seq_start = pos.start.base - 1
seq_end = pos.end.base
if pos.start.uncertain:
seq_start = pos.start.end.base - 1
else:
seq_start = pos.start.base - 1

if pos.end.uncertain:
seq_end = pos.end.start.base
else:
seq_end = pos.end.base

# When strict_bounds is False and an error occurs, return
# variant as-is
Expand Down
Binary file modified tests/data/cache-py3.hdp
Binary file not shown.
63 changes: 42 additions & 21 deletions tests/test_hgvs_sequencevariant.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import unittest

import pytest

from hgvs.exceptions import HGVSParseError
from support import CACHE

import hgvs
Expand All @@ -22,42 +24,47 @@ def test_gene_formatting(parser):
@pytest.mark.quick
@pytest.mark.models
class Test_SequenceVariant(unittest.TestCase):

@classmethod
def setUpClass(cls):
    # Build the data provider, variant mapper and parser once; every test in
    # this class reads them through cls.hdp / cls.vm / cls.hp.
    cache_mode = os.environ.get("HGVS_CACHE_MODE", "run")
    data_provider = hgvs.dataproviders.uta.connect(mode=cache_mode, cache=CACHE)
    cls.hdp = data_provider
    cls.vm = hgvs.variantmapper.VariantMapper(data_provider)
    cls.hp = hgvs.parser.Parser()


def test_SequenceVariant(self):
    # str() of a SequenceVariant joins the parts as `ac:type.posedit`.
    variant = hgvs.sequencevariant.SequenceVariant(ac="AC", type="B", posedit="1234DE>FG")
    rendered = str(variant)
    self.assertEqual(rendered, "AC:B.1234DE>FG")

def test_fill_ref(self):
    # fill_ref() should fetch the reference bases for a variant so that they
    # appear when formatting with no max_ref_length limit.
    #
    # The parser (self.hp) and data provider (self.hdp) come from setUpClass;
    # building a second parser/connection here and parsing every variant
    # twice was redundant (the first result was always overwritten).
    cases = [
        # (input variant, expected formatting after fill_ref)
        ("NM_001166478.1:c.31_32del", "NM_001166478.1:c.31_32delTT"),
        ("NM_001166478.1:c.31_32del2", "NM_001166478.1:c.31_32delTT"),
        ("NM_001166478.1:c.2_7delinsTTTAGA", "NM_001166478.1:c.2_7delTGAAGAinsTTTAGA"),
        ("NM_001166478.1:c.35_36dup", "NM_001166478.1:c.35_36dupTC"),
        ("NM_001166478.1:c.18_19insACT", "NM_001166478.1:c.18_19insACT"),
        ("NM_001166478.1:c.31=", "NM_001166478.1:c.31T="),
    ]

    for hgvs_in, expected in cases:
        # subTest labels the failing variant and lets the other cases still run
        with self.subTest(hgvs=hgvs_in):
            var = self.hp.parse_hgvs_variant(hgvs_in).fill_ref(self.hdp)
            self.assertEqual(var.format({"max_ref_length": None}), expected)

def test_format(self):
hp = hgvs.parser.Parser()

# Global default settings
var = hp.parse_hgvs_variant("NP_001628.1:p.Gly528Arg")
var = self.hp.parse_hgvs_variant("NP_001628.1:p.Gly528Arg")
self.assertEqual(str(var), "NP_001628.1:p.Gly528Arg")
self.assertEqual(var.format(), "NP_001628.1:p.Gly528Arg")

Expand All @@ -70,7 +77,7 @@ def test_format(self):
conf = {"p_3_letter": False}
self.assertEqual(var.format(conf), "NP_001628.1:p.G528R")

var = hp.parse_hgvs_variant("NP_001628.1:p.Gly528Ter")
var = self.hp.parse_hgvs_variant("NP_001628.1:p.Gly528Ter")
conf = {"p_term_asterisk": True}
self.assertEqual(var.format(conf), "NP_001628.1:p.Gly528*")
self.assertEqual(var.format(), "NP_001628.1:p.Gly528Ter")
Expand All @@ -79,29 +86,28 @@ def test_format(self):
self.assertEqual(var.format(), "NP_001628.1:p.Gly528Ter")

# Remove reference sequence
var = hp.parse_hgvs_variant("NM_001166478.1:c.31_32delTT")
var = self.hp.parse_hgvs_variant("NM_001166478.1:c.31_32delTT")
self.assertEqual(str(var), "NM_001166478.1:c.31_32del")
self.assertEqual(var.format(conf={"max_ref_length": 1}), "NM_001166478.1:c.31_32del")
self.assertEqual(var.format(conf={"max_ref_length": 2}), "NM_001166478.1:c.31_32delTT")
self.assertEqual(var.format(conf={"max_ref_length": None}), "NM_001166478.1:c.31_32delTT")

var = hp.parse_hgvs_variant("NM_001166478.1:c.31_32del2")
var = self.hp.parse_hgvs_variant("NM_001166478.1:c.31_32del2")
self.assertEqual(str(var), "NM_001166478.1:c.31_32del")
self.assertEqual(var.format(conf={"max_ref_length": None}), "NM_001166478.1:c.31_32del2")

var = hp.parse_hgvs_variant("NM_001166478.1:c.31_32delTTinsAA")
var = self.hp.parse_hgvs_variant("NM_001166478.1:c.31_32delTTinsAA")
self.assertEqual(str(var), "NM_001166478.1:c.31_32delinsAA")
var = hp.parse_hgvs_variant("NM_001166478.1:c.35_36dupTC")
var = self.hp.parse_hgvs_variant("NM_001166478.1:c.35_36dupTC")
self.assertEqual(str(var), "NM_001166478.1:c.35_36dup")
var = hp.parse_hgvs_variant("NM_001166478.1:c.31T=")
var = self.hp.parse_hgvs_variant("NM_001166478.1:c.31T=")
self.assertEqual(str(var), "NM_001166478.1:c.31=")
self.assertEqual(var.format(conf={"max_ref_length": None}), "NM_001166478.1:c.31T=")

def test_uncertain(self):
hp = hgvs.parser.Parser()

vs = "NC_000005.9:g.(90136803_90144453)_(90159675_90261231)dup"
v = hp.parse(vs)
v = self.hp.parse(vs)
self.assertEqual(vs, str(v))
self.assertEqual(v.posedit.pos.start.start.base, 90136803)
self.assertEqual(v.posedit.pos.start.end.base, 90144453)
Expand All @@ -112,7 +118,7 @@ def test_uncertain(self):
self.assertEqual(type(v.posedit.edit).__name__, "Dup")

vs2 = "NC_000009.11:g.(?_108337304)_(108337428_?)del"
v2 = hp.parse(vs2)
v2 = self.hp.parse(vs2)
self.assertEqual(vs2, str(v2))
self.assertEqual(v2.posedit.pos.start.start.base, None)
self.assertEqual(v2.posedit.pos.start.uncertain, True)
Expand All @@ -122,6 +128,21 @@ def test_uncertain(self):
self.assertEqual(v2.posedit.pos.end.uncertain, True)
self.assertEqual(type(v2.posedit.edit).__name__, "NARefAlt")

def test_uncertain_projection_confidence(self):
    # Project genomic variants with uncertain ranges onto a transcript with
    # g_to_c and compare against the expected c. representation.
    # Each entry is (genomic input, expected c. result); the transcript
    # accession used for the projection is taken from the expected string.
    data = [
        (
            "NC_000005.9:g.(90136803_90144453)_(90159675_90261231)dup",
            "NM_032119.3:c.17020-1_17856+1dup",
        ),
        (
            "NC_000019.9:g.(11211022_11213339)_(11217364_11218067)dup",
            "NM_000527.5:c.191-1_817+1dup",
        ),
        (
            "NC_000009.11:g.(?_108337304)_(108337428_?)del",
            "NM_001079802.1:c.-10_105+10del",
        ),
    ]

    for hgvs_g, hgvs_c in data:
        # subTest labels the failing pair and lets the remaining pairs still run
        with self.subTest(hgvs_g=hgvs_g):
            # parsing must round-trip the uncertain genomic string unchanged
            var_g = self.hp.parse(hgvs_g)
            self.assertEqual(hgvs_g, str(var_g))

            # the accession is everything before the first ":"
            acc = hgvs_c.partition(":")[0]
            var_c = self.vm.g_to_c(var_g, acc)
            self.assertEqual(hgvs_c, str(var_c))


if __name__ == "__main__":
Expand Down

0 comments on commit 8a4341a

Please sign in to comment.