Skip to content

Commit

Permalink
issue biocommons#606 - move error catching to _get_altered_sequence m…
Browse files Browse the repository at this point in the history
…ethod and add unit tests
  • Loading branch information
kayleeyuhas committed Dec 23, 2020
1 parent 9f109ab commit 3bb89f7
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 49 deletions.
89 changes: 40 additions & 49 deletions hgvs/variantmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,10 @@ def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
pos_n.end.base -= 1
edit_n.ref = ''
else:
try:
# variant at alignment gap
pos_g = mapper.n_to_g(pos_n)
edit_n = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
except IndexError:
raise HGVSInvalidIntervalError("Variant is at alignment gap.")
# variant at alignment gap
pos_g = mapper.n_to_g(pos_n)
edit_n = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
pos_n.uncertain = var_g.posedit.pos.uncertain
var_n = hgvs.sequencevariant.SequenceVariant(
ac=tx_ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
Expand Down Expand Up @@ -213,13 +210,10 @@ def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
pos_g.end.base -= 1
edit_g.ref = ''
else:
try:
# variant at alignment gap
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
except IndexError:
raise HGVSInvalidIntervalError("Variant is at alignment gap.")
# variant at alignment gap
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
pos_g.uncertain = var_n.posedit.pos.uncertain
var_g = hgvs.sequencevariant.SequenceVariant(
ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
Expand Down Expand Up @@ -258,13 +252,10 @@ def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
pos_c.end.base -= 1
edit_c.ref = ''
else:
try:
# variant at alignment gap
pos_g = mapper.c_to_g(pos_c)
edit_c = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
except IndexError:
raise HGVSInvalidIntervalError("Variant is at alignment gap.")
# variant at alignment gap
pos_g = mapper.c_to_g(pos_c)
edit_c = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
pos_c.uncertain = var_g.posedit.pos.uncertain
var_c = hgvs.sequencevariant.SequenceVariant(
ac=tx_ac, type="c", posedit=hgvs.posedit.PosEdit(pos_c, edit_c))
Expand Down Expand Up @@ -302,16 +293,13 @@ def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
pos_g.end.base -= 1
edit_g.ref = ''
else:
try:
# variant at alignment gap
var_n = copy.deepcopy(var_c)
var_n.posedit.pos = mapper.c_to_n(var_c.posedit.pos)
var_n.type = 'n'
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
except IndexError:
raise HGVSInvalidIntervalError("Variant is at alignment gap.")
# variant at alignment gap
var_n = copy.deepcopy(var_c)
var_n.posedit.pos = mapper.c_to_n(var_c.posedit.pos)
var_n.type = 'n'
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
pos_g.uncertain = var_c.posedit.pos.uncertain
var_g = hgvs.sequencevariant.SequenceVariant(
ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
Expand Down Expand Up @@ -533,24 +521,27 @@ def _get_altered_sequence(self, strand, interval, var):
pos_end = var.posedit.pos.end.base - interval.start.base + 1
edit = var.posedit.edit

if edit.type == 'sub':
seq[pos_start] = edit.alt
elif edit.type == 'del':
del seq[pos_start:pos_end]
elif edit.type == 'ins':
seq.insert(pos_start + 1, edit.alt)
elif edit.type == 'delins':
del seq[pos_start:pos_end]
seq.insert(pos_start, edit.alt)
elif edit.type == 'dup':
seq.insert(pos_end, ''.join(seq[pos_start:pos_end]))
elif edit.type == 'inv':
seq[pos_start:pos_end] = list(reverse_complement(''.join(seq[pos_start:pos_end])))
elif edit.type == 'identity':
pass
else:
raise HGVSUnsupportedOperationError(
"Getting altered sequence for {type} is unsupported".format(type=edit.type))
try:
if edit.type == 'sub':
seq[pos_start] = edit.alt
elif edit.type == 'del':
del seq[pos_start:pos_end]
elif edit.type == 'ins':
seq.insert(pos_start + 1, edit.alt)
elif edit.type == 'delins':
del seq[pos_start:pos_end]
seq.insert(pos_start, edit.alt)
elif edit.type == 'dup':
seq.insert(pos_end, ''.join(seq[pos_start:pos_end]))
elif edit.type == 'inv':
seq[pos_start:pos_end] = list(reverse_complement(''.join(seq[pos_start:pos_end])))
elif edit.type == 'identity':
pass
else:
raise HGVSUnsupportedOperationError(
"Getting altered sequence for {type} is unsupported".format(type=edit.type))
except IndexError:
raise HGVSInvalidIntervalError("Specified index does not exist within sequence.")

seq = ''.join(seq)
if strand == -1:
Expand Down
42 changes: 42 additions & 0 deletions tests/issues/test_606.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest

from hgvs.exceptions import HGVSInvalidIntervalError
import hgvs


class TestIssue606(unittest.TestCase):

def test_606(self):
"""https://github.com/biocommons/hgvs/issues/606"""

from hgvs.easy import am37, parser

"""
Occasionally, an IndexError is thrown by the _get_altered_sequence method. This seems to occur
when there is either inconsistent data for a transcript in UTA or an invalid variant input.
There could be other root causes, but these are two that we have observed and are illustrated in the
examples below.
Example 1 (NC_000001.10:g.16890441C>G):
Transcript NM_017940.4 has inconsistent data. 25 exons in the 'transcript' alignment to itself,
but 28 exons in the 'splign' alignment to the reference sequence.
Ends up with an index error in _get_altered_sequence.
Example 2 (NM_001291722.1:c.283-3C>T):
This is invalid input -- according to the transcript in UTA, exon 5 starts at position 286 of the transcript,
not position 283. Ends up with an index error in _get_altered_sequence.
"""

# Example 1
hgvs.global_config.mapping.strict_bounds = False
hgvs_g = 'NC_000001.10:g.16890441C>G'
var_g = parser.parse_hgvs_variant(hgvs_g)
with self.assertRaises(HGVSInvalidIntervalError):
var_c = am37.g_to_c(var_g, 'NM_017940.4')
hgvs.global_config.mapping.strict_bounds = True

# Example 2
hgvs_c = 'NM_001291722.1:c.283-3C>T'
var_c = parser.parse_hgvs_variant(hgvs_c)
with self.assertRaises(HGVSInvalidIntervalError):
var_g = am37.c_to_g(var_c)

0 comments on commit 3bb89f7

Please sign in to comment.