diff --git a/hgvs/variantmapper.py b/hgvs/variantmapper.py index 2cf98520..eaf9a53a 100644 --- a/hgvs/variantmapper.py +++ b/hgvs/variantmapper.py @@ -68,13 +68,11 @@ class VariantMapper(object): :class:`hgvs.sequencevariant.SequenceVariant`. """ - def __init__(self, hdp, replace_reference=hgvs.global_config.mapping.replace_reference, prevalidation_level=hgvs.global_config.mapping.prevalidation_level, - add_gene_symbol=hgvs.global_config.mapping.add_gene_symbol - ): + add_gene_symbol=hgvs.global_config.mapping.add_gene_symbol): """ :param bool replace_reference: replace reference (entails additional network access) :param str prevalidation_level: None or Intrinsic or Extrinsic validation before mapping @@ -93,8 +91,9 @@ def __init__(self, self._validator = hgvs.validator.IntrinsicValidator(strict=False) else: self._validator = hgvs.validator.Validator(self.hdp, strict=False) - self.left_normalizer = hgvs.normalizer.Normalizer(hdp, shuffle_direction=5, variantmapper=self) - + self.left_normalizer = hgvs.normalizer.Normalizer(hdp, + shuffle_direction=5, + variantmapper=self) # ############################################################################ # g⟷t @@ -104,14 +103,19 @@ def g_to_t(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln if self._validator: self._validator.validate(var_g) var_g.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac, + alt_ac=var_g.ac, + alt_aln_method=alt_aln_method) if mapper.is_coding_transcript: - var_out = VariantMapper.g_to_c( - self, var_g=var_g, tx_ac=tx_ac, alt_aln_method=alt_aln_method) + var_out = VariantMapper.g_to_c(self, + var_g=var_g, + tx_ac=tx_ac, + alt_aln_method=alt_aln_method) else: - var_out = VariantMapper.g_to_n( - self, var_g=var_g, tx_ac=tx_ac, alt_aln_method=alt_aln_method) + var_out = VariantMapper.g_to_n(self, + var_g=var_g, + tx_ac=tx_ac, + alt_aln_method=alt_aln_method) return var_out def t_to_g(self, var_t, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method): @@ -120,14 +124,19 @@ def t_to_g(self, var_t, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al if self._validator: self._validator.validate(var_t) var_t.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=var_t.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=var_t.ac, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) if var_t.type == "c": - var_out = VariantMapper.c_to_g( - self, var_c=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + var_out = VariantMapper.c_to_g(self, + var_c=var_t, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) else: - var_out = VariantMapper.n_to_g( - self, var_n=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + var_out = VariantMapper.n_to_g(self, + var_n=var_t, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) return var_out # ############################################################################ @@ -149,12 +158,12 @@ def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g)) if self._validator: self._validator.validate(var_g) - mapper = self._fetch_AlignmentMapper( - tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac, + alt_ac=var_g.ac, + alt_aln_method=alt_aln_method) - if (mapper.strand == -1 - and not hgvs.global_config.mapping.strict_bounds - and not mapper.g_interval_is_inbounds(var_g.posedit.pos)): + if (mapper.strand == -1 and not hgvs.global_config.mapping.strict_bounds + and not mapper.g_interval_is_inbounds(var_g.posedit.pos)): _logger.info("Renormalizing out-of-bounds minus strand variant on genomic sequence") var_g = self.left_normalizer.normalize(var_g) @@ -169,14 +178,14 @@ def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln else: # variant at alignment gap pos_g = mapper.n_to_g(pos_n) - edit_n = hgvs.edit.NARefAlt( - ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g)) + edit_n = hgvs.edit.NARefAlt(ref='', + alt=self._get_altered_sequence(mapper.strand, pos_g, var_g)) pos_n.uncertain = var_g.posedit.pos.uncertain - var_n = hgvs.sequencevariant.SequenceVariant( - ac=tx_ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n)) - if (self.replace_reference - and var_n.posedit.pos.start.base >= 0 - and var_n.posedit.pos.end.base < mapper.tgt_len): + var_n = hgvs.sequencevariant.SequenceVariant(ac=tx_ac, + type="n", + posedit=hgvs.posedit.PosEdit(pos_n, edit_n)) + if (self.replace_reference and var_n.posedit.pos.start.base >= 0 + and var_n.posedit.pos.end.base < mapper.tgt_len): self._replace_reference(var_n) if self.add_gene_symbol: self._update_gene_symbol(var_n, var_g.gene) @@ -200,8 +209,9 @@ def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al if self._validator: self._validator.validate(var_n) var_n.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=var_n.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=var_n.ac, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) pos_g = mapper.n_to_g(var_n.posedit.pos) if not pos_g.uncertain: edit_g = self._convert_edit_check_strand(mapper.strand, var_n.posedit.edit) @@ -212,11 +222,12 @@ def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al else: # variant at alignment gap pos_n = mapper.g_to_n(pos_g) - edit_g = hgvs.edit.NARefAlt( - ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n)) + edit_g = hgvs.edit.NARefAlt(ref='', + alt=self._get_altered_sequence(mapper.strand, pos_n, var_n)) pos_g.uncertain = var_n.posedit.pos.uncertain - var_g = hgvs.sequencevariant.SequenceVariant( - ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g)) + var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac, + type="g", + posedit=hgvs.posedit.PosEdit(pos_g, edit_g)) if self.replace_reference: self._replace_reference(var_g) # No gene symbol for g. variants (actually, *should* for NG, but no way to distinguish) @@ -242,8 +253,9 @@ def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln if self._validator: self._validator.validate(var_g) var_g.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac, + alt_ac=var_g.ac, + alt_aln_method=alt_aln_method) pos_c = mapper.g_to_c(var_g.posedit.pos) if not pos_c.uncertain: edit_c = self._convert_edit_check_strand(mapper.strand, var_g.posedit.edit) @@ -254,11 +266,12 @@ def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln else: # variant at alignment gap pos_g = mapper.c_to_g(pos_c) - edit_c = hgvs.edit.NARefAlt( - ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g)) + edit_c = hgvs.edit.NARefAlt(ref='', + alt=self._get_altered_sequence(mapper.strand, pos_g, var_g)) pos_c.uncertain = var_g.posedit.pos.uncertain - var_c = hgvs.sequencevariant.SequenceVariant( - ac=tx_ac, type="c", posedit=hgvs.posedit.PosEdit(pos_c, edit_c)) + var_c = hgvs.sequencevariant.SequenceVariant(ac=tx_ac, + type="c", + posedit=hgvs.posedit.PosEdit(pos_c, edit_c)) if self.replace_reference: self._replace_reference(var_c) if self.add_gene_symbol: @@ -283,8 +296,9 @@ def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al if self._validator: self._validator.validate(var_c) var_c.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=var_c.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + mapper = self._fetch_AlignmentMapper(tx_ac=var_c.ac, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) pos_g = mapper.c_to_g(var_c.posedit.pos) if not pos_g.uncertain: edit_g = self._convert_edit_check_strand(mapper.strand, var_c.posedit.edit) @@ -298,11 +312,12 @@ def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al var_n.posedit.pos = mapper.c_to_n(var_c.posedit.pos) var_n.type = 'n' pos_n = mapper.g_to_n(pos_g) - edit_g = hgvs.edit.NARefAlt( - ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n)) + edit_g = hgvs.edit.NARefAlt(ref='', + alt=self._get_altered_sequence(mapper.strand, pos_n, var_n)) pos_g.uncertain = var_c.posedit.pos.uncertain - var_g = hgvs.sequencevariant.SequenceVariant( - ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g)) + var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac, + type="g", + posedit=hgvs.posedit.PosEdit(pos_g, edit_g)) if self.replace_reference: self._replace_reference(var_g) return var_g @@ -325,8 +340,9 @@ def c_to_n(self, var_c): if self._validator: self._validator.validate(var_c) var_c.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=var_c.ac, alt_ac=var_c.ac, alt_aln_method="transcript") + mapper = self._fetch_AlignmentMapper(tx_ac=var_c.ac, + alt_ac=var_c.ac, + alt_aln_method="transcript") pos_n = mapper.c_to_n(var_c.posedit.pos) if (isinstance(var_c.posedit.edit, hgvs.edit.NARefAlt) or isinstance(var_c.posedit.edit, hgvs.edit.Dup) @@ -335,8 +351,9 @@ def c_to_n(self, var_c): else: raise HGVSUnsupportedOperationError( "Only NARefAlt/Dup/Inv types are currently implemented") - var_n = hgvs.sequencevariant.SequenceVariant( - ac=var_c.ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n)) + var_n = hgvs.sequencevariant.SequenceVariant(ac=var_c.ac, + type="n", + posedit=hgvs.posedit.PosEdit(pos_n, edit_n)) if self.replace_reference: self._replace_reference(var_n) if self.add_gene_symbol: @@ -359,8 +376,9 @@ def n_to_c(self, var_n): if self._validator: self._validator.validate(var_n) var_n.fill_ref(self.hdp) - mapper = self._fetch_AlignmentMapper( - tx_ac=var_n.ac, alt_ac=var_n.ac, alt_aln_method="transcript") + mapper = self._fetch_AlignmentMapper(tx_ac=var_n.ac, + alt_ac=var_n.ac, + alt_aln_method="transcript") pos_c = mapper.n_to_c(var_n.posedit.pos) if (isinstance(var_n.posedit.edit, hgvs.edit.NARefAlt) or isinstance(var_n.posedit.edit, hgvs.edit.Dup) @@ -369,8 +387,9 @@ def n_to_c(self, var_n): else: raise HGVSUnsupportedOperationError( "Only NARefAlt/Dup/Inv types are currently implemented") - var_c = hgvs.sequencevariant.SequenceVariant( - ac=var_n.ac, type="c", posedit=hgvs.posedit.PosEdit(pos_c, edit_c)) + var_c = hgvs.sequencevariant.SequenceVariant(ac=var_n.ac, + type="c", + posedit=hgvs.posedit.PosEdit(pos_c, edit_c)) if self.replace_reference: self._replace_reference(var_c) if self.add_gene_symbol: @@ -436,15 +455,16 @@ def _replace_reference(self, var): # For c. variants, we need coords on underlying sequences if var.type == "c": - mapper = self._fetch_AlignmentMapper( - tx_ac=var.ac, alt_ac=var.ac, alt_aln_method="transcript") + mapper = self._fetch_AlignmentMapper(tx_ac=var.ac, + alt_ac=var.ac, + alt_aln_method="transcript") pos = mapper.c_to_n(var.posedit.pos) else: pos = var.posedit.pos seq_start = pos.start.base - 1 seq_end = pos.end.base - + # When strict_bounds is False and an error occurs, return # variant as-is @@ -460,8 +480,8 @@ def _replace_reference(self, var): edit = var.posedit.edit if edit.ref != seq: - _logger.debug("Replaced reference sequence in {var} with {seq}".format( - var=var, seq=seq)) + _logger.debug("Replaced reference sequence in {var} with {seq}".format(var=var, + seq=seq)) edit.ref = seq return var @@ -472,8 +492,10 @@ def _fetch_AlignmentMapper(self, tx_ac, alt_ac, alt_aln_method): Get a new AlignmentMapper for the given transcript accession (ac), possibly caching the result. """ - return hgvs.alignmentmapper.AlignmentMapper( - self.hdp, tx_ac=tx_ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method) + return hgvs.alignmentmapper.AlignmentMapper(self.hdp, + tx_ac=tx_ac, + alt_ac=alt_ac, + alt_aln_method=alt_aln_method) @staticmethod def _convert_edit_check_strand(strand, edit_in): @@ -551,7 +573,7 @@ def _get_altered_sequence(self, strand, interval, var): def _update_gene_symbol(self, var, symbol): if not symbol: symbol = self.hdp.get_tx_identity_info(var.ac).get("hgnc", None) - var.gene = symbol + var.gene = symbol return var diff --git a/tests/issues/test_606.py b/tests/issues/test_606.py index f3cfd54a..fed5e3c9 100644 --- a/tests/issues/test_606.py +++ b/tests/issues/test_606.py @@ -5,12 +5,10 @@ class TestIssue606(unittest.TestCase): - def test_606(self): """https://github.com/biocommons/hgvs/issues/606""" from hgvs.easy import am37, parser - """ Occasionally, an IndexError is thrown by the _get_altered_sequence method. This seems to occur when there is either inconsistent data for a transcript in UTA or an invalid variant input.