diff --git a/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 b/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 index 0bee40176a..e74de478b4 100644 --- a/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 +++ b/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 @@ -383,6 +383,8 @@ nt_change_inner | nt_change_insertion | nt_change_inversion | nt_change_substitution + | nt_change_sequenced_repeat + | nt_change_not_sequenced_repeat | nt_change_ssr | nt_change_unchanged | nt_change_misc @@ -472,6 +474,40 @@ nt_change_ssr ) NT_PAREN_OPEN NT_NUMBER NT_UNDERSCORE NT_NUMBER NT_PAREN_CLOSE ; +/** DNA repeat (sequenced) */ +nt_change_sequenced_repeat +: + ( + nt_point_location + | nt_range + ) nt_change_repeat_sequence* +; + +nt_change_repeat_sequence +: + NT_STRING NT_SQUARE_PAREN_OPEN NT_NUMBER NT_SQUARE_PAREN_CLOSE +; + +/** DNA repeat (not sequenced) */ +nt_change_not_sequenced_repeat +: + ( + nt_point_location + | nt_range + ) + ( + NT_INS + | NT_DEL + ) + NT_PAREN_OPEN + ( + NT_NUMBER + | NT_NUMBER NT_UNDERSCORE NT_NUMBER + ) + NT_PAREN_CLOSE +; + + /** nucleotide substitution */ nt_change_substitution : diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeat.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeat.java new file mode 100644 index 0000000000..b48735aedf --- /dev/null +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeat.java @@ -0,0 +1,117 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import java.util.Objects; +import de.charite.compbio.jannovar.hgvs.AminoAcidCode; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange; + + +/** + * A repeat that is not completely sequenced (i.e. unknown bases). + * + * @author Mark Woon + */ +public class NucleotideNotSequencedRepeat extends NucleotideChange { + public enum InDelType { INS, DEL} + private final NucleotideRange range; + private final InDelType type; + /** + * The lower bound on the length of the repeat, inclusive. + */ + private final int minCount; + /** + * The upper bound on the length of the repeat, inclusive. + */ + private final int maxCount; + + + public NucleotideNotSequencedRepeat(boolean onlyPredicted, NucleotideRange range, InDelType type, + int minCount, int maxCount) { + super(onlyPredicted); + this.range = range; + this.type = type; + this.minCount = minCount; + this.maxCount = maxCount; + } + + + /** + * @return range of repeat + */ + public NucleotideRange getRange() { + return range; + } + + public boolean isInsertion() { + return type == InDelType.INS; + } + + public boolean isDeletion() { + return type == InDelType.DEL; + } + + /** + * Gets the lower bound on the length of the repeat, inclusive. + */ + public int getMinCount() { + return minCount; + } + + /** + * Gets the upper bound on the length of the repeat, inclusive. + */ + public int getMaxCount() { + return maxCount; + } + + + @Override + public NucleotideChange withOnlyPredicted(boolean flag) { + return new NucleotideNotSequencedRepeat(flag, range, type, minCount, maxCount); + } + + + @Override + public String toHGVSString() { + StringBuilder builder = new StringBuilder(range.toHGVSString()) + .append(type.name().toLowerCase()) + .append("(") + .append(minCount); + if (minCount != maxCount) { + builder.append("_") + .append(maxCount); + } + builder.append(")"); + return wrapIfOnlyPredicted(builder.toString()); + } + + @Override + public String toHGVSString(AminoAcidCode code) { + return toHGVSString(); + } + + @Override + public String toString() { + return "NucleotideNotSequencedRepeat [range=" + range + "type=" + type.name() + ", minCount=" + + minCount + ", maxCount=" + maxCount + "]"; + } + + @Override + public int hashCode() { + return Objects.hash(range, type, minCount, maxCount); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final NucleotideNotSequencedRepeat other = (NucleotideNotSequencedRepeat)obj; + return Objects.equals(range, other.range) && + Objects.equals(type, other.type) && + Objects.equals(minCount, other.getMinCount()) && + Objects.equals(maxCount, other.getMaxCount()); + } +} diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequence.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequence.java new file mode 100644 index 0000000000..e04befe1fe --- /dev/null +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequence.java @@ -0,0 +1,65 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import java.util.Objects; +import de.charite.compbio.jannovar.hgvs.AminoAcidCode; +import de.charite.compbio.jannovar.hgvs.ConvertibleToHGVSString; + + +/** + * A repeated sequence. + * + * @author Mark Woon + */ +public class NucleotideRepeatSequence implements ConvertibleToHGVSString { + final String sequence; + final int copyNumber; + + + public NucleotideRepeatSequence(String sequence, int copyNumber) { + this.sequence = sequence; + this.copyNumber = copyNumber; + } + + + public String getSequence() { + return sequence; + } + + public int getCopyNumber() { + return copyNumber; + } + + + @Override + public String toHGVSString() { + return sequence + "[" + copyNumber + "]"; + } + + @Override + public String toHGVSString(AminoAcidCode code) { + return toHGVSString(); + } + + @Override + public String toString() { + return "NucleotideRepeatSequence [sequence=" + sequence + ", copyNumber=" + copyNumber + "]"; + } + + @Override + public int hashCode() { + return Objects.hash(sequence, copyNumber); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final NucleotideRepeatSequence other = (NucleotideRepeatSequence)obj; + return Objects.equals(sequence, other.sequence) && + Objects.equals(copyNumber, other.copyNumber); + } +} diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeat.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeat.java new file mode 100644 index 0000000000..5ff41b4ac9 --- /dev/null +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeat.java @@ -0,0 +1,89 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import java.util.List; +import java.util.Objects; +import com.google.common.base.Joiner; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange; + + +/** + * A repeat that has been sequenced. + * + * @author Mark Woon + */ +public class NucleotideSequencedRepeat extends NucleotideChange { + private final NucleotideRange range; + public List sequencedRepeats; + public NucleotideNotSequencedRepeat notSequencedRepeat; + + + public NucleotideSequencedRepeat(boolean onlyPredicted, NucleotideRange range, + List sequencedRepeats) { + super(onlyPredicted); + this.range = range; + this.sequencedRepeats = sequencedRepeats; + } + + + /** + * @return range of repeat + */ + public NucleotideRange getRange() { + return range; + } + + /** + * Gets the sequenced repeats. Null if this repeat is not sequenced. + */ + public List getSequencedRepeats() { + return sequencedRepeats; + } + + /** + * Gets the repeat if it was not sequenced. Null if this repeat is sequenced. + */ + public NucleotideNotSequencedRepeat getNotSequencedRepeat() { + return notSequencedRepeat; + } + + + @Override + public NucleotideChange withOnlyPredicted(boolean flag) { + return new NucleotideSequencedRepeat(flag, range, sequencedRepeats); + } + + + @Override + public String toHGVSString() { + StringBuilder builder = new StringBuilder(range.toHGVSString()); + sequencedRepeats.stream() + .map(NucleotideRepeatSequence::toHGVSString) + .forEach(builder::append); + return wrapIfOnlyPredicted(builder.toString()); + } + + @Override + public String toString() { + return "NucleotideRepeat [range=" + range + ", sequences=(" + + Joiner.on(", ").join(sequencedRepeats) + ")]"; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), range, sequencedRepeats); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final NucleotideSequencedRepeat other = (NucleotideSequencedRepeat)obj; + return super.equals(obj) && + Objects.equals(range, other.range) && + Objects.equals(sequencedRepeats, other.sequencedRepeats); + } +} diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java index b9f7733727..bd9c91c78a 100644 --- a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java @@ -20,11 +20,13 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; // TODO(holtgrewe): support parsing amino acid changes /** - * Master ParseTreeListener used in {@link HVSParser} setB + * Master ParseTreeListener used in {@link HGVSParser} setB * * @author Manuel Holtgrewe */ @@ -451,7 +453,60 @@ public void exitNt_change_ssr(Nt_change_ssrContext ctx) { new NucleotideShortSequenceRepeatVariability(false, range, minCount, maxCount)); } - /** + @Override + public void exitNt_change_sequenced_repeat(Nt_change_sequenced_repeatContext ctx) { + LOGGER.debug("Leaving nt_change_sequenced_repeat"); + + final NucleotideRange range; + if (ctx.nt_range() != null) + range = (NucleotideRange) getValue(ctx.nt_range()); + else + range = new NucleotideRange((NucleotidePointLocation) getValue(ctx.nt_point_location()), + (NucleotidePointLocation) getValue(ctx.nt_point_location())); + + List sequences = null; + if (ctx.nt_change_repeat_sequence() != null) { + sequences = ctx.nt_change_repeat_sequence().stream() + .map(rsc -> (NucleotideRepeatSequence)getValue(rsc)) + .collect(Collectors.toList()); + } + + setValue(ctx, new NucleotideSequencedRepeat(false, range, sequences)); + } + + @Override + public void exitNt_change_repeat_sequence(Nt_change_repeat_sequenceContext ctx) { + LOGGER.debug("Leaving nt_change_repeat_sequence"); + final String sequence = ctx.NT_STRING().getText(); + final int copyNumber = Integer.parseInt(ctx.NT_NUMBER().getText()); + setValue(ctx, new NucleotideRepeatSequence(sequence, copyNumber)); + } + + @Override + public void exitNt_change_not_sequenced_repeat(Nt_change_not_sequenced_repeatContext ctx) { + LOGGER.debug("Leaving nt_change_not_sequenced_repeat"); + + final NucleotideRange range; + if (ctx.nt_range() != null) + range = (NucleotideRange) getValue(ctx.nt_range()); + else + range = new NucleotideRange((NucleotidePointLocation) getValue(ctx.nt_point_location()), + (NucleotidePointLocation) getValue(ctx.nt_point_location())); + + NucleotideNotSequencedRepeat.InDelType type = NucleotideNotSequencedRepeat.InDelType.INS; + if (ctx.NT_DEL() != null) { + type = NucleotideNotSequencedRepeat.InDelType.DEL; + } + final int minCount = Integer.parseInt(ctx.NT_NUMBER(0).getText()); + int maxCount = minCount; + if (ctx.NT_NUMBER().size() > 1) { + maxCount = Integer.parseInt(ctx.NT_NUMBER(1).getText()); + } + setValue(ctx, new NucleotideNotSequencedRepeat(false, range, type, minCount, maxCount)); + } + + + /** * Leaving of nt_change_misc rule *

* Construct {@link NucleotideMiscChange} from the children's values and label ctx with this. @@ -478,7 +533,7 @@ public void exitReference(ReferenceContext ctx) { if (transcriptID.contains(".")) { int pos = transcriptID.lastIndexOf('.'); transcriptVersion = Integer - .parseInt(transcriptID.substring(pos + 1, transcriptID.length())); + .parseInt(transcriptID.substring(pos + 1)); transcriptID = transcriptID.substring(0, pos); } if (ctx.PAREN_OPEN() != null) diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeatTest.java new file mode 100644 index 0000000000..1007a64225 --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideNotSequencedRepeatTest.java @@ -0,0 +1,70 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import de.charite.compbio.jannovar.hgvs.nts.NucleotidePointLocation; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static de.charite.compbio.jannovar.hgvs.nts.change.NucleotideNotSequencedRepeat.InDelType.DEL; +import static de.charite.compbio.jannovar.hgvs.nts.change.NucleotideNotSequencedRepeat.InDelType.INS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + + +/** + * Unit test for {@link NucleotideNotSequencedRepeat}. + * + * @author Mark Woon + */ +class NucleotideNotSequencedRepeatTest { + private NucleotideNotSequencedRepeat firstRepeatSeqA1; + private NucleotideNotSequencedRepeat firstRepeatSeqA2; + private NucleotideNotSequencedRepeat firstRepeatSeqB1; + private NucleotideNotSequencedRepeat firstRepeatSeqB2; + private NucleotideNotSequencedRepeat secondRepeatSeqA1; + private NucleotideNotSequencedRepeat secondRepeatSeqA2; + private NucleotideNotSequencedRepeat secondRepeatSeqB1; + private NucleotideNotSequencedRepeat secondRepeatSeqB2; + + + @BeforeEach + public void setUp() { + NucleotideRange range1 = new NucleotideRange(NucleotidePointLocation.build(1), + NucleotidePointLocation.build(1)); + NucleotideRange range2 = new NucleotideRange(NucleotidePointLocation.build(1), + NucleotidePointLocation.build(5)); + + + firstRepeatSeqA1 = new NucleotideNotSequencedRepeat(false, range1, INS, 2, 2); + firstRepeatSeqA2 = new NucleotideNotSequencedRepeat(false, range2, INS, 2, 4); + firstRepeatSeqB1 = new NucleotideNotSequencedRepeat(false, range1, DEL, 2, 2); + firstRepeatSeqB2 = new NucleotideNotSequencedRepeat(false, range2, DEL, 2, 4); + + secondRepeatSeqA1 = new NucleotideNotSequencedRepeat(false, range1, INS, 2, 2); + secondRepeatSeqA2 = new NucleotideNotSequencedRepeat(false, range2, INS, 2, 4); + secondRepeatSeqB1 = new NucleotideNotSequencedRepeat(false, range1, DEL, 2, 2); + secondRepeatSeqB2 = new NucleotideNotSequencedRepeat(false, range2, DEL, 2, 4); + } + + @Test + public void testEquals() { + assertEquals(firstRepeatSeqA1, secondRepeatSeqA1); + assertEquals(firstRepeatSeqA2, secondRepeatSeqA2); + assertEquals(firstRepeatSeqB1, secondRepeatSeqB1); + assertEquals(firstRepeatSeqB2, secondRepeatSeqB2); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqA2); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqB1); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqB2); + assertNotEquals(firstRepeatSeqA2, firstRepeatSeqB1); + assertNotEquals(firstRepeatSeqA2, firstRepeatSeqB2); + assertNotEquals(firstRepeatSeqB1, firstRepeatSeqB2); + } + + @Test + public void testToHGVSString() { + assertEquals("2ins(2)", firstRepeatSeqA1.toHGVSString()); + assertEquals("2_6ins(2_4)", firstRepeatSeqA2.toHGVSString()); + assertEquals("2del(2)", firstRepeatSeqB1.toHGVSString()); + assertEquals("2_6del(2_4)", firstRepeatSeqB2.toHGVSString()); + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequenceTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequenceTest.java new file mode 100644 index 0000000000..208e40e740 --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideRepeatSequenceTest.java @@ -0,0 +1,41 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + + +/** + * Unit test for {@link NucleotideRepeatSequence}. + * + * @author Mark Woon + */ +class NucleotideRepeatSequenceTest { + private NucleotideRepeatSequence firstSeq; + private NucleotideRepeatSequence secondSeq; + private NucleotideRepeatSequence thirdSeq; + private NucleotideRepeatSequence fourthSeq; + + @BeforeEach + public void setUp() { + firstSeq = new NucleotideRepeatSequence("TT", 2); + secondSeq = new NucleotideRepeatSequence("TT", 2); + thirdSeq = new NucleotideRepeatSequence("AT", 2); + fourthSeq = new NucleotideRepeatSequence("TT", 4); + } + + @Test + public void testEquals() { + assertEquals(firstSeq, secondSeq); + assertNotEquals(firstSeq, thirdSeq); + assertNotEquals(firstSeq, fourthSeq); + } + + @Test + public void testToHGVSString() { + assertEquals("TT[2]", firstSeq.toHGVSString()); + assertEquals("AT[2]", thirdSeq.toHGVSString()); + assertEquals("TT[4]", fourthSeq.toHGVSString()); + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeatTest.java new file mode 100644 index 0000000000..05ce8b3733 --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideSequencedRepeatTest.java @@ -0,0 +1,71 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import com.google.common.collect.Lists; +import de.charite.compbio.jannovar.hgvs.nts.NucleotidePointLocation; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + + +/** + * Unit test for {@link NucleotideSequencedRepeat}. + * + * @author Mark Woon + */ +class NucleotideSequencedRepeatTest { + private NucleotideSequencedRepeat firstRepeatSeqA1; + private NucleotideSequencedRepeat firstRepeatSeqA2; + private NucleotideSequencedRepeat firstRepeatSeqB1; + private NucleotideSequencedRepeat firstRepeatSeqB2; + private NucleotideSequencedRepeat secondRepeatSeqA1; + private NucleotideSequencedRepeat secondRepeatSeqA2; + private NucleotideSequencedRepeat secondRepeatSeqB1; + private NucleotideSequencedRepeat secondRepeatSeqB2; + + + @BeforeEach + public void setUp() { + NucleotideRange range1 = new NucleotideRange(NucleotidePointLocation.build(1), + NucleotidePointLocation.build(1)); + NucleotideRange range2 = new NucleotideRange(NucleotidePointLocation.build(1), + NucleotidePointLocation.build(5)); + NucleotideRepeatSequence seqRepeat1 = new NucleotideRepeatSequence("AA", 4); + NucleotideRepeatSequence seqRepeat2 = new NucleotideRepeatSequence("CC", 8); + + firstRepeatSeqA1 = new NucleotideSequencedRepeat(false, range1, Lists.newArrayList(seqRepeat1)); + firstRepeatSeqA2 = new NucleotideSequencedRepeat(false, range1, Lists.newArrayList(seqRepeat1, seqRepeat2)); + firstRepeatSeqB1 = new NucleotideSequencedRepeat(false, range2, Lists.newArrayList(seqRepeat1)); + firstRepeatSeqB2 = new NucleotideSequencedRepeat(false, range2, Lists.newArrayList(seqRepeat1, seqRepeat2)); + + secondRepeatSeqA1 = new NucleotideSequencedRepeat(false, range1, Lists.newArrayList(seqRepeat1)); + secondRepeatSeqA2 = new NucleotideSequencedRepeat(false, range1, Lists.newArrayList(seqRepeat1, seqRepeat2)); + secondRepeatSeqB1 = new NucleotideSequencedRepeat(false, range2, Lists.newArrayList(seqRepeat1)); + secondRepeatSeqB2 = new NucleotideSequencedRepeat(false, range2, Lists.newArrayList(seqRepeat1, seqRepeat2)); + } + + + @Test + public void testEquals() { + assertEquals(firstRepeatSeqA1, secondRepeatSeqA1); + assertEquals(firstRepeatSeqA2, secondRepeatSeqA2); + assertEquals(firstRepeatSeqB1, secondRepeatSeqB1); + assertEquals(firstRepeatSeqB2, secondRepeatSeqB2); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqA2); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqB1); + assertNotEquals(firstRepeatSeqA1, firstRepeatSeqB2); + assertNotEquals(firstRepeatSeqA2, firstRepeatSeqB1); + assertNotEquals(firstRepeatSeqA2, firstRepeatSeqB2); + assertNotEquals(firstRepeatSeqB1, firstRepeatSeqB2); + } + + @Test + public void testToHGVSString() { + assertEquals("2AA[4]", firstRepeatSeqA1.toHGVSString()); + assertEquals("2AA[4]CC[8]", firstRepeatSeqA2.toHGVSString()); + assertEquals("2_6AA[4]", firstRepeatSeqB1.toHGVSString()); + assertEquals("2_6AA[4]CC[8]", firstRepeatSeqB2.toHGVSString()); + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideNotSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideNotSequencedRepeatTest.java new file mode 100644 index 0000000000..d418013d0d --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideNotSequencedRepeatTest.java @@ -0,0 +1,45 @@ +package de.charite.compbio.jannovar.hgvs.parser; + +import de.charite.compbio.jannovar.hgvs.HGVSVariant; +import de.charite.compbio.jannovar.hgvs.nts.variant.SingleAlleleNucleotideVariant; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + + +/** + * Test parser for not-sequenced repeats. + * + * Examples from https://varnomen.hgvs.org/recommendations/DNA/variant/repeated/: + * + * NM_000333.3:c.(4_246)ins(9) + * NC_000003.12:g.(63912602_63912844)del(15) + * NM_002024.5:c.(-144_-16)ins(1800_2400) + * + * @author Mark Woon + */ +public class HGVSParserDriverNucleotideNotSequencedRepeatTest { + + HGVSParser driver; + + @BeforeEach + public void setUp() throws Exception { + driver = new HGVSParser(false); + } + + @Test + public void test() { + String[] hgvsStrings = new String[]{ + "NM_000333.3:c.4_246ins(9)", + "NC_000003.12:g.63912602_63912844del(15)", + "NM_002024.5:c.-144_-16ins(1800_2400)", + }; + + for (String hgvsString : hgvsStrings) { + HGVSVariant variant = driver.parseHGVSString(hgvsString); + + Assertions.assertTrue(variant instanceof SingleAlleleNucleotideVariant, variant.getClass().getName()); + Assertions.assertEquals(hgvsString, variant.toHGVSString()); + } + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideSequencedRepeatTest.java new file mode 100644 index 0000000000..a697e14ca4 --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideSequencedRepeatTest.java @@ -0,0 +1,59 @@ +package de.charite.compbio.jannovar.hgvs.parser; + +import de.charite.compbio.jannovar.hgvs.HGVSVariant; +import de.charite.compbio.jannovar.hgvs.nts.variant.MultiAlleleNucleotideVariant; +import de.charite.compbio.jannovar.hgvs.nts.variant.SingleAlleleNucleotideVariant; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + + +/** + * Test parser for sequenced repeats. + * + * Examples from https://varnomen.hgvs.org/recommendations/DNA/variant/repeated/: + * + * NC_000003.12:c.89AGC[13] + * NC_000003.12:g.63912687AGC[13] + * NM_002024.5:c.-129CGG[79] + * LRG_763t1:c.53AGC[23] + * NM_000492.3:c.1210-12T[7] + * NC_000012.11:g.112036755_112036823CTG[9]TTG[1]CTG[13] + * NM_000492.3:c.1210-33_1210-6GT[11]T[6] + * NM_021080.3:c.-136-75952ATTTT[15] + * NM_002024.5:c.-128_-69GGC[10]GGA[1]GGC[9]GGA[1]GGC[10] + * + * NM_023035.2(CACNA1A):c.6955CAG[26] + * + * @author Mark Woon + */ +public class HGVSParserDriverNucleotideSequencedRepeatTest { + + HGVSParser driver; + + @BeforeEach + public void setUp() throws Exception { + driver = new HGVSParser(false); + } + + @Test + public void test() { + String[] hgvsStrings = new String[]{ + "NC_000003.12:c.89AGC[13]", + "NM_002024.5:c.-129CGG[79]", + "LRG_763t1:c.53AGC[23]", + "NM_000492.3:c.1210-12T[7]", + "NC_000012.11:g.112036755_112036823CTG[9]TTG[1]CTG[13]", + "NM_000492.3:c.1210-33_1210-6GT[11]T[6]", + "NM_021080.3:c.-136-75952ATTTT[15]", + "NM_002024.5:c.-128_-69GGC[10]GGA[1]GGC[9]GGA[1]GGC[10]", + }; + + for (String hgvsString : hgvsStrings) { + HGVSVariant variant = driver.parseHGVSString(hgvsString); + + Assertions.assertTrue(variant instanceof SingleAlleleNucleotideVariant, variant.getClass().getName()); + Assertions.assertEquals(hgvsString, variant.toHGVSString()); + } + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideNotSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideNotSequencedRepeatTest.java new file mode 100644 index 0000000000..5cbe75ba7d --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideNotSequencedRepeatTest.java @@ -0,0 +1,46 @@ +package de.charite.compbio.jannovar.hgvs.parser.nts.change; + +import de.charite.compbio.jannovar.hgvs.parser.Antlr4HGVSLexer; +import de.charite.compbio.jannovar.hgvs.parser.Antlr4HGVSParser; +import de.charite.compbio.jannovar.hgvs.parser.HGVSParserTestBase; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + + +/** + * Parser for HGVS not-sequenced repeats. + * + * Examples from https://varnomen.hgvs.org/recommendations/DNA/variant/repeated/: + * + * NM_000333.3:c.(4_246)ins(9) + * NC_000003.12:g.(63912602_63912844)del(15) + * NM_002024.5:c.(-144_-16)ins(1800_2400) + * + * @author Mark Woon + */ +public class HGVSParserNucleotideNotSequencedRepeatTest extends HGVSParserTestBase { + + @Test + public void testIns() { + Antlr4HGVSParser parser = buildParserForString("4_246ins(9)", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_not_sequenced_repeatContext repeat = parser.nt_change_not_sequenced_repeat(); + Assertions.assertEquals("(nt_change_not_sequenced_repeat (nt_range (nt_point_location (nt_base_location (nt_number 4))) _ (nt_point_location (nt_base_location (nt_number 246)))) ins ( 9 ))", + repeat.toStringTree(parser)); + } + + @Test + public void testDel() { + Antlr4HGVSParser parser = buildParserForString("-14_-5del(15)", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_not_sequenced_repeatContext repeat = parser.nt_change_not_sequenced_repeat(); + Assertions.assertEquals("(nt_change_not_sequenced_repeat (nt_range (nt_point_location (nt_base_location - (nt_number 14))) _ (nt_point_location (nt_base_location - (nt_number 5)))) del ( 15 ))", + repeat.toStringTree(parser)); + } + + @Test + public void testRange() { + Antlr4HGVSParser parser = buildParserForString("-144_-16ins(1800_2400)", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_not_sequenced_repeatContext repeat = parser.nt_change_not_sequenced_repeat(); + Assertions.assertEquals("(nt_change_not_sequenced_repeat (nt_range (nt_point_location (nt_base_location - (nt_number 144))) _ (nt_point_location (nt_base_location - (nt_number 16)))) ins ( 1800 _ 2400 ))", + repeat.toStringTree(parser)); + } +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideSequencedRepeatTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideSequencedRepeatTest.java new file mode 100644 index 0000000000..4bda65fd34 --- /dev/null +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/change/HGVSParserNucleotideSequencedRepeatTest.java @@ -0,0 +1,54 @@ +package de.charite.compbio.jannovar.hgvs.parser.nts.change; + +import de.charite.compbio.jannovar.hgvs.parser.Antlr4HGVSLexer; +import de.charite.compbio.jannovar.hgvs.parser.Antlr4HGVSParser; +import de.charite.compbio.jannovar.hgvs.parser.HGVSParserTestBase; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + + +/** + * Test parser for sequenced repeats. + * + * Examples from https://varnomen.hgvs.org/recommendations/DNA/variant/repeated/: + * + * NC_000003.12:c.89AGC[13] + * NC_000003.12:g.63912687AGC[13] + * NM_002024.5:c.-129CGG[79] + * LRG_763t1:c.53AGC[23] + * NM_000492.3:c.1210-12T[7] + * NC_000012.11:g.112036755_112036823CTG[9]TTG[1]CTG[13] + * NM_000492.3:c.1210-33_1210-6GT[11]T[6] + * NM_021080.3:c.-136-75952ATTTT[15] + * NM_002024.5:c.-128_-69GGC[10]GGA[1]GGC[9]GGA[1]GGC[10] + * + * NM_023035.2(CACNA1A):c.6955CAG[26] + * + * @author Mark Woon + */ +public class HGVSParserNucleotideSequencedRepeatTest extends HGVSParserTestBase { + + @Test + public void testLengthOne() { + Antlr4HGVSParser parser = buildParserForString("123T[3]", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_sequenced_repeatContext nt_change_repeat = parser.nt_change_sequenced_repeat(); + Assertions.assertEquals("(nt_change_sequenced_repeat (nt_point_location (nt_base_location (nt_number 123))) (nt_change_repeat_sequence T [ 3 ]))", + nt_change_repeat.toStringTree(parser)); + } + + @Test + public void testLengthTwo() { + Antlr4HGVSParser parser = buildParserForString("-123TA[13]", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_sequenced_repeatContext nt_change_repeat = parser.nt_change_sequenced_repeat(); + Assertions.assertEquals("(nt_change_sequenced_repeat (nt_point_location (nt_base_location - (nt_number 123))) (nt_change_repeat_sequence TA [ 13 ]))", + nt_change_repeat.toStringTree(parser)); + } + + @Test + public void testRange() { + Antlr4HGVSParser parser = buildParserForString("755_823CTG[9]TTG[1]CTG[13]", Antlr4HGVSLexer.NUCLEOTIDE_CHANGE, false); + Antlr4HGVSParser.Nt_change_sequenced_repeatContext nt_change_repeat = parser.nt_change_sequenced_repeat(); + Assertions.assertEquals("(nt_change_sequenced_repeat (nt_range (nt_point_location (nt_base_location (nt_number 755))) _ (nt_point_location (nt_base_location (nt_number 823)))) (nt_change_repeat_sequence CTG [ 9 ]) (nt_change_repeat_sequence TTG [ 1 ]) (nt_change_repeat_sequence CTG [ 13 ]))", + nt_change_repeat.toStringTree(parser)); + } +}