diff --git a/.travis.yml b/.travis.yml index 9fbf327bf..9b25c4011 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,16 @@ before_install: - mvn clean install -DskipTests -B && cd .. - mv ./repseqio/.cache .cache +script: + - mvn test -B && ./itests.sh test + jdk: - openjdk7 +before_cache: + - rm -f $HOME/.m2/repository/io/repseqio/ + - rm -f $HOME/.m2/repository/com/milaboratory/ + cache: directories: - $HOME/.m2 diff --git a/CHANGELOG b/CHANGELOG index 326002892..c8d3a7319 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,33 @@ +MiXCR 2.1.1 ( 3 Mar 2017) +======================== + +-- Additional check for the same V hits in merging sequences in `assemblePartial` +-- Fix: list of C hits limited by `relativeMinScore` in `assemblePartial` +-- Not aligned mates from fully aligned opposite PE reads are now taken into `assemblePartial` + procedure (case when V and J parts of the target read are shorter than 12 nt) +-- Additional consistency cehcks in `assemblePartial`, overlaps of non-CDR3-covering mates of PE + reads are also taken into account +-- More sophisticated algorithm for alignment-guided merging of paired-end reads in `align` +-- Several anchor point positions in export points changed their meaning. +-- Fixed absent space in `AA. Seq. ...` column name and excessive space in the end of several + column names (affects `exportClones` and `exportAlignments` with `-v` option; fixes compatibility + with VDJTools v1.1.3 and below). +-- Fixes NPE in `exportClones` in some cases with mmu reference +-- `-p` option in `assemblePartial` enabled by default; deprecated; will be removed in 2.2 +-- Added `-d` option in `assemblePartial` to drop non-assembled partial reads to reduce output file + size +-- Added `sortAlignments` action. Sort `*.vdjca` files by read id, allows diffing alignments after + `assembleParial` and other actions that shuffle records inside file. +-- Fixed incorrect behaviour of `exportClones` and `exportAlignments` in cases like `mixcr + exportAlignments -p full -descrR1 ...` +-- minor: Multiple reads in `exportAlignmentsPretty` and `filterAlignments` should be now specified + as `-i 123 -i 456` or `-i 123,456` etc. +-- minor: `--reads-ids` in `exportAlignmentsPretty` and `--readsIds` in `filterAlignments` both + renamed to `--read-ids` +-- minor: infrastructure for post-build tests in Travis CI + + MiXCR 2.1 ( 6 Feb 2017) ======================== diff --git a/README.md b/README.md index da306d5ca..007ea1eef 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,12 @@ Dependancy: To build MiXCR from source: +- Clone repository + + ``` + git clone https://github.com/milaboratory/mixcr.git + ``` + - Refresh git submodules ``` diff --git a/doc/assemble.rst b/doc/assemble.rst index 0225de597..7141b66b9 100644 --- a/doc/assemble.rst +++ b/doc/assemble.rst @@ -271,72 +271,41 @@ In order to turn off clustering one should use the following parameters: mixcr assemble -OcloneClusteringParameters=null alignments.vdjca output.clns -Clone factory parameters ------------------------- +.. Clone factory parameters +.. ------------------------ -Parameters which control final alignment of clonal sequences are placed -in ``cloneFactoryParameters`` group. These parameters includes separate -groups for V, D, J and C aligners: ``vParameters``, ``dParameters``, -``jParameters`` and ``cParameters``. The D aligner is the same as used -in ``align`` and thus all its parameters and their default values are -the same as described for :ref:`D aligner in align `. One -can override these parameters in the following way: +.. Parameters which control final alignment of clonal sequences are placed in ``cloneFactoryParameters`` group. These parameters includes separate groups for V, D, J and C aligners: ``vParameters``, ``dParameters``, ``jParameters`` and ``cParameters``. The D aligner is the same as used in ``align`` and thus all its parameters and their default values are the same as described for :ref:`D aligner in align `. One can override these parameters in the following way: -:: - - mixcr assemble -OcloneFactoryParameters.dParameters.absoluteMinScore=10 alignments.vdjca output.clns - -:: - - mixcr assemble -OcloneFactoryParameters.dParameters.scoring.gapOpenPenalty=-10 alignments.vdjca output.clns - -The aligners used to build alignments with V, J and C genes are -different from those used by ``align``. +.. -V, J and C aligner parameters -''''''''''''''''''''''''''''' +.. mixcr assemble -OcloneFactoryParameters.dParameters.absoluteMinScore=10 alignments.vdjca output.clns -The following table lists parameters of V, J and C aligners: +.. +.. mixcr assemble -OcloneFactoryParameters.dParameters.scoring.gapOpenPenalty=-10 alignments.vdjca output.clns +.. The aligners used to build alignments with V, J and C genes are different from those used by ``align``. -+----------------------+-----------------+-----------------+------------------+-----------------------------------------------------------+ -| Parameter | Default V value | Default J value | Default C value |Description | -+======================+=================+=================+==================+===========================================================+ -| ``featureToAlign`` | ``VTranscript`` | ``JRegion`` | ``CExon1`` | Gene region used to build alignments. | -+----------------------+-----------------+-----------------+------------------+-----------------------------------------------------------+ -| ``relativeMinScore`` | ``0.8`` | ``0.8`` | ``0.8`` | Relative minimal score of hit: hits with score less than | -| | | | | ``relativeMinScore * maxScore`` (``maxScore`` is score of | -| | | | | best hit) will be dropped. | -+----------------------+-----------------+-----------------+------------------+-----------------------------------------------------------+ +.. The scoring parameters are placed in group ``alignmentParameters.scoring``: -One can override these parameters in the following way +.. +-------------------------+----------------------------------------+--------------------------------------------------------------------+ +.. | Parameter | Default value (same for V, J, C) | Description | +.. +=========================+========================================+====================================================================+ +.. | ``subsMatrix`` | ``simple(match = 5,`` | Substitution matrix. Available types: | +.. | | ``mismatch = -9)`` | | +.. | | | - ``simple`` --- a matrix with diagonal elements equal to | +.. | | | ``match`` and other elements equal to ``mismatch`` | +.. | | | - ``raw`` --- a complete set of 16 matrix elements should be | +.. | | | specified; for  example: | +.. | | | ``raw(5,-9,-9,-9,-9,5,-9,-9,-9,-9,5,-9,-9,-9,-9,5)`` | +.. | | |   (*equivalent to the default value*) | +.. +-------------------------+----------------------------------------+--------------------------------------------------------------------+ +.. | ``gapPenalty`` | ``-12`` | Penalty for gap. | +.. +-------------------------+----------------------------------------+--------------------------------------------------------------------+ -:: - - mixcr assemble -OcloneFactoryParameters.jParameters.featureToAlign=JRegion(-6,0) alignments.vdjca output.clns - -The scoring parameters are placed in group -``alignmentParameters.scoring``: +.. One can override these parameters in the following way -+-------------------------+----------------------------------------+--------------------------------------------------------------------+ -| Parameter | Default value (same for V, J, C) | Description | -+=========================+========================================+====================================================================+ -| ``subsMatrix`` | ``simple(match = 5,`` | Substitution matrix. Available types: | -| | ``mismatch = -9)`` | | -| | | - ``simple`` --- a matrix with diagonal elements equal to | -| | | ``match`` and other elements equal to ``mismatch`` | -| | | - ``raw`` --- a complete set of 16 matrix elements should be | -| | | specified; for  example: | -| | | ``raw(5,-9,-9,-9,-9,5,-9,-9,-9,-9,5,-9,-9,-9,-9,5)`` | -| | |   (*equivalent to the default value*) | -+-------------------------+----------------------------------------+--------------------------------------------------------------------+ -| ``gapPenalty`` | ``-12`` | Penalty for gap. | -+-------------------------+----------------------------------------+--------------------------------------------------------------------+ - -One can override these parameters in the following way - -:: +.. - mixcr assemble -OcloneFactoryParameters.vParameters.alignmentParameters.scoring.gapPenalty=-5 \ - alignments.vdjca output.clns +.. mixcr assemble -OcloneFactoryParameters.vParameters.alignmentParameters.scoring.gapPenalty=-5 \ +.. alignments.vdjca output.clns diff --git a/doc/export.rst b/doc/export.rst index 0d30f7d8e..e2e9eb5d1 100644 --- a/doc/export.rst +++ b/doc/export.rst @@ -175,34 +175,44 @@ The following table shows the correspondance between anchor point and positions +--------------------------+---------------------+--------------------+ | FR3End / CDR3Begin | 9 | 10 | +--------------------------+---------------------+--------------------+ -| VEnd / *PSegmentBegin* | 10 | 11 | -+--------------------------+---------------------+--------------------+ -| Number of 3' V deletions | 11 | 12 | +| Number of 3' V deletions | 10 | 11 | | (negative value), or | | | | length of 3' V P-segment | | | | (positive value) | | | +--------------------------+---------------------+--------------------+ -| Number of 5' D deletions | 12 | 13 | +| VEndTrimmed, next | 11 | 12 | +| position after last | | | +| aligned nucleotide of V | | | +| gene | | | ++--------------------------+---------------------+--------------------+ +| DBeginTrimmed, position | 12 | 13 | +| of first aligned | | | +| nucleotide of D gene | | | ++--------------------------+---------------------+--------------------+ +| Number of 5' D deletions | 13 | 14 | | (negative value), or | | | | length of 5' D P-segment | | | | (positive value) | | | +--------------------------+---------------------+--------------------+ -| DBegin / *PSegmentEnd* | 13 | 14 | -+--------------------------+---------------------+--------------------+ -| DEnd / *PSegmentBegin* | 14 | 15 | -+--------------------------+---------------------+--------------------+ -| Number of 3' D deletions | 15 | 16 | +| Number of 3' D deletions | 14 | 15 | | (negative value), or | | | | length of 3' D P-segment | | | | (positive value) | | | +--------------------------+---------------------+--------------------+ -| Number of 3' J deletions | 16 | 17 | +| DEndTrimmed, next | 15 | 16 | +| position after last | | | +| aligned nucleotide of D | | | +| gene | | | ++--------------------------+---------------------+--------------------+ +| JBeginTrimmed, position | 16 | 17 | +| of first aligned | | | +| nucleotide of J gene | | | ++--------------------------+---------------------+--------------------+ +| Number of 3' J deletions | 17 | 18 | | (negative value), or | | | | length of 3' J P-segment | | | | (positive value) | | | +--------------------------+---------------------+--------------------+ -| JBegin / *PSegmentEnd* | 17 | 18 | -+--------------------------+---------------------+--------------------+ | CDR3End / FR4Begin | 18 | 19 | +--------------------------+---------------------+--------------------+ | FR4End | 19 | 20 | @@ -212,8 +222,6 @@ The following table shows the correspondance between anchor point and positions | CExon1End | 21 | 22 | +--------------------------+---------------------+--------------------+ -Positions of anchor points like ``VEnd`` are printed only if corresponding P-segment was detected in the sequence, in this case e.g. P-segment of V gene can be found between positions of ``VEnd`` and ``VEndTrimmed``. - Examples -------- diff --git a/doc/quickstart.rst b/doc/quickstart.rst index 49710342d..e460ccb58 100644 --- a/doc/quickstart.rst +++ b/doc/quickstart.rst @@ -239,9 +239,9 @@ MiXCR allows to extract TCR and BCR CDR3 repertoires from RNA-Seq data. Extracti .. code-block:: console - > mixcr assemblePartial -p alignments.vdjca alignmentsRescued_1.vdjca + > mixcr assemblePartial alignments.vdjca alignmentsRescued_1.vdjca - > mixcr assemblePartial -p alignmentsRescued_1.vdjca alignmentsRescued_2.vdjca + > mixcr assemblePartial alignmentsRescued_1.vdjca alignmentsRescued_2.vdjca 3. Extend TCR alignments with uniquely determined V and J genes and having incomplete coverage of CDR3s using germline sequences: diff --git a/doc/rnaseq.rst b/doc/rnaseq.rst index 694ddd112..a9718b315 100644 --- a/doc/rnaseq.rst +++ b/doc/rnaseq.rst @@ -75,10 +75,8 @@ Typical analysis workflow :: - mixcr assemblePartial -p alignments.vdjca alignments_rescued_1.vdjca - mixcr assemblePartial -p alignments_rescued_1.vdjca alignments_rescued_2.vdjca - - ``-p`` option tells MiXCRsquences to pass unassembled alignments to the output file. + mixcr assemblePartial alignments.vdjca alignments_rescued_1.vdjca + mixcr assemblePartial alignments_rescued_1.vdjca alignments_rescued_2.vdjca 3. (optional) Perform extension of incomplete TCR CDR3s with uniquely determined V and J genes using germline sequences. As described in the :ref:`last paragraph of introduction ` diff --git a/itests.sh b/itests.sh index a203898f9..be423c082 100755 --- a/itests.sh +++ b/itests.sh @@ -1,5 +1,8 @@ #!/bin/bash +set -e +set -o pipefail + # "Integration" tests for MiXCR # Test standard analysis pipeline results @@ -46,6 +49,26 @@ case $os in ;; esac +create_standard_results=false +run_tests=false +while [[ $# > 0 ]] +do + key="$1" + shift + case $key in + std) + create_standard_results=true + ;; + test) + run_tests=true + ;; + *) + echo "Unknown option $key"; + exit 1 + ;; +esac +done + rm -rf ${dir}/test_target mkdir ${dir}/test_target @@ -60,17 +83,27 @@ which mixcr mixcr -v function go_assemble { - mixcr assemble -r $1.clns.report $1.vdjca $1.clns || exit 1 + mixcr assemble -r $1.clns.report $1.vdjca $1.clns for c in TCR IG TRB TRA TRG TRD IGH IGL IGK ALL do - mixcr exportClones -c ${c} -s $1.clns $1.clns.${c}.txt || exit 1 + mixcr exportClones -c ${c} -s $1.clns $1.clns.${c}.txt done } -for s in sample_IGH test; -do - mixcr align -r ${s}_paired.vdjca.report ${s}_R1.fastq ${s}_R2.fastq ${s}_paired.vdjca || exit 1 - go_assemble ${s}_paired - mixcr align -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single.vdjca || exit 1 - go_assemble ${s}_single -done +if [[ $create_standard_results == true ]]; then + for s in sample_IGH test; + do + mixcr align -r ${s}_paired.vdjca.report ${s}_R1.fastq ${s}_R2.fastq ${s}_paired.vdjca + go_assemble ${s}_paired + mixcr align -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single.vdjca + go_assemble ${s}_single + done +fi + +# UseCase 1 + +if [[ $run_tests == true ]]; then + echo "Running test case 1" + mixcr align -OvParameters.geneFeatureToAlign=VGeneWithP test_R1.fastq test_R2.fastq case1.vdjca + mixcr assemble case1.vdjca case1.clns +fi diff --git a/pom.xml b/pom.xml index 74f9cff29..bf0aec46b 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ com.milaboratory mixcr - 2.1 + 2.1.1 jar MiXCR @@ -44,14 +44,14 @@ UTF-8 - 1.7.1 + 1.7.2 io.repseq repseqio - 1.2.6 + 1.2.7 diff --git a/repseqio b/repseqio index 958e0196d..39c72f9ea 160000 --- a/repseqio +++ b/repseqio @@ -1 +1 @@ -Subproject commit 958e0196db110cf3a0dfabf905d45e2edc4d49c0 +Subproject commit 39c72f9ea985fc55af8140e8d59333352c5abb77 diff --git a/src/main/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainer.java b/src/main/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainer.java index 76a4ecc51..c52c578cc 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainer.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainer.java @@ -1,7 +1,7 @@ package com.milaboratory.mixcr.assembler; import cc.redberry.pipe.OutputPort; -import com.milaboratory.mixcr.util.TempFileManager; +import com.milaboratory.util.TempFileManager; import java.io.*; import java.nio.ByteBuffer; @@ -143,7 +143,7 @@ public static void writeMapping(final OutputPort mappingPort public static void writeMapping(final OutputPort mappingPort, final int cloneCount, final File file) throws IOException { - try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)))) { + try(DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)))) { writeMapping(mappingPort, cloneCount, dos, DEFAULT_SORTING_CHUNK_SIZE); } } @@ -166,7 +166,7 @@ public static void writeMapping(final OutputPort mappingPort // Sorting blocks (sortingChunkSize) of records by clone id (for "by clone id" index file section) // Simultaneously writing records sorted "by alignment id" - try (final DataOutputStream tempOutput = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile), 262144))) { + try(final DataOutputStream tempOutput = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile), 262144))) { final ReadToCloneMapping[] buffer = new ReadToCloneMapping[sortingChunkSize]; ReadToCloneMapping mapping; ReadToCloneMapping previous = null; diff --git a/src/main/java/com/milaboratory/mixcr/assembler/AssemblerEventLogger.java b/src/main/java/com/milaboratory/mixcr/assembler/AssemblerEventLogger.java index 8289d5cba..d43365424 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/AssemblerEventLogger.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/AssemblerEventLogger.java @@ -31,7 +31,7 @@ import cc.redberry.pipe.CUtils; import cc.redberry.pipe.OutputPortCloseable; import com.milaboratory.core.io.util.IOUtil; -import com.milaboratory.mixcr.util.TempFileManager; +import com.milaboratory.util.TempFileManager; import java.io.*; import java.util.ArrayList; @@ -179,7 +179,7 @@ public AssemblerEvent take() { if (closed) return null; final int cloneIndex; - synchronized (this) { + synchronized ( this ){ if (closed) return null; else { diff --git a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java index 1d25bc944..0595d5d8c 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java @@ -42,7 +42,7 @@ import com.milaboratory.core.tree.NeighborhoodIterator; import com.milaboratory.core.tree.SequenceTreeMap; import com.milaboratory.mixcr.basictypes.*; -import io.repseq.core.*; +import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; import com.milaboratory.util.CanReportProgress; import com.milaboratory.util.Factory; import com.milaboratory.util.HashFunctions; @@ -51,6 +51,8 @@ import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.map.hash.TObjectFloatHashMap; import gnu.trove.procedure.TObjectProcedure; +import io.repseq.core.GeneFeature; +import io.repseq.core.*; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -82,6 +84,7 @@ public final class CloneAssembler implements CanReportProgress, AutoCloseable { volatile boolean deferredExists = false; volatile boolean preClusteringDone = false; final TIntIntHashMap preClustered = new TIntIntHashMap(); + final EnumMap featuresToAlign; public static final Factory> LIST_FACTORY = new Factory>() { @Override @@ -90,8 +93,11 @@ public ArrayList create() { } }; - public CloneAssembler(CloneAssemblerParameters parameters, boolean logAssemblerEvents, Collection genes) { + public CloneAssembler(CloneAssemblerParameters parameters, boolean logAssemblerEvents, Collection genes, EnumMap featuresToAlign) { + if (!parameters.isComplete()) + throw new IllegalArgumentException("Not complete parameters"); this.parameters = parameters.clone(); + this.featuresToAlign = featuresToAlign; if (!logAssemblerEvents && !parameters.isMappingEnabled()) globalLogger = null; else @@ -100,6 +106,10 @@ public CloneAssembler(CloneAssemblerParameters parameters, boolean logAssemblerE usedGenes.put(gene.getId(), gene); } + public CloneAssembler(CloneAssemblerParameters parameters, boolean logAssemblerEvents, Collection genes, VDJCAlignerParameters alignerParameters) { + this(parameters.clone().updateFrom(alignerParameters), logAssemblerEvents, genes, alignerParameters.getFeaturesToAlignMap()); + } + /* Initial Assembly Events */ void onNewCloneCreated(CloneAccumulator accumulator) { @@ -165,13 +175,13 @@ public void setListener(CloneAssemblerListener listener) { private ClonalSequence extractClonalSequence(VDJCAlignments alignments) { final NSequenceWithQuality[] targets = new NSequenceWithQuality[parameters.assemblingFeatures.length]; - int totalLengt = 0; + int totalLength = 0; for (int i = 0; i < targets.length; ++i) if ((targets[i] = alignments.getFeature(parameters.assemblingFeatures[i])) == null) return null; else - totalLengt += targets[i].size(); - if (totalLengt < parameters.minimalClonalSequenceLength) + totalLength += targets[i].size(); + if (totalLength < parameters.minimalClonalSequenceLength) return null; return new ClonalSequence(targets); } @@ -296,7 +306,7 @@ public void close() { public CloneSet getCloneSet() { EnumMap features = new EnumMap<>(GeneType.class); for (GeneType geneType : GeneType.values()) { - GeneFeature gf = parameters.cloneFactoryParameters.getFeatureToAlign(geneType); + GeneFeature gf = featuresToAlign.get(geneType); if (gf != null) features.put(geneType, gf); } @@ -506,7 +516,7 @@ public boolean isFinished() { void buildClones() { CloneFactory cloneFactory = new CloneFactory(parameters.getCloneFactoryParameters(), - parameters.getAssemblingFeatures(), usedGenes); + parameters.getAssemblingFeatures(), usedGenes, featuresToAlign); Collection source; if (clusteredClonesAccumulators != null) source = clusteredClonesAccumulators; diff --git a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssemblerParameters.java b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssemblerParameters.java index 6b2d490bf..5ca39fe92 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssemblerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssemblerParameters.java @@ -32,8 +32,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import io.repseq.core.GeneFeature; import com.milaboratory.core.sequence.quality.QualityAggregationType; +import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; +import io.repseq.core.GeneFeature; import java.util.Arrays; import java.util.regex.Matcher; @@ -90,6 +91,18 @@ public CloneAssemblerParameters(@JsonProperty("assemblingFeatures") GeneFeature[ updateVariants(); } + public CloneAssemblerParameters updateFrom(VDJCAlignerParameters alignerParameters) { + if (cloneFactoryParameters != null) + cloneFactoryParameters.update(alignerParameters); + return this; + } + + public boolean isComplete() { + if (cloneFactoryParameters != null) + return cloneFactoryParameters.isComplete(); + return true; + } + public static final Pattern thresholdPattern = Pattern.compile("\\s*(\\d+)\\s*(of|from)\\s*(\\d+)\\s*"); private void updateVariants() { @@ -295,13 +308,13 @@ public int hashCode() { result = 31 * result + (separateByJ ? 1 : 0); result = 31 * result + (separateByC ? 1 : 0); temp = Double.doubleToLongBits(maximalPreClusteringRatio); - result = 31 * result + (int) (temp ^ (temp >>> 32)); + result = 31 * result + (int) (temp^(temp >>> 32)); result = 31 * result + (addReadsCountOnClustering ? 1 : 0); result = 31 * result + (int) badQualityThreshold; temp = Double.doubleToLongBits(maxBadPointsPercent); - result = 31 * result + (int) (temp ^ (temp >>> 32)); - result = 31 * result + (int) (variants ^ (variants >>> 32)); - result = 31 * result + (int) (minimalQuality ^ (minimalQuality >>> 32)); + result = 31 * result + (int) (temp^(temp >>> 32)); + result = 31 * result + (int) (variants^(variants >>> 32)); + result = 31 * result + (int) (minimalQuality^(minimalQuality >>> 32)); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/assembler/CloneFactory.java b/src/main/java/com/milaboratory/mixcr/assembler/CloneFactory.java index 33ffc0025..e6f61bdf6 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/CloneFactory.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/CloneFactory.java @@ -47,17 +47,19 @@ class CloneFactory { final HashMap usedGenes; final GeneFeature[] assemblingFeatures; final int indexOfAssemblingFeatureWithD; + final EnumMap featuresToAlign; CloneFactory(CloneFactoryParameters parameters, GeneFeature[] assemblingFeatures, - HashMap usedGenes) { + HashMap usedGenes, EnumMap featuresToAlign) { this.parameters = parameters.clone(); this.assemblingFeatures = assemblingFeatures.clone(); this.usedGenes = usedGenes; + this.featuresToAlign = featuresToAlign; List dGenes = new ArrayList<>(); for (VDJCGene gene : usedGenes.values()) if (gene.getGeneType() == GeneType.Diversity) dGenes.add(gene); - this.dAligner = new SingleDAligner(parameters.getDParameters(), dGenes); + this.dAligner = new SingleDAligner(parameters.getDParameters(), featuresToAlign.get(GeneType.Diversity), dGenes); int indexOfAssemblingFeatureWithD = -1; for (int i = 0; i < assemblingFeatures.length; ++i) @@ -76,7 +78,7 @@ Clone create(int id, CloneAccumulator accumulator) { if (vjcParameters == null) continue; - GeneFeature featureToAlign = vjcParameters.getFeatureToAlign(); + GeneFeature featureToAlign = featuresToAlign.get(geneType); TObjectFloatHashMap geneScores = accumulator.geneScores.get(geneType); if (geneScores == null) diff --git a/src/main/java/com/milaboratory/mixcr/assembler/CloneFactoryParameters.java b/src/main/java/com/milaboratory/mixcr/assembler/CloneFactoryParameters.java index bd6abf1cf..f59db6f6f 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/CloneFactoryParameters.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/CloneFactoryParameters.java @@ -29,25 +29,29 @@ package com.milaboratory.mixcr.assembler; +import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import io.repseq.core.GeneFeature; +import com.milaboratory.mixcr.basictypes.ClonalUpdatableParameters; +import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; import io.repseq.core.GeneType; -import com.milaboratory.mixcr.vdjaligners.DAlignerParameters; import java.util.EnumMap; import java.util.Map; +@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, isGetterVisibility = JsonAutoDetect.Visibility.NONE, + getterVisibility = JsonAutoDetect.Visibility.NONE) public final class CloneFactoryParameters implements java.io.Serializable { EnumMap vdcParameters = new EnumMap<>(GeneType.class); - DAlignerParameters dParameters; + @JsonSerialize(as = DClonalAlignerParameters.class) + DClonalAlignerParameters dParameters; @JsonCreator public CloneFactoryParameters(@JsonProperty("vParameters") VJCClonalAlignerParameters vParameters, @JsonProperty("jParameters") VJCClonalAlignerParameters jParameters, @JsonProperty("cParameters") VJCClonalAlignerParameters cParameters, - @JsonProperty("dParameters") DAlignerParameters dParameters) { + @JsonProperty("dParameters") DClonalAlignerParameters dParameters) { if (vParameters != null) vdcParameters.put(GeneType.Variable, vParameters); if (jParameters != null) @@ -57,11 +61,32 @@ public CloneFactoryParameters(@JsonProperty("vParameters") VJCClonalAlignerParam this.dParameters = dParameters; } - CloneFactoryParameters(EnumMap vdcParameters, DAlignerParameters dParameters) { + CloneFactoryParameters(EnumMap vdcParameters, DClonalAlignerParameters dParameters) { this.vdcParameters = vdcParameters; this.dParameters = dParameters; } + private ClonalUpdatableParameters uParameters(GeneType gt) { + return gt == GeneType.Diversity ? dParameters : vdcParameters.get(gt); + } + + public void update(VDJCAlignerParameters alignerParameters) { + for (GeneType gt : GeneType.VDJC_REFERENCE) { + ClonalUpdatableParameters up = uParameters(gt); + if (up != null) + up.updateFrom(alignerParameters.getGeneAlignerParameters(gt)); + } + } + + public boolean isComplete() { + for (GeneType gt : GeneType.VDJC_REFERENCE) { + ClonalUpdatableParameters up = uParameters(gt); + if (up != null && !up.isComplete()) + return false; + } + return true; + } + public VJCClonalAlignerParameters getVJCParameters(GeneType geneType) { return vdcParameters.get(geneType); } @@ -86,39 +111,10 @@ public VJCClonalAlignerParameters getCParameters() { @JsonProperty("dParameters") @JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL) - public DAlignerParameters getDParameters() { + public DClonalAlignerParameters getDParameters() { return dParameters; } - public GeneFeature getFeatureToAlign(GeneType geneType) { - if (geneType == GeneType.Diversity) - if (dParameters == null) - return null; - else - return dParameters.getGeneFeatureToAlign(); - VJCClonalAlignerParameters params = getVJCParameters(geneType); - if (params == null) - return null; - else - return params.getFeatureToAlign(); - } - - public CloneFactoryParameters setFeatureToAlign(GeneType geneType, GeneFeature feature) { - if (geneType == GeneType.Diversity) - if (dParameters == null) - throw new IllegalArgumentException("No D parameters."); - else - dParameters.setGeneFeatureToAlign(feature); - else { - VJCClonalAlignerParameters params = getVJCParameters(geneType); - if (params == null) - throw new IllegalArgumentException("No parameters for " + geneType + "."); - else - params.setFeatureToAlign(feature); - } - return this; - } - public CloneFactoryParameters setVJCParameters(GeneType geneType, VJCClonalAlignerParameters parameters) { if (parameters == null) vdcParameters.remove(geneType); @@ -142,7 +138,7 @@ public CloneFactoryParameters setCParameters(VJCClonalAlignerParameters paramete return this; } - public CloneFactoryParameters setDParameters(DAlignerParameters dParameters) { + public CloneFactoryParameters setDParameters(DClonalAlignerParameters dParameters) { this.dParameters = dParameters; return this; } diff --git a/src/main/java/com/milaboratory/mixcr/assembler/DClonalAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/assembler/DClonalAlignerParameters.java index 18a8ca99a..1d485527b 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/DClonalAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/DClonalAlignerParameters.java @@ -1,31 +1,3 @@ -/* - * Copyright (c) 2014-2015, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail - * (here and after addressed as Inventors) - * All Rights Reserved - * - * Permission to use, copy, modify and distribute any part of this program for - * educational, research and non-profit purposes, by non-profit institutions - * only, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph and the following - * three paragraphs appear in all copies. - * - * Those desiring to incorporate this work into commercial products or use for - * commercial purposes should contact the Inventors using one of the following - * email addresses: chudakovdm@mail.ru, chudakovdm@gmail.com - * - * IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, - * ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO - * WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A - * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY - * PATENT, TRADEMARK OR OTHER RIGHTS. - */ package com.milaboratory.mixcr.assembler; import com.fasterxml.jackson.annotation.JsonAutoDetect; @@ -33,54 +5,123 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.milaboratory.core.alignment.AlignmentScoring; import com.milaboratory.core.sequence.NucleotideSequence; -import io.repseq.core.GeneFeature; +import com.milaboratory.mixcr.basictypes.ClonalUpdatableParameters; +import com.milaboratory.mixcr.vdjaligners.ClonalGeneAlignmentParameters; +/** + * Some fields of this object might not be set, to indicate that their values must be taken from original alignment + * parameters (from *.vdjca file) + * + * Created by poslavsky on 01/03/2017. + */ @JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, isGetterVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE) -public final class DClonalAlignerParameters extends AbstractClonalAlignerParameters - implements java.io.Serializable { - AlignmentScoring scoring; +public class DClonalAlignerParameters> implements + ClonalUpdatableParameters, ClonalGeneAlignmentParameters, java.io.Serializable { + protected Float relativeMinScore; + protected Float absoluteMinScore; + protected Integer maxHits; + protected AlignmentScoring scoring; @JsonCreator public DClonalAlignerParameters( - @JsonProperty("relativeMinScore") float relativeMinScore, - @JsonProperty("featureToAlign") GeneFeature featureToAlign, + @JsonProperty("relativeMinScore") Float relativeMinScore, + @JsonProperty("absoluteMinScore") Float absoluteMinScore, + @JsonProperty("maxHits") Integer maxHits, @JsonProperty("scoring") AlignmentScoring scoring) { - super(featureToAlign, relativeMinScore); + this.relativeMinScore = relativeMinScore; + this.absoluteMinScore = absoluteMinScore; + this.maxHits = maxHits; this.scoring = scoring; } + @Override + public void updateFrom(ClonalGeneAlignmentParameters alignerParameters) { + if (!(alignerParameters instanceof DClonalAlignerParameters)) + throw new IllegalArgumentException(); + DClonalAlignerParameters oth = (DClonalAlignerParameters) alignerParameters; + + if (relativeMinScore == null) + relativeMinScore = oth.relativeMinScore; + if (absoluteMinScore == null) + absoluteMinScore = oth.absoluteMinScore; + if (maxHits == null) + maxHits = oth.maxHits; + if (scoring == null) + scoring = oth.scoring; + } + + @Override + public boolean isComplete() { + return relativeMinScore != null && absoluteMinScore != null && maxHits != null && scoring != null; + } + + @Override public AlignmentScoring getScoring() { return scoring; } - public DClonalAlignerParameters setScoring(AlignmentScoring scoring) { + + public T setRelativeMinScore(float relativeMinScore) { + this.relativeMinScore = relativeMinScore; + return (T) this; + } + + @Override + public float getRelativeMinScore() { + return relativeMinScore; + } + + + public T setScoring(AlignmentScoring scoring) { this.scoring = scoring; - return this; + return (T) this; + } + + public float getAbsoluteMinScore() { + return absoluteMinScore; + } + + public T setAbsoluteMinScore(float absoluteMinScore) { + this.absoluteMinScore = absoluteMinScore; + return (T) this; + } + + public int getMaxHits() { + return maxHits; + } + + public T setMaxHits(int maxHits) { + this.maxHits = maxHits; + return (T) this; } @Override public DClonalAlignerParameters clone() { - return new DClonalAlignerParameters(relativeMinScore, featureToAlign, scoring); + return new DClonalAlignerParameters(relativeMinScore, absoluteMinScore, maxHits, scoring); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; DClonalAlignerParameters that = (DClonalAlignerParameters) o; - if (!scoring.equals(that.scoring)) return false; - - return true; + if (relativeMinScore != null ? !relativeMinScore.equals(that.relativeMinScore) : that.relativeMinScore != null) + return false; + if (absoluteMinScore != null ? !absoluteMinScore.equals(that.absoluteMinScore) : that.absoluteMinScore != null) + return false; + if (maxHits != null ? !maxHits.equals(that.maxHits) : that.maxHits != null) return false; + return scoring != null ? scoring.equals(that.scoring) : that.scoring == null; } @Override public int hashCode() { - int result = super.hashCode(); - result = 31 * result + scoring.hashCode(); + int result = relativeMinScore != null ? relativeMinScore.hashCode() : 0; + result = 31 * result + (absoluteMinScore != null ? absoluteMinScore.hashCode() : 0); + result = 31 * result + (maxHits != null ? maxHits.hashCode() : 0); + result = 31 * result + (scoring != null ? scoring.hashCode() : 0); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParameters.java index 80b9fcf28..06ce576f5 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParameters.java @@ -28,59 +28,143 @@ */ package com.milaboratory.mixcr.assembler; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.*; +import com.milaboratory.core.alignment.AffineGapAlignmentScoring; +import com.milaboratory.core.alignment.AlignmentScoring; import com.milaboratory.core.alignment.BandedAlignerParameters; import com.milaboratory.core.sequence.NucleotideSequence; -import io.repseq.core.GeneFeature; +import com.milaboratory.mixcr.basictypes.ClonalUpdatableParameters; +import com.milaboratory.mixcr.vdjaligners.ClonalGeneAlignmentParameters; +/** + * Some fields of this object might not be set, to indicate that their values must be taken from original alignment + * parameters (from *.vdjca file) + */ @JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, isGetterVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE) -public final class VJCClonalAlignerParameters extends AbstractClonalAlignerParameters - implements java.io.Serializable { - BandedAlignerParameters alignmentParameters; +@JsonIgnoreProperties({"featureToAlign", "alignmentParameters"}) +public final class VJCClonalAlignerParameters + implements ClonalGeneAlignmentParameters, java.io.Serializable, ClonalUpdatableParameters { + Float relativeMinScore; + AlignmentScoring scoring; + @JsonIgnore + int maxAlignmentWidthLinear; + @JsonIgnore + int maxAlignmentWidthAffine; + + public VJCClonalAlignerParameters( + float relativeMinScore, + AlignmentScoring scoring, + int maxAlignmentWidth) { + this.relativeMinScore = relativeMinScore; + this.scoring = scoring; + this.maxAlignmentWidthLinear = maxAlignmentWidth; + this.maxAlignmentWidthAffine = maxAlignmentWidth; + } @JsonCreator public VJCClonalAlignerParameters( - @JsonProperty("featureToAlign") GeneFeature featureToAlign, - @JsonProperty("relativeMinScore") float relativeMinScore, - @JsonProperty("alignmentParameters") BandedAlignerParameters alignmentParameters) { - super(featureToAlign, relativeMinScore); - this.alignmentParameters = alignmentParameters; + @JsonProperty("relativeMinScore") Float relativeMinScore, + @JsonProperty("scoring") AlignmentScoring scoring, + @JsonProperty("maxAlignmentWidth") Integer maxAlignmentWidth, + @JsonProperty("maxAlignmentWidthLinear") Integer maxAlignmentWidthLinear, + @JsonProperty("maxAlignmentWidthAffine") Integer maxAlignmentWidthAffine) { + this.relativeMinScore = relativeMinScore; + this.scoring = scoring; + + if (maxAlignmentWidth == null && (maxAlignmentWidthAffine == null || maxAlignmentWidthLinear == null)) + throw new IllegalArgumentException("maxAlignmentWidth or maxAlignmentWidthAffine and maxAlignmentWidthLinear are not specified"); + + this.maxAlignmentWidthLinear = maxAlignmentWidth != null ? maxAlignmentWidth : maxAlignmentWidthLinear; + this.maxAlignmentWidthAffine = maxAlignmentWidth != null ? maxAlignmentWidth : maxAlignmentWidthAffine; } public BandedAlignerParameters getAlignmentParameters() { - return alignmentParameters; + if (!isComplete()) + throw new IllegalStateException(); + return new BandedAlignerParameters<>(scoring, getMaxAlignmentWidth(), Integer.MIN_VALUE); + } + + @Override + public void updateFrom(ClonalGeneAlignmentParameters alignerParameters) { + if (scoring == null) + scoring = alignerParameters.getScoring(); + if (relativeMinScore == null) + relativeMinScore = alignerParameters.getRelativeMinScore(); + } + + @Override + public boolean isComplete() { + return relativeMinScore != null && scoring != null; + } + + @Override + public AlignmentScoring getScoring() { + return scoring; } - public VJCClonalAlignerParameters setAlignmentParameters(BandedAlignerParameters alignmentParameters) { - this.alignmentParameters = alignmentParameters; + public VJCClonalAlignerParameters setScoring(AlignmentScoring scoring) { + this.scoring = scoring; return this; } + @Override + public float getRelativeMinScore() { + return relativeMinScore; + } + + public VJCClonalAlignerParameters setRelativeMinScore(Float relativeMinScore) { + this.relativeMinScore = relativeMinScore; + return this; + } + + public int getMaxAlignmentWidth() { + return scoring instanceof AffineGapAlignmentScoring ? maxAlignmentWidthAffine : maxAlignmentWidthLinear; + } + + @JsonProperty("maxAlignmentWidth") + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getMaxAlignmentWidthJSON() { + return (maxAlignmentWidthAffine == maxAlignmentWidthLinear) ? maxAlignmentWidthAffine : null; + } + + @JsonProperty("maxAlignmentWidthLinear") + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getMaxAlignmentWidthLinearJSON() { + return (maxAlignmentWidthAffine != maxAlignmentWidthLinear) ? maxAlignmentWidthLinear : null; + } + + @JsonProperty("maxAlignmentWidthAffine") + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getMaxAlignmentWidthAffineJSON() { + return (maxAlignmentWidthAffine != maxAlignmentWidthLinear) ? maxAlignmentWidthAffine : null; + } + @Override public VJCClonalAlignerParameters clone() { - return new VJCClonalAlignerParameters(featureToAlign, relativeMinScore, alignmentParameters.clone()); + return new VJCClonalAlignerParameters(relativeMinScore, scoring, null, maxAlignmentWidthLinear, maxAlignmentWidthAffine); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; VJCClonalAlignerParameters that = (VJCClonalAlignerParameters) o; - if (!alignmentParameters.equals(that.alignmentParameters)) return false; - - return true; + if (maxAlignmentWidthLinear != that.maxAlignmentWidthLinear) return false; + if (maxAlignmentWidthAffine != that.maxAlignmentWidthAffine) return false; + if (relativeMinScore != null ? !relativeMinScore.equals(that.relativeMinScore) : that.relativeMinScore != null) + return false; + return scoring != null ? scoring.equals(that.scoring) : that.scoring == null; } @Override public int hashCode() { - int result = super.hashCode(); - result = 31 * result + alignmentParameters.hashCode(); + int result = relativeMinScore != null ? relativeMinScore.hashCode() : 0; + result = 31 * result + (scoring != null ? scoring.hashCode() : 0); + result = 31 * result + maxAlignmentWidthLinear; + result = 31 * result + maxAlignmentWidthAffine; return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/ClonalUpdatableParameters.java b/src/main/java/com/milaboratory/mixcr/basictypes/ClonalUpdatableParameters.java new file mode 100644 index 000000000..94990d0b5 --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/basictypes/ClonalUpdatableParameters.java @@ -0,0 +1,20 @@ +package com.milaboratory.mixcr.basictypes; + +import com.milaboratory.mixcr.vdjaligners.ClonalGeneAlignmentParameters; + +/** + * Created by poslavsky on 01/03/2017. + */ +public interface ClonalUpdatableParameters { + /** + * Set absent parameters from ClonalGeneAlignmentParameters object + * + * @param alignerParameters + */ + void updateFrom(ClonalGeneAlignmentParameters alignerParameters); + + /** + * Returns true if all parameters are defined + */ + boolean isComplete(); +} diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java index a9c71cf97..bf54740ac 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java @@ -28,11 +28,13 @@ */ package com.milaboratory.mixcr.basictypes; +import com.milaboratory.core.alignment.Alignment; import com.milaboratory.core.sequence.NSequenceWithQuality; +import com.milaboratory.core.sequence.NucleotideSequence; import com.milaboratory.primitivio.annotations.Serializable; -import io.repseq.core.Chains; import io.repseq.core.GeneType; +import java.util.Arrays; import java.util.EnumMap; @Serializable(by = IO.VDJCAlignmentsSerializer.class) @@ -67,6 +69,33 @@ public VDJCAlignments(long readId, VDJCHit[] vHits, VDJCHit[] dHits, VDJCHit[] j this.readId = readId; } + public VDJCAlignments removeBestHitAlignment(GeneType geneType, int targetId) { + if (getBestHit(geneType) == null) + return this; + EnumMap hits = new EnumMap<>(this.hits); + VDJCHit[] gHits = hits.get(geneType).clone(); + Alignment[] als = gHits[0].getAlignments().clone(); + als[targetId] = null; + gHits[0] = new VDJCHit(gHits[0].getGene(), als, gHits[0].getAlignedFeature()); + Arrays.sort(gHits); + hits.put(geneType, gHits); + + VDJCAlignments result = new VDJCAlignments(readId, hits, targets); + result.alignmentsIndex = alignmentsIndex; + return result; + } + + public boolean hasNoHitsInTarget(int i) { + for (VDJCHit[] vdjcHits : hits.values()) { + if (vdjcHits == null) + continue; + for (VDJCHit hit : vdjcHits) + if (hit.getAlignment(i) != null) + return false; + } + return true; + } + public long getReadId() { return readId; } @@ -186,7 +215,7 @@ public boolean equals(Object o) { @Override public int hashCode() { int result = super.hashCode(); - result = 31 * result + (int) (readId ^ (readId >>> 32)); + result = 31 * result + (int) (readId^(readId >>> 32)); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsReader.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsReader.java index 6da0d0267..fe072112f 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsReader.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsReader.java @@ -42,7 +42,9 @@ import io.repseq.core.VDJCLibraryRegistry; import java.io.*; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import static com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter.*; @@ -123,6 +125,10 @@ public void setIndexer(TLongArrayList index) { } public void init() { + init(null); + } + + void init(Map geneFeatureRefs) { if (usedGenes != null) return; @@ -152,6 +158,16 @@ public void init() { GeneFeature featureDeserialized = input.readObject(GeneFeature.class); if (!Objects.equals(featureDeserialized, featureParams)) throw new RuntimeException("Wrong format."); + + // Find corresponding reference + if (geneFeatureRefs != null) { + featureParams = geneFeatureRefs.get(featureParams); + if (featureParams == null) + throw new RuntimeException("Absent record for " + featureDeserialized + " in geneFeatureRefs map."); + } + +// parameters.getGeneAlignerParameters(gt).setGeneFeatureToAlign(featureParams); + if (featureDeserialized != null) input.putKnownReference(featureParams); } @@ -243,4 +259,29 @@ public synchronized VDJCAlignments take() { return alignments; } + + /** + * Produce reader that uses the same reference for geneFeatures. + * + * @param reader target reader + * @param parameters parameters to take reference from + */ + public static void initGeneFeatureReferencesFrom(VDJCAlignmentsReader reader, VDJCAlignerParameters parameters) { + Map featureRefs = new HashMap<>(); + for (GeneType gt : GeneType.VDJC_REFERENCE) { + GeneFeature f = parameters.getFeatureToAlign(gt); + featureRefs.put(f, f); + } + reader.init(featureRefs); + } + + /** + * Produce reader that uses the same reference for geneFeatures. + * + * @param reader target reader + * @param sourceReader reader to take reference from + */ + public static void initGeneFeatureReferencesFrom(VDJCAlignmentsReader reader, VDJCAlignmentsReader sourceReader) { + initGeneFeatureReferencesFrom(reader, sourceReader.getParameters()); + } } diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java index a71dc4cf0..3c0cea15b 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java @@ -36,10 +36,9 @@ import io.repseq.core.Chains; import io.repseq.core.GeneFeature; import io.repseq.core.GeneType; +import io.repseq.core.VDJCGeneId; -import java.util.Arrays; -import java.util.EnumMap; -import java.util.Map; +import java.util.*; public class VDJCObject { protected final NSequenceWithQuality[] targets; @@ -76,6 +75,25 @@ protected static EnumMap createHits(VDJCHit[] vHits, VDJCHi return hits; } + @SuppressWarnings("unchecked") + private Set getGenes(GeneType gt) { + VDJCHit[] hits = getHits(gt); + if (hits == null) + return Collections.EMPTY_SET; + Set genes = new HashSet<>(); + for (VDJCHit hit : hits) + genes.add(hit.getGene().getId()); + return genes; + } + + public final boolean hasCommonGenes(GeneType gt, VDJCObject other) { + Set thisGenes = this.getGenes(gt); + for (VDJCGeneId gene : other.getGenes(gt)) + if (thisGenes.contains(gene)) + return true; + return false; + } + public final VDJCHit[] getHits(GeneType type) { VDJCHit[] hits = this.hits.get(type); return hits == null ? new VDJCHit[0] : hits; @@ -149,7 +167,7 @@ public final NSequenceWithQuality getTarget(int target) { return targets[target]; } - public final NSequenceWithQuality[] getTargets(){ + public final NSequenceWithQuality[] getTargets() { return targets.clone(); } @@ -200,60 +218,60 @@ public NSequenceWithQuality getFeature(GeneFeature geneFeature) { if (tmp != null && (feature == null || feature.getQuality().minValue() < tmp.getQuality().minValue())) feature = tmp; } - if (feature == null && targets.length == 2) { - VDJCHit bestVHit = getBestHit(GeneType.Variable); - if (bestVHit == null) - return null; - - //TODO check for V feature compatibility - Alignment - lAlignment = bestVHit.getAlignment(0), - rAlignment = bestVHit.getAlignment(1); - - if (lAlignment == null || rAlignment == null) - return null; - - int lTargetIndex = 0; - - int lFrom, rTo, f, t; - if ((f = getPartitionedTarget(1).getPartitioning().getPosition(geneFeature.getFirstPoint())) >= 0 && - (t = getPartitionedTarget(0).getPartitioning().getPosition(geneFeature.getLastPoint())) >= 0) { - lAlignment = bestVHit.getAlignment(1); - rAlignment = bestVHit.getAlignment(0); - lFrom = f; - rTo = t; - lTargetIndex = 1; - } else if ((f = getPartitionedTarget(0).getPartitioning().getPosition(geneFeature.getFirstPoint())) < 0 || - (t = getPartitionedTarget(1).getPartitioning().getPosition(geneFeature.getLastPoint())) < 0) - return null; - else { - lFrom = f; - rTo = t; - } - - Range intersection = lAlignment.getSequence1Range().intersection(rAlignment.getSequence1Range()); - if (intersection == null) - return null; - - NSequenceWithQuality intersectionSequence = Merger.merge(intersection, - new Alignment[]{bestVHit.getAlignment(0), bestVHit.getAlignment(1)}, - targets); - - Range lRange = new Range( - lFrom, - aabs(lAlignment.convertToSeq2Position(intersection.getFrom()))); - Range rRange = new Range( - aabs(rAlignment.convertToSeq2Position(intersection.getTo())), - rTo); - - feature = - new NSequenceWithQualityBuilder() - .ensureCapacity(lRange.length() + rRange.length() + intersectionSequence.size()) - .append(targets[lTargetIndex].getRange(lRange)) - .append(intersectionSequence) - .append(targets[1 - lTargetIndex].getRange(rRange)) - .createAndDestroy(); - } +// if (feature == null && targets.length == 2) { +// VDJCHit bestVHit = getBestHit(GeneType.Variable); +// if (bestVHit == null) +// return null; +// +// //TODO check for V feature compatibility +// Alignment +// lAlignment = bestVHit.getAlignment(0), +// rAlignment = bestVHit.getAlignment(1); +// +// if (lAlignment == null || rAlignment == null) +// return null; +// +// int lTargetIndex = 0; +// +// int lFrom, rTo, f, t; +// if ((f = getPartitionedTarget(1).getPartitioning().getPosition(geneFeature.getFirstPoint())) >= 0 && +// (t = getPartitionedTarget(0).getPartitioning().getPosition(geneFeature.getLastPoint())) >= 0) { +// lAlignment = bestVHit.getAlignment(1); +// rAlignment = bestVHit.getAlignment(0); +// lFrom = f; +// rTo = t; +// lTargetIndex = 1; +// } else if ((f = getPartitionedTarget(0).getPartitioning().getPosition(geneFeature.getFirstPoint())) < 0 || +// (t = getPartitionedTarget(1).getPartitioning().getPosition(geneFeature.getLastPoint())) < 0) +// return null; +// else { +// lFrom = f; +// rTo = t; +// } +// +// Range intersection = lAlignment.getSequence1Range().intersection(rAlignment.getSequence1Range()); +// if (intersection == null) +// return null; +// +// NSequenceWithQuality intersectionSequence = Merger.merge(intersection, +// new Alignment[]{bestVHit.getAlignment(0), bestVHit.getAlignment(1)}, +// targets); +// +// Range lRange = new Range( +// lFrom, +// aabs(lAlignment.convertToSeq2Position(intersection.getFrom()))); +// Range rRange = new Range( +// aabs(rAlignment.convertToSeq2Position(intersection.getTo())), +// rTo); +// +// feature = +// new NSequenceWithQualityBuilder() +// .ensureCapacity(lRange.length() + rRange.length() + intersectionSequence.size()) +// .append(targets[lTargetIndex].getRange(lRange)) +// .append(intersectionSequence) +// .append(targets[1 - lTargetIndex].getRange(rRange)) +// .createAndDestroy(); +// } return feature; } diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionAssemble.java b/src/main/java/com/milaboratory/mixcr/cli/ActionAssemble.java index 89a178c46..be88e1fb1 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/ActionAssemble.java +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionAssemble.java @@ -45,8 +45,6 @@ import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; import com.milaboratory.primitivio.PipeWriter; import com.milaboratory.util.SmartProgressReporter; -import io.repseq.core.GeneFeature; -import io.repseq.core.GeneType; import io.repseq.core.VDJCGene; import io.repseq.core.VDJCLibraryRegistry; @@ -67,7 +65,7 @@ public void go(ActionHelper helper) throws Exception { // Extracting V/D/J/C gene list from input vdjca file final List genes; final VDJCAlignerParameters alignerParameters; - try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(actionParameters.getInputFileName(), + try(VDJCAlignmentsReader reader = new VDJCAlignmentsReader(actionParameters.getInputFileName(), VDJCLibraryRegistry.getDefault())) { genes = reader.getUsedGenes(); // Saving aligner parameters to correct assembler parameters @@ -79,6 +77,8 @@ public void go(ActionHelper helper) throws Exception { VDJCLibraryRegistry.getDefault()); CloneAssemblerParameters assemblerParameters = actionParameters.getCloneAssemblerParameters(); + //set aligner parameters + assemblerParameters.updateFrom(alignerParameters); // Overriding JSON parameters if (!actionParameters.overrides.isEmpty()) { @@ -90,19 +90,9 @@ public void go(ActionHelper helper) throws Exception { } } - // Adjusting features to align for correct processing - for (GeneType geneType : GeneType.values()) { - GeneFeature featureAssemble = assemblerParameters.getCloneFactoryParameters().getFeatureToAlign(geneType); - GeneFeature featureAlignment = alignerParameters.getFeatureToAlign(geneType); - if (featureAssemble == null || featureAlignment == null) - continue; - GeneFeature intersection = GeneFeature.intersection(featureAlignment, featureAssemble); - assemblerParameters.getCloneFactoryParameters().setFeatureToAlign(geneType, intersection); - } - // Performing assembly - try (CloneAssembler assembler = new CloneAssembler(assemblerParameters, - actionParameters.readsToClonesMapping != null, genes)) { + try(CloneAssembler assembler = new CloneAssembler(assemblerParameters, + actionParameters.readsToClonesMapping != null, genes, alignerParameters.getFeaturesToAlignMap())) { // Creating event listener to collect run statistics CloneAssemblerReport report = new CloneAssemblerReport(); assembler.setListener(report); @@ -122,7 +112,7 @@ public void go(ActionHelper helper) throws Exception { chainsStatistics.put(clone); // Writing results - try (CloneSetIO.CloneSetWriter writer = new CloneSetIO.CloneSetWriter(cloneSet, actionParameters.getOutputFileName())) { + try(CloneSetIO.CloneSetWriter writer = new CloneSetIO.CloneSetWriter(cloneSet, actionParameters.getOutputFileName())) { SmartProgressReporter.startProgressReport(writer); writer.write(); } @@ -144,7 +134,7 @@ public void go(ActionHelper helper) throws Exception { // Writing raw events (not documented feature) if (actionParameters.events != null) - try (PipeWriter writer = new PipeWriter<>(actionParameters.events)) { + try(PipeWriter writer = new PipeWriter<>(actionParameters.events)) { CUtils.drain(assembler.getAssembledReadsPort(), writer); } diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionAssemblePartialAlignments.java b/src/main/java/com/milaboratory/mixcr/cli/ActionAssemblePartialAlignments.java index d5e5471b1..d479ab3d2 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/ActionAssemblePartialAlignments.java +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionAssemblePartialAlignments.java @@ -27,12 +27,17 @@ public final class ActionAssemblePartialAlignments implements Action { @Override public void go(ActionHelper helper) throws Exception { + if (parameters.writePartial != null) + System.err.println("'-p' option is deprecated and will be removed in 2.2. " + + "Use '-d' option to drop not-overlapped partial reads."); + // Saving initial timestamp long beginTimestamp = System.currentTimeMillis(); PartialAlignmentsAssemblerParameters assemblerParameters = PartialAlignmentsAssemblerParameters.getDefault(); if (!parameters.overrides.isEmpty()) { - assemblerParameters = JsonOverrider.override(assemblerParameters, PartialAlignmentsAssemblerParameters.class, parameters.overrides); + assemblerParameters = JsonOverrider.override(assemblerParameters, + PartialAlignmentsAssemblerParameters.class, parameters.overrides); if (assemblerParameters == null) { System.err.println("Failed to override some parameter."); return; @@ -40,8 +45,9 @@ public void go(ActionHelper helper) throws Exception { } long start = System.currentTimeMillis(); - try (PartialAlignmentsAssembler assembler = new PartialAlignmentsAssembler(assemblerParameters, parameters.getOutputFileName(), - parameters.getWritePartial(), parameters.getOverlappedOnly())) { + try (PartialAlignmentsAssembler assembler = new PartialAlignmentsAssembler(assemblerParameters, + parameters.getOutputFileName(), !parameters.doDropPartial(), + parameters.getOverlappedOnly())) { try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(parameters.getInputFileName())) { SmartProgressReporter.startProgressReport("Building index", reader); assembler.buildLeftPartsIndex(reader); @@ -53,7 +59,8 @@ public void go(ActionHelper helper) throws Exception { if (parameters.report != null) Util.writeReport(parameters.getInputFileName(), parameters.getOutputFileName(), - helper.getCommandLineArguments(), parameters.report, System.currentTimeMillis() - start, assembler + helper.getCommandLineArguments(), parameters.report, + System.currentTimeMillis() - start, assembler ); long time = System.currentTimeMillis() - beginTimestamp; @@ -90,10 +97,15 @@ private static class AssemblePartialAlignmentsParameters extends ActionParameter names = {"-o", "--overlapped-only"}) public Boolean overlappedOnly; - @Parameter(description = "Write partial sequences (for recurrent overlapping).", + @Parameter(description = "[Deprecated, enabled by default] Write partial sequences (for recurrent overlapping).", names = {"-p", "--write-partial"}) public Boolean writePartial; + @Parameter(description = "Drop partial sequences which were not assembled. Can be used to reduce output file " + + "size if no additional rounds of 'assemblePartial' are required.", + names = {"-d", "--drop-partial"}) + public Boolean dropPartial; + public String getInputFileName() { return parameters.get(0); } @@ -106,8 +118,8 @@ public Boolean getOverlappedOnly() { return overlappedOnly != null && overlappedOnly; } - public Boolean getWritePartial() { - return writePartial != null && writePartial; + public Boolean doDropPartial() { + return dropPartial != null && dropPartial; } @Override diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionExportAlignmentsPretty.java b/src/main/java/com/milaboratory/mixcr/cli/ActionExportAlignmentsPretty.java index 85f661421..6211ea72e 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/ActionExportAlignmentsPretty.java +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionExportAlignmentsPretty.java @@ -33,6 +33,7 @@ import com.beust.jcommander.Parameter; import com.beust.jcommander.ParameterException; import com.beust.jcommander.Parameters; +import com.beust.jcommander.converters.LongConverter; import com.milaboratory.cli.Action; import com.milaboratory.cli.ActionHelper; import com.milaboratory.cli.ActionParameters; @@ -69,9 +70,9 @@ public class ActionExportAlignmentsPretty implements Action { public void go(ActionHelper helper) throws Exception { Filter filter = actionParameters.getFilter(); long total = 0, filtered = 0; - try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(actionParameters.getInputFileName()); - PrintStream output = actionParameters.getOutputFileName().equals("-") ? System.out : - new PrintStream(new BufferedOutputStream(new FileOutputStream(actionParameters.getOutputFileName()), 32768)) + try(VDJCAlignmentsReader reader = new VDJCAlignmentsReader(actionParameters.getInputFileName()); + PrintStream output = actionParameters.getOutputFileName().equals("-") ? System.out : + new PrintStream(new BufferedOutputStream(new FileOutputStream(actionParameters.getOutputFileName()), 32768)) ) { long countBefore = actionParameters.limitBefore == null ? Long.MAX_VALUE : actionParameters.limitBefore; long countAfter = actionParameters.limitAfter == null ? Long.MAX_VALUE : actionParameters.limitAfter; @@ -287,22 +288,21 @@ public static final class AParameters extends ActionParameters { public Boolean descr = null; @Parameter(description = "List of read ids to export", - names = {"-i", "--reads-ids"}, variableArity = true) - public List ids = new ArrayList<>(); + names = {"-i", "--read-ids"}, + converter = LongConverter.class) + public List ids = new ArrayList<>(); TLongHashSet getReadIds() { if (ids.isEmpty()) return null; - TLongHashSet r = new TLongHashSet(ids.size()); - for (String id : ids) - r.add(Long.parseLong(id)); - return r; + return new TLongHashSet(ids); } public Chains getChain() { return Util.parseLoci(chain); } + @SuppressWarnings("unchecked") public Filter getFilter() { final Chains chains = getChain(); diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java b/src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java index ccd21938b..baf43d0f0 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java @@ -45,9 +45,9 @@ public class ActionExportParameters extends ActionParamete names = {"-l", "--loci"}, hidden = true) public String chains_legacy = null; - @Parameter(description = "Specify preset of export fields (full, min)", + @Parameter(description = "Specify preset of export fields (possible values: 'full', 'min'; 'full' by default)", names = {"-p", "--preset"}) - public String preset = "full"; + public String preset; @Parameter(description = "Specify preset file of export fields", names = {"-pf", "--preset-file"}) @@ -162,7 +162,7 @@ public static void parse(Class clazz, final String[] args, ActionExportParameter parameters.exporters = new ArrayList<>(); //if preset was explicitly specified - if (!parameters.preset.equals(DEFAULT_PRESET)) + if (parameters.preset != null) parameters.exporters.addAll(getPresetParameters(outputMode, clazz, parameters.preset)); if (parameters.presetFile != null) diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionFilterAlignments.java b/src/main/java/com/milaboratory/mixcr/cli/ActionFilterAlignments.java index 3eac7dd61..5becf8c08 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/ActionFilterAlignments.java +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionFilterAlignments.java @@ -4,6 +4,7 @@ import cc.redberry.primitives.Filter; import com.beust.jcommander.Parameter; import com.beust.jcommander.Parameters; +import com.beust.jcommander.converters.LongConverter; import com.beust.jcommander.validators.PositiveInteger; import com.milaboratory.cli.Action; import com.milaboratory.cli.ActionHelper; @@ -142,16 +143,13 @@ public static final class FilterAlignmentsFilterParameters extends ActionParamet public long limit = 0; @Parameter(description = "List of read ids to export", - names = {"-i", "--readsIds"}, variableArity = true) - public List ids = new ArrayList<>(); + names = {"-i", "--read-ids"}, converter = LongConverter.class) + public List ids = new ArrayList<>(); TLongHashSet getReadIds() { if (ids.isEmpty()) return null; - TLongHashSet r = new TLongHashSet(ids.size()); - for (String id : ids) - r.add(Long.parseLong(id)); - return r; + return new TLongHashSet(ids); } @Override diff --git a/src/main/java/com/milaboratory/mixcr/cli/ActionSortAlignments.java b/src/main/java/com/milaboratory/mixcr/cli/ActionSortAlignments.java new file mode 100644 index 000000000..d81171df7 --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/cli/ActionSortAlignments.java @@ -0,0 +1,125 @@ +package com.milaboratory.mixcr.cli; + +import cc.redberry.pipe.CUtils; +import cc.redberry.pipe.OutputPort; +import cc.redberry.pipe.OutputPortCloseable; +import cc.redberry.pipe.util.CountingOutputPort; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.beust.jcommander.Parameters; +import com.milaboratory.cli.Action; +import com.milaboratory.cli.ActionHelper; +import com.milaboratory.cli.ActionParameters; +import com.milaboratory.cli.ActionParametersWithOutput; +import com.milaboratory.mixcr.basictypes.VDJCAlignments; +import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader; +import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter; +import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; +import com.milaboratory.util.SmartProgressReporter; +import com.milaboratory.util.Sorter; +import com.milaboratory.util.TempFileManager; +import io.repseq.core.VDJCGene; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; + +/** + * Created by poslavsky on 28/02/2017. + */ +public final class ActionSortAlignments implements Action { + final AParameters parameters = new AParameters(); + + @Override + public void go(ActionHelper helper) throws Exception { + try(VDJCAlignmentsReader reader = new VDJCAlignmentsReader(parameters.getInputFile())) { + SmartProgressReporter.startProgressReport("Reading vdjca", reader); + try(OutputPortCloseable sorted = + Sorter.sort(reader, idComparator, 1024 * 512, new VDJCAlignmnetsSerializer(reader), TempFileManager.getTempFile()); + VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(parameters.getOutputFile())) { + + writer.header(reader.getParameters(), reader.getUsedGenes()); + + final long nReads = reader.getNumberOfReads(); + final CountingOutputPort counter = new CountingOutputPort<>(sorted); + SmartProgressReporter.startProgressReport("Writing sorted alignments", SmartProgressReporter.extractProgress(counter, nReads)); + for (VDJCAlignments res : CUtils.it(counter)) + writer.write(res); + writer.setNumberOfProcessedReads(nReads); + } + } + } + + @Override + public String command() { + return "sortAlignments"; + } + + @Override + public ActionParameters params() { + return parameters; + } + + private static final Comparator idComparator = new Comparator() { + @Override + public int compare(VDJCAlignments o1, VDJCAlignments o2) { + return Long.compare(o1.getReadId(), o2.getReadId()); + } + }; + + private static final class VDJCAlignmnetsSerializer implements Sorter.ObjectSerializer { + final VDJCAlignerParameters parameters; + final List usedAlleles; + + public VDJCAlignmnetsSerializer(VDJCAlignmentsReader reader) { + this.parameters = reader.getParameters(); + this.usedAlleles = reader.getUsedGenes(); + } + + @Override + public void write(Collection data, OutputStream stream) { + try(VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(stream)) { + writer.header(parameters, usedAlleles); + for (VDJCAlignments datum : data) + writer.write(datum); + } + } + + @Override + public OutputPort read(InputStream stream) { + VDJCAlignmentsReader reader = new VDJCAlignmentsReader(stream); + VDJCAlignmentsReader.initGeneFeatureReferencesFrom(reader, parameters); + return reader; + } + } + + @Parameters(commandDescription = "Sort alignments in vdjca file") + private static final class AParameters extends ActionParametersWithOutput { + @Parameter(description = "input.vdjca output.vdjca") + public List parameters; + + public String getInputFile() { + return parameters.get(0); + } + + + public String getOutputFile() { + return parameters.get(1); + } + + @Override + protected List getOutputFiles() { + return Arrays.asList(getOutputFile()); + } + + @Override + public void validate() { + if (parameters.size() != 2) + throw new ParameterException("Wrong number of parameters."); + super.validate(); + } + } +} diff --git a/src/main/java/com/milaboratory/mixcr/cli/Main.java b/src/main/java/com/milaboratory/mixcr/cli/Main.java index 5e117d6bd..bfd83043a 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/Main.java +++ b/src/main/java/com/milaboratory/mixcr/cli/Main.java @@ -30,15 +30,13 @@ import com.milaboratory.cli.JCommanderBasedMain; import com.milaboratory.mixcr.util.MiXCRVersionInfo; -import com.milaboratory.mixcr.util.TempFileManager; +import com.milaboratory.util.TempFileManager; import io.repseq.core.VDJCLibraryRegistry; import io.repseq.seqbase.SequenceResolvers; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.security.SecureRandom; -import java.util.Arrays; public class Main { private static volatile boolean initialized = false; @@ -48,7 +46,7 @@ public static void main(String... args) throws Exception { String command = System.getProperty("mixcr.command", "java -jar mixcr.jar"); if (!initialized) { - TempFileManager.seed(Arrays.hashCode(args) + 17 * (new SecureRandom()).nextLong()); + TempFileManager.setPrefix("mixcr_"); Path cachePath = Paths.get(System.getProperty("user.home"), ".mixcr", "cache"); //if (System.getProperty("allow.http") != null || System.getenv("MIXCR_ALLOW_HTTP") != null) @@ -95,7 +93,8 @@ public static void main(String... args) throws Exception { new ActionClonesDiff(), new ActionFilterAlignments(), new ActionListLibraries(), - new ActionExtendAlignments()); + new ActionExtendAlignments(), + new ActionSortAlignments()); // Adding version info callback main.setVersionInfoCallback(new Runnable() { diff --git a/src/main/java/com/milaboratory/mixcr/export/FeatureExtractors.java b/src/main/java/com/milaboratory/mixcr/export/FeatureExtractors.java index 40a9ded14..236e009ac 100644 --- a/src/main/java/com/milaboratory/mixcr/export/FeatureExtractors.java +++ b/src/main/java/com/milaboratory/mixcr/export/FeatureExtractors.java @@ -55,7 +55,7 @@ protected GeneFeature[] getParameters(String[] strings) { @Override protected String getHeader(OutputMode outputMode, GeneFeature[] features) { - return FieldExtractors.choose(outputMode, header0(hPrefix, features) + " ", header0(sPrefix, features)); + return FieldExtractors.choose(outputMode, header0(hPrefix, features), header0(sPrefix, features)); } @Override diff --git a/src/main/java/com/milaboratory/mixcr/export/FieldExtractors.java b/src/main/java/com/milaboratory/mixcr/export/FieldExtractors.java index ac590f9aa..319273e61 100644 --- a/src/main/java/com/milaboratory/mixcr/export/FieldExtractors.java +++ b/src/main/java/com/milaboratory/mixcr/export/FieldExtractors.java @@ -280,7 +280,8 @@ public String convert(NSequenceWithQuality seq) { } }); - descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", 1, new String[]{"AA. Seq."}, new String[]{"aaSeq"}) { + descriptorsList.add(new FeatureExtractors.WithHeader("-aaFeature", "Export amino acid sequence of specified gene feature", + 1, new String[]{"AA. Seq. "}, new String[]{"aaSeq"}) { @Override protected String extractValue(VDJCObject object, GeneFeature[] parameters) { GeneFeature geneFeature = parameters[parameters.length - 1]; @@ -705,15 +706,15 @@ protected String extract(VDJCObject object) { for (int j = 0; ; j++) { ReferencePoint refPoint = ReferencePoint.DefaultReferencePoints[j]; - // Processing special cases of trimmed points - if (refPoint.equals(ReferencePoint.VEndTrimmed)) - sb.append(trimmedPosition(bestVHit, i, refPoint, ReferencePoint.VEnd)); - else if (refPoint.equals(ReferencePoint.DBeginTrimmed)) - sb.append(trimmedPosition(bestDHit, i, refPoint, ReferencePoint.DBegin)); - else if (refPoint.equals(ReferencePoint.DEndTrimmed)) - sb.append(trimmedPosition(bestDHit, i, refPoint, ReferencePoint.DEnd)); - else if (refPoint.equals(ReferencePoint.JBeginTrimmed)) - sb.append(trimmedPosition(bestJHit, i, refPoint, ReferencePoint.JBegin)); + // Processing special cases for number of deleted / P-segment nucleotides + if (refPoint.equals(ReferencePoint.VEnd)) + sb.append(trimmedPosition(bestVHit, i, ReferencePoint.VEndTrimmed, ReferencePoint.VEnd)); + else if (refPoint.equals(ReferencePoint.DBegin)) + sb.append(trimmedPosition(bestDHit, i, ReferencePoint.DBeginTrimmed, ReferencePoint.DBegin)); + else if (refPoint.equals(ReferencePoint.DEnd)) + sb.append(trimmedPosition(bestDHit, i, ReferencePoint.DEndTrimmed, ReferencePoint.DEnd)); + else if (refPoint.equals(ReferencePoint.JBegin)) + sb.append(trimmedPosition(bestJHit, i, ReferencePoint.JBeginTrimmed, ReferencePoint.JBegin)); else { // Normal points diff --git a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java index 07009e662..b0438c77b 100644 --- a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java +++ b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java @@ -274,160 +274,186 @@ private OverlapSearchResult searchOverlaps(final VDJCAlignments rightAl, NucleotideSequence rightSeq = rightSeqQ.getSequence(); int stop = rightTarget.getPartitioning().getPosition(ReferencePoint.JBeginTrimmed); - assert stop != -1; - - stop -= kOffset; - - int maxOverlap = -1, maxDelta = -1, - maxOverlapIndexInList = -1, - maxBegin = -1, maxEnd = -1; - List maxOverlapList = null; - boolean isMaxOverOverlapped = false; - for (int rFrom = 0; rFrom < stop && rFrom + kValue < rightSeqQ.size(); rFrom++) { - long kMer = kMer(rightSeqQ.getSequence(), rFrom, kValue); - List match = kToIndexLeft.get(kMer); - if (match == null) - continue; - - out: - for (int i = 0; i < match.size(); i++) { - boolean isOverOverlapped = false; - final VDJCAlignments leftAl = match.get(i).getAlignments(); - - if (leftAl.getAlignmentsIndex() == rightAl.getAlignmentsIndex() || // You shall not merge with yourself - alreadyMergedIds.contains(leftAl.getAlignmentsIndex())) + if (stop == -1) + stop = rightTarget.getSequence().size(); + else + stop -= kOffset; + + // black list of left parts failed due to inconsistent overlapped alignments (failed AMerge) + TLongHashSet blackList = new TLongHashSet(); + SEARCH_LEFT_PARTS: + while (true) { + int maxOverlap = -1, maxDelta = -1, + maxOverlapIndexInList = -1, + maxBegin = -1, maxEnd = -1; + List maxOverlapList = null; + boolean isMaxOverOverlapped = false; + for (int rFrom = 0; rFrom < stop && rFrom + kValue < rightSeqQ.size(); rFrom++) { + long kMer = kMer(rightSeqQ.getSequence(), rFrom, kValue); + List match = kToIndexLeft.get(kMer); + if (match == null) continue; - // Checking chains compatibility - if (!allowChimeras && !leftAl.getAllChains(GeneType.Variable).intersects(jChains)) - continue; - - final NucleotideSequence leftSeq = leftAl.getPartitionedTarget(getLeftPartitionedSequence(leftAl)) - .getSequence().getSequence(); - int lFrom = match.get(i).kMerPositionFrom; + out: + for (int i = 0; i < match.size(); i++) { + boolean isOverOverlapped = false; + final VDJCAlignments leftAl = match.get(i).getAlignments(); - int delta, begin = delta = lFrom - rFrom; - if (begin < 0) { - begin = 0; - isOverOverlapped = true; - } - int end = leftSeq.size(); - if (end - delta >= rightSeq.size()) { - end = rightSeq.size() + delta; - isOverOverlapped = true; - } + if (blackList.contains(leftAl.getReadId())) + continue; - for (int j = begin; j < end; j++) - if (leftSeq.codeAt(j) != rightSeq.codeAt(j - delta)) - continue out; - - int overlap = end - begin; - if (maxOverlap < overlap) { - maxOverlap = overlap; - maxOverlapList = match; - maxOverlapIndexInList = i; - maxDelta = delta; - maxBegin = begin; - maxEnd = end; - isMaxOverOverlapped = isOverOverlapped; - } - } - } + if (leftAl.getAlignmentsIndex() == rightAl.getAlignmentsIndex() || // You shall not merge with yourself + alreadyMergedIds.contains(leftAl.getAlignmentsIndex())) + continue; - if (maxOverlapList == null) - return null; + // Checking chains compatibility + if (!allowChimeras && jChains != null && !leftAl.getAllChains(GeneType.Variable).intersects(jChains)) + continue; - if (maxOverlap < minimalAssembleOverlap) - return null; + // Check for the same V + if (leftAl.getBestHit(GeneType.Variable) != null + && rightAl.getBestHit(GeneType.Variable) != null + && !leftAl.hasCommonGenes(GeneType.Variable, rightAl)) + continue; - if (isMaxOverOverlapped) - overoverlapped.incrementAndGet(); + final NucleotideSequence leftSeq = leftAl.getPartitionedTarget(getLeftPartitionedSequence(leftAl)) + .getSequence().getSequence(); + int lFrom = match.get(i).kMerPositionFrom; - final KMerInfo left = maxOverlapList.remove(maxOverlapIndexInList); - VDJCAlignments leftAl = left.alignments; - - final long readId = rightAl.getReadId(); - - ArrayList leftTargets = extractAlignedTargets(leftAl, true); - ArrayList rightTargets = extractAlignedTargets(rightAl, false); - - AlignedTarget leftCentral = leftTargets.get(left.targetId); - AlignedTarget rightCentral = rightTargets.get(rightTargetId); - - AlignedTarget central = targetMerger.merge(readId, leftCentral, rightCentral, maxDelta) - .overrideDescription("VJOverlap(" + maxOverlap + ") = " + leftCentral.getDescription() + " + " + rightCentral.getDescription()); - - // Setting overlap position - central = AlignedTarget.setOverlapRange(central, maxBegin, maxEnd); - - final List leftDescriptors = new ArrayList<>(2), - rightDescriptors = new ArrayList<>(2); - - for (int i = 0; i < left.targetId; ++i) - leftDescriptors.add(leftTargets.get(i)); - for (int i = left.targetId + 1; i < leftAl.numberOfTargets(); ++i) - rightDescriptors.add(leftTargets.get(i)); - for (int i = 0; i < rightTargetId; ++i) - leftDescriptors.add(rightTargets.get(i)); - for (int i = rightTargetId + 1; i < rightAl.numberOfTargets(); ++i) - rightDescriptors.add(rightTargets.get(i)); - - - // Merging to VJ junction - List[] allDescriptors = new List[]{leftDescriptors, rightDescriptors}; - TargetMerger.TargetMergingResult bestResult = null; - int bestI; + int delta, begin = delta = lFrom - rFrom; + if (begin < 0) { + begin = 0; + isOverOverlapped = true; + } + int end = leftSeq.size(); + if (end - delta >= rightSeq.size()) { + end = rightSeq.size() + delta; + isOverOverlapped = true; + } - // Trying to merge left and right reads to central one - for (List descriptors : allDescriptors) - do { - bestI = -1; - for (int i = 0; i < descriptors.size(); i++) { - TargetMerger.TargetMergingResult result = targetMerger.merge(readId, descriptors.get(i), central); - if (result != null && (bestResult == null || bestResult.score < result.score)) { - bestResult = result; - bestI = i; + for (int j = begin; j < end; j++) + if (leftSeq.codeAt(j) != rightSeq.codeAt(j - delta)) + continue out; + + int overlap = end - begin; + if (maxOverlap < overlap) { + maxOverlap = overlap; + maxOverlapList = match; + maxOverlapIndexInList = i; + maxDelta = delta; + maxBegin = begin; + maxEnd = end; + isMaxOverOverlapped = isOverOverlapped; } } + } - if (bestI != -1) { - central = bestResult.result.overrideDescription( - central.getDescription() + " / " + mergeTypePrefix(bestResult.usingAlignments) + "MergedFrom" + - (descriptors == leftDescriptors ? "Left" : "Right") + - "(" + getMMDescr(bestResult.matched, bestResult.mismatched) + ") = " + - descriptors.get(bestI).getDescription()); - descriptors.remove(bestI); - } - } while (bestI != -1); - - - // Merging left+left / right+right - outer: - for (int d = 0; d < allDescriptors.length; d++) { - List descriptors = allDescriptors[d]; - for (int i = 0; i < descriptors.size(); i++) - for (int j = i + 1; j < descriptors.size(); j++) { - TargetMerger.TargetMergingResult result = targetMerger.merge(readId, descriptors.get(i), descriptors.get(j)); - if (result != null) { - descriptors.set(i, result.result.overrideDescription( - mergeTypePrefix(result.usingAlignments) + - "Merged(" + getMMDescr(result.matched, result.mismatched) + ") = " + descriptors.get(i).getDescription() + - " + " + descriptors.get(j).getDescription())); - descriptors.remove(j); - --d; - continue outer; + if (maxOverlapList == null) + return null; + + if (maxOverlap < minimalAssembleOverlap) + return null; + + final KMerInfo left = maxOverlapList.remove(maxOverlapIndexInList); + VDJCAlignments leftAl = left.alignments; + + final long readId = rightAl.getReadId(); + + ArrayList leftTargets = extractAlignedTargets(leftAl, true); + ArrayList rightTargets = extractAlignedTargets(rightAl, false); + + AlignedTarget leftCentral = leftTargets.get(left.targetId); + AlignedTarget rightCentral = rightTargets.get(rightTargetId); + + AlignedTarget central = targetMerger.merge(readId, leftCentral, rightCentral, maxDelta) + .overrideDescription("VJOverlap(" + maxOverlap + ") = " + leftCentral.getDescription() + " + " + rightCentral.getDescription()); + + // Setting overlap position + central = AlignedTarget.setOverlapRange(central, maxBegin, maxEnd); + + final List leftDescriptors = new ArrayList<>(2), + rightDescriptors = new ArrayList<>(2); + + for (int i = 0; i < left.targetId; ++i) + leftDescriptors.add(leftTargets.get(i)); + for (int i = left.targetId + 1; i < leftAl.numberOfTargets(); ++i) + rightDescriptors.add(leftTargets.get(i)); + for (int i = 0; i < rightTargetId; ++i) + leftDescriptors.add(rightTargets.get(i)); + for (int i = rightTargetId + 1; i < rightAl.numberOfTargets(); ++i) + rightDescriptors.add(rightTargets.get(i)); + + + // Merging to VJ junction + List[] allDescriptors = new List[]{leftDescriptors, rightDescriptors}; + TargetMerger.TargetMergingResult bestResult = TargetMerger.FAILED_RESULT; + int bestI; + + // Trying to merge left and right reads to central one + for (List descriptors : allDescriptors) + do { + bestI = -1; + for (int i = 0; i < descriptors.size(); i++) { + TargetMerger.TargetMergingResult result = targetMerger.merge(readId, descriptors.get(i), central); + if (result.failedDueInconsistentAlignments()) { + // Inconsistent alignments -> retry + blackList.add(leftAl.getReadId()); + continue SEARCH_LEFT_PARTS; + } + if (bestResult.getScore() < result.getScore()) { + bestResult = result; + bestI = i; + } } - } - } - // Creating pre-list of resulting targets - List result = new ArrayList<>(); - result.addAll(leftDescriptors); - result.add(central); - result.addAll(rightDescriptors); + if (bestI != -1) { + assert bestResult != TargetMerger.FAILED_RESULT; + central = bestResult.getResult().overrideDescription( + central.getDescription() + " / " + mergeTypePrefix(bestResult.isUsingAlignments()) + "MergedFrom" + + (descriptors == leftDescriptors ? "Left" : "Right") + + "(" + getMMDescr(bestResult.getMatched(), bestResult.getMismatched()) + ") = " + + descriptors.get(bestI).getDescription()); + descriptors.remove(bestI); + } + } while (bestI != -1); + + + // Merging left+left / right+right + outer: + for (int d = 0; d < allDescriptors.length; d++) { + List descriptors = allDescriptors[d]; + for (int i = 0; i < descriptors.size(); i++) + for (int j = i + 1; j < descriptors.size(); j++) { + TargetMerger.TargetMergingResult result = targetMerger.merge(readId, descriptors.get(i), descriptors.get(j)); + if (result.failedDueInconsistentAlignments()) { + // Inconsistent alignments -> retry + blackList.add(leftAl.getReadId()); + continue SEARCH_LEFT_PARTS; + } + if (result.isSuccessful()) { + descriptors.set(i, result.getResult().overrideDescription( + mergeTypePrefix(result.isUsingAlignments()) + + "Merged(" + getMMDescr(result.getMatched(), result.getMismatched()) + ") = " + descriptors.get(i).getDescription() + + " + " + descriptors.get(j).getDescription())); + descriptors.remove(j); + --d; + continue outer; + } + } + } + + if (isMaxOverOverlapped) + overoverlapped.incrementAndGet(); + + // Creating pre-list of resulting targets + List result = new ArrayList<>(); + result.addAll(leftDescriptors); + result.add(central); + result.addAll(rightDescriptors); - // Ordering and filtering final targets - return new OverlapSearchResult(maxOverlapList, left, AlignedTarget.orderTargets(result)); + // Ordering and filtering final targets + return new OverlapSearchResult(maxOverlapList, left, AlignedTarget.orderTargets(result)); + } } private static String mergeTypePrefix(boolean usingAlignment) { @@ -455,9 +481,6 @@ public void writeReport(ReportHelper helper) { } private int getLeftPartitionedSequence(VDJCAlignments alignment) { - //TODO why > 2 ? - if (alignment.numberOfTargets() > 2) - return -1; for (int i = 0; i < alignment.numberOfTargets(); i++) { if (alignment.getBestHit(GeneType.Joining) != null && alignment.getBestHit(GeneType.Joining) @@ -471,9 +494,6 @@ private int getLeftPartitionedSequence(VDJCAlignments alignment) { } private int getRightPartitionedSequence(VDJCAlignments alignment) { - //TODO why > 2 ? - if (alignment.numberOfTargets() > 2) - return -1; for (int i = 0; i < alignment.numberOfTargets(); i++) { if (alignment.getBestHit(GeneType.Variable) != null && alignment.getBestHit(GeneType.Variable) @@ -483,9 +503,34 @@ private int getRightPartitionedSequence(VDJCAlignments alignment) { if (ps.getPartitioning().isAvailable(ReferencePoint.JBeginTrimmed)) return i; } + + if (alignment.numberOfTargets() != 2) + return -1; + + + if (getAlignmentLength(alignment, GeneType.Variable, 0) == alignment.getTarget(0).size() + && alignment.hasNoHitsInTarget(1)) + return 1; + + if (getAlignmentLength(alignment, GeneType.Constant, 1) + + getAlignmentLength(alignment, GeneType.Joining, 1) == alignment.getTarget(1).size() + && alignment.hasNoHitsInTarget(0)) + return 0; + return -1; } + private static int getAlignmentLength(VDJCAlignments alignment, GeneType gt, int id) { + VDJCHit bh = alignment.getBestHit(gt); + if (bh == null) + return 0; + Alignment al = bh.getAlignment(id); + if (al == null) + return 0; + return al.getSequence2Range().length(); + } + + private boolean addLeftToIndex(VDJCAlignments alignment) { int leftTargetId = getLeftPartitionedSequence(alignment); if (leftTargetId == -1) @@ -523,7 +568,7 @@ private static long kMer(NucleotideSequence seq, int from, int length) { byte c = seq.codeAt(j); if (NucleotideSequence.ALPHABET.isWildcard(c)) return -1; - kmer = (kmer << 2 | c); + kmer = (kmer << 2|c); } return kmer; } @@ -542,6 +587,7 @@ public KMerInfo(VDJCAlignments alignments, int kMerPositionFrom, int targetId) { public VDJCAlignments getAlignments() { return alignments; } + } private static AlignedTarget overrideDescription(AlignedTarget target, boolean isLeft) { diff --git a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssemblerAligner.java b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssemblerAligner.java index cf8fd6582..c19b1e0c2 100644 --- a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssemblerAligner.java +++ b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssemblerAligner.java @@ -251,7 +251,7 @@ protected VDJCAlignmentResult process0(VDJCMultiRead input) { vResult, dResult, jResult, - vdjcHits.get(GeneType.Constant), + cutRelativeScore(vdjcHits.get(GeneType.Constant), parameters.getCAlignerParameters().getRelativeMinScore(), parameters.getMaxHits()), targets ); return new VDJCAlignmentResult<>(input, alignment); diff --git a/src/main/java/com/milaboratory/mixcr/partialassembler/TargetMerger.java b/src/main/java/com/milaboratory/mixcr/partialassembler/TargetMerger.java index 4eb1d9cdc..5f7de7b72 100644 --- a/src/main/java/com/milaboratory/mixcr/partialassembler/TargetMerger.java +++ b/src/main/java/com/milaboratory/mixcr/partialassembler/TargetMerger.java @@ -223,62 +223,19 @@ static Alignment merge(AlignmentScoring return BandedAligner.alignGlobal(scoring, left == null ? right.getSequence1() : left.getSequence1(), seq, seq1From, seq1To - seq1From, seq2From, seq2To - seq2From, bandedWidth); } -// -// static Alignment merge1(AlignmentScoring scoring, -// int bandedWidth, -// NucleotideSequence seq, int offset, -// Alignment left, -// Alignment right) { -// assert left != null || right != null; -// assert offset >= 0; -// assert left == null || right == null || left.getSequence1().equals(right.getSequence1()); -// -// int seq1From = -1, seq2From = -1, seq1To = -1, seq2To = -1; -// -// if (left != null && right != null) { -// if (left.convertPosition(right.getSequence1Range().getFrom()) != right.getSequence2Range().getFrom() + offset) { -// if (left.getScore() > right.getScore()) -// right = null; -// else -// left = null; -// } else { -// if (left.getSequence1Range().getFrom() < right.getSequence1Range().getFrom()) { -// seq1From = left.getSequence1Range().getFrom(); -// seq2From = left.getSequence2Range().getFrom(); -// } else { -// seq1From = right.getSequence1Range().getFrom(); -// seq2From = right.getSequence2Range().getFrom() + offset; -// } -// -// if (left.getSequence1Range().getTo() > right.getSequence1Range().getTo()) { -// seq1To = left.getSequence1Range().getTo(); -// seq2To = left.getSequence2Range().getTo(); -// } else { -// seq1To = right.getSequence1Range().getTo(); -// seq2To = right.getSequence2Range().getTo() + offset; -// } -// } -// } -// -// if (left == null) { -// seq1From = right.getSequence1Range().getFrom(); -// seq1To = right.getSequence1Range().getTo(); -// -// seq2From = right.getSequence2Range().getFrom() + offset; -// seq2To = right.getSequence2Range().getTo() + offset; -// } else if (right == null) { -// seq1From = left.getSequence1Range().getFrom(); -// seq1To = left.getSequence1Range().getTo(); -// -// seq2From = left.getSequence2Range().getFrom(); -// seq2To = left.getSequence2Range().getTo(); -// } -// -// return BandedAligner.alignGlobal(scoring, left == null ? right.getSequence1() : left.getSequence1(), -// seq, seq1From, seq1To - seq1From, seq2From, seq2To - seq2From, bandedWidth); -// } public TargetMergingResult merge(long readId, AlignedTarget targetLeft, AlignedTarget targetRight) { + return merge(readId, targetLeft, targetRight, true); + } + + /** + * @param readId read id + * @param targetLeft left sequence + * @param targetRight right sequence + * @param trySequenceMerge whether to try merging using sequence overlap (if alignment overlap failed) + */ + public TargetMergingResult merge(long readId, AlignedTarget targetLeft, AlignedTarget targetRight, + boolean trySequenceMerge) { for (GeneType geneType : GeneType.VJC_REFERENCE) { Map map = extractHitsMapping(targetLeft, targetRight, geneType); @@ -319,19 +276,24 @@ public int compare(HitMappingRecord o1, HitMappingRecord o2) { overlap); if (1.0 - 1.0 * mismatches / overlap < minimalAlignmentMergeIdentity) - continue; + return new TargetMergingResult(geneType); final AlignedTarget merge = merge(readId, targetLeft, targetRight, delta); - return new TargetMergingResult(merge, + return new TargetMergingResult(true, null, merge, PairedReadMergingResult.MATCH_SCORE * (overlap - mismatches) + - PairedReadMergingResult.MISMATCH_SCORE * mismatches, true, overlap, mismatches); + PairedReadMergingResult.MISMATCH_SCORE * mismatches, overlap, mismatches); } } + if (!trySequenceMerge) + return new TargetMergingResult(); + final PairedReadMergingResult merge = merger.merge(targetLeft.getTarget(), targetRight.getTarget()); if (!merge.isSuccessful()) - return null; - return new TargetMergingResult(merge(readId, targetLeft, targetRight, merge.getOffset()), merge.score(), false, merge.getOverlap(), merge.getErrors()); + return new TargetMergingResult(); + return new TargetMergingResult(false, null, + merge(readId, targetLeft, targetRight, merge.getOffset()), + merge.score(), merge.getOverlap(), merge.getErrors()); } private static int sumScore(Alignment[] als) { @@ -343,18 +305,78 @@ private static int sumScore(Alignment[] als) { return r; } + public static final TargetMergingResult FAILED_RESULT = new TargetMergingResult(); + public final static class TargetMergingResult { - public final AlignedTarget result; - public final int score; - public final boolean usingAlignments; - public final int matched, mismatched; + private final boolean usingAlignments; + private final GeneType failedMergedGeneType; + + private final AlignedTarget result; + private final int score; + private final int matched, mismatched; + +// public TargetMergingResult(AlignedTarget result, int score, boolean usingAlignments, int matched, int mismatched) { +// this.result = result; +// this.score = score; +// this.usingAlignments = usingAlignments; +// this.matched = matched; +// this.mismatched = mismatched; +// } - public TargetMergingResult(AlignedTarget result, int score, boolean usingAlignments, int matched, int mismatched) { + private TargetMergingResult() { + this(false, null, null, 0, 0, 0); + } + + private TargetMergingResult(GeneType failedGeneType) { + this(true, failedGeneType, null, 0, 0, 0); + } + + private TargetMergingResult(boolean usingAlignments, GeneType failedMergedGeneType, AlignedTarget result, int score, int matched, int mismatched) { + this.usingAlignments = usingAlignments; + this.failedMergedGeneType = failedMergedGeneType; this.result = result; this.score = score; - this.usingAlignments = usingAlignments; this.matched = matched; this.mismatched = mismatched; } + + public boolean isSuccessful() { + return result != null; + } + + public boolean isUsingAlignments() { + return usingAlignments; + } + + public boolean failedDueInconsistentAlignments() { + return failedMergedGeneType != null; + } + + public GeneType getFailedMergedGeneType() { + return failedMergedGeneType; + } + + private void checkSuccessful() { + if (!isSuccessful()) + throw new IllegalStateException(); + } + + public AlignedTarget getResult() { + checkSuccessful(); + return result; + } + + public int getScore() { + return score; + } + + public int getMatched() { + return matched; + } + + public int getMismatched() { + checkSuccessful(); + return mismatched; + } } } diff --git a/src/main/java/com/milaboratory/mixcr/util/PositiveLongValidator.java b/src/main/java/com/milaboratory/mixcr/util/PositiveLongValidator.java new file mode 100644 index 000000000..291540d13 --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/util/PositiveLongValidator.java @@ -0,0 +1,15 @@ +package com.milaboratory.mixcr.util; + +import com.beust.jcommander.IValueValidator; +import com.beust.jcommander.ParameterException; + +/** + * Created by poslavsky on 20/02/2017. + */ +public final class PositiveLongValidator implements IValueValidator { + @Override + public void validate(String name, Long value) throws ParameterException { + if (value < 0) + throw new ParameterException(name + ": positive input required (found " + value + ")"); + } +} diff --git a/src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java b/src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java index 052da3bc9..968912d6c 100644 --- a/src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java +++ b/src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java @@ -51,7 +51,7 @@ public final class RunMiXCR { public static AssembleResult assemble(final AlignResult align) { RunMiXCRAnalysis parameters = align.parameters; try (CloneAssembler assembler = new CloneAssembler(parameters.cloneAssemblerParameters, - false, align.usedGenes)) { + false, align.usedGenes, align.parameters.alignerParameters)) { CloneAssemblerReport report = new CloneAssemblerReport(); assembler.setListener(report); diff --git a/src/main/java/com/milaboratory/mixcr/util/TempFileManager.java b/src/main/java/com/milaboratory/mixcr/util/TempFileManager.java deleted file mode 100644 index 7cd328634..000000000 --- a/src/main/java/com/milaboratory/mixcr/util/TempFileManager.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2014-2015, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail - * (here and after addressed as Inventors) - * All Rights Reserved - * - * Permission to use, copy, modify and distribute any part of this program for - * educational, research and non-profit purposes, by non-profit institutions - * only, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph and the following - * three paragraphs appear in all copies. - * - * Those desiring to incorporate this work into commercial products or use for - * commercial purposes should contact the Inventors using one of the following - * email addresses: chudakovdm@mail.ru, chudakovdm@gmail.com - * - * IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, - * ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO - * WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A - * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY - * PATENT, TRADEMARK OR OTHER RIGHTS. - */ -package com.milaboratory.mixcr.util; - -import org.apache.commons.math3.random.RandomDataGenerator; -import org.apache.commons.math3.random.Well44497b; - -import java.io.File; -import java.io.IOException; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicBoolean; - -public class TempFileManager { - private static final AtomicBoolean initialized = new AtomicBoolean(false); - static final ConcurrentHashMap createdFiles = new ConcurrentHashMap<>(); - static final RandomDataGenerator randomGenerator = new RandomDataGenerator(new Well44497b()); - - public static void seed(long seed) { - synchronized (randomGenerator) { - randomGenerator.reSeed(seed); - } - } - - //static final String tmpdir = getTmpDir(); - //private static String getTmpDir() { - // String tmpdir = AccessController.doPrivileged(new GetPropertyAction("java.io.tmpdir")); - // if (!tmpdir.endsWith(File.separator)) - // tmpdir = tmpdir + File.separator; - // return tmpdir; - //} - - public static File getTempFile() { - try { - if (initialized.compareAndSet(false, true)) - // Adding delete files shutdown hook on the very firs execution of getTempFile() - Runtime.getRuntime().addShutdownHook(new Thread(new RemoveAction(), "DeleteTempFiles")); - - File file; - String name; - - do { - synchronized (randomGenerator) { - name = "mixcr_" + randomGenerator.nextHexString(40); - } - file = File.createTempFile(name, null); - } while (createdFiles.putIfAbsent(name, file) != null); - if (file.length() != 0) - throw new RuntimeException(); - return file; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private static final class RemoveAction implements Runnable { - @Override - public void run() { - for (File file : createdFiles.values()) { - if (file.exists()) - try { - file.delete(); - } catch (RuntimeException e) { - } - } - } - } -} diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/ClonalGeneAlignmentParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/ClonalGeneAlignmentParameters.java new file mode 100644 index 000000000..8f942df8d --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/ClonalGeneAlignmentParameters.java @@ -0,0 +1,35 @@ +package com.milaboratory.mixcr.vdjaligners; + +import com.milaboratory.core.alignment.AlignmentScoring; +import com.milaboratory.core.sequence.NucleotideSequence; + +/** + * Define common fields for alignment parameters for clones. + * + * ClonalGeneAlignmentParameters don't have some properties like target gene feature like {@link + * GeneAlignmentParameters}. + * + * Created by poslavsky on 01/03/2017. + */ +public interface ClonalGeneAlignmentParameters { + /** + * Define score threshold for hits relative to top hit score. + * + * @return relative min score + */ + float getRelativeMinScore(); + + /** + * Alignment scoring + * + * @return alignment scoring + */ + AlignmentScoring getScoring(); + + /** + * Returns a copy of this object + * + * @return copy of this object + */ + ClonalGeneAlignmentParameters clone(); +} diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/DAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/DAlignerParameters.java index e644a2b9a..c7384ee00 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/DAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/DAlignerParameters.java @@ -28,74 +28,58 @@ */ package com.milaboratory.mixcr.vdjaligners; -import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.milaboratory.core.alignment.AlignmentScoring; import com.milaboratory.core.sequence.NucleotideSequence; +import com.milaboratory.mixcr.assembler.DClonalAlignerParameters; import io.repseq.core.GeneFeature; +import io.repseq.core.GeneType; -@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, isGetterVisibility = JsonAutoDetect.Visibility.NONE, - getterVisibility = JsonAutoDetect.Visibility.NONE) -public final class DAlignerParameters extends GeneAlignmentParameters - implements java.io.Serializable { - private float absoluteMinScore, relativeMinScore; - private int maxHits; - private AlignmentScoring scoring; +public final class DAlignerParameters extends DClonalAlignerParameters + implements GeneAlignmentParameters, java.io.Serializable { + private GeneFeature geneFeatureToAlign; @JsonCreator public DAlignerParameters( @JsonProperty("geneFeatureToAlign") GeneFeature geneFeatureToAlign, - @JsonProperty("absoluteMinScore") float absoluteMinScore, - @JsonProperty("relativeMinScore") float relativeMinScore, - @JsonProperty("maxHits") int maxHits, - @JsonProperty("scoring") AlignmentScoring scoring) { - super(geneFeatureToAlign); - this.absoluteMinScore = absoluteMinScore; - this.relativeMinScore = relativeMinScore; - this.maxHits = maxHits; - this.scoring = scoring; + @JsonProperty("relativeMinScore") Float relativeMinScore, + @JsonProperty("absoluteMinScore") Float absoluteMinScore, + @JsonProperty("maxHits") Integer maxHits, + @JsonProperty("scoring") AlignmentScoring scoring) { + super(relativeMinScore, absoluteMinScore, maxHits, scoring); + this.geneFeatureToAlign = geneFeatureToAlign; } - public AlignmentScoring getScoring() { - return scoring; - } - - public DAlignerParameters setScoring(AlignmentScoring scoring) { - this.scoring = scoring; - return this; - } - - public float getAbsoluteMinScore() { - return absoluteMinScore; + @Override + public GeneFeature getGeneFeatureToAlign() { + return geneFeatureToAlign; } - public DAlignerParameters setAbsoluteMinScore(float absoluteMinScore) { - this.absoluteMinScore = absoluteMinScore; + public DAlignerParameters setGeneFeatureToAlign(GeneFeature geneFeatureToAlign) { + this.geneFeatureToAlign = geneFeatureToAlign; return this; } - public float getRelativeMinScore() { - return relativeMinScore; + @Override + public GeneType getGeneType() { + return GeneType.Diversity; } - public DAlignerParameters setRelativeMinScore(float relativeMinScore) { - this.relativeMinScore = relativeMinScore; - return this; + @Override + public void updateFrom(ClonalGeneAlignmentParameters alignerParameters) { + throw new IllegalStateException(); } - public int getMaxHits() { - return maxHits; + @Override + public boolean isComplete() { + throw new IllegalStateException(); } - public DAlignerParameters setMaxHits(int maxHits) { - this.maxHits = maxHits; - return this; - } @Override public DAlignerParameters clone() { - return new DAlignerParameters(geneFeatureToAlign, absoluteMinScore, relativeMinScore, maxHits, scoring); + return new DAlignerParameters(geneFeatureToAlign, relativeMinScore, absoluteMinScore, maxHits, scoring); } @Override @@ -111,26 +95,18 @@ public String toString() { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof DAlignerParameters)) return false; + if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; DAlignerParameters that = (DAlignerParameters) o; - if (Float.compare(that.absoluteMinScore, absoluteMinScore) != 0) return false; - if (maxHits != that.maxHits) return false; - if (Float.compare(that.relativeMinScore, relativeMinScore) != 0) return false; - if (!scoring.equals(that.scoring)) return false; - - return true; + return geneFeatureToAlign != null ? geneFeatureToAlign.equals(that.geneFeatureToAlign) : that.geneFeatureToAlign == null; } @Override public int hashCode() { int result = super.hashCode(); - result = 31 * result + (absoluteMinScore != +0.0f ? Float.floatToIntBits(absoluteMinScore) : 0); - result = 31 * result + (relativeMinScore != +0.0f ? Float.floatToIntBits(relativeMinScore) : 0); - result = 31 * result + maxHits; - result = 31 * result + scoring.hashCode(); + result = 31 * result + (geneFeatureToAlign != null ? geneFeatureToAlign.hashCode() : 0); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParameters.java index 74982b956..fa78deecd 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParameters.java @@ -1,73 +1,27 @@ -/* - * Copyright (c) 2014-2015, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail - * (here and after addressed as Inventors) - * All Rights Reserved - * - * Permission to use, copy, modify and distribute any part of this program for - * educational, research and non-profit purposes, by non-profit institutions - * only, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph and the following - * three paragraphs appear in all copies. - * - * Those desiring to incorporate this work into commercial products or use for - * commercial purposes should contact the Inventors using one of the following - * email addresses: chudakovdm@mail.ru, chudakovdm@gmail.com - * - * IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, - * ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO - * WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A - * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY - * PATENT, TRADEMARK OR OTHER RIGHTS. - */ package com.milaboratory.mixcr.vdjaligners; import io.repseq.core.GeneFeature; import io.repseq.core.GeneType; -public abstract class GeneAlignmentParameters> - implements java.io.Serializable { - protected GeneFeature geneFeatureToAlign; - - protected GeneAlignmentParameters(GeneFeature geneFeatureToAlign) { - this.geneFeatureToAlign = geneFeatureToAlign; - } - - public GeneFeature getGeneFeatureToAlign() { - return geneFeatureToAlign; - } - - public T setGeneFeatureToAlign(GeneFeature geneFeatureToAlign) { - this.geneFeatureToAlign = geneFeatureToAlign; - return (T) this; - } - - public GeneType getGeneType() { - return geneFeatureToAlign.getGeneType(); - } - - public abstract T clone(); - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof GeneAlignmentParameters)) return false; - - GeneAlignmentParameters that = (GeneAlignmentParameters) o; - - if (!geneFeatureToAlign.equals(that.geneFeatureToAlign)) return false; - - return true; - } - +/** + * Define additional properties required to align raw sequencing reads. + * + * Created by poslavsky on 01/03/2017. + */ +public interface GeneAlignmentParameters extends ClonalGeneAlignmentParameters { + /** + * Part of gene to use as alignment subject (sequence1 in terms of MiLib) + */ + GeneFeature getGeneFeatureToAlign(); + + /** + * getGeneFeatureToAlign().getGeneType() + */ + GeneType getGeneType(); + + /** + * Return copy of this object + */ @Override - public int hashCode() { - return geneFeatureToAlign.hashCode(); - } + GeneAlignmentParameters clone(); } diff --git a/src/main/java/com/milaboratory/mixcr/assembler/AbstractClonalAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParametersAbstract.java similarity index 61% rename from src/main/java/com/milaboratory/mixcr/assembler/AbstractClonalAlignerParameters.java rename to src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParametersAbstract.java index 33eb350c7..b17a48714 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/AbstractClonalAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/GeneAlignmentParametersAbstract.java @@ -26,39 +26,45 @@ * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY * PATENT, TRADEMARK OR OTHER RIGHTS. */ -package com.milaboratory.mixcr.assembler; +package com.milaboratory.mixcr.vdjaligners; +import com.milaboratory.mixcr.basictypes.ClonalUpdatableParameters; import io.repseq.core.GeneFeature; +import io.repseq.core.GeneType; -public abstract class AbstractClonalAlignerParameters> - implements java.io.Serializable { - protected float relativeMinScore; - protected GeneFeature featureToAlign; +public abstract class GeneAlignmentParametersAbstract> + implements java.io.Serializable, GeneAlignmentParameters { + protected GeneFeature geneFeatureToAlign; + protected Float relativeMinScore; - protected AbstractClonalAlignerParameters() { + protected GeneAlignmentParametersAbstract(GeneFeature geneFeatureToAlign, Float relativeMinScore) { + this.geneFeatureToAlign = geneFeatureToAlign; + this.relativeMinScore = relativeMinScore; } - protected AbstractClonalAlignerParameters(GeneFeature featureToAlign, float relativeMinScore) { + public T setRelativeMinScore(float relativeMinScore) { this.relativeMinScore = relativeMinScore; - this.featureToAlign = featureToAlign; + return (T) this; } + @Override public float getRelativeMinScore() { return relativeMinScore; } - public T setRelativeMinScore(float relativeMinScore) { - this.relativeMinScore = relativeMinScore; - return (T) this; + @Override + public GeneFeature getGeneFeatureToAlign() { + return geneFeatureToAlign; } - public GeneFeature getFeatureToAlign() { - return featureToAlign; + public T setGeneFeatureToAlign(GeneFeature geneFeatureToAlign) { + this.geneFeatureToAlign = geneFeatureToAlign; + return (T) this; } - public T setFeatureToAlign(GeneFeature featureToAlign) { - this.featureToAlign = featureToAlign; - return (T) this; + @Override + public GeneType getGeneType() { + return geneFeatureToAlign.getGeneType(); } public abstract T clone(); @@ -66,20 +72,18 @@ public T setFeatureToAlign(GeneFeature featureToAlign) { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof AbstractClonalAlignerParameters)) return false; + if (o == null || getClass() != o.getClass()) return false; - AbstractClonalAlignerParameters that = (AbstractClonalAlignerParameters) o; + GeneAlignmentParametersAbstract that = (GeneAlignmentParametersAbstract) o; if (Float.compare(that.relativeMinScore, relativeMinScore) != 0) return false; - if (!featureToAlign.equals(that.featureToAlign)) return false; - - return true; + return geneFeatureToAlign != null ? geneFeatureToAlign.equals(that.geneFeatureToAlign) : that.geneFeatureToAlign == null; } @Override public int hashCode() { - int result = (relativeMinScore != +0.0f ? Float.floatToIntBits(relativeMinScore) : 0); - result = 31 * result + featureToAlign.hashCode(); + int result = geneFeatureToAlign != null ? geneFeatureToAlign.hashCode() : 0; + result = 31 * result + (relativeMinScore != +0.0f ? Float.floatToIntBits(relativeMinScore) : 0); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/KGeneAlignmentParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/KGeneAlignmentParameters.java index c6156f696..52759dd15 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/KGeneAlignmentParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/KGeneAlignmentParameters.java @@ -31,18 +31,18 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseParameters; +import com.milaboratory.core.alignment.AlignmentScoring; import com.milaboratory.core.alignment.kaligner1.AbstractKAlignerParameters; import com.milaboratory.core.alignment.kaligner1.KAlignerParameters; +import com.milaboratory.core.sequence.NucleotideSequence; import io.repseq.core.GeneFeature; @JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, isGetterVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE) -public final class KGeneAlignmentParameters extends GeneAlignmentParameters +public final class KGeneAlignmentParameters extends GeneAlignmentParametersAbstract implements java.io.Serializable { private AbstractKAlignerParameters parameters; private int minSumScore; - private float relativeMinScore; @JsonCreator public KGeneAlignmentParameters( @@ -50,22 +50,11 @@ public KGeneAlignmentParameters( @JsonProperty("minSumScore") int minSumScore, @JsonProperty("relativeMinScore") float relativeMinScore, @JsonProperty("parameters") AbstractKAlignerParameters parameters) { - super(geneFeatureToAlign); + super(geneFeatureToAlign, relativeMinScore); this.minSumScore = minSumScore; - this.relativeMinScore = relativeMinScore; this.parameters = parameters; } - - public KGeneAlignmentParameters setRelativeMinScore(float relativeMinScore) { - this.relativeMinScore = relativeMinScore; - return this; - } - - public float getRelativeMinScore() { - return relativeMinScore; - } - public int getMinSumScore() { return minSumScore; } @@ -89,6 +78,11 @@ public KGeneAlignmentParameters clone() { return new KGeneAlignmentParameters(geneFeatureToAlign, minSumScore, relativeMinScore, parameters.clone()); } + @Override + public AlignmentScoring getScoring() { + return parameters.getScoring(); + } + @Override public String toString() { return "KGeneAlignmentParameters{" + @@ -107,17 +101,14 @@ public boolean equals(Object o) { KGeneAlignmentParameters that = (KGeneAlignmentParameters) o; if (minSumScore != that.minSumScore) return false; - if (Float.compare(that.relativeMinScore, relativeMinScore) != 0) return false; - return parameters.equals(that.parameters); - + return parameters != null ? parameters.equals(that.parameters) : that.parameters == null; } @Override public int hashCode() { int result = super.hashCode(); - result = 31 * result + parameters.hashCode(); + result = 31 * result + (parameters != null ? parameters.hashCode() : 0); result = 31 * result + minSumScore; - result = 31 * result + (relativeMinScore != +0.0f ? Float.floatToIntBits(relativeMinScore) : 0); return result; } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/SingleDAligner.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/SingleDAligner.java index ed7728347..21fc45f77 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/SingleDAligner.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/SingleDAligner.java @@ -35,6 +35,7 @@ import com.milaboratory.core.alignment.Alignment; import com.milaboratory.core.alignment.AlignmentScoring; import com.milaboratory.core.sequence.NucleotideSequence; +import com.milaboratory.mixcr.assembler.DClonalAlignerParameters; import com.milaboratory.mixcr.basictypes.VDJCHit; import io.repseq.core.Chains; import io.repseq.core.GeneFeature; @@ -67,11 +68,16 @@ public List load(NucleotideSequence key) { public SingleDAligner(DAlignerParameters parameters, List genes) { + this(parameters, parameters.getGeneFeatureToAlign(), genes); + } + + public SingleDAligner(DClonalAlignerParameters parameters, GeneFeature geneFeature, + List genes) { this.scoring = parameters.getScoring(); this.absoluteMinScore = parameters.getAbsoluteMinScore(); this.relativeMinScore = parameters.getRelativeMinScore(); this.maxHits = parameters.getMaxHits(); - this.featureToAlign = parameters.getGeneFeatureToAlign(); + this.featureToAlign = geneFeature; for (VDJCGene gene : genes) sequences.add(new SequenceWithChains(gene, featureToAlign)); this.genes = new ArrayList<>(genes); diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java index 347ce5e8b..1b691ac80 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java @@ -83,21 +83,30 @@ protected VDJCAlignmentResult process0(final PairedRead input) { VDJCAlignmentResult result = parameters.getAllowPartialAlignments() ? processPartial(input, helpers) : processStrict(input, helpers); + if (result.alignment != null) { final VDJCAlignments alignment = result.alignment; final TargetMerger.TargetMergingResult mergeResult = alignmentsMerger.merge( input.getId(), new AlignedTarget(alignment, 0), - new AlignedTarget(alignment, 1)); - - if (mergeResult != null) { - NSequenceWithQuality alignedTarget = mergeResult.result.getTarget(); + new AlignedTarget(alignment, 1), + false); + + if (mergeResult.failedDueInconsistentAlignments()) { + GeneType gt = mergeResult.getFailedMergedGeneType(); + int removeId = + alignment.getBestHit(gt).getAlignment(0).getScore() + > alignment.getBestHit(gt).getAlignment(1).getScore() + ? 1 : 0; + return new VDJCAlignmentResult<>(input, alignment.removeBestHitAlignment(gt, removeId)); + } else if (mergeResult.isSuccessful()) { + NSequenceWithQuality alignedTarget = mergeResult.getResult().getTarget(); SingleRead sRead = new SingleReadImpl(input.getId(), alignedTarget, ""); VDJCAlignmentResult sResult = sAligner.process0(sRead); if (sResult.alignment == null) return result; VDJCAlignments sAlignment = sResult.alignment; - sAlignment.setTargetDescriptions(new String[]{"AOverlap(" + mergeResult.matched + ")"}); + sAlignment.setTargetDescriptions(new String[]{"AOverlap(" + mergeResult.getMatched() + ")"}); return new VDJCAlignmentResult<>(input, sAlignment); } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java index c0a350821..51c6c3645 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java @@ -260,6 +260,13 @@ public GeneFeature getFeatureToAlign(GeneType type) { return params == null ? null : params.getGeneFeatureToAlign(); } + public EnumMap getFeaturesToAlignMap(){ + EnumMap res = new EnumMap<>(GeneType.class); + for (GeneType gt : GeneType.VDJC_REFERENCE) + res.put(gt,getFeatureToAlign(gt)); + return res; + } + protected EnumMap getCloneOfAlignmentParameters() { EnumMap map = new EnumMap(GeneType.class); for (Map.Entry entry : alignmentParameters.entrySet()) diff --git a/src/main/resources/parameters/assembler_parameters.json b/src/main/resources/parameters/assembler_parameters.json index c6a79a18e..d20607de8 100644 --- a/src/main/resources/parameters/assembler_parameters.json +++ b/src/main/resources/parameters/assembler_parameters.json @@ -19,130 +19,31 @@ }, "cloneFactoryParameters": { "vParameters": { - "featureToAlign": "VTranscriptWithP", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring": { - "type": "linear", - "subsMatrix": "simple(match = 5, mismatch = -9)", - "gapPenalty": -12 - }, - "width": 5, - "stopPenalty": -1500 - } + "maxAlignmentWidth": null, + "maxAlignmentWidthLinear" : 5, + "maxAlignmentWidthAffine" : 500, + "relativeMinScore": null, + "scoring": null }, "jParameters": { - "featureToAlign": "JRegionWithP", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring": { - "type": "linear", - "subsMatrix": "simple(match = 5, mismatch = -9)", - "gapPenalty": -12 - }, - "width": 5, - "stopPenalty": -1500 - } + "maxAlignmentWidth": null, + "maxAlignmentWidthLinear" : 5, + "maxAlignmentWidthAffine" : 500, + "relativeMinScore": null, + "scoring": null }, "dParameters": { - "geneFeatureToAlign": "DRegionWithP", - "absoluteMinScore": 25.0, - "relativeMinScore": 0.85, - "maxHits": 3, - "scoring": { - "type": "linear", - "subsMatrix": "simple(match = 5, mismatch = -9)", - "gapPenalty": -12 - } + "absoluteMinScore": null, + "relativeMinScore": null, + "maxHits": null, + "scoring": null }, "cParameters": { - "featureToAlign": "CExon1", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring": { - "type": "linear", - "subsMatrix": "simple(match = 5, mismatch = -9)", - "gapPenalty": -12 - }, - "width": 5 - } - } - }, - "addReadsCountOnClustering": false, - "badQualityThreshold": 20, - "maxBadPointsPercent": 0.7, - "mappingThreshold": "2of5" - }, - "default_affine": { - "assemblingFeatures": ["CDR3"], - "minimalQuality" : 0, - "minimalClonalSequenceLength": 12, - "qualityAggregationType": "Max", - "separateByV": false, - "separateByJ": false, - "separateByC": false, - "maximalPreClusteringRatio": 1.0, - "cloneClusteringParameters": { - "searchDepth": 2, - "allowedMutationsInNRegions": 1, - "searchParameters": "twoMismatchesOrIndels", - "clusteringFilter": { - "type": "relativeConcentration", - "specificMutationProbability": 1E-3 - } - }, - "cloneFactoryParameters": { - "vParameters": { - "featureToAlign": "VTranscriptWithP", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring" : { - "type" : "affine", - "subsMatrix" : "simple(match = 10, mismatch = -19)", - "gapOpenPenalty" : -40, - "gapExtensionPenalty" : -11 - }, - "width": 500, - "stopPenalty": 0 - } - }, - "jParameters": { - "featureToAlign": "JRegionWithP", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring" : { - "type" : "affine", - "subsMatrix" : "simple(match = 10, mismatch = -19)", - "gapOpenPenalty" : -40, - "gapExtensionPenalty" : -11 - }, - "width": 500, - "stopPenalty": 0 - } - }, - "dParameters": { - "geneFeatureToAlign": "DRegionWithP", - "absoluteMinScore": 25.0, - "relativeMinScore": 0.85, - "maxHits": 3, - "scoring" : { - "type" : "affine", - "subsMatrix" : "simple(match = 10, mismatch = -30)", - "gapOpenPenalty" : -40, - "gapExtensionPenalty" : -10 - } - }, - "cParameters": { - "featureToAlign": "CExon1", - "relativeMinScore": 0.8, - "alignmentParameters": { - "scoring": { - "type": "linear", - "subsMatrix": "simple(match = 5, mismatch = -9)", - "gapPenalty": -12 - }, - "width": 5 - } + "maxAlignmentWidth": null, + "maxAlignmentWidthLinear" : 5, + "maxAlignmentWidthAffine" : 500, + "relativeMinScore": null, + "scoring": null } }, "addReadsCountOnClustering": false, diff --git a/src/test/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainerTest.java b/src/test/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainerTest.java index 7bdec0682..89b8e6cbd 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainerTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/AlignmentsToClonesMappingContainerTest.java @@ -55,9 +55,15 @@ public void test(int minRecords, int maxRecords, int minClones, int maxClones, i Well19937c rnd = RandomUtil.getThreadLocalRandom(); RandomDataGenerator rndD = RandomUtil.getThreadLocalRandomData(); - ReadToCloneMapping[] mappings = new ReadToCloneMapping[minRecords + rnd.nextInt(maxRecords - minRecords)]; + int readsCount, clonesCount; - TLongHashSet[] clones = new TLongHashSet[minClones + rnd.nextInt(maxClones - minClones)]; + do { + readsCount = minRecords + rnd.nextInt(maxRecords - minRecords); + clonesCount = minClones + rnd.nextInt(maxClones - minClones); + } while (readsCount <= clonesCount); + + ReadToCloneMapping[] mappings = new ReadToCloneMapping[readsCount]; + TLongHashSet[] clones = new TLongHashSet[clonesCount]; int[] initialReads = rndD.nextPermutation(mappings.length, clones.length); @@ -87,7 +93,7 @@ public void test(int minRecords, int maxRecords, int minClones, int maxClones, i } File tempFile = TempFileManager.getTempFile(); - try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile)))) { + try(DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile)))) { AlignmentsToClonesMappingContainer.writeMapping(CUtils.asOutputPort(mappings), clones.length, dos, sortChunk); } diff --git a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerParametersTest.java b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerParametersTest.java index c35b73617..adc1d1c5d 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerParametersTest.java @@ -45,11 +45,11 @@ public class CloneAssemblerParametersTest { @Test public void test1() throws Exception { CloneFactoryParameters factoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); CloneAssemblerParameters params = new CloneAssemblerParameters(new GeneFeature[]{GeneFeature.FR1, GeneFeature.CDR3}, 12, @@ -73,11 +73,11 @@ null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlig @Test public void test2() throws Exception { CloneFactoryParameters factoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); CloneAssemblerParameters params = new CloneAssemblerParameters(new GeneFeature[]{GeneFeature.FR1, GeneFeature.CDR3}, 12, diff --git a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java index 6a0e39d09..6b2d40304 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java @@ -30,7 +30,6 @@ import cc.redberry.pipe.CUtils; import cc.redberry.pipe.OutputPortCloseable; -import com.milaboratory.core.alignment.BandedAlignerParameters; import com.milaboratory.core.alignment.LinearGapAlignmentScoring; import com.milaboratory.core.io.sequence.SequenceRead; import com.milaboratory.core.io.sequence.SequenceReader; @@ -94,7 +93,7 @@ private static CloneSet runFullPipeline(String... fastqFiles) throws IOException //write alignments to byte array ByteArrayOutputStream alignmentsSerialized = new ByteArrayOutputStream(); - try (VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(alignmentsSerialized)) { + try(VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(alignmentsSerialized)) { writer.header(aligner); for (Object read : CUtils.it(reader)) { VDJCAlignmentResult result = (VDJCAlignmentResult) aligner.process((SequenceRead) read); @@ -109,12 +108,12 @@ private static CloneSet runFullPipeline(String... fastqFiles) throws IOException LinearGapAlignmentScoring scoring = new LinearGapAlignmentScoring<>(NucleotideSequence.ALPHABET, 5, -9, -12); CloneFactoryParameters factoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.8f, - new BandedAlignerParameters(scoring, 5, -150)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.8f, - new BandedAlignerParameters(scoring, 5, -150)), + new VJCClonalAlignerParameters(0.8f, + scoring, 5), + new VJCClonalAlignerParameters(0.8f, + scoring, 5), null, - new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, scoring) + new DAlignerParameters(GeneFeature.DRegion, 0.85f, 30.0f, 3, scoring) ); CloneAssemblerParameters assemblerParameters = new CloneAssemblerParameters( @@ -126,7 +125,7 @@ private static CloneSet runFullPipeline(String... fastqFiles) throws IOException System.out.println(GlobalObjectMappers.toOneLine(assemblerParameters)); CloneAssemblerRunner assemblerRunner = new CloneAssemblerRunner(alignmentsProvider, - new CloneAssembler(assemblerParameters, true, aligner.getUsedGenes()), 2); + new CloneAssembler(assemblerParameters, true, aligner.getUsedGenes(), alignerParameters), 2); SmartProgressReporter.startProgressReport(assemblerRunner); assemblerRunner.run(); diff --git a/src/test/java/com/milaboratory/mixcr/assembler/CloneFactoryParametersTest.java b/src/test/java/com/milaboratory/mixcr/assembler/CloneFactoryParametersTest.java index 8c3c21972..1eba040f6 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/CloneFactoryParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/CloneFactoryParametersTest.java @@ -31,9 +31,9 @@ import com.milaboratory.core.alignment.AffineGapAlignmentScoring; import com.milaboratory.core.alignment.BandedAlignerParameters; import com.milaboratory.core.alignment.LinearGapAlignmentScoring; -import io.repseq.core.GeneFeature; import com.milaboratory.mixcr.vdjaligners.DAlignerParameters; import com.milaboratory.util.GlobalObjectMappers; +import io.repseq.core.GeneFeature; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -43,13 +43,13 @@ public class CloneFactoryParametersTest { @Test public void test1() throws Exception { CloneFactoryParameters paramentrs = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - new VJCClonalAlignerParameters(GeneFeature.CExon1, 0.2f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 9, -30)), - new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + new VJCClonalAlignerParameters(0.2f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 9), + new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); //System.out.println(str); @@ -64,11 +64,11 @@ public void test1() throws Exception { @Test public void test2() throws Exception { CloneFactoryParameters paramentrs = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); //System.out.println(str); diff --git a/src/test/java/com/milaboratory/mixcr/assembler/DClonalAlignerParametersTest.java b/src/test/java/com/milaboratory/mixcr/assembler/DClonalAlignerParametersTest.java deleted file mode 100644 index 2faf499f6..000000000 --- a/src/test/java/com/milaboratory/mixcr/assembler/DClonalAlignerParametersTest.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2014-2015, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail - * (here and after addressed as Inventors) - * All Rights Reserved - * - * Permission to use, copy, modify and distribute any part of this program for - * educational, research and non-profit purposes, by non-profit institutions - * only, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph and the following - * three paragraphs appear in all copies. - * - * Those desiring to incorporate this work into commercial products or use for - * commercial purposes should contact the Inventors using one of the following - * email addresses: chudakovdm@mail.ru, chudakovdm@gmail.com - * - * IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, - * ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO - * WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A - * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY - * PATENT, TRADEMARK OR OTHER RIGHTS. - */ -package com.milaboratory.mixcr.assembler; - -import com.milaboratory.core.alignment.LinearGapAlignmentScoring; -import io.repseq.core.GeneFeature; -import com.milaboratory.util.GlobalObjectMappers; -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -public class DClonalAlignerParametersTest { - @Test - public void test1() throws Exception { - DClonalAlignerParameters paramentrs = new DClonalAlignerParameters(0.3f, GeneFeature.DRegion, - LinearGapAlignmentScoring.getNucleotideBLASTScoring()); - String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); - DClonalAlignerParameters deser = GlobalObjectMappers.PRETTY.readValue(str, DClonalAlignerParameters.class); - assertEquals(paramentrs, deser); - assertEquals(paramentrs, deser.clone()); - } -} \ No newline at end of file diff --git a/src/test/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParametersTest.java b/src/test/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParametersTest.java index 030a3333c..9ab5998dd 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/VJCClonalAlignerParametersTest.java @@ -28,9 +28,8 @@ */ package com.milaboratory.mixcr.assembler; -import com.milaboratory.core.alignment.BandedAlignerParameters; import com.milaboratory.core.alignment.LinearGapAlignmentScoring; -import io.repseq.core.GeneFeature; +import com.milaboratory.test.TestUtil; import com.milaboratory.util.GlobalObjectMappers; import org.junit.Test; @@ -39,11 +38,23 @@ public class VJCClonalAlignerParametersTest { @Test public void test1() throws Exception { - VJCClonalAlignerParameters paramentrs = new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)); + VJCClonalAlignerParameters paramentrs = new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3); String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); VJCClonalAlignerParameters deser = GlobalObjectMappers.PRETTY.readValue(str, VJCClonalAlignerParameters.class); assertEquals(paramentrs, deser); assertEquals(paramentrs, deser.clone()); } + + @Test + public void test2() throws Exception { + TestUtil.assertJson(new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), true); + + TestUtil.assertJson(new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), null, 2, 3), true); + + TestUtil.assertJson(new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 1, 2, 3), true); + } } \ No newline at end of file diff --git a/src/test/java/com/milaboratory/mixcr/cli/JsonOverriderTest.java b/src/test/java/com/milaboratory/mixcr/cli/JsonOverriderTest.java index ccb28821e..7adc88232 100644 --- a/src/test/java/com/milaboratory/mixcr/cli/JsonOverriderTest.java +++ b/src/test/java/com/milaboratory/mixcr/cli/JsonOverriderTest.java @@ -29,17 +29,15 @@ package com.milaboratory.mixcr.cli; import com.milaboratory.core.alignment.AffineGapAlignmentScoring; -import com.milaboratory.core.alignment.BandedAlignerParameters; import com.milaboratory.core.alignment.LinearGapAlignmentScoring; import com.milaboratory.core.alignment.kaligner1.KAlignerParameters; import com.milaboratory.core.sequence.NucleotideSequence; import com.milaboratory.core.sequence.quality.QualityAggregationType; import com.milaboratory.core.tree.TreeSearchParameters; import com.milaboratory.mixcr.assembler.*; -import io.repseq.core.GeneFeature; -import com.milaboratory.mixcr.vdjaligners.DAlignerParameters; import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; import com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets; +import io.repseq.core.GeneFeature; import org.junit.Assert; import org.junit.Test; @@ -89,11 +87,11 @@ public void test2() throws Exception { @Test public void testArray1() throws Exception { CloneFactoryParameters factoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); CloneAssemblerParameters params = new CloneAssemblerParameters(new GeneFeature[]{GeneFeature.FR1, GeneFeature.CDR3}, 12, @@ -118,17 +116,17 @@ null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlig @Test public void testCloneFactoryParameters2() throws Exception { CloneFactoryParameters factoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); CloneAssemblerParameters params = new CloneAssemblerParameters(new GeneFeature[]{GeneFeature.FR1, GeneFeature.CDR3}, 12, QualityAggregationType.Average, new CloneClusteringParameters(2, 1, TreeSearchParameters.ONE_MISMATCH, new RelativeConcentrationFilter(1.0E-6)), - factoryParameters, true, true, false, 0.4, true, (byte) 20, .8, "2of6", (byte) 15); + factoryParameters, true, true, false, 0.4, true, (byte) 20, .8, "2of6", (byte) 15); CloneAssemblerParameters override = JsonOverrider.override( params, @@ -136,11 +134,11 @@ null, new DAlignerParameters(GeneFeature.DRegion, 30.0f, 0.85f, 3, AffineGapAlig "dParameters.absoluteMinScore=101"); CloneFactoryParameters expectedFactoryParameters = new CloneFactoryParameters( - new VJCClonalAlignerParameters(GeneFeature.VRegion, 0.3f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3, -50)), - new VJCClonalAlignerParameters(GeneFeature.JRegion, 0.4f, - new BandedAlignerParameters(LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5, -40)), - null, new DAlignerParameters(GeneFeature.DRegion, 101f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) + new VJCClonalAlignerParameters(0.3f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 3), + new VJCClonalAlignerParameters(0.4f, + LinearGapAlignmentScoring.getNucleotideBLASTScoring(), 5), + null, new DClonalAlignerParameters(0.85f, 101f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()) ); Assert.assertEquals(expectedFactoryParameters, override.getCloneFactoryParameters()); diff --git a/src/test/java/com/milaboratory/mixcr/export/FieldExtractorsTest.java b/src/test/java/com/milaboratory/mixcr/export/FieldExtractorsTest.java index f6c764d22..f6d2eb12d 100644 --- a/src/test/java/com/milaboratory/mixcr/export/FieldExtractorsTest.java +++ b/src/test/java/com/milaboratory/mixcr/export/FieldExtractorsTest.java @@ -124,27 +124,27 @@ public Integer[][] go(String seq, int len, int offset1, int offset2, int offset3 Integer[][] r = goAssert.go("{CDR3Begin(-250):VEnd(-3)} 'CCAAA' {DBegin(0):DEnd(0)} 'AAA' {JBegin(2):FR4End} " + "{CBegin}C*100 N*100", 100, 240, 307, 450, ""); - assertExportPoint(r[0], ReferencePoint.VEndTrimmed, -3); - assertExportPoint(r[0], ReferencePoint.DBeginTrimmed, 0); - assertExportPoint(r[0], ReferencePoint.DEndTrimmed, 0); - assertExportPoint(r[0], ReferencePoint.JBeginTrimmed, -2); + assertExportPoint(r[0], ReferencePoint.VEnd, -3); + assertExportPoint(r[0], ReferencePoint.DBegin, 0); + assertExportPoint(r[0], ReferencePoint.DEnd, 0); + assertExportPoint(r[0], ReferencePoint.JBegin, -2); r = goAssert.go("{CDR3Begin(-250):VEnd(0)} 'CCAAA' {DBegin(0):DEnd(-2)} 'AAA' {JBegin:FR4End} {CBegin}C*100 N*100", 100, 240, 307, 450, ""); - assertExportPoint(r[0], ReferencePoint.VEndTrimmed, 0); - assertExportPoint(r[0], ReferencePoint.DBeginTrimmed, 0); - assertExportPoint(r[0], ReferencePoint.DEndTrimmed, -2); - assertExportPoint(r[0], ReferencePoint.JBeginTrimmed, 0); + assertExportPoint(r[0], ReferencePoint.VEnd, 0); + assertExportPoint(r[0], ReferencePoint.DBegin, 0); + assertExportPoint(r[0], ReferencePoint.DEnd, -2); + assertExportPoint(r[0], ReferencePoint.JBegin, 0); // With PSegments r = goAssert.go("{CDR3Begin(-250):VEnd(0)} {VEnd:VEnd(-3)} 'CCAAA' {DBegin(3):DBegin} {DBegin:DEnd(-2)} 'AAA' " + "{JBegin(2):JBegin} {JBegin:FR4End} {CBegin}C*100 N*100", 100, 240, 307, 450, ""); - assertExportPoint(r[0], ReferencePoint.VEndTrimmed, 3); - assertExportPoint(r[0], ReferencePoint.DBeginTrimmed, 3); - assertExportPoint(r[0], ReferencePoint.DEndTrimmed, -2); - assertExportPoint(r[0], ReferencePoint.JBeginTrimmed, 2); + assertExportPoint(r[0], ReferencePoint.VEnd, 3); + assertExportPoint(r[0], ReferencePoint.DBegin, 3); + assertExportPoint(r[0], ReferencePoint.DEnd, -2); + assertExportPoint(r[0], ReferencePoint.JBegin, 2); } static void assertExportPoint(Integer[] r, ReferencePoint rp, Integer value) { diff --git a/src/test/java/com/milaboratory/mixcr/tests/BackwardCompatibilityTests.java b/src/test/java/com/milaboratory/mixcr/tests/BackwardCompatibilityTests.java index f5197aae7..a161034d6 100644 --- a/src/test/java/com/milaboratory/mixcr/tests/BackwardCompatibilityTests.java +++ b/src/test/java/com/milaboratory/mixcr/tests/BackwardCompatibilityTests.java @@ -42,11 +42,7 @@ public class BackwardCompatibilityTests { @Test public void testAlignments() throws Exception { - //assertGoodVDJCA("/backward_compatibility/alignments_v1.6.vdjca.gz", 98); - //assertGoodVDJCA("/backward_compatibility/test_16.vdjca.gz", 76); - //assertGoodVDJCA("/backward_compatibility/test_17.vdjca.gz", 75); - //assertGoodVDJCA("/backward_compatibility/test_18.vdjca.gz", 76); - //assertGoodVDJCA("/backward_compatibility/test_183.vdjca.gz", 76); + assertGoodVDJCA("/backward_compatibility/2.1.0/test.vdjca.gz", 76); } public static void assertGoodVDJCA(String resource, int size) throws IOException { @@ -68,10 +64,7 @@ public static void assertGoodVDJCA(String resource, int size) throws IOException @Test public void testBC16Cloneset() throws Exception { - //assertGoodCLNS("/backward_compatibility/clones_v1.6.clns.gz", 24, 24); - //assertGoodCLNS("/backward_compatibility/test_16.clns.gz", 22, 17); - //assertGoodCLNS("/backward_compatibility/test_17.clns.gz", 21, 16); - //assertGoodCLNS("/backward_compatibility/test_18.clns.gz", 81, 66); + assertGoodCLNS("/backward_compatibility/2.1.0/test.clns.gz", 22, 17); } public static void assertGoodCLNS(String resource, int size, int good) throws IOException { diff --git a/src/test/java/com/milaboratory/mixcr/util/RunMiXCRTest.java b/src/test/java/com/milaboratory/mixcr/util/RunMiXCRTest.java index 4054282c0..d156df554 100644 --- a/src/test/java/com/milaboratory/mixcr/util/RunMiXCRTest.java +++ b/src/test/java/com/milaboratory/mixcr/util/RunMiXCRTest.java @@ -6,6 +6,7 @@ import com.milaboratory.mixcr.basictypes.*; import com.milaboratory.mixcr.cli.ActionAlign; import com.milaboratory.mixcr.vdjaligners.VDJCAligner; +import com.milaboratory.util.TempFileManager; import io.repseq.core.Chains; import io.repseq.core.GeneType; import org.junit.Assert; @@ -87,7 +88,7 @@ public void test2() throws Exception { RunMiXCR.AlignResult align = RunMiXCR.align(params); List reads = new ArrayList<>(); - try (PairedFastqReader fReader = new PairedFastqReader( + try(PairedFastqReader fReader = new PairedFastqReader( RunMiXCR.class.getResource("/sequences/test_R1.fastq").getFile(), RunMiXCR.class.getResource("/sequences/test_R2.fastq").getFile())) { for (PairedRead s : CUtils.it(fReader)) @@ -95,13 +96,13 @@ public void test2() throws Exception { } File tempFile = TempFileManager.getTempFile(); - try (VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(tempFile)) { + try(VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(tempFile)) { writer.header(align.aligner); for (VDJCAlignments alignment : align.alignments) writer.write(alignment); } - try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(tempFile)) { + try(VDJCAlignmentsReader reader = new VDJCAlignmentsReader(tempFile)) { int tr = 0; for (VDJCAlignments alignment : CUtils.it(reader)) { PairedRead actual = reads.get((int) alignment.getReadId()); @@ -124,7 +125,7 @@ public void test3() throws Exception { RunMiXCR.AlignResult align = RunMiXCR.align(params); List reads = new ArrayList<>(); - try (PairedFastqReader fReader = new PairedFastqReader( + try(PairedFastqReader fReader = new PairedFastqReader( "/Users/poslavsky/Projects/milab/temp/R1_part.fastq.gz", "/Users/poslavsky/Projects/milab/temp/R2_part.fastq.gz", true)) { for (PairedRead s : CUtils.it(fReader)) @@ -132,13 +133,13 @@ public void test3() throws Exception { } File tempFile = TempFileManager.getTempFile(); - try (VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(tempFile)) { + try(VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(tempFile)) { writer.header(align.aligner); for (VDJCAlignments alignment : align.alignments) writer.write(alignment); } - try (VDJCAlignmentsReader reader = new VDJCAlignmentsReader(tempFile)) { + try(VDJCAlignmentsReader reader = new VDJCAlignmentsReader(tempFile)) { int tr = 0; for (VDJCAlignments alignment : CUtils.it(reader)) { PairedRead actual = reads.get((int) alignment.getReadId()); diff --git a/src/test/java/com/milaboratory/mixcr/vdjaligners/DAlignerParametersTest.java b/src/test/java/com/milaboratory/mixcr/vdjaligners/DAlignerParametersTest.java index 452d8c6c8..1d076d69b 100644 --- a/src/test/java/com/milaboratory/mixcr/vdjaligners/DAlignerParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/vdjaligners/DAlignerParametersTest.java @@ -40,7 +40,7 @@ public class DAlignerParametersTest { @Test public void test1() throws Exception { DAlignerParameters paramentrs = new DAlignerParameters(GeneFeature.DRegion, - 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()); + 0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()); String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); DAlignerParameters deser = GlobalObjectMappers.PRETTY.readValue(str, DAlignerParameters.class); assertEquals(paramentrs, deser); @@ -51,7 +51,7 @@ public void test1() throws Exception { @Test public void test2() throws Exception { DAlignerParameters se = new DAlignerParameters(GeneFeature.DRegion, - 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()); + 0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()); IOTestUtil.assertJavaSerialization(se); } } \ No newline at end of file diff --git a/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java b/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java index 69b8522a5..6ddf8db31 100644 --- a/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java @@ -49,7 +49,7 @@ public void test1() throws Exception { 1.5f, 0.75f, 1.0f, -0.1f, -0.3f, 4, 10, 15, 2, -10, 40.0f, 0.87f, 7, LinearGapAlignmentScoring.getNucleotideBLASTScoring())), new DAlignerParameters(GeneFeature.DRegion, - 30.0f, 0.85f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()), + 0.85f, 30.0f, 3, AffineGapAlignmentScoring.getNucleotideBLASTScoring()), new KGeneAlignmentParameters(GeneFeature.JRegion, 120, 0.87f, new KAlignerParameters(5, false, false, 1.5f, 0.75f, 1.0f, -0.1f, -0.3f, 4, 10, 15, 2, -10, 40.0f, 0.87f, 7, diff --git a/src/test/resources/backward_compatibility/2.1.0/test.clns.gz b/src/test/resources/backward_compatibility/2.1.0/test.clns.gz new file mode 100644 index 000000000..3ee6fe624 Binary files /dev/null and b/src/test/resources/backward_compatibility/2.1.0/test.clns.gz differ diff --git a/src/test/resources/backward_compatibility/2.1.0/test.vdjca.gz b/src/test/resources/backward_compatibility/2.1.0/test.vdjca.gz new file mode 100644 index 000000000..2f9864a7c Binary files /dev/null and b/src/test/resources/backward_compatibility/2.1.0/test.vdjca.gz differ