Skip to content

Commit

Permalink
fix assembleContigs for long reads
Browse files Browse the repository at this point in the history
  • Loading branch information
gnefedev committed Apr 24, 2024
1 parent e929f89 commit f520a37
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.file.Path
import java.util.*
import kotlin.math.max

object CommandAssembleContigs {
const val COMMAND_NAME = AnalyzeCommandDescriptor.assembleContigs.name
Expand Down Expand Up @@ -236,25 +237,35 @@ object CommandAssembleContigs {
reportBuilder.onAssemblyCanceled(clone)
return@mapInParallelOrdered arrayOf(clone)
}
var maxShiftForVGene = 0
cloneAlignments.alignments().forEach { alignments ->
for ((key, value) in alignments.hitsMap) {
for (hit in value) {
coverages[key]?.let { it[hit.gene.id]?.accumulate(hit) }
for ((geneType, hits) in alignments.hitsMap) {
for (hit in hits) {
coverages[geneType]?.let { it[hit.gene.id]?.accumulate(hit) }
}
}
alignments.getHits(Variable).forEach { hit ->
val shift = (0 until alignments.numberOfTargets()).minOf { i ->
hit.getAlignment(i)?.sequence2Range?.from ?: Int.MAX_VALUE
}
check(shift != Int.MAX_VALUE)
maxShiftForVGene = max(shift, maxShiftForVGene)
}
}

// Selecting best hits for clonal sequence assembly based in the coverage information
val bestGenes = coverages.mapValuesTo(EnumMap(GeneType::class.java)) { (_, value) ->
value.values.maxByOrNull { it.getNumberOfCoveredPoints(1) }?.hit
}
val bestGenes =
coverages.mapValuesTo(EnumMap(GeneType::class.java)) { (_, value) ->
value.values.maxByOrNull { it.getNumberOfCoveredPoints(1) }?.hit
}

// Performing contig assembly
val fullSeqAssembler = FullSeqAssembler(
cloneFactory, cmdParams.parameters,
header.assemblerParameters!!.assemblingFeatures,
clone, header.alignerParameters,
bestGenes[Variable], bestGenes[Joining]
bestGenes[Variable], bestGenes[Joining],
maxShiftForVGene
)
fullSeqAssembler.report = reportBuilder
val rawVariantsData =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import io.repseq.core.GeneFeature
import io.repseq.core.GeneFeature.CDR3
import io.repseq.core.GeneFeatures
import io.repseq.core.GeneType
import io.repseq.core.GeneType.Joining
import io.repseq.core.GeneType.Variable
import org.apache.commons.math3.random.RandomDataGenerator
import org.apache.commons.math3.random.Well19937c
Expand Down Expand Up @@ -208,7 +209,10 @@ class FullSeqAssemblerTest {
DEFAULT_PARAMETERS,
align.parameters.cloneAssemblerParameters.assemblingFeatures,
assemble.cloneSet[0],
align.parameters.alignerParameters
align.parameters.alignerParameters,
assemble.cloneSet[0].getBestHit(Variable),
assemble.cloneSet[0].getBestHit(Joining),
1 shl 14
)
val prep = agg.calculateRawData {
CUtils.asOutputPort(
Expand Down Expand Up @@ -282,7 +286,10 @@ class FullSeqAssemblerTest {
DEFAULT_PARAMETERS,
align.parameters.cloneAssemblerParameters.assemblingFeatures,
assemble.cloneSet[0],
align.parameters.alignerParameters
align.parameters.alignerParameters,
assemble.cloneSet[0].getBestHit(Variable),
assemble.cloneSet[0].getBestHit(Joining),
1 shl 14
)
val r2s = agg.toPointSequences(align.alignments[1])
val p2 = TIntHashSet(Arrays.stream(r2s).mapToInt { s: PointSequence -> s.point }
Expand Down Expand Up @@ -426,7 +433,10 @@ class FullSeqAssemblerTest {
DEFAULT_PARAMETERS,
align.parameters.cloneAssemblerParameters.assemblingFeatures,
initialClone,
align.parameters.alignerParameters
align.parameters.alignerParameters,
initialClone.getBestHit(Variable),
initialClone.getBestHit(Joining),
1 shl 14
)
val prep = agg.calculateRawData { alignments.asOutputPort() }
val clones = listOf(*agg.callVariants(prep))
Expand Down

0 comments on commit f520a37

Please sign in to comment.