Skip to content

Commit

Permalink
Revert "add chunking before filtering for PacBio"
Browse files Browse the repository at this point in the history
This reverts commit 83dd8cd.
  • Loading branch information
reichan1998 committed Oct 15, 2024
1 parent 83dd8cd commit 3cd2c01
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 154 deletions.
26 changes: 1 addition & 25 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,45 +35,21 @@ process {
withName: SAMTOOLS_COLLATETOFASTA {
beforeScript = { "export REF_PATH=spoof"}
ext.args = { (params.use_work_dir_as_temp ? "-T." : "") }
ext.prefix = { "${meta.chunk_id}" }
}

withName: SAMTOOLS_FILTERTOFASTQ {
ext.prefix = { "${meta.chunk_id}" }
}

withName: BLAST_BLASTN {
ext.args = '-task blastn -reward 1 -penalty -5 -gapopen 3 -gapextend 3 -dust yes -soft_masking true -evalue .01 -searchsp 1750000000000 -outfmt 6'
ext.prefix = { "${meta.chunk_id}" }
}

withName: PACBIO_FILTER {
ext.prefix = { "${meta.chunk_id}" }
}

withName: SAMTOOLS_CONVERT {
beforeScript = { "export REF_PATH=spoof"}
ext.args = "--output-fmt bam --write-index"
ext.prefix = { "${meta.chunk_id}" }
ext.args = "-be '[rq]>=0.99' -x fi -x fp -x ri -x rp --write-index"
}

withName: CONVERT_CRAM {
ext.args = "--output-fmt cram"
}

withName: CONVERT_FQ_CRAM {
ext.args = "--output-fmt cram"
ext.prefix = { "${meta.chunk_id}" }
}

withName: SAMTOOLS_INDEX_FQ {
ext.prefix = { "${meta.chunk_id}" }
}

withName: GENERATE_CRAM_CSV_FQ {
ext.prefix = { "${meta.chunk_id}" }
}

withName: ".*:ALIGN_ILLUMINA:.*:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
ext.args = ""
ext.args1 = { "-F 0x200 -nt" }
Expand Down
46 changes: 0 additions & 46 deletions modules/local/cram_filter.nf

This file was deleted.

47 changes: 15 additions & 32 deletions subworkflows/local/align_pacbio.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@
include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio'
include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FQ } from '../../modules/nf-core/samtools/index/main'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { GENERATE_CRAM_CSV as GENERATE_CRAM_CSV_FQ } from '../../modules/local/generate_cram_csv'
include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce'
include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_SORMADUP as CONVERT_FQ_CRAM } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { CREATE_CRAM_FILTER_INPUT } from '../../subworkflows/local/create_cram_filter_input'


workflow ALIGN_PACBIO {
take:
Expand All @@ -25,55 +22,41 @@ workflow ALIGN_PACBIO {
ch_versions = Channel.empty()
ch_merged_bam = Channel.empty()

// Convert input to CRAM
CONVERT_CRAM ( reads, fasta )
// Filter BAM and output as FASTQ
FILTER_PACBIO ( reads, db )
ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions )

// Convert FASTQ to CRAM
CONVERT_CRAM ( FILTER_PACBIO.out.fastq, fasta )
ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions )

SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions )

// Index the CRAM file
SAMTOOLS_INDEX ( SAMTOOLS_ADDREPLACERG.out.cram )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )

SAMTOOLS_ADDREPLACERG.out.cram
| join ( SAMTOOLS_INDEX.out.crai )
| set { ch_reads_cram }

GENERATE_CRAM_CSV( ch_reads_cram )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

CREATE_CRAM_FILTER_INPUT ( GENERATE_CRAM_CSV.out.csv, fasta )
ch_versions = ch_versions.mix( CREATE_CRAM_FILTER_INPUT.out.versions )

// Filter BAM and output as FASTQ
FILTER_PACBIO ( CREATE_CRAM_FILTER_INPUT.out.chunked_cram, db )
ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions )

// Convert FASTQ to CRAM
CONVERT_FQ_CRAM ( FILTER_PACBIO.out.fastq, fasta )
ch_versions = ch_versions.mix ( CONVERT_FQ_CRAM.out.versions )

SAMTOOLS_INDEX_FQ ( CONVERT_FQ_CRAM.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX_FQ.out.versions )
// Index the CRAM file
SAMTOOLS_INDEX ( ch_reads_cram )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )

CONVERT_FQ_CRAM.out.bam
| join ( SAMTOOLS_INDEX_FQ.out.crai )
ch_reads_cram
| join ( SAMTOOLS_INDEX.out.crai )
| set { ch_reads_cram_crai }


//
// MODULE: generate a CRAM CSV file containing the required parameters for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT
//
GENERATE_CRAM_CSV_FQ( ch_reads_cram_crai )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV_FQ.out.versions )
GENERATE_CRAM_CSV( ch_reads_cram_crai )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

//
// SUBWORKFLOW: mapping pacbio reads using minimap2
//
MINIMAP2_MAPREDUCE (
fasta,
GENERATE_CRAM_CSV_FQ.out.csv
GENERATE_CRAM_CSV.out.csv
)
ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions )
ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam)
Expand Down
40 changes: 0 additions & 40 deletions subworkflows/local/create_cram_filter_input.nf

This file was deleted.

61 changes: 50 additions & 11 deletions subworkflows/local/filter_pacbio.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,40 +9,63 @@ include { BLAST_BLASTN } from '../../modules/nf-core/blast/
include { PACBIO_FILTER } from '../../modules/local/pacbio_filter'
include { SAMTOOLS_FILTERTOFASTQ } from '../../modules/local/samtools_filtertofastq'
include { SEQKIT_FQ2FA } from '../../modules/nf-core/seqkit/fq2fa'
include { BBMAP_FILTERBYNAME } from '../../modules/nf-core/bbmap/filterbyname'
include { BBMAP_FILTERBYNAME } from '../../modules/nf-core/bbmap/filterbyname'


workflow FILTER_PACBIO {
take:
reads // channel: [ val(meta), /path/to/datafile ]
db // channel: /path/to/vector_db


main:
ch_versions = Channel.empty()

// Convert from PacBio CRAM to BAM

// Check file types and branch
reads
| map { meta, cram -> [ meta, cram, [] ] }
| branch {
meta, reads ->
fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ }
bam : true
}
| set { ch_reads }


// Convert from PacBio BAM to Samtools BAM
ch_reads.bam
| map { meta, bam -> [ meta, bam, [] ] }
| set { ch_pacbio }

SAMTOOLS_CONVERT ( ch_pacbio, [ [], [] ], [] )
ch_versions = ch_versions.mix ( SAMTOOLS_CONVERT.out.versions )
ch_versions = ch_versions.mix ( SAMTOOLS_CONVERT.out.versions.first() )


// Collate BAM file to create interleaved FASTA
SAMTOOLS_COLLATETOFASTA ( SAMTOOLS_CONVERT.out.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_COLLATETOFASTA.out.versions )
ch_versions = ch_versions.mix ( SAMTOOLS_COLLATETOFASTA.out.versions.first() )


// Convert FASTQ to FASTA using SEQKIT_FQ2FA
SEQKIT_FQ2FA ( ch_reads.fastq )
ch_versions = ch_versions.mix ( SEQKIT_FQ2FA.out.versions.first() )

// Combine BAM-derived FASTA

// Combine BAM-derived FASTA with converted FASTQ inputs
SAMTOOLS_COLLATETOFASTA.out.fasta
| concat( SEQKIT_FQ2FA.out.fasta )
| set { ch_fasta }


// Nucleotide BLAST
BLAST_BLASTN ( ch_fasta, db )
ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions )
ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions.first() )


// Filter BLAST output
PACBIO_FILTER ( BLAST_BLASTN.out.txt )
ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions )
ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions.first() )


// Filter the input BAM and output as interleaved FASTQ
SAMTOOLS_CONVERT.out.bam
Expand All @@ -55,13 +78,29 @@ workflow FILTER_PACBIO {
| set { ch_bam_reads }

SAMTOOLS_FILTERTOFASTQ ( ch_bam_reads.bams, ch_bam_reads.lists )
ch_versions = ch_versions.mix ( SAMTOOLS_FILTERTOFASTQ.out.versions )
ch_versions = ch_versions.mix ( SAMTOOLS_FILTERTOFASTQ.out.versions.first() )


// Filter inputs provided as FASTQ and output as interleaved FASTQ
ch_reads.fastq
| join(PACBIO_FILTER.out.list)
| multiMap { meta, fastq, list -> \
fastqs: [meta, fastq]
lists: list
}
| set { ch_reads_fastq }

BBMAP_FILTERBYNAME ( ch_reads_fastq.fastqs, ch_reads_fastq.lists , "fastq", true)
ch_versions = ch_versions.mix ( BBMAP_FILTERBYNAME.out.versions.first() )


// Merge filtered outputs as ch_output_fastq
SAMTOOLS_FILTERTOFASTQ.out.fastq
BBMAP_FILTERBYNAME.out.reads
| concat ( SAMTOOLS_FILTERTOFASTQ.out.fastq )
| set { ch_filtered_fastq }


emit:
fastq = ch_filtered_fastq // channel: [ meta, /path/to/fastq ]
versions = ch_versions // channel: [ versions.yml ]
}
}

0 comments on commit 3cd2c01

Please sign in to comment.