Making PROFILE take in ONT data #199

Open
wants to merge 31 commits into base: dev

Commits (31)
bb278f3
Adding a manual flag for human read filtering.
simonleandergrimm Feb 8, 2025
0aab9dd
Adding stand-alone containers for minimap2 and samtools.
simonleandergrimm Feb 8, 2025
79cff17
Adding code for filtering out human reads on ont.
simonleandergrimm Feb 8, 2025
1ae7470
Added optional human read filtering to subsetTrim
simonleandergrimm Feb 8, 2025
9ff3db1
Added human_read_filtering param to subset_trim
simonleandergrimm Feb 8, 2025
2989cde
Merge branch 'dev' into simon-human-read-filtering
simonleandergrimm Feb 11, 2025
d8c6790
added comment to human read filtering.
simonleandergrimm Feb 11, 2025
acdafe9
dropping second human read filtering.
simonleandergrimm Feb 11, 2025
49af743
added disclaimer re ONT
simonleandergrimm Feb 11, 2025
ca671f5
Added a test for minimap2
simonleandergrimm Feb 12, 2025
edde7be
Added a work in progress test for samtools. Doesn't yet work properly.
simonleandergrimm Feb 12, 2025
5950650
Merge branch 'dev' into simon-human-read-filtering
simonleandergrimm Feb 12, 2025
9707939
Added samtools and profile edits to make handling ONT data in PROFILE…
simonleandergrimm Feb 12, 2025
7652019
fixed human read filtering flag in run.config
simonleandergrimm Feb 12, 2025
a730568
Added proper tests and streaming.
simonleandergrimm Feb 12, 2025
3924195
added resource specification to samtools
simonleandergrimm Feb 12, 2025
259caec
adding comments to main.nf.test samtools
simonleandergrimm Feb 12, 2025
5b44d38
Merge branch 'dev' into simon-human-read-filtering
simonleandergrimm Feb 12, 2025
3808e9c
Merge branch 'simon-human-read-filtering' into simon-ont-profile-v2
simonleandergrimm Feb 12, 2025
347cb04
added streaming to samtools.
simonleandergrimm Feb 12, 2025
cb3afbb
Merge branch 'simon-human-read-filtering' into simon-ont-profile-v2
simonleandergrimm Feb 12, 2025
5bd7854
fixed testing for samtools
simonleandergrimm Feb 12, 2025
1e9f933
adding ribo index testing.
simonleandergrimm Feb 12, 2025
2730c11
Reverting the edits that were introduced through merging human_read_f…
simonleandergrimm Feb 14, 2025
683bd95
WIP change save, still need to fix reference generation.
simonleandergrimm Feb 14, 2025
6714709
Created custom container for samtools and minimap2
simonleandergrimm Feb 14, 2025
666b0bb
fixing minimap2 reference name.
simonleandergrimm Feb 15, 2025
af8eff0
fixed subset input params.
simonleandergrimm Feb 16, 2025
6eb0ba2
amended profile to use output of new minimap2 process.
simonleandergrimm Feb 17, 2025
64d649f
dropped samtools process and tests
simonleandergrimm Feb 17, 2025
085d8bd
resetting style of subset_trim input variable order.
simonleandergrimm Feb 17, 2025
10 changes: 10 additions & 0 deletions configs/containers.config
@@ -90,6 +90,16 @@ process {
        //- bioconda::samtools=1.21
        //- conda-forge::gzip=1.13
    }
    withLabel: minimap2_samtools {
        container = "community.wave.seqera.io/library/minimap2_samtools:03e1e7cf6ec6695d"
        // Built with Seqera Containers
        //channels:
        //- conda-forge
        //- bioconda
        //dependencies:
        //- bioconda::minimap2=2.28
        //- bioconda::samtools=1.21
    }
    withLabel: bbtools_samtools {
        container = "community.wave.seqera.io/library/bbmap_samtools_gzip:fc8114c072e9de02"
        // Built with Seqera Containers
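For context, any process that declares the matching label is routed into this minimap2/samtools image by the withLabel selector above. A minimal illustrative sketch (the process name and script below are hypothetical, not part of this PR):

// Hypothetical example: declaring the label is all a process needs to do
// to run inside the minimap2_samtools container defined in containers.config.
process CHECK_ALIGNER_VERSIONS {
    label "minimap2_samtools"
    output:
        path("versions.txt")
    shell:
    '''
    minimap2 --version > versions.txt
    samtools --version | head -n 1 >> versions.txt
    '''
}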
41 changes: 41 additions & 0 deletions modules/local/minimap2/main.nf
@@ -0,0 +1,41 @@
// Run minimap2 on a single input FASTQ file and partition reads based on alignment status
process MINIMAP2 {
    label "large"
    label "minimap2_samtools"
    input:
        tuple val(sample), path(reads)
        path(index_dir)
        val(suffix)
        val(remove_sq)
    output:
        tuple val(sample), path("${sample}_${suffix}_minimap2_mapped.sam.gz"), emit: sam
        tuple val(sample), path("${sample}_${suffix}_minimap2_mapped.fastq.gz"), emit: reads_mapped
        tuple val(sample), path("${sample}_${suffix}_minimap2_unmapped.fastq.gz"), emit: reads_unmapped
        tuple val(sample), path("${sample}_${suffix}_minimap2_in.fastq.gz"), emit: input
    shell:
        '''
        set -euo pipefail
        # Prepare inputs
        idx="!{index_dir}/mm2_index.mmi"
        sam="!{sample}_!{suffix}_minimap2_mapped.sam.gz"
        al="!{sample}_!{suffix}_minimap2_mapped.fastq.gz"
        un="!{sample}_!{suffix}_minimap2_unmapped.fastq.gz"

        # Run pipeline
        # Outputs a SAM file for all reads, which is then partitioned based on alignment status
        # - First branch (samtools view -u -f 4 -) filters SAM to unaligned reads and saves FASTQ
        # - Second branch (samtools view -u -F 4 -) filters SAM to aligned reads and saves FASTQ
        # - Third branch (samtools view -h -F 4 -) also filters SAM to aligned reads and saves SAM
        zcat !{reads} \
            | minimap2 -a ${idx} /dev/fd/0 \
            | tee \
                >(samtools view -u -f 4 - \
                    | samtools fastq - | gzip -c > ${un}) \
                >(samtools view -u -F 4 - \
                    | samtools fastq - | gzip -c > ${al}) \
            | samtools view -h -F 4 - \
            !{ remove_sq ? "| grep -v '^@SQ'" : "" } | gzip -c > ${sam}
        # Link input to output for testing
        ln -s !{reads} !{sample}_!{suffix}_minimap2_in.fastq.gz
        '''
}
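To make the module's interface concrete, here is a hedged usage sketch; the workflow name, sample tuple, and index path are illustrative, and only the argument order and emit names come from the process above:

// Illustrative invocation of MINIMAP2 (not part of this PR).
include { MINIMAP2 } from "./modules/local/minimap2"

workflow EXAMPLE {
    // (sample, reads) tuples, as expected by the first input channel
    reads_ch = Channel.of(["sample1", file("sample1.fastq.gz")])
    // The index directory must contain mm2_index.mmi; the suffix tags output names;
    // the final flag controls whether @SQ header lines are stripped from the SAM
    MINIMAP2(reads_ch, file("ref/mm2-ribo-index"), "ribo", false)
    // Named outputs: sam, reads_mapped, reads_unmapped, input
    MINIMAP2.out.reads_unmapped.view { sample, fq -> "${sample}: ${fq}" }
}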
1 change: 1 addition & 0 deletions nf-test.config
@@ -9,6 +9,7 @@ config {
load "[email protected]"
load "[email protected]"
load "[email protected]"
load "[email protected]"
}

}
31 changes: 26 additions & 5 deletions subworkflows/local/profile/main.nf
@@ -15,6 +15,8 @@ include { ADD_FIXED_COLUMN as ADD_KRAKEN_NORIBO } from "../../../modules/local/a
include { ADD_FIXED_COLUMN as ADD_BRACKEN_NORIBO } from "../../../modules/local/addFixedColumn"
include { CONCATENATE_TSVS as CONCATENATE_KRAKEN } from "../../../modules/local/concatenateTsvs"
include { CONCATENATE_TSVS as CONCATENATE_BRACKEN } from "../../../modules/local/concatenateTsvs"
include { MINIMAP2 as MINIMAP2_RIBO } from "../../../modules/local/minimap2"


/****************
| MAIN WORKFLOW |
@@ -27,16 +29,35 @@ workflow PROFILE {
        ref_dir
        min_kmer_fraction
        k
        bbduk_suffix
        ribo_suffix
        bracken_threshold
        single_end
        ont
    main:
        // Separate ribosomal reads
        ribo_path = "${ref_dir}/results/ribo-ref-concat.fasta.gz"
        ribo_ch = BBDUK(reads_ch, ribo_path, min_kmer_fraction, k, bbduk_suffix, !single_end)
        if (ont) {
            ribo_ref = "${projectDir}/results/mm2-ribo-index"
            mapped_ch = MINIMAP2_RIBO(reads_ch, ribo_ref, ribo_suffix, false)
            ribo_ch = mapped_ch
        } else {
            ribo_path = "${ref_dir}/results/ribo-ref-concat.fasta.gz"
            ribo_ch = BBDUK(reads_ch, ribo_path, min_kmer_fraction, k, ribo_suffix, !single_end)
        }
        // Run taxonomic profiling separately on ribo and non-ribo reads
        tax_ribo_ch = TAXONOMY_RIBO(ribo_ch.match, kraken_db_ch, "D", bracken_threshold, single_end)
        tax_noribo_ch = TAXONOMY_NORIBO(ribo_ch.nomatch, kraken_db_ch, "D", bracken_threshold, single_end)
        tax_ribo_ch = TAXONOMY_RIBO(
            ont ? ribo_ch.reads_mapped : ribo_ch.match,
Review comment (Collaborator, PR author):
not particularly happy with this, but I don't want to call the output of minimap2 "match" and "no_match"

Review comment (Contributor):

I don't think this is terrible, but given that the two branches of the if statement produce quite different things (k-mer matches vs alignments) it might be better to give them different names in the if statement, then define some new variables (maybe ribo_in and noribo_in) to hold them. Then the calls to TAXONOMY can take ribo_in and noribo_in as their first input. (See the sketch after this diff.)

Also, minor, but I'd prefer if we went back to having multiple arguments per line; I'm not a fan of these very long one-arg-per-line function calls.

            kraken_db_ch,
            "D",
            bracken_threshold,
            single_end
        )
        tax_noribo_ch = TAXONOMY_NORIBO(
            ont ? ribo_ch.reads_unmapped : ribo_ch.nomatch,
            kraken_db_ch,
            "D",
            bracken_threshold,
            single_end
        )
        // Add ribosomal status to output TSVs
        kr_ribo = ADD_KRAKEN_RIBO(tax_ribo_ch.kraken_reports, "ribosomal", "TRUE", "ribo")
        kr_noribo = ADD_KRAKEN_NORIBO(tax_noribo_ch.kraken_reports, "ribosomal", "FALSE", "noribo")
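As a concrete rendering of the review suggestion above, here is a hedged sketch of the proposed refactor; the names ribo_in and noribo_in come from the reviewer's comment, the rest mirrors the diff, and none of this is code from the PR itself:

// Sketch of the suggested refactor: each branch names its own outputs,
// then both TAXONOMY calls consume the shared ribo_in / noribo_in channels.
if (ont) {
    ribo_ref = "${projectDir}/results/mm2-ribo-index"
    mapped_ch = MINIMAP2_RIBO(reads_ch, ribo_ref, ribo_suffix, false)
    ribo_in = mapped_ch.reads_mapped
    noribo_in = mapped_ch.reads_unmapped
} else {
    ribo_path = "${ref_dir}/results/ribo-ref-concat.fasta.gz"
    bbduk_ch = BBDUK(reads_ch, ribo_path, min_kmer_fraction, k, ribo_suffix, !single_end)
    ribo_in = bbduk_ch.match
    noribo_in = bbduk_ch.nomatch
}
// Multiple arguments per line, as the reviewer prefers
tax_ribo_ch = TAXONOMY_RIBO(ribo_in, kraken_db_ch, "D", bracken_threshold, single_end)
tax_noribo_ch = TAXONOMY_NORIBO(noribo_in, kraken_db_ch, "D", bracken_threshold, single_end)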
71 changes: 71 additions & 0 deletions tests/modules/local/minimap2/main.nf.test
@@ -0,0 +1,71 @@
nextflow_process {
    name "Test process MINIMAP2"
    script "modules/local/minimap2/main.nf"
    process "MINIMAP2"
    config "tests/run_dev_se.config"
    tag "module"
    tag "minimap2"

    setup {
        run("LOAD_SAMPLESHEET") {
            script "subworkflows/local/loadSampleSheet/main.nf"
            process {
                """
                input[0] = "${projectDir}/test-data/ont-samplesheet.csv"
                input[1] = true
                """
            }
        }
    }

    test("Should correctly partition reads based on alignment status") {
        tag "expect_success"
        tag "single_end"
        when {
            params {}
            process {
                '''
                input[0] = LOAD_SAMPLESHEET.out.samplesheet
                input[1] = "${params.ref_dir}/results/mm2-ribo-index"
                input[2] = "test"
                input[3] = false
                '''
            }
        }
        then {
            // Should run without failures
            assert process.success

            // Check all expected output files exist
            assert path(process.out.sam[0][1]).exists()
            assert path(process.out.reads_mapped[0][1]).exists()
            assert path(process.out.reads_unmapped[0][1]).exists()
            assert path(process.out.input[0][1]).exists()

            // Get read IDs from output FASTQ files
            def fastq_mapped = path(process.out.reads_mapped[0][1]).fastq
            def fastq_unmapped = path(process.out.reads_unmapped[0][1]).fastq
            def fastq_read_ids_mapped = fastq_mapped.readNames.toSet()
            def fastq_read_ids_unmapped = fastq_unmapped.readNames.toSet()

            // Get read IDs from SAM file
            def samlines = sam(process.out.sam[0][1]).getSamLines()
            def sam_read_ids_mapped = samlines
                .collect { line -> line.split('\t')[0] } // Get read IDs
                .toSet()

            // Get input read IDs
            def input_reads = path(process.out.input[0][1]).fastq.readNames.toSet()

            // Verify read partitioning is correct
            assert sam_read_ids_mapped == fastq_read_ids_mapped

            // Verify no overlapping reads between mapped and unmapped sets
            assert fastq_read_ids_mapped.intersect(fastq_read_ids_unmapped).size() == 0

            // Verify input FASTQ contains all reads
            assert input_reads == fastq_read_ids_mapped + fastq_read_ids_unmapped
        }
    }
}

2 changes: 1 addition & 1 deletion workflows/run.nf
@@ -63,7 +63,7 @@ workflow RUN {

// Profile ribosomal and non-ribosomal reads of the subset adapter-trimmed reads
PROFILE(SUBSET_TRIM.out.trimmed_subset_reads, kraken_db_path, params.ref_dir, "0.4", "27", "ribo",
params.bracken_threshold, params.single_end)
params.bracken_threshold, params.single_end, params.ont)

// Publish results
params_str = JsonOutput.prettyPrint(JsonOutput.toJson(params))
2 changes: 1 addition & 1 deletion workflows/run_dev_se.nf
@@ -45,7 +45,7 @@ workflow RUN_DEV_SE {

// Profile ribosomal and non-ribosomal reads of the subset adapter-trimmed reads
PROFILE(SUBSET_TRIM.out.trimmed_subset_reads, kraken_db_path, params.ref_dir, "0.4", "27", "ribo",
params.bracken_threshold, params.single_end)
params.bracken_threshold, params.single_end, params.ont)

// Publish results
params_str = JsonOutput.prettyPrint(JsonOutput.toJson(params))
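Both entry workflows now forward params.ont into PROFILE. The flag's declaration is not part of this diff; a plausible sketch of how it would be defined in the pipeline configuration (default value assumed, not taken from the PR):

// Assumed declaration (not shown in this PR): a boolean switch that PROFILE
// uses to pick minimap2-based (ONT) vs BBDuk-based ribosomal read separation.
params {
    ont = false   // set to true when the input reads are Oxford Nanopore data
}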