From ec57c84f3c1624b7b9526a8ae770299ff4f5553a Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Wed, 20 Nov 2024 13:20:01 +0000
Subject: [PATCH 01/13] feat: consensus method for dada2_denoising

---
 CHANGELOG.md                     | 13 +++++
 conf/modules.config              |  9 ++-
 modules/local/dada2_denoising.nf | 52 +++++++++++++++--
 nextflow.config                  |  8 ++-
 nextflow_schema.json             | 98 ++++++++++++++++++++++++++++----
 5 files changed, 160 insertions(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 97d7a41d..3dc535b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,8 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
+- []() - New parameters introduced related to `--concatenate_reads consensus`
+
+| **Parameter**             | **Description**                                                                           | **Default Value** |
+| ------------------------- | ----------------------------------------------------------------------------------------- | ----------------- |
+| **asv_match**             | The score assigned for each matching base pair during sequence alignment.                 | 1                 |
+| **asv_mismatch**          | The penalty score assigned for each mismatched base pair during sequence alignment.       | 0                 |
+| **asv_gap**               | The penalty score assigned for each gap introduced during sequence alignment.             | -64               |
+| **asv_minoverlap**        | The minimum number of overlapping base pairs required to merge forward and reverse reads. | 12                |
+| **asv_maxmismatch**       | The maximum number of mismatches allowed within the overlapping region for merging reads. | 0                 |
+| **asv_percentile_cutoff** | The percentile cutoff determining the minimum observed overlap in the dataset.            | 0.001             |
+
 ### `Changed`
 
+- []() - Changed DADA2_DENOISING to support new method named "consensus", by setting `--concatenate_reads consensus`.
+
 ### `Fixed`
 
 ### `Dependencies`
diff --git a/conf/modules.config b/conf/modules.config
index 5f940e57..852972eb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -232,8 +232,13 @@ process {
         ].join(',').replaceAll('(,)*$', "")
         // setting from https://rdrr.io/bioc/dada2/man/mergePairs.html & https://rdrr.io/bioc/dada2/man/nwalign.html & match = getDadaOpt("MATCH"), mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"), missing from the list below is: 'band = -1'
         ext.args2 = [
-            'minOverlap = 12, maxMismatch = 0, returnRejects = FALSE, propagateCol = character(0), trimOverhang = FALSE, match = 1, mismatch = -64, gap = -64, homo_gap = NULL, endsfree = TRUE, vec = FALSE',
-            params.concatenate_reads ? "justConcatenate = TRUE" : "justConcatenate = FALSE"
+            "minOverlap = ${params.asv_minoverlap ?: 12}, maxMismatch = ${params.asv_maxmismatch ?: 0}, propagateCol = character(0), gap = ${params.asv_gap ?: -64}, homo_gap = NULL, endsfree = TRUE, vec = FALSE",
+            params.asv_concatenate_reads == "consensus" ?
+                "returnRejects = TRUE, match = ${params.match ?: 5}, mismatch = ${params.mismatch ?: -6}" :
+                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = ${params.asv_match ?: 1}, mismatch = ${params.asv_mismatch ?: -64}"
+        ].join(',').replaceAll('(,)*$', "")
+        ext.args3 = [
+            "quantile = ${params.asv_percentile_cutoff ?: 0.001}"
         ].join(',').replaceAll('(,)*$', "")
         publishDir = [
             [
diff --git a/modules/local/dada2_denoising.nf b/modules/local/dada2_denoising.nf
index 637bd898..ac292e06 100644
--- a/modules/local/dada2_denoising.nf
+++ b/modules/local/dada2_denoising.nf
@@ -26,13 +26,14 @@ process DADA2_DENOISING {
     def prefix = task.ext.prefix ?: "prefix"
     def args = task.ext.args ?: ''
     def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: '0.001'
     if (!meta.single_end) {
         """
         #!/usr/bin/env Rscript
         suppressPackageStartupMessages(library(dada2))
 
-        errF = readRDS("${errormodel[0]}")
-        errR = readRDS("${errormodel[1]}")
+        errF <- readRDS("${errormodel[0]}")
+        errR <- readRDS("${errormodel[1]}")
 
         filtFs <- sort(list.files("./filtered/", pattern = "_1.filt.fastq.gz", full.names = TRUE), method = "radix")
         filtRs <- sort(list.files("./filtered/", pattern = "_2.filt.fastq.gz", full.names = TRUE), method = "radix")
@@ -45,9 +46,50 @@ process DADA2_DENOISING {
         saveRDS(dadaRs, "${prefix}_2.dada.rds")
         sink(file = NULL)
 
-        #make table
-        mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, verbose=TRUE)
-        saveRDS(mergers, "${prefix}.mergers.rds")
+        # merge
+        if ("${params.concatenate_reads}" == "consensus") {
+            mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = FALSE, verbose=TRUE)
+            concats <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = TRUE, verbose=TRUE)
+
+            # in case there is only one sample in the entire run
+            if (is.data.frame(mergers)) {
+                mergers <- list(sample = mergers)
+                concats <- list(sample = concats)
+            }
+
+            # define the overlap threshold to decide if concatenation or not
+            min_overlap_obs <- lapply(mergers, function(X) {
+                mergers_accepted <- X[["accept"]]
+                if (sum(mergers_accepted) > 0) {
+                    min_overlap_obs <- X[["nmatch"]][mergers_accepted] + X[["nmismatch"]][mergers_accepted]
+                    rep(min_overlap_obs, X[["abundance"]][mergers_accepted])
+                } else {
+                    NA
+                }
+            })
+
+            min_overlap_obs <- Reduce(c, min_overlap_obs)
+            min_overlap_obs <- min_overlap_obs[!is.na(min_overlap_obs)]
+            min_overlap_obs <- quantile(min_overlap_obs, $args3)
+
+            for (x in names(mergers)) {
+                to_concat <- !mergers[[x]][["accept"]] & (mergers[[x]][["nmismatch"]] + mergers[[x]][["nmatch"]]) < min_overlap_obs
+
+                if (sum(to_concat) > 0) {
+                    mergers[[x]][to_concat, ] <- concats[[x]][to_concat, ]
+                    # filter out unaccepted non concatenated sequences
+                    mergers[[x]] <- mergers[[x]][mergers[[x]][["accept"]], ]
+                }
+
+            }
+
+        } else {
+            mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, verbose=TRUE)
+        }
+
+        saveRDS(mergers, "${meta.run}.mergers.rds")
+
+        # make table
         seqtab <- makeSequenceTable(mergers)
         saveRDS(seqtab, "${prefix}.seqtab.rds")
 
diff --git a/nextflow.config b/nextflow.config
index 1982ab26..38edf772 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,7 +46,13 @@ params {
     sample_inference           = "independent"
     illumina_novaseq           = false
     illumina_pe_its            = false
-    concatenate_reads          = false
+    asv_concatenate_reads      = false
+    asv_minoverlap             = 12
+    asv_maxmismatch            = 0
+    asv_gap                    = -64
+    asv_match                  = 1
+    asv_mismatch               = -64
+    asv_percentile_cutoff      = 0.001
     cut_its                    = "none"
     its_partial                = 0
     picrust                    = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e0b263b2..3a71fee6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -82,7 +82,9 @@
                     "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                 }
             },
-            "required": ["outdir"],
+            "required": [
+                "outdir"
+            ],
             "fa_icon": "fas fa-terminal"
         },
         "sequencing_input": {
@@ -259,12 +261,57 @@
                     "default": "independent",
                     "help_text": "If samples are treated independent (lowest sensitivity and lowest resources), pooled (highest sensitivity and resources) or pseudo-pooled (balance between required resources and sensitivity).",
                     "description": "Mode of sample inference: \"independent\", \"pooled\" or \"pseudo\"",
-                    "enum": ["independent", "pooled", "pseudo"]
+                    "enum": [
+                        "independent",
+                        "pooled",
+                        "pseudo"
+                    ]
                 },
-                "concatenate_reads": {
-                    "type": "boolean",
-                    "description": "Not recommended: When paired end reads are not sufficiently overlapping for merging.",
-                    "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**"
+                "asv_concatenate_reads": {
+                    "type": "string",
+                    "default": "false",
+                    "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
+                    "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**",
+                    "enum": [
+                        "false",
+                        "true",
+                        "consensus"
+                    ]
+                },
+                "asv_match": {
+                    "type": "integer",
+                    "default": 1,
+                    "description": "The score assigned for each matching base pair during sequence alignment.",
+                    "help_text": "This parameter specifies the numerical value added to the alignment score for every pair of bases that match between the forward and reverse reads. A higher value increases the preference for alignments with more matching bases."
+                },
+                "asv_mismatch": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "The penalty score assigned for each mismatched base pair during sequence alignment.",
+                    "help_text": "This parameter defines the numerical penalty subtracted from the alignment score for each base pair mismatch between the forward and reverse reads. A higher penalty reduces the likelihood of accepting alignments with mismatches."
+                },
+                "asv_gap": {
+                    "type": "integer",
+                    "default": -64,
+                    "description": "The penalty score assigned for each gap introduced during sequence alignment.",
+                    "help_text": "This parameter sets the numerical penalty subtracted from the alignment score for each gap (insertion or deletion) introduced to align the forward and reverse reads. A higher penalty discourages alignments that require gaps."
+                },
+                "asv_minoverlap": {
+                    "type": "integer",
+                    "default": 12,
+                    "description": "The minimum number of overlapping base pairs required to merge forward and reverse reads.",
+                    "help_text": "This parameter specifies the smallest number of consecutive base pairs that must overlap between the forward and reverse reads for them to be merged. Ensuring sufficient overlap is crucial for accurate merging."
+                },
+                "asv_maxmismatch": {
+                    "type": "integer",
+                    "default": 0,
+                    "description": "The maximum number of mismatches allowed within the overlapping region for merging reads.",
+                    "help_text": "This parameter defines the highest number of mismatched base pairs permitted in the overlap region between forward and reverse reads for a successful merge. Setting this value helps control the stringency of read merging, balancing between sensitivity and accuracy."
+                },
+                "asv_percentile_cutoff": {
+                    "type": "number",
+                    "default": 0.001,
+                    "description": "The percentile used to determine a stringent cutoff which will correspond to the minimum observed overlap in the dataset. This ensures that only read pairs with high overlap are merged into consensus sequences. Those with insufficient overlap are concatenated."
                 }
             },
             "fa_icon": "fas fa-braille"
@@ -438,7 +485,10 @@
                     "type": "string",
                     "description": "Method used for alignment, \"hmmer\" or \"mafft\"",
                     "default": "hmmer",
-                    "enum": ["hmmer", "mafft"]
+                    "enum": [
+                        "hmmer",
+                        "mafft"
+                    ]
                 },
                 "pplace_taxonomy": {
                     "type": "string",
@@ -454,7 +504,13 @@
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
+                    "enum": [
+                        "silva=138",
+                        "silva",
+                        "greengenes85",
+                        "greengenes2",
+                        "greengenes2=2022.10"
+                    ]
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
@@ -529,7 +585,12 @@
                     "help_text": "If data is long read ITS sequences, that need to be cut to ITS region (full ITS, only ITS1, or only ITS2) for taxonomy assignment.",
                     "description": "Part of ITS region to use for taxonomy assignment: \"full\", \"its1\", or \"its2\"",
                     "default": "none",
-                    "enum": ["none", "full", "its1", "its2"]
+                    "enum": [
+                        "none",
+                        "full",
+                        "its1",
+                        "its2"
+                    ]
                 },
                 "its_partial": {
                     "type": "integer",
@@ -549,7 +610,13 @@
                     "type": "string",
                     "help_text": "",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
+                    "enum": [
+                        "silva",
+                        "silva=128",
+                        "greengenes",
+                        "greengenes=13_8",
+                        "greengenes88"
+                    ]
                 },
                 "sidle_ref_tax_custom": {
                     "type": "string",
@@ -822,7 +889,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -996,4 +1070,4 @@
             "$ref": "#/$defs/institutional_config_options"
         }
     ]
-}
+}
\ No newline at end of file

From cc338b083f1549ab98dd217a501b75b045452605 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Wed, 20 Nov 2024 15:04:21 +0000
Subject: [PATCH 02/13] remove default values from modules.config, refactor
 quantile parameter definition in the process, update CHANGELOG, fix
 asv_concatenate_reads definition, fix default asv_match & asv_mismatch values

---
 CHANGELOG.md                     |  4 ++--
 conf/modules.config              | 10 ++++------
 modules/local/dada2_denoising.nf |  6 +++---
 nextflow.config                  |  6 +++---
 nextflow_schema.json             |  4 ++--
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3dc535b0..5b4c799c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-- []() - New parameters introduced related to `--concatenate_reads consensus`
+- [#803](https://github.com/nf-core/ampliseq/pull/803) - New parameters introduced related to `--asv_concatenate_reads consensus`
 
 | **Parameter**             | **Description**                                                                           | **Default Value** |
 | ------------------------- | ----------------------------------------------------------------------------------------- | ----------------- |
@@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Changed`
 
-- []() - Changed DADA2_DENOISING to support new method named "consensus", by setting `--concatenate_reads consensus`.
+- [#803](https://github.com/nf-core/ampliseq/pull/803) - Changed DADA2_DENOISING : `--concatenate_reads` renaming to `--asv_concatenate_reads` ;  support new method named "consensus" by setting `--asv_concatenate_reads consensus` ; changed options of `--asv_concatenate_reads` from TRUE/FALSE (boolean) to ["merge", "concatenate", "consensus"].
 
 ### `Fixed`
 
diff --git a/conf/modules.config b/conf/modules.config
index 852972eb..b227acb0 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -232,14 +232,12 @@ process {
         ].join(',').replaceAll('(,)*$', "")
         // setting from https://rdrr.io/bioc/dada2/man/mergePairs.html & https://rdrr.io/bioc/dada2/man/nwalign.html & match = getDadaOpt("MATCH"), mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"), missing from the list below is: 'band = -1'
         ext.args2 = [
-            "minOverlap = ${params.asv_minoverlap ?: 12}, maxMismatch = ${params.asv_maxmismatch ?: 0}, propagateCol = character(0), gap = ${params.asv_gap ?: -64}, homo_gap = NULL, endsfree = TRUE, vec = FALSE",
+            "minOverlap = ${params.asv_minoverlap}, maxMismatch = ${params.asv_maxmismatch}, propagateCol = character(0), gap = ${params.asv_gap}, homo_gap = NULL, endsfree = TRUE, vec = FALSE",
             params.asv_concatenate_reads == "consensus" ?
-                "returnRejects = TRUE, match = ${params.match ?: 5}, mismatch = ${params.mismatch ?: -6}" :
-                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = ${params.asv_match ?: 1}, mismatch = ${params.asv_mismatch ?: -64}"
-        ].join(',').replaceAll('(,)*$', "")
-        ext.args3 = [
-            "quantile = ${params.asv_percentile_cutoff ?: 0.001}"
+                "returnRejects = TRUE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}" :
+                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}"
         ].join(',').replaceAll('(,)*$', "")
+        ext.quantile = "${params.asv_percentile_cutoff}"
         publishDir = [
             [
                 path: { "${params.outdir}/dada2/args" },
diff --git a/modules/local/dada2_denoising.nf b/modules/local/dada2_denoising.nf
index ac292e06..143bb9cb 100644
--- a/modules/local/dada2_denoising.nf
+++ b/modules/local/dada2_denoising.nf
@@ -26,7 +26,7 @@ process DADA2_DENOISING {
     def prefix = task.ext.prefix ?: "prefix"
     def args = task.ext.args ?: ''
     def args2 = task.ext.args2 ?: ''
-    def args3 = task.ext.args3 ?: '0.001'
+    def quantile = task.ext.quantile ?: 0.001
     if (!meta.single_end) {
         """
         #!/usr/bin/env Rscript
@@ -47,7 +47,7 @@ process DADA2_DENOISING {
         sink(file = NULL)
 
         # merge
-        if ("${params.concatenate_reads}" == "consensus") {
+        if ("${params.asv_concatenate_reads}" == "consensus") {
             mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = FALSE, verbose=TRUE)
             concats <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = TRUE, verbose=TRUE)
 
@@ -70,7 +70,7 @@ process DADA2_DENOISING {
 
             min_overlap_obs <- Reduce(c, min_overlap_obs)
             min_overlap_obs <- min_overlap_obs[!is.na(min_overlap_obs)]
-            min_overlap_obs <- quantile(min_overlap_obs, $args3)
+            min_overlap_obs <- quantile(min_overlap_obs, $quantile)
 
             for (x in names(mergers)) {
                 to_concat <- !mergers[[x]][["accept"]] & (mergers[[x]][["nmismatch"]] + mergers[[x]][["nmatch"]]) < min_overlap_obs
diff --git a/nextflow.config b/nextflow.config
index 38edf772..87d93323 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,12 +46,12 @@ params {
     sample_inference           = "independent"
     illumina_novaseq           = false
     illumina_pe_its            = false
-    asv_concatenate_reads      = false
+    asv_concatenate_reads      = "merge"
     asv_minoverlap             = 12
     asv_maxmismatch            = 0
     asv_gap                    = -64
-    asv_match                  = 1
-    asv_mismatch               = -64
+    asv_match                  = 5
+    asv_mismatch               = -6
     asv_percentile_cutoff      = 0.001
     cut_its                    = "none"
     its_partial                = 0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3a71fee6..c4e16d16 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -273,8 +273,8 @@
                     "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
                     "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**",
                     "enum": [
-                        "false",
-                        "true",
+                        "merge",
+                        "concatenate",
                         "consensus"
                     ]
                 },

From 747435d00b714bb6024a54526b26b659479ecbdf Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Wed, 20 Nov 2024 15:16:57 +0000
Subject: [PATCH 03/13] rollback to correct match & mismatch values

---
 nextflow.config      | 4 ++--
 nextflow_schema.json | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 87d93323..4e6c6f40 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -50,8 +50,8 @@ params {
     asv_minoverlap             = 12
     asv_maxmismatch            = 0
     asv_gap                    = -64
-    asv_match                  = 5
-    asv_mismatch               = -6
+    asv_match                  = 1
+    asv_mismatch               = -64
     asv_percentile_cutoff      = 0.001
     cut_its                    = "none"
     its_partial                = 0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c4e16d16..aaaca5b2 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -286,7 +286,7 @@
                 },
                 "asv_mismatch": {
                     "type": "integer",
-                    "default": 0,
+                    "default": -64,
                     "description": "The penalty score assigned for each mismatched base pair during sequence alignment.",
                     "help_text": "This parameter defines the numerical penalty subtracted from the alignment score for each base pair mismatch between the forward and reverse reads. A higher penalty reduces the likelihood of accepting alignments with mismatches."
                 },

From 8e46db719ea1ae8b5c108ebd4e1d742e1be14ee3 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Thu, 21 Nov 2024 09:42:32 +0000
Subject: [PATCH 04/13] fix: default value in schema not valid

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index aaaca5b2..2a8f493f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -269,7 +269,7 @@
                 },
                 "asv_concatenate_reads": {
                     "type": "string",
-                    "default": "false",
+                    "default": "merge",
                     "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
                     "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**",
                     "enum": [

From 9ee15fe5a29db4e2e0dcf2a06a2cb4791a2bcb07 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Thu, 21 Nov 2024 10:06:46 +0000
Subject: [PATCH 05/13] chore: set fixed parameters when consensus disabled,
 use config parameters when enabled

---
 conf/modules.config |   6 +-
 nextflow.config     | 130 ++++++++++++++++++++++----------------------
 2 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index b227acb0..0c00faf3 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -232,10 +232,10 @@ process {
         ].join(',').replaceAll('(,)*$', "")
         // setting from https://rdrr.io/bioc/dada2/man/mergePairs.html & https://rdrr.io/bioc/dada2/man/nwalign.html & match = getDadaOpt("MATCH"), mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"), missing from the list below is: 'band = -1'
         ext.args2 = [
-            "minOverlap = ${params.asv_minoverlap}, maxMismatch = ${params.asv_maxmismatch}, propagateCol = character(0), gap = ${params.asv_gap}, homo_gap = NULL, endsfree = TRUE, vec = FALSE",
+            "homo_gap = NULL, endsfree = TRUE, vec = FALSE, propagateCol = character(0)",
             params.asv_concatenate_reads == "consensus" ?
-                "returnRejects = TRUE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}" :
-                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}"
+                "returnRejects = TRUE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}, minOverlap = ${params.asv_minoverlap}, maxMismatch = ${params.asv_maxmismatch}, gap = ${params.asv_gap}" :
+                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = 1, mismatch = -64, gap = -64, minOverlap = 12, maxMismatch = 0"
         ].join(',').replaceAll('(,)*$', "")
         ext.quantile = "${params.asv_percentile_cutoff}"
         publishDir = [
diff --git a/nextflow.config b/nextflow.config
index 4e6c6f40..91dcba0d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -23,71 +23,71 @@ params {
     metadata                   = null
 
     // Other options
-    save_intermediates         = false
-    trunc_qmin                 = 25
-    trunc_rmin                 = 0.75
-    trunclenf                  = null
-    trunclenr                  = null
-    max_ee                     = 2
-    max_len                    = null
-    ignore_failed_filtering    = false
-    min_len                    = 50
-    metadata_category          = null
-    metadata_category_barplot  = null
-    double_primer              = false
-    retain_untrimmed           = false
-    cutadapt_min_overlap       = 3
-    cutadapt_max_error_rate    = 0.1
-    exclude_taxa               = "mitochondria,chloroplast"
-    min_frequency              = 1
-    min_samples                = 1
-    multiple_sequencing_runs   = false
-    single_end                 = false
-    sample_inference           = "independent"
-    illumina_novaseq           = false
-    illumina_pe_its            = false
-    asv_concatenate_reads      = "merge"
-    asv_minoverlap             = 12
-    asv_maxmismatch            = 0
-    asv_gap                    = -64
-    asv_match                  = 1
-    asv_mismatch               = -64
-    asv_percentile_cutoff      = 0.001
-    cut_its                    = "none"
-    its_partial                = 0
-    picrust                    = false
-    sbdiexport                 = false
-    addsh                      = false
-    tax_agglom_min             = 2
-    tax_agglom_max             = 6
-    min_read_counts            = 1
-    ignore_failed_trimming     = false
-    ignore_empty_input_files   = false
-    qiime_adonis_formula       = null
-    seed                       = 100
-    filter_ssu                 = null
-    min_len_asv                = null
-    max_len_asv                = null
-    filter_codons              = null
-    orf_start                  = 1
-    orf_end                    = null
-    stop_codons                = "TAA,TAG"
-    pplace_tree                = null
-    pplace_aln                 = null
-    pplace_model               = null
-    pplace_alnmethod           = 'hmmer'
-    pplace_taxonomy            = null
-    pplace_name                = null
-    diversity_rarefaction_depth= 500
-    ancom_sample_min_count     = 1
-    vsearch_cluster            = null
-    vsearch_cluster_id         = 0.97
-    ancom                      = false
-    ancombc                    = false
-    ancombc_effect_size        = 1
-    ancombc_significance       = 0.05
-    ancombc_formula            = null
-    ancombc_formula_reflvl     = null
+    save_intermediates                   = false
+    trunc_qmin                           = 25
+    trunc_rmin                           = 0.75
+    trunclenf                            = null
+    trunclenr                            = null
+    max_ee                               = 2
+    max_len                              = null
+    ignore_failed_filtering              = false
+    min_len                              = 50
+    metadata_category                    = null
+    metadata_category_barplot            = null
+    double_primer                        = false
+    retain_untrimmed                     = false
+    cutadapt_min_overlap                 = 3
+    cutadapt_max_error_rate              = 0.1
+    exclude_taxa                         = "mitochondria,chloroplast"
+    min_frequency                        = 1
+    min_samples                          = 1
+    multiple_sequencing_runs             = false
+    single_end                           = false
+    sample_inference                     = "independent"
+    illumina_novaseq                     = false
+    illumina_pe_its                      = false
+    asv_concatenate_reads                = "merge"
+    asv_minoverlap                       = 12
+    asv_maxmismatch                      = 0
+    asv_gap                              = -64
+    asv_match                            = 5
+    asv_mismatch                         = -6
+    asv_percentile_cutoff                = 0.001
+    cut_its                              = "none"
+    its_partial                          = 0
+    picrust                              = false
+    sbdiexport                           = false
+    addsh                                = false
+    tax_agglom_min                       = 2
+    tax_agglom_max                       = 6
+    min_read_counts                      = 1
+    ignore_failed_trimming               = false
+    ignore_empty_input_files             = false
+    qiime_adonis_formula                 = null
+    seed                                 = 100
+    filter_ssu                           = null
+    min_len_asv                          = null
+    max_len_asv                          = null
+    filter_codons                        = null
+    orf_start                            = 1
+    orf_end                              = null
+    stop_codons                          = "TAA,TAG"
+    pplace_tree                          = null
+    pplace_aln                           = null
+    pplace_model                         = null
+    pplace_alnmethod                     = 'hmmer'
+    pplace_taxonomy                      = null
+    pplace_name                          = null
+    diversity_rarefaction_depth          = 500
+    ancom_sample_min_count               = 1
+    vsearch_cluster                      = null
+    vsearch_cluster_id                   = 0.97
+    ancom                                = false
+    ancombc                              = false
+    ancombc_effect_size                  = 1
+    ancombc_significance                 = 0.05
+    ancombc_formula                      = null
+    ancombc_formula_reflvl               = null
 
     // Report options
     report_template   = "${projectDir}/assets/report_template.Rmd"

From e073f97be93d6adda426afac6f8cd3fb9ac3791c Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Thu, 21 Nov 2024 12:42:31 +0000
Subject: [PATCH 06/13] chore: rename prefix of newly introduced parameters to
 "mergepairs_consensus_" and rename "asv_concatenate_reads" to
 "mergepairs_strategy"

---
 CHANGELOG.md         |  20 +++----
 conf/modules.config  |  16 +++---
 nextflow.config      | 130 +++++++++++++++++++++----------------------
 nextflow_schema.json |  14 ++---
 4 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b4c799c..e70b8e1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,20 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-- [#803](https://github.com/nf-core/ampliseq/pull/803) - New parameters introduced related to `--asv_concatenate_reads consensus`
+- [#803](https://github.com/nf-core/ampliseq/pull/803) - New parameters introduced related to `--mergepairs_strategy`. These parameters would only be effective if `--mergepairs_strategy consensus` is set.
 
-| **Parameter**             | **Description**                                                                           | **Default Value** |
-| ------------------------- | ----------------------------------------------------------------------------------------- | ----------------- |
-| **asv_match**             | The score assigned for each matching base pair during sequence alignment.                 | 1                 |
-| **asv_mismatch**          | The penalty score assigned for each mismatched base pair during sequence alignment.       | 0                 |
-| **asv_gap**               | The penalty score assigned for each gap introduced during sequence alignment.             | -64               |
-| **asv_minoverlap**        | The minimum number of overlapping base pairs required to merge forward and reverse reads. | 12                |
-| **asv_maxmismatch**       | The maximum number of mismatches allowed within the overlapping region for merging reads. | 0                 |
-| **asv_percentile_cutoff** | The percentile cutoff determining the minimum observed overlap in the dataset.            | 0.001             |
+| **Parameter**                              | **Description**                                                                           | **Default Value** |
+| ------------------------------------------ | ----------------------------------------------------------------------------------------- | ----------------- |
+| **mergepairs_consensus_match**             | The score assigned for each matching base pair during sequence alignment.                 | 5                 |
+| **mergepairs_consensus_mismatch**          | The penalty score assigned for each mismatched base pair during sequence alignment.       | -6                |
+| **mergepairs_consensus_gap**               | The penalty score assigned for each gap introduced during sequence alignment.             | -64               |
+| **mergepairs_consensus_minoverlap**        | The minimum number of overlapping base pairs required to merge forward and reverse reads. | 12                |
+| **mergepairs_consensus_maxmismatch**       | The maximum number of mismatches allowed within the overlapping region for merging reads. | 0                 |
+| **mergepairs_consensus_percentile_cutoff** | The percentile cutoff determining the minimum observed overlap in the dataset.            | 0.001             |
 
 ### `Changed`
 
-- [#803](https://github.com/nf-core/ampliseq/pull/803) - Changed DADA2_DENOISING : `--concatenate_reads` renaming to `--asv_concatenate_reads` ;  support new method named "consensus" by setting `--asv_concatenate_reads consensus` ; changed options of `--asv_concatenate_reads` from TRUE/FALSE (boolean) to ["merge", "concatenate", "consensus"].
+- [#803](https://github.com/nf-core/ampliseq/pull/803) - Changed DADA2_DENOISING : `--concatenate_reads` renaming to `--mergepairs_strategy` ; support new method named "consensus" by setting `--mergepairs_strategy consensus` ; changed options of `--mergepairs_strategy` from TRUE/FALSE (boolean) to ["merge", "concatenate", "consensus"].
 
 ### `Fixed`
 
diff --git a/conf/modules.config b/conf/modules.config
index 0c00faf3..ebb2ed18 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -233,11 +233,11 @@ process {
         // setting from https://rdrr.io/bioc/dada2/man/mergePairs.html & https://rdrr.io/bioc/dada2/man/nwalign.html & match = getDadaOpt("MATCH"), mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"), missing from the list below is: 'band = -1'
         ext.args2 = [
             "homo_gap = NULL, endsfree = TRUE, vec = FALSE, propagateCol = character(0)",
-            params.asv_concatenate_reads == "consensus" ?
-                "returnRejects = TRUE, match = ${params.asv_match}, mismatch = ${params.asv_mismatch}, minOverlap = ${params.asv_minoverlap}, maxMismatch = ${params.asv_maxmismatch}, gap = ${params.asv_gap}" :
-                "justConcatenate = ${params.asv_concatenate_reads == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = 1, mismatch = -64, gap = -64, minOverlap = 12, maxMismatch = 0"
+            params.mergepairs_strategy == "consensus" ?
+                "returnRejects = TRUE, match = ${params.mergepairs_consensus_match}, mismatch = ${params.mergepairs_consensus_mismatch}, minOverlap = ${params.mergepairs_consensus_minoverlap}, maxMismatch = ${params.mergepairs_consensus_maxmismatch}, gap = ${params.mergepairs_consensus_gap}" :
+                "justConcatenate = ${params.mergepairs_strategy == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = 1, mismatch = -64, gap = -64, minOverlap = 12, maxMismatch = 0"
         ].join(',').replaceAll('(,)*$', "")
-        ext.quantile = "${params.asv_percentile_cutoff}"
+        ext.quantile = "${params.mergepairs_consensus_percentile_cutoff}"
         publishDir = [
             [
                 path: { "${params.outdir}/dada2/args" },
@@ -448,7 +448,7 @@ process {
         ext.min_len_asv = { params.min_len_asv }
         ext.max_len_asv = { params.max_len_asv }
         publishDir = [
-            path: { "${params.outdir}/asv_length_filter" },
+            path: { "${params.outdir}/mergepairs_consensus_length_filter" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
@@ -493,7 +493,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/itsx" },
             mode: params.publish_dir_mode,
-            pattern: "ASV_*"
+            pattern: "mergepairs_consensus_*"
         ]
     }
 
@@ -501,7 +501,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/dada2" },
             mode: params.publish_dir_mode,
-            pattern: "{ref_taxonomy*.txt,ASV_tax*.tsv,ASV_tax_species*.tsv}"
+            pattern: "{ref_taxonomy*.txt,mergepairs_consensus_tax*.tsv,mergepairs_consensus_tax_species*.tsv}"
         ]
     }
 
@@ -816,7 +816,7 @@ process {
         ]
     }
 
-    withName: 'QIIME2_INASV_BPAVG' {
+    withName: 'QIIME2_INmergepairs_consensus_BPAVG' {
         publishDir = [
             path: { "${params.outdir}/qiime2/barplot_average" },
             mode: params.publish_dir_mode,
diff --git a/nextflow.config b/nextflow.config
index 91dcba0d..5b2b2bd0 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -23,71 +23,71 @@ params {
     metadata                   = null
 
     // Other options
-    save_intermediates                   = false
-    trunc_qmin                           = 25
-    trunc_rmin                           = 0.75
-    trunclenf                            = null
-    trunclenr                            = null
-    max_ee                               = 2
-    max_len                              = null
-    ignore_failed_filtering              = false
-    min_len                              = 50
-    metadata_category                    = null
-    metadata_category_barplot            = null
-    double_primer                        = false
-    retain_untrimmed                     = false
-    cutadapt_min_overlap                 = 3
-    cutadapt_max_error_rate              = 0.1
-    exclude_taxa                         = "mitochondria,chloroplast"
-    min_frequency                        = 1
-    min_samples                          = 1
-    multiple_sequencing_runs             = false
-    single_end                           = false
-    sample_inference                     = "independent"
-    illumina_novaseq                     = false
-    illumina_pe_its                      = false
-    asv_concatenate_reads                = "merge"
-    asv_minoverlap                       = 12
-    asv_maxmismatch                      = 0
-    asv_gap                              = -64
-    asv_match                            = 5
-    asv_mismatch                         = -6
-    asv_percentile_cutoff                = 0.001
-    cut_its                              = "none"
-    its_partial                          = 0
-    picrust                              = false
-    sbdiexport                           = false
-    addsh                                = false
-    tax_agglom_min                       = 2
-    tax_agglom_max                       = 6
-    min_read_counts                      = 1
-    ignore_failed_trimming               = false
-    ignore_empty_input_files             = false
-    qiime_adonis_formula                 = null
-    seed                                 = 100
-    filter_ssu                           = null
-    min_len_asv                          = null
-    max_len_asv                          = null
-    filter_codons                        = null
-    orf_start                            = 1
-    orf_end                              = null
-    stop_codons                          = "TAA,TAG"
-    pplace_tree                          = null
-    pplace_aln                           = null
-    pplace_model                         = null
-    pplace_alnmethod                     = 'hmmer'
-    pplace_taxonomy                      = null
-    pplace_name                          = null
-    diversity_rarefaction_depth          = 500
-    ancom_sample_min_count               = 1
-    vsearch_cluster                      = null
-    vsearch_cluster_id                   = 0.97
-    ancom                                = false
-    ancombc                              = false
-    ancombc_effect_size                  = 1
-    ancombc_significance                 = 0.05
-    ancombc_formula                      = null
-    ancombc_formula_reflvl               = null
+    save_intermediates                        = false
+    trunc_qmin                                = 25
+    trunc_rmin                                = 0.75
+    trunclenf                                 = null
+    trunclenr                                 = null
+    max_ee                                    = 2
+    max_len                                   = null
+    ignore_failed_filtering                   = false
+    min_len                                   = 50
+    metadata_category                         = null
+    metadata_category_barplot                 = null
+    double_primer                             = false
+    retain_untrimmed                          = false
+    cutadapt_min_overlap                      = 3
+    cutadapt_max_error_rate                   = 0.1
+    exclude_taxa                              = "mitochondria,chloroplast"
+    min_frequency                             = 1
+    min_samples                               = 1
+    multiple_sequencing_runs                  = false
+    single_end                                = false
+    sample_inference                          = "independent"
+    illumina_novaseq                          = false
+    illumina_pe_its                           = false
+    mergepairs_strategy                       = "merge"
+    mergepairs_consensus_minoverlap           = 12
+    mergepairs_consensus_maxmismatch          = 0
+    mergepairs_consensus_gap                  = -64
+    mergepairs_consensus_match                = 5
+    mergepairs_consensus_mismatch             = -6
+    mergepairs_consensus_percentile_cutoff    = 0.001
+    cut_its                                   = "none"
+    its_partial                               = 0
+    picrust                                   = false
+    sbdiexport                                = false
+    addsh                                     = false
+    tax_agglom_min                            = 2
+    tax_agglom_max                            = 6
+    min_read_counts                           = 1
+    ignore_failed_trimming                    = false
+    ignore_empty_input_files                  = false
+    qiime_adonis_formula                      = null
+    seed                                      = 100
+    filter_ssu                                = null
+    min_len_asv                               = null
+    max_len_asv                               = null
+    filter_codons                             = null
+    orf_start                                 = 1
+    orf_end                                   = null
+    stop_codons                               = "TAA,TAG"
+    pplace_tree                               = null
+    pplace_aln                                = null
+    pplace_model                              = null
+    pplace_alnmethod                          = 'hmmer'
+    pplace_taxonomy                           = null
+    pplace_name                               = null
+    diversity_rarefaction_depth               = 500
+    ancom_sample_min_count                    = 1
+    vsearch_cluster                           = null
+    vsearch_cluster_id                        = 0.97
+    ancom                                     = false
+    ancombc                                   = false
+    ancombc_effect_size                       = 1
+    ancombc_significance                      = 0.05
+    ancombc_formula                           = null
+    ancombc_formula_reflvl                    = null
 
     // Report options
     report_template   = "${projectDir}/assets/report_template.Rmd"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2a8f493f..ba402064 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -267,7 +267,7 @@
                         "pseudo"
                     ]
                 },
-                "asv_concatenate_reads": {
+                "mergepairs_strategy": {
                     "type": "string",
                     "default": "merge",
                     "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
@@ -278,37 +278,37 @@
                         "consensus"
                     ]
                 },
-                "asv_match": {
+                "mergepairs_consensus_match": {
                     "type": "integer",
                     "default": 1,
                     "description": "The score assigned for each matching base pair during sequence alignment.",
                     "help_text": "This parameter specifies the numerical value added to the alignment score for every pair of bases that match between the forward and reverse reads. A higher value increases the preference for alignments with more matching bases."
                 },
-                "asv_mismatch": {
+                "mergepairs_consensus_mismatch": {
                     "type": "integer",
                     "default": -64,
                     "description": "The penalty score assigned for each mismatched base pair during sequence alignment.",
                     "help_text": "This parameter defines the numerical penalty subtracted from the alignment score for each base pair mismatch between the forward and reverse reads. A higher penalty reduces the likelihood of accepting alignments with mismatches."
                 },
-                "asv_gap": {
+                "mergepairs_consensus_gap": {
                     "type": "integer",
                     "default": -64,
                     "description": "The penalty score assigned for each gap introduced during sequence alignment.",
                     "help_text": "This parameter sets the numerical penalty subtracted from the alignment score for each gap (insertion or deletion) introduced to align the forward and reverse reads. A higher penalty discourages alignments that require gaps."
                 },
-                "asv_minoverlap": {
+                "mergepairs_consensus_minoverlap": {
                     "type": "integer",
                     "default": 12,
                     "description": "The minimum number of overlapping base pairs required to merge forward and reverse reads.",
                     "help_text": "This parameter specifies the smallest number of consecutive base pairs that must overlap between the forward and reverse reads for them to be merged. Ensuring sufficient overlap is crucial for accurate merging."
                 },
-                "asv_maxmismatch": {
+                "mergepairs_consensus_maxmismatch": {
                     "type": "integer",
                     "default": 0,
                     "description": "The maximum number of mismatches allowed within the overlapping region for merging reads.",
                     "help_text": "This parameter defines the highest number of mismatched base pairs permitted in the overlap region between forward and reverse reads for a successful merge. Setting this value helps control the stringency of read merging, balancing between sensitivity and accuracy."
                 },
-                "asv_percentile_cutoff": {
+                "mergepairs_consensus_percentile_cutoff": {
                     "type": "number",
                     "default": 0.001,
                     "description": "The percentile used to determine a stringent cutoff which will correspond to the minimum observed overlap in the dataset. This ensures that only read pairs with high overlap are merged into consensus sequences. Those with insufficient overlap are concatenated."

From a9ed8c4000dac8d95296d3b27f2ff5afe80d70e8 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Thu, 21 Nov 2024 12:49:31 +0000
Subject: [PATCH 07/13] fix: fix wrongly replaced values

---
 conf/modules.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ebb2ed18..f6c94c46 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -448,7 +448,7 @@ process {
         ext.min_len_asv = { params.min_len_asv }
         ext.max_len_asv = { params.max_len_asv }
         publishDir = [
-            path: { "${params.outdir}/mergepairs_consensus_length_filter" },
+            path: { "${params.outdir}/asv_length_filter" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
@@ -493,7 +493,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/itsx" },
             mode: params.publish_dir_mode,
-            pattern: "mergepairs_consensus_*"
+            pattern: "ASV_*"
         ]
     }
 
@@ -501,7 +501,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/dada2" },
             mode: params.publish_dir_mode,
-            pattern: "{ref_taxonomy*.txt,mergepairs_consensus_tax*.tsv,mergepairs_consensus_tax_species*.tsv}"
+            pattern: "{ref_taxonomy*.txt,ASV_tax*.tsv,ASV_tax_species*.tsv}"
         ]
     }
 
@@ -816,7 +816,7 @@ process {
         ]
     }
 
-    withName: 'QIIME2_INmergepairs_consensus_BPAVG' {
+    withName: 'QIIME2_INASV_BPAVG' {
         publishDir = [
             path: { "${params.outdir}/qiime2/barplot_average" },
             mode: params.publish_dir_mode,

From 11e8c7e92e2b2860b253b5a758eb3f7b3eedd22b Mon Sep 17 00:00:00 2001
From: Daniel Straub <42973691+d4straub@users.noreply.github.com>
Date: Fri, 20 Dec 2024 16:15:13 +0100
Subject: [PATCH 08/13] Apply suggestions from code review

---
 conf/modules.config              | 2 +-
 modules/local/dada2_denoising.nf | 2 +-
 nextflow_schema.json             | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index f6c94c46..3f797b44 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -232,7 +232,7 @@ process {
         ].join(',').replaceAll('(,)*$', "")
         // setting from https://rdrr.io/bioc/dada2/man/mergePairs.html & https://rdrr.io/bioc/dada2/man/nwalign.html & match = getDadaOpt("MATCH"), mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"), missing from the list below is: 'band = -1'
         ext.args2 = [
-            "homo_gap = NULL, endsfree = TRUE, vec = FALSE, propagateCol = character(0)",
+            "homo_gap = NULL, endsfree = TRUE, vec = FALSE, propagateCol = character(0), trimOverhang = FALSE",
             params.mergepairs_strategy == "consensus" ?
                 "returnRejects = TRUE, match = ${params.mergepairs_consensus_match}, mismatch = ${params.mergepairs_consensus_mismatch}, minOverlap = ${params.mergepairs_consensus_minoverlap}, maxMismatch = ${params.mergepairs_consensus_maxmismatch}, gap = ${params.mergepairs_consensus_gap}" :
                 "justConcatenate = ${params.mergepairs_strategy == 'concatenate' ? 'TRUE' : 'FALSE'}, returnRejects = FALSE, match = 1, mismatch = -64, gap = -64, minOverlap = 12, maxMismatch = 0"
diff --git a/modules/local/dada2_denoising.nf b/modules/local/dada2_denoising.nf
index 143bb9cb..4e77238b 100644
--- a/modules/local/dada2_denoising.nf
+++ b/modules/local/dada2_denoising.nf
@@ -47,7 +47,7 @@ process DADA2_DENOISING {
         sink(file = NULL)
 
         # merge
-        if ("${params.asv_concatenate_reads}" == "consensus") {
+        if ("${params.mergepairs_strategy}" == "consensus") {
             mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = FALSE, verbose=TRUE)
             concats <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, justConcatenate = TRUE, verbose=TRUE)
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f77790d3..5fdcbac8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -280,13 +280,13 @@
                 },
                 "mergepairs_consensus_match": {
                     "type": "integer",
-                    "default": 1,
+                    "default": 5,
                     "description": "The score assigned for each matching base pair during sequence alignment.",
                     "help_text": "This parameter specifies the numerical value added to the alignment score for every pair of bases that match between the forward and reverse reads. A higher value increases the preference for alignments with more matching bases."
                 },
                 "mergepairs_consensus_mismatch": {
                     "type": "integer",
-                    "default": -64,
+                    "default": -6,
                     "description": "The penalty score assigned for each mismatched base pair during sequence alignment.",
                     "help_text": "This parameter defines the numerical penalty subtracted from the alignment score for each base pair mismatch between the forward and reverse reads. A higher penalty reduces the likelihood of accepting alignments with mismatches."
                 },

From 57b7a243a780d99870879260f8fd9620fc7749f4 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Fri, 20 Dec 2024 15:25:13 +0000
Subject: [PATCH 09/13] [automated] Fix code linting

---
 nextflow_schema.json | 55 ++++++++------------------------------------
 1 file changed, 9 insertions(+), 46 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9d6cd5e9..313f5d7e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -82,9 +82,7 @@
                     "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                 }
             },
-            "required": [
-                "outdir"
-            ],
+            "required": ["outdir"],
             "fa_icon": "fas fa-terminal"
         },
         "sequencing_input": {
@@ -261,22 +259,14 @@
                     "default": "independent",
                     "help_text": "If samples are treated independent (lowest sensitivity and lowest resources), pooled (highest sensitivity and resources) or pseudo-pooled (balance between required resources and sensitivity).",
                     "description": "Mode of sample inference: \"independent\", \"pooled\" or \"pseudo\"",
-                    "enum": [
-                        "independent",
-                        "pooled",
-                        "pseudo"
-                    ]
+                    "enum": ["independent", "pooled", "pseudo"]
                 },
                 "mergepairs_strategy": {
                     "type": "string",
                     "default": "merge",
                     "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
                     "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**",
-                    "enum": [
-                        "merge",
-                        "concatenate",
-                        "consensus"
-                    ]
+                    "enum": ["merge", "concatenate", "consensus"]
                 },
                 "mergepairs_consensus_match": {
                     "type": "integer",
@@ -495,10 +485,7 @@
                     "type": "string",
                     "description": "Method used for alignment, \"hmmer\" or \"mafft\"",
                     "default": "hmmer",
-                    "enum": [
-                        "hmmer",
-                        "mafft"
-                    ]
+                    "enum": ["hmmer", "mafft"]
                 },
                 "pplace_taxonomy": {
                     "type": "string",
@@ -514,13 +501,7 @@
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": [
-                        "silva=138",
-                        "silva",
-                        "greengenes85",
-                        "greengenes2",
-                        "greengenes2=2022.10"
-                    ]
+                    "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
@@ -595,12 +576,7 @@
                     "help_text": "If data is long read ITS sequences, that need to be cut to ITS region (full ITS, only ITS1, or only ITS2) for taxonomy assignment.",
                     "description": "Part of ITS region to use for taxonomy assignment: \"full\", \"its1\", or \"its2\"",
                     "default": "none",
-                    "enum": [
-                        "none",
-                        "full",
-                        "its1",
-                        "its2"
-                    ]
+                    "enum": ["none", "full", "its1", "its2"]
                 },
                 "its_partial": {
                     "type": "integer",
@@ -620,13 +596,7 @@
                     "type": "string",
                     "help_text": "",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": [
-                        "silva",
-                        "silva=128",
-                        "greengenes",
-                        "greengenes=13_8",
-                        "greengenes88"
-                    ]
+                    "enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
                 },
                 "sidle_ref_tax_custom": {
                     "type": "string",
@@ -899,14 +869,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -1080,4 +1043,4 @@
             "$ref": "#/$defs/institutional_config_options"
         }
     ]
-}
\ No newline at end of file
+}

From f8a44ad6638f30b982d5c8501ba66aad850919a0 Mon Sep 17 00:00:00 2001
From: Daniel Straub <42973691+d4straub@users.noreply.github.com>
Date: Fri, 20 Dec 2024 16:34:44 +0100
Subject: [PATCH 10/13] Update nextflow_schema.json

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 313f5d7e..04388305 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -265,7 +265,7 @@
                     "type": "string",
                     "default": "merge",
                     "description": "Strategy to merge paired end reads. When paired end reads are not sufficiently overlapping for merging, you can use \"concatenate\" (not recommended). When you have a mix of overlapping and non overlapping reads use \"consensus\"",
-                    "help_text": "This parameters specifies that paired-end reads are not merged after denoising but concatenated (separated by 10 N's). This is of advantage when an amplicon was sequenced that is too long for merging (i.e. bad experimental design). This is an alternative to only analyzing the forward or reverse read in case of non-overlapping paired-end sequencing data.\n\n**This parameter is not recommended! Only if all other options fail.**",
+                    "help_text": "This parameter specifies how paired-end reads are merged after denoising. By default, read pairs are merged by overlap. Concatenating read pairs (separated by 10 N's) is an alternative to analyzing only the forward or reverse read in case of non-overlapping paired-end sequencing data; this is not recommended and should only be used if all other options fail. The consensus strategy merges read pairs by overlap if possible and concatenates non-overlapping read pairs, based on the `--mergepairs_consensus_*` parameters.",
                     "enum": ["merge", "concatenate", "consensus"]
                 },
                 "mergepairs_consensus_match": {

From 871703b88ef9b4e8bac32ac26545b8f51263dd1e Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Tue, 14 Jan 2025 14:46:30 +0000
Subject: [PATCH 11/13] chore: update default values for consensus

---
 CHANGELOG.md         | 6 +++---
 nextflow.config      | 6 +++---
 nextflow_schema.json | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d5f540d3..8b8d0d21 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,9 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 | **Parameter**                              | **Description**                                                                           | **Default Value** |
 | ------------------------------------------ | ----------------------------------------------------------------------------------------- | ----------------- |
-| **mergepairs_consensus_match**             | The score assigned for each matching base pair during sequence alignment.                 | 5                 |
-| **mergepairs_consensus_mismatch**          | The penalty score assigned for each mismatched base pair during sequence alignment.       | -6                |
-| **mergepairs_consensus_gap**               | The penalty score assigned for each gap introduced during sequence alignment.             | -64               |
+| **mergepairs_consensus_match**             | The score assigned for each matching base pair during sequence alignment.                 | 1                 |
+| **mergepairs_consensus_mismatch**          | The penalty score assigned for each mismatched base pair during sequence alignment.       | -2                |
+| **mergepairs_consensus_gap**               | The penalty score assigned for each gap introduced during sequence alignment.             | -4                |
 | **mergepairs_consensus_minoverlap**        | The minimum number of overlapping base pairs required to merge forward and reverse reads. | 12                |
 | **mergepairs_consensus_maxmismatch**       | The maximum number of mismatches allowed within the overlapping region for merging reads. | 0                 |
 | **mergepairs_consensus_percentile_cutoff** | The percentile cutoff determining the minimum observed overlap in the dataset.            | 0.001             |
diff --git a/nextflow.config b/nextflow.config
index 477d0e43..50126233 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -49,9 +49,9 @@ params {
     mergepairs_strategy                       = "merge"
     mergepairs_consensus_minoverlap           = 12
     mergepairs_consensus_maxmismatch          = 0
-    mergepairs_consensus_gap                  = -64
-    mergepairs_consensus_match                = 5
-    mergepairs_consensus_mismatch             = -6
+    mergepairs_consensus_match                = 1
+    mergepairs_consensus_mismatch             = -2
+    mergepairs_consensus_gap                  = -4
     mergepairs_consensus_percentile_cutoff    = 0.001
     cut_its                                   = "none"
     its_partial                               = 0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4d280394..058500fd 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -276,13 +276,13 @@
                 },
                 "mergepairs_consensus_mismatch": {
                     "type": "integer",
-                    "default": -6,
+                    "default": -2,
                     "description": "The penalty score assigned for each mismatched base pair during sequence alignment.",
                     "help_text": "This parameter defines the numerical penalty subtracted from the alignment score for each base pair mismatch between the forward and reverse reads. A higher penalty reduces the likelihood of accepting alignments with mismatches."
                 },
                 "mergepairs_consensus_gap": {
                     "type": "integer",
-                    "default": -64,
+                    "default": -4,
                     "description": "The penalty score assigned for each gap introduced during sequence alignment.",
                     "help_text": "This parameter sets the numerical penalty subtracted from the alignment score for each gap (insertion or deletion) introduced to align the forward and reverse reads. A higher penalty discourages alignments that require gaps."
                 },

From 845aac4ed86c6b07e180a37022cb1d6e4969a9fc Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Tue, 14 Jan 2025 14:49:11 +0000
Subject: [PATCH 12/13] chore: update default values for consensus (missing
 match in nextflow_schema.json)

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 058500fd..65d31c68 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -270,7 +270,7 @@
                 },
                 "mergepairs_consensus_match": {
                     "type": "integer",
-                    "default": 5,
+                    "default": 1,
                     "description": "The score assigned for each matching base pair during sequence alignment.",
                     "help_text": "This parameter specifies the numerical value added to the alignment score for every pair of bases that match between the forward and reverse reads. A higher value increases the preference for alignments with more matching bases."
                 },

From 2cbab9efe4b8e329b7565a901d7e899cdca5f82d Mon Sep 17 00:00:00 2001
From: Daniel Straub <42973691+d4straub@users.noreply.github.com>
Date: Fri, 24 Jan 2025 15:31:57 +0100
Subject: [PATCH 13/13] Update modules/local/dada2_denoising.nf

---
 modules/local/dada2_denoising.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/dada2_denoising.nf b/modules/local/dada2_denoising.nf
index 4e77238b..d39988de 100644
--- a/modules/local/dada2_denoising.nf
+++ b/modules/local/dada2_denoising.nf
@@ -87,7 +87,7 @@ process DADA2_DENOISING {
             mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, $args2, verbose=TRUE)
         }
 
-        saveRDS(mergers, "${meta.run}.mergers.rds")
+        saveRDS(mergers, "${prefix}.mergers.rds")
 
         # make table
         seqtab <- makeSequenceTable(mergers)