Refactor: Rework sample channel creation
FelixAntoineLeSieur committed Jul 10, 2024
1 parent 35a672c commit 6063676
Showing 6 changed files with 24 additions and 218 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Initial release of ferlab/postprocessing, created with the [nf-core](https://nf-co.re/) template.

### `Added`
[#2]https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/2 Added tests and samplefile channel functions
[#2](https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/2) Added tests and samplefile channel functions

### `Fixed`
[#1](https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/1) Fixed template schemas
2 changes: 1 addition & 1 deletion assets/multiqc_config.yml
@@ -1,5 +1,5 @@
report_comment: >
This report has been generated by the <a href="https://github.com/ferlab/postprocessing/releases/tag/1.0.1" target="_blank">ferlab/postprocessing</a>
This report has been generated by the <a href="https://github.com/ferlab/postprocessing/releases/tag/1.0.2" target="_blank">ferlab/postprocessing</a>
analysis pipeline.
report_section_order:
"ferlab-postprocessing-methods-description":
8 changes: 1 addition & 7 deletions main.nf
@@ -89,20 +89,14 @@ workflow {
FERLAB_POSTPROCESSING (
PIPELINE_INITIALISATION.out.samplesheet
)
*/
//
// SUBWORKFLOW: Run completion tasks
//
PIPELINE_COMPLETION (
params.email,
params.email_on_fail,
params.plaintext_email,
params.outdir,
params.monochrome_logs,
params.hook_url,
FERLAB_POSTPROCESSING.out.multiqc_report
)
*/
}

/*
2 changes: 1 addition & 1 deletion nextflow.config
@@ -239,7 +239,7 @@ manifest {
description = """Variant analysis for genome and exome GVCFs"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '1.0.1'
version = '1.0.2'
doi = ''
}

227 changes: 20 additions & 207 deletions subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf
@@ -78,55 +78,23 @@ workflow PIPELINE_INITIALISATION {
validateInputParameters()

//_________Local___________
def rowMapper = getRowMapper()

//
// Create channel from input file provided through params.input
//
Channel.fromPath(file("$params.input"))
.splitCsv(sep: '\t', strip: true)
.view{"Split CSV: $it" }
.map(rowMapper)
.view{"rowMapper: $it" }
.map{rowMapperV2(it)}
.flatMap { it ->
return it.files.collect{f -> [familyId: it.familyId, sequencingType: it.sequencingType, size: it.files.size(), file: f]};
}.multiMap { it ->
meta: tuple(it.familyId, [size: it.size, sequencingType: it.sequencingType])
files: tuple(it.familyId, file("${it.file}*"))
}
.set { sampleChannel}
.set { ch_sampleChannel}
emit:
sampleFiles = sampleChannel.files
sampleMeta = sampleChannel.meta
sampleFiles = ch_sampleChannel.files
sampleMeta = ch_sampleChannel.meta
versions = ch_versions

sampleMeta | view{"Meta: $it"}
sampleFiles | view{"files: $it"}
//
// Create channel from input file provided through params.input
//
/*
Channel
.fromSamplesheet("input")
.map {
meta, fastq_1, fastq_2 ->
if (!fastq_2) {
return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
} else {
return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
}
}
.groupTuple()
.map {
validateInputSamplesheet(it)
}
.map {
meta, fastqs ->
return [ meta, fastqs.flatten() ]
}
.set { ch_samplesheet }
emit:
samplesheet = ch_samplesheet
versions = ch_versions
*/
}
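
For illustration, here is a minimal standalone sketch (not part of the diff above) of how the reworked sample channel behaves, assuming Nextflow DSL2 and one hypothetical V2 sample sheet row (family ID, sequencing type, then one column per file). The file names are placeholders, and the `file()` glob resolution used in the pipeline is omitted so the example runs without real files.

workflow {
    Channel
        .of(['FAM1', 'WES', 'child.gvcf.gz', 'mother.gvcf.gz'])   // one parsed TSV row (hypothetical)
        .map { columns -> [familyId: columns[0], sequencingType: columns[1], files: columns[2..-1]] }
        .flatMap { row ->
            // one element per file, carrying the family-level metadata along
            row.files.collect { f ->
                [familyId: row.familyId, sequencingType: row.sequencingType, size: row.files.size(), file: f]
            }
        }
        .multiMap { it ->
            meta:  tuple(it.familyId, [size: it.size, sequencingType: it.sequencingType])
            files: tuple(it.familyId, it.file)
        }
        .set { ch_sampleChannel }

    ch_sampleChannel.meta.view  { "Meta: $it" }    // e.g. [FAM1, [size:2, sequencingType:WES]], emitted once per file element
    ch_sampleChannel.files.view { "files: $it" }   // e.g. [FAM1, child.gvcf.gz]
}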

/*
@@ -138,31 +106,18 @@ workflow PIPELINE_INITIALISATION {
workflow PIPELINE_COMPLETION {

take:
email // string: email address
email_on_fail // string: email address sent on pipeline failure
plaintext_email // boolean: Send plain-text email instead of HTML
outdir // path: Path to output directory where results will be published
monochrome_logs // boolean: Disable ANSI colour codes in log output
hook_url // string: hook URL for notifications
multiqc_report // string: Path to MultiQC report

main:

summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")

//
// Completion email and summary
// Completion summary
//
workflow.onComplete {
if (email || email_on_fail) {
completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList())
}

completionSummary(monochrome_logs)

if (hook_url) {
imNotification(summary_params, hook_url)
}
}

workflow.onError {
@@ -180,93 +135,31 @@ workflow PIPELINE_COMPLETION {
enum SequencingType {
WGS,
WES
}

enum SampleFileFormat {
V1,
V2
}

def findParamInEnum(paramName, paramValue, enumInstance) {
def validValues = enumInstance.values()*.name()
if (!validValues.contains(paramValue)) {
def validValuesStr = validValues.collect{"`$it`"}.join(", ")
error("Invalid value for parameter `$paramName`: `$paramValue`. Possible values are: $validValuesStr")
}
return enumInstance.valueOf(paramValue)
}
def getSampleFileFormat() {
if (!params.sampleFileFormat) {
log.warn("Using default value `V1` for parameter `sampleFileFormat`")
params.sampleFileFormat="V1"
}
return findParamInEnum("sampleFileFormat", params.sampleFileFormat.toUpperCase(), SampleFileFormat)
}

def getSequencingType() {
if (!params.sequencingType) {
log.warn("Using default value `WGS` for parameter `sequencingType`")
params.sequencingType="WGS"
}
return findParamInEnum("sequencingType", params.sequencingType.toUpperCase(), SequencingType)
}


/**
Get row mapper that match the configured sample file format
Note: it is returned as a closure to guaranty the compatibility with nextflow channel operators
*/
def getRowMapper() {
def format = getSampleFileFormat()

if (format == SampleFileFormat.V1) {
def sequencingType = getSequencingType()
return {columns -> rowMapperV1(columns, sequencingType)}
}
return {columns -> rowMapperV2(columns)}
}


//Transform a row from the sample file in V1 format from a list structure to a map structure.
def rowMapperV1(columns, sequencingType) {
if ((columns[1] == "WGS") || (columns[1] == "WES")){
error("Error: SampleFileFormat stated as V1 (possibly by default), \
however V2 format seems to be in use. \n Please check the sample file and use\
--sampleFileFormat V2 as needed")
exit(0)
public static boolean contains(String s) {
for(SequencingType sequencingType in SequencingType.values()){
if(sequencingType.name().equals(s)){
return true
}
}
return false
}
print(columns[1])
return [
familyId: columns[0],
sequencingType: sequencingType,
files: columns.tail()
]
}


//Transform a row from the sample file in V2 format from a list structure to a map structure
def rowMapperV2(columns) {
def sampleSeqType = columns[1]
if ((sampleSeqType != "WGS") && (sampleSeqType != "WES")){
error("Error: SampleFileFormat stated as V2, \
however V2 format is not respected \n Please check the sample file and use \
--sampleFileFormat V1 as needed")
exit(0)
}
if (sampleSeqType != params.sequencingType){
error("Error: sequencingType stated as '$params.sequencingType', however sample file states '$sampleSeqType'. \
Please make sure to input the correct type as --sequencingType")
exit(0)
def sampleSeqType = columns[1].toUpperCase()
if (!(SequencingType.contains(sampleSeqType))){
error("Error: Second column of the sample sheet should be either 'WES' or 'WGS'")
exit(1)
}
return [
familyId: columns[0],
sequencingType: columns[1].toUpperCase() as SequencingType,
sequencingType: sampleSeqType.toUpperCase() as SequencingType,
files: columns[2..-1]
]
}
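
As a quick standalone illustration (not taken from the repository), the enum-based check introduced here can be exercised in plain Groovy. The contains helper below is a compact equivalent of the loop-based version added above, and all sample values are made up.

enum SequencingType {
    WGS,
    WES;

    // compact equivalent of the loop-based contains() added in this commit
    static boolean contains(String s) {
        return values()*.name().contains(s)
    }
}

// hypothetical values for the second column of a sample sheet row
assert SequencingType.contains('WGS')
assert SequencingType.contains('wes'.toUpperCase())
assert !SequencingType.contains('EXOME')                               // rowMapperV2 would call error() here
assert ('wes'.toUpperCase() as SequencingType) == SequencingType.WES   // the coercion used when building the row map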


//_____________Template functions_____________
//
// Check and validate pipeline parameters
@@ -275,22 +168,6 @@ def validateInputParameters() {
genomeExistsError()
}

//
// Validate channels from input samplesheet
//
/*
def validateInputSamplesheet(input) {
def (metas, fastqs) = input[1..2]
// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
if (!endedness_ok) {
error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
}
return [ metas[0], fastqs ]
}
*/
//
// Get attribute from genome config file e.g. fasta
//
@@ -316,67 +193,3 @@ def genomeExistsError() {
error(error_string)
}
}

//
// Generate methods description for MultiQC
//
def toolCitationText() {
// TODO nf-core: Optionally add in-text citation tools to this list.
// Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
// Uncomment function in methodsDescriptionText to render in MultiQC report
def citation_text = [
"Tools used in the workflow included:",
"FastQC (Andrews 2010),",
"MultiQC (Ewels et al. 2016)",
"."
].join(' ').trim()

return citation_text
}

def toolBibliographyText() {
// TODO nf-core: Optionally add bibliographic entries to this list.
// Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
// Uncomment function in methodsDescriptionText to render in MultiQC report
def reference_text = [
"<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
"<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
].join(' ').trim()

return reference_text
}

def methodsDescriptionText(mqc_methods_yaml) {
// Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
def meta = [:]
meta.workflow = workflow.toMap()
meta["manifest_map"] = workflow.manifest.toMap()

// Pipeline DOI
if (meta.manifest_map.doi) {
// Using a loop to handle multiple DOIs
// Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
// Removing ` ` since the manifest.doi is a string and not a proper list
def temp_doi_ref = ""
String[] manifest_doi = meta.manifest_map.doi.tokenize(",")
for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: <a href=\'https://doi.org/${doi_ref.replace("https://doi.org/", "").replace(" ", "")}\'>${doi_ref.replace("https://doi.org/", "").replace(" ", "")}</a>), "
meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
} else meta["doi_text"] = ""
meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"

// Tool references
meta["tool_citations"] = ""
meta["tool_bibliography"] = ""

// TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
// meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
// meta["tool_bibliography"] = toolBibliographyText()


def methods_text = mqc_methods_yaml.text

def engine = new groovy.text.SimpleTemplateEngine()
def description_html = engine.createTemplate(methods_text).make(meta)

return description_html.toString()
}
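
For reference, the template rendering used by methodsDescriptionText boils down to Groovy's SimpleTemplateEngine: a map of values is bound into a template string. In this small sketch the template text and values are invented placeholders, not the pipeline's real MultiQC methods YAML.

import groovy.text.SimpleTemplateEngine

def meta = [
    manifest_map: [version: '1.0.2'],          // placeholder values
    workflow    : [runName : 'example_run']
]
def methods_text = 'Pipeline version ${manifest_map.version}, run name ${workflow.runName}.'  // stand-in for mqc_methods_yaml.text

def engine = new SimpleTemplateEngine()
println engine.createTemplate(methods_text).make(meta).toString()
// prints: Pipeline version 1.0.2, run name example_run.
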
1 change: 0 additions & 1 deletion workflows/postprocessing.nf
@@ -9,7 +9,6 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
