Refactor: Rework sample channel creation
FelixAntoineLeSieur committed Jul 10, 2024
1 parent 35a672c commit 6063676
Showing 6 changed files with 24 additions and 218 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Initial release of ferlab/postprocessing, created with the [nf-core](https://nf-co.re/) template.

### `Added`
[#2]https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/2 Added tests and samplefile channel functions
[#2](https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/2) Added tests and samplefile channel functions

### `Fixed`
[#1](https://github.com/FelixAntoineLeSieur/Post-processing-Pipeline/pull/1) Fixed template schemas
2 changes: 1 addition & 1 deletion assets/multiqc_config.yml
@@ -1,5 +1,5 @@
report_comment: >
This report has been generated by the <a href="https://github.com/ferlab/postprocessing/releases/tag/1.0.1" target="_blank">ferlab/postprocessing</a>
This report has been generated by the <a href="https://github.com/ferlab/postprocessing/releases/tag/1.0.2" target="_blank">ferlab/postprocessing</a>
analysis pipeline.
report_section_order:
"ferlab-postprocessing-methods-description":
8 changes: 1 addition & 7 deletions main.nf
@@ -89,20 +89,14 @@ workflow {
FERLAB_POSTPROCESSING (
PIPELINE_INITIALISATION.out.samplesheet
)
*/
//
// SUBWORKFLOW: Run completion tasks
//
PIPELINE_COMPLETION (
params.email,
params.email_on_fail,
params.plaintext_email,
params.outdir,
params.monochrome_logs,
params.hook_url,
FERLAB_POSTPROCESSING.out.multiqc_report
)
*/
}

/*
2 changes: 1 addition & 1 deletion nextflow.config
@@ -239,7 +239,7 @@ manifest {
description = """Variant analysis for genome and exome GVCFs"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '1.0.1'
version = '1.0.2'
doi = ''
}

227 changes: 20 additions & 207 deletions subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf
@@ -78,55 +78,23 @@ workflow PIPELINE_INITIALISATION {
validateInputParameters()

//_________Local___________
def rowMapper = getRowMapper()

//
// Create channel from input file provided through params.input
//
Channel.fromPath(file("$params.input"))
.splitCsv(sep: '\t', strip: true)
.view{"Split CSV: $it" }
.map(rowMapper)
.view{"rowMapper: $it" }
.map{rowMapperV2(it)}
.flatMap { it ->
return it.files.collect{f -> [familyId: it.familyId, sequencingType: it.sequencingType, size: it.files.size(), file: f]};
}.multiMap { it ->
meta: tuple(it.familyId, [size: it.size, sequencingType: it.sequencingType])
files: tuple(it.familyId, file("${it.file}*"))
}
.set { sampleChannel}
.set { ch_sampleChannel}
emit:
sampleFiles = sampleChannel.files
sampleMeta = sampleChannel.meta
sampleFiles = ch_sampleChannel.files
sampleMeta = ch_sampleChannel.meta
versions = ch_versions

sampleMeta | view{"Meta: $it"}
sampleFiles | view{"files: $it"}
//
// Create channel from input file provided through params.input
//
/*
Channel
.fromSamplesheet("input")
.map {
meta, fastq_1, fastq_2 ->
if (!fastq_2) {
return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
} else {
return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
}
}
.groupTuple()
.map {
validateInputSamplesheet(it)
}
.map {
meta, fastqs ->
return [ meta, fastqs.flatten() ]
}
.set { ch_samplesheet }
emit:
samplesheet = ch_samplesheet
versions = ch_versions
*/
}
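
For illustration, here is a minimal standalone sketch (not part of the diff above) of how the reworked sample channel behaves, assuming Nextflow DSL2 and one hypothetical V2 sample sheet row (family ID, sequencing type, then one column per file). The file names are placeholders, and the `file()` glob resolution used in the pipeline is omitted so the example runs without real files.

workflow {
    Channel
        .of(['FAM1', 'WES', 'child.gvcf.gz', 'mother.gvcf.gz'])   // one parsed TSV row (hypothetical)
        .map { columns -> [familyId: columns[0], sequencingType: columns[1], files: columns[2..-1]] }
        .flatMap { row ->
            // one element per file, carrying the family-level metadata along
            row.files.collect { f ->
                [familyId: row.familyId, sequencingType: row.sequencingType, size: row.files.size(), file: f]
            }
        }
        .multiMap { it ->
            meta:  tuple(it.familyId, [size: it.size, sequencingType: it.sequencingType])
            files: tuple(it.familyId, it.file)
        }
        .set { ch_sampleChannel }

    ch_sampleChannel.meta.view  { "Meta: $it" }    // e.g. [FAM1, [size:2, sequencingType:WES]], emitted once per file element
    ch_sampleChannel.files.view { "files: $it" }   // e.g. [FAM1, child.gvcf.gz]
}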

/*
@@ -138,31 +106,18 @@ workflow PIPELINE_INITIALISATION {
workflow PIPELINE_COMPLETION {

take:
email // string: email address
email_on_fail // string: email address sent on pipeline failure
plaintext_email // boolean: Send plain-text email instead of HTML
outdir // path: Path to output directory where results will be published
monochrome_logs // boolean: Disable ANSI colour codes in log output
hook_url // string: hook URL for notifications
multiqc_report // string: Path to MultiQC report

main:

summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")

//
// Completion email and summary
// Completion summary
//
workflow.onComplete {
if (email || email_on_fail) {
completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList())
}

completionSummary(monochrome_logs)

if (hook_url) {
imNotification(summary_params, hook_url)
}
}

workflow.onError {
@@ -180,93 +135,31 @@ workflow PIPELINE_COMPLETION {
enum SequencingType {
WGS,
WES
}

enum SampleFileFormat {
V1,
V2
}

def findParamInEnum(paramName, paramValue, enumInstance) {
def validValues = enumInstance.values()*.name()
if (!validValues.contains(paramValue)) {
def validValuesStr = validValues.collect{"`$it`"}.join(", ")
error("Invalid value for parameter `$paramName`: `$paramValue`. Possible values are: $validValuesStr")
}
return enumInstance.valueOf(paramValue)
}
def getSampleFileFormat() {
if (!params.sampleFileFormat) {
log.warn("Using default value `V1` for parameter `sampleFileFormat`")
params.sampleFileFormat="V1"
}
return findParamInEnum("sampleFileFormat", params.sampleFileFormat.toUpperCase(), SampleFileFormat)
}

def getSequencingType() {
if (!params.sequencingType) {
log.warn("Using default value `WGS` for parameter `sequencingType`")
params.sequencingType="WGS"
}
return findParamInEnum("sequencingType", params.sequencingType.toUpperCase(), SequencingType)
}


/**
Get row mapper that match the configured sample file format
Note: it is returned as a closure to guaranty the compatibility with nextflow channel operators
*/
def getRowMapper() {
def format = getSampleFileFormat()

if (format == SampleFileFormat.V1) {
def sequencingType = getSequencingType()
return {columns -> rowMapperV1(columns, sequencingType)}
}
return {columns -> rowMapperV2(columns)}
}


//Transform a row from the sample file in V1 format from a list structure to a map structure.
def rowMapperV1(columns, sequencingType) {
if ((columns[1] == "WGS") || (columns[1] == "WES")){
error("Error: SampleFileFormat stated as V1 (possibly by default), \
however V2 format seems to be in use. \n Please check the sample file and use\
--sampleFileFormat V2 as needed")
exit(0)
public static boolean contains(String s) {
for(SequencingType sequencingType in SequencingType.values()){
if(sequencingType.name().equals(s)){
return true
}
}
return false
}
print(columns[1])
return [
familyId: columns[0],
sequencingType: sequencingType,
files: columns.tail()
]
}


//Transform a row from the sample file in V2 format from a list structure to a map structure
def rowMapperV2(columns) {
def sampleSeqType = columns[1]
if ((sampleSeqType != "WGS") && (sampleSeqType != "WES")){
error("Error: SampleFileFormat stated as V2, \
however V2 format is not respected \n Please check the sample file and use \
--sampleFileFormat V1 as needed")
exit(0)
}
if (sampleSeqType != params.sequencingType){
error("Error: sequencingType stated as '$params.sequencingType', however sample file states '$sampleSeqType'. \
Please make sure to input the correct type as --sequencingType")
exit(0)
def sampleSeqType = columns[1].toUpperCase()
if (!(SequencingType.contains(sampleSeqType))){
error("Error: Second column of the sample sheet should be either 'WES' or 'WGS'")
exit(1)
}
return [
familyId: columns[0],
sequencingType: columns[1].toUpperCase() as SequencingType,
sequencingType: sampleSeqType.toUpperCase() as SequencingType,
files: columns[2..-1]
]
}
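
As a quick standalone illustration (not taken from the repository), the enum-based check introduced here can be exercised in plain Groovy. The contains helper below is a compact equivalent of the loop-based version added above, and all sample values are made up.

enum SequencingType {
    WGS,
    WES;

    // compact equivalent of the loop-based contains() added in this commit
    static boolean contains(String s) {
        return values()*.name().contains(s)
    }
}

// hypothetical values for the second column of a sample sheet row
assert SequencingType.contains('WGS')
assert SequencingType.contains('wes'.toUpperCase())
assert !SequencingType.contains('EXOME')                               // rowMapperV2 would call error() here
assert ('wes'.toUpperCase() as SequencingType) == SequencingType.WES   // the coercion used when building the row map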


//_____________Template functions_____________
//
// Check and validate pipeline parameters
@@ -275,22 +168,6 @@ def validateInputParameters() {
genomeExistsError()
}

//
// Validate channels from input samplesheet
//
/*
def validateInputSamplesheet(input) {
def (metas, fastqs) = input[1..2]
// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
if (!endedness_ok) {
error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
}
return [ metas[0], fastqs ]
}
*/
//
// Get attribute from genome config file e.g. fasta
//
@@ -316,67 +193,3 @@ def genomeExistsError() {
error(error_string)
}
}

//
// Generate methods description for MultiQC
//
def toolCitationText() {
// TODO nf-core: Optionally add in-text citation tools to this list.
// Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
// Uncomment function in methodsDescriptionText to render in MultiQC report
def citation_text = [
"Tools used in the workflow included:",
"FastQC (Andrews 2010),",
"MultiQC (Ewels et al. 2016)",
"."
].join(' ').trim()

return citation_text
}

def toolBibliographyText() {
// TODO nf-core: Optionally add bibliographic entries to this list.
// Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
// Uncomment function in methodsDescriptionText to render in MultiQC report
def reference_text = [
"<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
"<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
].join(' ').trim()

return reference_text
}

def methodsDescriptionText(mqc_methods_yaml) {
// Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
def meta = [:]
meta.workflow = workflow.toMap()
meta["manifest_map"] = workflow.manifest.toMap()

// Pipeline DOI
if (meta.manifest_map.doi) {
// Using a loop to handle multiple DOIs
// Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
// Removing ` ` since the manifest.doi is a string and not a proper list
def temp_doi_ref = ""
String[] manifest_doi = meta.manifest_map.doi.tokenize(",")
for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: <a href=\'https://doi.org/${doi_ref.replace("https://doi.org/", "").replace(" ", "")}\'>${doi_ref.replace("https://doi.org/", "").replace(" ", "")}</a>), "
meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
} else meta["doi_text"] = ""
meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"

// Tool references
meta["tool_citations"] = ""
meta["tool_bibliography"] = ""

// TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
// meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
// meta["tool_bibliography"] = toolBibliographyText()


def methods_text = mqc_methods_yaml.text

def engine = new groovy.text.SimpleTemplateEngine()
def description_html = engine.createTemplate(methods_text).make(meta)

return description_html.toString()
}
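
For reference, the template rendering used by methodsDescriptionText boils down to Groovy's SimpleTemplateEngine: a map of values is bound into a template string. In this small sketch the template text and values are invented placeholders, not the pipeline's real MultiQC methods YAML.

import groovy.text.SimpleTemplateEngine

def meta = [
    manifest_map: [version: '1.0.2'],          // placeholder values
    workflow    : [runName : 'example_run']
]
def methods_text = 'Pipeline version ${manifest_map.version}, run name ${workflow.runName}.'  // stand-in for mqc_methods_yaml.text

def engine = new SimpleTemplateEngine()
println engine.createTemplate(methods_text).make(meta).toString()
// prints: Pipeline version 1.0.2, run name example_run.
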
1 change: 0 additions & 1 deletion workflows/postprocessing.nf
@@ -9,7 +9,6 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
