diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 379d2fa..681a935 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -3,6 +3,7 @@ // include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' workflow INPUT_CHECK { @@ -11,20 +12,35 @@ workflow INPUT_CHECK { main: + ch_versions = Channel.empty() + + // Read the samplesheet SAMPLESHEET_CHECK ( samplesheet ).csv | splitCsv ( header:true, sep:',' ) - | map { create_data_channel( it ) } + // Prepare the channel for SAMTOOLS_FLAGSTAT + | map { row -> [row + [id: file(row.datafile).baseName], file(row.datafile, checkIfExists: true), []] } + | set { samplesheet_rows } + ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() ) + + // Get stats from each input file + SAMTOOLS_FLAGSTAT ( samplesheet_rows ) + ch_versions = ch_versions.mix ( SAMTOOLS_FLAGSTAT.out.versions.first() ) + + // Create the read channel for the rest of the pipeline + samplesheet_rows + | join( SAMTOOLS_FLAGSTAT.out.flagstat ) + | map { meta, datafile, dummy, stats -> create_data_channel( meta, datafile, stats ) } | set { reads } emit: reads // channel: [ val(meta), /path/to/datafile ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } // Function to get list of [ meta, reads ] -def create_data_channel ( LinkedHashMap row ) { +def create_data_channel ( LinkedHashMap row, datafile, stats ) { // create meta map def meta = [:] meta.id = row.sample @@ -39,13 +55,15 @@ def create_data_channel ( LinkedHashMap row ) { } meta.read_group = "\'@RG\\tID:" + row.datafile.split('/')[-1].split('\\.')[0] + "\\tPL:" + platform + "\\tSM:" + meta.id.split('_')[0..-2].join('_') + "\'" - - // add path(s) of the read file(s) to the meta map - def data_meta = [] - if ( !file(row.datafile).exists() ) { - exit 1, "ERROR: Please check input samplesheet -> Data file does not exist!\n${row.datafile}" - } else { - data_meta = [ meta, file(row.datafile) ] + // Read the first line of the flagstat file + // 3127898040 + 0 in total (QC-passed reads + QC-failed reads) + // and make the sum of both integers + stats.withReader { + line = it.readLine() + def lspl = line.split() + def read_count = lspl[0].toInteger() + lspl[2].toInteger() + meta.read_count = read_count } - return data_meta + + return [meta, datafile] }