Skip to content

Commit

Permalink
Collect the read counts from the input files
Browse files Browse the repository at this point in the history
  • Loading branch information
muffato committed Nov 6, 2023
1 parent 093b313 commit bdd1c2e
Showing 1 changed file with 29 additions and 11 deletions.
40 changes: 29 additions & 11 deletions subworkflows/local/input_check.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//

include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main'


workflow INPUT_CHECK {
Expand All @@ -11,20 +12,35 @@ workflow INPUT_CHECK {


main:
ch_versions = Channel.empty()

// Read the samplesheet
SAMPLESHEET_CHECK ( samplesheet ).csv
| splitCsv ( header:true, sep:',' )
| map { create_data_channel( it ) }
// Prepare the channel for SAMTOOLS_FLAGSTAT
| map { row -> [row + [id: file(row.datafile).baseName], file(row.datafile, checkIfExists: true), []] }
| set { samplesheet_rows }
ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() )

// Get stats from each input file
SAMTOOLS_FLAGSTAT ( samplesheet_rows )
ch_versions = ch_versions.mix ( SAMTOOLS_FLAGSTAT.out.versions.first() )

// Create the read channel for the rest of the pipeline
samplesheet_rows
| join( SAMTOOLS_FLAGSTAT.out.flagstat )
| map { meta, datafile, dummy, stats -> create_data_channel( meta, datafile, stats ) }
| set { reads }


emit:
reads // channel: [ val(meta), /path/to/datafile ]
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
versions = ch_versions // channel: [ versions.yml ]
}


// Function to get list of [ meta, reads ]
def create_data_channel ( LinkedHashMap row ) {
def create_data_channel ( LinkedHashMap row, datafile, stats ) {
// create meta map
def meta = [:]
meta.id = row.sample
Expand All @@ -39,13 +55,15 @@ def create_data_channel ( LinkedHashMap row ) {
}
meta.read_group = "\'@RG\\tID:" + row.datafile.split('/')[-1].split('\\.')[0] + "\\tPL:" + platform + "\\tSM:" + meta.id.split('_')[0..-2].join('_') + "\'"


// add path(s) of the read file(s) to the meta map
def data_meta = []
if ( !file(row.datafile).exists() ) {
exit 1, "ERROR: Please check input samplesheet -> Data file does not exist!\n${row.datafile}"
} else {
data_meta = [ meta, file(row.datafile) ]
// Read the first line of the flagstat file
// 3127898040 + 0 in total (QC-passed reads + QC-failed reads)
// and make the sum of both integers
stats.withReader {
line = it.readLine()
def lspl = line.split()
def read_count = lspl[0].toInteger() + lspl[2].toInteger()
meta.read_count = read_count
}
return data_meta

return [meta, datafile]
}

0 comments on commit bdd1c2e

Please sign in to comment.