Skip to content

Commit

Permalink
Bbmap filterbyname (#2206)
Browse files Browse the repository at this point in the history
* Added bbmap/filterbyname module

* updated tests/config/pytest_modules.yml

* Formatting

* [automated] Fix linting with Prettier

* removed white spaces

* 1) Added an option for interleaved output into args
2) Output extension is now also controlled by args

* Removed TO-DO strings + reverted gitignore file

* output now depends on "output_extension"

* 1) Added more tests
2) Removed "names" entirely (should be passed inside `args` if needed)

* Update modules/nf-core/bbmap/filterbyname/main.nf

Co-authored-by: FriederikeHanssen <[email protected]>

* Fixed typo (me5sum -> md5sum)

* [automated] Fix linting with Prettier

* 1) Changed container
2) Fixed tests

* [automated] Fix linting with Prettier

* Update main.nf

Try grepping to remove the odd extra text on the singularity test

* Updated version with nf-test

* Update meta

* Address review comments

---------

Co-authored-by: nf-core-bot <[email protected]>
Co-authored-by: FriederikeHanssen <[email protected]>
Co-authored-by: Simon Pearce <[email protected]>
Co-authored-by: Fabian Egli <[email protected]>
Co-authored-by: Matthias De Smet <[email protected]>
  • Loading branch information
6 people authored Jun 18, 2024
1 parent c379338 commit 2864fbf
Show file tree
Hide file tree
Showing 7 changed files with 505 additions and 1 deletion.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,3 @@ test_output/
tests/data/
work/
.github/CODEOWNERS-tmp

7 changes: 7 additions & 0 deletions modules/nf-core/bbmap/filterbyname/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the bbmap/filterbyname module; main.nf loads it via
# `conda "${moduleDir}/environment.yml"`.
name: bbmap_filterbyname
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Same bbmap release (39.01) as the container images referenced in main.nf.
- bioconda::bbmap=39.01
71 changes: 71 additions & 0 deletions modules/nf-core/bbmap/filterbyname/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Filters reads by header name using BBTools' filterbyname.sh.
//
// Inputs:
//   meta, reads        - sample map plus its read file(s); meta.single_end picks
//                        between `in=` (one file) and `in=`/`in2=` (two files)
//   names_to_filter    - forwarded as `names=<value>`; left out of the command when empty
//   output_format      - extension appended to every output file name; also used
//                        as the glob for the `reads` output channel
//   interleaved_output - when true, paired-end data is written to one `out=` file
//                        instead of separate `out1=`/`out2=` files
//
// Outputs: the filtered reads, the tool's log and versions.yml.
process BBMAP_FILTERBYNAME {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:39.01--h5c4e2a8_0':
        'biocontainers/bbmap:39.01--h5c4e2a8_0' }"

    input:
    tuple val(meta), path(reads)
    val(names_to_filter)
    val(output_format)
    val(interleaved_output)

    output:
    tuple val(meta), path("*.${output_format}"), emit: reads
    tuple val(meta), path('*.log')             , emit: log
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // One input file for single-end data, two (in/in2) otherwise.
    def input  = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}"
    // A single output file for single-end or interleaved output, else one file per mate.
    def output = (meta.single_end || interleaved_output) ?
        "out=${prefix}.${output_format}" :
        "out1=${prefix}_1.${output_format} out2=${prefix}_2.${output_format}"
    // Only add names= when something was actually supplied.
    def names_command = names_to_filter ? "names=${names_to_filter}": ""

    // JVM heap size in GB: taken from the task's memory directive, 3 GB when none is set.
    def avail_mem = 3
    if (!task.memory) {
        log.info '[filterbyname] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }

    // BBTools print their run report to stderr, so stderr is merged into the
    // pipe; tee-ing stdout alone leaves the emitted log file without the report.
    // The grep on bbversion.sh drops a stray "Duplicate cpuset" line that would
    // otherwise end up in the recorded version string.
    """
    filterbyname.sh \\
        -Xmx${avail_mem}g \\
        $input \\
        $output \\
        $names_command \\
        $args \\
        2>&1 \\
        | tee ${prefix}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Placeholder outputs mirror the naming of the real run; they are always
    // written through gzip, whatever output_format is.
    def filtered = (meta.single_end || interleaved_output) ?
        "echo '' | gzip > ${prefix}.${output_format}" :
        "echo '' | gzip >${prefix}_1.${output_format} ; echo '' | gzip >${prefix}_2.${output_format}"

    """
    $filtered
    touch ${prefix}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset")
    END_VERSIONS
    """

}
62 changes: 62 additions & 0 deletions modules/nf-core/bbmap/filterbyname/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Metadata for the BBMAP_FILTERBYNAME module. Input and output names, and the
# output file patterns, mirror the declarations in main.nf.
name: bbmap_filterbyname
description: Filter out sequences by sequence header name(s)
keywords:
  - fastq
  - fasta
  - filter
tools:
  - bbmap:
      description: BBMap is a short read aligner, as well as various other bioinformatic tools.
      homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/
      documentation: https://www.biostars.org/p/225338/
      licence: ["UC-LBL license (see package)"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and
        paired-end data, respectively.
  - names_to_filter:
      type: string
      description: |
        String containing names of reads to filter out of the fastq files,
        e.g. a comma-separated list of read names. Passed to filterbyname.sh
        as `names=`; may be empty.
  - output_format:
      type: string
      description: |
        String with the format of the output file, e.g. fastq.gz, fasta, fasta.bz2
  - interleaved_output:
      type: boolean
      description: |
        Whether to produce an interleaved fastq output file
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reads:
      type: file
      description: The filtered reads, written in the requested output format
      pattern: "*.${output_format}"
  - log:
      type: file
      description: filterbyname.sh log file
      pattern: "*.log"

authors:
  - "@tokarevvasily"
  - "@sppearce"

maintainers:
  - "@sppearce"
218 changes: 218 additions & 0 deletions modules/nf-core/bbmap/filterbyname/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
nextflow_process {

name "Test Process BBMAP_FILTERBYNAME"
script "../main.nf"
process "BBMAP_FILTERBYNAME"

tag "modules"
tag "modules_nfcore"
tag "bbmap"
tag "bbmap/filterbyname"

// Paired-end run without a name filter, requesting bzip2-compressed FASTQ output.
// Both mates are wrapped in a single list so they bind to the process' one
// `path(reads)` slot; passing them as separate tuple elements does not match
// the declared `tuple val(meta), path(reads)` input.
test("paired end fastq.bz2") {

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false ],
                [
                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
                ]
            ]
            input[1] = ""
            input[2] = "fastq.bz2"
            input[3] = false
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Stub variant of the paired-end fastq.bz2 case; snapshots every output channel.
// Both mates are wrapped in a single list so the tuple matches the process'
// declared `tuple val(meta), path(reads)` input.
test("paired end fastq.bz2 - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false ],
                [
                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
                ]
            ]
            input[1] = ""
            input[2] = "fastq.bz2"
            input[3] = false
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() }
        )
    }

}

// Single-end run without a name filter, converting to uncompressed FASTA.
// meta.single_end is true so the process takes its single-file branch
// (`in=` / `out=`) rather than indexing a second mate that was never supplied.
test("single end fasta") {

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true ],
                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
            ]
            input[1] = ""
            input[2] = "fasta"
            input[3] = false
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Stub variant of the single-end FASTA case.
// meta.single_end is true so the stub creates the single `${prefix}.fasta`
// placeholder that corresponds to one input file.
test("single end fasta - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true ],
                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
            ]
            input[1] = ""
            input[2] = "fasta"
            input[3] = false
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Single-end run that filters three named reads and writes gzipped FASTQ.
// meta.single_end is true to match the single input file, and the output
// format is fastq.gz to agree with the test name and its stub counterpart.
test("single end fastq.gz filter") {

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true ],
                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
            ]
            input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486"
            input[2] = "fastq.gz"
            input[3] = false
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Stub variant of the single-end fastq.gz filtering case.
// meta.single_end is true so the stub follows the single-file branch.
test("single end fastq.gz - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true ],
                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
            ]
            input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486"
            input[2] = "fastq.gz"
            input[3] = false
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Paired-end run that filters three named reads and writes one interleaved
// fastq.gz file. Both mates are wrapped in a single list so they bind to the
// process' one `path(reads)` slot and the second mate is actually read.
test("paired end fastq.gz filter interleaved") {

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false ],
                [
                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
                ]
            ]
            input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486"
            input[2] = "fastq.gz"
            input[3] = true
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

// Stub variant of the paired-end interleaved fastq.gz case.
// Both mates are wrapped in a single list so the tuple matches the process'
// declared `tuple val(meta), path(reads)` input.
test("paired end fastq.gz filter interleaved - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false ],
                [
                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
                ]
            ]
            input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486"
            input[2] = "fastq.gz"
            input[3] = true
            """
        }
    }

    then {
        // Only the versions channel is snapshotted here.
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() }
        )
    }

}

}
Loading

0 comments on commit 2864fbf

Please sign in to comment.