Skip to content

Commit

Permalink
Added 2FAST2Q module (#7318)
Browse files Browse the repository at this point in the history
* Added 2FAST2Q module

* added tests to 2fast2q

* added tests to 2fast2q

* added tests to 2fast2q

* fixed tests to 2fast2q

* fixed tests to 2fast2q

* Update modules/nf-core/fast2q/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/fast2q/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* added optional input - hope it is being parsed correctly

* fix

* fix lack of optional input on tests

* fixing in progress

* fixing in progress

* fixing in progress

* fixing in progress

* I changed the test data to 2FAST2Q repo data, NOT NF-CORE test data, just as a way to check if it is working. The test data I was using from NF-core has the wrong input file format.

* I changed the test data to 2FAST2Q repo data, NOT NF-CORE test data, just as a way to check if it is working. The test data I was using from NF-core has the wrong input file format.

* I changed the test data to 2FAST2Q repo data, NOT NF-CORE test data, just as a way to check if it is working. The test data I was using from NF-core has the wrong input file format.

* I changed the test data to 2FAST2Q repo data, NOT NF-CORE test data, just as a way to check if it is working. The test data I was using from NF-core has the wrong input file format.

* testing if it works with just fastq

* Update modules/nf-core/fast2q/main.nf

Co-authored-by: Simon Pearce <[email protected]>

* added test data and fixed several output issues

* fixed the nf.test

* fixed the nf.test

* fixed the module

* moved test data up

* direct link to a test data

* changed paths to tests

* Update modules/nf-core/fast2q/tests/nextflow.config

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/fast2q/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/fast2q/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/fast2q/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* changed assertions in test

* removed indentation on main 2fast2q cmd as it was not being recognized

* everything working locally

* everything working locally

* everything working locally

* everything working locally

* everything working locally

* everything working locally

* enhanced descriptions

* updated singularity container

* adding a file deleted by accident

* Update module

* Fix linting

* Update meta.yml

Updated meta descriptions

* Update meta.yml

* Update meta.yml

* Fix broken test

* Update main.nf.test.snap

* Update main.nf.test

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
afombravo and SPPearce authored Jan 30, 2025
1 parent 62c2b3e commit 92aad63
Show file tree
Hide file tree
Showing 8 changed files with 533 additions and 0 deletions.
5 changes: 5 additions & 0 deletions modules/nf-core/fast2q/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the FAST2Q module.
# Channel search order: conda-forge is listed ahead of bioconda.
channels:
- conda-forge
- bioconda
dependencies:
# fast2q is pinned to an exact version (2.7.2) and taken explicitly from bioconda.
- bioconda::fast2q=2.7.2
67 changes: 67 additions & 0 deletions modules/nf-core/fast2q/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Runs the 2FAST2Q command-line tool on an (optional) FASTQ input and an
// (optional) feature library, and emits the count matrix, the statistics
// table and the three plots it produces, plus a versions.yml file.
//
// Inputs:
//   meta, fastq    - meta map (meta.id is the default output prefix) and the
//                    FASTQ input; pass [] when no FASTQ input is wanted
//                    (e.g. when running the tool's self-test via ext.args).
//   meta2, library - meta map and the .csv feature library; pass [] to omit it.
process FAST2Q {

    // Tag each task with its sample id so tasks can be told apart in logs/reports.
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Use the Singularity image when running under Singularity (unless
    // ext.singularity_pull_docker_container is set); otherwise the Docker image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/fast2q:2.7.2--pyh7e72e81_0' :
        'biocontainers/fast2q:2.7.2--pyh7e72e81_0' }"

    input:
    tuple val(meta), path(fastq)
    tuple val(meta2), path(library)

    output:
    tuple val(meta), path("${prefix}.csv")                      , emit: count_matrix
    tuple val(meta), path("${prefix}_stats.csv")                , emit: stats
    tuple val(meta), path("${prefix}_distribution_plot.png")    , emit: distribution_plot
    tuple val(meta), path("${prefix}_reads_plot.png")           , emit: reads_plot
    tuple val(meta), path("${prefix}_reads_plot_percentage.png"), emit: reads_plot_percentage
    path "versions.yml"                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately NOT declared with `def`: the output block above
    // interpolates ${prefix}, so it must be visible outside the script scope.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Optional inputs: an empty list ([]) is falsy, so the flag is only added
    // when something was actually staged for that input.
    def input_file = fastq ? "--s ${fastq}" : ''
    def library_file = library ? "--g ${library}" : ''

    // NOTE(review): MPLCONFIGDIR is pointed at the task directory, presumably so
    // matplotlib has a writable config/cache location inside the container.
    // NOTE(review): the `mv` assumes 2fast2q writes its results into a
    // sub-directory of `--o`; they are moved up so they match the declared outputs.
    """
    export MPLCONFIGDIR=\$PWD
    2fast2q \\
        -c \\
        --o ./ \\
        --fn ${prefix} \\
        --cp ${task.cpus} \\
        $input_file \\
        $library_file \\
        $args
    mv **/${prefix}* .
    cat <<-END_VERSIONS > versions.yml
    ${task.process}:
        2FAST2Q version: \$(2fast2q -v | grep 'Version:' | sed 's/Version: //g')
    END_VERSIONS
    """

    stub:
    // Same prefix rule as the real script so the stub creates the declared outputs.
    prefix = task.ext.prefix ?: "${meta.id}"

    """
    touch ${prefix}.csv
    touch ${prefix}_stats.csv
    touch ${prefix}_distribution_plot.png
    touch ${prefix}_reads_plot.png
    touch ${prefix}_reads_plot_percentage.png
    cat <<-END_VERSIONS > versions.yml
    ${task.process}:
        2FAST2Q version: \$(2fast2q -v | grep 'Version:' | sed 's/Version: //g')
    END_VERSIONS
    """

}
98 changes: 98 additions & 0 deletions modules/nf-core/fast2q/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
name: fast2q
description: A program that counts sequence occurrences in FASTQ files.
keywords:
- CRISPRi
- FASTQ
- genomics
tools:
- 2FAST2Q:
description: |
2FAST2Q is ideal for CRISPRi-Seq, and for extracting and counting any kind of information from reads in the fastq format, such as barcodes in Bar-seq experiments.
2FAST2Q can work with sequence mismatches, Phred-score, and be used to find and extract unknown sequences delimited by known sequences.
2FAST2Q can extract multiple features per read using either fixed positions or delimiting search sequences.
homepage: https://github.com/afombravo/2FAST2Q
doi: 10.7717/peerj.14041
licence: ["GPL-3.0-or-later"]
identifier: ""

input:
- - meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test']
- fastq:
type: directory
description: A FASTQ file, or a folder containing FASTQ file(s). When a folder is provided, 2FAST2Q automatically picks up all the FASTQ files inside it.
pattern: "*.{fastq,gz}"

- - meta2:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'library_name', multiple_features_per_read:false ]
- library:
type: file
description: Optional .csv library file following the 'Feature_name,sequence' or 'Feature_name,sequence1:sequence2' format. See 2FAST2Q instructions for more information.
pattern: "*.csv"

output:
- count_matrix:
- meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test' ]
- ${prefix}.csv:
type: file
description: |
Count matrix csv file
- stats:
- meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test' ]
- ${prefix}_stats.csv:
type: file
description: |
File containing all the relevant statistics such as quality passing reads, aligned reads, total reads, and sample run times.
- distribution_plot:
- meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test' ]
- ${prefix}_distribution_plot.png:
type: file
description: |
Violin plot of the distribution of reads per feature across all samples.
- reads_plot:
- meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test' ]
- ${prefix}_reads_plot.png:
type: file
description: |
Bar plot with the distribution of reads, in absolute numbers, binned to the different quality metrics indicated in the statistics.csv
- reads_plot_percentage:
- meta:
type: map
description: |
Groovy Map containing output name.
e.g. [ id:'test' ]
- ${prefix}_reads_plot_percentage.png:
type: file
description: |
Bar plot with the distribution of reads, in percentage, binned to the different quality metrics indicated in the statistics.csv
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@afombravo"
maintainers:
- "@afombravo"
165 changes: 165 additions & 0 deletions modules/nf-core/fast2q/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// nf-test suite for the FAST2Q process. Each test sets `params.module_args`
// (read by ./nextflow.config) and feeds the process two input tuples:
// [meta, fastq] and [meta2, library]; an empty list stands for "not provided".
nextflow_process {

    name "Test Process 2FAST2Q"
    script "../main.nf"
    process "FAST2Q"

    tag "modules"
    tag "modules_nfcore"
    tag "fast2q"

    config './nextflow.config'

    // Runs with '-t' and no input files at all.
    test("2FAST2Q self-test") {

        when {

            params {
                module_args = '-t'
            }

            process {
                """
                input[0] = [
                    [ id:'test1' ], // meta map
                    []
                ]
                input[1] = [
                    [ id:'library_name', multiple_features_per_read:false ], // meta map for second input
                    []
                ]
                """
            }
        }

        then {
            def emitted = process.out

            assertAll(
                { assert process.success },
                // Count matrix and versions are snapshotted in full; for the
                // stats table and the plots only the file name is recorded.
                { assert snapshot(
                    emitted.count_matrix,
                    file(emitted.stats[0][1]).name,
                    file(emitted.distribution_plot[0][1]).name,
                    file(emitted.reads_plot[0][1]).name,
                    file(emitted.reads_plot_percentage[0][1]).name,
                    emitted.versions,
                    path(emitted.versions[0]).yaml
                    ).match()
                }
            )
        }
    }

    // FASTQ file only, library omitted, run with '--mo EC'.
    test("Extracting and counting all features at position=0 with default length of 20bp from a FASTQ file (without optional library.csv)") {

        when {

            params {
                module_args = '--mo EC'
            }

            process {
                """
                input[0] = [
                    [ id:'test1' ], // meta map
                    file(params.test_data_base + '/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz', checkIfExists: true) // FASTQ file
                ]
                input[1] = [
                    [ id:'library_name', multiple_features_per_read:false ], // meta map for second input
                    []
                ]
                """
            }
        }

        then {
            def emitted = process.out

            assertAll(
                { assert process.success },
                // Count matrix and versions are snapshotted in full; for the
                // stats table and the plots only the file name is recorded.
                { assert snapshot(
                    emitted.count_matrix,
                    file(emitted.stats[0][1]).name,
                    file(emitted.distribution_plot[0][1]).name,
                    file(emitted.reads_plot[0][1]).name,
                    file(emitted.reads_plot_percentage[0][1]).name,
                    emitted.versions,
                    path(emitted.versions[0]).yaml
                    ).match()
                }
            )
        }
    }

    // FASTQ file plus library.csv, no extra arguments.
    test("Extracting all features at position=0 with default length of 20bp from a FASTQ file, and respective alignment to a file with DNA features (library.csv)") {

        when {

            params {
                module_args = ''
            }

            process {
                """
                input[0] = [
                    [ id:'test1' ], // meta map
                    file(params.test_data_base + '/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz', checkIfExists: true) // FASTQ file
                ]
                input[1] = [
                    [ id:'library_name', multiple_features_per_read:false ], // meta map for second input
                    file(params.test_data_base + '/data/genomics/mus_musculus/mageck/yusa_library.csv', checkIfExists: true) // library file
                ]
                """
            }
        }

        then {
            def emitted = process.out

            assertAll(
                { assert process.success },
                // Count matrix and versions are snapshotted in full; for the
                // stats table and the plots only the file name is recorded.
                { assert snapshot(
                    emitted.count_matrix,
                    file(emitted.stats[0][1]).name,
                    file(emitted.distribution_plot[0][1]).name,
                    file(emitted.reads_plot[0][1]).name,
                    file(emitted.reads_plot_percentage[0][1]).name,
                    emitted.versions,
                    path(emitted.versions[0]).yaml
                    ).match()
                }
            )
        }
    }

    // Same inputs as the self-test, executed with -stub.
    test("2FAST2Q self-test - stub") {
        options "-stub"
        when {

            params {
                module_args = '-t'
            }

            process {
                """
                input[0] = [
                    [ id:'test1' ], // meta map
                    []
                ]
                input[1] = [
                    [ id:'library_name', multiple_features_per_read:false ], // meta map for second input
                    []
                ]
                """
            }
        }

        then {
            def emitted = process.out

            assertAll(
                { assert process.success },
                // The whole output map is snapshotted, followed by the parsed versions.yml.
                { assert snapshot(
                    emitted,
                    path(emitted.versions[0]).yaml
                    ).match()
                }
            )
        }
    }


}
Loading

0 comments on commit 92aad63

Please sign in to comment.