Commit

Merge branch 'master' into gridss_envs
SPPearce authored Jan 14, 2025
2 parents 1e84d3a + 1c42491 commit 4f9479f
Showing 47 changed files with 2,092 additions and 1,058 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
@@ -83,7 +83,7 @@ jobs:
.replace('modules/nf-core/', '')
.split('/')
.slice(0, 2)
.filter(x => !x.startsWith('main.nf') && x !== 'tests' && x !== 'meta.yml' && x !== 'environment.yml')
.filter(x => !x.startsWith('main.nf') && x !== 'tests' && x !== 'templates' && x !== 'meta.yml' && x !== 'environment.yml' && !x.endsWith('.md'))
.join('/'))
)
];
2 changes: 1 addition & 1 deletion .github/workflows/pytest-workflow.yml
@@ -308,7 +308,7 @@ jobs:

- name: Upload logs on failure
if: failure()
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4
with:
name: logs-${{ matrix.profile }}-${{ steps.parsed.outputs.result }}
path: |
29 changes: 20 additions & 9 deletions modules/nf-core/custom/filterdifferentialtable/main.nf
@@ -9,10 +9,8 @@ process CUSTOM_FILTERDIFFERENTIALTABLE {

input:
tuple val(meta), path(input_file)
val(logFC_column)
val(FC_threshold)
val(padj_column)
val(padj_threshold)
tuple val(logfc_column), val(fc_threshold), val(fc_cardinality)
tuple val(stat_column), val(stat_threshold), val(stat_cardinality)

output:
tuple val(meta), path("*_filtered.tsv"), emit: filtered
@@ -41,14 +39,27 @@ process CUSTOM_FILTERDIFFERENTIALTABLE {
table = pd.read_csv("${input_file}", sep=sep)
# Calculate log2 fold change threshold
logFC_threshold = log2(float("${FC_threshold}"))
logfc_threshold = log2(float("${fc_threshold}"))
# define evaluation
def evaluate_condition(x, threshold, cardinality):
if cardinality == ">=":
return x >= threshold
elif cardinality == "<=":
return x <= threshold
elif cardinality == ">":
return x > threshold
elif cardinality == "<":
return x < threshold
else:
raise ValueError(f"Invalid cardinality: {cardinality}")
# Apply filters
mask = (
table["${logFC_column}"].notna() &
table["${padj_column}"].notna() &
(table["${logFC_column}"].abs() >= logFC_threshold) &
(table["${padj_column}"] <= float("${padj_threshold}"))
table["${logfc_column}"].notna() &
table["${stat_column}"].notna() &
table["${logfc_column}"].abs().apply(lambda x: evaluate_condition(x, logfc_threshold, "${fc_cardinality}")) &
table["${stat_column}"].apply(lambda x: evaluate_condition(x, float("${stat_threshold}"), "${stat_cardinality}"))
)
filtered_table = table[mask]
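For orientation, a minimal sketch of how a pipeline might wire the reworked inputs, assuming the module sits at the standard nf-core path; the input file path and channel name are placeholders, and the column/threshold/operator values mirror those used in the updated module test later in this diff:

// Hypothetical usage sketch, not part of this commit.
include { CUSTOM_FILTERDIFFERENTIALTABLE } from './modules/nf-core/custom/filterdifferentialtable/main'

workflow {
    // meta map plus a differential expression table (CSV, TSV, or TXT)
    ch_table = Channel.of([ [ id:'test' ], file('deseq2.results.tsv') ])

    CUSTOM_FILTERDIFFERENTIALTABLE(
        ch_table,
        Channel.of([ 'log2FoldChange', 2, '>=' ]),  // logfc_column, fc_threshold, fc_cardinality
        Channel.of([ 'padj', 0.05, '<=' ])          // stat_column, stat_threshold, stat_cardinality
    )
}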
25 changes: 19 additions & 6 deletions modules/nf-core/custom/filterdifferentialtable/meta.yml
@@ -4,6 +4,7 @@ keywords:
- filter
- differential expression
- logFC
- significance statistic
- p-value
tools:
- "pandas":
@@ -26,18 +27,30 @@ input:
type: file
description: Input differential expression table (CSV, TSV, or TXT format)
pattern: "*.{csv,tsv,txt}"
- - logFC_column:
- - logfc_column:
type: string
description: Name of the column containing log fold change values
- - FC_threshold:
- fc_threshold:
type: float
description: Fold change threshold for filtering
- - padj_column:
- fc_cardinality:
type: string
description: Name of the column containing adjusted p-values
- - padj_threshold:
description: |
Operator to compare the fold change values with the threshold.
Valid values are: ">=", "<=", ">", "<".
- - stat_column:
type: string
description: |
Name of the column containing the significance statistic values
(e.g. adjusted p-values).
- stat_threshold:
type: float
description: Adjusted p-value threshold for filtering
description: Statistic threshold for filtering
- stat_cardinality:
type: string
description: |
Operator to compare the column values with the threshold.
Valid values are: ">=", "<=", ">", "<".
output:
- filtered:
- meta:
@@ -15,10 +15,8 @@ nextflow_process {
process {
"""
input[0] = [ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv", checkIfExists: true) ]
input[1] = 'log2FoldChange'
input[2] = 2
input[3] = 'padj'
input[4] = 0.05
input[1] = Channel.of(['log2FoldChange', 2, '>='])
input[2] = Channel.of(['padj', 0.05, '<='])
"""
}
}
2 changes: 1 addition & 1 deletion modules/nf-core/galah/environment.yml
@@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::galah=0.3.1
- bioconda::galah=0.4.2
36 changes: 18 additions & 18 deletions modules/nf-core/galah/main.nf
@@ -4,38 +4,39 @@ process GALAH {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/galah%3A0.3.1--h031d066_3':
'biocontainers/galah:0.3.1--h031d066_3' }"
'https://depot.galaxyproject.org/singularity/galah:0.4.2--h7b50bb2_1':
'biocontainers/galah:0.4.2--h7b50bb2_1' }"

input:
tuple val(meta), path(bins), path(qc_table), val(qc_format)

output:
tuple val(meta), path("*.tsv") , emit: tsv
tuple val(meta), path("${prefix}-dereplicated/*") , emit: dereplicated_bins
path "versions.yml" , emit: versions
tuple val(meta), path("*.tsv") , emit: tsv
tuple val(meta), path("${prefix}/*"), emit: dereplicated_bins
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}"
def qc_input = qc_table ? qc_args : ""
def valid_qc_format = qc_format in ["checkm", "genome_info"]
if( qc_table && !valid_qc_format ) {
error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'."
def qc_input = ""
if(qc_format == "checkm2") {
qc_input = "--checkm2-quality-report ${qc_table}"
} else if(qc_format == "checkm") {
qc_input = "--checkm-tab-table ${qc_table}"
} else if(qc_format == "genome-info") {
qc_input = "--genome-info ${qc_table}"
}
"""
mkdir ${prefix}-dereplicated
galah cluster \\
--threads ${task.cpus} \\
--genome-fasta-files ${bins} \\
${qc_input} \\
--output-cluster-definition ${prefix}-dereplicated_bins.tsv \\
--output-representative-fasta-directory ${prefix}-dereplicated
--output-cluster-definition ${prefix}.tsv \\
--output-representative-fasta-directory ${prefix} \\
${args}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -44,12 +45,11 @@ process GALAH {
"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mkdir ${prefix}-dereplicated/
touch ${prefix}-dereplicated/test.fa
touch ${prefix}-dereplicated_bins.tsv
mkdir ${prefix}/
touch ${prefix}/test.fa
touch ${prefix}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
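For orientation, a minimal sketch of how the updated module might be invoked, assuming placeholder bin FASTAs and a pre-computed CheckM2 quality report; the new test file later in this commit builds the same input shape from CHECKM2_PREDICT output:

// Hypothetical usage sketch, not part of this commit.
include { GALAH } from './modules/nf-core/galah/main'

workflow {
    ch_bins = Channel.of([
        [ id:'test' ],                                // meta map
        [ file('bin1.fna.gz'), file('bin2.fna.gz') ], // placeholder genome bins
        file('checkm2_quality_report.tsv'),           // placeholder quality table (or [] to skip QC-based filtering)
        'checkm2'                                     // qc_format matching the table
    ])

    GALAH(ch_bins)
    GALAH.out.dereplicated_bins.view()
}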
17 changes: 9 additions & 8 deletions modules/nf-core/galah/meta.yml
@@ -29,17 +29,18 @@ input:
- qc_table:
type: file
description: |
(optional) Either a (CheckM)[https://nf-co.re/modules/checkm_lineagewf] summary TSV containing
information on the completeness and contamination of the input genomes (13 columns),
or a 3-column csv with the header `genome,completeness,contamination`.
In both cases the first column should contain the names of the input genome files,
minus the last file extension
(i.e. if the genome is gzipped, the genome name should retain the .fasta extension).
(optional) A summary TSV from either CheckM [https://nf-co.re/modules/checkm_lineagewf],
CheckM2 [https://nf-co.re/modules/checkm2_predict/], or a CSV
in drep-style format [https://github.com/MrOlm/drep] with three columns,
`genome,completeness,contamination`. In both cases the first column should contain the
names of the input genome files, minus the last file extension
(i.e. if the genome is gzipped, the genome name should
retain the .fasta extension).
pattern: "*.{csv,tsv}"
- qc_format:
type: string
description: Defines the type of input table in `qc_table`, if specified.
pattern: "checkm|genome_info"
pattern: "checkm|checkm2|genome_info"
output:
- tsv:
- meta:
@@ -57,7 +58,7 @@ output:
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- ${prefix}-dereplicated/*:
- ${prefix}/*:
type: file
description: The representative genomes following dereplication by galah.
pattern: "*"
133 changes: 133 additions & 0 deletions modules/nf-core/galah/tests/main.nf.test
@@ -0,0 +1,133 @@
nextflow_process {

name "Test Process GALAH"
script "../main.nf"
config "./nextflow.config"
process "GALAH"

tag "modules"
tag "modules_nfcore"
tag "galah"
tag "checkm2/databasedownload"
tag "checkm2/predict"
tag "gawk"

test("genomes - no qc_table") {

when {
process {
"""
input[0] = [
[ id:'test' ], // meta map
[file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)],
[],
[]
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match()
}
)
}

}

test("genomes - checkm2 qc_table") {

setup {
run("CHECKM2_DATABASEDOWNLOAD") {
script "../../checkm2/databasedownload/main.nf"
process {
"""
input[0] = "5571251"
"""
}
}

run("CHECKM2_PREDICT") {
script "../../checkm2/predict/main.nf"
process {
"""
input[0] = [
[ id:'test_checkm2' ], // meta map
[file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)]
]
input[1] = CHECKM2_DATABASEDOWNLOAD.out.database
"""
}
}

run("GAWK") {
script "../../gawk/main.nf"
process {
"""
input[0] = CHECKM2_PREDICT.out.checkm2_tsv
input[1] = []
"""
}
}
}

when {
process {
"""
ch_checkm_tsv = GAWK.out.output
| map { meta, tsv -> [ [id: "test"], tsv ] }
input[0] = Channel.of([
[ id:'test' ], // meta map
[file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)]
])
.combine(ch_checkm_tsv, by: 0)
.map { meta, bins, tsv -> [ meta, bins, tsv, "checkm2" ] }
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match()
}
)
}

}

test("genomes - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ], // meta map
[file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true),
file("https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)],
[],
"checkm2"
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match()
}
)
}

}

}
