diff --git a/.gitignore b/.gitignore index a42ce0162..f90b6f085 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +.nf-test/test diff --git a/.nf-core.yml b/.nf-core.yml index b538c2df7..e527d2fb0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -23,8 +23,4 @@ template: outdir: . skip_features: - igenomes - version: 2.13.0dev -update: - https://github.com/nf-core/modules.git: - nf-core: - mafft: feb29be775d9e41750180539e9a3bdce801d0609 + version: 2.12.0dev diff --git a/.nf-test.log b/.nf-test.log new file mode 100644 index 000000000..f17f57530 --- /dev/null +++ b/.nf-test.log @@ -0,0 +1,33 @@ +Jan-21 15:44:08.149 [main] INFO com.askimed.nf.test.App - nf-test 0.9.0 +Jan-21 15:44:08.182 [main] INFO com.askimed.nf.test.App - Arguments: [test, --profile, test_pplace_hmmsearch,singularity, ./tests/pipeline/pplace_hmmsearch.nf.test] +Jan-21 15:44:12.792 [main] INFO com.askimed.nf.test.App - Nextflow Version: 24.10.3 +Jan-21 15:44:12.796 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Load config from file /cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/nf-test.config... +Jan-21 15:44:19.790 [main] INFO com.askimed.nf.test.lang.dependencies.DependencyResolver - Loaded 212 files from directory /cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq in 4.903 sec +Jan-21 15:44:19.796 [main] INFO com.askimed.nf.test.lang.dependencies.DependencyResolver - Found 1 tests. +Jan-21 15:44:19.796 [main] DEBUG com.askimed.nf.test.lang.dependencies.DependencyResolver - Found tests: [/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test] +Jan-21 15:44:19.796 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files. 
+Jan-21 15:44:20.249 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Started test plan +Jan-21 15:44:20.249 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Running testsuite 'Test Workflow main.nf' from file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test'. +Jan-21 15:44:20.250 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Run test 'b466648a: test_pplace_hmmsearch'. type: com.askimed.nf.test.lang.pipeline.PipelineTest +Jan-21 15:49:18.369 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Init new snapshot file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.381 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'overall_summary_tsv' not found. +Jan-21 15:49:18.406 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'overall_summary_tsv' +Jan-21 15:49:18.497 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.498 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'barrnap' not found. +Jan-21 15:49:18.498 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'barrnap' +Jan-21 15:49:18.535 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.536 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'cutadapt' not found. 
+Jan-21 15:49:18.536 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'cutadapt' +Jan-21 15:49:18.570 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.570 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'dada2' not found. +Jan-21 15:49:18.571 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'dada2' +Jan-21 15:49:18.606 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.606 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'input' not found. +Jan-21 15:49:18.606 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'input' +Jan-21 15:49:18.659 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.662 [main] DEBUG com.askimed.nf.test.lang.extensions.Snapshot - Snapshot 'multiqc' not found. +Jan-21 15:49:18.662 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Created snapshot 'multiqc' +Jan-21 15:49:18.719 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Wrote snapshots to file '/cfs/klemming/projects/supr/snic2020-16-76/ddl/dev/ampliseq/tests/pipeline/pplace_hmmsearch.nf.test.snap' +Jan-21 15:49:18.726 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Test 'b466648a: test_pplace_hmmsearch' finished. status: PASSED +Jan-21 15:49:18.739 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Testsuite 'Test Workflow main.nf' finished. 
snapshot file: true, skipped tests: false, failed tests: false +Jan-21 15:49:18.743 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Executed 1 tests. 0 tests failed. Done! diff --git a/assets/schema_phylosearch_input.json b/assets/schema_phylosearch_input.json new file mode 100644 index 000000000..0ec59df14 --- /dev/null +++ b/assets/schema_phylosearch_input.json @@ -0,0 +1,66 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/ampliseq/master/assets/schema_pplace_sheet.json", + "title": "nf-core/phyloplace pipeline - params.pplace_sheet schema", + "description": "Schema for the file provided with params.pplace_sheet", + "type": "array", + "items": { + "type": "object", + "properties": { + "target": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Target name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "min_bitscore": { + "type": "integer", + "errorMessage": "Minimum bitscore for hits to this HMM.", + "meta": ["min_bitscore"] + }, + "alignmethod": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Method to use for aligning: 'hmmer' or 'mafft'.", + "meta": ["alignmethod"] + }, + "hmm": { + "type": "string", + "pattern": "^\\S+.hmm$", + "errorMessage": "HMMER HMM file to search sequences with.", + "meta": ["hmm"] + }, + "extract_hmm": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Name of HMM file in multi-HMM to extract.", + "meta": ["extract_hmm"] + }, + "refseqfile": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Multiple sequence alignment of reference sequences. 
Any format supported by hmmbuild in HMMER (see queryfile for examples) or MAFFT.", + "meta": ["refseqfile"] + }, + "refphylogeny": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Newick formatted file with the reference phylogeny.", + "meta": ["refphylogeny"] + }, + "model": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Phylogenetic model to use in placement, see EPA-NG documentation.", + "meta": ["model"] + }, + "taxonomy": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Tab separated file with taxonomy assignments of reference sequences.", + "meta": ["taxonomy"] + } + }, + "required": ["target", "hmm"] + } +} diff --git a/conf/modules.config b/conf/modules.config index cbbf66f0c..87606b254 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -797,6 +797,10 @@ process { ] } + withName: HMMER_HMMSEARCH { + ext.args = { meta.min_bitscore && "${meta.min_bitscore}" != "null" ? "--incT ${meta.min_bitscore}" : "" } + } + withName: 'QIIME2_INASV|QIIME2_INSEQ|QIIME2_INTAX|QIIME2_INTREE' { publishDir = [ path: { "${params.outdir}/qiime2/input" }, diff --git a/conf/test_pplace_hmmsearch.config b/conf/test_pplace_hmmsearch.config new file mode 100644 index 000000000..60dd236f8 --- /dev/null +++ b/conf/test_pplace_hmmsearch.config @@ -0,0 +1,48 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/ampliseq -profile test_pplace_hmmsearch,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 20, + memory: '16.GB', + time: '6.h' + ] + } + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata.tsv" + skip_dada_taxonomy = true + qiime_ref_taxonomy = "greengenes85" + filter_ssu = "bac" + + // this is to remove low abundance ASVs to reduce runtime of downstream processes + min_samples = 2 + min_frequency = 10 + + // pplace + pplace_sheet = 'https://raw.githubusercontent.com/erikrikarddaniel/test-datasets/phyloplace/testdata/phylosearch_input.csv' + + // Adjust taxonomic levels + tax_agglom_min = 1 + tax_agglom_max = 3 + + // Skip some steps to reduce runtime + skip_alpha_rarefaction = true + skip_fastqc = true +} diff --git a/modules.json b/modules.json index ea99de766..0d081af65 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,26 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "clustalo/align": { + "branch": "master", + "git_sha": "7b32b09fe7787c0fc6924e7b6f223a0b1daf0d2f", + "installed_by": [ + "_", + "a", + "c", + "e", + "f", + "g", + "i", + "k", + "n", + "p", + "s", + "t", + "w", + "fasta_newick_epang_gappa" + ] + }, "cutadapt": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -12,12 +32,12 @@ }, "epang/place": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fasta_newick_epang_gappa"] }, "epang/split": { "branch": "master", - "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "868cb0d7fc4862991fb7c2b4cd7289806cd53f81", "installed_by": ["fasta_newick_epang_gappa"] }, "fastqc": { @@ -27,49 +47,74 @@ }, "gappa/examineassign": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fasta_newick_epang_gappa"] }, "gappa/examinegraft": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fasta_newick_epang_gappa"] }, "gappa/examineheattree": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fasta_newick_epang_gappa"] }, "hmmer/eslalimask": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fasta_newick_epang_gappa"] }, "hmmer/eslreformat": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", "installed_by": ["fasta_newick_epang_gappa"] }, "hmmer/hmmalign": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "03a9f356a1a333923c1177c2912fa7bc61bb46f3", "installed_by": ["fasta_newick_epang_gappa"] }, "hmmer/hmmbuild": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "03a9f356a1a333923c1177c2912fa7bc61bb46f3", "installed_by": ["fasta_newick_epang_gappa"] }, + "hmmer/hmmrank": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_hmmsearch_rank_fastas"] + }, + "hmmer/hmmsearch": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_hmmsearch_rank_fastas"] + }, "kraken2/kraken2": { "branch": "master", "git_sha": 
"666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/kraken2/kraken2/kraken2-kraken2.diff" }, - "mafft": { - "branch": "master", - "git_sha": "feb29be775d9e41750180539e9a3bdce801d0609", - "installed_by": ["fasta_newick_epang_gappa"] + "mafft/align": { + "branch": "master", + "git_sha": "868cb0d7fc4862991fb7c2b4cd7289806cd53f81", + "installed_by": [ + "_", + "a", + "c", + "e", + "f", + "g", + "i", + "k", + "n", + "p", + "s", + "t", + "w", + "fasta_newick_epang_gappa" + ] }, "multiqc": { "branch": "master", @@ -82,6 +127,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/pigz/uncompress/pigz-uncompress.diff" }, + "seqtk/subseq": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fasta_hmmsearch_rank_fastas"] + }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", @@ -106,9 +156,14 @@ }, "subworkflows": { "nf-core": { + "fasta_hmmsearch_rank_fastas": { + "branch": "master", + "git_sha": "15086c852c860f785a3654cba03f0ee00533cd08", + "installed_by": ["subworkflows"] + }, "fasta_newick_epang_gappa": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "git_sha": "725f406d25254b40a4bf436159ab841d43c43a17", "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/local/hmmextract.nf b/modules/local/hmmextract.nf new file mode 100644 index 000000000..7ae388bf0 --- /dev/null +++ b/modules/local/hmmextract.nf @@ -0,0 +1,61 @@ +// This is a modified version of nf-core/hmmer/hmmfetch that only extracts, but +// does so from a single input channel to keep things synchronized. +process HMMER_HMMEXTRACT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h87f3376_2': + 'biocontainers/hmmer:3.3.2--h87f3376_2' }" + + input: + tuple val(meta), path(hmm), val(key) + + output: + tuple val(meta), path("*.hmm"), emit: hmm + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def outfile = ! key && ! keyfile ? '' : "> ${prefix}.hmm" + + // Avoid accidentally overwriting the input hmm + def move = "" + if ( "${prefix}.hmm" == "${hmm}" ) { + move = "mv ${hmm} ${prefix}.in.hmm" + hmm = "${prefix}.in.hmm" + } + + """ + $move + + hmmfetch \\ + $args \\ + $hmm \\ + $key \\ + $outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.hmm + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/clustalo/align/environment.yml b/modules/nf-core/clustalo/align/environment.yml new file mode 100644 index 000000000..80bc6a5ad --- /dev/null +++ b/modules/nf-core/clustalo/align/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/clustalo/align/main.nf b/modules/nf-core/clustalo/align/main.nf new file mode 100644 index 000000000..c220afd7b --- /dev/null +++ b/modules/nf-core/clustalo/align/main.nf @@ -0,0 +1,71 @@ +process CLUSTALO_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + path hmm_in + path hmm_batch + path profile1 + path profile2 + val compress + + output: + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_tree = tree ? "--guidetree-in=$tree" : "" + def fhmm_in = hmm_in ? "--hmm-in=${hmm_in}" : "" + def fhmm_batch = hmm_batch ? "--hmm-batch=${hmm_batch}" : "" + def fprofile1 = profile1 ? "--profile1=${profile1}" : "" + def fprofile2 = profile2 ? "--profile2=${profile2}" : "" + def write_output = compress ? "--force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-o ${prefix}.aln" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // the --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file + """ + clustalo \ + -i ${fasta} \ + $options_tree \ + ${fhmm_in} \ + ${fhmm_batch} \ + ${fprofile1} \ + ${fprofile2} \ + --threads=${task.cpus} \ + $args \ + $write_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln${compress ? 
'.gz' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/clustalo/align/meta.yml b/modules/nf-core/clustalo/align/meta.yml new file mode 100644 index 000000000..cd5964713 --- /dev/null +++ b/modules/nf-core/clustalo/align/meta.yml @@ -0,0 +1,84 @@ +name: "clustalo_align" +description: Align sequences using Clustal Omega +keywords: + - alignment + - MSA + - genomics +tools: + - "clustalo": + description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta,faa,fna}" + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. 
`[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" + - - hmm_in: + type: file + description: HMM file for profile alignment + pattern: "*.hmm" + - - hmm_batch: + type: file + description: specify HMMs for individual sequences + pattern: "*" + - - profile1: + type: file + description: Pre-aligned multiple sequence file 1 + pattern: "*.{alnfaa,faa,fa,fasta}" + - - profile2: + type: file + description: Pre-aligned multiple sequence file 2 + pattern: "*.{alnfaa,faa,fa,fasta}" + - - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. + Set to true to enable/false to disable compression. Compression is done using + pigz, and is multithreaded. +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln{.gz,}": + type: file + description: Alignment file, in gzipped fasta format + pattern: "*.aln{.gz,}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@joseespinosa" +maintainers: + - "@luisas" + - "@joseespinosa" + - "@lrauschning" diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test b/modules/nf-core/clustalo/align/tests/main.nf.test new file mode 100644 index 000000000..3b3baf0e5 --- /dev/null +++ b/modules/nf-core/clustalo/align/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process CLUSTALO_ALIGN" + script "../main.nf" + process "CLUSTALO_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "clustalo" + tag "clustalo/align" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta - uncompressed") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = [] + input[3] = [] + 
input[4] = [] + input[5] = [] + input[6] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - contigs-fasta - compressed") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("profile alignment -- hmm-in") { + + when { + process { + """ + input[0] = [ [ id: 'test'], file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phyloplace/testdata/PF14720_3_sequences.faa", checkIfExists: true) ] + input[1] = [[:],[]] + input[2] = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phyloplace/testdata/PF14720.hmm", checkIfExists: true) + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("profile alignment -- profile1") { + + when { + process { + """ + input[0] = [ [ id: 'test'], file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phyloplace/testdata/PF14720_3_sequences.faa", checkIfExists: true) ] + input[1] = [[:],[]] + input[2] = [] + input[3] = [] + input[4] = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phyloplace/testdata/PF14720_seed.alnfaa", checkIfExists: true) + input[5] = [] + input[6] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - contigs-fasta - guide_tree") { + + setup { + + run("CLUSTALO_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + 
input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - contigs-fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test.snap b/modules/nf-core/clustalo/align/tests/main.nf.test.snap new file mode 100644 index 000000000..fe33fd247 --- /dev/null +++ b/modules/nf-core/clustalo/align/tests/main.nf.test.snap @@ -0,0 +1,200 @@ +{ + "sarscov2 - contigs-fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:47:09.471999" + }, + "profile alignment -- 
hmm-in": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,9d3b05b9871b55ae5aaacedbc3f0f691" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,9d3b05b9871b55ae5aaacedbc3f0f691" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:47:26.948278" + }, + "sarscov2 - contigs-fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:49:45.969566" + }, + "sarscov2 - contigs-fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:47:21.819165" + }, + "profile alignment -- profile1": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,bc500eae88f3858f0f27800f8cd69698" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,bc500eae88f3858f0f27800f8cd69698" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + 
"nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:47:32.12986" + }, + "sarscov2 - contigs-fasta - guide_tree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "1": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "versions": [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:47:53.772195" + } +} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/tests/tags.yml b/modules/nf-core/clustalo/align/tests/tags.yml new file mode 100644 index 000000000..58bd27768 --- /dev/null +++ b/modules/nf-core/clustalo/align/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/align: + - "modules/nf-core/clustalo/align/**" diff --git a/modules/nf-core/epang/place/environment.yml b/modules/nf-core/epang/place/environment.yml index a8536b1d4..6faa55f39 100644 --- a/modules/nf-core/epang/place/environment.yml +++ b/modules/nf-core/epang/place/environment.yml @@ -1,7 +1,5 @@ -name: epang_place channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::epa-ng=0.3.8 diff --git a/modules/nf-core/epang/place/main.nf b/modules/nf-core/epang/place/main.nf index 217975ebe..3426d8a5e 100644 --- a/modules/nf-core/epang/place/main.nf +++ b/modules/nf-core/epang/place/main.nf @@ -51,4 +51,22 @@ process EPANG_PLACE { epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//') END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def queryarg = queryaln ? "--query $queryaln" : "" + def refalnarg = referencealn ? "--ref-msa $referencealn" : "" + def reftreearg = referencetree ? "--tree $referencetree" : "" + def bfastarg = bfastfile ? "--bfast $bfastfile" : "" + def binaryarg = binaryfile ? 
"--binary $binaryfile" : "" + if ( binaryfile && ( referencealn || referencetree ) ) error "[EPANG] Cannot supply both binary and reference MSA or reference tree. Check input" + """ + touch ${prefix}.epa_info.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//') + END_VERSIONS + """ } diff --git a/modules/nf-core/epang/place/meta.yml b/modules/nf-core/epang/place/meta.yml index 7d31a3497..64fc1f590 100644 --- a/modules/nf-core/epang/place/meta.yml +++ b/modules/nf-core/epang/place/meta.yml @@ -11,54 +11,67 @@ tools: documentation: "https://github.com/Pbdas/epa-ng/wiki/Full-Stack-Example" tool_dev_url: "https://github.com/Pbdas/epa-ng" doi: "10.1093/sysbio/syy054" - licence: "['GNU Affero General Public License v3.0']" + licence: ["GNU Affero General Public License v3.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - queryaln: - type: file - description: aligned query sequences in any supported format including phylip and fasta, may be gzipped - pattern: "*" - - referencealn: - type: file - description: reference alignment in any supported format including phylip and fasta, may be gzipped - pattern: "*" - - referencetree: - type: file - description: newick file containing the reference tree in which query sequences will be placed - pattern: "*" - - bfastfile: - type: file - description: file argument to the --bfast parameter - pattern: "*" - - binaryfile: - type: file - description: file argument to the --binary parameter - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - queryaln: + type: file + description: aligned query sequences in any supported format including phylip + and fasta, may be gzipped + pattern: "*" + - referencealn: + type: file + description: reference alignment in any supported format including phylip and + fasta, may be gzipped + pattern: "*" + - referencetree: + type: file + description: newick file containing the reference tree in which query sequences + will be placed + pattern: "*" + - - bfastfile: + type: file + description: file argument to the --bfast parameter + pattern: "*" + - - binaryfile: + type: file + description: file argument to the --binary parameter + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - epang: - type: directory - description: directory in which EPA-NG was run + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ./.: + type: directory + description: directory in which EPA-NG was run - jplace: - type: file - description: gzipped file with placement information - pattern: "*.jplace.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.epa_result.jplace.gz": + type: file + description: gzipped file with placement information + pattern: "*.jplace.gz" - log: - type: file - description: log file from placement - pattern: "*.log" + - "*.epa_info.log": + type: file + description: log file from placement + pattern: "*.log" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/epang/place/tests/main.nf.test b/modules/nf-core/epang/place/tests/main.nf.test new file mode 100644 index 000000000..78987c50e --- /dev/null +++ b/modules/nf-core/epang/place/tests/main.nf.test @@ -0,0 +1,79 @@ + +nextflow_process { + + name "Test Process EPANG_PLACE" + script "../main.nf" + process "EPANG_PLACE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "epang" + tag "epang/place" + + test("test-epang-place") { + + when { + process { + """ + input[0] = [ + [ id:'test', model:'LG' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/query.alnfaa.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.alnfaa.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.newick', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.epang }, // The entire folder including .command.sh, etc + { assert snapshot( + file(process.out.jplace[0][1]).name, + file(process.out.log[0]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-epang-place-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', model:'LG' ], // meta map + 
file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/query.alnfaa.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.alnfaa.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.newick', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.epang }, // The entire folder including .command.sh, etc + { assert snapshot( + process.out.jplace, + file(process.out.log[0]).name, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/epang/place/tests/main.nf.test.snap b/modules/nf-core/epang/place/tests/main.nf.test.snap new file mode 100644 index 000000000..c976c374e --- /dev/null +++ b/modules/nf-core/epang/place/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "test-epang-place-stub": { + "content": [ + [ + + ], + "test.epa_info.log", + [ + "versions.yml:md5,5f9d61fa794083a919e0f547f353ad3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T12:09:20.297486" + }, + "test-epang-place": { + "content": [ + "test.epa_result.jplace.gz", + "test.epa_info.log", + [ + "versions.yml:md5,5f9d61fa794083a919e0f547f353ad3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T12:09:15.588794" + } +} \ No newline at end of file diff --git a/modules/nf-core/epang/place/tests/nextflow.config b/modules/nf-core/epang/place/tests/nextflow.config new file mode 100644 index 000000000..9cc672a46 --- /dev/null +++ b/modules/nf-core/epang/place/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + // This is a very clear technique to pass sample specific optional value arguments.
+ // I do not think this violates any nf-core guidelines + ext.args = { " --model ${meta.model} " } +} diff --git a/modules/nf-core/epang/split/environment.yml b/modules/nf-core/epang/split/environment.yml index 5dd7ff771..6faa55f39 100644 --- a/modules/nf-core/epang/split/environment.yml +++ b/modules/nf-core/epang/split/environment.yml @@ -1,7 +1,5 @@ -name: epang_split channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::epa-ng=0.3.8 diff --git a/modules/nf-core/epang/split/main.nf b/modules/nf-core/epang/split/main.nf index 67b534911..7a83f3da8 100644 --- a/modules/nf-core/epang/split/main.nf +++ b/modules/nf-core/epang/split/main.nf @@ -34,4 +34,18 @@ process EPANG_SPLIT { epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_query.fasta + gzip ${prefix}_query.fasta + touch ${prefix}_reference.fasta + gzip ${prefix}_reference.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//') + END_VERSIONS + """ } diff --git a/modules/nf-core/epang/split/meta.yml b/modules/nf-core/epang/split/meta.yml index 5af4234f0..c91b1de5f 100644 --- a/modules/nf-core/epang/split/meta.yml +++ b/modules/nf-core/epang/split/meta.yml @@ -11,38 +11,49 @@ tools: documentation: "https://github.com/Pbdas/epa-ng/wiki/Full-Stack-Example" tool_dev_url: "https://github.com/Pbdas/epa-ng" doi: "10.1093/sysbio/syy054" - licence: "['GNU Affero General Public License v3.0']" + licence: ["GNU Affero General Public License v3.0"] + identifier: "" input: # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - refaln: - type: file - description: reference alignment in any supported format including phylip and fasta, may be gzipped - pattern: "*.{faa,fna,fa,fasta,fa,phy,aln,alnfaa,alnfna,alnfa,mfa,faa.gz,fna.gz,fa.gz,fasta.gz,fa.gz,phy.gz,aln.gz,alnfaa.gz,alnfna.gz,alnfa.gz,mfa.gz}" - - fullaln: - type: file - description: full alignment in any supported format to split into reference and query alignments - pattern: "*.{faa,fna,fa,fasta,fa,phy,aln,alnfaa,alnfna,alnfa,mfa,faa.gz,fna.gz,fa.gz,fasta.gz,fa.gz,phy.gz,aln.gz,alnfaa.gz,alnfna.gz,alnfa.gz,mfa.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - refaln: + type: file + description: reference alignment in any supported format including phylip and + fasta, may be gzipped + pattern: "*.{faa,fna,fa,fasta,fa,phy,aln,alnfaa,alnfna,alnfa,mfa,faa.gz,fna.gz,fa.gz,fasta.gz,fa.gz,phy.gz,aln.gz,alnfaa.gz,alnfna.gz,alnfa.gz,mfa.gz}" + - fullaln: + type: file + description: full alignment in any supported format to split into reference + and query alignments + pattern: "*.{faa,fna,fa,fasta,fa,phy,aln,alnfaa,alnfna,alnfa,mfa,faa.gz,fna.gz,fa.gz,fasta.gz,fa.gz,phy.gz,aln.gz,alnfaa.gz,alnfna.gz,alnfa.gz,mfa.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - query: - type: file - description: query sequence alignment in gzipped fasta format + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*query.fasta.gz": + type: file + description: query sequence alignment in gzipped fasta format - reference: - type: file - description: reference sequence alignment in gzipped fasta format + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*reference.fasta.gz": + type: file + description: reference sequence alignment in gzipped fasta format - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/epang/split/tests/main.nf.test b/modules/nf-core/epang/split/tests/main.nf.test new file mode 100644 index 000000000..b51a66439 --- /dev/null +++ b/modules/nf-core/epang/split/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process EPANG_SPLIT" + script "../main.nf" + process "EPANG_SPLIT" + + tag "modules" + tag "modules_nfcore" + tag "epang" + tag "epang/split" + tag "mafft/align" + + setup { + run("MAFFT_ALIGN") { + script "../../../mafft/align/main.nf" + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.alnfaa', checkIfExists: true) + ] + input[1] = [[id:"test2"], + file('https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_3_sequences.faa', checkIfExists: true) + ] + input[2] = [[:],[]] + input[3] = [[:],[]] + input[4] = [[:],[]] + input[5] = [[:],[]] + input[6] = false + """ + } + } + } + + + test("PF14720") { + + when { + process { + """ + input[0] = MAFFT_ALIGN.out.fas.map { + [ + [ id:'test'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.alnfaa', checkIfExists: true), + it[1] + ] + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("PF14720 - stub") { + options "-stub" + when { + process { + """ + input[0] = MAFFT_ALIGN.out.fas.map { + [ + [ id:'test'], + 
file('https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.alnfaa', checkIfExists: true), + it[1] + ] + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/epang/split/tests/main.nf.test.snap b/modules/nf-core/epang/split/tests/main.nf.test.snap new file mode 100644 index 000000000..f7f46f17c --- /dev/null +++ b/modules/nf-core/epang/split/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "PF14720 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_query.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_reference.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,83ff91cc742cb443b1793f2b28dbf21a" + ], + "query": [ + [ + { + "id": "test" + }, + "test_query.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reference": [ + [ + { + "id": "test" + }, + "test_reference.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,83ff91cc742cb443b1793f2b28dbf21a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T15:15:38.13000903" + }, + "PF14720": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.query.fasta.gz:md5,ff1159ff8a5d2587fb20309a81da3c29" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.reference.fasta.gz:md5,e7753871101c48c42fc417ce0e9b398a" + ] + ], + "2": [ + "versions.yml:md5,83ff91cc742cb443b1793f2b28dbf21a" + ], + "query": [ + [ + { + "id": "test" + }, + "test.query.fasta.gz:md5,ff1159ff8a5d2587fb20309a81da3c29" + ] + ], + "reference": [ + [ + { + "id": "test" + }, + "test.reference.fasta.gz:md5,e7753871101c48c42fc417ce0e9b398a" + ] + ], + "versions": [ + "versions.yml:md5,83ff91cc742cb443b1793f2b28dbf21a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": 
"2024-11-27T14:47:33.857838942" + } +} \ No newline at end of file diff --git a/modules/nf-core/epang/split/tests/tags.yml b/modules/nf-core/epang/split/tests/tags.yml new file mode 100644 index 000000000..11a896284 --- /dev/null +++ b/modules/nf-core/epang/split/tests/tags.yml @@ -0,0 +1,2 @@ +epang/split: + - "modules/nf-core/epang/split/**" diff --git a/modules/nf-core/gappa/examineassign/environment.yml b/modules/nf-core/gappa/examineassign/environment.yml index 4930e7245..0059e1a49 100644 --- a/modules/nf-core/gappa/examineassign/environment.yml +++ b/modules/nf-core/gappa/examineassign/environment.yml @@ -1,7 +1,5 @@ -name: gappa_examineassign channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gappa=0.8.0 diff --git a/modules/nf-core/gappa/examineassign/main.nf b/modules/nf-core/gappa/examineassign/main.nf index 940a61077..2b84cb1f1 100644 --- a/modules/nf-core/gappa/examineassign/main.nf +++ b/modules/nf-core/gappa/examineassign/main.nf @@ -39,4 +39,17 @@ process GAPPA_EXAMINEASSIGN { gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.profile.tsv + touch ${prefix}.labelled_tree.newick + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) + END_VERSIONS + """ } diff --git a/modules/nf-core/gappa/examineassign/meta.yml b/modules/nf-core/gappa/examineassign/meta.yml index 1753b1811..12c1bfe62 100644 --- a/modules/nf-core/gappa/examineassign/meta.yml +++ b/modules/nf-core/gappa/examineassign/meta.yml @@ -12,42 +12,87 @@ tools: documentation: "https://github.com/lczech/gappa/wiki" tool_dev_url: "https://github.com/lczech/gappa" doi: "10.1093/bioinformatics/btaa070" - licence: "['GPL v3']" + licence: ["GPL v3"] + identifier: biotools:GAPPA input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - jplace: - type: file - description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped or not - pattern: "*.{jplace,jplace.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - jplace: + type: file + description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped + or not + pattern: "*.{jplace,jplace.gz}" + - taxonomy: + type: file + description: taxonomy file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - examineassign: - type: directory - description: Execution directory + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ./.: + type: directory + description: Execution directory - profile: - type: file - description: profile tsv file - pattern: "*profile.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*profile.tsv": + type: file + description: profile tsv file + pattern: "*profile.tsv" - labelled_tree: - type: file - description: labelled tree in newick format - pattern: "*labelled_tree.newick" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*labelled_tree.newick": + type: file + description: labelled tree in newick format + pattern: "*labelled_tree.newick" - per_query: - type: file - description: per query taxonomy assignments in tsv format - pattern: "*per_query.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*per_query.tsv": + type: file + description: per query taxonomy assignments in tsv format + pattern: "*per_query.tsv" + - krona: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*krona.profile": + type: file + description: krona profile file + pattern: "*krona.profile" + - sativa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*sativa.tsv": + type: file + description: sativa output file + pattern: "*sativa.tsv" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/gappa/examineassign/tests/main.nf.test b/modules/nf-core/gappa/examineassign/tests/main.nf.test new file mode 100644 index 000000000..f138fef34 --- /dev/null +++ b/modules/nf-core/gappa/examineassign/tests/main.nf.test @@ -0,0 +1,79 @@ + +nextflow_process { + + name "Test Process GAPPA_EXAMINEASSIGN" + script "../main.nf" + process "GAPPA_EXAMINEASSIGN" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gappa" + tag "gappa/examineassign" + + test("test-gappa-examineassign") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/gappa_taxonomy.tsv', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examineassign }, // The entire work folder + { assert snapshot( + process.out.profile, + process.out.labelled_tree, + process.out.per_query, + process.out.krona, + process.out.sativa, + process.out.versions + ).match() + } + ) + } + } + + test("test-gappa-examineassign-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', 
checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/gappa_taxonomy.tsv', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examineassign }, // The entire work folder + { assert snapshot( + process.out.profile, + process.out.labelled_tree, + process.out.per_query, + process.out.krona, + process.out.sativa, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/gappa/examineassign/tests/main.nf.test.snap b/modules/nf-core/gappa/examineassign/tests/main.nf.test.snap new file mode 100644 index 000000000..8b0917bd1 --- /dev/null +++ b/modules/nf-core/gappa/examineassign/tests/main.nf.test.snap @@ -0,0 +1,91 @@ +{ + "test-gappa-examineassign": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.profile.tsv:md5,ce604fa03604462358b69cfbff7c1593" + ] + ], + [ + [ + { + "id": "test" + }, + "test.labelled_tree.newick:md5,39de8a46303ea757f6eb94478db1ffe0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.per_query.tsv:md5,16e1fbcc5f79588a0ed21e495cdd91ee" + ] + ], + [ + [ + { + "id": "test" + }, + "test.krona.profile:md5,19110ac998eaa7532f2a92772cd35aa6" + ] + ], + [ + [ + { + "id": "test" + }, + "test.sativa.tsv:md5,28020fc2f8440fabb5ae83b0c7114cdb" + ] + ], + [ + "versions.yml:md5,9f3e0de8bc5115c34aaff1751a4d8384" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:14:07.337881" + }, + "test-gappa-examineassign-stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.profile.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.labelled_tree.newick:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,9f3e0de8bc5115c34aaff1751a4d8384" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:14:11.960052" + } +} \ No newline at end of file 
diff --git a/modules/nf-core/gappa/examineassign/tests/nextflow.config b/modules/nf-core/gappa/examineassign/tests/nextflow.config new file mode 100644 index 000000000..549445958 --- /dev/null +++ b/modules/nf-core/gappa/examineassign/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.when = { taxonomy } + ext.args = "--per-query-results --krona --sativa" +} diff --git a/modules/nf-core/gappa/examinegraft/environment.yml b/modules/nf-core/gappa/examinegraft/environment.yml index c22460d20..0059e1a49 100644 --- a/modules/nf-core/gappa/examinegraft/environment.yml +++ b/modules/nf-core/gappa/examinegraft/environment.yml @@ -1,7 +1,5 @@ -name: gappa_examinegraft channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gappa=0.8.0 diff --git a/modules/nf-core/gappa/examinegraft/main.nf b/modules/nf-core/gappa/examinegraft/main.nf index 3efed466c..b01845db3 100644 --- a/modules/nf-core/gappa/examinegraft/main.nf +++ b/modules/nf-core/gappa/examinegraft/main.nf @@ -34,4 +34,15 @@ process GAPPA_EXAMINEGRAFT { gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.newick + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) + END_VERSIONS + """ } diff --git a/modules/nf-core/gappa/examinegraft/meta.yml b/modules/nf-core/gappa/examinegraft/meta.yml index 9dcb56bcc..f24b732e0 100644 --- a/modules/nf-core/gappa/examinegraft/meta.yml +++ b/modules/nf-core/gappa/examinegraft/meta.yml @@ -1,7 +1,9 @@ name: "gappa_examinegraft" -description: grafts query sequences from phylogenetic placement on the reference tree +description: Grafts query sequences from phylogenetic placement on the reference tree keywords: - sort + - graft + - phylogeny tools: - "gappa": description: "Genesis Applications for Phylogenetic Placement Analysis" @@ -9,31 +11,35 @@ tools: documentation: 
"https://github.com/lczech/gappa/wiki" tool_dev_url: "https://github.com/lczech/gappa" doi: "10.1093/bioinformatics/btaa070" - licence: "['GPL v3']" + licence: ["GPL v3"] + identifier: biotools:GAPPA input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - jplace: - type: file - description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped or not - pattern: "*.{jplace,jplace.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - jplace: + type: file + description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped + or not + pattern: "*.{jplace,jplace.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - newick: - type: file - description: phylogenetic tree file in newick format - pattern: "*.newick" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.newick": + type: file + description: phylogenetic tree file in newick format + pattern: "*.newick" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/gappa/examinegraft/tests/main.nf.test b/modules/nf-core/gappa/examinegraft/tests/main.nf.test new file mode 100644 index 000000000..220ab4957 --- /dev/null +++ b/modules/nf-core/gappa/examinegraft/tests/main.nf.test @@ -0,0 +1,58 @@ + +nextflow_process { + + name "Test Process GAPPA_EXAMINEGRAFT" + script "../main.nf" + process "GAPPA_EXAMINEGRAFT" + + tag "modules" + tag "modules_nfcore" + tag "gappa" + tag "gappa/examinegraft" + + test("test-gappa-examinegraft") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-gappa-examinegraft-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/gappa/examinegraft/tests/main.nf.test.snap b/modules/nf-core/gappa/examinegraft/tests/main.nf.test.snap new file mode 100644 index 000000000..e60d15f83 --- /dev/null +++ b/modules/nf-core/gappa/examinegraft/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test-gappa-examinegraft-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.newick:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + 
"versions.yml:md5,ea0faa432b03d6ccd726d3ba249ea5b5" + ], + "newick": [ + [ + { + "id": "test" + }, + "test.newick:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ea0faa432b03d6ccd726d3ba249ea5b5" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T12:19:44.870566" + }, + "test-gappa-examinegraft": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.epa_result.newick:md5,97e54b6a9576a78b774fd63a050e5474" + ] + ], + "1": [ + "versions.yml:md5,ea0faa432b03d6ccd726d3ba249ea5b5" + ], + "newick": [ + [ + { + "id": "test" + }, + "test.epa_result.newick:md5,97e54b6a9576a78b774fd63a050e5474" + ] + ], + "versions": [ + "versions.yml:md5,ea0faa432b03d6ccd726d3ba249ea5b5" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T12:19:40.543312" + } +} \ No newline at end of file diff --git a/modules/nf-core/gappa/examineheattree/environment.yml b/modules/nf-core/gappa/examineheattree/environment.yml index a5ee8d48f..0059e1a49 100644 --- a/modules/nf-core/gappa/examineheattree/environment.yml +++ b/modules/nf-core/gappa/examineheattree/environment.yml @@ -1,7 +1,5 @@ -name: gappa_examineheattree channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gappa=0.8.0 diff --git a/modules/nf-core/gappa/examineheattree/main.nf b/modules/nf-core/gappa/examineheattree/main.nf index 27368f010..8c01ab92c 100644 --- a/modules/nf-core/gappa/examineheattree/main.nf +++ b/modules/nf-core/gappa/examineheattree/main.nf @@ -42,4 +42,17 @@ process GAPPA_EXAMINEHEATTREE { gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.colours.txt + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gappa: \$(echo \$(gappa --version 2>&1 | sed 's/v//' )) + END_VERSIONS + """ } diff 
--git a/modules/nf-core/gappa/examineheattree/meta.yml b/modules/nf-core/gappa/examineheattree/meta.yml index 35e0c1e4f..2e533d367 100644 --- a/modules/nf-core/gappa/examineheattree/meta.yml +++ b/modules/nf-core/gappa/examineheattree/meta.yml @@ -12,51 +12,85 @@ tools: documentation: "https://github.com/lczech/gappa/wiki" tool_dev_url: "https://github.com/lczech/gappa" doi: "10.1093/bioinformatics/btaa070" - licence: "['GPL v3']" + licence: ["GPL v3"] + identifier: biotools:GAPPA input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - jplace: - type: file - description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped or not - pattern: "*.{jplace,jplace.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - jplace: + type: file + description: jplace file output from phylogenetic placement, e.g. EPA-NG, gzipped + or not + pattern: "*.{jplace,jplace.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - newick: - type: file - description: phylogenetic tree file in newick format - pattern: "*.newick" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.newick": + type: file + description: phylogenetic tree file in newick format + pattern: "*.newick" - nexus: - type: file - description: coloured phylogenetic tree file in nexus format - pattern: "*.nexus" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.nexus": + type: file + description: coloured phylogenetic tree file in nexus format + pattern: "*.nexus" - phyloxml: - type: file - description: coloured phylogenetic tree file in phyloxml format - pattern: "*.phyloxml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.phyloxml": + type: file + description: coloured phylogenetic tree file in phyloxml format + pattern: "*.phyloxml" - svg: - type: file - description: coloured phylogenetic tree file in svg format - pattern: "*.svg" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.svg": + type: file + description: coloured phylogenetic tree file in svg format + pattern: "*.svg" - colours: - type: file - description: colours used in plot - pattern: "*.colours.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.colours.txt": + type: file + description: colours used in plot + pattern: "*.colours.txt" - log: - type: file - description: log file from the run - pattern: "*.log" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.log": + type: file + description: log file from the run + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/gappa/examineheattree/tests/main.nf.test b/modules/nf-core/gappa/examineheattree/tests/main.nf.test new file mode 100644 index 000000000..fbb2758d0 --- /dev/null +++ b/modules/nf-core/gappa/examineheattree/tests/main.nf.test @@ -0,0 +1,68 @@ + +nextflow_process { + + name "Test Process GAPPA_EXAMINEHEATTREE" + script "../main.nf" + process "GAPPA_EXAMINEHEATTREE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gappa" + tag "gappa/examineheattree" + + test("test-gappa-examineheattree") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.newick, + process.out.nexus, + process.out.phyloxml, + file(process.out.svg[0][1]).readLines()[3..7], + file(process.out.colours[0][1]).readLines()[3..5], + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-gappa-examineheattree-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/epa_result.jplace.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/gappa/examineheattree/tests/main.nf.test.snap b/modules/nf-core/gappa/examineheattree/tests/main.nf.test.snap new file mode 100644 index 000000000..7fcc3929c --- /dev/null +++ 
b/modules/nf-core/gappa/examineheattree/tests/main.nf.test.snap @@ -0,0 +1,124 @@ +{ + "test-gappa-examineheattree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tree.newick:md5,f055e11c9b44b6d3afb502aba99d9579" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tree.nexus:md5,e1cfdcb63f3a751677d0e78fb6b9230a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tree.phyloxml:md5,ea35b07e4fd8d174fd6b2f204629cc4e" + ] + ], + [ + " ", + " ", + " ", + " ", + " " + ], + [ + " At 0.600: Label '0.6', Color #9a3398", + " At 0.800: Label '0.8', Color #4d1a4c", + " At 1.000: Label '1', Color #000000" + ], + "test.log", + [ + "versions.yml:md5,b8a834ae6c95b7aefdc5e9dc681adbc2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:05:10.885627" + }, + "test-gappa-examineheattree-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.colours.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,b8a834ae6c95b7aefdc5e9dc681adbc2" + ], + "colours": [ + [ + { + "id": "test" + }, + "test.colours.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "newick": [ + + ], + "nexus": [ + + ], + "phyloxml": [ + + ], + "svg": [ + + ], + "versions": [ + "versions.yml:md5,b8a834ae6c95b7aefdc5e9dc681adbc2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:06:48.684352" + } +} \ No newline at end of file diff --git a/modules/nf-core/gappa/examineheattree/tests/nextflow.config b/modules/nf-core/gappa/examineheattree/tests/nextflow.config new file mode 100644 index 000000000..476328abe --- /dev/null +++ b/modules/nf-core/gappa/examineheattree/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + 
withName: 'GAPPA_EXAMINEHEATTREE' { + ext.args = "--write-newick-tree --write-nexus-tree --write-phyloxml-tree --write-svg-tree" + prefix = { "${meta.id}.heat-tree." } + } +} diff --git a/modules/nf-core/hmmer/eslalimask/environment.yml b/modules/nf-core/hmmer/eslalimask/environment.yml index ed14ff863..7c62eac7e 100644 --- a/modules/nf-core/hmmer/eslalimask/environment.yml +++ b/modules/nf-core/hmmer/eslalimask/environment.yml @@ -1,7 +1,5 @@ -name: hmmer_eslalimask channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::hmmer=3.3.2 diff --git a/modules/nf-core/hmmer/eslalimask/meta.yml b/modules/nf-core/hmmer/eslalimask/meta.yml index c4a9a4ccf..4d16ca5de 100644 --- a/modules/nf-core/hmmer/eslalimask/meta.yml +++ b/modules/nf-core/hmmer/eslalimask/meta.yml @@ -11,76 +11,87 @@ tools: documentation: http://hmmer.org/documentation.html doi: "10.1371/journal.pcbi.1002195" licence: ["BSD-3-Clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - unmaskedaln: - type: file - description: multiple sequence alignment, Stockholm or other formats - pattern: "*" - - fmask_rf: - type: val - description: Flag to output optional file with final mask of non-gap RF len - - fmask_all: - type: val - description: Flag to output optional file with final mask of full aln len - - gmask_rf: - type: val - description: Flag to output optional file gap-based 0/1 mask of non-gap RF len - - gmask_all: - type: val - description: Flag to output optional file gap-based 0/1 mask of full aln len - - pmask_rf: - type: val - description: Flag to output optional file with PP-based 0/1 mask of non-gap RF len - - pmask_all: - type: val - description: Flag to output optional file with PP-based 0/1 mask of full aln len - - maskfile: - type: file - description: mask file, see program documentation - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - unmaskedaln: + type: file + description: multiple sequence alignment, Stockholm or other formats + pattern: "*" + - fmask_rf: + type: boolean + description: Flag to output optional file with final mask of non-gap RF len + - fmask_all: + type: boolean + description: Flag to output optional file with final mask of full aln len + - gmask_rf: + type: boolean + description: Flag to output optional file gap-based 0/1 mask of non-gap RF len + - gmask_all: + type: boolean + description: Flag to output optional file gap-based 0/1 mask of full aln len + - pmask_rf: + type: boolean + description: Flag to output optional file with PP-based 0/1 mask of non-gap + RF len + - pmask_all: + type: boolean + description: Flag to output optional file with PP-based 0/1 mask of full aln + len + - - maskfile: + type: file + description: mask file, see program documentation + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - maskedaln: - type: file - description: Masked alignment in gzipped Stockholm format - pattern: "*.sthlm.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.masked.sthlm.gz": + type: file + description: Masked alignment in gzipped Stockholm format + pattern: "*.sthlm.gz" - fmask_rf: - type: file - description: File with final mask of non-gap RF len - pattern: "*.fmask-rf.gz" + - "*.fmask-rf.gz": + type: file + description: File with final mask of non-gap RF len + pattern: "*.fmask-rf.gz" - fmask_all: - type: file - description: File with final mask of full aln len - pattern: "*.fmask-all.gz" + - "*.fmask-all.gz": + type: file + description: File with final mask of full aln len + pattern: "*.fmask-all.gz" - gmask_rf: - type: file - description: File with gap-based 0/1 mask of non-gap RF len - pattern: "*.gmask-rf.gz" + - "*.gmask-rf.gz": + type: file + description: File with gap-based 0/1 mask of non-gap RF len + pattern: "*.gmask-rf.gz" - gmask_all: - type: file - description: File with gap-based 0/1 mask of full aln len - pattern: "*.gmask-all.gz" + - "*.gmask-all.gz": + type: file + description: File with gap-based 0/1 mask of full aln len + pattern: "*.gmask-all.gz" - pmask_rf: - type: file - description: File with PP-based 0/1 mask of non-gap RF len - pattern: "*.pmask-rf.gz" + - "*.pmask-rf.gz": + type: file + description: File with PP-based 0/1 mask of non-gap RF len + pattern: "*.pmask-rf.gz" - pmask_all: - type: file - description: File with PP-based 0/1 mask of full aln len - pattern: "*.pmask-all.gz" + - "*.pmask-all.gz": + type: file + description: File with PP-based 0/1 mask of full aln len + pattern: "*.pmask-all.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/hmmer/eslreformat/environment.yml b/modules/nf-core/hmmer/eslreformat/environment.yml index a847b7d31..7c62eac7e 100644 --- a/modules/nf-core/hmmer/eslreformat/environment.yml +++ b/modules/nf-core/hmmer/eslreformat/environment.yml @@ -1,7 +1,5 @@ -name: 
hmmer_eslreformat channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::hmmer=3.3.2 diff --git a/modules/nf-core/hmmer/eslreformat/meta.yml b/modules/nf-core/hmmer/eslreformat/meta.yml index 7e530d59a..9aa209001 100644 --- a/modules/nf-core/hmmer/eslreformat/meta.yml +++ b/modules/nf-core/hmmer/eslreformat/meta.yml @@ -1,7 +1,11 @@ name: "hmmer_eslreformat" -description: reformats sequence files, see HMMER documentation for details. The module requires that the format is specified in ext.args in a config file, and that this comes last. See the tools help for possible values. +description: reformats sequence files, see HMMER documentation for details. The module + requires that the format is specified in ext.args in a config file, and that this + comes last. See the tools help for possible values. keywords: - sort + - hmmer + - reformat tools: - "hmmer": description: "Biosequence analysis using profile hidden Markov models" @@ -9,30 +13,33 @@ tools: documentation: http://hmmer.org/documentation.html doi: "10.1371/journal.pcbi.1002195" licence: ["BSD-3-Clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - seqfile: - type: file - description: Sequences, aligned or not, in any supported format - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - seqfile: + type: file + description: Sequences, aligned or not, in any supported format + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - seqreformated: - type: file - description: Reformated sequence file - pattern: "*.*.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.*.gz": + type: file + description: Reformatted sequence file + pattern: "*.*.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/hmmer/hmmalign/environment.yml b/modules/nf-core/hmmer/hmmalign/environment.yml index 9fa6bdafc..c5ddec5d9 100644 --- a/modules/nf-core/hmmer/hmmalign/environment.yml +++ b/modules/nf-core/hmmer/hmmalign/environment.yml @@ -1,7 +1,5 @@ -name: hmmer_hmmalign channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::hmmer=3.3.2 + - bioconda::hmmer=3.4 diff --git a/modules/nf-core/hmmer/hmmalign/main.nf b/modules/nf-core/hmmer/hmmalign/main.nf index 00ae8da2b..39b17c787 100644 --- a/modules/nf-core/hmmer/hmmalign/main.nf +++ b/modules/nf-core/hmmer/hmmalign/main.nf @@ -4,16 +4,16 @@ process HMMER_HMMALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' : - 'biocontainers/hmmer:3.3.2--h1b792b2_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/07/07c4cbd91c4459dc86b13b5cd799cacba96b27d66c276485550d299c7a4c6f8a/data' : + 'community.wave.seqera.io/library/hmmer:3.4--cb5d2dd2e85974ca' }" input: tuple val(meta), path(fasta) path hmm output: - tuple val(meta), path("*.sthlm.gz"), emit: sthlm - path "versions.yml" , emit: versions + tuple val(meta), path("*.sto.gz"), emit: sto + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,7 +25,18 @@ process HMMER_HMMALIGN { hmmalign \\ $args \\ $hmm \\ - $fasta | gzip -c > ${prefix}.sthlm.gz + $fasta | gzip -c > ${prefix}.sto.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmalign -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.sto.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/hmmer/hmmalign/meta.yml b/modules/nf-core/hmmer/hmmalign/meta.yml index c1ac8f408..fb16ba682 100644 --- a/modules/nf-core/hmmer/hmmalign/meta.yml +++ b/modules/nf-core/hmmer/hmmalign/meta.yml @@ -1,7 +1,12 @@ name: hmmer_hmmalign -description: hmmalign from the HMMER suite aligns a number of sequences to an HMM profile +description: hmmalign from the HMMER suite aligns a number of sequences to an HMM + profile keywords: - alignment + - HMMER + - profile + - amino acid + - nucleotide tools: - hmmer: description: Biosequence analysis using profile hidden Markov models @@ -9,37 +14,41 @@ tools: documentation: http://hmmer.org/documentation.html doi: "10.1371/journal.pcbi.1002195" licence: ["BSD-3-Clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - fasta: - type: file - description: Amino acid or nucleotide gzipped compressed fasta file - pattern: "*.{fna.gz,faa.gz,fasta.gz,fa.gz}" - - hmm: - type: file - description: A gzipped HMM file - pattern: "*.hmm.gz" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: Amino acid or nucleotide gzipped compressed fasta file + pattern: "*.{fna.gz,faa.gz,fasta.gz,fa.gz}" + - - hmm: + type: file + description: A gzipped HMM file + pattern: "*.hmm.gz" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + - sto: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sto.gz": + type: file + description: Multiple alignment in gzipped Stockholm format + pattern: "*.sto.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sthlm: - type: file - description: Multiple alignment in gzipped Stockholm format - pattern: "*.sthlm.gz" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" - "@jfy133" maintainers: - "@erikrikarddaniel" - "@jfy133" + - "@vagkaratzas" diff --git a/modules/nf-core/hmmer/hmmalign/tests/main.nf.test b/modules/nf-core/hmmer/hmmalign/tests/main.nf.test new file mode 100644 index 000000000..7b0368fc6 --- /dev/null +++ b/modules/nf-core/hmmer/hmmalign/tests/main.nf.test @@ -0,0 +1,58 @@ + +nextflow_process { + + name "Test Process HMMER_HMMALIGN" + script "../main.nf" + process "HMMER_HMMALIGN" + + tag "modules" + tag "modules_nfcore" + tag "hmmer" + tag "hmmer/hmmalign" + + test("test-hmmer-hmmalign") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: 
true) + ] + input[1] = file(params.modules_testdata_base_path + 'delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-hmmer-hmmalign-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/hmmer/hmmalign/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmalign/tests/main.nf.test.snap new file mode 100644 index 000000000..ae65cacb1 --- /dev/null +++ b/modules/nf-core/hmmer/hmmalign/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test-hmmer-hmmalign": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.sto.gz:md5,4ae989d5ade2aaae9578cb88ba031e8f" + ] + ], + "1": [ + "versions.yml:md5,601e13e7a1e6057766d862a828d501c4" + ], + "sto": [ + [ + { + "id": "test" + }, + "test.sto.gz:md5,4ae989d5ade2aaae9578cb88ba031e8f" + ] + ], + "versions": [ + "versions.yml:md5,601e13e7a1e6057766d862a828d501c4" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-13T12:37:26.551497424" + }, + "test-hmmer-hmmalign-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.sto.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,601e13e7a1e6057766d862a828d501c4" + ], + "sto": [ + [ + { + "id": "test" + }, + "test.sto.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,601e13e7a1e6057766d862a828d501c4" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": 
"2024-11-13T12:37:32.244343836" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmbuild/environment.yml b/modules/nf-core/hmmer/hmmbuild/environment.yml index 1957ad5c6..c5ddec5d9 100644 --- a/modules/nf-core/hmmer/hmmbuild/environment.yml +++ b/modules/nf-core/hmmer/hmmbuild/environment.yml @@ -1,7 +1,5 @@ -name: hmmer_hmmbuild channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::hmmer=3.3.2 + - bioconda::hmmer=3.4 diff --git a/modules/nf-core/hmmer/hmmbuild/main.nf b/modules/nf-core/hmmer/hmmbuild/main.nf index 3e3022fe6..8eed7fcd7 100644 --- a/modules/nf-core/hmmer/hmmbuild/main.nf +++ b/modules/nf-core/hmmer/hmmbuild/main.nf @@ -4,8 +4,8 @@ process HMMER_HMMBUILD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h87f3376_2': - 'biocontainers/hmmer:3.3.2--h1b792b2_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/07/07c4cbd91c4459dc86b13b5cd799cacba96b27d66c276485550d299c7a4c6f8a/data' : + 'community.wave.seqera.io/library/hmmer:3.4--cb5d2dd2e85974ca' }" input: tuple val(meta), path(alignment) @@ -41,4 +41,16 @@ process HMMER_HMMBUILD { hmmer: \$(echo \$(hmmbuild -h | grep HMMER | sed 's/# HMMER //' | sed 's/ .*//' 2>&1)) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.hmm.gz + touch ${prefix}.hmmbuild.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(echo \$(hmmbuild -h | grep HMMER | sed 's/# HMMER //' | sed 's/ .*//' 2>&1)) + END_VERSIONS + """ } diff --git a/modules/nf-core/hmmer/hmmbuild/meta.yml b/modules/nf-core/hmmer/hmmbuild/meta.yml index 4bf6b1cfe..9be14fe9a 100644 --- a/modules/nf-core/hmmer/hmmbuild/meta.yml +++ b/modules/nf-core/hmmer/hmmbuild/meta.yml @@ -13,30 +13,43 @@ tools: documentation: "http://hmmer.org/documentation.html" 
tool_dev_url: "https://github.com/EddyRivasLab/hmmer" doi: "10.1371/journal.pcbi.1002195" - licence: "['BSD']" + licence: ["BSD"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - alignment: - type: file - description: multiple sequence alignment in fasta, clustal, stockholm or phylip format - pattern: "*" - - mxfile: - type: file - description: read substitution score matrix, for use when building profiles from single sequences (--singlemx option) - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - alignment: + type: file + description: multiple sequence alignment in fasta, clustal, stockholm or phylip + format + pattern: "*" + - - mxfile: + type: file + description: read substitution score matrix, for use when building profiles + from single sequences (--singlemx option) + pattern: "*" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - hmm: - type: file - description: Gzipped HMM file - pattern: "*.{hmm.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.hmm.gz": + type: file + description: Gzipped HMM file + pattern: "*.{hmm.gz}" + - hmmbuildout: + - "*.hmmbuild.txt": + type: file + description: HMM build output + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" maintainers: diff --git a/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test b/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test new file mode 100644 index 000000000..635f5b661 --- /dev/null +++ b/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test @@ -0,0 +1,66 @@ + +nextflow_process { + + name "Test Process HMMER_HMMBUILD" + script "../main.nf" + process "HMMER_HMMBUILD" + + tag "modules" + tag "modules_nfcore" + tag 
"hmmer" + tag "hmmer/hmmbuild" + + test("test-hmmer-hmmbuild") { + + when { + process { + """ + input[0] = [ + [ id: 'PF14720' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/hmmer/PF14720_seed.alnfaa.gz', checkIfExists: true) + ] + input[1] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.hmmbuildout[0]).text.contains('CPU time:') }, + { assert snapshot( + file(process.out.hmm[0][1]).name, // unstable + file(process.out.hmmbuildout[0]).name, // unstable + process.out.versions + ).match() + } + ) + } + } + + test("test-hmmer-hmmbuild-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id: 'PF14720' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/hmmer/PF14720_seed.alnfaa.gz', checkIfExists: true) + ] + input[1] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test.snap new file mode 100644 index 000000000..79317b89e --- /dev/null +++ b/modules/nf-core/hmmer/hmmbuild/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "test-hmmer-hmmbuild-stub": { + "content": [ + { + "0": [ + [ + { + "id": "PF14720" + }, + "PF14720.hmm.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "PF14720.hmmbuild.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,f8a0bffcbbc58404752849403812905b" + ], + "hmm": [ + [ + { + "id": "PF14720" + }, + "PF14720.hmm.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "hmmbuildout": [ + "PF14720.hmmbuild.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,f8a0bffcbbc58404752849403812905b" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-13T12:52:29.794123574" + }, + "test-hmmer-hmmbuild": { + "content": [ + "PF14720.hmm.gz", + 
"PF14720.hmmbuild.txt", + [ + "versions.yml:md5,f8a0bffcbbc58404752849403812905b" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-13T12:52:23.95935055" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmrank/environment.yml b/modules/nf-core/hmmer/hmmrank/environment.yml new file mode 100644 index 000000000..fbce96e25 --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/environment.yml @@ -0,0 +1,11 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::r-base=4.2.3 + - conda-forge::r-data.table=1.14.8 + - conda-forge::r-dtplyr=1.3.1 + - conda-forge::r-stringi=1.8.4 + - conda-forge::r-stringr=1.5.1 + - conda-forge::r-tidyverse=2.0.0 diff --git a/modules/nf-core/hmmer/hmmrank/main.nf b/modules/nf-core/hmmer/hmmrank/main.nf new file mode 100644 index 000000000..4d34e2875 --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/main.nf @@ -0,0 +1,70 @@ +process HMMER_HMMRANK { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' : + 'biocontainers/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' }" + + input: + tuple val(meta), path(tblouts) // HMMER_HMMSEARCH.out.target_summary + + output: + tuple val(meta), path("*.hmmrank.tsv.gz"), emit: hmmrank + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + #!/usr/bin/env Rscript + library(readr) + library(dplyr) + library(tidyr) + library(stringr) + + # Read all the tblout files + + read_fwf(c('${tblouts.join("','")}'), fwf_cols(content = c(1, NA)), col_types = cols(content = col_character()), comment='#', id = 'fname') %>% + filter(! str_detect(content, '^ *#')) %>% + separate( + content, + c('accno', 't0', 'profile_desc', 't1', 'evalue', 'score', 'bias', 'f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'rest'), + '\\\\s+', extra='merge', convert = FALSE + ) %>% + transmute(profile = basename(fname) %>% str_remove('${prefix}\\\\.') %>% str_remove('.tbl.gz'), accno, profile_desc, evalue = as.double(evalue), score = as.double(score)) %>% + # Group and calculate a rank based on score and evalue; let ties be resolved by profile in alphabetical order + group_by(accno) %>% + arrange(desc(score), evalue, profile) %>% + mutate(rank = row_number()) %>% + ungroup() %>% + write_tsv('${prefix}.hmmrank.tsv.gz') + + writeLines( + c( + "\\"${task.process}\\":", + paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), + paste0(" tidyverse: ", packageVersion('tidyverse')) + ), + "versions.yml" + ) + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo \"profile\taccno\tprofile_desc\tevalue\tscore\trank\" | gzip -c > ${prefix}.hmmrank.tsv.gz + + cat 
<<-END_VERSIONS > versions.yml + "${task.process}": + R: 4.0 + tidyverse: 2.0 + END_VERSIONS + """ +} diff --git a/modules/nf-core/hmmer/hmmrank/meta.yml b/modules/nf-core/hmmer/hmmrank/meta.yml new file mode 100644 index 000000000..96f953618 --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "hmmer_hmmrank" +description: R script that scores output from multiple runs of hmmer/hmmsearch +keywords: + - hmmer + - hmmsearch + - rank +tools: + - "hmmer": + description: "Biosequence analysis using profile hidden Markov models" + homepage: http://hmmer.org/ + documentation: http://hmmer.org/documentation.html + tool_dev_url: https://github.com/EddyRivasLab/hmmer + doi: "10.1371/journal.pcbi.1002195" + licence: ["BSD"] + identifier: "" + - "R": + description: "A Language and Environment for Statistical Computing" + homepage: https://www.r-project.org/ + documentation: https://www.r-project.org/ + licence: ["GPL v2"] + identifier: "" + - "Tidyverse": + description: "Tidyverse: R packages for data science" + homepage: https://www.tidyverse.org/ + documentation: https://www.tidyverse.org/ + tool_dev_url: https://github.com/tidyverse + doi: "10.21105/joss.01686" + licence: ["MIT"] + identifier: "" + +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - tblouts: + type: file + description: per-target table outputs (--tblout) from hmmsearch + pattern: "*.tbl.gz" +output: + #Only when we have meta + - hmmrank: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.hmmrank.tsv.gz": + type: file + description: TSV file with ranked hmmer results + pattern: "*.hmmrank.tsv.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" +maintainers: + - "@erikrikarddaniel" diff --git a/modules/nf-core/hmmer/hmmrank/tests/main.nf.test b/modules/nf-core/hmmer/hmmrank/tests/main.nf.test new file mode 100644 index 000000000..eb1b04895 --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/tests/main.nf.test @@ -0,0 +1,63 @@ +// nf-core modules test hmmer/hmmrank +nextflow_process { + + name "Test Process HMMER_HMMRANK" + script "../main.nf" + process "HMMER_HMMRANK" + + tag "modules" + tag "modules_nfcore" + tag "hmmer" + tag "hmmer/hmmfetch" + tag "hmmer/hmmsearch" + tag "hmmer/hmmrank" + + test("hmmrank - 16S") { + + setup { + run("HMMER_HMMFETCH") { + script "../../hmmfetch/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id: 'arc16s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/arc.hmm")), + tuple([ id: 'bac16s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/bac.hmm")) + ]) + input[1] = Channel.of('16S_rRNA').first() + input[2] = [] + input[3] = [] + """ + } + } + run("HMMER_HMMSEARCH") { + script "../../hmmsearch/main.nf" + process { + """ + input[0] = HMMER_HMMFETCH.out.hmm.map { + [ it[0], it[1], file("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/domain_16s.fna"), false, true, false ] + } + """ + } + } + } + + when { + process { + """ + input[0] = HMMER_HMMSEARCH.out.target_summary + .collect { it[1] } + .map { [ [ id: '16S-test' ], it ] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match('16s_out') }, + { assert snapshot(process.out.hmmrank).match('16s_hmmrank') }, + { assert snapshot(process.out.versions).match('16s_versions') } + ) + } + } +} diff --git 
a/modules/nf-core/hmmer/hmmrank/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmrank/tests/main.nf.test.snap new file mode 100644 index 000000000..faaa61a4c --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "16s_versions": { + "content": [ + [ + "versions.yml:md5,32da212c74db43f1e08eccfe23f71b21" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-07T12:43:53.980488506" + }, + "16s_out": { + "content": [ + { + "0": [ + [ + { + "id": "16S-test" + }, + "16S-test.hmmrank.tsv.gz:md5,b4f48685b5b0127114c68dda279c0cbd" + ] + ], + "1": [ + "versions.yml:md5,32da212c74db43f1e08eccfe23f71b21" + ], + "hmmrank": [ + [ + { + "id": "16S-test" + }, + "16S-test.hmmrank.tsv.gz:md5,b4f48685b5b0127114c68dda279c0cbd" + ] + ], + "versions": [ + "versions.yml:md5,32da212c74db43f1e08eccfe23f71b21" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-07T12:43:53.95117124" + }, + "16s_hmmrank": { + "content": [ + [ + [ + { + "id": "16S-test" + }, + "16S-test.hmmrank.tsv.gz:md5,b4f48685b5b0127114c68dda279c0cbd" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-07T12:43:53.970807435" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmrank/tests/tags.yml b/modules/nf-core/hmmer/hmmrank/tests/tags.yml new file mode 100644 index 000000000..9fdf6b2db --- /dev/null +++ b/modules/nf-core/hmmer/hmmrank/tests/tags.yml @@ -0,0 +1,2 @@ +hmmer/hmmrank: + - "modules/nf-core/hmmer/hmmrank/**" diff --git a/modules/nf-core/hmmer/hmmsearch/environment.yml b/modules/nf-core/hmmer/hmmsearch/environment.yml new file mode 100644 index 000000000..c5ddec5d9 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::hmmer=3.4 diff --git a/modules/nf-core/hmmer/hmmsearch/main.nf 
b/modules/nf-core/hmmer/hmmsearch/main.nf new file mode 100644 index 000000000..603a865e8 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/main.nf @@ -0,0 +1,72 @@ +process HMMER_HMMSEARCH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmmer:3.4--hdbdd923_1' : + 'biocontainers/hmmer:3.4--hdbdd923_1' }" + + input: + tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain) + + output: + tuple val(meta), path('*.txt.gz') , emit: output + tuple val(meta), path('*.sto.gz') , emit: alignments , optional: true + tuple val(meta), path('*.tbl.gz') , emit: target_summary, optional: true + tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Each optional hmmsearch output is requested only when its write_* input flag is true + def output = "${prefix}.txt" + def alignment = write_align ? "-A ${prefix}.sto" : '' + def target_summary = write_target ? "--tblout ${prefix}.tbl" : '' + def domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : '' + """ + hmmsearch \\ + $args \\ + --cpu $task.cpus \\ + -o $output \\ + $alignment \\ + $target_summary \\ + $domain_summary \\ + $hmmfile \\ + $seqdb + + gzip --no-name *.txt \\ + ${write_align ? '*.sto' : ''} \\ + ${write_target ? '*.tbl' : ''} \\ + ${write_domain ? '*.domtbl' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // Optional files are touched by separate commands; chaining them with line continuations would pass the word 'touch' as a file name + """ + touch "${prefix}.txt" + ${write_align ? "touch ${prefix}.sto" : ''} + ${write_target ? "touch ${prefix}.tbl" : ''} + ${write_domain ? 
"touch ${prefix}.domtbl" : ''} + + gzip --no-name *.txt \\ + ${write_align ? '*.sto' : ''} \\ + ${write_target ? '*.tbl' : ''} \\ + ${write_domain ? '*.domtbl' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/hmmer/hmmsearch/meta.yml b/modules/nf-core/hmmer/hmmsearch/meta.yml new file mode 100644 index 000000000..0e0786591 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/meta.yml @@ -0,0 +1,92 @@ +name: hmmer_hmmsearch +description: search profile(s) against a sequence database +keywords: + - Hidden Markov Model + - HMM + - hmmer + - hmmsearch +tools: + - hmmer: + description: Biosequence analysis using profile hidden Markov models + homepage: http://hmmer.org/ + documentation: http://hmmer.org/documentation.html + tool_dev_url: https://github.com/EddyRivasLab/hmmer + doi: "10.1371/journal.pcbi.1002195" + licence: ["BSD"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - hmmfile: + type: file + description: One or more HMM profiles created with hmmbuild + pattern: "*.{hmm,hmm.gz}" + - seqdb: + type: file + description: Database of sequences in FASTA format + pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}" + - write_align: + type: boolean + description: Flag to save optional alignment output. Specify with 'true' to + save. + - write_target: + type: boolean + description: Flag to save optional per target summary. Specify with 'true' to + save. + - write_domain: + type: boolean + description: Flag to save optional per domain summary. Specify with 'true' to + save. +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.txt.gz": + type: file + description: Human readable output summarizing hmmsearch results + pattern: "*.{txt.gz}" + - alignments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sto.gz": + type: file + description: Optional multiple sequence alignment (MSA) in Stockholm format + pattern: "*.{sto.gz}" + - target_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbl.gz": + type: file + description: Optional tabular (space-delimited) summary of per-target output + pattern: "*.{tbl.gz}" + - domain_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.domtbl.gz": + type: file + description: Optional tabular (space-delimited) summary of per-domain output + pattern: "*.{domtbl.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Midnighter" +maintainers: + - "@Midnighter" diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test new file mode 100644 index 000000000..f1b59e986 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process HMMER_HMMSEARCH" + script "../main.nf" + process "HMMER_HMMSEARCH" + + tag "modules" + tag "modules_nfcore" + tag "hmmer" + tag "hmmer/hmmsearch" + + test("hmmer/hmmsearch") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + 
false + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output[0][1]).linesGzip.toString().contains('[ok]') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert path(process.out.target_summary.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert snapshot( + process.out.alignments + + process.out.versions + ).match() } + ) + } + + } + + test("hmmer/hmmsearch - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + 
true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap new file mode 100644 index 000000000..e6b22771d --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap @@ -0,0 +1,175 @@ +{ + "hmmer/hmmsearch": { + "content": [ + [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:47.293093635" + }, + "hmmer/hmmsearch - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + + ], + "domain_summary": [ + + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:57.862047944" + }, + "hmmer/hmmsearch - optional - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + [ + { + "id": 
"test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "domain_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:19:03.49192788" + }, + "hmmer/hmmsearch - optional": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,5c44c289b9e36aa1f7f3afae2005fbb7" + ], + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:52.725638562" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmsearch/tests/tags.yml b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml new file mode 100644 index 000000000..1776d21f9 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml @@ -0,0 +1,2 @@ +hmmer/hmmsearch: + - "modules/nf-core/hmmer/hmmsearch/**" diff --git a/modules/nf-core/mafft/align/environment.yml b/modules/nf-core/mafft/align/environment.yml new file mode 100644 index 000000000..97a13e685 --- /dev/null +++ b/modules/nf-core/mafft/align/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mafft=7.520 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/mafft/align/main.nf b/modules/nf-core/mafft/align/main.nf new file mode 100644 index 000000000..6031dd4bc --- /dev/null +++ b/modules/nf-core/mafft/align/main.nf @@ -0,0 +1,75 @@ +process MAFFT_ALIGN { + tag "$meta.id" + label 'process_high' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0': + 'biocontainers/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(add) + tuple val(meta3), path(addfragments) + tuple val(meta4), path(addfull) + tuple val(meta5), path(addprofile) + tuple val(meta6), path(addlong) + val(compress) + + output: + tuple val(meta), path("*.fas{.gz,}"), emit: fas + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def add = add ? "--add <(unpigz -cdf ${add})" : '' + def addfragments = addfragments ? "--addfragments <(unpigz -cdf ${addfragments})" : '' + def addfull = addfull ? "--addfull <(unpigz -cdf ${addfull})" : '' + def addprofile = addprofile ? "--addprofile <(unpigz -cdf ${addprofile})" : '' + def addlong = addlong ? "--addlong <(unpigz -cdf ${addlong})" : '' + def write_output = compress ? " | pigz -cp ${task.cpus} > ${prefix}.fas.gz" : "> ${prefix}.fas" + // this will not preserve MAFFTs return value, but mafft crashes when it receives a process substitution + if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + mafft \\ + --thread ${task.cpus} \\ + ${add} \\ + ${addfragments} \\ + ${addfull} \\ + ${addprofile} \\ + ${addlong} \\ + ${args} \\ + ${fasta} \\ + ${write_output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def add = add ? "--add ${add}" : '' + def addfragments = addfragments ? "--addfragments ${addfragments}" : '' + def addfull = addfull ? "--addfull ${addfull}" : '' + def addprofile = addprofile ? "--addprofile ${addprofile}" : '' + def addlong = addlong ? "--addlong ${addlong}" : '' + if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + touch ${prefix}.fas${compress ? '.gz' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/mafft/align/meta.yml b/modules/nf-core/mafft/align/meta.yml new file mode 100644 index 000000000..30d805382 --- /dev/null +++ b/modules/nf-core/mafft/align/meta.yml @@ -0,0 +1,108 @@ +name: mafft_align +description: Multiple sequence alignment using MAFFT +keywords: + - fasta + - msa + - multiple sequence alignment +tools: + - "mafft": + description: Multiple alignment program for amino acid or nucleotide sequences + based on fast Fourier transform + homepage: https://mafft.cbrc.jp/alignment/software/ + documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html + tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html + doi: "10.1093/nar/gkf436" + licence: ["BSD"] + identifier: biotools:MAFFT + - "pigz": + description: "Parallel implementation of the gzip 
algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file containing the sequences to align. May be gzipped or + uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - add: + type: file + description: FASTA file containing sequences to align to the sequences in `fasta` + using `--add`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - addfragments: + type: file + description: FASTA file containing sequences to align to the sequences in `fasta` + using `--addfragments`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - addfull: + type: file + description: FASTA file containing sequences to align to the sequences in `fasta` + using `--addfull`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - addprofile: + type: file + description: FASTA file containing sequences to align to the sequences in `fasta` + using `--addprofile`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - - meta6: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - addlong: + type: file + description: FASTA file containing sequences to align to the sequences in `fasta` + using `--addlong`. May be gzipped or uncompressed. 
+ pattern: "*.{fa,fasta}{.gz,}" + - - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. + Set to true to enable/false to disable compression. Compression is done using + pigz, and is multithreaded. +output: + - fas: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fas{.gz,}": + type: file + description: Aligned sequences in FASTA format. May be gzipped or uncompressed. + pattern: "*.fas{.gz,}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@MillironX" +maintainers: + - "@MillironX" + - "@Joon-Klaps" diff --git a/modules/nf-core/mafft/align/tests/main.nf.test b/modules/nf-core/mafft/align/tests/main.nf.test new file mode 100644 index 000000000..660a8977c --- /dev/null +++ b/modules/nf-core/mafft/align/tests/main.nf.test @@ -0,0 +1,249 @@ +nextflow_process { + + name "Test Process MAFFT_ALIGN" + script "../main.nf" + process "MAFFT_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "mafft" + tag "mafft/align" + + test("SARS-CoV-2 scaffolds fasta - uncompressed") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - uncompressed")} + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - compressed") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + 
input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - compressed")} + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites fasta normal") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta normal") } + ) + } + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments") } + ) + } + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites fasta full") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [[:], []] + input[3] = [[ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta full") } + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites fasta profile") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta profile") } + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites fasta long") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta long") } + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - add informative sites all sites fasta multiple") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/all_sites.fas', checkIfExists: true) + ] + input[2] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) + ] + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/mafft/align/tests/main.nf.test.snap b/modules/nf-core/mafft/align/tests/main.nf.test.snap new file mode 100644 index 000000000..fd1c21378 --- /dev/null +++ b/modules/nf-core/mafft/align/tests/main.nf.test.snap @@ -0,0 +1,282 @@ +{ + "SARS-CoV-2 scaffolds fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:35:37.370628782" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta 
multiple": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:37:18.354500948" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:36:15.023267596" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:37:05.79514229" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "1": [ + 
"versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:36:52.893313726" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:36:27.678238997" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "versions": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:36:41.02801897" + }, + "SARS-CoV-2 scaffolds fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + 
"versions.yml:md5,87ac79c217c88dbdc575ad66e868c8c0" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T16:36:03.570717213" + } +} \ No newline at end of file diff --git a/modules/nf-core/mafft/align/tests/tags.yml b/modules/nf-core/mafft/align/tests/tags.yml new file mode 100644 index 000000000..97b6666f6 --- /dev/null +++ b/modules/nf-core/mafft/align/tests/tags.yml @@ -0,0 +1,2 @@ +mafft/align: + - modules/nf-core/mafft/align/** diff --git a/modules/nf-core/mafft/main.nf b/modules/nf-core/mafft/main.nf deleted file mode 100644 index 9b7d27c5e..000000000 --- a/modules/nf-core/mafft/main.nf +++ /dev/null @@ -1,38 +0,0 @@ -process MAFFT { - tag "$meta.id" - label 'process_high' - - conda "bioconda::mafft=7.520" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mafft:7.520--hec16e2b_1': - 'biocontainers/mafft:7.520--hec16e2b_1' }" - - input: - tuple val(meta), path(fasta) - path addsequences - - output: - tuple val(meta), path("*.fas"), emit: fas - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def add = addsequences ? 
"--add $addsequences" : '' - """ - mafft \\ - --thread ${task.cpus} \\ - ${args} \\ - ${add} \\ - ${fasta} \\ - > ${prefix}.fas - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/mafft/meta.yml b/modules/nf-core/mafft/meta.yml deleted file mode 100644 index 7cbf10876..000000000 --- a/modules/nf-core/mafft/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: mafft -description: Multiple sequence alignment using MAFFT -keywords: - - fasta - - msa - - multiple sequence alignment -tools: - - mafft: - description: Multiple alignment program for amino acid or nucleotide sequences based on fast Fourier transform - homepage: https://mafft.cbrc.jp/alignment/software/ - documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html - tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html - doi: "10.1093/nar/gkf436" - licence: ["BSD"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file containing the sequences to align - pattern: "*.{fa,fasta}" - - addsequences: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` - pattern: "*.{fa,fasta}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fas: - type: file - description: Aligned sequences in FASTA format - pattern: "*.{fas}" - -authors: - - "@MillironX" diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml new file mode 100644 index 000000000..693aa5c17 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf new file mode 100644 index 000000000..d5caebc32 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -0,0 +1,58 @@ +process SEQTK_SUBSEQ { + tag "$sequences" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" + + input: + tuple val(meta), path(sequences) + path filter_list + + output: + tuple val(meta), path("*.gz"), emit: sequences + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // FASTQ input (optionally gzipped) is written with the fq extension, anything else with fa + def ext = "fa" + if ("$sequences" ==~ /.+\.(fq|fastq)(\.gz)?/) { + ext = "fq" + } + """ + seqtk \\ + subseq \\ + $args \\ + $sequences \\ + $filter_list | \\ + gzip --no-name > ${sequences}${prefix}.${ext}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // FASTQ input (optionally gzipped) is written with the fq extension, anything else with fa + def ext = "fa" + if ("$sequences" ==~ /.+\.(fq|fastq)(\.gz)?/) { + ext = "fq" + } + """ + echo "" | gzip > 
${sequences}${prefix}.${ext}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml new file mode 100644 index 000000000..2667f40d6 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -0,0 +1,49 @@ +name: seqtk_subseq +description: Select only sequences that match the filtering condition +keywords: + - filtering + - selection + - fastx +tools: + - seqtk: + description: Seqtk is a fast and lightweight tool for processing sequences in + the FASTA or FASTQ format + homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] + identifier: biotools:seqtk +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - sequences: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq,fq.gz,fa,fa.gz}" + - - filter_list: + type: file + description: BED file or a text file with a list of sequence names + pattern: "*.{bed,lst}" +output: + - sequences: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.gz": + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sidorov-si" +maintainers: + - "@sidorov-si" diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test new file mode 100644 index 000000000..fa8fad692 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SEQTK_SUBSEQ" + script "modules/nf-core/seqtk/subseq/main.nf" + process "SEQTK_SUBSEQ" + config "./standard.config" + + tag "modules" + tag 
"modules_nfcore" + tag "seqtk" + tag "seqtk/subseq" + + test("sarscov2_subseq_fa") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_subseq_fa_stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap new file mode 100644 index 000000000..75b3793ed --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_subseq_fa": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:36.155954" + }, + "sarscov2_subseq_fa_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + 
"genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:44.222329" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config new file mode 100644 index 000000000..e8d7dc302 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/standard.config @@ -0,0 +1,5 @@ +process { + withName: SEQTK_SUBSEQ { + ext.prefix = { ".filtered" } + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml new file mode 100644 index 000000000..74056bab3 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/subseq: + - "modules/nf-core/seqtk/subseq/**" diff --git a/nextflow.config b/nextflow.config index 7a9e3f0f9..93064225d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,7 @@ params { orf_end = null stop_codons = "TAA,TAG" pplace_tree = null + pplace_sheet = null pplace_aln = null pplace_model = null pplace_alnmethod = 'hmmer' @@ -279,6 +280,7 @@ profiles { test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 'conf/test_pplace.config' } + test_pplace_hmmsearch { includeConfig 'conf/test_pplace_hmmsearch.config' } test_sintax { includeConfig 'conf/test_sintax.config' } test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' } test_multiregion { includeConfig 'conf/test_multiregion.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 1db9be808..af89f5e06 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -691,7 +691,7 @@ }, "ancombc_effect_size": { "type": "number", - "default": 1, + "default": 1.0, "minimum": 0, "description": "Effect size threshold for differential abundance barplot for 
`--ancombc` and `--ancombc_formula`", "fa_icon": "fas fa-greater-than-equal" @@ -714,17 +714,17 @@ "properties": { "report_template": { "type": "string", - "default": "${projectDir}/assets/report_template.Rmd", + "default": "${projectDir}/assets/report_template.Rmd", "description": "Path to Markdown file (Rmd)" }, "report_css": { "type": "string", - "default": "${projectDir}/assets/nf-core_style.css", + "default": "${projectDir}/assets/nf-core_style.css", "description": "Path to style file (css)" }, "report_logo": { "type": "string", - "default": "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png", + "default": "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png", "description": "Path to logo file (png)" }, "report_title": { @@ -1013,5 +1013,10 @@ { "$ref": "#/$defs/institutional_config_options" } - ] + ], + "properties": { + "pplace_sheet": { + "type": "string" + } + } } diff --git a/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/main.nf b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/main.nf new file mode 100644 index 000000000..8f7a5cae7 --- /dev/null +++ b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/main.nf @@ -0,0 +1,55 @@ +include { HMMER_HMMSEARCH } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMRANK } from '../../../modules/nf-core/hmmer/hmmrank/main' +include { SEQTK_SUBSEQ } from '../../../modules/nf-core/seqtk/subseq/main' + +workflow FASTA_HMMSEARCH_RANK_FASTAS { + + take: + ch_hmms // channel: [ val(meta), file(hmm) ], i.e. 
a list of hmm profiles, each with its meta object + ch_fasta // channel: file(fasta), a single fasta file + + main: + + ch_versions = Channel.empty() + + ch_hmms + .combine(ch_fasta) + .map { [ it[0], it[1], it[2], false, true, false ] } + .set { ch_hmmsearch } + + HMMER_HMMSEARCH ( ch_hmmsearch ) + ch_versions = ch_versions.mix(HMMER_HMMSEARCH.out.versions.first()) + + HMMER_HMMSEARCH.out.target_summary + .collect { it[1] } + .map { [ [ id: 'rank' ], it ] } + .set { ch_hmmrank } + + HMMER_HMMRANK ( ch_hmmrank ) + ch_versions = ch_versions.mix(HMMER_HMMRANK.out.versions.first()) + + HMMER_HMMRANK.out.hmmrank + .map { it[1] } + .splitCsv(header: true, sep: '\t') + .filter { it.rank == '1' } + .collectFile { [ "${it.profile}.txt", "${it.accno}\n" ] } + .map { [ [ id: it.baseName ], it ] } + .groupTuple(sort: true) + .set { ch_subseq_filter } + + ch_subseq_filter + .combine(ch_fasta) + .map { [ it[0], it[2] ] } + .groupTuple(sort: true) + .set { ch_subseq_fasta } + + SEQTK_SUBSEQ ( ch_subseq_fasta, ch_subseq_filter.map { it[1] } ) + ch_versions = ch_versions.mix(SEQTK_SUBSEQ.out.versions.first()) + + emit: + hmmrank = HMMER_HMMRANK.out.hmmrank // channel: [ [ id: 'rank' ], hmmrank_tsv ] + seqfastas = SEQTK_SUBSEQ.out.sequences // channel: [ meta, fasta ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/meta.yml b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/meta.yml new file mode 100644 index 000000000..6584ff527 --- /dev/null +++ b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/meta.yml @@ -0,0 +1,46 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_hmmsearch_rank_fastas" +description: Run hmmsearch and output separate fasta files for top scoring hits to each profile +keywords: + - hmmer + - search + - rank + - fasta +components: + - hmmer/hmmsearch + - hmmer/hmmrank + - seqtk/subseq +input: + - 
ch_hmms: + type: file + description: | + The input channel containing hmm profiles + Structure: [ val(meta), path(hmm) ] + pattern: "*.{hmm}" + - ch_fasta: + type: file + description: | + The input channel containing sequences to be searched and ranked +output: + - hmmrank: + type: file + description: | + Channel containing the TSV file from ranking hmmsearch hits + Structure: [ val(meta), path(hmmrank) ] + pattern: "*.hmmrank.tsv.gz" + - seqfastas: + type: file + description: | + Channel containing subsets of sequences + Structure: [ val(meta), path(fasta) ] + pattern: "*.fa.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" +maintainers: + - "@erikrikarddaniel" diff --git a/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test new file mode 100644 index 000000000..f52301012 --- /dev/null +++ b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test @@ -0,0 +1,90 @@ +// nf-core subworkflows test fasta_hmmsearch_rank_fastas +nextflow_workflow { + + name "Test Subworkflow FASTA_HMMSEARCH_RANK_FASTAS" + script "../main.nf" + workflow "FASTA_HMMSEARCH_RANK_FASTAS" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fasta_hmmsearch_rank_fastas" + tag "hmmer" + tag "hmmer/hmmfetch" + tag "hmmer/hmmsearch" + tag "hmmer/hmmrank" + tag "seqtk/subseq" + + test("SSU rRNA - hmm/fasta") { + + setup { + run("HMMER_HMMFETCH") { + script "../../../../modules/nf-core/hmmer/hmmfetch/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id: 'arc16s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/arc.hmm")), + tuple([ id: 'bac16s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/bac.hmm")), + tuple([ id: 'euk18s' ], 
file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/euk.hmm")), + tuple([ id: 'mito12s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/mito.hmm")) + ]) + input[1] = Channel.of('16S_rRNA', '16S_rRNA', '18S_rRNA', '12S_rRNA') + input[2] = [] + input[3] = [] + """ + } + } + } + + when { + workflow { + """ + input[0] = HMMER_HMMFETCH.out.hmm + input[1] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/domain_16s.fna") + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match('16s_out') } + ) + } + } + + // The profile used here doesn't give any significant results, and no output fastas are expected + test("LSU rRNA - hmm/fasta") { + + setup { + run("HMMER_HMMFETCH") { + script "../../../../modules/nf-core/hmmer/hmmfetch/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id: 'euk28s' ], file("https://raw.githubusercontent.com/tseemann/barrnap/master/db/euk.hmm")) + ]) + input[1] = Channel.of('28S_rRNA') + input[2] = [] + input[3] = [] + """ + } + } + } + + when { + workflow { + """ + input[0] = HMMER_HMMFETCH.out.hmm + input[1] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/domain_16s.fna") + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match('28s_out') } + ) + } + } +} diff --git a/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test.snap new file mode 100644 index 000000000..7d59abb35 --- /dev/null +++ b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "28s_out": { + "content": [ + { + "0": [ + [ + { + "id": "rank" + }, + "rank.hmmrank.tsv.gz:md5,d07a8a166c4795c2c375435bc30336cd" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,73a97cda6003d8459e51bf335131fc8f", + 
"versions.yml:md5,afe27ad9b11150e9c2ff77ff1d63e587" + ], + "hmmrank": [ + [ + { + "id": "rank" + }, + "rank.hmmrank.tsv.gz:md5,d07a8a166c4795c2c375435bc30336cd" + ] + ], + "seqfastas": [ + + ], + "versions": [ + "versions.yml:md5,73a97cda6003d8459e51bf335131fc8f", + "versions.yml:md5,afe27ad9b11150e9c2ff77ff1d63e587" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T20:13:12.239271103" + }, + "16s_out": { + "content": [ + { + "0": [ + [ + { + "id": "rank" + }, + "rank.hmmrank.tsv.gz:md5,bc2b70023376221de9efd160d29a4418" + ] + ], + "1": [ + [ + { + "id": "arc16s" + }, + "domain_16s.fnaarc16s.fa.gz:md5,1b0a4343017e4183de898567b34ed079" + ], + [ + { + "id": "bac16s" + }, + "domain_16s.fnabac16s.fa.gz:md5,6a8ed428cfd947c2d6e3a58e9bdf5560" + ] + ], + "2": [ + "versions.yml:md5,73a97cda6003d8459e51bf335131fc8f", + "versions.yml:md5,a4108b7257b01a1ccdb51311656cdaca", + "versions.yml:md5,afe27ad9b11150e9c2ff77ff1d63e587" + ], + "hmmrank": [ + [ + { + "id": "rank" + }, + "rank.hmmrank.tsv.gz:md5,bc2b70023376221de9efd160d29a4418" + ] + ], + "seqfastas": [ + [ + { + "id": "arc16s" + }, + "domain_16s.fnaarc16s.fa.gz:md5,1b0a4343017e4183de898567b34ed079" + ], + [ + { + "id": "bac16s" + }, + "domain_16s.fnabac16s.fa.gz:md5,6a8ed428cfd947c2d6e3a58e9bdf5560" + ] + ], + "versions": [ + "versions.yml:md5,73a97cda6003d8459e51bf335131fc8f", + "versions.yml:md5,a4108b7257b01a1ccdb51311656cdaca", + "versions.yml:md5,afe27ad9b11150e9c2ff77ff1d63e587" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T20:05:14.789661218" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/tags.yml b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/tags.yml new file mode 100644 index 000000000..b59d6638b --- /dev/null +++ b/subworkflows/nf-core/fasta_hmmsearch_rank_fastas/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fasta_hmmsearch_rank_fastas: + 
- subworkflows/nf-core/fasta_hmmsearch_rank_fastas/** diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/main.nf b/subworkflows/nf-core/fasta_newick_epang_gappa/main.nf index 15cdb20c1..c41c02603 100644 --- a/subworkflows/nf-core/fasta_newick_epang_gappa/main.nf +++ b/subworkflows/nf-core/fasta_newick_epang_gappa/main.nf @@ -6,9 +6,11 @@ include { HMMER_ESLALIMASK as HMMER_MASKQUERY } from '../../../modules/nf- include { HMMER_ESLREFORMAT as HMMER_UNALIGNREF } from '../../../modules/nf-core/hmmer/eslreformat/main' include { HMMER_ESLREFORMAT as HMMER_AFAFORMATREF } from '../../../modules/nf-core/hmmer/eslreformat/main' include { HMMER_ESLREFORMAT as HMMER_AFAFORMATQUERY } from '../../../modules/nf-core/hmmer/eslreformat/main' -include { MAFFT } from '../../../modules/nf-core/mafft/main' +include { CLUSTALO_ALIGN } from '../../../modules/nf-core/clustalo/align/main' +include { MAFFT_ALIGN } from '../../../modules/nf-core/mafft/align/main' include { EPANG_PLACE } from '../../../modules/nf-core/epang/place/main' -include { EPANG_SPLIT } from '../../../modules/nf-core/epang/split/main' +include { EPANG_SPLIT as EPANG_SPLIT_CLUSTALO } from '../../../modules/nf-core/epang/split/main' +include { EPANG_SPLIT as EPANG_SPLIT_MAFFT } from '../../../modules/nf-core/epang/split/main' include { GAPPA_EXAMINEGRAFT as GAPPA_GRAFT } from '../../../modules/nf-core/gappa/examinegraft/main' include { GAPPA_EXAMINEASSIGN as GAPPA_ASSIGN } from '../../../modules/nf-core/gappa/examineassign/main' include { GAPPA_EXAMINEHEATTREE as GAPPA_HEATTREE } from '../../../modules/nf-core/gappa/examineheattree/main' @@ -21,24 +23,25 @@ workflow FASTA_NEWICK_EPANG_GAPPA { main: ch_versions = Channel.empty() - // Divide the input channel into two: One for hmmer and one for mafft alignment - ch_hmmer_data = ch_pp_data.filter { it.data.alignmethod == 'hmmer' } - ch_mafft_data = ch_pp_data.filter { it.data.alignmethod == 'mafft' } + // Divide the input channel into three: One each for hmmer, 
clustalo and mafft alignment + ch_hmmer_data = ch_pp_data.filter { it -> it.data.alignmethod == 'hmmer' } + ch_clustalo_data = ch_pp_data.filter { it -> it.data.alignmethod == 'clustalo' } + ch_mafft_data = ch_pp_data.filter { it -> it.data.alignmethod == 'mafft' } // 1.a.1 HMMER alignment: For entries that do not specify an hmm file, build one to use for alignment HMMER_HMMBUILD ( ch_hmmer_data - .filter { ! it.data.hmmfile } - .map { [ it.meta, it.data.refseqfile ] }, + .filter { it -> ! it.data.hmmfile } + .map { it -> [ it.meta, it.data.refseqfile ] }, [] ) // 1.a.2 This handles mixed input where some samples have hmmfile set, while others don't (sample sheet input) ch_hmm = Channel.empty() - .mix(HMMER_HMMBUILD.out.hmm.map { [ it[0], it[1] ] }) + .mix(HMMER_HMMBUILD.out.hmm.map { it -> [ it[0], it[1] ] }) .mix( ch_hmmer_data - .filter { it.data.hmmfile } - .map { [ it.meta, it.data.hmmfile ] } + .filter { it -> it.data.hmmfile } + .map { it -> [ it.meta, it.data.hmmfile ] } ) ch_versions = ch_versions.mix(HMMER_HMMBUILD.out.versions.first()) @@ -46,15 +49,15 @@ workflow FASTA_NEWICK_EPANG_GAPPA { // 1.b For entries that do not specify an hmm file, "unalign" the reference sequences before they can be aligned to the hmm. HMMER_UNALIGNREF ( ch_hmmer_data - .filter { ! it.data.hmmfile } - .map { [ it.meta, it.data.refseqfile ] } + .filter { it -> ! it.data.hmmfile } + .map { it -> [ it.meta, it.data.refseqfile ] } ) ch_hmmer_unaligned = Channel.empty() - .mix(HMMER_UNALIGNREF.out.seqreformated.map { [ it[0], it[1] ] }) + .mix(HMMER_UNALIGNREF.out.seqreformated.map { it -> [ it[0], it[1] ] }) .mix( ch_hmmer_data - .filter { it.data.hmmfile } - .map { [ it.meta, it.data.refseqfile ] } + .filter { it -> it.data.hmmfile } + .map { it -> [ it.meta, it.data.refseqfile ] } ) ch_versions = ch_versions.mix(HMMER_UNALIGNREF.out.versions) @@ -65,27 +68,27 @@ workflow FASTA_NEWICK_EPANG_GAPPA { .groupTuple(size: 2, sort: { a, b -> a =~ /\.hmm/ ? 
1 : -1 }) HMMER_HMMALIGNREF ( - ch_hmmer_alignref.map { [ it[0], it[1][0] ] }, - ch_hmmer_alignref.map { it[1][1] } + ch_hmmer_alignref.map { it -> [ it[0], it[1][0] ] }, + ch_hmmer_alignref.map { it -> it[1][1] } ) ch_versions = ch_versions.mix(HMMER_HMMALIGNREF.out.versions) ch_hmmer_alignquery = Channel.empty() - .mix(ch_hmmer_data.map { [ it.meta, it.data.queryseqfile ] }) + .mix(ch_hmmer_data.map { it -> [ it.meta, it.data.queryseqfile ] }) .mix(ch_hmm) .groupTuple(size: 2, sort: { a, b -> a =~ /\.hmm/ ? 1 : -1 }) HMMER_HMMALIGNQUERY ( - ch_hmmer_alignquery.map { [ it[0], it[1][0] ] }, - ch_hmmer_alignquery.map { it[1][1] } + ch_hmmer_alignquery.map { it -> [ it[0], it[1][0] ] }, + ch_hmmer_alignquery.map { it -> it[1][1] } ) ch_versions = ch_versions.mix(HMMER_HMMALIGNQUERY.out.versions) // 1.d Mask the alignments (Add '--rf-is-mask' ext.args in config for the process.) - HMMER_MASKREF ( HMMER_HMMALIGNREF.out.sthlm.map { [ it[0], it[1], [], [], [], [], [], [] ] }, [] ) + HMMER_MASKREF ( HMMER_HMMALIGNREF.out.sto.map { it -> [ it[0], it[1], [], [], [], [], [], [] ] }, [] ) ch_versions = ch_versions.mix(HMMER_MASKREF.out.versions) - HMMER_MASKQUERY ( HMMER_HMMALIGNQUERY.out.sthlm.map { [ it[0], it[1], [], [], [], [], [], [] ] }, [] ) + HMMER_MASKQUERY ( HMMER_HMMALIGNQUERY.out.sto.map { it -> [ it[0], it[1], [], [], [], [], [], [] ] }, [] ) ch_versions = ch_versions.mix(HMMER_MASKQUERY.out.versions) // 1.e Reformat alignments to "afa" (aligned fasta) @@ -95,30 +98,62 @@ workflow FASTA_NEWICK_EPANG_GAPPA { HMMER_AFAFORMATQUERY ( HMMER_MASKQUERY.out.maskedaln ) ch_versions = ch_versions.mix(HMMER_AFAFORMATQUERY.out.versions) - // 2.a MAFFT profile alignment of query sequences to reference alignment - MAFFT ( - ch_mafft_data.map { [ it.meta, it.data.refseqfile ] }, - ch_mafft_data.map { [ it.data.queryseqfile ] } + // 2.a CLUSTALO_ALIGN profile alignment of query sequences to reference alignment + CLUSTALO_ALIGN ( + ch_clustalo_data.map { it -> [ it.meta, 
it.data.queryseqfile ] }, + [ [:], []], + [ ], + [ ], + ch_clustalo_data.map { it -> it.data.refseqfile }, + [ ], + false ) - ch_versions = ch_versions.mix(MAFFT.out.versions) + ch_versions = ch_versions.mix(CLUSTALO_ALIGN.out.versions) // 2.b Split the profile alignment into reference and query parts - EPANG_SPLIT ( - ch_mafft_data.map { [ it.meta, it.data.refseqfile ] } - .join(MAFFT.out.fas) + EPANG_SPLIT_CLUSTALO ( + ch_clustalo_data.map { it -> [ it.meta, it.data.refseqfile ] } + .join(CLUSTALO_ALIGN.out.alignment) ) - ch_versions = ch_versions.mix(EPANG_SPLIT.out.versions) + ch_versions = ch_versions.mix(EPANG_SPLIT_CLUSTALO.out.versions) + + // 3.a MAFFT profile alignment of query sequences to reference alignment + MAFFT_ALIGN ( + ch_mafft_data.map { it -> [ it.meta, it.data.refseqfile ] }, + ch_mafft_data.map { it -> [ it.meta, it.data.queryseqfile ] }, + [ [], [] ], + [ [], [] ], + [ [], [] ], + [ [], [] ], + false + ) + ch_versions = ch_versions.mix(MAFFT_ALIGN.out.versions) + + // 3.b Split the profile alignment into reference and query parts + EPANG_SPLIT_MAFFT ( + ch_mafft_data.map { it -> [ it.meta, it.data.refseqfile ] } + .join(MAFFT_ALIGN.out.fas) + ) + ch_versions = ch_versions.mix(EPANG_SPLIT_MAFFT.out.versions) - // 3. Do the placement - ch_epang_query = ch_pp_data.map { [ it.meta, it.data.model, it.data.refphylogeny ] } + // 4. 
Do the placement + ch_epang_query = ch_pp_data.map { it -> [ it.meta, it.data.model, it.data.refphylogeny ] } .join ( HMMER_AFAFORMATQUERY.out.seqreformated ) .join ( HMMER_AFAFORMATREF.out.seqreformated ) .mix( - ch_pp_data.map { [ it.meta, it.data.model, it.data.refphylogeny ] } - .join(EPANG_SPLIT.out.query.map { [ it[0], it[1] ] } ) - .join(EPANG_SPLIT.out.reference.map { [ it[0], it[1] ] } ) + ch_pp_data.map { it -> [ it.meta, it.data.model, it.data.refphylogeny ] } + .join( + EPANG_SPLIT_CLUSTALO.out.query + .mix(EPANG_SPLIT_MAFFT.out.query) + .map { it -> [ it[0], it[1] ] } + ) + .join( + EPANG_SPLIT_CLUSTALO.out.reference + .mix(EPANG_SPLIT_MAFFT.out.reference) + .map { it -> [ it[0], it[1] ] } + ) ) - .map { [ [ id:it[0].id, model:it[1] ], it[3], it[4], it[2] ] } + .map { it -> [ [ id:it[0].id, model:it[1] ], it[3], it[4], it[2] ] } EPANG_PLACE ( ch_epang_query, @@ -126,19 +161,19 @@ workflow FASTA_NEWICK_EPANG_GAPPA { ) ch_versions = ch_versions.mix(EPANG_PLACE.out.versions) - // 7. Calculate a tree with the placed sequences + // 5. Calculate a tree with the placed sequences GAPPA_GRAFT ( EPANG_PLACE.out.jplace ) ch_versions = ch_versions.mix(GAPPA_GRAFT.out.versions) - // 8. Classify + // 6. Classify GAPPA_ASSIGN ( EPANG_PLACE.out.jplace - .map { [ [ id:it[0].id ], it[1] ] } - .join( ch_pp_data.map { [ it.meta, it.data.taxonomy ] } ) + .map { it -> [ [ id:it[0].id ], it[1] ] } + .join( ch_pp_data.map { it -> [ [ id: it.meta.id ], it.data.taxonomy ] } ) ) ch_versions = ch_versions.mix(GAPPA_ASSIGN.out.versions) - // 9. Heat tree output + // 7. 
Heat tree output GAPPA_HEATTREE ( EPANG_PLACE.out.jplace ) ch_versions = ch_versions.mix(GAPPA_HEATTREE.out.versions) @@ -151,4 +186,3 @@ workflow FASTA_NEWICK_EPANG_GAPPA { heattree = GAPPA_HEATTREE.out.svg versions = ch_versions // channel: [ versions.yml ] } - diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/meta.yml b/subworkflows/nf-core/fasta_newick_epang_gappa/meta.yml index e458915e8..fcf07b2aa 100644 --- a/subworkflows/nf-core/fasta_newick_epang_gappa/meta.yml +++ b/subworkflows/nf-core/fasta_newick_epang_gappa/meta.yml @@ -12,7 +12,8 @@ components: - hmmer/hmmalign - hmmer/eslalimask - hmmer/eslreformat - - mafft + - clustalo/align + - mafft/align - epang/place - epang/split - gappa/examinegraft @@ -59,7 +60,7 @@ input: description: Phylogenetic model to use in placement, e.g. 'LG+F' or 'GTR+I+F' - alignmethod: type: string - description: Method used for alignment, 'hmmer' or 'mafft' + description: Method used for alignment, 'hmmer', 'clustalo' or 'mafft' output: - meta: type: map diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test new file mode 100644 index 000000000..c573f2908 --- /dev/null +++ b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test @@ -0,0 +1,175 @@ +nextflow_workflow { + + name "Test Workflow FASTA_NEWICK_EPANG_GAPPA" + script "../main.nf" + config "./nextflow.config" + + workflow "FASTA_NEWICK_EPANG_GAPPA" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fasta_newick_epang_gappa" + tag "fasta_newick_epang_gappa" + tag "clustalo" + tag "clustalo/align" + tag "epang" + tag "epang/place" + tag "epang/split" + tag "gappa" + tag "gappa/examineassign" + tag "gappa/examinegraft" + tag "gappa/examineheattree" + tag "hmmer" + tag "hmmer/hmmbuild" + tag "hmmer/hmmalign" + tag "hmmer/eslalimask" + tag "hmmer/eslreformat" + tag "mafft/align" + + + test("test_fasta_newick_epang_gappa_hmmer") { + + when { + 
workflow { + """ + input[0] = Channel.of( + [ + meta: [ id: "hmmer", min_bitscore: 4 ], + data: [ + alignmethod: 'hmmer', + queryseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_3_sequences.faa", checkIfExists: true), + refseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.alnfaa", checkIfExists: true), + refphylogeny: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists: true), + model: "LG", + taxonomy: file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/gappa_taxonomy.tsv", checkIfExists: true) + ] + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.jplace.get(0).get(1)).name).match("hmmer.epa_result.jplace.gz") }, + { assert snapshot(file(workflow.out.grafted_phylogeny.get(0).get(1)).name).match("hmmer.graft.placement.epa_result.newick") }, + { assert snapshot(workflow.out.grafted_phylogeny.get(0).get(1)).md5().match("hmmer.graft.placement.epa_result.newick_lines") }, + { assert snapshot(file(workflow.out.taxonomy_profile.get(0).get(1)).name).match("hmmer.taxonomy.profile.tsv") }, + { assert snapshot(file(workflow.out.taxonomy_per_query.get(0).get(1)).name).match("hmmer.taxonomy.per_query.tsv") }, + { assert snapshot(file(workflow.out.versions.get(0)).name).match("hmmer.versions.yml") }, + { assert snapshot(workflow.out.versions.get(0)).md5().match("hmmer.versions.yml_lines") }, + ) + } + } + + test("test_fasta_newick_epang_gappa_mafft") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of( + [ + meta: [ id: "mafft" ], + data: [ + alignmethod: 'mafft', + queryseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_3_sequences.faa", checkIfExists: true), + refseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.alnfaa", 
checkIfExists: true), + refphylogeny: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists: true), + model: "LG", + taxonomy: file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/gappa_taxonomy.tsv", checkIfExists: true) + ] + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.jplace.get(0).get(1)).name).match("mafft.epa_result.jplace.gz") }, + { assert snapshot(file(workflow.out.grafted_phylogeny.get(0).get(1)).name).match("mafft.graft.placement.epa_result.newick") }, + { assert snapshot(workflow.out.grafted_phylogeny.get(0).get(1)).md5().match("mafft.graft.placement.epa_result.newick_lines") }, + { assert snapshot(file(workflow.out.taxonomy_profile.get(0).get(1)).name).match("mafft.taxonomy.profile.tsv") }, + { assert snapshot(workflow.out.grafted_phylogeny.get(0).get(1)).md5().match("mafft.taxonomy.profile.tsv_lines") }, + { assert snapshot(file(workflow.out.taxonomy_per_query.get(0).get(1)).name).match("mafft.taxonomy.per_query.tsv") }, + { assert snapshot(workflow.out.grafted_phylogeny.get(0).get(1)).md5().match("mafft.taxonomy.per_query.tsv_lines") }, + { assert snapshot(file(workflow.out.versions.get(0)).name).match("mafft.versions.yml") }, + { assert snapshot(workflow.out.versions.get(0)).md5().match("mafft.versions.yml_lines") }, + ) + } + } + + test("test_fasta_newick_epang_gappa_clustalo") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + meta: [ id: "clustalo" ], + data: [ + alignmethod: 'clustalo', + queryseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_3_sequences.faa", checkIfExists: true), + refseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.alnfaa", checkIfExists: true), + refphylogeny: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists: 
true), + model: "LG", + taxonomy: file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/gappa/gappa_taxonomy.tsv", checkIfExists: true) + ] + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.jplace.get(0).get(1)).name).match("clustalo.epa_result.jplace.gz") }, + { assert snapshot(file(workflow.out.grafted_phylogeny.get(0).get(1)).name).match("clustalo.graft.placement.epa_result.newick") }, + { assert snapshot(workflow.out.grafted_phylogeny.get(0).get(1)).md5().match("clustalo.graft.placement.epa_result.newick_lines") }, + { assert snapshot(file(workflow.out.taxonomy_profile.get(0).get(1)).name).match("clustalo.taxonomy.profile.tsv") }, + { assert snapshot(file(workflow.out.taxonomy_per_query.get(0).get(1)).name).match("clustalo.taxonomy.per_query.tsv") }, + { assert snapshot(file(workflow.out.versions.get(0)).name).match("clustalo.versions.yml") }, + { assert snapshot(workflow.out.versions.get(0)).md5().match("clustalo.versions.yml_lines") }, + ) + } + } + + test("test_fasta_newick_epang_gappa_nucl_hmmer") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + meta: [ id: "nucl_hmmer" ], + data: [ + alignmethod: 'hmmer', + queryseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyn_syn.fna", checkIfExists: true), + refseqfile: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyanos_16s.alnfna", checkIfExists: true), + refphylogeny: file("https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyanos_16s.newick", checkIfExists: true), + model: "GTR+F+I+I+R3", + taxonomy: [] + ] + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.jplace.get(0).get(1)).name).match("nucl_hmmer.epa_result.jplace.gz") }, + { assert snapshot(file(workflow.out.grafted_phylogeny.get(0).get(1)).name).match("nucl_hmmer.graft.placement.epa_result.newick") }, + { assert 
snapshot(file(workflow.out.versions.get(0)).name).match("nucl_hmmer.versions.yml") }, + { assert snapshot(workflow.out.versions.get(0)).md5().match("nucl_hmmer.versions.yml_lines") }, + ) + } + } +} diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test.snap new file mode 100644 index 000000000..f7ddb6582 --- /dev/null +++ b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/main.nf.test.snap @@ -0,0 +1,254 @@ +{ + "mafft.taxonomy.profile.tsv_lines": { + "content": "eb4ae24c330c4b4ab768f7d1c26c7215", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.978336429" + }, + "mafft.taxonomy.per_query.tsv_lines": { + "content": "eb4ae24c330c4b4ab768f7d1c26c7215", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.98411855" + }, + "nucl_hmmer.versions.yml": { + "content": [ + "versions.yml" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:17:41.730837855" + }, + "clustalo.graft.placement.epa_result.newick": { + "content": [ + "clustalo.graft.clustalo.epa_result.newick" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T19:04:43.794333" + }, + "hmmer.graft.placement.epa_result.newick": { + "content": [ + "hmmer.graft.hmmer.epa_result.newick" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.863462591" + }, + "clustalo.taxonomy.per_query.tsv": { + "content": [ + "clustalo.taxonomy.per_query.tsv" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T19:05:03.942473" + }, + "mafft.epa_result.jplace.gz": { + "content": [ + "mafft.epa_result.jplace.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.970209461" + }, + "mafft.taxonomy.profile.tsv": { + 
"content": [ + "mafft.taxonomy.profile.tsv" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.975738553" + }, + "mafft.versions.yml_lines": { + "content": "f5957093def0191f32bb294000fb9242", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.988948939" + }, + "clustalo.taxonomy.profile.tsv": { + "content": [ + "clustalo.taxonomy.profile.tsv" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T19:05:03.920786" + }, + "hmmer.taxonomy.per_query.tsv": { + "content": [ + "hmmer.taxonomy.per_query.tsv" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.874521222" + }, + "hmmer.graft.placement.epa_result.newick_lines": { + "content": "8533b159d543f75b92efcfb4fd330280", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.866852673" + }, + "hmmer.taxonomy.profile.tsv": { + "content": [ + "hmmer.taxonomy.profile.tsv" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.868720482" + }, + "mafft.graft.placement.epa_result.newick_lines": { + "content": "eb4ae24c330c4b4ab768f7d1c26c7215", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.973967772" + }, + "clustalo.versions.yml": { + "content": [ + "versions.yml" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T18:13:47.005413" + }, + "clustalo.versions.yml_lines": { + "content": "910c165ca1906b710498fd584f5c4086", + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T18:13:22.261471" + }, + "clustalo.graft.placement.epa_result.newick_lines": { + "content": "c5019b883ff08671bd71be3eb23efa52", + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T19:04:43.809323" + }, + 
"hmmer.versions.yml": { + "content": [ + "versions.yml" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.880743216" + }, + "hmmer.epa_result.jplace.gz": { + "content": [ + "hmmer.epa_result.jplace.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.857734455" + }, + "nucl_hmmer.graft.placement.epa_result.newick": { + "content": [ + "nucl_hmmer.graft.nucl_hmmer.epa_result.newick" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:17:41.728034651" + }, + "mafft.taxonomy.per_query.tsv": { + "content": [ + "mafft.taxonomy.per_query.tsv" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.980370338" + }, + "clustalo.epa_result.jplace.gz": { + "content": [ + "clustalo.epa_result.jplace.gz" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T19:04:14.103708" + }, + "mafft.graft.placement.epa_result.newick": { + "content": [ + "mafft.graft.mafft.epa_result.newick" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:59.972095017" + }, + "nucl_hmmer.epa_result.jplace.gz": { + "content": [ + "nucl_hmmer.epa_result.jplace.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:17:41.725684052" + }, + "nucl_hmmer.versions.yml_lines": { + "content": "eafb4184937621216ff03df17c4245a6", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:17:41.733022413" + }, + "hmmer.versions.yml_lines": { + "content": "eafb4184937621216ff03df17c4245a6", + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": "2024-11-14T10:16:44.884598465" + }, + "mafft.versions.yml": { + "content": [ + "versions.yml" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.04.5" + }, + "timestamp": 
"2024-11-14T10:16:59.986610634" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/tests/nextflow.config b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/nextflow.config new file mode 100644 index 000000000..1d449817c --- /dev/null +++ b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/nextflow.config @@ -0,0 +1,62 @@ +process { + withName: HMMER_HMMBUILD { + ext.prefix = { "${meta.id}.ref" } + } + withName: HMMER_UNALIGNREF { + ext.prefix = { "${meta.id}.ref.unaligned" } + ext.args = "--gapsym=- afa" + ext.postprocessing = '| sed "/^>/!s/-//g"' + } + + withName: HMMER_HMMALIGNREF { + ext.prefix = { "${meta.id}.ref.hmmalign" } + } + + withName: HMMER_HMMALIGNQUERY { + ext.prefix = { "${meta.id}.query.hmmalign" } + } + withName: 'HMMER_MASK.*' { + ext.args = '--rf-is-mask' + } + + withName: 'HMMER_MASKQUERY.*' { + ext.prefix = { "${meta.id}.query.hmmalign" } + } + + withName: 'HMMER_MASKREF.*' { + ext.prefix = { "${meta.id}.ref.hmmalign" } + } + + withName: 'HMMER_AFAFORMATQUERY.*' { + ext.prefix = { "${meta.id}.query.hmmalign.masked" } + ext.args = 'afa' + } + + withName: 'HMMER_AFAFORMATREF.*' { + ext.prefix = { "${meta.id}.ref.hmmalign.masked" } + ext.args = 'afa' + } + + withName: 'MAFFT' { + ext.args = '--keeplength' + } + + withName: 'EPANG_PLACE' { + ext.args = { "--model ${meta.model}" } + } + + withName: 'GAPPA_GRAFT' { + ext.prefix = { "${meta.id}.graft" } + } + + withName: 'GAPPA_ASSIGN' { + ext.prefix = { "${meta.id}.taxonomy" } + ext.args = "--per-query-results --krona --sativa" + ext.when = { taxonomy } + } + + withName: 'GAPPA_HEATTREE' { + ext.prefix = { "${meta.id}.heattree" } + ext.args = "--write-nexus-tree --write-phyloxml-tree --write-svg-tree" + } +} diff --git a/subworkflows/nf-core/fasta_newick_epang_gappa/tests/tags.yml b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/tags.yml new file mode 100644 index 000000000..bf3f3e613 --- /dev/null +++ 
b/subworkflows/nf-core/fasta_newick_epang_gappa/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fasta_newick_epang_gappa: + - subworkflows/nf-core/fasta_newick_epang_gappa/** diff --git a/test_out/pipeline_info/execution_trace_2024-10-28_14-07-50.txt b/test_out/pipeline_info/execution_trace_2024-10-28_14-07-50.txt new file mode 100644 index 000000000..6b739acdf --- /dev/null +++ b/test_out/pipeline_info/execution_trace_2024-10-28_14-07-50.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/tests/pipeline/pplace_hmmsearch.nf.test b/tests/pipeline/pplace_hmmsearch.nf.test new file mode 100644 index 000000000..01620c6e4 --- /dev/null +++ b/tests/pipeline/pplace_hmmsearch.nf.test @@ -0,0 +1,44 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_pplace_hmmsearch" + tag "pipeline" + + test("test_pplace_hmmsearch") { + + when { + params { + outdir = "$outputDir" + skip_qiime = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/software_versions.yml").exists() }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv"), + path("$outputDir/input/Metadata.tsv")).match("input") }, + { assert new 
File("$outputDir/pplace/bac16s.graft.bac16s.epa_result.newick").exists() }, + { assert new File("$outputDir/pplace/bac16s.taxonomy.per_query_unique.tsv").exists() }, + { assert new File("$outputDir/pplace/bac16s.heattree.tree.svg").exists() }, + { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } + ) + } + } +} diff --git a/tests/pipeline/pplace_hmmsearch.nf.test.snap b/tests/pipeline/pplace_hmmsearch.nf.test.snap new file mode 100644 index 000000000..17c1688ba --- /dev/null +++ b/tests/pipeline/pplace_hmmsearch.nf.test.snap @@ -0,0 +1,71 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4", + "Metadata.tsv:md5,060b56528bb566eed71f6dfdb52cc395" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.606845" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.536473" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,97d0949d7b4bf23f44788530dd6728ad" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.406044" + }, + "dada2": { + "content": [ + "ASV_seqs.fasta:md5,864c3e0dc9b4a7649beee0c8665dceb5", + "ASV_table.tsv:md5,2618251e597593e4d716dd9bed095539", + "DADA2_stats.tsv:md5,54a1ac8d6c5a3ff15f700c4b2dd40c86", + "DADA2_table.rds:md5,d095501019ce7ebccfa0eb801db1ed29", + "DADA2_table.tsv:md5,5c9fb0bfd70da165f0ce6a361bfe0b43" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.570997" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + 
"rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.498844" + }, + "multiqc": { + "content": [ + "multiqc_general_stats.txt:md5,7a395be7984aaa0f9154de4abed5d824", + "multiqc_cutadapt.txt:md5,b4409890ffb9c6938433c374b50c380e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-21T15:49:18.662649" + } +} \ No newline at end of file diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 4f4c24830..aa245791c 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -174,6 +174,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' include { VSEARCH_CLUSTER } from '../modules/nf-core/vsearch/cluster/main' include { FASTA_NEWICK_EPANG_GAPPA } from '../subworkflows/nf-core/fasta_newick_epang_gappa/main' +include { FASTA_HMMSEARCH_RANK_FASTAS } from '../subworkflows/nf-core/fasta_hmmsearch_rank_fastas/main' // // MODULE: Installed directly from nf-core/modules @@ -217,6 +218,7 @@ include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' include { SUMMARY_REPORT } from '../modules/local/summary_report' +include { HMMER_HMMEXTRACT } from '../modules/local/hmmextract' include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' include { FILTER_CLUSTERS } from '../modules/local/filter_clusters' @@ -285,6 +287,34 @@ workflow AMPLISEQ { error("One of `--input`, `--input_fasta`, `--input_folder` must be provided!") } + if (params.pplace_sheet) { + // + // Create channel from phylosearch file provided 
through params.pplace_sheet + // + Channel + .fromList(samplesheetToList(params.pplace_sheet, "${projectDir}/assets/schema_phylosearch_input.json")) + .map { + [ + meta: [ + id: it.id[0], + min_bitscore: it.min_bitscore[0] + ], + data: [ + alignmethod: it.alignmethod ? it.alignmethod[0] : 'hmmer', + hmm: file(it.hmm[0], checkIfExists: true), + extract_hmm: it.extract_hmm[0], + refseqfile: it.refseqfile[0] ? file(it.refseqfile[0], checkIfExists: true) : [], + refphylogeny: it.refphylogeny[0] ? file(it.refphylogeny[0], checkIfExists: true) : [], + model: it.model[0], + taxonomy: it.taxonomy[0] ? file(it.taxonomy[0], checkIfExists: true) : [] + ] + ] + } + .set {ch_phylosearch_data} + } else { + ch_phylosearch_data = Channel.empty() + } + // // Add primer info to sequencing files // @@ -641,7 +671,9 @@ workflow AMPLISEQ { } // Phylo placement - if ( params.pplace_tree ) { + ch_pp_data = Channel.empty() + ch_tax_for_phyloseq = Channel.empty() + if ( params.pplace_aln && params.pplace_tree ) { ch_pp_data = ch_fasta.map { it -> [ meta: [ id: params.pplace_name ?: 'user_tree' ], data: [ @@ -654,10 +686,63 @@ workflow AMPLISEQ { taxonomy: params.pplace_taxonomy ? file( params.pplace_taxonomy, checkIfExists: true ) : [] ] ] } - FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) - ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv - ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) + FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) + ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) + ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) + + } else if ( params.pplace_sheet ) { + // 1.
Deal with entries in the ch_phylosearch_data channel, i.e. search, then build the ch_pp_data channel + // For search entries with a named hmm to extract, call extraction + + ch_phylosearch_data + .filter { it.data.extract_hmm } + .map { [ it.meta, it.data.hmm, it.data.extract_hmm ] } + .set { ch_hmmextract } + + HMMER_HMMEXTRACT(ch_hmmextract) + ch_versions = ch_versions.mix(HMMER_HMMEXTRACT.out.versions) + + // Create an input channel for FASTA_HMMSEARCH_RANK_FASTAS by mixing the entries without an extract_hmm value from the original channel into the output of HMMER_HMMEXTRACT + HMMER_HMMEXTRACT.out.hmm + .mix( + ch_phylosearch_data + .filter { ! it.data.extract_hmm } + .map { [ it.meta, it.data.hmm ] } + ) + .set { ch_search_profiles } + + FASTA_HMMSEARCH_RANK_FASTAS(ch_search_profiles, ch_fasta) + ch_versions = ch_versions.mix(FASTA_HMMSEARCH_RANK_FASTAS.out.versions) + + FASTA_HMMSEARCH_RANK_FASTAS.out.seqfastas + .join( + ch_phylosearch_data + .filter { it.data.alignmethod && it.data.refseqfile && it.data.refphylogeny } + .map { [ [ id: it.meta.id ], it ] } + ) + .map { [ + meta: it[2].meta, + data: [ + alignmethod: it[2].data.alignmethod, + queryseqfile: it[1], + refseqfile: it[2].data.refseqfile, + refphylogeny: it[2].data.refphylogeny, + model: it[2].data.model, + taxonomy: it[2].data.taxonomy + ] + ] + } + .set { ch_pp_data } + + // + // SUBWORKFLOW: Run phylogenetic placement + // + FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) + ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) + ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) + } else { ch_pplace_tax = Channel.empty() } @@ -679,7 +764,8 @@ workflow AMPLISEQ { ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions ) ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv - ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_QIIME2 (
ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) + } else { ch_qiime2_tax = Channel.empty() }