From 92519e130a34bad9583cfbdb48632e04380999dd Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Wed, 17 May 2023 14:27:04 +0100 Subject: [PATCH 01/30] Add raw assembly stage to the start of the pipeline --- conf/modules.config | 10 +- modules.json | 187 ++++++++++++++++----- modules/local/gfa_to_fasta.nf | 28 +++ modules/local/keep_seqnames.nf | 4 +- modules/nf-core/bcftools/consensus/main.nf | 6 +- modules/nf-core/hifiasm/hifiasm.diff | 46 +++++ modules/nf-core/hifiasm/main.nf | 94 +++++++++++ modules/nf-core/hifiasm/meta.yml | 94 +++++++++++ subworkflows/local/assembly_stats.nf | 6 +- subworkflows/local/prepare_input.nf | 12 +- subworkflows/local/raw_assembly.nf | 45 +++++ workflows/genomeassembly.nf | 45 +++-- 12 files changed, 507 insertions(+), 70 deletions(-) create mode 100644 modules/local/gfa_to_fasta.nf create mode 100644 modules/nf-core/hifiasm/hifiasm.diff create mode 100644 modules/nf-core/hifiasm/main.nf create mode 100644 modules/nf-core/hifiasm/meta.yml create mode 100644 subworkflows/local/raw_assembly.nf diff --git a/conf/modules.config b/conf/modules.config index 0c51bee0..401e8831 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,6 +32,12 @@ process { ] } + // Set up of the raw assembly pipeline + withName: HIFIASM { + ext.args = "--primary" + } + // End of Set up of the raw assembly pipeline + // Set up of the polishing pipeline withName: LONGRANGER_ALIGN { @@ -45,6 +51,7 @@ process { // Filter by mapping quality, dont keep alternative homozygotes, // keep longer allele ext.args = '-i\'QUAL>1 && (GT="AA" || GT="Aa")\' -Hla' + ext.prefix = { "${meta.id}.consensus" } } withName: BCFTOOLS_INDEX { @@ -180,7 +187,8 @@ process { } withName: 'FASTK_FASTK' { - ext.args = "-k31 -t" + //FIX THIS FOR THE REAL DATASET -k31 + ext.args = "-k17 -t" } withName: 'FASTK_HISTEX' { diff --git a/modules.json b/modules.json index 43ccb18d..b390225e 100644 --- a/modules.json +++ b/modules.json @@ -8,233 +8,328 @@ "bcftools/concat": { 
"branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "hifiasm": { + "branch": "master", + "git_sha": "07b737b47d33d8d980e5e0c09736764f450980a3", + "patch": "modules/nf-core/hifiasm/hifiasm.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", 
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fastq": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } diff --git 
a/modules/local/gfa_to_fasta.nf b/modules/local/gfa_to_fasta.nf new file mode 100644 index 00000000..edeb877d --- /dev/null +++ b/modules/local/gfa_to_fasta.nf @@ -0,0 +1,28 @@ +process GFA_TO_FASTA { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'quay.io/biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(gfa) + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '' + """ + prefix=\$(basename $gfa .gfa) + awk '/^S/{print ">"\$2;print \$3}' $gfa > \${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + GNU Awk: \$(echo \$(awk --version | head -n1 | cut -f3 -d' ' | sed 's/,//')) + END_VERSIONS + """ +} diff --git a/modules/local/keep_seqnames.nf b/modules/local/keep_seqnames.nf index 178364ee..8046a0ae 100644 --- a/modules/local/keep_seqnames.nf +++ b/modules/local/keep_seqnames.nf @@ -7,7 +7,7 @@ process KEEP_SEQNAMES { 'ubuntu:20.04' }" input: - tuple val(meta), path(fai) + tuple val(meta), path(fa) output: path "*seq.lst" , emit: seqlist @@ -19,7 +19,7 @@ process KEEP_SEQNAMES { script: def prefix = meta.prefix ?: '' """ - cut -f1 $fai > seq.lst + grep '>' $fa | cut -f1 | sed 's/>//' > seq.lst cat <<-END_VERSIONS > versions.yml "${task.process}": cut: \$(cut --version | head -n 1 | awk '{print \$NF}') diff --git a/modules/nf-core/bcftools/consensus/main.nf b/modules/nf-core/bcftools/consensus/main.nf index a32d94b1..db2c758a 100644 --- a/modules/nf-core/bcftools/consensus/main.nf +++ b/modules/nf-core/bcftools/consensus/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONSENSUS { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'quay.io/biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf), path(tbi), path(fasta) diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff new file mode 100644 index 00000000..6ac17b0a --- /dev/null +++ b/modules/nf-core/hifiasm/hifiasm.diff @@ -0,0 +1,46 @@ +Changes in module 'nf-core/hifiasm' +--- modules/nf-core/hifiasm/main.nf ++++ modules/nf-core/hifiasm/main.nf +@@ -4,8 +4,8 @@ + + conda "bioconda::hifiasm=0.18.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/hifiasm:0.18.5--h5b5514e_0' : +- 'quay.io/biocontainers/hifiasm:0.18.5--h5b5514e_0' }" ++ 'https://depot.galaxyproject.org/singularity/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' : ++ 'quay.io/biocontainers/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' }" + + input: + tuple val(meta), path(reads) +@@ -13,6 +13,7 @@ + path maternal_kmer_dump + path hic_read1 + path hic_read2 ++ path hic_reads_cram + + output: + tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs +@@ -22,8 +23,10 @@ + tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true + tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true + tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true +- tuple val(meta), path("*.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true +- tuple val(meta), path("*.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.p_ctg.gfa") , emit: 
hic_primary_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.a_ctg.gfa") , emit: hic_alternate_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true + path "versions.yml" , emit: versions + + when: +@@ -32,6 +35,8 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" ++ def hic_read1 = hic_reads_cram ? "<( samtools cat $hic_reads_cram | samtools fastq -n -f0x40 -F0xB00 )" : "" ++ def hic_read2 = hic_reads_cram ? "<( samtools cat $hic_reads_cram | samtools fastq -n -f0x80 -F0xB00 )" : "" + if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) { + error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" + } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { + +************************************************************ diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf new file mode 100644 index 00000000..0e967256 --- /dev/null +++ b/modules/nf-core/hifiasm/main.nf @@ -0,0 +1,94 @@ +process HIFIASM { + tag "$meta.id" + label 'process_high' + + conda "bioconda::hifiasm=0.18.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' : + 'quay.io/biocontainers/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' }" + + input: + tuple val(meta), path(reads) + path paternal_kmer_dump + path maternal_kmer_dump + path hic_read1 + path hic_read2 + path hic_reads_cram + + output: + tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs + tuple val(meta), path("*.ec.bin") , emit: corrected_reads + tuple val(meta), path("*.ovlp.source.bin") , emit: source_overlaps + tuple val(meta), path("*.ovlp.reverse.bin"), emit: reverse_overlaps + tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true + tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true + tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true + tuple val(meta), path("*.asm.hic.p_ctg.gfa") , emit: hic_primary_contigs , optional: true + tuple val(meta), path("*.asm.hic.a_ctg.gfa") , emit: hic_alternate_contigs , optional: true + tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true + tuple val(meta), path("*.asm.hic.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def hic_read1 = hic_reads_cram ? "<( samtools cat $hic_reads_cram | samtools fastq -n -f0x40 -F0xB00 )" : "" + def hic_read2 = hic_reads_cram ? 
"<( samtools cat $hic_reads_cram | samtools fastq -n -f0x80 -F0xB00 )" : "" + if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) { + error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" + } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { + error "Hifiasm Trio-binning requires maternal data" + } else if (!(paternal_kmer_dump) && (maternal_kmer_dump)) { + error "Hifiasm Trio-binning requires paternal data" + } else if ((paternal_kmer_dump) && (maternal_kmer_dump)) { + """ + hifiasm \\ + $args \\ + -o ${prefix}.asm \\ + -t $task.cpus \\ + -1 $paternal_kmer_dump \\ + -2 $maternal_kmer_dump \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ + } else if ((hic_read1) && !(hic_read2)) { + error "Hifiasm Hi-C integrated requires paired-end data (only R1 specified here)" + } else if (!(hic_read1) && (hic_read2)) { + error "Hifiasm Hi-C integrated requires paired-end data (only R2 specified here)" + } else if ((hic_read1) && (hic_read2)) { + """ + hifiasm \\ + $args \\ + -o ${prefix}.asm \\ + -t $task.cpus \\ + --h1 $hic_read1 \\ + --h2 $hic_read2 \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ + } else { // Phasing with Hi-C data is not supported yet + """ + hifiasm \\ + $args \\ + -o ${prefix}.asm \\ + -t $task.cpus \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/hifiasm/meta.yml b/modules/nf-core/hifiasm/meta.yml new file mode 100644 index 00000000..775f3a37 --- /dev/null +++ b/modules/nf-core/hifiasm/meta.yml @@ -0,0 +1,94 @@ +name: hifiasm +description: Whole-genome assembly using PacBio HiFi reads +keywords: + - genome assembly + - haplotype resolution + - phasing + - PacBio + - HiFi + - long reads +tools: + - hifiasm: + 
description: Haplotype-resolved assembler for accurate HiFi reads + homepage: https://github.com/chhylp123/hifiasm + documentation: https://github.com/chhylp123/hifiasm + tool_dev_url: https://github.com/chhylp123/hifiasm + doi: "10.1038/s41592-020-01056-5" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file with PacBio HiFi reads + pattern: "*.{fastq}" + - paternal_kmer_dump: + type: file + description: Yak kmer dump file for paternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + - maternal_kmer_dump: + type: file + description: Yak kmer dump file for maternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + - use_parental_kmers: + type: logical + description: A flag (true or false) signalling if the module should use the paternal and maternal kmer dumps. + - hic_read1: + type: file + description: Hi-C data Forward reads. + - hic_read2: + type: file + description: Hi-C data Reverse reads. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - raw_unitigs: + type: file + description: Raw unitigs + pattern: "*.r_utg.gfa" + - processed_unitigs: + type: file + description: Processed unitigs + pattern: "*.p_utg.gfa" + - primary_contigs: + type: file + description: Primary contigs + pattern: "*.asm.p_ctg.gfa" + - alternate_contigs: + type: file + description: Alternative contigs + pattern: "*.asm.a_ctg.gfa" + - paternal_contigs: + type: file + description: Paternal contigs + pattern: "*.hap1.p_ctg.gfa" + - maternal_contigs: + type: file + description: Maternal contigs + pattern: "*.hap2.p_ctg.gfa" + - corrected_reads: + type: file + description: Corrected reads + pattern: "*.ec.bin" + - source_overlaps: + type: file + description: Source overlaps + pattern: "*.ovlp.source.bin" + - reverse_overlaps: + type: file + description: Reverse overlaps + pattern: "*.ovlp.reverse.bin" + +authors: + - "@sidorov-si" + - "@scorreard" diff --git a/subworkflows/local/assembly_stats.nf b/subworkflows/local/assembly_stats.nf index 1c052875..6541be3c 100644 --- a/subworkflows/local/assembly_stats.nf +++ b/subworkflows/local/assembly_stats.nf @@ -36,7 +36,11 @@ workflow GENOME_STATISTICS { ch_versions = ch_versions.mix(BUSCO.out.versions.first()) // MerquryFK - hist.join(ktab).join(assembly).set{ ch_merq } + hist.join(ktab).join(assembly) + .map{ meta, hist, ktab, primary, hap -> + hap.size() ? 
[ meta, hist, ktab, primary, hap ] : + [ meta, hist, ktab, primary, [] ] } + .set{ ch_merq } MERQURYFK_MERQURYFK ( ch_merq ) ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf index 874107c3..2c95eb9b 100644 --- a/subworkflows/local/prepare_input.nf +++ b/subworkflows/local/prepare_input.nf @@ -34,7 +34,7 @@ workflow PREPARE_INPUT { // Prepare primary assembly_input.primary_ch.map { fasta -> - effect = fasta.endsWith('.gz') ? 'gunzip' : 'none' + effect = fasta ? fasta.endsWith('.gz') ? 'gunzip' : 'none' : 'empty' [ ['effect':effect], fasta ] } .branch { @@ -43,6 +43,8 @@ workflow PREPARE_INPUT { return [ [:], fasta ] geno : meta.effect == "none" return [ [:], fasta ] + empty : meta.effect == "empty" + return [] } .set { ch_asm } @@ -57,7 +59,7 @@ workflow PREPARE_INPUT { // Prepare haplotigs assembly_input.haplotigs_ch.map { fasta -> - effect = fasta.endsWith('.gz') ? 'gunzip' : 'none' + effect = fasta ? fasta.endsWith('.gz') ? 'gunzip' : 'none' : 'empty' [ ['effect':effect], fasta ] } .branch { @@ -66,6 +68,8 @@ workflow PREPARE_INPUT { return [ [:], fasta ] geno : meta.effect == "none" return [ [:], fasta ] + empty : meta.effect == "empty" + return [] } .set { ch_asm_hap } @@ -100,11 +104,11 @@ workflow PREPARE_INPUT { data.pacbio.reads.collect { file( it.reads, checkIfExists: true ) } ] : []) hic_ch: ( data.HiC ? [ [id: data.id, datatype: "hic", read_group: "\'@RG\\tID:" + data.id + "\\tPL:ILLUMINA" + "\\tSM:" + data.id + "\'" ], - data.HiC.reads.collect { file( it.reads, checkIfExists: true ) }, + data.HiC.reads.collect { file( it.reads, checkIfExists: true ) }, data.HiC.arima_motif ] : []) busco_ch : ( data.busco ? [ [id: data.id ], - file(data.busco.lineages_path, checkIfExists: true), + data.busco.lineages_path ? 
file(data.busco.lineages_path, checkIfExists: true) : [], data.busco.lineage ] : [] ) } diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf new file mode 100644 index 00000000..3c8a1dd7 --- /dev/null +++ b/subworkflows/local/raw_assembly.nf @@ -0,0 +1,45 @@ +include { HIFIASM as HIFIASM_PRI } from '../../modules/nf-core/hifiasm/main' +include { HIFIASM as HIFIASM_HIC } from '../../modules/nf-core/hifiasm/main' + +include { GFA_TO_FASTA as GFA_TO_FASTA_PRI } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_ALT } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_PRI_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_ALT_HIC } from '../../modules/local/gfa_to_fasta' + +workflow RAW_ASSEMBLY { + take: + hifi_reads // channel: [ val(meta), [ datafile ] ] + hic_reads // channel: [ datafile ] + + main: + ch_versions = Channel.empty() + + HIFIASM_PRI(hifi_reads, [], [], [], [], []) + ch_versions = ch_versions.mix(HIFIASM_PRI.out.versions) + + GFA_TO_FASTA_PRI( HIFIASM_PRI.out.primary_contigs ) + GFA_TO_FASTA_ALT( HIFIASM_PRI.out.alternate_contigs ) + ch_versions = ch_versions.mix(GFA_TO_FASTA_PRI.out.versions) + + HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) + GFA_TO_FASTA_PRI_HIC( HIFIASM_HIC.out.hic_primary_contigs ) + GFA_TO_FASTA_ALT_HIC( HIFIASM_HIC.out.hic_alternate_contigs ) + + emit: + raw_unitigs = HIFIASM_PRI.out.raw_unitigs + source_overlaps = HIFIASM_PRI.out.source_overlaps + reverse_overlaps = HIFIASM_PRI.out.reverse_overlaps + corrected_reads = HIFIASM_PRI.out.corrected_reads + primary_contigs_gfa = HIFIASM_PRI.out.primary_contigs + alternate_contigs_gfa = HIFIASM_PRI.out.alternate_contigs + processed_unitigs = HIFIASM_PRI.out.processed_unitigs + + primary_contigs = GFA_TO_FASTA_PRI.out.fasta + alternate_contigs = GFA_TO_FASTA_ALT.out.fasta + primary_hic_contigs_gfa = HIFIASM_HIC.out.hic_primary_contigs + 
alternate_hic_contigs_gfa = HIFIASM_HIC.out.hic_alternate_contigs + phased_hic_contigs_hap1_gfa = HIFIASM_HIC.out.paternal_contigs + phased_hic_contigs_hap2_gfa = HIFIASM_HIC.out.maternal_contigs + + versions = ch_versions +} diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index a8395913..e49bf371 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -27,6 +27,7 @@ if (params.polishing_on) { polishing_on = params.polishing_on } else { polishing // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' +include { RAW_ASSEMBLY } from '../subworkflows/local/raw_assembly' include { GENOMESCOPE_MODEL } from '../subworkflows/local/genomescope_model' include { PURGE_DUPS as PURGE_DUPS_PRI } from '../subworkflows/local/purge_dups' include { PURGE_DUPS as PURGE_DUPS_ALT } from '../subworkflows/local/purge_dups' @@ -35,6 +36,8 @@ include { SCAFFOLDING } from '../subworkflows/local/scaffolding' include { KEEP_SEQNAMES as KEEP_SEQNAMES_PRIMARY } from '../modules/local/keep_seqnames' include { KEEP_SEQNAMES as KEEP_SEQNAMES_HAPLOTIGS } from '../modules/local/keep_seqnames' include { ALIGN_SHORT } from '../subworkflows/local/align_short' +include { GENOME_STATISTICS as GENOME_STATISTICS_RAW } from '../subworkflows/local/assembly_stats' +include { GENOME_STATISTICS as GENOME_STATISTICS_PURGED } from '../subworkflows/local/assembly_stats' include { GENOME_STATISTICS as GENOME_STATISTICS_POLISHED } from '../subworkflows/local/assembly_stats' include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS } from '../subworkflows/local/assembly_stats' @@ -66,14 +69,21 @@ workflow GENOMEASSEMBLY { // PREPARE_INPUT(ch_input) ch_versions = ch_versions.mix(PREPARE_INPUT.out.versions) + + PREPARE_INPUT.out.hifi.set{ hifi_reads_ch } + PREPARE_INPUT.out.hic.map{ meta, reads, motif -> reads }.set{ hic_reads_ch } - PREPARE_INPUT.out.primary_asm.map{ meta, p, p_idx -> [meta, 
p] }.set{ primary_contigs_ch } - PREPARE_INPUT.out.haplotigs_asm.map{ meta, h, h_idx -> [meta, h] }.set{ haplotigs_ch } - PREPARE_INPUT.out.hifi.set{ reads_ch } - - GENOMESCOPE_MODEL(reads_ch) + GENOMESCOPE_MODEL( hifi_reads_ch ) - reads_ch.join(primary_contigs_ch) + RAW_ASSEMBLY( hifi_reads_ch , hic_reads_ch ) + RAW_ASSEMBLY.out.primary_contigs.set{ primary_contigs_ch } + RAW_ASSEMBLY.out.alternate_contigs.set{ haplotigs_ch } + GENOME_STATISTICS_RAW( primary_contigs_ch.join(haplotigs_ch), + PREPARE_INPUT.out.busco, + GENOMESCOPE_MODEL.out.hist, + GENOMESCOPE_MODEL.out.ktab + ) + hifi_reads_ch.join(primary_contigs_ch) .join(GENOMESCOPE_MODEL.out.model) .set{ purge_dups_input } PURGE_DUPS_PRI( purge_dups_input, 'primary' ) @@ -85,21 +95,29 @@ workflow GENOMEASSEMBLY { .set{ haplotigs_to_merge } CAT_CAT_HAPLOTIGS{ haplotigs_to_merge } - reads_ch.join(CAT_CAT_HAPLOTIGS.out.file_out) + hifi_reads_ch.join(CAT_CAT_HAPLOTIGS.out.file_out) .join(GENOMESCOPE_MODEL.out.model) .set{ purge_dups_haploitgs_input } PURGE_DUPS_ALT( purge_dups_haploitgs_input, 'haplotigs' ) - PURGE_DUPS_PRI.out.pri.combine(PURGE_DUPS_ALT.out.pri) - .map{ meta_pri, purged_pri, meta_alt, purged_alt -> [meta_pri, [purged_pri, purged_alt]]} + PURGE_DUPS_ALT.out.pri.map{ meta, fasta -> [[id:meta.id], fasta] } + set{ haplotigs_ch } + GENOME_STATISTICS_PURGED( primary_contigs_ch.join(haplotigs_ch), + PREPARE_INPUT.out.busco, + GENOMESCOPE_MODEL.out.hist, + GENOMESCOPE_MODEL.out.ktab + ) + + if ( polishing_on ) { + PURGE_DUPS_PRI.out.pri.combine(PURGE_DUPS_ALT.out.pri) + .map{ meta_pri, purged_pri, meta_alt, purged_alt -> [meta_pri, [purged_pri, purged_alt]]} .set{ purged_pri_alt_ch } - CAT_CAT_PURGEDUPS( purged_pri_alt_ch ) - SAMTOOLS_FAIDX_PURGEDUPS( CAT_CAT_PURGEDUPS.out.file_out ) - CAT_CAT_PURGEDUPS.out.file_out.join( SAMTOOLS_FAIDX_PURGEDUPS.out.fai ) + CAT_CAT_PURGEDUPS( purged_pri_alt_ch ) + SAMTOOLS_FAIDX_PURGEDUPS( CAT_CAT_PURGEDUPS.out.file_out ) + CAT_CAT_PURGEDUPS.out.file_out.join( 
SAMTOOLS_FAIDX_PURGEDUPS.out.fai ) .set{ reference_ch } - if ( polishing_on ) { PREPARE_INPUT.out.illumina_10X.map{ meta, reads, kmers -> [reads] } .set{ illumina_10X_ch } @@ -162,6 +180,7 @@ workflow GENOMEASSEMBLY { CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) + } /* From 1584b58b41cb788cb7c4d3031dd9b1d9a1618c3e Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Wed, 17 May 2023 14:27:14 +0100 Subject: [PATCH 02/30] Update test dataset to run quickly --- assets/test.yaml | 14 +++++--------- assets/test_iyVesGerm1.yaml | 3 +-- conf/test.config | 2 +- conf/test_full_iyVesGerm1.config | 6 +++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index fbfffc4c..5f0ad7ef 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,17 +1,13 @@ samples: - - id: ilEupCent1 - assembly: - primary: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/primary.fa - haplotigs: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/haplotigs.fa + - id: odSpoLacu1 illumina_10X: - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/10X/small/ + reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/10x_10000/ pacbio: reads: - - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/1000_reads.fasta + - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/HiFi.reads.398.extra.fasta HiC: reads: - - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/hic.01.cram + - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/41741_2#7.sub.cram arima_motif: GATC,GANTC,CTNAG,TTAA busco: - 
lineages_path: /lustre/scratch123/tol/resources/busco/v5/ - lineage: insecta_odb10 + lineage: bacteria_odb10 diff --git a/assets/test_iyVesGerm1.yaml b/assets/test_iyVesGerm1.yaml index f7d1f2bd..4ffedc5f 100644 --- a/assets/test_iyVesGerm1.yaml +++ b/assets/test_iyVesGerm1.yaml @@ -4,11 +4,10 @@ samples: primary: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/working/iyVesGerm1.hifiasm.20201015/iyVesGerm1.p_ctg.fa.gz haplotigs: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/working/iyVesGerm1.hifiasm.20201015/iyVesGerm1.a_ctg.fa.gz illumina_10X: - reads: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/genomic_data/iyVesGerm1/10x/ + reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/sanger-tol-genomeassembly/test/10X_iyVesGerm1 pacbio: reads: - reads: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/genomic_data/iyVesGerm1/pacbio/fasta/m64094_200217_145414.ccs.bc1011_BAK8A_OA--bc1011_BAK8A_OA.filtered.fasta.gz - kmer_pref: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/genomic_data/iyVesGerm1/pacbio/kmer/k31/ HiC: reads: - reads: /lustre/scratch124/tol/projects/darwin/data/insects/Vespula_germanica/genomic_data/iyVesGerm1/hic-arima2/34957_3#2.cram diff --git a/conf/test.config b/conf/test.config index d1128372..c6c8d11c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,5 +22,5 @@ params { // Input data input = "${projectDir}/assets/test.yaml" groups = 10 - polishing_on = false + polishing_on = true } diff --git a/conf/test_full_iyVesGerm1.config b/conf/test_full_iyVesGerm1.config index b8ff0133..8d01e87d 100644 --- a/conf/test_full_iyVesGerm1.config +++ b/conf/test_full_iyVesGerm1.config @@ -14,11 +14,11 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - max_cpus = 16 - max_memory = '100.GB' + max_cpus = 8 + max_memory = '50.GB' max_time = 
'24.h' // Input data for full size test input = 'assets/test_iyVesGerm1.yaml' - polishing_on = false + polishing_on = true } From 74983323cf5d219d5f56709a0b0bdb5c9d536cd8 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Wed, 17 May 2023 14:40:20 +0100 Subject: [PATCH 03/30] Fix formatting --- conf/modules.config | 2 +- modules.json | 180 +++++++++++--------------------------------- 2 files changed, 46 insertions(+), 136 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 401e8831..59f1da2c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -187,7 +187,7 @@ process { } withName: 'FASTK_FASTK' { - //FIX THIS FOR THE REAL DATASET -k31 + //FIX THIS FOR THE REAL DATASET -k31 ext.args = "-k17 -t" } diff --git a/modules.json b/modules.json index b390225e..72029f58 100644 --- a/modules.json +++ b/modules.json @@ -8,152 +8,110 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": [ - "modules" - ], + 
"installed_by": ["modules"], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genescopefk": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "hifiasm": { "branch": "master", @@ -163,173 +121,125 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } From 0a59581232d0768c40aa73d9971ad96e2eb6f629 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Wed, 17 May 2023 14:59:17 +0100 Subject: [PATCH 04/30] Fix kmer size --- conf/modules.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 59f1da2c..a5a719f5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -187,8 +187,7 @@ process { } withName: 'FASTK_FASTK' { - //FIX THIS FOR THE REAL DATASET -k31 - ext.args = "-k17 -t" + ext.args = "-k31 -t" } withName: 'FASTK_HISTEX' { From b0f4e312f1bff5ba29e33b33c932fab0451dac1f Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Wed, 17 May 2023 16:37:11 +0100 Subject: [PATCH 05/30] Fix minor bug in haplotigs assignment --- workflows/genomeassembly.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index e49bf371..d21b91b0 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -102,7 +102,7 @@ workflow GENOMEASSEMBLY { PURGE_DUPS_ALT( purge_dups_haploitgs_input, 'haplotigs' ) PURGE_DUPS_ALT.out.pri.map{ meta, fasta -> [[id:meta.id], fasta] } - set{ haplotigs_ch } + .set{ haplotigs_ch } GENOME_STATISTICS_PURGED( 
primary_contigs_ch.join(haplotigs_ch), PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, From af2c32ae77d251094094ba3e97e71b57a54432ff Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 09:37:36 +0100 Subject: [PATCH 06/30] Fix minor bug in channels joining --- workflows/genomeassembly.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index d21b91b0..9bcb2861 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -146,13 +146,15 @@ workflow GENOMEASSEMBLY { .set{ haplotigs_contigs_ch } // Check genome stats for polished pri and alt - GENOME_STATISTICS_POLISHED( primary_contigs_ch.join(haplotigs_contigs_ch), + primary_contigs_ch.join(haplotigs_contigs_ch) + .map{ meta, pri, alt -> [[id:meta.id], pri, alt]} + .set{ polished_asm_stats_input_ch } + GENOME_STATISTICS_POLISHED( polished_asm_stats_input_ch, PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab ) ch_versions = ch_versions.mix(GENOME_STATISTICS_POLISHED.out.versions) - } PREPARE_INPUT.out.hic.map{ meta, crams, motif -> [meta, crams] } From cf35d33608afed5748f661ee5436280fc2904f7d Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 15:32:06 +0100 Subject: [PATCH 07/30] Customize output --- assets/test.yaml | 2 +- conf/modules.config | 464 +++++++++++++++++++++++++++--- conf/test.config | 2 +- subworkflows/local/polishing.nf | 4 +- subworkflows/local/scaffolding.nf | 4 - workflows/genomeassembly.nf | 4 +- 6 files changed, 434 insertions(+), 46 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 5f0ad7ef..de388f3b 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,5 +1,5 @@ samples: - - id: odSpoLacu1 + - id: idTesTol1 illumina_10X: reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/10x_10000/ pacbio: diff --git a/conf/modules.config b/conf/modules.config 
index a5a719f5..ac80f39a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,47 +12,337 @@ process { - withName: SAMPLESHEET_CHECK { + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + // Set up of kmer profile + withName: FASTK_FASTK { + ext.args = "-k31 -t" + publishDir = [ + path: { "${params.outdir}/kmer" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: FASTQC { - ext.args = '--quiet' + withName: FASTK_HISTEX { + ext.args = "-G" + publishDir = [ + path: { "${params.outdir}/kmer" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + + withName: GENESCOPEFK { + ext.args = "-k31" publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/kmer" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + // End of Set up of kmer profile // Set up of the raw assembly pipeline - withName: HIFIASM { + withName: HIFIASM_PRI { ext.args = "--primary" + publishDir = [ + path: { "${params.outdir}/hifiasm" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: HIFIASM_HIC { + ext.args = "--primary" + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI' { + publishDir = [ + path: { "${params.outdir}/hifiasm" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT' { + publishDir = [ + path: { "${params.outdir}/hifiasm" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_.*HIC' { + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_RAW:GFASTATS' { + publishDir = [ + path: { "${params.outdir}/hifiasm" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_RAW:BUSCO' { + publishDir = [ + path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_RAW:MERQURYFK_MERQURYFK' { + publishDir = [ + path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.ccs.merquryk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } // End of Set up of the raw assembly pipeline + // Set up of the purging pipeline + + withName: '.*PURGE_DUPS_PRI:MINIMAP2_ALIGN_READS' { + //these options from pbmm2 CSS preset + ext.args = "-k19 -w10 -O5,56 -E4,1 -A2 -B5 -z400,50 -r2000 --lj-min-ratio 0.5" + ext.prefix = { "${meta.id}.reads" } + publishDir = [ + path: { "${params.outdir}/purging/coverage" }, + mode: params.publish_dir_mode, + pattern: ".*paf" + ] + } + + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PBCSTAT' { + publishDir = [ + path: { "${params.outdir}/purging/coverage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:GET_CALCUTS_PARAMS' { + publishDir = [ + path: { "${params.outdir}/purging/coverage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_CALCUTS' { + publishDir = [ + path: { "${params.outdir}/purging/coverage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_SPLITFA' { + ext.prefix = "self_aln" + publishDir = [ + path: { "${params.outdir}/purging/split_aln" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:MINIMAP2_ALIGN_ASSEMBLY' { + ext.args = "-xasm5 -DP" + ext.prefix = "self_aln" + publishDir = [ + path: { "${params.outdir}/purging/split_aln" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PURGEDUPS' { + ext.args = "-2" + publishDir = [ + path: { "${params.outdir}/purging/purge_dups" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_GETSEQS' { + ext.prefix = { "${meta.prefix}" } + publishDir = [ + path: { "${params.outdir}/purging/seqs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_READS' { + ext.prefix = { "${meta.id}.reads" } + publishDir = [ + path: { "${params.outdir}/purging/coverage.htigs" }, + mode: params.publish_dir_mode, + pattern: ".*paf" + ] + } + + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PBCSTAT' { + publishDir = [ + path: { "${params.outdir}/purging/coverage.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { + publishDir = [ + path: { "${params.outdir}/purging/coverage.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_CALCUTS' { + publishDir = [ + path: { "${params.outdir}/purging/coverage.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_SPLITFA' { + ext.prefix = "self_aln" + publishDir = [ + path: { "${params.outdir}/purging/split_aln.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_ASSEMBLY' { + ext.prefix = "self_aln" + publishDir = [ + path: { "${params.outdir}/purging/split_aln.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PURGEDUPS' { + publishDir = [ + path: { "${params.outdir}/purging/purge_dups.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { + publishDir = [ + path: { "${params.outdir}/purging/seqs.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_PURGED:GFASTATS' { + publishDir = [ + path: { "${params.outdir}/purging" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { + publishDir = [ + path: { "${params.outdir}/purging/${meta.id}.purged.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_PURGED:MERQURYFK_MERQURYFK' { + publishDir = [ + path: { "${params.outdir}/purging/${meta.id}.purged.ccs.merquryk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // End of Set up of the purging pipeline + + // Set up of the polishing pipeline - withName: LONGRANGER_ALIGN { + withName: LONGRANGER_MKREF { + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: LONGRANGER_ALIGN { ext.args = "--disable-ui --nopreflight" if(System.getenv('GITHUB_ACTION') != null ) { container = "ghcr.io/sanger-tol/longranger:2.2.2-c3" } + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: BED_CHUNKS { + publishDir = [ + path: { "${params.outdir}/polishing/chunks" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FREEBAYES { + publishDir = [ + path: { "${params.outdir}/polishing/vcf" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: MERGE_FREEBAYES { + publishDir = [ + path: { "${params.outdir}/polishing/" }, + mode: params.publish_dir_mode, + pattern: "*merged*" + ] } withName: BCFTOOLS_CONSENSUS { - // Filter by mapping quality, dont keep alternative homozygotes, + // Filter by mapping quality, keep alt-alt het and alt-alt hom, // keep longer allele ext.args = '-i\'QUAL>1 && (GT="AA" || GT="Aa")\' -Hla' ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/polishing/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + withName: BCFTOOLS_INDEX { ext.args = '--tbi' @@ -70,12 +360,45 @@ process { withName: SEQTK_SUBSEQ_PRIMARY { ext.prefix = '.primary' + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: SEQTK_SUBSEQ_HAPLOTIGS { ext.prefix = '.haplotig' + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS' { + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { + publishDir = [ + path: { "${params.outdir}/polishing/${meta.id}.polished.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + withName: '.*GENOME_STATISTICS_POLISHED:MERQURYFK_MERQURYFK' { + publishDir = [ + path: { "${params.outdir}/polishing/${meta.id}.polished.ccs.merquryk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } // End of Set up of the polishing pipeline // Set up of the Hi-C read mapping pipeline @@ -126,6 +449,54 @@ process { ext.args = "-u -F0xf00 -e 'mapq>=10' --output-fmt bam" } + withName: '.*ALIGN_SHORT:MARKDUP_STATS:SAMTOOLS_VIEW_MARKDUP' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN_SHORT:MARKDUP_STATS:BED_SORT' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_IDXSTATS' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: GNU_SORT { ext.args = '-k4,4' } @@ -135,6 +506,11 @@ process { withName: 'YAHS' { // Skip the initial assembly error correction step ext.args = '--no-contig-ec' + publishDir = [ + path: { "${params.outdir}/scaffolding/yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: 'COOLER_CLOAD' { @@ -144,20 +520,63 @@ process { // chrom2 field number (one-based) is 6; // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' + publishDir = [ + path: { "${params.outdir}/scaffolding/yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: 'PRETEXTSNAPSHOT' { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' + publishDir = [ + path: { "${params.outdir}/scaffolding/yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: 'JUICER_TOOLS_PRE' { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms6g -Xmx48g' + publishDir = [ + path: { "${params.outdir}/scaffolding/yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: 'JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" + publishDir = [ + path: { "${params.outdir}/scaffolding/yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS' { + publishDir = [ + path: { "${params.outdir}/scaffolding" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { + publishDir = [ + path: { "${params.outdir}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS:MERQURYFK_MERQURYFK' { + publishDir = [ + path: { "${params.outdir}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } // End of Set up of the scaffolding pipeline @@ -167,31 +586,4 @@ process { } //End of Set up of assmebly stats subworkflow - - //Set up purge_dups subworkflow - withName: 'MINIMAP2_ALIGN_READS' { - //these options from pbmm2 CSS preset - ext.args = "-k19 -w10 -O5,56 -E4,1 -A2 -B5 -z400,50 -r2000 --lj-min-ratio 0.5" - } - - withName: 'MINIMAP2_ALIGN_ASSEMBLY' { - ext.args = "-xasm5 -DP" - } - - withName: 'PURGEDUPS_PURGEDUPS' { - ext.args = "-2" - } - - withName: 'PURGEDUPS_GETSEQS' { - ext.prefix = { "${meta.prefix}" } - } - - withName: 'FASTK_FASTK' { - ext.args = "-k31 -t" - } - - withName: 'FASTK_HISTEX' { - ext.args = "-G" - } - //End of Set up purge_dups subworkflow } diff --git a/conf/test.config b/conf/test.config index c6c8d11c..f70c22bf 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,6 +21,6 @@ params { // Input data input = "${projectDir}/assets/test.yaml" - groups = 10 + bed_chunks_polishing = 2 polishing_on = true } diff --git a/subworkflows/local/polishing.nf b/subworkflows/local/polishing.nf index da3f9fe5..0cd12f00 100644 --- a/subworkflows/local/polishing.nf +++ b/subworkflows/local/polishing.nf @@ -16,7 +16,7 @@ workflow POLISHING { take: fasta_in //tuple meta, 
fasta, fai reads_10X // file - groups //val + bed_chunks_polishing //val main: ch_versions = Channel.empty() @@ -38,7 +38,7 @@ workflow POLISHING { // Split genome into chunks fasta_in.map{ meta, fasta, fai -> [meta, fai] } .set{chunks_ch} - BED_CHUNKS (chunks_ch, groups) + BED_CHUNKS (chunks_ch, bed_chunks_polishing) ch_versions = ch_versions.mix(BED_CHUNKS.out.versions) intervals_structured = BED_CHUNKS.out.coords.toList().transpose() LONGRANGER_ALIGN.out.bam.join(LONGRANGER_ALIGN.out.bai) diff --git a/subworkflows/local/scaffolding.nf b/subworkflows/local/scaffolding.nf index d0551e6a..ae19e05f 100644 --- a/subworkflows/local/scaffolding.nf +++ b/subworkflows/local/scaffolding.nf @@ -9,7 +9,6 @@ include { PREPARE_PRETEXTMAP_INPUT } from '../../modules/local/prepare_pret include { PRETEXTMAP } from '../../modules/nf-core/pretextmap/main.nf' include { PRETEXTSNAPSHOT } from '../../modules/nf-core/pretextsnapshot/main' include { CHROM_SIZES } from '../../modules/local/chrom_sizes.nf' -include { GFASTATS } from '../../modules/nf-core/gfastats/main.nf' workflow SCAFFOLDING { take: @@ -31,8 +30,6 @@ workflow SCAFFOLDING { ch_versions = ch_versions.mix(YAHS.out.versions) SCAFFOLDS_FAIDX(YAHS.out.scaffolds_fasta) ch_versions = ch_versions.mix(SCAFFOLDS_FAIDX.out.versions) - GFASTATS(YAHS.out.scaffolds_fasta, "fasta", [], [], [], [], [], []) - ch_versions = ch_versions.mix(GFASTATS.out.versions) bed_in.map{ meta, bed -> meta}.set{ch_meta} // Prepare contact pairs for cooler @@ -76,7 +73,6 @@ workflow SCAFFOLDING { alignments_sorted = JUICER_PRE.out.pairs fasta = YAHS.out.scaffolds_fasta chrom_sizes = CHROM_SIZES.out.chrom_sizes - stats = GFASTATS.out.assembly_summary cool = COOLER_CLOAD.out.cool mcool = COOLER_ZOOMIFY.out.mcool snapshots = PRETEXTSNAPSHOT.out.image diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 9bcb2861..17dbe54f 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -12,7 +12,7 @@ for (param in 
checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = Channel.of(file(params.input)) } else { exit 1, 'Input samplesheet not specified!' } -if (params.groups) { groups = params.groups } else { groups = 100; } +if (params.bed_chunks_polishing) { bed_chunks_polishing = params.bed_chunks_polishing } else { bed_chunks_polishing = 100; } if (params.cool_bin) { cool_bin = params.cool_bin } else { cool_bin = 1000; } @@ -121,7 +121,7 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.illumina_10X.map{ meta, reads, kmers -> [reads] } .set{ illumina_10X_ch } - POLISHING(reference_ch, illumina_10X_ch, groups) + POLISHING(reference_ch, illumina_10X_ch, bed_chunks_polishing) ch_versions = ch_versions.mix(POLISHING.out.versions) // Separate the primary and alternative contigs again after polishing From 222e20330043a49783b38abaecf8892e8f0aa3e6 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 15:39:54 +0100 Subject: [PATCH 08/30] Fix formatting --- conf/modules.config | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ac80f39a..b2e8f7b5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,7 +59,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: HIFIASM_HIC { ext.args = "--primary" publishDir = [ @@ -75,7 +75,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - } + } withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT' { publishDir = [ @@ -342,7 +342,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: BCFTOOLS_INDEX { ext.args = '--tbi' From 3046e22a60d7c0dbbcc8302ae8fa566c1a0715b8 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 15:49:42 +0100 Subject: [PATCH 09/30] Fix formatting --- conf/modules.config | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b2e8f7b5..524536ac 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,7 +59,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: HIFIASM_HIC { ext.args = "--primary" publishDir = [ @@ -100,7 +100,7 @@ process { pattern: '*assembly_summary' ] } - + withName: '.*GENOME_STATISTICS_RAW:BUSCO' { publishDir = [ path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.busco" }, @@ -108,7 +108,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*GENOME_STATISTICS_RAW:MERQURYFK_MERQURYFK' { publishDir = [ path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.ccs.merquryk" }, @@ -130,7 +130,7 @@ process { pattern: ".*paf" ] } - + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PBCSTAT' { publishDir = [ path: { "${params.outdir}/purging/coverage" }, @@ -138,7 +138,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_PRI:GET_CALCUTS_PARAMS' { publishDir = [ path: { "${params.outdir}/purging/coverage" }, @@ -146,7 +146,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_CALCUTS' { publishDir = [ path: { "${params.outdir}/purging/coverage" }, @@ -154,7 +154,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ @@ -173,7 +173,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PURGEDUPS' { ext.args = "-2" publishDir = [ @@ -182,7 +182,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_PRI:PURGEDUPS_GETSEQS' { ext.prefix = { "${meta.prefix}" } publishDir = [ @@ -191,7 +191,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_READS' { ext.prefix = { "${meta.id}.reads" } publishDir = [ @@ -200,7 +200,7 @@ process { pattern: ".*paf" ] } - + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PBCSTAT' { publishDir = [ path: { "${params.outdir}/purging/coverage.htigs" }, @@ -208,15 +208,16 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { + + + withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { publishDir = [ path: { "${params.outdir}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_CALCUTS' { publishDir = [ path: { "${params.outdir}/purging/coverage.htigs" }, @@ -224,7 +225,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ @@ -242,7 +243,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PURGEDUPS' { publishDir = [ path: { "${params.outdir}/purging/purge_dups.htigs" }, @@ -250,7 +251,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - + withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { publishDir = [ path: { "${params.outdir}/purging/seqs.htigs" }, @@ -258,7 +259,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: '.*GENOME_STATISTICS_PURGED:GFASTATS' { publishDir = [ path: { "${params.outdir}/purging" }, @@ -266,7 +267,7 @@ process { pattern: '*assembly_summary' ] } - + withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { publishDir = [ path: { "${params.outdir}/purging/${meta.id}.purged.busco" }, @@ -382,7 +383,7 @@ process { pattern: '*assembly_summary' ] } - + withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ path: { "${params.outdir}/polishing/${meta.id}.polished.busco" }, @@ -561,7 +562,7 @@ process { pattern: '*assembly_summary' ] } - + withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ path: { "${params.outdir}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.busco" }, From f50f7e85e66dac4c3ce8b12cb835c844593338e9 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 15:52:11 +0100 Subject: [PATCH 10/30] Fix formatting --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 524536ac..6c967a4d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -210,7 +210,7 @@ process { } - withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { + withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { publishDir = [ path: { "${params.outdir}/purging/coverage.htigs" }, mode: params.publish_dir_mode, From 01bfbb27131fcbb85e321d034f23572a32732e55 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 16:29:03 +0100 Subject: [PATCH 11/30] Add minor fixes according to code review --- assets/test.yaml | 6 +- conf/modules.config | 28 ++++- conf/test_full_iyVesGerm1.config | 4 - modules.json | 180 +++++++++++++++++++++------- modules/local/get_calcuts_params.nf | 1 - 
modules/local/gfa_to_fasta.nf | 4 +- modules/local/keep_seqnames.nf | 4 +- 7 files changed, 164 insertions(+), 63 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index de388f3b..24057633 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,13 +1,13 @@ samples: - id: idTesTol1 illumina_10X: - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/10x_10000/ + reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/10x/ pacbio: reads: - - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/HiFi.reads.398.extra.fasta + - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/pacbio/fasta/HiFi.reads.398.extra.fasta HiC: reads: - - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/dataset/41741_2#7.sub.cram + - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/hic-arima2/41741_2#7.sub.cram arima_motif: GATC,GANTC,CTNAG,TTAA busco: lineage: bacteria_odb10 diff --git a/conf/modules.config b/conf/modules.config index 6c967a4d..adeab500 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -70,6 +70,7 @@ process { } withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI' { + ext.prefix = { "${meta.id}.asm.p_ctg" } publishDir = [ path: { "${params.outdir}/hifiasm" }, mode: params.publish_dir_mode, @@ -78,6 +79,7 @@ process { } withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT' { + ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ path: { "${params.outdir}/hifiasm" }, mode: params.publish_dir_mode, @@ -186,9 +188,16 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_GETSEQS' { ext.prefix = { "${meta.prefix}" } publishDir = [ - path: { "${params.outdir}/purging/seqs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + [ + path: { "${params.outdir}/purging/seqs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/purging/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.contains('purged.fa') ? null : 'purged.fa' } + ] ] } @@ -254,9 +263,16 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { publishDir = [ - path: { "${params.outdir}/purging/seqs.htigs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + [ + path: { "${params.outdir}/purging/seqs.htigs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/purging/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.contains('purged.fa') ? null : 'purged.htigs.fa' } + ] ] } diff --git a/conf/test_full_iyVesGerm1.config b/conf/test_full_iyVesGerm1.config index 8d01e87d..7d034eaa 100644 --- a/conf/test_full_iyVesGerm1.config +++ b/conf/test_full_iyVesGerm1.config @@ -14,10 +14,6 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - max_cpus = 8 - max_memory = '50.GB' - max_time = '24.h' - // Input data for full size test input = 'assets/test_iyVesGerm1.yaml' polishing_on = true diff --git a/modules.json b/modules.json index 72029f58..b390225e 100644 --- a/modules.json +++ b/modules.json @@ -8,110 +8,152 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/fastk": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "hifiasm": { "branch": "master", @@ -121,125 +163,173 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", 
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } diff --git a/modules/local/get_calcuts_params.nf b/modules/local/get_calcuts_params.nf index d7e2ac1a..9be8382e 100644 --- a/modules/local/get_calcuts_params.nf +++ b/modules/local/get_calcuts_params.nf @@ -17,7 +17,6 @@ process GET_CALCUTS_PARAMS { task.ext.when == null || task.ext.when script: - def prefix = meta.prefix ?: '' """ cutoffs=`get_calcuts_params_from_model_fk.py $model_fk` diff --git a/modules/local/gfa_to_fasta.nf b/modules/local/gfa_to_fasta.nf index edeb877d..e826e0c4 100644 --- a/modules/local/gfa_to_fasta.nf +++ b/modules/local/gfa_to_fasta.nf @@ -16,9 +16,9 @@ process 
GFA_TO_FASTA { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - prefix=\$(basename $gfa .gfa) - awk '/^S/{print ">"\$2;print \$3}' $gfa > \${prefix}.fa + awk '/^S/{print ">"\$2;print \$3}' $gfa > ${prefix}.fa cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/keep_seqnames.nf b/modules/local/keep_seqnames.nf index 8046a0ae..9c37403e 100644 --- a/modules/local/keep_seqnames.nf +++ b/modules/local/keep_seqnames.nf @@ -7,7 +7,7 @@ process KEEP_SEQNAMES { 'ubuntu:20.04' }" input: - tuple val(meta), path(fa) + tuple val(meta), path(fasta) output: path "*seq.lst" , emit: seqlist @@ -19,7 +19,7 @@ process KEEP_SEQNAMES { script: def prefix = meta.prefix ?: '' """ - grep '>' $fa | cut -f1 | sed 's/>//' > seq.lst + grep '>' $fasta | cut -f1 | sed 's/>//' > seq.lst cat <<-END_VERSIONS > versions.yml "${task.process}": cut: \$(cut --version | head -n 1 | awk '{print \$NF}') From 3c45f9e42b91766d722a3aa24dd377afb82e5ed7 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 16:30:17 +0100 Subject: [PATCH 12/30] Prettify --- modules.json | 180 +++++++++++++-------------------------------------- 1 file changed, 45 insertions(+), 135 deletions(-) diff --git a/modules.json b/modules.json index b390225e..72029f58 100644 --- a/modules.json +++ b/modules.json @@ -8,152 +8,110 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": [ - "modules" 
- ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "hifiasm": { "branch": "master", @@ -163,173 +121,125 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/calcuts": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": 
["modules"] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } From 3bda9ac0bc794131bf1a8024e1dc078e770f1562 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 18 May 2023 17:01:01 +0100 Subject: [PATCH 13/30] Fix purge dups publish error --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index adeab500..7273a666 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -196,7 +196,7 @@ process { [ path: { "${params.outdir}/purging/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.contains('purged.fa') ? null : 'purged.fa' } + saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.fa' : null } ] ] } @@ -271,7 +271,7 @@ process { [ path: { "${params.outdir}/purging/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.contains('purged.fa') ? null : 'purged.htigs.fa' } + saveAs: { filename -> filename.contains('purged.fa') ? 'purged.htigs.fa' : null } ] ] } From 415179d757d7f56bdb29d331c09392cdb42590f5 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 10:55:25 +0100 Subject: [PATCH 14/30] Update test dataset --- assets/test.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 24057633..0262bea0 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,13 +1,13 @@ samples: - - id: idTesTol1 + - id: baUndUnlc1 illumina_10X: - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/10x/ + reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/ pacbio: reads: - - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/pacbio/fasta/HiFi.reads.398.extra.fasta + - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta HiC: reads: - - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Test_tolasm/genomic_data/idTesTol1/hic-arima2/41741_2#7.sub.cram + - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram arima_motif: GATC,GANTC,CTNAG,TTAA busco: lineage: bacteria_odb10 From 0d6c9ec47ca06ab762050509b563317c0e8c34e0 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 13:06:51 +0100 Subject: [PATCH 15/30] Update test dataset --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index 0262bea0..57de3843 100644 --- a/assets/test.yaml +++ 
b/assets/test.yaml @@ -4,7 +4,7 @@ samples: reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/ pacbio: reads: - - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta + - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/hifiasm/HiFi.reads.fasta HiC: reads: - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram From 8072192bb0d986cfe0493989a76360a55ecad376 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 13:52:10 +0100 Subject: [PATCH 16/30] Add contigs stats for haplotigs and make stats naming more explicit --- conf/modules.config | 53 ++++++++++++++++++++++------ subworkflows/local/assembly_stats.nf | 23 +++++++----- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7273a666..df2748e6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -95,7 +95,17 @@ process { ] } - withName: '.*GENOME_STATISTICS_RAW:GFASTATS' { + withName: '.*GENOME_STATISTICS_RAW:GFASTATS_PRI' { + ext.prefix = { "${meta.id}.asm.p_ctg" } + publishDir = [ + path: { "${params.outdir}/hifiasm" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_RAW:GFASTATS_HAP' { + ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ path: { "${params.outdir}/hifiasm" }, mode: params.publish_dir_mode, @@ -276,7 +286,17 @@ process { ] } - withName: '.*GENOME_STATISTICS_PURGED:GFASTATS' { + withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_PRI' { + ext.prefix = { "purged" } + publishDir = [ + path: { "${params.outdir}/purging" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_HAP' { + ext.prefix = { 
"purged.htigs" } publishDir = [ path: { "${params.outdir}/purging" }, mode: params.publish_dir_mode, @@ -375,24 +395,34 @@ process { } withName: SEQTK_SUBSEQ_PRIMARY { - ext.prefix = '.primary' + ext.prefix = 'primary' publishDir = [ path: { "${params.outdir}/polishing" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : "primary.fa" } ] } withName: SEQTK_SUBSEQ_HAPLOTIGS { - ext.prefix = '.haplotig' + ext.prefix = 'haplotigs' publishDir = [ path: { "${params.outdir}/polishing" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : "haplotigs.fa" } + ] + } + + withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_PRI' { + ext.prefix = { "primary" } + publishDir = [ + path: { "${params.outdir}/polishing" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' ] } - withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS' { + withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_HAP' { + ext.prefix = { "haplotigs" } publishDir = [ path: { "${params.outdir}/polishing" }, mode: params.publish_dir_mode, @@ -400,6 +430,7 @@ process { ] } + withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ path: { "${params.outdir}/polishing/${meta.id}.polished.busco" }, @@ -571,9 +602,11 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS' { + + withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS_PRI' { + ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/scaffolding/yahs/" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -581,7 +614,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.busco" }, + path: { "${params.outdir}/scaffolding/yahs/${meta.id}_scaffolds_final.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/subworkflows/local/assembly_stats.nf b/subworkflows/local/assembly_stats.nf index 6541be3c..028a5d81 100644 --- a/subworkflows/local/assembly_stats.nf +++ b/subworkflows/local/assembly_stats.nf @@ -7,7 +7,8 @@ // https://github.com/sanger-tol/genomenote/blob/383f23e6b7a89f9aad6b85c8f7320b5c5825de73/subworkflows/local/genome_statistics.nf // -include { GFASTATS } from '../../modules/nf-core/gfastats/main' +include { GFASTATS as GFASTATS_PRI } from '../../modules/nf-core/gfastats/main' +include { GFASTATS as GFASTATS_HAP } from '../../modules/nf-core/gfastats/main' include { BUSCO } from '../../modules/nf-core/busco/main' include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' @@ -21,12 +22,15 @@ workflow GENOME_STATISTICS { main: ch_versions = Channel.empty() - // Get ODB lineage value assembly.map{ meta, primary, haplotigs -> [meta, primary] } .set{ primary_ch } - GFASTATS( primary_ch, 'fasta', [], [], [], [], [], [] ) - ch_versions = ch_versions.mix(GFASTATS.out.versions.first()) + GFASTATS_PRI( primary_ch, 'fasta', [], [], [], [], [], [] ) + ch_versions = ch_versions.mix(GFASTATS_PRI.out.versions.first()) + + assembly.map{ meta, primary, haplotigs -> [meta, haplotigs] } + .set{ haplotigs_ch } + 
GFASTATS_HAP( haplotigs_ch, 'fasta', [], [], [], [], [], [] ) // BUSCO BUSCO ( assembly.map{ meta, primary, haplotigs -> [meta, primary]}, @@ -45,11 +49,12 @@ workflow GENOME_STATISTICS { ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) emit: - busco = BUSCO.out.short_summaries_json // meta, path("short_summary.*.json") - merquryk_completeness = MERQURYFK_MERQURYFK.out.stats // meta, stats - merquryk_qv = MERQURYFK_MERQURYFK.out.qv // meta, qv - assembly_stats = GFASTATS.out.assembly_summary // path("*.assembly_summary") - versions = ch_versions + busco = BUSCO.out.short_summaries_json // meta, path("short_summary.*.json") + merquryk_completeness = MERQURYFK_MERQURYFK.out.stats // meta, stats + merquryk_qv = MERQURYFK_MERQURYFK.out.qv // meta, qv + assembly_stats_pri = GFASTATS_PRI.out.assembly_summary // path("*.assembly_summary") + assembly_stats_alt = GFASTATS_HAP.out.assembly_summary // path("*.assembly_summary") + versions = ch_versions } From 835b8c2019ab070d5a846851963c8b2d81368245 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 14:32:24 +0100 Subject: [PATCH 17/30] Make hifiasm in hic mode optional and keep the stats for hifiasm-hic assembly if created --- conf/modules.config | 36 ++++++++++++++++++++++++++++++ conf/test.config | 1 + subworkflows/local/raw_assembly.nf | 20 +++++++++++------ workflows/genomeassembly.nf | 13 ++++++++++- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index df2748e6..572607be 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -87,6 +87,24 @@ process { ] } + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' { + ext.prefix = { "${meta.id}.asm.hic.p_ctg" } + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' { + ext.prefix = { "${meta.id}.asm.hic.a_ctg" } + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_.*HIC' { publishDir = [ path: { "${params.outdir}/hifiasm-hic" }, @@ -113,6 +131,24 @@ process { ] } + withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' { + ext.prefix = { "${meta.id}.asm.hic.p_ctg" } + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' { + ext.prefix = { "${meta.id}.asm.hic.a_ctg" } + publishDir = [ + path: { "${params.outdir}/hifiasm-hic" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + withName: '.*GENOME_STATISTICS_RAW:BUSCO' { publishDir = [ path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.busco" }, diff --git a/conf/test.config b/conf/test.config index f70c22bf..fbfa77a9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,4 +23,5 @@ params { input = "${projectDir}/assets/test.yaml" bed_chunks_polishing = 2 polishing_on = true + hifiasm_hic_on = true } diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 3c8a1dd7..16fbcafb 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -10,6 +10,7 @@ workflow RAW_ASSEMBLY { take: hifi_reads // channel: [ val(meta), [ datafile ] ] hic_reads // channel: [ datafile ] + hifiasm_hic_on // val: True/False main: ch_versions = Channel.empty() @@ -21,9 +22,11 @@ workflow RAW_ASSEMBLY { GFA_TO_FASTA_ALT( HIFIASM_PRI.out.alternate_contigs ) ch_versions = ch_versions.mix(GFA_TO_FASTA_PRI.out.versions) - HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) - GFA_TO_FASTA_PRI_HIC( HIFIASM_HIC.out.hic_primary_contigs ) - 
GFA_TO_FASTA_ALT_HIC( HIFIASM_HIC.out.hic_alternate_contigs ) + if ( hifiasm_hic_on ) { + HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) + GFA_TO_FASTA_PRI_HIC( HIFIASM_HIC.out.hic_primary_contigs ) + GFA_TO_FASTA_ALT_HIC( HIFIASM_HIC.out.hic_alternate_contigs ) + } emit: raw_unitigs = HIFIASM_PRI.out.raw_unitigs @@ -34,12 +37,15 @@ workflow RAW_ASSEMBLY { alternate_contigs_gfa = HIFIASM_PRI.out.alternate_contigs processed_unitigs = HIFIASM_PRI.out.processed_unitigs + primary_hic_contigs_gfa = hifiasm_hic_on ? HIFIASM_HIC.out.hic_primary_contigs : null + alternate_hic_contigs_gfa = hifiasm_hic_on ? HIFIASM_HIC.out.hic_alternate_contigs : null + phased_hic_contigs_hap1_gfa = hifiasm_hic_on ? HIFIASM_HIC.out.paternal_contigs : null + phased_hic_contigs_hap2_gfa = hifiasm_hic_on ? HIFIASM_HIC.out.maternal_contigs : null + primary_contigs = GFA_TO_FASTA_PRI.out.fasta alternate_contigs = GFA_TO_FASTA_ALT.out.fasta - primary_hic_contigs_gfa = HIFIASM_HIC.out.hic_primary_contigs - alternate_hic_contigs_gfa = HIFIASM_HIC.out.hic_alternate_contigs - phased_hic_contigs_hap1_gfa = HIFIASM_HIC.out.paternal_contigs - phased_hic_contigs_hap2_gfa = HIFIASM_HIC.out.maternal_contigs + primary_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_PRI_HIC.out.fasta : null + alternate_hic_contigs = hifiasm_hic_on ? 
GFA_TO_FASTA_ALT_HIC.out.fasta : null versions = ch_versions } diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 17dbe54f..3aa5cc6a 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -17,6 +17,7 @@ if (params.bed_chunks_polishing) { bed_chunks_polishing = params.bed_chunks_poli if (params.cool_bin) { cool_bin = params.cool_bin } else { cool_bin = 1000; } if (params.polishing_on) { polishing_on = params.polishing_on } else { polishing_on = false; } +if (params.hifiasm_hic_on) { hifiasm_hic_on = params.hifiasm_hic_on } else { hifiasm_hic_on = false; } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -37,6 +38,7 @@ include { KEEP_SEQNAMES as KEEP_SEQNAMES_PRIMARY } from '../modules/local/keep_s include { KEEP_SEQNAMES as KEEP_SEQNAMES_HAPLOTIGS } from '../modules/local/keep_seqnames' include { ALIGN_SHORT } from '../subworkflows/local/align_short' include { GENOME_STATISTICS as GENOME_STATISTICS_RAW } from '../subworkflows/local/assembly_stats' +include { GENOME_STATISTICS as GENOME_STATISTICS_RAW_HIC } from '../subworkflows/local/assembly_stats' include { GENOME_STATISTICS as GENOME_STATISTICS_PURGED } from '../subworkflows/local/assembly_stats' include { GENOME_STATISTICS as GENOME_STATISTICS_POLISHED } from '../subworkflows/local/assembly_stats' include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS } from '../subworkflows/local/assembly_stats' @@ -75,7 +77,7 @@ workflow GENOMEASSEMBLY { GENOMESCOPE_MODEL( hifi_reads_ch ) - RAW_ASSEMBLY( hifi_reads_ch , hic_reads_ch ) + RAW_ASSEMBLY( hifi_reads_ch , hic_reads_ch, hifiasm_hic_on ) RAW_ASSEMBLY.out.primary_contigs.set{ primary_contigs_ch } RAW_ASSEMBLY.out.alternate_contigs.set{ haplotigs_ch } GENOME_STATISTICS_RAW( primary_contigs_ch.join(haplotigs_ch), @@ -83,6 +85,15 @@ workflow GENOMEASSEMBLY { GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab ) + + if ( hifiasm_hic_on ) { + GENOME_STATISTICS_RAW_HIC( 
RAW_ASSEMBLY.out.primary_hic_contigs + .join(RAW_ASSEMBLY.out.alternate_hic_contigs), + PREPARE_INPUT.out.busco, + GENOMESCOPE_MODEL.out.hist, + GENOMESCOPE_MODEL.out.ktab + ) + } hifi_reads_ch.join(primary_contigs_ch) .join(GENOMESCOPE_MODEL.out.model) .set{ purge_dups_input } From a4e76e996cdc2436bad6b76e81d0f8e821df639c Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 15:33:20 +0100 Subject: [PATCH 18/30] Specify BUSCO lineage in the titles of output folders --- conf/modules.config | 8 ++++---- subworkflows/local/assembly_stats.nf | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 572607be..84d3e1af 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -151,7 +151,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.busco" }, + path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -342,7 +342,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { publishDir = [ - path: { "${params.outdir}/purging/${meta.id}.purged.busco" }, + path: { "${params.outdir}/purging/${meta.id}.purged.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -469,7 +469,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ - path: { "${params.outdir}/polishing/${meta.id}.polished.busco" }, + path: { "${params.outdir}/polishing/${meta.id}.polished.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -650,7 +650,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/scaffolding/yahs/${meta.id}_scaffolds_final.busco" }, + path: { "${params.outdir}/scaffolding/yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/subworkflows/local/assembly_stats.nf b/subworkflows/local/assembly_stats.nf index 028a5d81..b2569696 100644 --- a/subworkflows/local/assembly_stats.nf +++ b/subworkflows/local/assembly_stats.nf @@ -33,8 +33,10 @@ workflow GENOME_STATISTICS { GFASTATS_HAP( haplotigs_ch, 'fasta', [], [], [], [], [], [] ) // BUSCO - BUSCO ( assembly.map{ meta, primary, haplotigs -> [meta, primary]}, - lineage.map{ meta, lineage_db, ch_lineage -> ch_lineage }, + BUSCO ( assembly.join(lineage) + .map{ meta, primary, haplotigs, lineage_db, lineage_name -> + [[id:meta.id, lineage:lineage_name], primary]}, + lineage.map{ meta, lineage_db, lineage_name -> lineage_name } , lineage.map{ meta, lineage_db, ch_lineage -> lineage_db }, [] ) ch_versions = ch_versions.mix(BUSCO.out.versions.first()) From 1f352be6ff898fb4e2a1959d12a2cccddf195fa7 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 15:52:15 +0100 Subject: [PATCH 19/30] Fix formatting --- conf/modules.config | 96 ++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 84d3e1af..8b9e62f0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -173,7 +173,7 @@ process { ext.args = "-k19 -w10 -O5,56 -E4,1 -A2 -B5 -z400,50 -r2000 --lj-min-ratio 0.5" ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/purging/coverage" }, + path: { "${params.outdir}/hifiasm/purging/coverage" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -181,7 +181,7 @@ process { withName: 
'.*PURGE_DUPS_PRI:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/purging/coverage" }, + path: { "${params.outdir}/hifiasm/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -189,7 +189,7 @@ process { withName: '.*PURGE_DUPS_PRI:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/purging/coverage" }, + path: { "${params.outdir}/hifiasm/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -197,7 +197,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/purging/coverage" }, + path: { "${params.outdir}/hifiasm/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -206,7 +206,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/purging/split_aln" }, + path: { "${params.outdir}/hifiasm/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -216,7 +216,7 @@ process { ext.args = "-xasm5 -DP" ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/purging/split_aln" }, + path: { "${params.outdir}/hifiasm/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -225,7 +225,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PURGEDUPS' { ext.args = "-2" publishDir = [ - path: { "${params.outdir}/purging/purge_dups" }, + path: { "${params.outdir}/hifiasm/purging/purge_dups" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -235,12 +235,12 @@ process { ext.prefix = { "${meta.prefix}" } publishDir = [ [ - path: { "${params.outdir}/purging/seqs" }, + path: { "${params.outdir}/hifiasm/purging/seqs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/purging/" }, + path: { "${params.outdir}/hifiasm/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 'purged.fa' : null } ] @@ -250,7 +250,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_READS' { ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/purging/coverage.htigs" }, + path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -258,7 +258,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/purging/coverage.htigs" }, + path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -267,7 +267,7 @@ process { withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/purging/coverage.htigs" }, + path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -275,7 +275,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/purging/coverage.htigs" }, + path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -284,7 +284,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/purging/split_aln.htigs" }, + path: { "${params.outdir}/hifiasm/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -293,7 +293,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_ASSEMBLY' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/purging/split_aln.htigs" }, + path: { "${params.outdir}/hifiasm/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -301,7 +301,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PURGEDUPS' { publishDir = [ - path: { "${params.outdir}/purging/purge_dups.htigs" }, + path: { "${params.outdir}/hifiasm/purging/purge_dups.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -310,12 +310,12 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { publishDir = [ [ - path: { "${params.outdir}/purging/seqs.htigs" }, + path: { "${params.outdir}/hifiasm/purging/seqs.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/purging/" }, + path: { "${params.outdir}/hifiasm/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.htigs.fa' : null } ] @@ -325,7 +325,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_PRI' { ext.prefix = { "purged" } publishDir = [ - path: { "${params.outdir}/purging" }, + path: { "${params.outdir}/hifiasm/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -334,7 +334,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_HAP' { ext.prefix = { "purged.htigs" } publishDir = [ - path: { "${params.outdir}/purging" }, + path: { "${params.outdir}/hifiasm/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -342,7 +342,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { publishDir = [ - path: { "${params.outdir}/purging/${meta.id}.purged.${meta.lineage}.busco" }, + path: { "${params.outdir}/hifiasm/purging/${meta.id}.purged.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -350,7 +350,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/purging/${meta.id}.purged.ccs.merquryk" }, + path: { "${params.outdir}/hifiasm/purging/${meta.id}.purged.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -362,7 +362,7 @@ process { withName: LONGRANGER_MKREF { publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -374,7 +374,7 @@ process { container = "ghcr.io/sanger-tol/longranger:2.2.2-c3" } publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -382,7 +382,7 @@ process { withName: BED_CHUNKS { publishDir = [ - path: { "${params.outdir}/polishing/chunks" }, + path: { "${params.outdir}/hifiasm/polishing/chunks" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -390,7 +390,7 @@ process { withName: FREEBAYES { publishDir = [ - path: { "${params.outdir}/polishing/vcf" }, + path: { "${params.outdir}/hifiasm/polishing/vcf" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -398,7 +398,7 @@ process { withName: MERGE_FREEBAYES { publishDir = [ - path: { "${params.outdir}/polishing/" }, + path: { "${params.outdir}/hifiasm/polishing/" }, mode: params.publish_dir_mode, pattern: "*merged*" ] @@ -410,7 +410,7 @@ process { ext.args = '-i\'QUAL>1 && (GT="AA" || GT="Aa")\' -Hla' ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/polishing/" }, + path: { "${params.outdir}/hifiasm/polishing/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -433,7 +433,7 @@ process { withName: SEQTK_SUBSEQ_PRIMARY { ext.prefix = 'primary' publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : "primary.fa" } ] @@ -442,7 +442,7 @@ process { withName: SEQTK_SUBSEQ_HAPLOTIGS { ext.prefix = 'haplotigs' publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : "haplotigs.fa" } ] @@ -451,7 +451,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_PRI' { ext.prefix = { "primary" } publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -460,7 +460,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_HAP' { ext.prefix = { "haplotigs" } publishDir = [ - path: { "${params.outdir}/polishing" }, + path: { "${params.outdir}/hifiasm/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -469,7 +469,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ - path: { "${params.outdir}/polishing/${meta.id}.polished.${meta.lineage}.busco" }, + path: { "${params.outdir}/hifiasm/polishing/${meta.id}.polished.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -477,7 +477,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/polishing/${meta.id}.polished.ccs.merquryk" }, + path: { "${params.outdir}/hifiasm/polishing/${meta.id}.polished.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -534,7 +534,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:SAMTOOLS_VIEW_MARKDUP' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -542,7 +542,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:BED_SORT' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -550,7 +550,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_INDEX' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -558,7 +558,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_STATS' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -566,7 +566,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -574,7 +574,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_IDXSTATS' { publishDir = [ - path: { "${params.outdir}/scaffolding" }, + path: { "${params.outdir}/hifiasm/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -590,7 +590,7 @@ process { // Skip the initial assembly error correction step ext.args = '--no-contig-ec' publishDir = [ - path: { "${params.outdir}/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -604,7 +604,7 @@ process { // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' publishDir = [ - path: { "${params.outdir}/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -614,7 +614,7 @@ process { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ - path: { "${params.outdir}/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -624,7 +624,7 @@ process { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms6g -Xmx48g' publishDir = [ - path: { "${params.outdir}/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -633,7 +633,7 @@ process { withName: 'JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ - path: { "${params.outdir}/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -642,7 +642,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ - path: { "${params.outdir}/scaffolding/yahs/" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -650,7 +650,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/scaffolding/yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -658,7 +658,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, + path: { "${params.outdir}/hifiasm/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] From 7bb735500968948daae87d5d1fa745b40e54d682 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Fri, 19 May 2023 16:18:58 +0100 Subject: [PATCH 20/30] Fix path to HiFi reads in test --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index 57de3843..0262bea0 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -4,7 +4,7 @@ samples: reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/ pacbio: reads: - - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/assembly_test/hifiasm/HiFi.reads.fasta + - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta HiC: reads: - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram From b20f4a73aaa708b08ca7e8e9059ca6b3469fc8f3 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Mon, 22 May 2023 14:30:08 +0100 Subject: [PATCH 21/30] Set special resources for hifiasm --- conf/base.config | 5 + conf/test_full_iyVesGerm1.config | 5 + modules.json | 180 ++++++++++++++++++++------- modules/nf-core/hifiasm/hifiasm.diff | 16 ++- modules/nf-core/hifiasm/main.nf | 5 +- 5 files changed, 160 insertions(+), 51 deletions(-) diff --git a/conf/base.config b/conf/base.config index 33cd2459..490384f4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -24,6 +24,11 @@ process 
{ // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withName:HIFIASM { + cpus = { check_max( 28 * task.attempt, 'cpus' ) } + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } diff --git a/conf/test_full_iyVesGerm1.config b/conf/test_full_iyVesGerm1.config index 7d034eaa..531bf49d 100644 --- a/conf/test_full_iyVesGerm1.config +++ b/conf/test_full_iyVesGerm1.config @@ -14,7 +14,12 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' + max_cpus = 28 + max_memory = '100.GB' + max_time = '24.h' + // Input data for full size test input = 'assets/test_iyVesGerm1.yaml' polishing_on = true + hifiasm_hic_on = true } diff --git a/modules.json b/modules.json index 72029f58..b390225e 100644 --- a/modules.json +++ b/modules.json @@ -8,110 +8,152 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { 
"branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + 
], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "hifiasm": { "branch": "master", @@ -121,125 +163,173 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff index 6ac17b0a..3d33a901 100644 --- a/modules/nf-core/hifiasm/hifiasm.diff +++ b/modules/nf-core/hifiasm/hifiasm.diff @@ -1,9 +1,15 @@ Changes in module 'nf-core/hifiasm' --- modules/nf-core/hifiasm/main.nf +++ modules/nf-core/hifiasm/main.nf -@@ -4,8 +4,8 @@ +@@ -2,10 +2,13 @@ + tag "$meta.id" + label 'process_high' - conda "bioconda::hifiasm=0.18.5" +- conda "bioconda::hifiasm=0.18.5" ++ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { ++ exit 1, "This version of HIFIASM module does not support Conda. Please use Docker / Singularity / Podman instead." ++ } ++ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hifiasm:0.18.5--h5b5514e_0' : - 'quay.io/biocontainers/hifiasm:0.18.5--h5b5514e_0' }" @@ -12,7 +18,7 @@ Changes in module 'nf-core/hifiasm' input: tuple val(meta), path(reads) -@@ -13,6 +13,7 @@ +@@ -13,6 +16,7 @@ path maternal_kmer_dump path hic_read1 path hic_read2 @@ -20,7 +26,7 @@ Changes in module 'nf-core/hifiasm' output: tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs -@@ -22,8 +23,10 @@ +@@ -22,8 +26,10 @@ tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true @@ -33,7 +39,7 @@ Changes in module 'nf-core/hifiasm' path "versions.yml" , emit: versions when: -@@ -32,6 +35,8 @@ +@@ -32,6 +38,8 @@ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 0e967256..85c4b004 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -2,7 +2,10 @@ process HIFIASM { tag "$meta.id" label 'process_high' - conda "bioconda::hifiasm=0.18.5" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "This version of HIFIASM module does not support Conda. Please use Docker / Singularity / Podman instead." + } + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' : 'quay.io/biocontainers/mulled-v2-8019bff5bdc04e0e88980d5ba292ba022fec5dd9:56ed7e3ac0e84e7d947af98abfb86dda9e1dc9f8-0' }" From e3ee315b8d2264bb6f8d48b0773b7bb0b8a79b51 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 25 May 2023 13:33:39 +0100 Subject: [PATCH 22/30] Updates to output dir structure and yahs resolution params for testing --- conf/modules.config | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8b9e62f0..97e29a22 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -517,13 +517,18 @@ process { withName: SAMTOOLS_MARKDUP { ext.prefix = { "${meta.id}.markdup" } + publishDir = [ + path: { "${params.outdir}/hifiasm/scaffolding" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: SAMTOOLS_VIEW { ext.args = "--output-fmt cram" } - withName: SAMTOOLS_VIEW_MARKDUP { + withName: '.*ALIGN_SHORT:MARKDUP_STATS:SAMTOOLS_VIEW_MARKDUP' { // Return compressed BAM output // Filter out reads classified as not primary alignment (0x100) // read fails platform/vendor quality checks (0x200) @@ -532,14 +537,6 @@ process { ext.args = "-u -F0xf00 -e 'mapq>=10' --output-fmt bam" } - withName: '.*ALIGN_SHORT:MARKDUP_STATS:SAMTOOLS_VIEW_MARKDUP' { - publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: '.*ALIGN_SHORT:MARKDUP_STATS:BED_SORT' { publishDir = [ path: { "${params.outdir}/hifiasm/scaffolding" }, @@ -588,9 +585,9 @@ process { // Set up of the scffolding pipeline withName: 'YAHS' { // Skip the initial assembly error correction step - ext.args = '--no-contig-ec' + ext.args = '--no-contig-ec -r 1000,2000,5000' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -604,7 +601,7 @@ process { // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -614,7 +611,7 @@ process { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -624,7 +621,7 @@ process { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms6g -Xmx48g' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -633,7 +630,7 @@ process { withName: 'JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -642,7 +639,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -650,7 +647,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, + path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] From e26569d6535545b9df32bf0e11321cc80a88eee4 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 25 May 2023 14:19:45 +0100 Subject: [PATCH 23/30] Add date to output folder --- conf/modules.config | 128 +++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 61 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 97e29a22..0cd882ca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,6 +10,12 @@ ---------------------------------------------------------------------------------------- */ +params { + timestamp = (new Date()).format("yyyyMMdd") + hifiasm = "hifiasm.${params.timestamp}" + hifiasmhic = "hifiasm-hic.${params.timestamp}" +} + process { withName: CUSTOM_DUMPSOFTWAREVERSIONS { @@ -54,7 +60,7 @@ process { withName: HIFIASM_PRI { ext.args = "--primary" publishDir = [ - path: { "${params.outdir}/hifiasm" }, + path: { "${params.outdir}/${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -63,7 +69,7 @@ process { withName: HIFIASM_HIC { ext.args = "--primary" publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -72,7 +78,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI' { ext.prefix = { "${meta.id}.asm.p_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm" }, + path: { "${params.outdir}/${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -81,7 +87,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT' { ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm" }, + path: { "${params.outdir}/${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -90,7 +96,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' { ext.prefix = { "${meta.id}.asm.hic.p_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -99,7 +105,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' { ext.prefix = { "${meta.id}.asm.hic.a_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -107,7 +113,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_.*HIC' { publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -116,7 +122,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:GFASTATS_PRI' { ext.prefix = { "${meta.id}.asm.p_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm" }, + path: { "${params.outdir}/${params.hifiasm}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -125,7 +131,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:GFASTATS_HAP' { ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm" }, + path: { "${params.outdir}/${params.hifiasm}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -134,7 +140,7 @@ process { withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' { ext.prefix = { "${meta.id}.asm.hic.p_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -143,7 +149,7 @@ process { withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' { ext.prefix = { 
"${meta.id}.asm.hic.a_ctg" } publishDir = [ - path: { "${params.outdir}/hifiasm-hic" }, + path: { "${params.outdir}/${params.hifiasmhic}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -151,7 +157,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.${meta.lineage}.busco" }, + path: { "${params.outdir}/${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -159,7 +165,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/hifiasm/${meta.id}.p_ctg.ccs.merquryk" }, + path: { "${params.outdir}/${params.hifiasm}/${meta.id}.p_ctg.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -173,7 +179,7 @@ process { ext.args = "-k19 -w10 -O5,56 -E4,1 -A2 -B5 -z400,50 -r2000 --lj-min-ratio 0.5" ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -181,7 +187,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -189,7 +195,7 @@ process { withName: '.*PURGE_DUPS_PRI:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -197,7 +203,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -206,7 +212,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/hifiasm/purging/split_aln" }, + path: { "${params.outdir}/${params.hifiasm}/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -216,7 +222,7 @@ process { ext.args = "-xasm5 -DP" ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/hifiasm/purging/split_aln" }, + path: { "${params.outdir}/${params.hifiasm}/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -225,7 +231,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PURGEDUPS' { ext.args = "-2" publishDir = [ - path: { "${params.outdir}/hifiasm/purging/purge_dups" }, + path: { "${params.outdir}/${params.hifiasm}/purging/purge_dups" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -235,12 +241,12 @@ process { ext.prefix = { "${meta.prefix}" } publishDir = [ [ - path: { "${params.outdir}/hifiasm/purging/seqs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/seqs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/hifiasm/purging/" }, + path: { "${params.outdir}/${params.hifiasm}/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.fa' : null } ] @@ -250,7 +256,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_READS' { ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -258,7 +264,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -267,7 +273,7 @@ process { withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -275,7 +281,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/coverage.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -284,7 +290,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/hifiasm/purging/split_aln.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -293,7 +299,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_ASSEMBLY' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/hifiasm/purging/split_aln.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -301,7 +307,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PURGEDUPS' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/purge_dups.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/purge_dups.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -310,12 +316,12 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { publishDir = [ [ - path: { "${params.outdir}/hifiasm/purging/seqs.htigs" }, + path: { "${params.outdir}/${params.hifiasm}/purging/seqs.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/hifiasm/purging/" }, + path: { "${params.outdir}/${params.hifiasm}/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.htigs.fa' : null } ] @@ -325,7 +331,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_PRI' { ext.prefix = { "purged" } publishDir = [ - path: { "${params.outdir}/hifiasm/purging" }, + path: { "${params.outdir}/${params.hifiasm}/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -334,7 +340,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_HAP' { ext.prefix = { "purged.htigs" } publishDir = [ - path: { "${params.outdir}/hifiasm/purging" }, + path: { "${params.outdir}/${params.hifiasm}/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -342,7 +348,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/${meta.id}.purged.${meta.lineage}.busco" }, + path: { "${params.outdir}/${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -350,7 +356,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/hifiasm/purging/${meta.id}.purged.ccs.merquryk" }, + path: { "${params.outdir}/${params.hifiasm}/purging/${meta.id}.purged.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -362,7 +368,7 @@ process { withName: LONGRANGER_MKREF { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -374,7 +380,7 @@ process { container = "ghcr.io/sanger-tol/longranger:2.2.2-c3" } publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -382,7 +388,7 @@ process { withName: BED_CHUNKS { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/chunks" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/chunks" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -390,7 +396,7 @@ process { withName: FREEBAYES { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/vcf" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/vcf" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -398,7 +404,7 @@ process { withName: MERGE_FREEBAYES { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/" }, mode: params.publish_dir_mode, pattern: "*merged*" ] @@ -410,7 +416,7 @@ process { ext.args = '-i\'QUAL>1 && (GT="AA" || GT="Aa")\' -Hla' ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -433,7 +439,7 @@ process { withName: SEQTK_SUBSEQ_PRIMARY { ext.prefix = 'primary' publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : "primary.fa" } ] @@ -442,7 +448,7 @@ process { withName: SEQTK_SUBSEQ_HAPLOTIGS { ext.prefix = 'haplotigs' publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : "haplotigs.fa" } ] @@ -451,7 +457,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_PRI' { ext.prefix = { "primary" } publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -460,7 +466,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_HAP' { ext.prefix = { "haplotigs" } publishDir = [ - path: { "${params.outdir}/hifiasm/polishing" }, + path: { "${params.outdir}/${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -469,7 +475,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/${meta.id}.polished.${meta.lineage}.busco" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/${meta.id}.polished.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -477,7 +483,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/hifiasm/polishing/${meta.id}.polished.ccs.merquryk" }, + path: { "${params.outdir}/${params.hifiasm}/polishing/${meta.id}.polished.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -518,7 +524,7 @@ process { withName: SAMTOOLS_MARKDUP { ext.prefix = { "${meta.id}.markdup" } publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -539,7 +545,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:BED_SORT' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -547,7 +553,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_INDEX' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -555,7 +561,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_STATS' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -563,7 +569,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -571,7 +577,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_IDXSTATS' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -587,7 +593,7 @@ process { // Skip the initial assembly error correction step ext.args = '--no-contig-ec -r 1000,2000,5000' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -601,7 +607,7 @@ process { // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -611,7 +617,7 @@ process { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -621,7 +627,7 @@ process { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms6g -Xmx48g' publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -630,7 +636,7 @@ process { withName: 'JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -639,7 +645,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -647,7 +653,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/yahs/out.break.yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -655,7 +661,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/hifiasm/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, + path: { "${params.outdir}/${params.hifiasm}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] From 539550f7fc1165112c6bfa8ce8ecd419b58a0d4a Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 25 May 2023 14:21:59 +0100 Subject: [PATCH 24/30] Prettify --- modules.json | 180 +++++++++++++-------------------------------------- 1 file changed, 45 insertions(+), 135 deletions(-) diff --git a/modules.json b/modules.json index b390225e..72029f58 100644 --- a/modules.json +++ b/modules.json @@ -8,152 +8,110 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ 
- "modules" - ] + "installed_by": ["modules"] }, "hifiasm": { "branch": "master", @@ -163,173 +121,125 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - 
"modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } From 79f3e4dc531bb729f464143bc43ca2fc3aa8ef48 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 25 May 2023 15:34:36 +0100 Subject: [PATCH 25/30] Customize yahs command line for tests --- conf/modules.config | 1 - conf/test.config | 6 ++ modules.json | 180 +++++++++++++++++++++++++++++++++----------- 3 files changed, 141 insertions(+), 46 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0cd882ca..e4969361 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -591,7 +591,6 @@ process { // Set up of the scffolding pipeline withName: 'YAHS' { // Skip the initial assembly error correction step - ext.args = '--no-contig-ec -r 1000,2000,5000' publishDir = [ path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index fbfa77a9..65e548f9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,3 +25,9 @@ params { polishing_on = true hifiasm_hic_on = true } + +// Set up of the scffolding eipeline +withName: 'YAHS' { + // Skip the initial assembly error correction step + ext.args = '-r 1000,2000,5000' +} diff --git a/modules.json b/modules.json index 72029f58..b390225e 100644 --- a/modules.json +++ b/modules.json @@ -8,110 +8,152 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/fastk": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "hifiasm": { "branch": "master", @@ -121,125 +163,173 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", 
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } From b9faca4e808d1a1565b1ae47c7d3386d23a2e784 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Tue, 30 May 2023 14:45:50 +0100 Subject: [PATCH 26/30] Add tolid as prefix to the output folder --- conf/modules.config | 131 +++++++++--------- modules/nf-core/seqtk/subseq/main.nf | 2 +- .../nf-core/seqtk/subseq/seqtk-subseq.diff | 7 +- subworkflows/local/polishing.nf | 11 +- workflows/genomeassembly.nf | 6 +- 5 files changed, 78 insertions(+), 79 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e4969361..5e6f9448 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -60,7 +60,7 @@ process { withName: HIFIASM_PRI { ext.args = "--primary" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -69,7 +69,7 @@ process { withName: HIFIASM_HIC { ext.args = "--primary" publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -78,7 +78,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI' { ext.prefix = { "${meta.id}.asm.p_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -87,7 +87,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT' { ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -96,7 +96,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' { ext.prefix = { "${meta.id}.asm.hic.p_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -105,7 +105,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' { ext.prefix = { "${meta.id}.asm.hic.a_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -113,7 +113,7 @@ process { withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_.*HIC' { publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -122,7 +122,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:GFASTATS_PRI' { ext.prefix = { "${meta.id}.asm.p_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -131,7 +131,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:GFASTATS_HAP' { ext.prefix = { "${meta.id}.asm.a_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -140,7 +140,7 @@ process { withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' { ext.prefix = { "${meta.id}.asm.hic.p_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -149,7 +149,7 @@ process { withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' { ext.prefix = { "${meta.id}.asm.hic.a_ctg" } publishDir = [ - path: { "${params.outdir}/${params.hifiasmhic}" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -157,7 +157,7 @@ process { 
withName: '.*GENOME_STATISTICS_RAW:BUSCO' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -165,7 +165,7 @@ process { withName: '.*GENOME_STATISTICS_RAW:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/${meta.id}.p_ctg.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/${meta.id}.p_ctg.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -179,7 +179,7 @@ process { ext.args = "-k19 -w10 -O5,56 -E4,1 -A2 -B5 -z400,50 -r2000 --lj-min-ratio 0.5" ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -187,7 +187,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -195,7 +195,7 @@ process { withName: '.*PURGE_DUPS_PRI:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -203,7 +203,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -212,7 +212,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/split_aln" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -222,7 +222,7 @@ process { ext.args = "-xasm5 -DP" ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/split_aln" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/split_aln" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -231,7 +231,7 @@ process { withName: '.*PURGE_DUPS_PRI:PURGEDUPS_PURGEDUPS' { ext.args = "-2" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/purge_dups" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/purge_dups" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -241,12 +241,12 @@ process { ext.prefix = { "${meta.prefix}" } publishDir = [ [ - path: { "${params.outdir}/${params.hifiasm}/purging/seqs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/seqs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/${params.hifiasm}/purging/" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.fa' : null } ] @@ -256,7 +256,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_READS' { ext.prefix = { "${meta.id}.reads" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, pattern: ".*paf" ] @@ -264,7 +264,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PBCSTAT' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -273,7 +273,7 @@ process { withName: '.*PURGE_DUPS_ALT:GET_CALCUTS_PARAMS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -281,7 +281,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_CALCUTS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/coverage.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/coverage.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -290,7 +290,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_SPLITFA' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/split_aln.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -299,7 +299,7 @@ process { withName: '.*PURGE_DUPS_ALT:MINIMAP2_ALIGN_ASSEMBLY' { ext.prefix = "self_aln" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/split_aln.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/split_aln.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -307,7 +307,7 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_PURGEDUPS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/purge_dups.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/purge_dups.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -316,12 +316,12 @@ process { withName: '.*PURGE_DUPS_ALT:PURGEDUPS_GETSEQS' { publishDir = [ [ - path: { "${params.outdir}/${params.hifiasm}/purging/seqs.htigs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/seqs.htigs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ], [ - path: { "${params.outdir}/${params.hifiasm}/purging/" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.contains('purged.fa') ? 
'purged.htigs.fa' : null } ] @@ -331,7 +331,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_PRI' { ext.prefix = { "purged" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -340,7 +340,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:GFASTATS_HAP' { ext.prefix = { "purged.htigs" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -348,7 +348,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -356,7 +356,7 @@ process { withName: '.*GENOME_STATISTICS_PURGED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/purging/${meta.id}.purged.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/${meta.id}.purged.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -368,7 +368,7 @@ process { withName: LONGRANGER_MKREF { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -380,7 +380,7 @@ process { container = "ghcr.io/sanger-tol/longranger:2.2.2-c3" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -388,23 +388,16 @@ process { withName: BED_CHUNKS { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/chunks" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: FREEBAYES { - publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/vcf" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing/chunks" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: MERGE_FREEBAYES { + ext.prefix = 'merged' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing/" }, mode: params.publish_dir_mode, pattern: "*merged*" ] @@ -416,7 +409,7 @@ process { ext.args = '-i\'QUAL>1 && (GT="AA" || GT="Aa")\' -Hla' ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -439,7 +432,7 @@ process { withName: SEQTK_SUBSEQ_PRIMARY { ext.prefix = 'primary' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : "primary.fa" } ] @@ -448,7 +441,7 @@ process { withName: SEQTK_SUBSEQ_HAPLOTIGS { ext.prefix = 'haplotigs' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : "haplotigs.fa" } ] @@ -457,7 +450,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_PRI' { ext.prefix = { "primary" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -466,7 +459,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:GFASTATS_HAP' { ext.prefix = { "haplotigs" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -475,7 +468,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:BUSCO' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/${meta.id}.polished.${meta.lineage}.busco" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing/${meta.id}.polished.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -483,7 +476,7 @@ process { withName: '.*GENOME_STATISTICS_POLISHED:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/polishing/${meta.id}.polished.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing/${meta.id}.polished.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -524,7 +517,7 @@ process { withName: SAMTOOLS_MARKDUP { ext.prefix = { "${meta.id}.markdup" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -545,7 +538,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:BED_SORT' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -553,7 +546,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_INDEX' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -561,7 +554,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_STATS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -569,7 +562,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -577,7 +570,7 @@ process { withName: '.*ALIGN_SHORT:MARKDUP_STATS:CONVERT_STATS:SAMTOOLS_IDXSTATS' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -590,9 +583,9 @@ process { // Set up of the scffolding pipeline withName: 'YAHS' { - // Skip the initial assembly error correction step + ext.prefix = 'out' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -606,7 +599,7 @@ process { // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -616,7 +609,7 @@ process { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -626,7 +619,7 @@ process { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms6g -Xmx48g' publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -635,7 +628,7 @@ process { withName: 'JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -644,7 +637,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] @@ -652,7 +645,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/yahs/out.break.yahs/${meta.id}_scaffolds_final.${meta.lineage}.busco" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -660,7 +653,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/${params.hifiasm}/scaffolding/${meta.id}.${meta.id}_scaffolds_final.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/out_scaffolds_final.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf index 3e6765e9..bc0b3bab 100644 --- a/modules/nf-core/seqtk/subseq/main.nf +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -8,7 +8,7 @@ process SEQTK_SUBSEQ { 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" input: - path sequences + tuple val(meta), path(sequences) path filter_list output: diff --git a/modules/nf-core/seqtk/subseq/seqtk-subseq.diff b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff index 4e4f5f72..6f19d092 100644 --- a/modules/nf-core/seqtk/subseq/seqtk-subseq.diff +++ b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff @@ -1,7 +1,12 @@ Changes in module 'nf-core/seqtk/subseq' --- modules/nf-core/seqtk/subseq/main.nf +++ modules/nf-core/seqtk/subseq/main.nf -@@ -12,7 +12,7 @@ +@@ -8,11 +8,11 @@ + 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" + + input: +- path sequences ++ tuple val(meta), path sequences path filter_list output: diff --git a/subworkflows/local/polishing.nf b/subworkflows/local/polishing.nf index 0cd12f00..2d34ff2e 100644 --- a/subworkflows/local/polishing.nf +++ b/subworkflows/local/polishing.nf @@ -69,10 +69,12 @@ workflow POLISHING { input_sort = BCFTOOLS_VIEW.out.vcf.map{ meta, vcf -> [ [id: meta.id.toString()+'_sorted'], vcf ]} BCFTOOLS_SORT(input_sort) ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - + // Merge vcf files into one - MERGE_FREEBAYES(BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [[id: 'merged'], vcf]}.groupTuple(), - BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [meta, []] }) + 
meta_ch = fasta_in.collect{it[0]} + MERGE_FREEBAYES(BCFTOOLS_SORT.out.vcf.combine(fasta_in) + .map{ meta, vcf, meta_fin, fa, fai -> [[id: meta_fin.id], vcf]}.groupTuple(), + [ [id:'merged'], [] ] ) ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) // Normalize variants and index normalized vcf @@ -90,7 +92,8 @@ workflow POLISHING { BCFTOOLS_NORM.out.vcf .join(BCFTOOLS_INDEX_NORM.out.tbi, by: [0], remainder: true) .join(fasta_ch, by: [0], remainder: true) - .set{ch_merge} + .set{ ch_merge } + //ch_merge.view() BCFTOOLS_CONSENSUS(ch_merge) ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 3aa5cc6a..5dd3fa08 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -139,9 +139,7 @@ workflow GENOMEASSEMBLY { // Separate primary contigs KEEP_SEQNAMES_PRIMARY(PURGE_DUPS_PRI.out.pri) ch_versions = ch_versions.mix(KEEP_SEQNAMES_PRIMARY.out.versions) - POLISHING.out.fasta.map{ meta, f -> f } - .set{ polished_fasta } - SEQTK_SUBSEQ_PRIMARY(polished_fasta, KEEP_SEQNAMES_PRIMARY.out.seqlist) + SEQTK_SUBSEQ_PRIMARY(POLISHING.out.fasta, KEEP_SEQNAMES_PRIMARY.out.seqlist) ch_versions = ch_versions.mix(SEQTK_SUBSEQ_PRIMARY.out.versions) POLISHING.out.fasta.map{ meta, f -> meta } .combine(SEQTK_SUBSEQ_PRIMARY.out.sequences) @@ -150,7 +148,7 @@ workflow GENOMEASSEMBLY { // Separate alt contigs KEEP_SEQNAMES_HAPLOTIGS(PURGE_DUPS_ALT.out.pri) ch_versions = ch_versions.mix(KEEP_SEQNAMES_HAPLOTIGS.out.versions) - SEQTK_SUBSEQ_HAPLOTIGS(polished_fasta, KEEP_SEQNAMES_HAPLOTIGS.out.seqlist) + SEQTK_SUBSEQ_HAPLOTIGS(POLISHING.out.fasta, KEEP_SEQNAMES_HAPLOTIGS.out.seqlist) ch_versions = ch_versions.mix(SEQTK_SUBSEQ_HAPLOTIGS.out.versions) POLISHING.out.fasta.map{ meta, f -> meta } .combine(SEQTK_SUBSEQ_HAPLOTIGS.out.sequences) From c69753ba958dd339458c1a2c8336b31e3ea0ea2f Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Tue, 30 May 2023 14:51:09 +0100 Subject: 
[PATCH 27/30] Prettify --- conf/modules.config | 2 +- modules.json | 180 +++++++++++--------------------------------- 2 files changed, 46 insertions(+), 136 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5e6f9448..2e629a9b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -583,7 +583,7 @@ process { // Set up of the scffolding pipeline withName: 'YAHS' { - ext.prefix = 'out' + ext.prefix = 'out' publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index b390225e..72029f58 100644 --- a/modules.json +++ b/modules.json @@ -8,152 +8,110 @@ "bcftools/concat": { "branch": "master", "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/consensus": { "branch": "master", "git_sha": "fa12afdf5874c1d11e4a20efe81c97935e8eea24", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bedtools/bamtobed/bedtools-bamtobed.diff" }, "busco": { "branch": "master", "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/fastk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastk/histex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "freebayes": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/freebayes/freebayes.diff" }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "643756685546fa61f5c8fba439af746c090b9180", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genescopefk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", 
"git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "hifiasm": { "branch": "master", @@ -163,173 +121,125 @@ "merquryfk/merquryfk": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextmap": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextsnapshot": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/calcuts": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/purgedups/calcuts/purgedups-calcuts.diff" }, "purgedups/getseqs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/pbcstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "purgedups/purgedups": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": 
["modules"] }, "purgedups/splitfa": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/collate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fixmate": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/markdup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "yahs": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } From eca213ad57beb82343f103fd64cd8537dbd6ddc5 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 1 Jun 2023 09:41:06 +0100 Subject: [PATCH 28/30] Update dataset for test full --- assets/test_gsMetZobe1.yaml | 14 ++++++++++++++ conf/modules.config | 2 +- conf/test_full.config | 9 +++++---- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 assets/test_gsMetZobe1.yaml diff --git a/assets/test_gsMetZobe1.yaml b/assets/test_gsMetZobe1.yaml new file mode 100644 index 00000000..7c5ecf78 --- /dev/null +++ b/assets/test_gsMetZobe1.yaml @@ -0,0 +1,14 @@ +samples: + - id: gsMetZobe1 + illumina_10X: + reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/gsMetZobe1/10x/ + pacbio: + reads: + - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/gsMetZobe1/pacbio/m64125_200823_145825.ccs.bc1019_BAK8B_OA--bc1019_BAK8B_OA.filtered.fasta.gz + HiC: + reads: + - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/gsMetZobe1/hic-arima/35528_4#7.cram + arima_motif: GATC,GANTC,CTNAG,TTAA + busco: + lineages_path: /lustre/scratch123/tol/resources/busco/v5/ + lineage: fungi_odb10 diff --git a/conf/modules.config b/conf/modules.config index 2e629a9b..5bbaf730 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -653,7 +653,7 @@ process { withName: '.*GENOME_STATISTICS_SCAFFOLDS:MERQURYFK_MERQURYFK' { publishDir = [ - path: { 
"${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/out_scaffolds_final.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/test_full.config b/conf/test_full.config index 24f7f73b..6ccbbe6a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,11 +14,12 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 28 + max_memory = '100.GB' + max_time = '24.h' // Input data for full size test - input = 'assets/test_gfLaeSulp1.yaml' + input = 'assets/test_gsMetZobe1.yaml' polishing_on = true + hifiasm_hic_on = true } From 8d065fdf1f4857c83a80aa730546ac4e76d059fe Mon Sep 17 00:00:00 2001 From: Ksenia Date: Thu, 1 Jun 2023 14:51:12 +0100 Subject: [PATCH 29/30] Update modules/local/keep_seqnames.nf Co-authored-by: Priyanka Surana --- modules/local/keep_seqnames.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/keep_seqnames.nf b/modules/local/keep_seqnames.nf index 9c37403e..0ec58d8e 100644 --- a/modules/local/keep_seqnames.nf +++ b/modules/local/keep_seqnames.nf @@ -20,6 +20,7 @@ process KEEP_SEQNAMES { def prefix = meta.prefix ?: '' """ grep '>' $fasta | cut -f1 | sed 's/>//' > seq.lst + cat <<-END_VERSIONS > versions.yml "${task.process}": cut: \$(cut --version | head -n 1 | awk '{print \$NF}') From d9e98c3fea81f38f829793be8a0df7775b209534 Mon Sep 17 00:00:00 2001 From: Ksenia Krasheninnikova Date: Thu, 1 Jun 2023 15:21:32 +0100 Subject: [PATCH 30/30] Update to the pipeline info naming --- conf/modules.config | 2 +- docs/output.md | 2 +- lib/NfcoreTemplate.groovy | 2 +- nextflow.config | 2 +- nextflow_schema.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/conf/modules.config b/conf/modules.config index 5bbaf730..4d98dbf2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,7 +20,7 @@ process { withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/genomeassembly_info" }, mode: params.publish_dir_mode, pattern: '*_versions.yml' ] diff --git a/docs/output.md b/docs/output.md index da276a1e..64968fda 100644 --- a/docs/output.md +++ b/docs/output.md @@ -58,7 +58,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -- `pipeline_info/` +- `genomeassembly_info/` - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 90dbdc0f..cbf667ce 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -135,7 +135,7 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") + def output_d = new File("${params.outdir}/genomeassembly_info/") if (!output_d.exists()) { output_d.mkdirs() } diff --git a/nextflow.config b/nextflow.config index ed61a841..bbdfa995 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,7 +32,7 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" + tracedir = "${params.outdir}/genomeassembly_info" publish_dir_mode = 'copy' email = null email_on_fail = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c7ed0592..050e256c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -206,7 +206,7 @@ "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", + "default": "${params.outdir}/genomeassembly_info", "fa_icon": "fas fa-cogs", "hidden": true },