Skip to content

Commit

Permalink
add consensus masking
Browse files Browse the repository at this point in the history
  • Loading branch information
rmcolq committed Sep 22, 2023
1 parent af60d54 commit 3e77c2d
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 8 deletions.
3 changes: 2 additions & 1 deletion conf/params.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ params {

reference_segment_sep = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
reference_min_count = 10
mask_depth = 20

assembler = null

Expand All @@ -73,6 +74,6 @@ params {
]
agent = null
container = "rmcolq/pantheon"
container_sha = "sha256:7b547c61219e9b6c5821cf9a9b50ca184613608f172fc7ae156de9b5a142f19a"
container_sha = "sha256:6f08c4c419929666b62e8b0699c588d23c22ebe18803fdcf207f24d6e4c68631"
}
}
5 changes: 5 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,9 @@ dependencies:
- tabix
- mako
- entrez-direct
- minimap2
- samtools=1.17
- bedtools
- pip:
- git+https://github.com/Desperate-Dan/maskara.git

71 changes: 71 additions & 0 deletions modules/assemble_taxa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,74 @@ process medaka_consensus {
mv medaka_output/consensus.fasta "assembly_${taxon_id}.fa"
"""
}

process map_to_consensus {

label 'process_low'

conda 'bioconda::minimmap2=v2.26 bioconda::samtools=v1.17'
container "${params.wf.container}@${params.wf.container_sha}"

input:
tuple val(taxon_id), val(barcode_id), path(reads), path(assembly)
output:
tuple val(taxon_id), val(barcode_id), path(reads), path(assembly), path("out.bam"), path("out.bam.csi")
script:
"""
minimap2 -x map-ont -a ${assembly} ${reads} | samtools sort -o out.bam --write-index -
"""
}

process define_mask {

label 'process_low'

container "${params.wf.container}@${params.wf.container_sha}"

input:
tuple val(taxon_id), val(barcode_id), path(reads), path(assembly), path(bam), path(index)
output:
tuple val(taxon_id), val(barcode_id), path(reads), path(assembly), path("mask.bed.tsv")
script:
"""
str=\$(head -n1 ${assembly})
ref="\${str: 1}"
maskara -d ${params.mask_depth} -r \$ref -o mask.bed ${bam}
"""
}

process apply_mask {

label 'process_low'

publishDir path: "${params.outdir}/${barcode_id}/assemblies", mode: 'copy', pattern: 'assembly_*.fa'

conda 'bioconda::bedtools=v2.31.0'
container "${params.wf.container}@${params.wf.container_sha}"

input:
tuple val(taxon_id), val(barcode_id), path(reads), path(assembly), path(mask)
output:
tuple val(taxon_id), val(barcode_id), path(reads), path("assembly_${taxon_id}.masked.fa")
script:
"""
bedtools maskfasta -fi ${assembly} -bed ${mask} -fo "assembly_${taxon_id}.masked.fa"
"""
}

workflow assemble {
take:
unique_id
ch_assembly
main:
subset_references(ch_assembly)
check_subset(subset_references.out.all)
medaka_consensus(check_subset.out)
map_to_consensus(medaka_consensus.out)
define_mask(map_to_consensus.out)
apply_mask(define_mask.out)
subset_references.out.summary.collectFile(name: "${params.outdir}/${unique_id}/reference_by_barcode.csv", keepHeader: true, skip:1).set{ assembly_summary }
emit:
completed = apply_mask.out
summary = assembly_summary
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ manifest {
description = 'Investigate and assemble metagenomic sequence data from human respiratory infections.'
mainScript = 'main.nf'
nextflowVersion = '>=20.10.0'
version = 'v0.5.0'
version = 'v0.5.1'
}

profiles {
Expand Down
9 changes: 3 additions & 6 deletions subworkflows/process_run.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// workflow to extract reads and create assemblies from a wf-metagenomic or scylla run
include { move_or_compress; unpack_taxonomy; extract_reads; check_reads; taxid_from_name; download_references_by_taxid; filter_references } from '../modules/preprocess'
include { subset_references; check_subset; medaka_consensus } from '../modules/assemble_taxa'
include { assemble } from '../modules/assemble_taxa'
include { generate_assembly_report } from '../modules/generate_report'

EXTENSIONS = ["fastq", "fastq.gz", "fq", "fq.gz"]
Expand Down Expand Up @@ -69,11 +69,8 @@ workflow process_run {
.set{ ch_reads }

ch_assembly = ch_reads.combine(ch_references, by: 0)
subset_references(ch_assembly)
check_subset(subset_references.out.all)
medaka_consensus(check_subset.out)
subset_references.out.summary.collectFile(name: "${params.outdir}/${unique_id}/reference_by_barcode.csv", keepHeader: true, skip:1).set{ assembly_summary }
assemble(unique_id, ch_assembly)

generate_assembly_report(unique_id, input_summary, reference_summary, extract_summary, assembly_summary)
generate_assembly_report(unique_id, input_summary, reference_summary, extract_summary, assemble.out.summary)
}
}

0 comments on commit 3e77c2d

Please sign in to comment.