-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from sanger-tol/update_hic_mapping
Update hic mapping
- Loading branch information
Showing
32 changed files
with
521 additions
and
556 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/bin/bash

#
# Based on https://github.com/sanger-tol/treeval/blob/80554a803903183613d49690d5770eeadb3c42c9/bin/generate_cram_csv.sh
# from Sanger TOL treeval pipeline
#
# Usage: generate_cram_csv.sh <file1.cram> [<file2.cram> ...]
#
# For every CRAM file given on the command line, print one CSV row per chunk
# of (up to) 10000 index entries to stdout:
#
#   cram_path,crai_path,from,to,basename,chunk_number,read_group_line
#
# The index "<cram>.crai" must sit next to the resolved CRAM file.
# chunk_number keeps increasing across all input files.
#

chunkn=0
for cram in "$@"; do

    # Keep only the @RG header record(s); a bare "RG" pattern would also match
    # unrelated header lines (e.g. a @PG command line that mentions "RG").
    # Tabs are turned into a literal "\t" and single quotes are dropped.
    rgline=$(samtools view -H "$cram" | grep "^@RG" | sed 's/\t/\\t/g' | sed "s/'//g")
    # Several @RG records are joined with a space so each CSV row stays on one line.
    rgline=${rgline//$'\n'/ }

    crampath=$(readlink -f "${cram}")

    if [ ! -r "${crampath}.crai" ]; then
        echo "ERROR: cannot read CRAM index ${crampath}.crai" >&2
        exit 1
    fi

    # One line of the decompressed .crai is counted as one container.
    ncontainers=$(zcat "${crampath}.crai" | wc -l)
    base=$(basename "$cram" .cram)

    from=0
    to=10000

    # Emit all full-size chunks.
    while (( to < ncontainers )); do
        printf '%s\n' "${crampath},${crampath}.crai,${from},${to},${base},${chunkn},${rgline}"
        from=$((to + 1))
        to=$((to + 10000))
        chunkn=$((chunkn + 1))
    done

    # Emit the remaining, possibly shorter, last chunk.
    if (( from <= ncontainers )); then
        printf '%s\n' "${crampath},${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline}"
        chunkn=$((chunkn + 1))
    fi
done

exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Nextflow config file for running full-size tests | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Defines input files and everything required to run a full size pipeline test. | ||
Use as follows: | ||
nextflow run sanger-tol/genomeassembly -profile test_full,<docker/singularity> --outdir <OUTDIR> | ||
---------------------------------------------------------------------------------------- | ||
*/ | ||
|
||
params {
    config_profile_name        = 'Full test profile'
    config_profile_description = 'Full test dataset to check pipeline function'

    // Resource ceilings for this profile
    // NOTE(review): presumably consumed by the pipeline's resource-capping logic - confirm
    max_cpus   = 28
    max_memory = '100.GB'
    max_time   = '24.h'

    // Input data for full size test.
    // Anchored to projectDir so the profile also works when the pipeline is
    // launched from outside the repository checkout
    // (e.g. `nextflow run sanger-tol/genomeassembly -profile test_full ...`).
    input          = "${projectDir}/assets/test_gsMetZobe1.yaml"
    polishing_on   = true
    hifiasm_hic_on = true
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// | ||
// Copied from https://github.com/sanger-tol/treeval/blob/28309b7a1faf3aee5627f497c7cfa62d12ac65b8/modules/local/bamtobed_sort.nf | ||
// from Sanger TOL treeval pipeline | ||
// | ||
|
||
|
||
// Convert a BAM file to BED with duplicate-flagged reads (0x400) removed,
// sorted on the 4th BED column.
//
// input:  tuple(meta, bam)
// output: sorted_bed - tuple(meta, <prefix>_merged_sorted.bed)
//         versions   - versions.yml with samtools and bedtools versions
process BAMTOBED_SORT {
    tag "$meta.id"
    label "process_high"

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' :
        'biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.bed"), emit: sorted_bed
    path "versions.yml"           , emit: versions

    script:
    // The prefix comes from the task's ext scope; `args` is not defined in this process.
    def prefix     = task.ext.prefix ?: "${meta.id}"
    // samtools view gets at most 4 extra threads.
    def st_cores   = task.cpus > 4 ? 4 : "${task.cpus}"
    // Half of the task memory, truncated to whole gigabytes (minimum 1) so that
    // `sort -S` never receives a fractional size such as "3.5G".
    def buffer_mem = Math.max(1L, (task.memory.toGiga() / 2) as long)
    """
    samtools view -@${st_cores} -u -F0x400 ${bam} | bamToBed | sort -k4 --parallel=${task.cpus} -S ${buffer_mem}G > ${prefix}_merged_sorted.bed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_merged_sorted.bed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// | ||
// Copied from https://github.com/sanger-tol/treeval/blob/28309b7a1faf3aee5627f497c7cfa62d12ac65b8/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf | ||
// from Sanger TOL treeval pipeline | ||
// | ||
|
||
// Extract one chunk (index range from-to) of a CRAM file, convert it to FASTQ,
// align it with bwa-mem2, fix mate information and write a sorted BAM.
//
// input:  tuple(meta, cramfile, cramindex, from, to, base, chunkid, rglines, bwaprefix)
// output: mappedbam - tuple(meta, <prefix>_<base>_<chunkid>_mem.bam)
//         versions  - versions.yml with samtools and bwa-mem2 versions
process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
    tag "$meta.id"
    label "process_high"

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' :
        'biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }"

    input:
    tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix)

    output:
    tuple val(meta), path("*.bam"), emit: mappedbam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra options per pipeline step, taken from the task's ext scope.
    // The bwa-mem2 options are hard-coded in the command below, so no
    // ext option is read for that step.
    def args1  = task.ext.args1  ?: ''   // samtools fastq
    def args3  = task.ext.args3  ?: ''   // samtools fixmate
    def args4  = task.ext.args4  ?: ''   // samtools view
    def args5  = task.ext.args5  ?: ''   // samtools sort
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Please be aware that one of the tools used here requires memory = 28 * reference size!
    """
    cram_filter -n ${from}-${to} ${cramfile} - | \\
        samtools fastq ${args1} | \\
        bwa-mem2 mem -p ${bwaprefix} -t${task.cpus} -5SPCp -H'${rglines}' - | \\
        samtools fixmate ${args3} - - | \\
        samtools view -bh ${args4} - | \\
        samtools sort ${args5} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam -

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
        bwa-mem2: \$(bwa-mem2 version | sed 's/bwa-mem2 //g')
    END_VERSIONS
    """
    // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') CAUSES ERROR

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // base and chunkid come from the process inputs, so each stubbed chunk
    // gets the same distinct file name the real script would produce.
    """
    touch ${prefix}_${base}_${chunkid}_mem.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
        bwa-mem2: \$(bwa-mem2 version | sed 's/bwa-mem2 //g')
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// | ||
// Based on https://github.com/sanger-tol/treeval/blob/28309b7a1faf3aee5627f497c7cfa62d12ac65b8/modules/local/generate_cram_csv.nf | ||
// from Sanger TOL treeval pipeline | ||
// | ||
|
||
// Run generate_cram_csv.sh on the staged CRAM files and collect its output in
// a single CSV file.
//
// input:  tuple(meta, crampaths) - each CRAM path is staged in its own
//         subdirectory (stageAs "?/*")
// output: csv      - tuple(meta, <prefix>_cram.csv)
//         versions - versions.yml with the samtools version
process GENERATE_CRAM_CSV {
    tag "${meta.id}"
    label 'process_low'

    conda "bioconda::samtools=1.17"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
        'biocontainers/samtools:1.17--h00cdaf9_0' }"

    input:
    tuple val(meta), path(crampaths, stageAs: "?/*")

    output:
    tuple val(meta), path('*.csv'), emit: csv
    path "versions.yml"           , emit: versions

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The CSV is created from scratch by this task, so truncate rather than append.
    """
    generate_cram_csv.sh $crampaths > ${prefix}_cram.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
    END_VERSIONS
    """

    stub:
    // Use the same file name as the real script.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_cram.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
    END_VERSIONS
    """
}
Oops, something went wrong.