Skip to content

Commit

Permalink
Refactor rules for cleaning previous outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
sposadac committed Apr 6, 2020
1 parent 2ef84bc commit 92f628a
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 94 deletions.
57 changes: 0 additions & 57 deletions rules/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -209,18 +209,6 @@ rule create_denovo_initial:
sed -i -e "s/>.*/>${{CONSENSUS_NAME}}/" {output}
"""

rule vicunaclean:
    # Clean-up target for the initial-consensus step: deletes the
    # `initial_consensus` directories and the VICUNA / initial consensus FASTA
    # files found two directory levels below the configured data directory,
    # plus the initial-alignment files in the local `references/` directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/initial_consensus
        rm -rf {params.DIR}/*/*/references/vicuna_consensus.fasta
        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
        rm -rf references/initial_aln.fasta
        rm -rf references/initial_aln_gap_removed.fasta
        rm -rf references/MAFFT_initial_aln.*
        """

# change this to switch between VICUNA and creating a simple
# initial reference
Expand Down Expand Up @@ -348,13 +336,6 @@ rule msa:
rm ALL_{wildcards.kind}.fasta
"""

rule msaclean:
    # Clean-up target for the multiple-sequence-alignment step: deletes the
    # `ALL_aln_*.fasta` and `MAFFT_*_cohort.*` files in the local
    # `references/` directory.
    shell:
        """
        rm -rf references/ALL_aln_*.fasta
        rm -rf references/MAFFT_*_cohort.*
        """


# 4. convert alignments to REF alignment
def get_reference_name(wildcards):
Expand Down Expand Up @@ -396,18 +377,6 @@ rule convert_to_ref:
"""


rule alignclean:
    # Clean-up target for the alignment step: for every directory two levels
    # below the configured data directory, deletes the `alignments` and
    # `QA_alignments` directories and the ref_ambig / ref_majority /
    # initial_consensus FASTA files under `references/`.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/alignments
        rm -rf {params.DIR}/*/*/QA_alignments
        rm -rf {params.DIR}/*/*/references/ref_ambig.fasta
        rm -rf {params.DIR}/*/*/references/ref_majority.fasta
        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
        """

# 2-4. Alternative: align reads using bwa or bowtie
if config.general["aligner"] == "bwa":
rule ref_bwa_index:
Expand Down Expand Up @@ -571,32 +540,6 @@ elif config.general["aligner"] == "bowtie":
rm {params.TMP_SAM}
"""

rule bwaclean:
    # Clean-up target for the BWA alignment path: deletes the `.bwt` index
    # file next to the reference and the `alignments` directories two levels
    # below the configured data directory.
    #
    # The index path is passed through `params` instead of `input`: an input
    # declaration makes Snakemake require the file to exist (and schedule the
    # rule that creates it when it is missing) before this rule may run, which
    # defeats the purpose of a clean target. `rm -f` already tolerates a
    # missing file.
    params:
        DIR=config.input["datadir"],
        INDEX="{}.bwt".format(reference_file),
    shell:
        """
        rm -f {params.INDEX}
        rm -rf {params.DIR}/*/*/alignments
        """

rule bowtieclean:
    # Clean-up target for the Bowtie alignment path: deletes the six `.bt2`
    # index files next to the reference and the `alignments` directories two
    # levels below the configured data directory.
    #
    # The index paths are passed through `params` instead of `input`: an input
    # declaration makes Snakemake require the files to exist (and schedule the
    # rule that creates them when they are missing) before this rule may run,
    # which defeats the purpose of a clean target. `rm -f` already tolerates
    # missing files.
    params:
        DIR=config.input["datadir"],
        # A list-valued param is rendered as space-separated paths in `shell`.
        INDEX=[
            "{}.1.bt2".format(reference_file),
            "{}.2.bt2".format(reference_file),
            "{}.3.bt2".format(reference_file),
            "{}.4.bt2".format(reference_file),
            "{}.rev.1.bt2".format(reference_file),
            "{}.rev.2.bt2".format(reference_file),
        ],
    shell:
        """
        rm -f {params.INDEX}
        rm -rf {params.DIR}/*/*/alignments
        """


rule consensus_sequences:
Expand Down
112 changes: 112 additions & 0 deletions rules/clean.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
rule extractclean:
    # Clean-up target for the extraction step: deletes every `extracted_data`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/extracted_data
        """


rule trimmingclean:
    # Clean-up target for the trimming step: deletes every `preprocessed_data`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/preprocessed_data
        """


rule vicunaclean:
    # Clean-up target for the initial-consensus step: deletes the
    # `initial_consensus` directories and the VICUNA / initial consensus FASTA
    # files found two directory levels below the configured data directory,
    # plus the initial-alignment files in the local `references/` directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/initial_consensus
        rm -rf {params.DIR}/*/*/references/vicuna_consensus.fasta
        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
        rm -rf references/initial_aln.fasta
        rm -rf references/initial_aln_gap_removed.fasta
        rm -rf references/MAFFT_initial_aln.*
        """


rule msaclean:
    # Clean-up target for the multiple-sequence-alignment step: deletes the
    # `ALL_aln_*.fasta` and `MAFFT_*_cohort.*` files in the local
    # `references/` directory.
    shell:
        """
        rm -rf references/ALL_aln_*.fasta
        rm -rf references/MAFFT_*_cohort.*
        """


rule alignclean:
    # Clean-up target for the alignment step: for every directory two levels
    # below the configured data directory, deletes the `alignments` and
    # `QA_alignments` directories and the ref_ambig / ref_majority /
    # initial_consensus FASTA files under `references/`.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/alignments
        rm -rf {params.DIR}/*/*/QA_alignments
        rm -rf {params.DIR}/*/*/references/ref_ambig.fasta
        rm -rf {params.DIR}/*/*/references/ref_majority.fasta
        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
        """


rule bwaclean:
    # Clean-up target for the BWA alignment path: deletes the `.bwt` index
    # file next to the reference, and, two levels below the configured data
    # directory, the `alignments` directories and the `ref_ambig*.fasta` /
    # `ref_majority*.fasta` files under `references/`.
    #
    # The index path is passed through `params` instead of `input`: an input
    # declaration makes Snakemake require the file to exist (and schedule the
    # rule that creates it when it is missing) before this rule may run, which
    # defeats the purpose of a clean target. `rm -f` already tolerates a
    # missing file.
    params:
        DIR=config.input["datadir"],
        INDEX="{}.bwt".format(reference_file),
    shell:
        """
        rm -f {params.INDEX}
        rm -rf {params.DIR}/*/*/alignments
        rm -rf {params.DIR}/*/*/references/ref_ambig*.fasta
        rm -rf {params.DIR}/*/*/references/ref_majority*.fasta
        """


rule bowtieclean:
    # Clean-up target for the Bowtie alignment path: deletes the six `.bt2`
    # index files next to the reference, and, two levels below the configured
    # data directory, the `alignments` directories and the `ref_ambig*.fasta`
    # / `ref_majority*.fasta` files under `references/`.
    #
    # The index paths are passed through `params` instead of `input`: an input
    # declaration makes Snakemake require the files to exist (and schedule the
    # rule that creates them when they are missing) before this rule may run,
    # which defeats the purpose of a clean target. `rm -f` already tolerates
    # missing files.
    params:
        DIR=config.input["datadir"],
        # A list-valued param is rendered as space-separated paths in `shell`.
        INDEX=[
            "{}.1.bt2".format(reference_file),
            "{}.2.bt2".format(reference_file),
            "{}.3.bt2".format(reference_file),
            "{}.4.bt2".format(reference_file),
            "{}.rev.1.bt2".format(reference_file),
            "{}.rev.2.bt2".format(reference_file),
        ],
    shell:
        """
        rm -f {params.INDEX}
        rm -rf {params.DIR}/*/*/alignments
        rm -rf {params.DIR}/*/*/references/ref_ambig*.fasta
        rm -rf {params.DIR}/*/*/references/ref_majority*.fasta
        """


rule snvclean:
    # Clean-up target for SNV calling: deletes every `variants/SNVs`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/variants/SNVs
        """


rule savageclean:
    # Clean-up target for SAVAGE outputs: under `variants/global`, two levels
    # below the configured data directory, deletes the
    # `contigs_stage_?.fasta` files and the `stage_?` entries (`?` matches
    # exactly one character).
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/variants/global/contigs_stage_?.fasta
        rm -rf {params.DIR}/*/*/variants/global/stage_?
        """


rule haplocliqueclean:
    # Clean-up target for HaploClique outputs: deletes the `quasispecies.*`
    # files under `variants/global`, two levels below the configured data
    # directory.
    params:
        DIR=config.input["datadir"],
    shell:
        # `-f` keeps the rule from failing when the glob matches nothing
        # (e.g. on an already-clean tree); a plain `rm` exits non-zero on the
        # unexpanded pattern, which aborts the job.
        """
        rm -f {params.DIR}/*/*/variants/global/quasispecies.*
        """
15 changes: 0 additions & 15 deletions rules/haplotypes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,6 @@ rule haploclique_visualization:
{params.COMPUTE_MDS} -q {params.INPREFIX} -s {params.REGION_START} -e {params.REGION_END} {params.USE_MSA} {params.MSA} -p {output.PDF} -o {params.TSV} > {log.output} 2> >(tee {log.errfile} >&2)
"""

rule haplocliqueclean:
    # Clean-up target for HaploClique outputs: deletes the `quasispecies.*`
    # files under `variants/global`, two levels below the configured data
    # directory.
    params:
        DIR=config.input["datadir"],
    shell:
        # `-f` keeps the rule from failing when the glob matches nothing
        # (e.g. on an already-clean tree); a plain `rm` exits non-zero on the
        # unexpanded pattern, which aborts the job.
        """
        rm -f {params.DIR}/*/*/variants/global/quasispecies.*
        """

if config.input['paired']:
rule savage:
Expand Down Expand Up @@ -141,12 +134,4 @@ else:
{params.SAVAGE} -t {threads} --split {params.SPLIT} -s ${{R1}} -o {params.OUTDIR} 2> >(tee -a {log.errfile} >&2)
"""

rule savageclean:
    # Clean-up target for SAVAGE outputs: under `variants/global`, two levels
    # below the configured data directory, deletes the
    # `contigs_stage_?.fasta` files and the `stage_?` entries (`?` matches
    # exactly one character).
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/variants/global/contigs_stage_?.fasta
        rm -rf {params.DIR}/*/*/variants/global/stage_?
        """

15 changes: 0 additions & 15 deletions rules/quality_assurance.smk
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,6 @@ rule extract:
cat {input} | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > {output} 2> >(tee {log.errfile} >&2)
"""

rule extractclean:
    # Clean-up target for the extraction step: deletes every `extracted_data`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/extracted_data
        """


# 2. clipping
def len_cutoff(wildcards):
Expand Down Expand Up @@ -175,11 +167,4 @@ else:
gzip {wildcards.dataset}/preprocessed_data/R1.fastq
"""

rule trimmingclean:
    # Clean-up target for the trimming step: deletes every `preprocessed_data`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/preprocessed_data
        """

7 changes: 0 additions & 7 deletions rules/snv.smk
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,6 @@ rule snv:
fi
"""

rule snvclean:
    # Clean-up target for SNV calling: deletes every `variants/SNVs`
    # directory found two levels below the configured data directory.
    params:
        DIR=config.input["datadir"],
    shell:
        """
        rm -rf {params.DIR}/*/*/variants/SNVs
        """

rule lofreq:
input:
Expand Down
1 change: 1 addition & 0 deletions vpipe.snake
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ rule alltrimmed:
trimmed_files


include: "rules/clean.smk"
include: "rules/quality_assurance.smk"
include: "rules/align.smk"
include: "rules/mafs.smk"
Expand Down

0 comments on commit 92f628a

Please sign in to comment.