Skip to content

Commit

Permalink
Add file extension for SnakeMake (github-linguist#3953)
Browse files Browse the repository at this point in the history
* Add file extension for SnakeMake

Previously a file name was defined for [SnakeMake[(snakemake-wrappers.readthedocs.io): github-linguist#1834

Currently, the canonical extension is `smk` (see [this discussion](https://groups.google.com/forum/#!topic/Snakemake/segLE-RlV_s) with the author (@johanneskoester) of SnakeMake, and the [FAQ](http://snakemake.readthedocs.io/en/stable/project_info/faq.html#how-do-i-enable-syntax-highlighting-in-vim-for-snakefiles)).

* Adding two Snakemake (smk) example files
  • Loading branch information
nh13 authored and ayoubserti committed Jan 22, 2020
1 parent d428563 commit e8663fb
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/linguist/languages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4140,6 +4140,7 @@ Python:
- ".pyt"
- ".pyw"
- ".rpy"
- ".smk"
- ".spec"
- ".tac"
- ".wsgi"
Expand Down
68 changes: 68 additions & 0 deletions samples/Python/snakemake-calling.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/calling.smk
# Accessed: Jan 10 2020 by Nils Homer
# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE)

if "restrict-regions" in config["processing"]:
rule compose_regions:
input:
config["processing"]["restrict-regions"]
output:
"called/{contig}.regions.bed"
conda:
"../envs/bedops.yaml"
shell:
"bedextract {wildcards.contig} {input} > {output}"


rule call_variants:
input:
bam=get_sample_bams,
ref=config["ref"]["genome"],
known=config["ref"]["known-variants"],
regions="called/{contig}.regions.bed" if config["processing"].get("restrict-regions") else []
output:
gvcf=protected("called/{sample}.{contig}.g.vcf.gz")
log:
"logs/gatk/haplotypecaller/{sample}.{contig}.log"
params:
extra=get_call_variants_params
wrapper:
"0.27.1/bio/gatk/haplotypecaller"


rule combine_calls:
input:
ref=config["ref"]["genome"],
gvcfs=expand("called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index)
output:
gvcf="called/all.{contig}.g.vcf.gz"
log:
"logs/gatk/combinegvcfs.{contig}.log"
wrapper:
"0.27.1/bio/gatk/combinegvcfs"


rule genotype_variants:
input:
ref=config["ref"]["genome"],
gvcf="called/all.{contig}.g.vcf.gz"
output:
vcf=temp("genotyped/all.{contig}.vcf.gz")
params:
extra=config["params"]["gatk"]["GenotypeGVCFs"]
log:
"logs/gatk/genotypegvcfs.{contig}.log"
wrapper:
"0.27.1/bio/gatk/genotypegvcfs"


rule merge_variants:
input:
ref=get_fai(), # fai is needed to calculate aggregation over contigs below
vcfs=lambda w: expand("genotyped/all.{contig}.vcf.gz", contig=get_contigs()),
output:
vcf="genotyped/all.vcf.gz"
log:
"logs/picard/merge-genotyped.log"
wrapper:
"0.40.2/bio/picard/mergevcfs"
90 changes: 90 additions & 0 deletions samples/Python/snakemake-mapping.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/mapping.smk
# Accessed: Jan 10 2020 by Nils Homer
# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE)

rule trim_reads_se:
input:
unpack(get_fastq)
output:
temp("trimmed/{sample}-{unit}.fastq.gz")
params:
extra="",
**config["params"]["trimmomatic"]["se"]
log:
"logs/trimmomatic/{sample}-{unit}.log"
wrapper:
"0.30.0/bio/trimmomatic/se"


rule trim_reads_pe:
input:
unpack(get_fastq)
output:
r1=temp("trimmed/{sample}-{unit}.1.fastq.gz"),
r2=temp("trimmed/{sample}-{unit}.2.fastq.gz"),
r1_unpaired=temp("trimmed/{sample}-{unit}.1.unpaired.fastq.gz"),
r2_unpaired=temp("trimmed/{sample}-{unit}.2.unpaired.fastq.gz"),
trimlog="trimmed/{sample}-{unit}.trimlog.txt"
params:
extra=lambda w, output: "-trimlog {}".format(output.trimlog),
**config["params"]["trimmomatic"]["pe"]
log:
"logs/trimmomatic/{sample}-{unit}.log"
wrapper:
"0.30.0/bio/trimmomatic/pe"


rule map_reads:
input:
reads=get_trimmed_reads
output:
temp("mapped/{sample}-{unit}.sorted.bam")
log:
"logs/bwa_mem/{sample}-{unit}.log"
params:
index=config["ref"]["genome"],
extra=get_read_group,
sort="samtools",
sort_order="coordinate"
threads: 8
wrapper:
"0.27.1/bio/bwa/mem"


rule mark_duplicates:
input:
"mapped/{sample}-{unit}.sorted.bam"
output:
bam=temp("dedup/{sample}-{unit}.bam"),
metrics="qc/dedup/{sample}-{unit}.metrics.txt"
log:
"logs/picard/dedup/{sample}-{unit}.log"
params:
config["params"]["picard"]["MarkDuplicates"]
wrapper:
"0.26.1/bio/picard/markduplicates"


rule recalibrate_base_qualities:
input:
bam=get_recal_input(),
bai=get_recal_input(bai=True),
ref=config["ref"]["genome"],
known=config["ref"]["known-variants"]
output:
bam=protected("recal/{sample}-{unit}.bam")
params:
extra=get_regions_param() + config["params"]["gatk"]["BaseRecalibrator"]
log:
"logs/gatk/bqsr/{sample}-{unit}.log"
wrapper:
"0.27.1/bio/gatk/baserecalibrator"


rule samtools_index:
input:
"{prefix}.bam"
output:
"{prefix}.bam.bai"
wrapper:
"0.27.1/bio/samtools/index"

0 comments on commit e8663fb

Please sign in to comment.