forked from github-linguist/linguist
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add file extension for SnakeMake (github-linguist#3953)
* Add file extension for SnakeMake Previously a file name was defined for [SnakeMake[(snakemake-wrappers.readthedocs.io): github-linguist#1834 Currently, the canonical extension is `smk` (see [this discussion](https://groups.google.com/forum/#!topic/Snakemake/segLE-RlV_s) with the author (@johanneskoester) of SnakeMake, and the [FAQ](http://snakemake.readthedocs.io/en/stable/project_info/faq.html#how-do-i-enable-syntax-highlighting-in-vim-for-snakefiles)). * Adding two Snakemake (smk) example files
- Loading branch information
1 parent
d428563
commit e8663fb
Showing
3 changed files
with
159 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4140,6 +4140,7 @@ Python: | |
- ".pyt" | ||
- ".pyw" | ||
- ".rpy" | ||
- ".smk" | ||
- ".spec" | ||
- ".tac" | ||
- ".wsgi" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/calling.smk | ||
# Accessed: Jan 10 2020 by Nils Homer | ||
# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) | ||
|
||
if "restrict-regions" in config["processing"]: | ||
rule compose_regions: | ||
input: | ||
config["processing"]["restrict-regions"] | ||
output: | ||
"called/{contig}.regions.bed" | ||
conda: | ||
"../envs/bedops.yaml" | ||
shell: | ||
"bedextract {wildcards.contig} {input} > {output}" | ||
|
||
|
||
rule call_variants: | ||
input: | ||
bam=get_sample_bams, | ||
ref=config["ref"]["genome"], | ||
known=config["ref"]["known-variants"], | ||
regions="called/{contig}.regions.bed" if config["processing"].get("restrict-regions") else [] | ||
output: | ||
gvcf=protected("called/{sample}.{contig}.g.vcf.gz") | ||
log: | ||
"logs/gatk/haplotypecaller/{sample}.{contig}.log" | ||
params: | ||
extra=get_call_variants_params | ||
wrapper: | ||
"0.27.1/bio/gatk/haplotypecaller" | ||
|
||
|
||
rule combine_calls: | ||
input: | ||
ref=config["ref"]["genome"], | ||
gvcfs=expand("called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index) | ||
output: | ||
gvcf="called/all.{contig}.g.vcf.gz" | ||
log: | ||
"logs/gatk/combinegvcfs.{contig}.log" | ||
wrapper: | ||
"0.27.1/bio/gatk/combinegvcfs" | ||
|
||
|
||
rule genotype_variants: | ||
input: | ||
ref=config["ref"]["genome"], | ||
gvcf="called/all.{contig}.g.vcf.gz" | ||
output: | ||
vcf=temp("genotyped/all.{contig}.vcf.gz") | ||
params: | ||
extra=config["params"]["gatk"]["GenotypeGVCFs"] | ||
log: | ||
"logs/gatk/genotypegvcfs.{contig}.log" | ||
wrapper: | ||
"0.27.1/bio/gatk/genotypegvcfs" | ||
|
||
|
||
rule merge_variants: | ||
input: | ||
ref=get_fai(), # fai is needed to calculate aggregation over contigs below | ||
vcfs=lambda w: expand("genotyped/all.{contig}.vcf.gz", contig=get_contigs()), | ||
output: | ||
vcf="genotyped/all.vcf.gz" | ||
log: | ||
"logs/picard/merge-genotyped.log" | ||
wrapper: | ||
"0.40.2/bio/picard/mergevcfs" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/mapping.smk | ||
# Accessed: Jan 10 2020 by Nils Homer | ||
# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) | ||
|
||
rule trim_reads_se: | ||
input: | ||
unpack(get_fastq) | ||
output: | ||
temp("trimmed/{sample}-{unit}.fastq.gz") | ||
params: | ||
extra="", | ||
**config["params"]["trimmomatic"]["se"] | ||
log: | ||
"logs/trimmomatic/{sample}-{unit}.log" | ||
wrapper: | ||
"0.30.0/bio/trimmomatic/se" | ||
|
||
|
||
rule trim_reads_pe: | ||
input: | ||
unpack(get_fastq) | ||
output: | ||
r1=temp("trimmed/{sample}-{unit}.1.fastq.gz"), | ||
r2=temp("trimmed/{sample}-{unit}.2.fastq.gz"), | ||
r1_unpaired=temp("trimmed/{sample}-{unit}.1.unpaired.fastq.gz"), | ||
r2_unpaired=temp("trimmed/{sample}-{unit}.2.unpaired.fastq.gz"), | ||
trimlog="trimmed/{sample}-{unit}.trimlog.txt" | ||
params: | ||
extra=lambda w, output: "-trimlog {}".format(output.trimlog), | ||
**config["params"]["trimmomatic"]["pe"] | ||
log: | ||
"logs/trimmomatic/{sample}-{unit}.log" | ||
wrapper: | ||
"0.30.0/bio/trimmomatic/pe" | ||
|
||
|
||
rule map_reads: | ||
input: | ||
reads=get_trimmed_reads | ||
output: | ||
temp("mapped/{sample}-{unit}.sorted.bam") | ||
log: | ||
"logs/bwa_mem/{sample}-{unit}.log" | ||
params: | ||
index=config["ref"]["genome"], | ||
extra=get_read_group, | ||
sort="samtools", | ||
sort_order="coordinate" | ||
threads: 8 | ||
wrapper: | ||
"0.27.1/bio/bwa/mem" | ||
|
||
|
||
rule mark_duplicates: | ||
input: | ||
"mapped/{sample}-{unit}.sorted.bam" | ||
output: | ||
bam=temp("dedup/{sample}-{unit}.bam"), | ||
metrics="qc/dedup/{sample}-{unit}.metrics.txt" | ||
log: | ||
"logs/picard/dedup/{sample}-{unit}.log" | ||
params: | ||
config["params"]["picard"]["MarkDuplicates"] | ||
wrapper: | ||
"0.26.1/bio/picard/markduplicates" | ||
|
||
|
||
rule recalibrate_base_qualities: | ||
input: | ||
bam=get_recal_input(), | ||
bai=get_recal_input(bai=True), | ||
ref=config["ref"]["genome"], | ||
known=config["ref"]["known-variants"] | ||
output: | ||
bam=protected("recal/{sample}-{unit}.bam") | ||
params: | ||
extra=get_regions_param() + config["params"]["gatk"]["BaseRecalibrator"] | ||
log: | ||
"logs/gatk/bqsr/{sample}-{unit}.log" | ||
wrapper: | ||
"0.27.1/bio/gatk/baserecalibrator" | ||
|
||
|
||
rule samtools_index: | ||
input: | ||
"{prefix}.bam" | ||
output: | ||
"{prefix}.bam.bai" | ||
wrapper: | ||
"0.27.1/bio/samtools/index" |