diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 32148b4b1f..866b02d756 100755 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -3605,6 +3605,7 @@ Python: - ".pyt" - ".pyw" - ".rpy" + - ".smk" - ".spec" - ".tac" - ".wsgi" diff --git a/samples/Python/snakemake-calling.smk b/samples/Python/snakemake-calling.smk new file mode 100644 index 0000000000..886d51a1b5 --- /dev/null +++ b/samples/Python/snakemake-calling.smk @@ -0,0 +1,68 @@ +# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/calling.smk +# Accessed: Jan 10 2020 by Nils Homer +# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) + +if "restrict-regions" in config["processing"]: + rule compose_regions: + input: + config["processing"]["restrict-regions"] + output: + "called/{contig}.regions.bed" + conda: + "../envs/bedops.yaml" + shell: + "bedextract {wildcards.contig} {input} > {output}" + + +rule call_variants: + input: + bam=get_sample_bams, + ref=config["ref"]["genome"], + known=config["ref"]["known-variants"], + regions="called/{contig}.regions.bed" if config["processing"].get("restrict-regions") else [] + output: + gvcf=protected("called/{sample}.{contig}.g.vcf.gz") + log: + "logs/gatk/haplotypecaller/{sample}.{contig}.log" + params: + extra=get_call_variants_params + wrapper: + "0.27.1/bio/gatk/haplotypecaller" + + +rule combine_calls: + input: + ref=config["ref"]["genome"], + gvcfs=expand("called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index) + output: + gvcf="called/all.{contig}.g.vcf.gz" + log: + "logs/gatk/combinegvcfs.{contig}.log" + wrapper: + "0.27.1/bio/gatk/combinegvcfs" + + +rule genotype_variants: + input: + ref=config["ref"]["genome"], + gvcf="called/all.{contig}.g.vcf.gz" + output: + vcf=temp("genotyped/all.{contig}.vcf.gz") + params: + extra=config["params"]["gatk"]["GenotypeGVCFs"] + log: + "logs/gatk/genotypegvcfs.{contig}.log" + wrapper: + "0.27.1/bio/gatk/genotypegvcfs" + + +rule merge_variants: + input: + ref=get_fai(), # fai is needed to calculate aggregation over contigs below + vcfs=lambda w: expand("genotyped/all.{contig}.vcf.gz", contig=get_contigs()), + output: + vcf="genotyped/all.vcf.gz" + log: + "logs/picard/merge-genotyped.log" + wrapper: + "0.40.2/bio/picard/mergevcfs" diff --git a/samples/Python/snakemake-mapping.smk b/samples/Python/snakemake-mapping.smk new file mode 100644 index 0000000000..e48bb27fb7 --- /dev/null +++ b/samples/Python/snakemake-mapping.smk @@ -0,0 +1,90 @@ +# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/mapping.smk +# Accessed: Jan 10 2020 by Nils Homer +# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) + +rule trim_reads_se: + input: + unpack(get_fastq) + output: + temp("trimmed/{sample}-{unit}.fastq.gz") + params: + extra="", + **config["params"]["trimmomatic"]["se"] + log: + "logs/trimmomatic/{sample}-{unit}.log" + wrapper: + "0.30.0/bio/trimmomatic/se" + + +rule trim_reads_pe: + input: + unpack(get_fastq) + output: + r1=temp("trimmed/{sample}-{unit}.1.fastq.gz"), + r2=temp("trimmed/{sample}-{unit}.2.fastq.gz"), + r1_unpaired=temp("trimmed/{sample}-{unit}.1.unpaired.fastq.gz"), + r2_unpaired=temp("trimmed/{sample}-{unit}.2.unpaired.fastq.gz"), + trimlog="trimmed/{sample}-{unit}.trimlog.txt" + params: + extra=lambda w, output: "-trimlog {}".format(output.trimlog), + **config["params"]["trimmomatic"]["pe"] + log: + "logs/trimmomatic/{sample}-{unit}.log" + wrapper: + "0.30.0/bio/trimmomatic/pe" + + +rule map_reads: + input: + reads=get_trimmed_reads + output: + temp("mapped/{sample}-{unit}.sorted.bam") + log: + "logs/bwa_mem/{sample}-{unit}.log" + params: + index=config["ref"]["genome"], + extra=get_read_group, + sort="samtools", + sort_order="coordinate" + threads: 8 + wrapper: + "0.27.1/bio/bwa/mem" + + +rule mark_duplicates: + input: + "mapped/{sample}-{unit}.sorted.bam" + output: + bam=temp("dedup/{sample}-{unit}.bam"), + metrics="qc/dedup/{sample}-{unit}.metrics.txt" + log: + "logs/picard/dedup/{sample}-{unit}.log" + params: + config["params"]["picard"]["MarkDuplicates"] + wrapper: + "0.26.1/bio/picard/markduplicates" + + +rule recalibrate_base_qualities: + input: + bam=get_recal_input(), + bai=get_recal_input(bai=True), + ref=config["ref"]["genome"], + known=config["ref"]["known-variants"] + output: + bam=protected("recal/{sample}-{unit}.bam") + params: + extra=get_regions_param() + config["params"]["gatk"]["BaseRecalibrator"] + log: + "logs/gatk/bqsr/{sample}-{unit}.log" + wrapper: + "0.27.1/bio/gatk/baserecalibrator" + + +rule samtools_index: + input: + "{prefix}.bam" + output: + "{prefix}.bam.bai" + wrapper: + "0.27.1/bio/samtools/index"