From 1753b8432bf7558ec378ee48fd1f5efd7905c4ac Mon Sep 17 00:00:00 2001
From: Nicolai-vKuegelgen <nicolai.von-kuegelgen@bih-charite.de>
Date: Thu, 22 Aug 2024 10:40:56 +0200
Subject: [PATCH] snakefmt

---
 stemcnv_check/app/make_staticdata.py          |   2 +-
 .../control_files/default_config.yaml         |   3 -
 stemcnv_check/rules/SNP_processing.smk        | 138 ++++++-----
 stemcnv_check/rules/StemCNV-check.smk         | 220 +++++++++++-------
 stemcnv_check/rules/common.smk                |  72 +++---
 .../rules/illumina_raw_processing.smk         | 102 ++++----
 stemcnv_check/rules/penncnv.smk               | 186 ++++++++++-----
 stemcnv_check/rules/report_generation.smk     |  77 +++---
 stemcnv_check/rules/staticdata_creation.smk   | 118 ++++++----
 9 files changed, 572 insertions(+), 346 deletions(-)

diff --git a/stemcnv_check/app/make_staticdata.py b/stemcnv_check/app/make_staticdata.py
index 6ad1929..717646f 100644
--- a/stemcnv_check/app/make_staticdata.py
+++ b/stemcnv_check/app/make_staticdata.py
@@ -180,7 +180,7 @@ def create_missing_staticdata(args):
                         ),
                     )
                     .dag(
-                        DAGSettings(targets=[use_vcf],
+                        DAGSettings(targets={use_vcf},
                                     force_incomplete=True)
                     )
                     .execute_workflow()
diff --git a/stemcnv_check/control_files/default_config.yaml b/stemcnv_check/control_files/default_config.yaml
index 45b00d7..ef6dc03 100644
--- a/stemcnv_check/control_files/default_config.yaml
+++ b/stemcnv_check/control_files/default_config.yaml
@@ -392,6 +392,3 @@ tools:
   PennCNV:
     memory: 4000 # "4000MB"
     runtime: "30m"
-  make_cnv_vcf:
-    memory: 4000 # "4000MB"
-    runtime: "30m"
diff --git a/stemcnv_check/rules/SNP_processing.smk b/stemcnv_check/rules/SNP_processing.smk
index e48f5dd..2d26068 100644
--- a/stemcnv_check/rules/SNP_processing.smk
+++ b/stemcnv_check/rules/SNP_processing.smk
@@ -3,15 +3,20 @@ import importlib.resources
 from stemcnv_check import STEM_CNV_CHECK
 
 
-
 rule filter_snp_vcf:
-    input: os.path.join(DATAPATH, "{sample_id}", "{sample_id}.unprocessed.vcf")
-    output: os.path.join(DATAPATH, "{sample_id}", "{sample_id}.processed-SNP-data.{filter}-filter.vcf")
-    threads: get_tool_resource('filter_snp_vcf', 'threads')
+    input:
+        os.path.join(DATAPATH, "{sample_id}", "{sample_id}.unprocessed.vcf"),
+    output:
+        os.path.join(
+            DATAPATH,
+            "{sample_id}",
+            "{sample_id}.processed-SNP-data.{filter}-filter.vcf",
+        ),
+    threads: get_tool_resource("filter_snp_vcf", "threads")
     resources:
-        runtime=get_tool_resource('filter_snp_vcf', 'runtime'),
-        mem_mb=get_tool_resource('filter_snp_vcf', 'memory'),
-        partition=get_tool_resource('filter_snp_vcf', 'partition')
+        runtime=get_tool_resource("filter_snp_vcf", "runtime"),
+        mem_mb=get_tool_resource("filter_snp_vcf", "memory"),
+        partition=get_tool_resource("filter_snp_vcf", "partition"),
     log:
         err=os.path.join(LOGPATH, "filter_snp_vcf", "{sample_id}", "{filter}.error.log"),
         #out=os.path.join(LOGPATH, "filter_snp_vcf", "{sample_id}", "{filter}.out.log")
@@ -19,66 +24,91 @@ rule filter_snp_vcf:
     #     importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
     # script:
     #     '../scripts/filter_snp_vcf.R'
-#FIXME: wrong, this was the wrong fasta file
-# vcfpy changes the vcf too much, i.e.
-# - add new contigs to header
-# - merges variants at same position into multi-allelic
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","python-vcf.yaml")
+        "../envs/python-vcf.yaml"
     script:
-        '../scripts/filter_snp_vcf.py'
-        
+        "../scripts/filter_snp_vcf.py"
+
 
 rule annotate_snp_vcf:
     input:
-      vcf = os.path.join(DATAPATH, "{sample_id}", "{sample_id}.processed-SNP-data.{filter}-filter.vcf"),
-      genomefasta = get_genome_fasta
-    output: os.path.join(DATAPATH, "{sample_id}", "{sample_id}.annotated-SNP-data.{filter}-filter.vcf.gz")
-    threads: get_tool_resource('VEP', 'threads')
+        vcf=os.path.join(
+            DATAPATH,
+            "{sample_id}",
+            "{sample_id}.processed-SNP-data.{filter}-filter.vcf",
+        ),
+        genomefasta=get_genome_fasta,
+    output:
+        os.path.join(
+            DATAPATH,
+            "{sample_id}",
+            "{sample_id}.annotated-SNP-data.{filter}-filter.vcf.gz",
+        ),
+    threads: get_tool_resource("VEP", "threads")
     resources:
-        threads=get_tool_resource('VEP', 'threads'),
-        runtime=get_tool_resource('VEP', 'runtime'),
-        mem_mb=get_tool_resource('VEP', 'memory'),
-        partition=get_tool_resource('VEP', 'partition')
+        threads=get_tool_resource("VEP", "threads"),
+        runtime=get_tool_resource("VEP", "runtime"),
+        mem_mb=get_tool_resource("VEP", "memory"),
+        partition=get_tool_resource("VEP", "partition"),
     log:
-        err=os.path.join(LOGPATH, "annotate_snp_vcf", "{sample_id}", "{filter}.error.log"),
-        out=os.path.join(LOGPATH, "annotate_snp_vcf", "{sample_id}", "{filter}.out.log")
+        err=os.path.join(
+            LOGPATH, "annotate_snp_vcf", "{sample_id}", "{filter}.error.log"
+        ),
+        out=os.path.join(LOGPATH, "annotate_snp_vcf", "{sample_id}", "{filter}.out.log"),
     params:
-        genomeversion = 'GRCh38' if config['genome_version'] in ('hg38', 'GRCh38') else 'GRCh37',
-        vep_cache_path = config['use_vep_cache']
+        genomeversion=(
+            "GRCh38" if config["genome_version"] in ("hg38", "GRCh38") else "GRCh37"
+        ),
+        vep_cache_path=config["use_vep_cache"],
     conda:
-      importlib.resources.files(STEM_CNV_CHECK).joinpath("envs", "vep-annotation.yaml")
+        importlib.resources.files(STEM_CNV_CHECK).joinpath(
+            "envs", "vep-annotation.yaml"
+        )
     shell:
-        'vep --verbose ' 
-        '--fasta {input.genomefasta} ' # only needed for HGVS
+        "vep --verbose "
+        "--fasta {input.genomefasta} "
+
+        "--input_file {input.vcf} "
+        "--output_file {output} "
+        "--compress_output bgzip "
+        "--format vcf "
+        "--vcf "
+        "--force_overwrite "
+        "--no_stats "
+        "--warning_file {log.err} "
+        "--skipped_variants_file {log.out} "
+        "--assembly {params.genomeversion} "
+        "--fork {resources.threads} "
+        "--cache "
+        "--dir_cache {params.vep_cache_path} "
+
+        "--total_length "
+
+        "--gencode_basic "
+        "--symbol "
+        "--terms SO "
+        "--hgvs "
+        "--pick "
+
+
+        "--check_existing --no_check_alleles "
+        "--af "
+        "--pubmed "
+
+        '--fields "Gene,SYMBOL,STRAND,Consequence,cDNA_position,CDS_position,Protein_position,HGVSc,HGVSp,Existing_variation,CLIN_SIG,SOMATIC,PHENO,AF,PUBMED" '
+
+        " >> {log.out} 2>> {log.err}"
+        # only needed for HGVS
         # TODO: maye omitting this helps w/ pipe issues?
-        '--input_file {input.vcf} '
-        '--output_file {output} '
-        '--compress_output bgzip '
-        '--format vcf ' #might help with pipe auto-detection issues
-        '--vcf '
-        '--force_overwrite '
-        '--no_stats '
-        '--warning_file {log.err} '
-        '--skipped_variants_file {log.out} '
-        '--assembly {params.genomeversion} '
-        '--fork {resources.threads} '
-        '--cache '
-        '--dir_cache {params.vep_cache_path} '
+        #might help with pipe auto-detection issues
         ## Annotation options
-        '--total_length ' 
         # Gene & protein annotation
-        '--gencode_basic ' #> limit to gencode transcripts
-        '--symbol ' #> add gene symbol
-        '--terms SO ' # how to write/format/annotate the consequence
-        '--hgvs ' #> add HGVS nomenclature (protein changes)
-        '--pick ' #> pick the most severe consequence (& gene) per variant
+        #> limit to gencode transcripts
+        #> add gene symbol
+        # how to write/format/annotate the consequence
+        #> add HGVS nomenclature (protein changes)
+        #> pick the most severe consequence (& gene) per variant
         # will query databases for existing annotation at the same position
         # includes existing annotations from ClinVar, COSMIC etc
-        '--check_existing --no_check_alleles '
-        '--af ' #  global allele frequency (AF) from 1000 Genomes Phase 3 data 
-        '--pubmed '
+        #  global allele frequency (AF) from 1000 Genomes Phase 3 data 
         # Select content of CSQ:
-        '--fields "Gene,SYMBOL,STRAND,Consequence,cDNA_position,CDS_position,Protein_position,HGVSc,HGVSp,Existing_variation,CLIN_SIG,SOMATIC,PHENO,AF,PUBMED" '
-
-        ' >> {log.out} 2>> {log.err}'
\ No newline at end of file
diff --git a/stemcnv_check/rules/StemCNV-check.smk b/stemcnv_check/rules/StemCNV-check.smk
index 8cec727..8c80367 100644
--- a/stemcnv_check/rules/StemCNV-check.smk
+++ b/stemcnv_check/rules/StemCNV-check.smk
@@ -6,7 +6,11 @@ from pathlib import Path
 import tempfile
 import ruamel.yaml as ruamel_yaml
 from stemcnv_check import STEM_CNV_CHECK
-from stemcnv_check.helpers import read_sample_table, config_extract, collect_SNP_cluster_ids
+from stemcnv_check.helpers import (
+    read_sample_table,
+    config_extract,
+    collect_SNP_cluster_ids,
+)
 from stemcnv_check.exceptions import SampleConstraintError, ConfigValueError
 
 SNAKEDIR = str(importlib.resources.files(STEM_CNV_CHECK))
@@ -22,169 +26,221 @@ else:
     # This a manual workaround around for passing the config as a snakemake object to R & python scripts
     # Save config to tempfile & use that instead, this ensures the combined default + user configs are used
     # It also archives all options passed by command line and allows them all to be displayed in report
-    f, CONFIGFILE = tempfile.mkstemp(suffix = '.yaml', text=True)
+    f, CONFIGFILE = tempfile.mkstemp(suffix=".yaml", text=True)
     os.close(f)
     removetempconfig = True
 
-yaml = ruamel_yaml.YAML(typ='safe')
-config['snakedir'] = SNAKEDIR
-with open(CONFIGFILE, 'w') as yamlout:
+
+
+yaml = ruamel_yaml.YAML(typ="safe")
+config["snakedir"] = SNAKEDIR
+with open(CONFIGFILE, "w") as yamlout:
     yaml.dump(config, yamlout)
 
 
-SAMPLETABLE = config['sample_table'] if 'sample_table' in config else 'sample_table.tsv' # Defined by wrapper
-BASEPATH = config['basedir'] if 'basedir' in config else os.getcwd() #Defined by wrapper
-DATAPATH = config['data_path'] #if os.path.isabs(config['data_path']) else os.path.join(BASEPATH, config['data_path'])
-LOGPATH = config['log_path'] #if os.path.isabs(config['log_path']) else os.path.join(BASEPATH, config['log_path'])
-TARGET = config['target'] if 'target' in config else 'report' #Defined by wrapper
-IDAT_INPUT = config['raw_data_folder']
+SAMPLETABLE = (
+    config["sample_table"] if "sample_table" in config else "sample_table.tsv"
+)  # Defined by wrapper
+BASEPATH = (
+    config["basedir"] if "basedir" in config else os.getcwd()
+)  # Defined by wrapper
+DATAPATH = config[
+    "data_path"
+]  # if os.path.isabs(config['data_path']) else os.path.join(BASEPATH, config['data_path'])
+LOGPATH = config[
+    "log_path"
+]  # if os.path.isabs(config['log_path']) else os.path.join(BASEPATH, config['log_path'])
+TARGET = config["target"] if "target" in config else "report"  # Defined by wrapper
+IDAT_INPUT = config["raw_data_folder"]
+
 
 wildcard_constraints:
-    sample_id=config['wildcard_constraints']['sample_id'],
+    sample_id=config["wildcard_constraints"]["sample_id"],
     #sentrix_name=config['wildcard_constraints']['sentrix_name'],
     #sentrix_pos=config['wildcard_constraints']['sentrix_pos'],
 
-#Never submit these to cluster
+
+# Never submit these to cluster
 localrules:
     relink_gencall,
-    all
+    all,
+
 
 sample_data = read_sample_table(SAMPLETABLE)
 sample_data_full = read_sample_table(SAMPLETABLE, with_opt=True)
 
-include: 'common.smk'
-include: 'illumina_raw_processing.smk'
+
+include: "common.smk"
+include: "illumina_raw_processing.smk"
 include: "SNP_processing.smk"
 include: "penncnv.smk"
-
 include: "report_generation.smk"
 
+
 # Rule function,s move to common? ========================================================================
 
+
 def get_cnv_vcf_output(mode):
-    addition = config['settings']['make_cnv_vcf']['name_addition']
-    if mode == 'combined-calls':
-        return os.path.join(DATAPATH, "{sample_id}", f"{{sample_id}}.combined-cnv-calls{addition}.vcf")
-    elif mode == 'split-tools':
-        return [os.path.join(DATAPATH, "{sample_id}", f"{{sample_id}}.{tool}-cnv-calls{addition}.vcf")
-                for tool in config['settings']['CNV.calling.tools']]
+    addition = config["settings"]["make_cnv_vcf"]["name_addition"]
+    if mode == "combined-calls":
+        return os.path.join(
+            DATAPATH, "{sample_id}", f"{{sample_id}}.combined-cnv-calls{addition}.vcf"
+        )
+    elif mode == "split-tools":
+        return [
+            os.path.join(
+                DATAPATH, "{sample_id}", f"{{sample_id}}.{tool}-cnv-calls{addition}.vcf"
+            )
+            for tool in config["settings"]["CNV.calling.tools"]
+        ]
     else:
-        raise ConfigValueError('Value not allowed for settings$make.cnv.vcf$mode: "{}"'.format(config['settings']['make_cnv_vcf']['mode']))
+        raise ConfigValueError(
+            'Value not allowed for settings$make.cnv.vcf$mode: "{}"'.format(
+                config["settings"]["make_cnv_vcf"]["mode"]
+            )
+        )
 
-def get_target_files(target = TARGET):
-    #Target options: ('report', 'combined-cnv-calls', 'PennCNV', 'CBS', 'SNP-data'),
+
+def get_target_files(target=TARGET):
+    # Target options: ('report', 'combined-cnv-calls', 'PennCNV', 'CBS', 'SNP-data'),
     all_samples = [sample_id for sample_id, _, _, _, _ in sample_data]
 
     # complete
-    if target == 'complete':
-        return get_target_files('report') + get_target_files('combined-cnv-calls')
+    if target == "complete":
+        return get_target_files("report") + get_target_files("combined-cnv-calls")
 
     # Report
-    if target == 'report':
+    if target == "report":
         return expand(
             os.path.join(DATAPATH, "{sample_id}", "{sample_id}.{report_filetype}"),
-            sample_id = all_samples,
-            report_filetype = [rep+'.'+config['reports'][rep]['file_type'] for rep in config['reports'].keys() 
-                               if rep != '__default__']) #+ \
-                     # expand(os.path.join(DATAPATH,"{sample_id}","{sample_id}.summary-check.tsv"),
-                     #                sample_id = all_samples)
+            sample_id=all_samples,
+            report_filetype=[
+                rep + "." + config["reports"][rep]["file_type"]
+                for rep in config["reports"].keys()
+                if rep != "__default__"
+            ],
+        )  # + \
+        # expand(os.path.join(DATAPATH,"{sample_id}","{sample_id}.summary-check.tsv"),
+        #                sample_id = all_samples)
     # Target Processed-calls
-    if target == 'combined-cnv-calls':
-        return expand([
-                os.path.join(DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.tsv"),
-                os.path.join(DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.vcf.gz"),
+    if target == "combined-cnv-calls":
+        return expand(
+            [
+                os.path.join(
+                    DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.tsv"
+                ),
+                os.path.join(
+                    DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.vcf.gz"
+                ),
             ],
-            sample_id = all_samples
+            sample_id=all_samples,
         )
     # Target PennCNV
-    if target == 'PennCNV':
+    if target == "PennCNV":
         return expand(
-            os.path.join(DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.penncnv.vcf.gz"),
-            sample_id = all_samples,
+            os.path.join(
+                DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.penncnv.vcf.gz"
+            ),
+            sample_id=all_samples,
         )
     # Target CBS
-    if target == 'CBS':
+    if target == "CBS":
         return expand(
             os.path.join(DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.CBS.vcf.gz"),
-            sample_id = all_samples
+            sample_id=all_samples,
         )
     # Target SNP-data
-    if target == 'SNP-data':
-        #TODO: update this
+    if target == "SNP-data":
+        # TODO: update this
         return expand(
-            os.path.join(DATAPATH,"{sample_id}","{sample_id}.processed-SNP-data.{filter}-filter.vcf"),
+            os.path.join(
+                DATAPATH,
+                "{sample_id}",
+                "{sample_id}.processed-SNP-data.{filter}-filter.vcf",
+            ),
             # os.path.join(DATAPATH, "{sample_id}", "{sample_id}.annotated-SNP-data.{filter}-filter.vcf.gz"),
-            sample_id = all_samples,
-            filter = config['settings']['default-filter-set']
-        ) 
-    
+            sample_id=all_samples,
+            filter=config["settings"]["default-filter-set"],
+        )
+
+
 # Rules ========================================================================
 
+
 rule all:
     input:
-        get_target_files()
+        get_target_files(),
     run:
         if removetempconfig:
             os.remove(CONFIGFILE)
 
 
-
 rule run_CBS:
     input:
-        vcf = cnv_vcf_input_function('CBS')
+        vcf=cnv_vcf_input_function("CBS"),
     output:
-        vcf = os.path.join(DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.CBS.vcf.gz")
-    threads: get_tool_resource('CBS', 'threads')
+        vcf=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.CBS.vcf.gz"),
+    threads: get_tool_resource("CBS", "threads")
     resources:
-        runtime=get_tool_resource('CBS', 'runtime'),
-        mem_mb=get_tool_resource('CBS', 'memory'),
-        partition=get_tool_resource('CBS', 'partition')
+        runtime=get_tool_resource("CBS", "runtime"),
+        mem_mb=get_tool_resource("CBS", "memory"),
+        partition=get_tool_resource("CBS", "partition"),
     params:
         # SDundo = config['settings']['CBS']['SDundo'],
         # filter=get_tool_filter_settings('CBS'),
-        settings=config['settings']['CBS']
+        settings=config["settings"]["CBS"],
     log:
         err=os.path.join(LOGPATH, "CBS", "{sample_id}", "error.log"),
-        out=os.path.join(LOGPATH, "CBS", "{sample_id}", "out.log")
+        out=os.path.join(LOGPATH, "CBS", "{sample_id}", "out.log"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs", "general-R.yaml")
+        "../envs/general-R.yaml"
     script:
-        '../scripts/run_CBS_DNAcopy.R'
-    # shell:
-    #     "Rscript {SNAKEDIR}/scripts/run_CBS_DNAcopy.R -s {params.SDundo} {input.tsv} {output.tsv} {CONFIGFILE} {SAMPLETABLE} > {log.out} 2> {log.err}"
+        "../scripts/run_CBS_DNAcopy.R"
 
 
+# shell:
+#     "Rscript {SNAKEDIR}/scripts/run_CBS_DNAcopy.R -s {params.SDundo} {input.tsv} {output.tsv} {CONFIGFILE} {SAMPLETABLE} > {log.out} 2> {log.err}"
 
 
 def get_processed_ref_data(wildcards):
     sample_id, ref_id = get_ref_id(wildcards)
-    return os.path.join(DATAPATH, f"{ref_id}", f"{ref_id}.combined-cnv-calls.tsv") if ref_id else []
+    return (
+        os.path.join(DATAPATH, f"{ref_id}", f"{ref_id}.combined-cnv-calls.tsv")
+        if ref_id
+        else []
+    )
 
 
 rule run_process_CNV_calls:
     input:
-        cnv_calls = expand(
-            os.path.join(DATAPATH, "{{sample_id}}", "{{sample_id}}.CNV_calls.{caller}.vcf.gz"),
-            caller = config['settings']['CNV.calling.tools']
+        cnv_calls=expand(
+            os.path.join(
+                DATAPATH, "{{sample_id}}", "{{sample_id}}.CNV_calls.{caller}.vcf.gz"
+            ),
+            caller=config["settings"]["CNV.calling.tools"],
         ),
-        ref_data = get_processed_ref_data,
-        snp_vcf = cnv_vcf_input_function('settings:CNV_processing:call_processing')
+        ref_data=get_processed_ref_data,
+        snp_vcf=cnv_vcf_input_function("settings:CNV_processing:call_processing"),
     output:
-        tsv = os.path.join(DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.tsv"),
-        vcf = os.path.join(DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.vcf.gz")
-    threads: get_tool_resource('CNV.process', 'threads')
+        tsv=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.tsv"),
+        vcf=os.path.join(
+            DATAPATH, "{sample_id}", "{sample_id}.combined-cnv-calls.vcf.gz"
+        ),
+    threads: get_tool_resource("CNV.process", "threads")
     resources:
-        runtime=get_tool_resource('CNV.process', 'runtime'),
-        mem_mb=get_tool_resource('CNV.process', 'memory'),
-        partition=get_tool_resource('CNV.process', 'partition')
+        runtime=get_tool_resource("CNV.process", "runtime"),
+        mem_mb=get_tool_resource("CNV.process", "memory"),
+        partition=get_tool_resource("CNV.process", "partition"),
     params:
-        settings=config['settings']['CNV_processing']
+        settings=config["settings"]["CNV_processing"],
     log:
         err=os.path.join(LOGPATH, "CNV_process", "{sample_id}", "error.log"),
         # out=os.path.join(LOGPATH, "CNV_process", "{sample_id}", "out.log")
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
+        "../envs/general-R.yaml"
     script:
-        '../scripts/process_CNV_calls.R'
-    # shell:
-    #     "Rscript {SNAKEDIR}/scripts/process_CNV_calls.R {params.penncnv} {params.cbs} {DATAPATH} {wildcards.sample_id} {CONFIGFILE} {SAMPLETABLE} > {log.out} 2> {log.err}"
+        "../scripts/process_CNV_calls.R"
+
+
+# shell:
+#     "Rscript {SNAKEDIR}/scripts/process_CNV_calls.R {params.penncnv} {params.cbs} {DATAPATH} {wildcards.sample_id} {CONFIGFILE} {SAMPLETABLE} > {log.out} 2> {log.err}"
diff --git a/stemcnv_check/rules/common.smk b/stemcnv_check/rules/common.smk
index 297bb24..4c0a00d 100644
--- a/stemcnv_check/rules/common.smk
+++ b/stemcnv_check/rules/common.smk
@@ -1,6 +1,6 @@
 # Makestatic Data :
 # def fix_container_path(path_in, bound_to):
-# 
+#
 #         path_in = Path(path_in)
 #         if bound_to == 'static':
 #                 rel_path = path_in.name
@@ -10,9 +10,10 @@
 #                         'tmp': Path(DOWNLOAD_DIR)
 #                 }[bound_to].absolute()
 #                 rel_path = path_in.absolute().relative_to(local_target)
-# 
+#
 #         return Path('/outside/') / bound_to / rel_path
 
+
 def get_ref_id(wildcards, get_sex=False):
     sample_id = wildcards.sample_id
     sex, ref_id = [(s, r) for sid, _, _, s, r in sample_data if sid == sample_id][0]
@@ -20,12 +21,14 @@ def get_ref_id(wildcards, get_sex=False):
 
     if ref_id:
         try:
-            #Assume existing match -> wrapper should have done a check
+            # Assume existing match -> wrapper should have done a check
             ref_sex = [s for sid, _, _, s, _ in sample_data if sid == ref_id][0]
             ref_sex = ref_sex[0].lower()
         except IndexError:
             # Somehow no match
-            raise SampleConstraintError(f"Listed reference sample can not be found in sample-table: '{ref_id}'")
+            raise SampleConstraintError(
+                f"Listed reference sample can not be found in sample-table: '{ref_id}'"
+            )
     else:
         ref_id = False
         ref_sex = False
@@ -39,59 +42,62 @@ def get_ref_id(wildcards, get_sex=False):
 def fix_container_path(path_in, bound_to):
     path_in = Path(path_in)
 
-    if bound_to == 'static':
+    if bound_to == "static":
         rel_path = path_in.name
     else:
         local_target = {
-            'data': Path(DATAPATH),
-            'rawdata': Path(IDAT_INPUT),
-            'logs': Path(LOGPATH),
-            'snakedir': Path(importlib.resources.files(STEM_CNV_CHECK)),
+            "data": Path(DATAPATH),
+            "rawdata": Path(IDAT_INPUT),
+            "logs": Path(LOGPATH),
+            "snakedir": Path(importlib.resources.files(STEM_CNV_CHECK)),
         }[bound_to].absolute()
         rel_path = path_in.absolute().relative_to(local_target)
 
-    return Path('/outside/') / bound_to / rel_path
+    return Path("/outside/") / bound_to / rel_path
+
 
 def get_tool_filter_settings(tool):
-    if tool.split(':')[0] == 'report':
-        report_settings = config['reports'][tool.split(':')[1]]
-        out = config_extract((tool.split(':')[2], 'filter-settings'), report_settings, config['reports']['__default__'])
-    elif tool.count(':') == 2 and tool.split(':')[1] == 'CNV_processing':
-        out = config['settings']['CNV_processing']['call_processing']['filter-settings']
+    if tool.split(":")[0] == "report":
+        report_settings = config["reports"][tool.split(":")[1]]
+        out = config_extract(
+            (tool.split(":")[2], "filter-settings"),
+            report_settings,
+            config["reports"]["__default__"],
+        )
+    elif tool.count(":") == 2 and tool.split(":")[1] == "CNV_processing":
+        out = config["settings"]["CNV_processing"]["call_processing"]["filter-settings"]
     else:
-        out = config['settings'][tool]['filter-settings']
-    if out == '__default__':
-        out = config['settings']['default-filter-set']
+        out = config["settings"][tool]["filter-settings"]
+    if out == "__default__":
+        out = config["settings"]["default-filter-set"]
     return out
 
 
-def get_tool_resource(tool ,resource):
-    if not resource in ('threads', 'memory', 'runtime', 'partition', 'cmd-line-params'):
+def get_tool_resource(tool, resource):
+    if not resource in ("threads", "memory", "runtime", "partition", "cmd-line-params"):
         raise KeyError(f"This resource can not be defined: {resource}")
-    if not tool in config['tools']:
-        return config['tools']['__default__'][resource]
+    if not tool in config["tools"] or not resource in config["tools"][tool]:
+        return config["tools"]["__default__"][resource]
     else:
-        if resource in config['tools'][tool]:
-            return config['tools'][tool][resource]
-        else:
-            return config['tools']['__default__'][resource]
+        return config["tools"][tool][resource]
+
 
 def get_genome_fasta(wildcards):
     """Get the correct genome fasta file"""
     # #FIXME: future
     # chip = get_sample_info(wildcards.sample_id)['array_name']
     # genome = config['array_definitions'][chip]['genome_version']
-    if config['genome_version'] in ('hg38', 'GRCh38'):
-        return config['global_settings']['hg38_genome_fasta']
+    if config["genome_version"] in ("hg38", "GRCh38"):
+        return config["global_settings"]["hg38_genome_fasta"]
     else:
-        return config['global_settings']['hg19_genome_fasta']
+        return config["global_settings"]["hg19_genome_fasta"]
 
 
 def cnv_vcf_input_function(tool):
     return lambda wildcards: os.path.join(
-        DATAPATH, 
-        wildcards.sample_id, 
-        f"{wildcards.sample_id}.processed-SNP-data.{get_tool_filter_settings(tool)}-filter.vcf"
+        DATAPATH,
+        wildcards.sample_id,
+        f"{wildcards.sample_id}.processed-SNP-data.{get_tool_filter_settings(tool)}-filter.vcf",
         # VEP still has issues; skip for now
         # f"{wildcards.sample_id}.annotated-SNP-data.{get_tool_filter_settings(tool)}-filter.vcf.gz"
-    )
\ No newline at end of file
+    )
diff --git a/stemcnv_check/rules/illumina_raw_processing.smk b/stemcnv_check/rules/illumina_raw_processing.smk
index b02c2a4..2acaf83 100644
--- a/stemcnv_check/rules/illumina_raw_processing.smk
+++ b/stemcnv_check/rules/illumina_raw_processing.smk
@@ -1,27 +1,36 @@
 
 rule run_gencall:
     input:
-        bpm = config['static_data']['bpm_manifest_file'],
-        egt = config['static_data']['egt_cluster_file'],
-        idat_path = os.path.join(IDAT_INPUT, "{sentrix_name}"),
+        bpm=config["static_data"]["bpm_manifest_file"],
+        egt=config["static_data"]["egt_cluster_file"],
+        idat_path=os.path.join(IDAT_INPUT, "{sentrix_name}"),
     output:
-        os.path.join(DATAPATH, "gtc", "{sentrix_name}", "_done")
-    threads: get_tool_resource('GenCall', 'threads')
+        os.path.join(DATAPATH, "gtc", "{sentrix_name}", "_done"),
+    threads: get_tool_resource("GenCall", "threads")
     resources:
-        runtime=get_tool_resource('GenCall', 'runtime'),
-        mem_mb=get_tool_resource('GenCall', 'memory'),
-        partition=get_tool_resource('GenCall', 'partition')
+        runtime=get_tool_resource("GenCall", "runtime"),
+        mem_mb=get_tool_resource("GenCall", "memory"),
+        partition=get_tool_resource("GenCall", "partition"),
     params:
-        options = get_tool_resource('GenCall', 'cmd-line-params'),
-        outpath = lambda wildcards: fix_container_path(os.path.join(DATAPATH, "gtc", wildcards.sentrix_name), 'data'),
-        bpm = fix_container_path(config['static_data']['bpm_manifest_file'],'static'),
-        egt = fix_container_path(config['static_data']['egt_cluster_file'],'static'),
-        idat_path= lambda wildcards: fix_container_path(os.path.join(IDAT_INPUT, wildcards.sentrix_name),'rawdata'),
-        logerr = lambda wildcards: fix_container_path(os.path.join(LOGPATH,"GenCall",wildcards.sentrix_name,"error.log"),'logs'),
-        logout = lambda wildcards: fix_container_path(os.path.join(LOGPATH,"GenCall", wildcards.sentrix_name,"out.log"),'logs')
+        options=get_tool_resource("GenCall", "cmd-line-params"),
+        outpath=lambda wildcards: fix_container_path(
+            os.path.join(DATAPATH, "gtc", wildcards.sentrix_name), "data"
+        ),
+        bpm=fix_container_path(config["static_data"]["bpm_manifest_file"], "static"),
+        egt=fix_container_path(config["static_data"]["egt_cluster_file"], "static"),
+        idat_path=lambda wildcards: fix_container_path(
+            os.path.join(IDAT_INPUT, wildcards.sentrix_name), "rawdata"
+        ),
+        logerr=lambda wildcards: fix_container_path(
+            os.path.join(LOGPATH, "GenCall", wildcards.sentrix_name, "error.log"),
+            "logs",
+        ),
+        logout=lambda wildcards: fix_container_path(
+            os.path.join(LOGPATH, "GenCall", wildcards.sentrix_name, "out.log"), "logs"
+        ),
     log:
-        err = os.path.join(LOGPATH, "GenCall", "{sentrix_name}", "error.log"),
-        out = os.path.join(LOGPATH, "GenCall", "{sentrix_name}", "out.log"),
+        err=os.path.join(LOGPATH, "GenCall", "{sentrix_name}", "error.log"),
+        out=os.path.join(LOGPATH, "GenCall", "{sentrix_name}", "out.log"),
     container:
         # Not sure if we need to use a specific version here
         "docker://us.gcr.io/broad-gotc-prod/illumina-iaap-autocall:1.0.2-1.1.0-1629910298"
@@ -30,51 +39,60 @@ rule run_gencall:
 
 
 # The iaap-cli will *always* generate filenames derived from the sentrix name & pos
-def get_chip(wildcards, outtype='dir_path'):
+def get_chip(wildcards, outtype="dir_path"):
     """Get the chip name from a sample_id
     Values for outtype: 'dirpath' | 'file'"""
-    chip_name, chip_pos = [(n, p) for sid, n, p, _, _ in sample_data if sid == wildcards.sample_id][0]
-    if outtype == 'dir_path':
-        return os.path.join(DATAPATH,'gtc',chip_name)
-    elif outtype == 'file':
-        return os.path.join(chip_name,chip_name + '_' + chip_pos + '.gtc')
+    chip_name, chip_pos = [
+        (n, p) for sid, n, p, _, _ in sample_data if sid == wildcards.sample_id
+    ][0]
+    if outtype == "dir_path":
+        return os.path.join(DATAPATH, "gtc", chip_name)
+    elif outtype == "file":
+        return os.path.join(chip_name, chip_name + "_" + chip_pos + ".gtc")
 
 
-#Note:
+# Note:
 # the *.gtc output files from gencall will *always* match the idat_file names
 # --> maybe better to switch from Chip_Name & Chip_Pos to Folder_Name & IDAT_Name ?!
 rule relink_gencall:
     input:
-        lambda wildcards: os.path.join(get_chip(wildcards),'_done')
+        lambda wildcards: os.path.join(get_chip(wildcards), "_done"),
     output:
-        os.path.join(DATAPATH,"{sample_id}","{sample_id}.gencall.gtc")
+        os.path.join(DATAPATH, "{sample_id}", "{sample_id}.gencall.gtc"),
     params:
-        gtc_link_path=lambda wildcards: os.path.join('..','gtc',get_chip(wildcards,outtype='file'))
+        gtc_link_path=lambda wildcards: os.path.join(
+            "..", "gtc", get_chip(wildcards, outtype="file")
+        ),
     shell:
         'ln -s "{params.gtc_link_path}" "{output}"'
-    
 
-#TODO: input functions to get correct fasta (& later correct manifest files)
+
+# TODO: input functions to get correct fasta (& later correct manifest files)
 rule run_gtc2vcf_vcf:
     input:
-        bpm=config['static_data']['bpm_manifest_file'],
-        egt=config['static_data']['egt_cluster_file'],
+        bpm=config["static_data"]["bpm_manifest_file"],
+        egt=config["static_data"]["egt_cluster_file"],
         genome=get_genome_fasta,
-        gtc=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.gencall.gtc")
+        gtc=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.gencall.gtc"),
     output:
-        vcf = pipe(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.unprocessed.vcf")),
-        metatxt = os.path.join(DATAPATH, "{sample_id}", "{sample_id}.stats.txt"),
-    threads: get_tool_resource('gtc2vcf', 'threads')
+        vcf=pipe(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.unprocessed.vcf")),
+        metatxt=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.stats.txt"),
+    threads: get_tool_resource("gtc2vcf", "threads")
     resources:
-        runtime=get_tool_resource('gtc2vcf', 'runtime'),
-        mem_mb=get_tool_resource('gtc2vcf', 'memory'),
-        partition=get_tool_resource('gtc2vcf', 'partition')
+        runtime=get_tool_resource("gtc2vcf", "runtime"),
+        mem_mb=get_tool_resource("gtc2vcf", "memory"),
+        partition=get_tool_resource("gtc2vcf", "partition"),
     params:
-        options = get_tool_resource('gtc2vcf', 'cmd-line-params'),
-        csv='--csv "{}"'.format(config['static_data']['csv_manifest_file']) if config['static_data']['csv_manifest_file'] else '',
+        options=get_tool_resource("gtc2vcf", "cmd-line-params"),
+        csv=(
+            '--csv "{}"'.format(config["static_data"]["csv_manifest_file"])
+            if config["static_data"]["csv_manifest_file"]
+            else ""
+        ),
     log:
         err=os.path.join(LOGPATH, "gtc2vcf", "{sample_id}", "vcf.error.log"),
         #out=os.path.join(LOGPATH, "gtc2vcf", "{sample_id}", "vcf.out.log"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","gtc2vcf.yaml")
-    shell: 'bcftools plugin gtc2vcf {params.options} -O v --bpm "{input.bpm}" {params.csv} --egt "{input.egt}" --fasta-ref "{input.genome}" --extra {output.metatxt} {input.gtc} 2> {log.err} | bcftools sort -o {output.vcf} 2>> {log.err}'
+        "../envs/gtc2vcf.yaml"
+    shell:
+        'bcftools plugin gtc2vcf {params.options} -O v --bpm "{input.bpm}" {params.csv} --egt "{input.egt}" --fasta-ref "{input.genome}" --extra {output.metatxt} {input.gtc} 2> {log.err} | bcftools sort -o {output.vcf} 2>> {log.err}'
diff --git a/stemcnv_check/rules/penncnv.smk b/stemcnv_check/rules/penncnv.smk
index 88bada1..75ff508 100644
--- a/stemcnv_check/rules/penncnv.smk
+++ b/stemcnv_check/rules/penncnv.smk
@@ -1,12 +1,23 @@
 import os
 
+
 rule prep_PennCNV_sexfile:
-    input: cnv_vcf_input_function('PennCNV')
-    output: temp(os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv.sexfile.txt"))
-    log: os.path.join(LOGPATH,"PennCNV","{sample_id}","sexfile.log")
-    params: 
-        sex_info = lambda wildcards: get_ref_id(wildcards, True),
-        sample_docker_path = lambda wildcards: fix_container_path(os.path.join(DATAPATH, wildcards.sample_id, f"{wildcards.sample_id}.penncnv.input.tsv"),'data'),
+    input:
+        cnv_vcf_input_function("PennCNV"),
+    output:
+        temp(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv.sexfile.txt")),
+    log:
+        os.path.join(LOGPATH, "PennCNV", "{sample_id}", "sexfile.log"),
+    params:
+        sex_info=lambda wildcards: get_ref_id(wildcards, True),
+        sample_docker_path=lambda wildcards: fix_container_path(
+            os.path.join(
+                DATAPATH,
+                wildcards.sample_id,
+                f"{wildcards.sample_id}.penncnv.input.tsv",
+            ),
+            "data",
+        ),
     shell:
         #TODO check if clause
         'echo -e "{params.sample_docker_path}\t{params.sex_info[2]}" > {output}; '
@@ -17,80 +28,133 @@ rule prep_PennCNV_sexfile:
 # - extract tsv SNP file from vcf
 rule prep_PennCNV_input:
     input:
-        vcf=cnv_vcf_input_function('PennCNV')
+        vcf=cnv_vcf_input_function("PennCNV"),
     output:
-        tsv=temp(os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv.input.tsv"))
-    log: 
-        os.path.join(LOGPATH,"PennCNV","{sample_id}","input.log")
+        tsv=temp(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv.input.tsv")),
+    log:
+        os.path.join(LOGPATH, "PennCNV", "{sample_id}", "input.log"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","vembrane.yaml")
+        "../envs/vembrane.yaml"
     params:
-        filter = get_tool_filter_settings('PennCNV')
-    #TODO: need to remove pseudo-autosomal regions form X & Y
+        filter=get_tool_filter_settings("PennCNV"),
+    # TODO: need to remove pseudo-autosomal regions form X & Y
     shell:
-        'vembrane filter \'"PASS" in FILTER\' {input.vcf} 2> {log}|'
-        'vembrane table --header \'Name, Chr, Position, B Allele Freq, Log R Ratio\''
+        "vembrane filter '\"PASS\" in FILTER' {input.vcf} 2> {log}|"
+        "vembrane table --header 'Name, Chr, Position, B Allele Freq, Log R Ratio'"
         ' --long \'ID, CHROM, POS, FORMAT["BAF"][SAMPLE], FORMAT["LRR"][SAMPLE]\' > {output.tsv} 2>> {log}'
+
+
 # # Alternative:
 #     wrapper:
 #         "v3.14.1/bio/vembrane/table"
 
+
 # - run auto, x & Y calling
 rule run_PennCNV:
     input:
-        tsv=os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv.input.tsv"),
-        sexfile=os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv.sexfile.txt"),
-        pfb=config['static_data']['penncnv_pfb_file'],
-        gcmodel=config['static_data']['penncnv_GCmodel_file']
+        tsv=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv.input.tsv"),
+        sexfile=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv.sexfile.txt"),
+        pfb=config["static_data"]["penncnv_pfb_file"],
+        gcmodel=config["static_data"]["penncnv_GCmodel_file"],
     output:
-        tsv=temp(os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv-{chr}.tsv")),
-        err=os.path.join(LOGPATH,"PennCNV","{sample_id}","{chr}.error.log")
-    threads: get_tool_resource('PennCNV','threads')
+        tsv=temp(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv-{chr}.tsv")),
+        err=os.path.join(LOGPATH, "PennCNV", "{sample_id}", "{chr}.error.log"),
+    threads: get_tool_resource("PennCNV", "threads")
     resources:
-        runtime=get_tool_resource('PennCNV','runtime'),
-        mem_mb=get_tool_resource('PennCNV','memory'),
-        partition=get_tool_resource('PennCNV','partition')
+        runtime=get_tool_resource("PennCNV", "runtime"),
+        mem_mb=get_tool_resource("PennCNV", "memory"),
+        partition=get_tool_resource("PennCNV", "partition"),
     wildcard_constraints:
-        chr='chrx|chry|auto'
+        chr="chrx|chry|auto",
     params:
-        filter=get_tool_filter_settings('PennCNV'),
-        chrom=lambda wildcards: '' if wildcards.chr == 'auto' else '-' + wildcards.chr,
+        filter=get_tool_filter_settings("PennCNV"),
+        chrom=lambda wildcards: "" if wildcards.chr == "auto" else "-" + wildcards.chr,
         #LOH calling is optional, also disabled for male sex chromosomes
-        do_loh=lambda wildcards: '' if (wildcards.chr != 'auto' and get_ref_id(wildcards,True)[2] == 'm' and 
-                                        config['settings']['PennCNV']['enable_LOH_calls']) else '-loh',
-        snakedir = fix_container_path(SNAKEDIR, 'snakedir'),
-        tsvout=lambda wildcards: fix_container_path(os.path.join(DATAPATH, wildcards.sample_id, wildcards.sample_id+".penncnv-"+wildcards.chr+".tsv"),'data'),
-        tsvin=lambda wildcards: fix_container_path(os.path.join(DATAPATH, wildcards.sample_id, wildcards.sample_id+".penncnv.input.tsv"),'data'),
-        pfb=fix_container_path(config['static_data']['penncnv_pfb_file'],'static'),
-        sexfile=lambda wildcards: '' if wildcards.chr == 'auto' else '--sexfile ' + str(fix_container_path(os.path.join(DATAPATH, wildcards.sample_id, wildcards.sample_id+".penncnv.sexfile.txt"),'data')),
-        gcmodel=fix_container_path(config['static_data']['penncnv_GCmodel_file'],'static'),
-        logerr=lambda wildcards:fix_container_path(os.path.join(LOGPATH,"PennCNV", wildcards.sample_id, wildcards.chr+".error.log"),'logs'),
-        logout=lambda wildcards:fix_container_path(os.path.join(LOGPATH,"PennCNV", wildcards.sample_id, wildcards.chr+".out.log"),'logs')
+        do_loh=lambda wildcards: (
+            ""
+            if (
+                wildcards.chr != "auto"
+                and get_ref_id(wildcards, True)[2] == "m"
+                and config["settings"]["PennCNV"]["enable_LOH_calls"]
+            )
+            else "-loh"
+        ),
+        snakedir=fix_container_path(SNAKEDIR, "snakedir"),
+        tsvout=lambda wildcards: fix_container_path(
+            os.path.join(
+                DATAPATH,
+                wildcards.sample_id,
+                wildcards.sample_id + ".penncnv-" + wildcards.chr + ".tsv",
+            ),
+            "data",
+        ),
+        tsvin=lambda wildcards: fix_container_path(
+            os.path.join(
+                DATAPATH,
+                wildcards.sample_id,
+                wildcards.sample_id + ".penncnv.input.tsv",
+            ),
+            "data",
+        ),
+        pfb=fix_container_path(config["static_data"]["penncnv_pfb_file"], "static"),
+        sexfile=lambda wildcards: (
+            ""
+            if wildcards.chr == "auto"
+            else "--sexfile "
+            + str(
+                fix_container_path(
+                    os.path.join(
+                        DATAPATH,
+                        wildcards.sample_id,
+                        wildcards.sample_id + ".penncnv.sexfile.txt",
+                    ),
+                    "data",
+                )
+            )
+        ),
+        gcmodel=fix_container_path(
+            config["static_data"]["penncnv_GCmodel_file"], "static"
+        ),
+        logerr=lambda wildcards: fix_container_path(
+            os.path.join(
+                LOGPATH, "PennCNV", wildcards.sample_id, wildcards.chr + ".error.log"
+            ),
+            "logs",
+        ),
+        logout=lambda wildcards: fix_container_path(
+            os.path.join(
+                LOGPATH, "PennCNV", wildcards.sample_id, wildcards.chr + ".out.log"
+            ),
+            "logs",
+        ),
     log:
-        err=os.path.join(LOGPATH,"PennCNV","{sample_id}","{chr}.error.log"),
-        out=os.path.join(LOGPATH,"PennCNV","{sample_id}","{chr}.out.log")
+        err=os.path.join(LOGPATH, "PennCNV", "{sample_id}", "{chr}.error.log"),
+        out=os.path.join(LOGPATH, "PennCNV", "{sample_id}", "{chr}.out.log"),
     container:
         "docker://genomicslab/penncnv"
     shell:
-        '/home/user/PennCNV/detect_cnv.pl -test {params.do_loh} {params.chrom} -confidence '
-        '-hmm {params.snakedir}/supplemental-files/hhall_loh.hmm -pfb {params.pfb} -gcmodel {params.gcmodel} '
-        '{params.sexfile} {params.tsvin} -out {params.tsvout} > {params.logout} 2> {params.logerr}'
+        "/home/user/PennCNV/detect_cnv.pl -test {params.do_loh} {params.chrom} -confidence "
+        "-hmm {params.snakedir}/supplemental-files/hhall_loh.hmm -pfb {params.pfb} -gcmodel {params.gcmodel} "
+        "{params.sexfile} {params.tsvin} -out {params.tsvout} > {params.logout} 2> {params.logerr}"
+
 
+def get_penncnv_output(wildcards, files="tsv"):
+    _, _, sex, _ = get_ref_id(wildcards, True)
+    chrs = ["auto", "chrx"]
+    if sex == "m":
+        chrs.append("chry")
 
-def get_penncnv_output(wildcards, files = 'tsv'):
-    _, _, sex, _ = get_ref_id(wildcards, True) 
-    chrs = ['auto','chrx']
-    if sex == 'm': chrs.append('chry')
-    
-    if files == 'tsv':
+    if files == "tsv":
         return expand(
-            os.path.join(DATAPATH,"{sample_id}","{sample_id}.penncnv-{chr}.tsv"),
-            chr=chrs, sample_id=wildcards.sample_id
+            os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv-{chr}.tsv"),
+            chr=chrs,
+            sample_id=wildcards.sample_id,
         )
-    elif files == 'log':
+    elif files == "log":
         return expand(
-            os.path.join(LOGPATH,"PennCNV","{sample_id}","{chr}.error.log"),
-            chr=chrs, sample_id=wildcards.sample_id
+            os.path.join(LOGPATH, "PennCNV", "{sample_id}", "{chr}.error.log"),
+            chr=chrs,
+            sample_id=wildcards.sample_id,
         )
     else:
         raise ValueError("Invalid file type requested: {}".format(files))
@@ -99,16 +163,18 @@ def get_penncnv_output(wildcards, files = 'tsv'):
 # - combine calls and write out as vcf
 rule combined_PennCNV_output:
     input:
-        vcf=cnv_vcf_input_function('PennCNV'),
+        vcf=cnv_vcf_input_function("PennCNV"),
         tsvs=get_penncnv_output,
         #logs=get_penncnv_output(wildcards, 'log')
     output:
-        vcf=os.path.join(DATAPATH,"{sample_id}","{sample_id}.CNV_calls.PennCNV.vcf.gz"),
+        vcf=os.path.join(
+            DATAPATH, "{sample_id}", "{sample_id}.CNV_calls.PennCNV.vcf.gz"
+        ),
         # TODO: how to best handle this? > separate rule ?!
         # stats=os.path.join(DATAPATH,"{sample_id}","{sample_id}.CNV_calls.penncnv.stats.tsv")
     log:
-        err=os.path.join(LOGPATH,"PennCNV","{sample_id}","combine.error.log"),
+        err=os.path.join(LOGPATH, "PennCNV", "{sample_id}", "combine.error.log"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
-    script: 
-        "../scripts/combine_penncnv.R"
\ No newline at end of file
+        "../envs/general-R.yaml"
+    script:
+        "../scripts/combine_penncnv.R"
diff --git a/stemcnv_check/rules/report_generation.smk b/stemcnv_check/rules/report_generation.smk
index 878c7f6..67ea1f9 100644
--- a/stemcnv_check/rules/report_generation.smk
+++ b/stemcnv_check/rules/report_generation.smk
@@ -1,45 +1,67 @@
-
 def get_report_sample_input(wildcards):
     sample_id, ref_id, sex, ref_sex = get_ref_id(wildcards, True)
-    report_settings = config['reports'][wildcards.report]
+    report_settings = config["reports"][wildcards.report]
 
-    sample_files = expand([
+    sample_files = expand(
+        [
             os.path.join(DATAPATH, "{ids}", "{ids}.combined-cnv-calls.tsv"),
             os.path.join(DATAPATH, "{ids}", "{ids}.stats.txt"),
-            os.path.join(LOGPATH, "PennCNV", "{ids}","{chrs}.error.log"),
-            #TODO: replace with annotated VCF
-            os.path.join(DATAPATH, "{ids}", "{ids}.processed-SNP-data.{filter}-filter.vcf")
+            os.path.join(LOGPATH, "PennCNV", "{ids}", "{chrs}.error.log"),
+            # TODO: replace with annotated VCF
+            os.path.join(
+                DATAPATH, "{ids}", "{ids}.processed-SNP-data.{filter}-filter.vcf"
+            ),
             # os.path.join(DATAPATH, "{ids}", "{ids}.processed-data.tsv"),
             # os.path.join(DATAPATH, "{ids}", "{ids}.filtered-data-{filter}.tsv")
         ],
-        ids = (sample_id, ref_id) if ref_id else (sample_id,),
-        chrs = ['auto', 'chrx'] + (['chry'] if sex == 'm' else []),
-        filter = get_tool_filter_settings(f"report:{wildcards.report}:call.data.and.plots"),
+        ids=(sample_id, ref_id) if ref_id else (sample_id,),
+        chrs=["auto", "chrx"] + (["chry"] if sex == "m" else []),
+        filter=get_tool_filter_settings(
+            f"report:{wildcards.report}:call.data.and.plots"
+        ),
     )
 
-    incl_sections = config_extract(('include_sections', ), report_settings, config['reports']['__default__'])
-    excl_sections = config_extract(('exclude_sections', ), report_settings, config['reports']['__default__'])
-    do_snp_clustering = (incl_sections == '__all__' or 'SNP.dendrogram' in incl_sections) and not 'SNP.dendrogram' in excl_sections
+    incl_sections = config_extract(
+        ("include_sections",), report_settings, config["reports"]["__default__"]
+    )
+    excl_sections = config_extract(
+        ("exclude_sections",), report_settings, config["reports"]["__default__"]
+    )
+    do_snp_clustering = (
+        incl_sections == "__all__" or "SNP.dendrogram" in incl_sections
+    ) and not "SNP.dendrogram" in excl_sections
     if do_snp_clustering:
-        extra_sample_def = config_extract(('SNP_comparison', 'extra_samples'), report_settings, config['reports']['__default__'])
+        extra_sample_def = config_extract(
+            ("SNP_comparison", "extra_samples"),
+            report_settings,
+            config["reports"]["__default__"],
+        )
         ids = collect_SNP_cluster_ids(sample_id, extra_sample_def, sample_data_full)
         # VCF has both filtered & unfiltered SNPs now
         # if not config_extract(('SNP_comparison', 'ignore_filter'), report_settings, config['reports']['__default__']):
         sample_files += expand(
-            [os.path.join(DATAPATH, "{ids}", "{ids}.processed-SNP-data.{filter}-filter.vcf")],
-            ids=ids, filter = get_tool_filter_settings(f"report:{wildcards.report}:SNP_comparison")
+            [
+                os.path.join(
+                    DATAPATH, "{ids}", "{ids}.processed-SNP-data.{filter}-filter.vcf"
+                )
+            ],
+            ids=ids,
+            filter=get_tool_filter_settings(
+                f"report:{wildcards.report}:SNP_comparison"
+            ),
         )
 
-    if wildcards.ext == 'pdf':
+    if wildcards.ext == "pdf":
         sample_files += [os.path.join(LOGPATH, "report", "_latex_installation_check")]
 
     return sample_files
 
+
 rule check_latex_installation:
     output:
-        os.path.join(LOGPATH, "report", "_latex_installation_check")
+        os.path.join(LOGPATH, "report", "_latex_installation_check"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
+        "../envs/general-R.yaml"
     shell:
         """
 Rscript - << 'EOF'
@@ -60,26 +82,25 @@ EOF
 
 rule knit_report:
     input:
-        get_report_sample_input
+        get_report_sample_input,
     output:
-        report=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.{report}.{ext}") ,
+        report=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.{report}.{ext}"),
         plots=directory(os.path.join(DATAPATH, "{sample_id}", "{report}-{ext}_images")),
         # -> this extra output is optional now!
         #summary=os.path.join(DATAPATH, "{sample_id}", "{sample_id}.summary-check.tsv")
     wildcard_constraints:
-        ext="(pdf|html)"
-    threads: get_tool_resource('knitr', 'threads')
+        ext="(pdf|html)",
+    threads: get_tool_resource("knitr", "threads")
     # params:
     #     config = get_report_config
     resources:
-        runtime=get_tool_resource('knitr', 'runtime'),
-        mem_mb=get_tool_resource('knitr', 'memory'),
-        partition=get_tool_resource('knitr', 'partition')
+        runtime=get_tool_resource("knitr", "runtime"),
+        mem_mb=get_tool_resource("knitr", "memory"),
+        partition=get_tool_resource("knitr", "partition"),
     log:
         err=os.path.join(LOGPATH, "report", "{sample_id}", "{report}-{ext}.error.log"),
-        out=os.path.join(LOGPATH, "report", "{sample_id}", "{report}-{ext}.out.log")
+        out=os.path.join(LOGPATH, "report", "{sample_id}", "{report}-{ext}.out.log"),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
+        "../envs/general-R.yaml"
     shell:
         "Rscript {SNAKEDIR}/scripts/knit_report.R {wildcards.sample_id} {wildcards.report} {CONFIGFILE} > {log.out} 2> {log.err}"
-
diff --git a/stemcnv_check/rules/staticdata_creation.smk b/stemcnv_check/rules/staticdata_creation.smk
index c085ac4..625d0a0 100644
--- a/stemcnv_check/rules/staticdata_creation.smk
+++ b/stemcnv_check/rules/staticdata_creation.smk
@@ -5,26 +5,28 @@ from pathlib import Path
 import tempfile
 from stemcnv_check import STEM_CNV_CHECK
 
-DOWNLOAD_DIR = config['TMPDIR'] if 'TMPDIR' in config else tempfile.mkdtemp()
-GENOME = config['genome']
+DOWNLOAD_DIR = config["TMPDIR"] if "TMPDIR" in config else tempfile.mkdtemp()
+GENOME = config["genome"]
 
 # ================================================================
 
+
 def fix_container_path(path_in, bound_to):
     path_in = Path(path_in)
 
-    if bound_to == 'static':
+    if bound_to == "static":
         rel_path = path_in.name
     else:
         local_target = {
-            'snakedir': Path(importlib.resources.files(STEM_CNV_CHECK)),
-            'tmp': Path(DOWNLOAD_DIR)
+            "snakedir": Path(importlib.resources.files(STEM_CNV_CHECK)),
+            "tmp": Path(DOWNLOAD_DIR),
         }[bound_to].absolute()
         rel_path = path_in.absolute().relative_to(local_target)
 
-    return Path('/outside/') / bound_to / rel_path
+    return Path("/outside/") / bound_to / rel_path
+
 
-# 
+#
 # rule all:
 #     input:
 #         config['genomeInfo_file'],
@@ -35,14 +37,17 @@ def fix_container_path(path_in, bound_to):
 #         config['genome_gtf_file']
 
 
-#Note: PennCNV does not seem to work with UCSC chromosome style in PFB file
+# Note: PennCNV does not seem to work with UCSC chromosome style in PFB file
 rule create_pfb_from_vcf:
-    input: config['vcf_input_file']
-    output: config['penncnv_pfb_file']
+    input:
+        config["vcf_input_file"],
+    output:
+        config["penncnv_pfb_file"],
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
-    #shell: "Rscript {SNAKEDIR}/scripts/make_PFB_from_vcf.R {input} {output}"
-    shell: """
+        "../envs/general-R.yaml"
+    # shell: "Rscript {SNAKEDIR}/scripts/make_PFB_from_vcf.R {input} {output}"
+    shell:
+        """
 Rscript - << 'EOF'
 suppressMessages(library(tidyverse))
 suppressMessages(library(vcfR))
@@ -79,14 +84,17 @@ vcf.info %>%
 EOF
 """
 
-#FUTURE:
-#also get the gc5base.bw file from UCSCand make the GC model from that?
+
+# FUTURE:
+# also get the gc5base.bw file from UCSCand make the GC model from that?
 # -> PennCNV comes with a wig2gc5base python script (though that has a hard coded 'source' file in it?
 rule create_gcmodel_file:
-    input: config['penncnv_pfb_file']
-    output: config['penncnv_GCmodel_file']
+    input:
+        config["penncnv_pfb_file"],
+    output:
+        config["penncnv_GCmodel_file"],
     params:
-        download_path = fix_container_path(DOWNLOAD_DIR, 'tmp')
+        download_path=fix_container_path(DOWNLOAD_DIR, "tmp"),
     container:
         "docker://genomicslab/penncnv"
     shell:
@@ -99,19 +107,20 @@ rule create_gcmodel_file:
         /home/user/PennCNV/cal_gc_snp.pl {params.download_path}/{GENOME}.gc5Base.txt {input} -out {output}
         """
 
+
 rule create_array_info_file:
     input:
-        pfb = config['penncnv_pfb_file'],
-        chromInfo = config['genomeInfo_file']
+        pfb=config["penncnv_pfb_file"],
+        chromInfo=config["genomeInfo_file"],
     output:
-        density = config['array_density_file'],
-        gaps = config['array_gaps_file']
+        density=config["array_density_file"],
+        gaps=config["array_gaps_file"],
     params:
-        min_gap_size = config['min_gap_size'],
-        density_windows = config['density_windows'],
-        genome = config['genome']
+        min_gap_size=config["min_gap_size"],
+        density_windows=config["density_windows"],
+        genome=config["genome"],
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
+        "../envs/general-R.yaml"
     shell:
         """
 Rscript - << 'EOF'
@@ -163,18 +172,25 @@ write_bed(density, '{output.density}')
 EOF
 """
 
+
 rule gencode_v45_gtf_download:
-    output: config['genome_gtf_file']
+    output:
+        config["genome_gtf_file"],
     # Source gtf GRCh38:
     params:
-        ftp_base = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_45/",
-        release_path = "gencode.v45.basic.annotation.gtf.gz" if GENOME == "hg38" else "GRCh37_mapping/gencode.v45lift37.basic.annotation.gtf.gz"
+        ftp_base="https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_45/",
+        release_path=(
+            "gencode.v45.basic.annotation.gtf.gz"
+            if GENOME == "hg38"
+            else "GRCh37_mapping/gencode.v45lift37.basic.annotation.gtf.gz"
+        ),
     shell:
         """
         wget {params.ftp_base}/{params.release_path} -O {output} 2> /dev/null
         # gunzip {output}.gz
         """
 
+
 # #FIXME: the gencode fa.gz files are gzip not bgzip compressed
 # rule gencode_v45_genomeFasta_download:
 #     output: config['genome_fasta_file']
@@ -187,10 +203,12 @@ rule gencode_v45_gtf_download:
 #         wget {params.ftp_base}/{params.release_path} -O {output} 2> /dev/null
 #         """
 
+
 rule ucsc_goldenpath_download:
-    output: temp("{DOWNLOAD_DIR}/{genome}.{filename}.txt")
+    output:
+        temp("{DOWNLOAD_DIR}/{genome}.{filename}.txt"),
     wildcard_constraints:
-        genome = "hg19|hg38"
+        genome="hg19|hg38",
     shell:
         """
         wget 'https://hgdownload.cse.ucsc.edu/goldenPath/{wildcards.genome}/database/{wildcards.filename}.txt.gz' -O {output}.gz 2> /dev/null
@@ -200,14 +218,15 @@ rule ucsc_goldenpath_download:
 
 rule create_genome_info_file:
     input:
-        cytobands = ancient(os.path.join(DOWNLOAD_DIR, f"{GENOME}.cytoBand.txt")),
+        cytobands=ancient(os.path.join(DOWNLOAD_DIR, f"{GENOME}.cytoBand.txt")),
         #cytoBandIdeo.txt has info for non-assmebled chromosomes
         #centromer = os.path.join(DOWNLOAD_DIR, f"{GENOME}.centromeres.txt"), #Only exists for hg38
-        chrominfo = ancient(os.path.join(DOWNLOAD_DIR, f"{GENOME}.chromInfo.txt"))
+        chrominfo=ancient(os.path.join(DOWNLOAD_DIR, f"{GENOME}.chromInfo.txt")),
         #Note: the chromAlias.txt file might be useful if people use strange chr-/seqnames
-    output: config['genomeInfo_file']
+    output:
+        config["genomeInfo_file"],
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","general-R.yaml")
+        "../envs/general-R.yaml"
     shell:
         """
 Rscript - << 'EOF'
@@ -239,25 +258,38 @@ write_tsv(chrominfo, "{output}")
 EOF
         """
 
+
 rule download_vep_fasta:
     output:
-        os.path.join(config['vep_fasta_path'], 'homo_sapiens', '112_{genome}', 'Homo_sapiens.{genome}.dna.toplevel.fa.gz')
+        os.path.join(
+            config["vep_fasta_path"],
+            "homo_sapiens",
+            "112_{genome}",
+            "Homo_sapiens.{genome}.dna.toplevel.fa.gz",
+        ),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","vep-annotation.yaml")
+        importlib.resources.files(STEM_CNV_CHECK).joinpath(
+            "envs", "vep-annotation.yaml"
+        )
     params:
-        fasta_path = config['vep_fasta_path']
+        fasta_path=config["vep_fasta_path"],
     shell:
         "vep_install -a f -s homo_sapiens -y {wildcards.genome} -c {params.fasta_path}"
 
+
 rule download_vep_cache:
     output:
-        done = os.path.join(config['vep_cache_path'], '.{genome}.done'),
-        folder = directory(os.path.join(config['vep_cache_path'], 'homo_sapiens', '112_{genome}'))
+        done=os.path.join(config["vep_cache_path"], ".{genome}.done"),
+        folder=directory(
+            os.path.join(config["vep_cache_path"], "homo_sapiens", "112_{genome}")
+        ),
     conda:
-        importlib.resources.files(STEM_CNV_CHECK).joinpath("envs","vep-annotation.yaml")
+        importlib.resources.files(STEM_CNV_CHECK).joinpath(
+            "envs", "vep-annotation.yaml"
+        )
     params:
-        cache_path = config['vep_cache_path']
+        cache_path=config["vep_cache_path"],
     shell:
         # -r {params.cache_path}/plugins; check if needed or if it follows -c
         # -a f > automatically get ensembl genome fasta; can replace fasta & gtf
-        "vep_install -a cp -g DosageSensitivity -s homo_sapiens -y {wildcards.genome} -c {params.cache_path} --CONVERT && touch {output.done}"
\ No newline at end of file
+        "vep_install -a cp -g DosageSensitivity -s homo_sapiens -y {wildcards.genome} -c {params.cache_path} --CONVERT && touch {output.done}"