-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_v2.yaml
46 lines (37 loc) · 2.93 KB
/
config_v2.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
---
# Configuration for the UKBB trait-association analysis.
# All values are literal strings (mostly absolute paths) unless noted otherwise.

projectTitle: 'UKBB_Trait_Analysis'
# htmlOutputPath: /s/public_webshare/project/UKBB_Splicing_Analysis/html

# --- Reference annotation / genome (Gencode human release 34, GRCh38 primary assembly) ---
gtf_file: '/s/genomes/Gencode/Gencode_human/release_34/gencode.v34.annotation.gtf.gz'
fasta_file: '/s/genomes/Gencode/Gencode_human/release_34/GRCh38.primary_assembly.genome.fa'

# Protein-coding gene table (hg38), stored both as CSV and as Parquet.
protein_coding_genes_csv: '/s/project/rep/processed/trait_associations_v2/protein_coding_genes.hg38.csv'
protein_coding_genes_pq: '/s/project/rep/processed/trait_associations_v2/protein_coding_genes.hg38.parquet'

# --- UK Biobank phenotype directories (raw, processed, decoded) ---
ukbb_raw_phenotypes_dir: '/s/raw/ukbiobank/phenotypes'
ukbb_processed_phenotypes_dir: '/s/project/uk_biobank/processed/phenotypes'
ukbb_decoded_phenotypes_dir: '/s/project/uk_biobank/processed/decoded_phenotypes'

# --- Genebass ---
# input
# "gs://ukbb-exome-public/300k/results/results.mt"
# NOTE(review): the bucket path above is presumably the upstream source of the
# local copy below - confirm.
genebass_results_mt: '/s/project/rep/processed/genebass/results.mt'
# output
genebass_results_pq: '/s/project/rep/processed/genebass/results.parquet'
genebass_results_filtered_pq: '/s/project/rep/processed/genebass/filtered.parquet'
genebass_filtered_phenotypes_pq: '/s/project/rep/processed/genebass/filtered_phenotypes.parquet'
# Numeric (float) threshold, intentionally left unquoted.
genebass_bonferroni_cutoff: 0.01

# --- Run-specific configuration and outputs (ukbb_wes_200k) ---
# config_dir will be copied to output_basedir
config_dir: 'config/ukbb_wes_200k'
output_basedir: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k'
# The path below contains a literal "{phenotype_col}" placeholder.
# NOTE(review): presumably substituted per phenotype by the consuming pipeline - confirm.
trait_associations: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k/associate/{phenotype_col}'
trait_association_comparison: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k/compare_associations'
feature_sets_dir: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k/feature_sets'
samples: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k/samples.txt'
PRS_score_mapping: '/s/project/rep/processed/trait_associations_v2/ukbb_wes_200k/PRS_score_mapping.csv'

# --- Feature sets ---
# Alternative (disabled) prediction set:
# abexp_predictions: "/s/project/rep/processed/training_results_v10/ukbb_wes_200k/predict_all/fset=DNA_only_nosplice_tl010/gtex_v8_old_dna/dna_only/DNA_only_nosplice_tl010@train_simplecv.py#lightgbm/data.parquet"
abexp_predictions: '/s/project/rep/processed/training_results_v10/ukbb_wes_200k/predict_all/fset=DNA_only_splice_agg/gtex_v8_old_dna/dna_only/DNA_only_splice@train_simplecv.py#lightgbm/data.parquet'
vep_predictions: '/s/project/rep/processed/training_results_v10/ukbb_wes_200k/feature_sets/aggregate_vep_spark.py@gt=any_tl=CANONICAL/data.parquet'
vep_variants: '/s/project/rep/processed/training_results_v10/ukbb_wes_200k/veff/tissue_specific_vep.py@tl=CANONICAL/veff.parquet'

# --- Clumping ---
# mac_index_variants: "/s/project/uk_biobank/processed/clumping/30780/WES_200K_index_vars_mac_30780.parquet.p"
# Alternative (disabled) annotation, identical to gtf_file:
# clumping_gtf_file: "/s/genomes/Gencode/Gencode_human/release_34/gencode.v34.annotation.gtf.gz"
# NOTE(review): the active value is the GRCh37 lift-over of the v34 annotation,
# whereas gtf_file uses the GRCh38 annotation - confirm this is intentional.
clumping_gtf_file: '/s/genomes/Gencode/Gencode_human/release_34/GRCh37_mapping/gencode.v34lift37.annotation.gtf.gz'

# --- Misc ---
num_samples_per_tissue: '/s/project/rep/processed/training_results_v10/gtex_v8_old_dna/plots/nr_samples_per_tissue.py/samples_per_tissue.parquet'