From 462188264fff8efccb54143577acec5e8b3b104f Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Fri, 4 Oct 2024 10:13:56 -0700 Subject: [PATCH] Added tutorials. Fixed single-species model loading in gradient scripts. --- src/scripts/borzoi_satg_gene.py | 14 +- .../borzoi_satg_gene_crispr_ism_shuffle.py | 14 +- src/scripts/borzoi_satg_gene_focused_ism.py | 14 +- src/scripts/borzoi_satg_polya.py | 14 +- src/scripts/borzoi_satg_splice.py | 14 +- .../interpret_sequence/HBE1_example.gtf | 39 +++ tutorials/latest/interpret_sequence/README.md | 3 + .../explore_grads_k562_HBE1.ipynb | 276 +++++++++++++++ .../run_gradients_expr_HBE1.sh | 3 + .../latest/interpret_sequence/vis_helpers.py | 153 ++++++++ tutorials/latest/make_data/Makefile | 45 +++ tutorials/latest/make_data/README.md | 3 + tutorials/latest/make_data/download_bw.sh | 41 +++ .../latest/make_data/download_dependencies.sh | 97 ++++++ tutorials/latest/make_data/process_w5.sh | 65 ++++ tutorials/latest/make_data/targets_human.txt | 3 + tutorials/latest/score_variants/README.md | 3 + .../score_variants/run_variant_scripts.ipynb | 169 +++++++++ .../latest/score_variants/score_expr_sad.sh | 5 + .../latest/score_variants/score_expr_sed.sh | 5 + .../latest/score_variants/score_polya.sh | 5 + .../latest/score_variants/score_splice.sh | 5 + tutorials/latest/score_variants/snps_expr.vcf | 6 + .../latest/score_variants/snps_polya.vcf | 10 + .../latest/score_variants/snps_splice.vcf | 10 + tutorials/latest/train_model/README.md | 3 + .../latest/train_model/params_micro.json | 74 ++++ tutorials/latest/train_model/params_mini.json | 73 ++++ tutorials/latest/train_model/train_micro.sh | 3 + tutorials/latest/train_model/train_mini.sh | 3 + tutorials/legacy/interpret_sequence/README.md | 3 + .../explore_grads_liver_CFHR2.ipynb | 328 ++++++++++++++++++ .../explore_polya_grads_CD99.ipynb | 180 ++++++++++ .../explore_splice_grads_GCFC2.ipynb | 180 ++++++++++ .../run_gradients_expr_CFHR2.sh | 3 + .../run_gradients_polya_CD99.sh | 3 
+ .../run_gradients_splice_GCFC2.sh | 3 + .../legacy/interpret_sequence/vis_helpers.py | 153 ++++++++ tutorials/legacy/make_data/Makefile | 45 +++ tutorials/legacy/make_data/README.md | 3 + tutorials/legacy/make_data/download_bw.sh | 41 +++ .../legacy/make_data/download_dependencies.sh | 97 ++++++ tutorials/legacy/make_data/process_w5.sh | 65 ++++ tutorials/legacy/make_data/targets_human.txt | 3 + tutorials/legacy/score_variants/README.md | 3 + .../score_variants/run_variant_scripts.ipynb | 201 +++++++++++ .../legacy/score_variants/score_expr_sad.sh | 5 + .../legacy/score_variants/score_expr_sed.sh | 5 + .../legacy/score_variants/score_polya.sh | 5 + .../legacy/score_variants/score_splice.sh | 5 + tutorials/legacy/score_variants/snps_expr.vcf | 6 + .../legacy/score_variants/snps_polya.vcf | 10 + .../legacy/score_variants/snps_splice.vcf | 10 + tutorials/legacy/train_model/README.md | 3 + .../legacy/train_model/params_micro.json | 78 +++++ tutorials/legacy/train_model/params_mini.json | 77 ++++ tutorials/legacy/train_model/train_micro.sh | 3 + tutorials/legacy/train_model/train_mini.sh | 3 + 58 files changed, 2683 insertions(+), 10 deletions(-) create mode 100644 tutorials/latest/interpret_sequence/HBE1_example.gtf create mode 100644 tutorials/latest/interpret_sequence/README.md create mode 100644 tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb create mode 100755 tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh create mode 100644 tutorials/latest/interpret_sequence/vis_helpers.py create mode 100644 tutorials/latest/make_data/Makefile create mode 100644 tutorials/latest/make_data/README.md create mode 100755 tutorials/latest/make_data/download_bw.sh create mode 100755 tutorials/latest/make_data/download_dependencies.sh create mode 100755 tutorials/latest/make_data/process_w5.sh create mode 100644 tutorials/latest/make_data/targets_human.txt create mode 100644 tutorials/latest/score_variants/README.md create mode 100644 
tutorials/latest/score_variants/run_variant_scripts.ipynb create mode 100644 tutorials/latest/score_variants/score_expr_sad.sh create mode 100755 tutorials/latest/score_variants/score_expr_sed.sh create mode 100644 tutorials/latest/score_variants/score_polya.sh create mode 100644 tutorials/latest/score_variants/score_splice.sh create mode 100644 tutorials/latest/score_variants/snps_expr.vcf create mode 100644 tutorials/latest/score_variants/snps_polya.vcf create mode 100644 tutorials/latest/score_variants/snps_splice.vcf create mode 100644 tutorials/latest/train_model/README.md create mode 100644 tutorials/latest/train_model/params_micro.json create mode 100644 tutorials/latest/train_model/params_mini.json create mode 100755 tutorials/latest/train_model/train_micro.sh create mode 100755 tutorials/latest/train_model/train_mini.sh create mode 100644 tutorials/legacy/interpret_sequence/README.md create mode 100644 tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb create mode 100644 tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb create mode 100644 tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh create mode 100644 tutorials/legacy/interpret_sequence/vis_helpers.py create mode 100644 tutorials/legacy/make_data/Makefile create mode 100644 tutorials/legacy/make_data/README.md create mode 100755 tutorials/legacy/make_data/download_bw.sh create mode 100755 tutorials/legacy/make_data/download_dependencies.sh create mode 100755 tutorials/legacy/make_data/process_w5.sh create mode 100644 tutorials/legacy/make_data/targets_human.txt create mode 100644 tutorials/legacy/score_variants/README.md create mode 100644 tutorials/legacy/score_variants/run_variant_scripts.ipynb create 
mode 100755 tutorials/legacy/score_variants/score_expr_sad.sh create mode 100755 tutorials/legacy/score_variants/score_expr_sed.sh create mode 100755 tutorials/legacy/score_variants/score_polya.sh create mode 100755 tutorials/legacy/score_variants/score_splice.sh create mode 100644 tutorials/legacy/score_variants/snps_expr.vcf create mode 100644 tutorials/legacy/score_variants/snps_polya.vcf create mode 100644 tutorials/legacy/score_variants/snps_splice.vcf create mode 100644 tutorials/legacy/train_model/README.md create mode 100644 tutorials/legacy/train_model/params_micro.json create mode 100644 tutorials/legacy/train_model/params_mini.json create mode 100755 tutorials/legacy/train_model/train_micro.sh create mode 100755 tutorials/legacy/train_model/train_mini.sh diff --git a/src/scripts/borzoi_satg_gene.py b/src/scripts/borzoi_satg_gene.py index 1c96712..9429498 100755 --- a/src/scripts/borzoi_satg_gene.py +++ b/src/scripts/borzoi_satg_gene.py @@ -229,8 +229,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -308,8 +313,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + 
"_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py index b3fd477..0db478d 100755 --- a/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py +++ b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py @@ -252,8 +252,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -376,8 +381,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_gene_focused_ism.py b/src/scripts/borzoi_satg_gene_focused_ism.py index f095be8..5ee58ca 100755 --- a/src/scripts/borzoi_satg_gene_focused_ism.py +++ b/src/scripts/borzoi_satg_gene_focused_ism.py @@ -267,8 +267,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = 
model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -514,8 +519,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_polya.py b/src/scripts/borzoi_satg_polya.py index 9f26eba..98206a1 100755 --- a/src/scripts/borzoi_satg_polya.py +++ b/src/scripts/borzoi_satg_polya.py @@ -180,8 +180,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -309,8 +314,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + 
str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_splice.py b/src/scripts/borzoi_satg_splice.py index 473192f..24648ce 100755 --- a/src/scripts/borzoi_satg_splice.py +++ b/src/scripts/borzoi_satg_splice.py @@ -181,8 +181,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -281,8 +286,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/tutorials/latest/interpret_sequence/HBE1_example.gtf b/tutorials/latest/interpret_sequence/HBE1_example.gtf new file mode 100644 index 0000000..6e39119 --- /dev/null +++ b/tutorials/latest/interpret_sequence/HBE1_example.gtf @@ -0,0 +1,39 @@ +chr11 HAVANA transcript 5268345 5269945 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5269799 5269945 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5269799 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA start_codon 5269888 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 2; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5269454 5269676 . - 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 2; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5268345 5268597 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA stop_codon 5268469 5268471 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA UTR 5269891 5269945 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA UTR 5268345 5268471 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA transcript 5268345 5505604 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5505569 5505604 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 1; exon_id "ENSE00001484269.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5281909 5281951 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 2; exon_id "ENSE00001484268.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5269799 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5269799 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA start_codon 5269888 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 4; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5269454 5269676 . 
- 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 4; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5268345 5268597 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA stop_codon 5268469 5268471 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5505569 5505604 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 1; exon_id "ENSE00001484269.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5281909 5281951 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 2; exon_id "ENSE00001484268.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5269891 5270156 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5268345 5268471 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA transcript 5268345 5505652 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5505569 5505652 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 1; exon_id "ENSE00001526635.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5269799 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5269799 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA start_codon 5269888 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 3; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5269454 5269676 . - 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 3; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5268345 5268597 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA stop_codon 5268469 5268471 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5505569 5505652 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 1; exon_id "ENSE00001526635.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5269891 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5268345 5268471 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; diff --git a/tutorials/latest/interpret_sequence/README.md b/tutorials/latest/interpret_sequence/README.md new file mode 100644 index 0000000..1ac18dd --- /dev/null +++ b/tutorials/latest/interpret_sequence/README.md @@ -0,0 +1,3 @@ +## Interpretation + +Todo. diff --git a/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb b/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb new file mode 100644 index 0000000..dc044d5 --- /dev/null +++ b/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3bcaea3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores_hyp.shape = (1, 1, 393216, 4)\n", + "scores.shape = (1, 1, 393216, 4)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load scores for the selected set of targets (grad)\n", + "\n", + "import 
gc\n", + "\n", + "seqs = None\n", + "strands = None\n", + "chrs = None\n", + "starts = None\n", + "ends = None\n", + "genes = None\n", + "\n", + "all_scores_hyp = []\n", + "all_scores = []\n", + "\n", + "gtex_tissues = ['liver']\n", + "\n", + "#Load score file\n", + "score_file = h5py.File('k562_HBE1/scores_f0c0.h5', 'r')\n", + "\n", + "#Get scores and onehots\n", + "scores = score_file['grads'][()][..., 0]\n", + "seqs = score_file['seqs'][()]\n", + "\n", + "#Get auxiliary information\n", + "strands = score_file['strand'][()]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "chrs = score_file['chr'][()]\n", + "chrs = np.array([chrs[j].decode() for j in range(chrs.shape[0])])\n", + "\n", + "starts = np.array(score_file['start'][()])\n", + "ends = np.array(score_file['end'][()])\n", + "\n", + "genes = score_file['gene'][()]\n", + "genes = np.array([genes[j].decode().split(\".\")[0] for j in range(genes.shape[0])])\n", + "\n", + "#Append hypothetical scores\n", + "all_scores_hyp.append(scores[None, ...])\n", + "\n", + "#Append input-gated scores\n", + "all_scores.append((scores * seqs)[None, ...])\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n", + "\n", + "#Collect final scores\n", + "scores_hyp = np.concatenate(all_scores_hyp, axis=0)\n", + "scores = np.concatenate(all_scores, axis=0)\n", + "\n", + "print(\"scores_hyp.shape = \" + str(scores_hyp.shape))\n", + "print(\"scores.shape = \" + str(scores.shape))\n", + "\n", + "score_file = None\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "955bf762", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#Enumerate and visualize attributions; k562 example HBE1\n", + "\n", + "save_index = []\n", + "\n", + "#Visualization parameters\n", + "logo_width = 192\n", + "\n", + "top_n = 1\n", + "\n", + "use_gaussian = True\n", + "min_padding = 65536\n", + "gaussian_sigma = 8\n", + 
"local_window = 1024\n", + "\n", + "main_tissue_ix = 0\n", + "\n", + "tissue_colors = ['darkblue']\n", + "\n", + "#Loop over examples\n", + "for example_ix in range(top_n) :\n", + " \n", + " print(\"-- Example = \" + str(example_ix)+ \" --\")\n", + " \n", + " print(\" - \" + genes[example_ix] + \"(\" + str(strands[example_ix]) + \")\")\n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]))\n", + "\n", + " #Grad analysis\n", + " \n", + " #Calculate min and max scores globally (for scales)\n", + " min_val = np.min(scores[:, example_ix, ...])\n", + " max_val = np.max(scores[:, example_ix, ...])\n", + " \n", + " print(\" -- min_val = \" + str(round(min_val, 4)))\n", + " print(\" -- max_val = \" + str(round(max_val, 4)))\n", + " \n", + " max_abs_val = max(np.abs(min_val), np.abs(max_val))\n", + "\n", + " min_val -= 0.1 * max_abs_val\n", + " max_val += 0.1 * max_abs_val\n", + "\n", + " print(\" - (Gradient score profiles per tissue) - \")\n", + " \n", + " #Gradient profiles across input sequence\n", + " f, ax = plt.subplots(len(gtex_tissues), 1, figsize=(8, len(gtex_tissues) * 1.5))\n", + " \n", + " if len(gtex_tissues) == 1 :\n", + " ax = [ax]\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + "\n", + " #Get tissue scores\n", + " score = scores[tissue_ix, example_ix, ...]\n", + "\n", + " l1 = ax[tissue_ix].plot(np.arange(seqs.shape[1]), np.sum(score, axis=-1), linewidth=1, linestyle='-', color=tissue_colors[tissue_ix], label=gtex_tissues[tissue_ix])\n", + " \n", + " plt.sca(ax[tissue_ix])\n", + " \n", + " plt.xlim(0, seqs.shape[1])\n", + " plt.ylim(min_val, max_val)\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + " \n", + " plt.sca(ax[0])\n", + " plt.title(\"Gradient Saliency for gene = '\" + genes[example_ix] + \"' (\" + str(strands[example_ix]) + \")\", fontsize=8)\n", + " \n", + " 
plt.sca(ax[len(gtex_tissues)-1])\n", + " plt.xlabel(chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]), fontsize=8)\n", + " \n", + " plt.sca(plt.gca())\n", + " plt.tight_layout()\n", + " \n", + " plt.show()\n", + "\n", + " #Apply gaussian filter\n", + " smooth_score = np.sum(scores[main_tissue_ix, example_ix, ...], axis=-1)\n", + " if use_gaussian :\n", + " smooth_score = gaussian_filter1d(smooth_score.astype('float32'), sigma=gaussian_sigma, truncate=2).astype('float16')\n", + " \n", + " #Calculate min/max positions and (differential) values\n", + " #max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + " \n", + " max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + "\n", + " print(\" - (Attribution at position of Max positive differential saliency) -\")\n", + "\n", + " print(\" - max_pos (rel) = \" + str(max_pos))\n", + " print(\" - max_pos (abs) = \" + str(starts[example_ix] + max_pos))\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - logo_width // 2\n", + " plot_end = max_pos + logo_width // 2\n", + " \n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix] + max_pos - logo_width // 2) + \"-\" + str(starts[example_ix] + max_pos + logo_width // 2))\n", + "\n", + " #Logo min/max value across tissues\n", + " min_logo_val = np.min(scores[:, example_ix, plot_start:plot_end, :])\n", + " max_logo_val = np.max(scores[:, example_ix, plot_start:plot_end, :])\n", + "\n", + " max_abs_logo_val = max(np.abs(min_logo_val), np.abs(max_logo_val))\n", + "\n", + " min_logo_val -= 0.02 * max_abs_logo_val\n", + " max_logo_val += 0.02 * max_abs_logo_val\n", + "\n", + " print(\" - y_min = \" + str(round(min_logo_val, 8)))\n", + " print(\" - y_max = \" + str(round(max_logo_val, 8)))\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + " print(gtex_tissues[tissue_ix])\n", + "\n", + " #Get tissue-specific scores\n", 
+ " score = scores[tissue_ix, example_ix, plot_start:plot_end, :]\n", + "\n", + " #Plot scores as sequence logo\n", + " plot_seq_scores(\n", + " score,\n", + " y_min=min_logo_val,\n", + " y_max=max_logo_val,\n", + " figsize=(8, 1),\n", + " plot_y_ticks=False,\n", + " )\n", + " \n", + " print(\"--------------------\")\n", + " print(\"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67a3cf9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh b/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh new file mode 100755 index 0000000..987a843 --- /dev/null +++ b/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_gene.py -o k562_HBE1 -f 0 -c 0 --rc --track_scale 0.3 --track_transform 0.5 --clip_soft 384.0 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models HBE1_example.gtf diff --git a/tutorials/latest/interpret_sequence/vis_helpers.py b/tutorials/latest/interpret_sequence/vis_helpers.py new file mode 100644 index 0000000..00b92ef --- /dev/null +++ b/tutorials/latest/interpret_sequence/vis_helpers.py @@ -0,0 +1,153 @@ +import sys +import os +import numpy as np + +import matplotlib.pyplot as plt + +import matplotlib.cm as cm +import matplotlib.colors as colors + +import matplotlib as mpl +from matplotlib.text import TextPath +from matplotlib.patches import PathPatch, Rectangle +from matplotlib.font_manager import FontProperties +from 
matplotlib import gridspec +from matplotlib.ticker import FormatStrFormatter + +#Helper function to draw a letter at a given position +def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): + + fp = FontProperties(family="DejaVu Sans", weight="bold") + globscale = 1.35 + LETTERS = { "T" : TextPath((-0.305, 0), "T", size=1, prop=fp), + "G" : TextPath((-0.384, 0), "G", size=1, prop=fp), + "A" : TextPath((-0.35, 0), "A", size=1, prop=fp), + "C" : TextPath((-0.366, 0), "C", size=1, prop=fp), + "UP" : TextPath((-0.488, 0), '$\\Uparrow$', size=1, prop=fp), + "DN" : TextPath((-0.488, 0), '$\\Downarrow$', size=1, prop=fp), + "(" : TextPath((-0.25, 0), "(", size=1, prop=fp), + "." : TextPath((-0.125, 0), "-", size=1, prop=fp), + ")" : TextPath((-0.1, 0), ")", size=1, prop=fp)} + COLOR_SCHEME = {'G': 'orange',#'orange', + 'A': 'green',#'red', + 'C': 'blue',#'blue', + 'T': 'red',#'darkgreen', + 'UP': 'green', + 'DN': 'red', + '(': 'black', + '.': 'black', + ')': 'black'} + + + text = LETTERS[letter] + + chosen_color = COLOR_SCHEME[letter] + if color is not None : + chosen_color = color + + t = mpl.transforms.Affine2D().scale(1*globscale, yscale*globscale) + \ + mpl.transforms.Affine2D().translate(x,y) + ax.transData + p = PathPatch(text, lw=0, fc=chosen_color, alpha=alpha, transform=t) + if ax != None: + ax.add_artist(p) + return p + +#Function to plot sequence logo +def plot_seq_scores(importance_scores, figsize=(16, 2), plot_y_ticks=True, y_min=None, y_max=None, save_figs=False, fig_name="default") : + + importance_scores = importance_scores.T + + fig = plt.figure(figsize=figsize) + + ref_seq = "" + for j in range(importance_scores.shape[1]) : + argmax_nt = np.argmax(np.abs(importance_scores[:, j])) + + if argmax_nt == 0 : + ref_seq += "A" + elif argmax_nt == 1 : + ref_seq += "C" + elif argmax_nt == 2 : + ref_seq += "G" + elif argmax_nt == 3 : + ref_seq += "T" + + ax = plt.gca() + + for i in range(0, len(ref_seq)) : + mutability_score = 
np.sum(importance_scores[:, i]) + color = None + dna_letter_at(ref_seq[i], i + 0.5, 0, mutability_score, ax, color=color) + + plt.sca(ax) + plt.xticks([], []) + plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.3f')) + + plt.xlim((0, len(ref_seq))) + + #plt.axis('off') + + if plot_y_ticks : + plt.yticks(fontsize=12) + else : + plt.yticks([], []) + + if y_min is not None and y_max is not None : + plt.ylim(y_min, y_max) + elif y_min is not None : + plt.ylim(y_min) + else : + plt.ylim( + np.min(importance_scores) - 0.1 * np.max(np.abs(importance_scores)), + np.max(importance_scores) + 0.1 * np.max(np.abs(importance_scores)) + ) + + plt.axhline(y=0., color='black', linestyle='-', linewidth=1) + + #for axis in fig.axes : + # axis.get_xaxis().set_visible(False) + # axis.get_yaxis().set_visible(False) + + plt.tight_layout() + + if save_figs : + plt.savefig(fig_name + ".png", transparent=True, dpi=300) + plt.savefig(fig_name + ".eps") + + plt.show() + +#Function to visualize a pair of sequence logos +def visualize_input_gradient_pair(att_grad_wt, att_grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name='') : + + scores_wt = att_grad_wt[plot_start:plot_end, :] + scores_mut = att_grad_mut[plot_start:plot_end, :] + + y_min = min(np.min(scores_wt), np.min(scores_mut)) + y_max = max(np.max(scores_wt), np.max(scores_mut)) + + y_max_abs = max(np.abs(y_min), np.abs(y_max)) + + y_min = y_min - 0.05 * y_max_abs + y_max = y_max + 0.05 * y_max_abs + + if np.sum(scores_mut) != 0. : + print("--- WT ---") + + plot_seq_scores( + scores_wt, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_wt', + ) + + if np.sum(scores_mut) != 0. 
: + + print("--- Mut ---") + plot_seq_scores( + scores_mut, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_mut', + ) diff --git a/tutorials/latest/make_data/Makefile b/tutorials/latest/make_data/Makefile new file mode 100644 index 0000000..c47bb3d --- /dev/null +++ b/tutorials/latest/make_data/Makefile @@ -0,0 +1,45 @@ +FASTA_HUMAN=$$BORZOI_HG38/assembly/gnomad/hg38.ml.fa +GAPS_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed +UMAP_HUMAN=$$BORZOI_HG38/mappability/umap_k36_t10_l32.bed +BLACK_HUMAN=$$BORZOI_HG38/blacklist/blacklist_hg38_all.bed + +FASTA_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10.ml.fa +GAPS_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed +UMAP_MOUSE=$$BORZOI_MM10/mappability/umap_k36_t10_l32.bed +BLACK_MOUSE=$$BORZOI_MM10/blacklist/blacklist_mm10_all.bed + +ALIGN=$$BORZOI_HG38/align/hg38.mm10.syn.net.gz + +OUT=data + +# mini borzoi configuration +LENGTH=393216 +TSTRIDE=43691 # (393216-2*131072)/3 +CROP=0 +WIDTH=32 +FOLDS=8 + +AOPTS=--break 2097152 -c $(CROP) --nf 524288 --no 393216 -l $(LENGTH) --stride $(TSTRIDE) -f $(FOLDS) --umap_t 0.5 -w $(WIDTH) +DOPTS=-c $(CROP) -d 2 -f $(FOLDS) -l $(LENGTH) -p 64 -r 16 --umap_clip 0.5 -w $(WIDTH) + +all: $(OUT)/hg38/tfrecords/train-0.tfr # $(OUT)/mm10/tfrecords/train-0.tfr + +umap_human.bed: + cat $(UMAP_HUMAN) $(BLACK_HUMAN) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_human.bed + +umap_mouse.bed: + cat $(UMAP_MOUSE) $(BLACK_MOUSE) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_mouse.bed + +# targets file is already generated in this example +#targets_human.txt targets_mouse.txt: +# ./make_targets.py + +$(OUT)/hg38/sequences.bed $(OUT)/mm10/sequences.bed: umap_human.bed umap_mouse.bed + hound_data_align.py -a hg38,mm10 -g $(GAPS_HUMAN),$(GAPS_MOUSE) -u umap_human.bed,umap_mouse.bed $(AOPTS) -o $(OUT) $(ALIGN) $(FASTA_HUMAN),$(FASTA_MOUSE) + 
+$(OUT)/hg38/tfrecords/train-0.tfr: $(OUT)/hg38/sequences.bed targets_human.txt + hound_data.py --restart $(DOPTS) -b $(BLACK_HUMAN) -o $(OUT)/hg38 $(FASTA_HUMAN) -u umap_human.bed targets_human.txt + +# no mouse data in this example +#$(OUT)/mm10/tfrecords/train-0.tfr: $(OUT)/mm10/sequences.bed targets_mouse.txt +# hound_data.py --restart $(DOPTS) -b $(BLACK_MOUSE) -o $(OUT)/mm10 $(FASTA_MOUSE) -u umap_mouse.bed targets_mouse.txt diff --git a/tutorials/latest/make_data/README.md b/tutorials/latest/make_data/README.md new file mode 100644 index 0000000..035a37d --- /dev/null +++ b/tutorials/latest/make_data/README.md @@ -0,0 +1,3 @@ +## Data Processing + +Todo. diff --git a/tutorials/latest/make_data/download_bw.sh b/tutorials/latest/make_data/download_bw.sh new file mode 100755 index 0000000..239f004 --- /dev/null +++ b/tutorials/latest/make_data/download_bw.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# download example data from ENCODE (ENCSR000AEL - K562 RNA-seq); 2 replicates + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define remote urls +URL_P_REP1='https://www.encodeproject.org/files/ENCFF980ZHM/@@download/ENCFF980ZHM.bigWig' +URL_M_REP1='https://www.encodeproject.org/files/ENCFF533LJF/@@download/ENCFF533LJF.bigWig' + +URL_P_REP2='https://www.encodeproject.org/files/ENCFF335LVS/@@download/ENCFF335LVS.bigWig' +URL_M_REP2='https://www.encodeproject.org/files/ENCFF257NOL/@@download/ENCFF257NOL.bigWig' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for bigwig files +mkdir -p "human/rna/encode/$ENC_ID/rep1" +mkdir -p "human/rna/encode/$ENC_ID/rep2" + + +# download bigwig files; rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 1)." 
+else + wget $URL_P_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" + wget $URL_M_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" +fi + +# download bigwig files; rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 2)." +else + wget $URL_P_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" + wget $URL_M_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" +fi diff --git a/tutorials/latest/make_data/download_dependencies.sh b/tutorials/latest/make_data/download_dependencies.sh new file mode 100755 index 0000000..cd23a51 --- /dev/null +++ b/tutorials/latest/make_data/download_dependencies.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# create additional folder in borzoi data folders +mkdir -p "$BORZOI_HG38/assembly/ucsc" +mkdir -p "$BORZOI_HG38/assembly/gnomad" +mkdir -p "$BORZOI_HG38/mappability" +mkdir -p "$BORZOI_HG38/blacklist" +mkdir -p "$BORZOI_HG38/align" + +mkdir -p "$BORZOI_MM10/assembly/ucsc" +mkdir -p "$BORZOI_MM10/mappability" +mkdir -p "$BORZOI_MM10/blacklist" + + +# download and uncompress auxiliary files required for Makefile (hg38) +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" ]; then + echo "hg38_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gaps.bed.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" +fi + +if [ -f "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (hg38) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_hg38.bed.gz | gunzip -c > "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" ]; then + echo "blacklist_hg38_all.bed already exists." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_hg38_all.bed.gz | gunzip -c > "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" +fi + +if [ -f "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" ]; then + echo "Splice site annotation already exist." +else + wget https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.mm10.syn.net.gz -O "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" +fi + + +# download and uncompress auxiliary files required for Makefile (mm10) +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" ]; then + echo "mm10_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10_gaps.bed.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" +fi + +if [ -f "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (mm10) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_mm10.bed.gz | gunzip -c > "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" ]; then + echo "blacklist_mm10_all.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_mm10_all.bed.gz | gunzip -c > "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" +fi + + +# download and uncompress pre-compiled umap bed files +if [ -f umap_human.bed ]; then + echo "umap_human.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_human.bed.gz | gunzip -c > umap_human.bed +fi + +if [ -f umap_mouse.bed ]; then + echo "umap_mouse.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_mouse.bed.gz | gunzip -c > umap_mouse.bed +fi + + +# download and index hg38 ml genome +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" ]; then + echo "hg38.ml.fa already exists." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" +fi + +# download and index hg38 ml genome (gnomad major alleles) +if [ -f "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" ]; then + echo "hg38.ml.fa (gnomad) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gnomad.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" +fi + +# download and index mm10 ml genome +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" ]; then + echo "mm10.ml.fa already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10.ml.fa.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" + idx_genome.py "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" +fi diff --git a/tutorials/latest/make_data/process_w5.sh b/tutorials/latest/make_data/process_w5.sh new file mode 100755 index 0000000..9caa697 --- /dev/null +++ b/tutorials/latest/make_data/process_w5.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# merge bigwig replicates, generate .w5 files and run qc + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for merged replicate files +mkdir -p "human/rna/encode/$ENC_ID/summary" + + +# step 1: generate per-replicate .w5 files + +# rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 1)." 
+else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" +fi + +# rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 2)." +else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 2: merge replicates + +if [ -f "human/rna/encode/$ENC_ID/summary/coverage+.w5" ]; then + echo "example RNA-seq .w5 already exists (merged)." +else + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage+.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage-.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 3: run qc on each replicate and the merged file + +if [ -f "human/rna/encode/$ENC_ID/summary/covqc/means.txt" ]; then + echo "qc statistics already exist." 
+else + # rep1 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc_m" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" + + # rep2 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc_m" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" + + # summary + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc" "human/rna/encode/$ENC_ID/summary/coverage+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc_m" "human/rna/encode/$ENC_ID/summary/coverage-.w5" +fi + diff --git a/tutorials/latest/make_data/targets_human.txt b/tutorials/latest/make_data/targets_human.txt new file mode 100644 index 0000000..0baf8d7 --- /dev/null +++ b/tutorials/latest/make_data/targets_human.txt @@ -0,0 +1,3 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +0 ENCFF980ZHM+ human/rna/encode/ENCSR000AEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 1 RNA:K562 +1 ENCFF980ZHM- human/rna/encode/ENCSR000AEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 0 RNA:K562 diff --git a/tutorials/latest/score_variants/README.md b/tutorials/latest/score_variants/README.md new file mode 100644 index 0000000..827434f --- /dev/null +++ b/tutorials/latest/score_variants/README.md @@ -0,0 +1,3 @@ +## Variant Scoring + +Todo. 
diff --git a/tutorials/latest/score_variants/run_variant_scripts.ipynb b/tutorials/latest/score_variants/run_variant_scripts.ipynb new file mode 100644 index 0000000..db9a747 --- /dev/null +++ b/tutorials/latest/score_variants/run_variant_scripts.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f5d0f9fb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import h5py\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a94cbf8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-specific variant effect scores\n", + "\n", + "!./score_expr_sed.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1047ff0f", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (gene-specific expression)\n", + "\n", + "sed_h5 = h5py.File('snp_sed/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 63\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['logSED'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f105ecd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-agnostic variant effect scores\n", + "\n", + "!./score_expr_sad.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96e4f7cb", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP (gene-agnostic expression)\n", + "\n", + "sad_h5 = h5py.File('snp_sad/f0c0/sad.h5', 'r')\n", + "\n", + "snp_ix = 1\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logD2', snp: '\" + 
str(sad_h5['snp'][snp_ix].decode()) + \"', track: '\" + str(sad_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sad_h5['logD2'][snp_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c56efaef", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate splice variant effect scores\n", + "\n", + "!./score_splice.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980993fc", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (splicing)\n", + "\n", + "sed_h5 = h5py.File('snp_splice/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 116\n", + "target_ix = 755\n", + "\n", + "print(\"score: 'nDi', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['nDi'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05cccfb6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate polyadenylation variant effect scores\n", + "\n", + "!./score_polya.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43ac562f", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (polyadenylation)\n", + "\n", + "sed_h5 = h5py.File('snp_polya/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 47\n", + "target_ix = 100\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['COVR'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba23572", 
+ "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/latest/score_variants/score_expr_sad.sh b/tutorials/latest/score_variants/score_expr_sad.sh new file mode 100644 index 0000000..5e66a53 --- /dev/null +++ b/tutorials/latest/score_variants/score_expr_sad.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sad/f0c0 + +borzoi_sad.py -o snp_sad/f0c0 --rc --stats logD2 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_expr.vcf diff --git a/tutorials/latest/score_variants/score_expr_sed.sh b/tutorials/latest/score_variants/score_expr_sed.sh new file mode 100755 index 0000000..79587bb --- /dev/null +++ b/tutorials/latest/score_variants/score_expr_sed.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sed/f0c0 + +borzoi_sed.py -o snp_sed/f0c0 --rc --stats logSED,logD2 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_expr.vcf diff --git a/tutorials/latest/score_variants/score_polya.sh b/tutorials/latest/score_variants/score_polya.sh new file mode 100644 index 0000000..a4b6a06 --- /dev/null +++ b/tutorials/latest/score_variants/score_polya.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_polya/f0c0 + +borzoi_sed_paqtl_cov.py -o snp_polya/f0c0 --rc --stats COVR -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_polya.vcf diff --git a/tutorials/latest/score_variants/score_splice.sh 
b/tutorials/latest/score_variants/score_splice.sh new file mode 100644 index 0000000..db78c57 --- /dev/null +++ b/tutorials/latest/score_variants/score_splice.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_splice/f0c0 + +borzoi_sed.py -o snp_splice/f0c0 --span --no_untransform --rc --stats nDi -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_splice.vcf diff --git a/tutorials/latest/score_variants/snps_expr.vcf b/tutorials/latest/score_variants/snps_expr.vcf new file mode 100644 index 0000000..bb8d7cc --- /dev/null +++ b/tutorials/latest/score_variants/snps_expr.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +chr1 43110773 chr1_43110773_G_A_b38 G A . . +chr1 43120331 chr1_43120331_C_T_b38 C T . . +chr1 46309111 chr1_46309111_A_G_b38 A G . . +chr1 52632886 chr1_52632886_A_C_b38 A C . . +chr1 54053434 chr1_54053434_G_A_b38 G A . . diff --git a/tutorials/latest/score_variants/snps_polya.vcf b/tutorials/latest/score_variants/snps_polya.vcf new file mode 100644 index 0000000..5be4cad --- /dev/null +++ b/tutorials/latest/score_variants/snps_polya.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 11790946 chr1_11790946_G_C G C . . MT=ENSG00000177000.grp_2.downstream.ENST00000641805;PD=924;PI=chr1_11790946_G_C +chr1 150160094 chr1_150160094_C_G C G . . MT=ENSG00000023902.grp_1.downstream.ENST00000369126;PD=29;PI=chr1_150160094_C_G +chr16 57665101 chr16_57665101_A_G A G . . MT=ENSG00000205336.grp_1.downstream.ENST00000568908;PD=73;PI=chr16_57665101_A_G +chr16 80976052 chr16_80976052_T_G T G . . MT=ENSG00000103121.grp_2.downstream.ENST00000565925;PD=24;PI=chr16_80976052_T_G +chr16 88857261 chr16_88857261_T_C T C . . 
MT=ENSG00000167515.grp_2.downstream.ENST00000564547;PD=3851;PI=chr16_88857261_T_C \ No newline at end of file diff --git a/tutorials/latest/score_variants/snps_splice.vcf b/tutorials/latest/score_variants/snps_splice.vcf new file mode 100644 index 0000000..710eaf2 --- /dev/null +++ b/tutorials/latest/score_variants/snps_splice.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1665061 chr1_1665061_C_T C T . . MT=ENSG00000189339.grp_2.contained.ENST00000611123;SD=959;PI=chr1_1665061_C_T +chr1 1689221 chr1_1689221_G_A G A . . MT=ENSG00000189339.grp_1.contained.ENST00000614300;SD=1753;PI=chr1_1689221_G_A +chr1 50655526 chr1_50655526_T_C T C . . MT=ENSG00000185104.grp_2.contained.ENST00000396153;SD=3;PI=chr1_50655526_T_C +chr1 109489368 chr1_109489368_C_G C G . . MT=ENSG00000143537.grp_2.contained.ENST00000360674;SD=1;PI=chr1_155060832_G_A +chr1 156236330 chr1_156236330_G_A G A . . MT=ENSG00000160783.grp_1.contained.ENST00000368279;SD=17;PI=chr1_156236330_G_A diff --git a/tutorials/latest/train_model/README.md b/tutorials/latest/train_model/README.md new file mode 100644 index 0000000..1587061 --- /dev/null +++ b/tutorials/latest/train_model/README.md @@ -0,0 +1,3 @@ +## Model Training + +Todo. 
diff --git a/tutorials/latest/train_model/params_micro.json b/tutorials/latest/train_model/params_micro.json new file mode 100644 index 0000000..ab03fc6 --- /dev/null +++ b/tutorials/latest/train_model/params_micro.json @@ -0,0 +1,74 @@ +{ + "train": { + "batch_size": 4, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0002, + "loss": "poisson_mn", + "total_weight": 0.2, + "weight_range": 8, + "weight_exp": 6, + "warmup_steps": 10000, + "global_clipnorm": 0.2, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 128, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 160, + "filters_end": 320, + "divisible_by": 8, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 32, + "heads": 4, + "num_position_features": 32, + "dropout": 0.1, + "attention_dropout": 0.01, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 4 + }, + { + "name": "unet_conv", + "kernel_size": 3 + }, + { + "name": "unet_conv", + "kernel_size": 3 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/latest/train_model/params_mini.json b/tutorials/latest/train_model/params_mini.json new file mode 100644 index 0000000..d3907ae --- /dev/null +++ b/tutorials/latest/train_model/params_mini.json @@ -0,0 +1,73 @@ +{ + "train": { + "batch_size": 2, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0001, + "loss": "poisson_mn", + "total_weight": 0.2, + "weight_range": 8, + 
"weight_exp": 6, + "warmup_steps": 20000, + "global_clipnorm": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 5.0e-7, + "trunk": [ + { + "name": "conv_dna", + "filters": 320, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 384, + "filters_end": 768, + "divisible_by": 16, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 64, + "heads": 4, + "num_position_features": 32, + "dropout": 0.2, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3 + }, + { + "name": "unet_conv", + "kernel_size": 3 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/latest/train_model/train_micro.sh b/tutorials/latest/train_model/train_micro.sh new file mode 100755 index 0000000..3c334ee --- /dev/null +++ b/tutorials/latest/train_model/train_micro.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o micro_models params_micro.json ../make_data/data/hg38 diff --git a/tutorials/latest/train_model/train_mini.sh b/tutorials/latest/train_model/train_mini.sh new file mode 100755 index 0000000..2cc5aa4 --- /dev/null +++ b/tutorials/latest/train_model/train_mini.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o mini_models params_mini.json ../make_data/data/hg38 diff --git a/tutorials/legacy/interpret_sequence/README.md b/tutorials/legacy/interpret_sequence/README.md new file mode 100644 index 
0000000..1ac18dd --- /dev/null +++ b/tutorials/legacy/interpret_sequence/README.md @@ -0,0 +1,3 @@ +## Interpretation + +Todo. diff --git a/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb b/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb new file mode 100644 index 0000000..38b5c04 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3bcaea3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores_hyp.shape = (1, 1, 524288, 4)\n", + "scores.shape = (1, 1, 524288, 4)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load scores for the selected set of GTEx tissues (grad)\n", + "\n", + "import gc\n", + "\n", + "seqs = None\n", + "strands = None\n", + "chrs = None\n", + "starts = None\n", + "ends = None\n", + "genes = None\n", + "\n", + "all_scores_hyp = []\n", + "all_scores = []\n", + "\n", + "gtex_tissues = ['liver']\n", + "\n", + "#Load score file\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_CFHR2/scores_f3c0.h5', 'r')\n", + "\n", + "#Get scores and onehots\n", + "scores = score_file['grads'][()][..., 0]\n", + "seqs = score_file['seqs'][()]\n", + "\n", + "#Get auxiliary information\n", + "strands = score_file['strand'][()]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "chrs = 
score_file['chr'][()]\n", + "chrs = np.array([chrs[j].decode() for j in range(chrs.shape[0])])\n", + "\n", + "starts = np.array(score_file['start'][()])\n", + "ends = np.array(score_file['end'][()])\n", + "\n", + "genes = score_file['gene'][()]\n", + "genes = np.array([genes[j].decode().split(\".\")[0] for j in range(genes.shape[0])])\n", + "\n", + "#Append hypothetical scores\n", + "all_scores_hyp.append(scores[None, ...])\n", + "\n", + "#Append input-gated scores\n", + "all_scores.append((scores * seqs)[None, ...])\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n", + "\n", + "#Collect final scores\n", + "scores_hyp = np.concatenate(all_scores_hyp, axis=0)\n", + "scores = np.concatenate(all_scores, axis=0)\n", + "\n", + "print(\"scores_hyp.shape = \" + str(scores_hyp.shape))\n", + "print(\"scores.shape = \" + str(scores.shape))\n", + "\n", + "score_file = None\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "955bf762", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- Example = 0 --\n", + " - ENSG00000080910(+)\n", + " - chr1:196692638-197216926\n", + " -- min_val = -1.719\n", + " -- max_val = 3.385\n", + " - (Gradient score profiles per tissue) - \n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAACMCAYAAADhqz8fAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAt4ElEQVR4nO3deVxU9d4H8M8guKAiLigKCojsOygpKWAY7l63vFZWbrfFyqfltjz1WFF2b15su1ZadgXTWy6p16XE3DW1FAU3REVBQWQRkNVhBub7/EGcy8jINiwqn/frxevFnN/2PWfOwPme3zlnVCIiICIiIiIiMoJJSwdARERERET3PiYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRERERERkNCYWRPcprVaLiIgIuLq6wsPDA35+fpg4cSLi4+MbpX+VSoWioiIAgK+vL27dumVUf5999hmysrLuWL5x40YEBATA19cXbm5uCAsLg06nq7HPlJQU9OjRQ3ndGHEaKzc3F0OHDoWvry8+/PDDFo2lub333nuIjo4GAERHR8PS0hK+vr7Kz/z585UylUqF1atXK223bduG0NBQ5fWyZcvg7e0NHx8fuLq64vHHH1fK6rLv/+tf/4KTkxMcHR3x9NNPo6ysTG8sV1dXDBgwAFOmTFH2cwD4/fff4evrC2dnZ4SFheH69etK2cWLFxEUFARnZ2cEBgYiISFBKcvKysKoUaPg5OQET09P/Prrr0pZSUkJHn30UQwYMADOzs7YuHGjUpaTk4OJEyfC29sbbm5ueOqpp/T24YULF8LR0RGOjo5YsGCBslyn0+Gvf/0rPD094erqijlz5kCj0QAAioqKMHLkSPTo0UPv81GXdbS3t69Wv1J6ejoCAwNr/VxWtWTJEvz973+vc30iussJEd2XHn/8cfnTn/4kubm5yrItW7bI6tWrDdYvKyurV/8ApLCw0KgYq7Kzs5PTp08bLLt+/bpYWVlJSkqKsuz48eOi0+lq7DM5OVm6d+/eaDE2hjVr1siYMWPq3U6r1TZBNM3r3XfflaioKBERiYqKkilTphisFxUVJfb29mJvby9qtVpERLZu3SohISEiInLs2DFxdHSUnJwcERHR6XRy/PhxpX1t+/7ly5eld+/ekpGRITqdTsaPHy/Lli0TEZHCwkLp2bOnnDt3TkREnn/+eXnzzTeVcRwdHWXv3r0iIhIZGSnTp09Xxhg+fLiyfuvXr5fBgwcrZbNmzZJ3331XRESOHj0q/fr1U97TiIgIeeqpp5TYevXqpcT+0ksvycsvvywiFZ/RkSNHypdffikiIvv37xd3d3cpKioStVotAQEBEhMTIyIi33zzjTz88MNSWloqOp1OZs+eLf/4xz9EREStVsuuXbskLi6u2uejtnW0s7Mz+J6JiDz33HOycuVKg2UhISGSnJxcbXlpaak4ODhIfn7+HfslonsHZyyI7kMXL17Epk2bsGLFCnTt2lVZPn78eOXMbnR0NEaNGoUnn3wSAwcOxNGjR/HJJ59g0KBB8PPzQ2BgIH7//Xel7caNG+Hq6oohQ4bggw8+0Buv6uzFxYsXMXbsWAwaNAg+Pj746quv9OotWrQIDzzwABwcHBAVFQUAeP/995Geno6pU6fC19e32qzK9evXYWpqiu7duyvL/P39oVKpAACvvfYaBg0aBF9fX4SEhODixYsGt4uxcQLAuXPnMHLkSHh7e8Pb2xvLli3DsWPH4ObmBhFR6g0ZMgTbt2/XG3/Xrl147bXXcOjQIfj6+mLXrl3IzMzEpEmT4OXlBU9PT3zzzTdKfXt7e3z44YcYPnw4nnrqqWrrk5CQgAceeACenp547LHHMHjwYGzbtg0AkJGRgWnTpiEwMBDe3t5455139PqNiIhAUFAQHBwcsHDhQqWspnbG6tSpEzp06FCnugEBAdXel0qpqan
o0qULLCwsAFS8X/7+/gDqtu//+OOPmDRpEnr16gWVSoVnn30WP/zwAwBg+/btGDhwIFxdXQEA8+bNU8piY2PRrl07ZebkmWeewX/+8x9otVpkZWXhxIkTmDFjBgBgypQpSE5ORkpKCgBg3bp1eP755wEAgwYNQq9evZRZi7Vr1yplDg4OCA4OxubNm5XYCwsLodPpoNFoUFJSAltbW6XdzJkz0bFjR7Rr1w6zZ89WYj158iRGjBiBtm3bQqVSYcyYMVi1ahUAoF27dggLC4OlpWW1bVvTOgKAlZWVwfdLrVZj7dq1mDp1qsHyO2nbti3Cw8Oxdu3aerUjortUS2c2RNT41q5dK97e3jXWiYqKko4dO8qFCxeUZVlZWcrvR44cEQ8PDxERyczMlG7dukliYqKIiCxatEhvxqLy97KyMhk4cKBytre4uFi8vLyUs8kA5LPPPhMRkYSEBOnUqZNy1ramGYvy8nKZPHmydO3aVSZOnCj/+Mc/JC0tTSnPzs5Wfv/hhx9k7NixIlJ9xsLYOLVarTg5OcnatWurjR0UFCQ7d+4UkYrZlAEDBhicUbn9TP20adOUM+KZmZlia2srv//+u7JNnn766TvOzPj7+8uqVatERCQ2NlZMTExk69atIiISHh4u+/fvF5GK2Y6RI0fKxo0blX5feuklEal4zy0sLJTtWVO7qs6ePSs+Pj4Gf2bOnGkw3tu3Q5cuXfTaff7553rbKCEhQXr16iX5+fl6MxbFxcXy4IMPirW1tfz5z3+WJUuWKGf467Lvv/DCC7Jo0SK9dXFwcBARkcWLF8tzzz2nlBUXF4upqamUl5fLjz/+KKNHj9bry8rKSq5cuSKxsbHi5uamVzZo0CDZv3+/3LhxQzp06KBX9sgjjyhn9zt16iSZmZlK2WuvvSYREREiIpKTkyOhoaFiZWUlnTp1kqefflqpN27cOL198aeffpLhw4eLiEh0dLQEBQVJQUGBlJaWytSpU6Vz5856MRia0atpHWty4MABCQwMvGP5nWYsRERWrlwpf/7zn2vsn4juDaYtmtUQUZOpPJsPAJcuXcKUKVNw69YtBAcHY/ny5QCAoUOHwsnJSakXFxeHDz/8EDk5OTA1NUVCQgI0Gg1+++03+Pv7w8XFBQDw9NNP44033qg25vnz53H27FlMnz5dWVZYWIiEhATljHLlWWM3NzeYmpoiIyNDOQN7JyYmJtiwYQMSExOxf/9+bN++HR9++CFiY2MxYMAA/PLLL1iyZIlyZregoKDG/hoaZ35+PsrKyjBt2jSlXeU16v/zP/+DL7/8EiNGjMCSJUswb948vffgTnbt2oWTJ08CAHr27InJkydj9+7dCAwMBADMmjXLYD8FBQU4c+YMHnvsMQAVZ/i9vb0BAMXFxdizZw8yMzOV+kVFRUhMTFReV66flZUV+vfvj+TkZFhaWtbarpK7u7vR9+uMGDECP/744x3L3dzcMG7cOCxatAhDhgxRlpubm+PgwYOIj4/HwYMHsXHjRixatEjZjnXZ96vWkSozTbeX3e72sqptG1pWUzzr16+Ht7c3du3ahZKSEkyYMAHR0dGYOXNmje2efPJJXLlyBcHBwejYsSNGjBiBPXv23HG96rqOd5KWlgZra2u9ZbNmzUJcXBwAICkpCWPGjEHbtm0BAFu3bkXfvn0BANbW1khLS6tTbER0d2NiQXQf8vPzw8WLF5GXl4euXbvC0dER8fHxiI6OVi6VASouTamk0WgwZcoU7Nu3DwEBASgoKECXLl2g0WjqdGABVByA9OjRo8YDzvbt2yu/t2nTRu+m2dq4urrC1dUVzzzzDEaNGoUtW7Zg6tSpmD9/Po4ePYr+/fvj1KlTeOihh5o9zsmTJ+ONN95AXFwctm7dik8//bTO63X7gVzV11Xfo6pEBCqVyuBBsE6ng0qlwrFjx2BmZmawvaH1q0u7SgkJCUpSczs/Pz+9y8eMERERAR8fH9jZ2ektV6lU8PPzg5+
fH1588UW4u7tj3759ddr3+/Xrp1yiBABXrlxBv379lLKqB+ApKSmwsbGBiYlJtXaFhYUoLCxE79690b59e6SlpaGsrAympqYQEaSmpqJfv37KJXzZ2dnKpUS3j5mSkqJXNmbMGAAVNzevWLECbdq0QefOnTF16lTs3bsXM2fOrHE9VCoV3nnnHeVStjVr1sDd3b3W7V3TOtbE3Ny82oMRqu4DoaGhiI6ONnjzt1qtrvMlckR0d+M9FkT3IScnJ/zpT3/CnDlzcPPmTWV5cXHxHduo1WpotVrlLOKSJUuUsiFDhiAuLg4XLlwAAHz77bcG+3BxcYG5uTm+++47ZVlSUhJyc3NrjdnCwgL5+fkGy65du4ZDhw4pr/Py8pCcnAxHR0fk5+ejbdu2sLa2hojgiy++qHWshsbp4uKCtm3bYv369cqyGzduAABMTU3xzDPPYMKECZgyZYrB69cNGTFihHJfRXZ2NjZt2lRrYgQAXbp0gbu7u3JNfVxcHE6fPg0A6Ny5M4YNG4aPPvpIqZ+enl7rWeH6tKucsTD001hJBQDY2Nhg7ty5ek8OSkxMxKlTp5TXqampyM7ORv/+/eu070+ZMgWbNm1CZmYmRATLli1TZq9GjRqFY8eOKbM0X331lVIWEBAAtVqNffv2AQC+/vprTJw4EWZmZujZsyf8/PyUJ1lt2LAB9vb2yoH0I488gi+//BIAcOzYMWRkZGDo0KHVypKTk7F//35MmDABANC/f3/lXh2tVouYmBh4enoq7VauXIni4mKUlpZixYoVSqxqtVpZ/xs3buCjjz7C66+/Xuv2rmkda+Lj42NwZqsuzp07Bx8fnwa1JaK7TEtcf0VETa+0tFTeeecdcXZ2Fjc3NwkKCpKJEyfKb7/9JiKGn8qzaNEisbOzk2HDhklkZKTefRQbNmwQZ2dnGTJkiHz88ccG77EQEblw4YKMHTtWvLy8xN3dXUJCQpTr93Hbk6S6d++uXHe9fPlycXJyEh8fH4mLi9OLKyUlRcLDw5VyDw8P+fDDD5Xy+fPni729vQQHB8sHH3ygXDd+p3ssjIkzMTFRRowYIZ6enuLl5aU8TUik4n4FlUolp06duuP7cvt2z8jIkIkTJ4qXl5d4eHjo9VfTfSciIqdPn5aBAweKv7+/zJkzR/z9/eXAgQMiUvEkrUcffVQ8PT3F09NTBg8eLPHx8Qb7DQgIUJ4CVFO7xmToHovHHntMKau6jfLy8qRbt27KPRbHjx+X4OBgcXZ2Fh8fH/H29pbly5cr9Wvb90Uqnprk6OgoDg4OMmfOHNFoNErZ5s2bxcXFRRwdHWXixIl6Tyw6fPiweHt7i5OTk4SGhurd65OYmCiDBw8WJycnCQgIkDNnzihlGRkZ8vDDD8uAAQPE3d1d9u3bp5QVFRXJtGnTxNHRUZycnGT9+vVK2eXLlyU8PFw8PDzEzc1N5s6dqzwpS6TiiVIODg7i4OAg//u//6s3nouLi7i7u4uLi4ssXbpUb/v7+fmJtbW1mJiYiI2NjcyYMaNO61gTf39/vXWuqqZ7LEJDQ+Xw4cN1GoOI7m4qkTpe40BERDVat24dvv76a+zevbtZxisuLoa5uTlUKhUSEhIQGhqK8+fP6z0Niai5rFu3Dvv371dmX+oiISEBzz77LA4cONCEkRFRc+E9FkREjWDUqFG4cOECNm3a1GxjHjp0CK+99ppyD8zy5cuZVFCLmTZtGjIzM6HT6WBiUrcrrVNTU7Fs2bImjoyImgtnLIiIiIiIyGi8eZuIiIiIiIzGxIKIiIiIiIzGxIKIiIiIiIzGxIKIiIiIiIzW4KdC6XQ6pKeno3Pnzga/+ZWIiIiIiO5tIoLCwkL06dOn1ie+NTixSE9PV76hl4iIiIiI7l+pqamwtbWtsU6DE4vOnTsrg1hYWDS0GyIiIiIiuksVFBSgb9++yrF/TRqcWFRe/mRhYcHEgoiIiIjoPlaXWx948zYRERERERmNiQURUSuRl6f
Gtm2XWjoMIiK6TzX4UigiIrq3zJjxE37+ORkif23pUIiI7lrl5eXQarUtHUazMzMzQ5s2bYzqg4kFEVErcf16cUuHQER0VysqKkJaWhpEpKVDaXYqlQq2trbo1KlTg/tgYkFERERErV55eTnS0tJgbm4OKyurVvU9bSKC7OxspKWlwcnJqcEzF0wsiIiIiKjV02q1EBFYWVmhQ4cOLR1Os7OyskJKSgq0Wm2DEwvevE1E1Eq0wpl9IqJ6a00zFVU1xnozsSAiIiIiukupVCoUFRUBAMaMGYNLl+7ep/vxUigiIiIionvAzz//3Kj9lZWVwdS08dIBzlgQEREREd0D7O3tcebMGfz666/w8vLSKwsJCcGWLVsAADt27MDQoUMREBCABx54AAcOHAAA7Nu3D76+vpg/fz6GDBmCTZs2NWp8nLEgImolWuPjE4mIjFFSokViYm6T9e/q2g3m5mb1bjd06FBoNBrExsZi4MCBuHz5Mi5cuIAxY8bg8uXLiIiIQExMDCwsLJCUlISQkBCkpKQAAE6dOoUvvvgC//znPxt5bZhYEBG1GswriIjqJzExFwEBq5qs/+PHn4C/f68GtZ05cyaio6MxcOBAREdH4/HHH4epqSliYmKQlJSE4OBgvfqpqakAAGdnZwwdOtTo2A1hYkFEREREZICrazccP/5Ek/bfUE8++ST8/PywePFirFy5Urn/QkQwatQofPfdd9XaXL161agvwKsNEwsiIiIiIgPMzc0aPKPQ1GxsbDBw4EC89NJLsLa2hoeHBwAgPDwcEREROHPmDDw9PQEAR48eRWBgYJPHxMSCiKiV4D0WRET3l1mzZmHatGlYunSpsszJyQmrV6/G3LlzcevWLWg0Gvj7++Pf//53k8fDxIKIiIiI6C5V9aRQ5Q3YlR555BGDJ43Cw8MRHh5ebXloaChiY2MbPcZKfNwsEREREREZjYkFEREREREZjYkFEVErwVssiIioKTGxICIiIiIiozGxICJqJfhUKCKi2rXWv5WNsd58KhQRUStR+T+juFiDjh3bNmrfZ8/egJVVB/Ts2bFR+yUiai5mZmZQqVTIzs6GlZUVVCpVS4fUbEQE2dnZUKlUMDMza3A/TCyIiFqJc+dyAAALFhzCJ58Mb9S+PT2j0bOnOTIz5zVqv0REzaVNmzawtbVFWlpatce6tgYqlQq2trZo06ZNg/tgYkFE1EpUzlgUFmqapP+srJIm6ZeIqLl06tQJTk5O0Gq1LR1KszMzMzMqqQCYWBARtTqt9PJhIqI6adOmjdEH2K0Vb94mImplWuuNiURE1LSYWBARtTLMK4iIqCkwsSAiIiIiIqMxsSAiamV4KRQRETUFJhZERK0M8woiImoKTCyIiFoZzlgQEVFTYGJBRNTK6HQtHQEREd2PmFgQEbUynLEgIqKmwMSCiKiV2bjxIo4cSa+xzrVrhcjKKm6miIiI6H7AxIKIqJW5dasMQUHf11jH1vZr9Oq1tJkiIiKi+wETCyIiIiIiMhoTCyIiUgQHr8F3352tV5v8/NImioaIiO4lpi0dABER3R1yc2/h4ME0HDyYVq92ERGHmygiIiK6l3DGgoiIAAA6XcOeFlVWxufXEhEREwsiIvqDSqVqUDsTk4a1IyKi+wsTCyIiMkpDExIiIrq/MLEgIiIAQEPzAxP+JyEiIjCxICKiPzR05oEzFkREBPCpUEREZISnnvoZ332X0NJhEBHRXYAzFkSNRETw66/1e0wn0d2kIRMPTCqIiKgSEwuiRrJ6dQKGDVvD5KKJ2Nt/g9DQNS0dxn2NlzQREZExmFgQNZKsrBIAwPXrxS0cyf3pypUC7N9/7yRtv/2Wjl27rrR0GHXy3nuHsHRpfEuHUSclJVo4OX2LM2eyWzoUIiK6DRMLMujq1QKUl7fuL72Kjc3AnDkxuHlTjeJiTa3127ZtAwDQaMqbOjSDdDrB7NkxuHz5ZouMf7dKTr6J3NxbTTrG2rWJ1faRIUO
+x8MPr2+S8UpLyyBS/cvszp/PxblzOQbbpKTk37G/iIgjmDdvV6PF15QuXbqJpKSb8PJa2dKh0F3g5Zf34sEHv2/pMIjoD0wsSI+IIDY2A3Z232D+/D0tHU6LmjlzO1asOIOuXb9Ap07/rLV+u3YViUVpacskFllZJYiKOoOXX97bIuO3BBGp9dui+/f/Ft27f4nS0rImieHq1QJMn74NQUE/GCwXEURFnYar6wrk5akbPI5GUw6dTqDVlqN9+8/QvfuX1eq4uq6Au3uU3rKsrGJoteVwcFhe6xipqQUNjs9YImIwWbpdZQLfWtRlmxijvFyH4OA1OHv2RpOO01Q+++w4Dh9Ob+kwiOgPrSqx0GrLodU270Hf1q2XEBOT3KxjVlVQUIqMjLpfmvP11ycxaNBqAMBXX8UbrKPRlNf7MoTK2Q9DBw8lJVqUldVvdkSnExw7dh0igvz80nq1rauzZw2f+TXk1KlsPPPMTgDAjh0pAICMjGKkpRXirbcOQqVaXGP7+rxHVV29WgARwaVLN5GTU3FWfsuWS9iw4QJUqsUQEajVZcrBt4jg6tWCRvsc6HRS68xWVlYxNm9OAlCxHyQk3EBJibZavZs31bh2rbDacpVqMUaMWKe8zs8vRXm5Dnv2XIWJycdo0+Zjg+OKiN6ZzPbtP6u2nxUVaZRtr1Itxiuv7IVIxcG7SrUYy5bF69VPTr5Zbf8tKqqYqTh1Khtbt16qFoeJyceYPXsHzp/PRbduXwAA0tOLkJlZrGyTyj5VqsVo2/YTiAgmTfoPUlMLEBV1Gvv2XUW7dp8iKOh7vP32rwCAvDw1tm27VGvC1KvXUrRt+6nBMpVqsd6+aWgWQKVajDlzYqDTVXx2L17MQ0xMMmJiktGv39fV6ltaLoFKtRhbtiThzJlsFBb+dyanqEijjFm5/pX7qInJx8oMz+1/J9TqMpSX6/D99+cQEXFYWV71sx8Xl4m8PDVEBCrVYsyatR2vvFK/JPvAgVQUFJQiKSmv2t8VT88o2NlVX19DB/6GZi0/+SQWKtViTJ68uc7xuLmtgInJx7hxo6TGeqWlZbh4Ma/O/VZ1/nwuDh5Mg6dndLWyoiINysp0ynuYmJiDrKz6/a365ZcUaDTlEJFqn7/Kfaom2dkl1f5eVH5Gb93S6i1riiRMRPCXv+zA9etFjd53TWN6e0fj0qWbzTZmXezcmYK33jqI998/XHvlOqiazBYVaTB37g44Oi6/48kiQ8vz80uRllb9/0ZZmQ4vvrjbqH1Cqy1HQUH9jy9ycm412YmsOxER5OWpceRIyyfZKmngVi8oKECXLl0AfACgvbJ8xAi7el1XHBbWDyL//QNTeQ11cLAtCgo0iI/PwoABlkhKulljP4bGHT68L/buTa1zLNRw9vYWSEkpgJNT1wb/g2sp48b1x7Ztl1s6jHtep05mKCqqnjAQEVHzeeihfsjKKsGZMy0zCxUa2hf79vHYyxAPj+5ISrrZYlc2GDJuXH+oVCqYmFT8ZGQUG0hQ1AAWID8/HxYWFjX21+gzFt26ta+90h98fKzQo0cH9Oxpjt69O8LGprNS1r17B+XRh46OlrX21blz22rLrK071jkWMo6VlTkAwNa2UwtHUn930wf8Xsakgoio5fXo0QG9e//3+Kdfv8411G58relyRVPT+h1GW1t3hJtb92rLnZy6NlZI9WZiUnGwXVam07vCocGkgfLz8wWA5OfnN7SLZpecfFPS0wubdcwbN0okNbWg2cbT6XRSXq5rcPsdO5IFiJS//nWvXL6cZ7D/rKziBvdvqG1k5FE5fjyjwX02lTlzYgSIFCBStmxJqrX+2rXnBIiUn366pCzT6XSSllYgJ09m3bGdVlsuoaFr5PTpO9cx5OZNtRLfhg3npbhYI56eUZKVVSy5ubeU/Q6IlFde2aO0q2xTOfbRo+lG7TP1cfBgqt74IiKlpWU1tqmsX/mTm3tLREQSE3Oka9clEh6+Xq9+1XV54YV
dSrvBg1eLWq0VEZEtW5Lk5Zf3SE202nLR6f7bl06nk549v5Rt25KU15Wxv/TSHgEiZffuK1JYWCo6na5a3ECkpKUVSGFhqQCR4uu7srbNpVCrtbJ+faJcvZovf/nLDqW/jz76Te9v2oEDqRIbe12vrYPDNwZjqc+Pvf3XAkTK9Olb5f33D8vPP1+6Y92IiENy+XKebNhwXi5fzpMrV/KloKBim2g0ZUo9rbZcL84ZM34Se/uva9wOVcf5y192SGjoGuU90Ol0UlKikeJijfK66vtXH2vWnJPk5JsiInL+fI5kZRVLQsINSUsz/Le88j2vSXZ2sRQVldY7lry8W8o63Ul5uU7Wr0+UkJAf9D5bdaHRlMmmTRekrKzcYFlR0X/XraHbs6kcPnxN2R9SUwsM/s+6m+h0Ovn88+Oi0dT8N0+k4j2927Z3WVm55OSUyLx5OxutT51OJx9/fEwSEm7IsmXx8sEHhw1+TsrKyuX69aI69/v++4cFiJRr1xp+zKfVVh/T0OekUk5Oidy6pRUgUp599pcGj3s3qs8xv9GXQtVlWoToXiQi9X6u/7lzOQbPRjSVmJhk+Pv3RM+edZ+dO3MmG5aW7WFr27xnsSo99tg2aDTl+PHHP9WpfuX7UHkvgMhf6zxWXp5aua9h3jxffPnliPoH3ECV8f7rXyMxZ84OAPWL/U5SUwvQr983mDLFqc7b8PaYqtqzZxoiI49h+/ZkvPXWA/jb337XK9+3788ICelb5z4bYx3vJDf3Ftq0MUGXLu2abAy6tzTk7wLd/+SP+y8tLet+FQ3dWX2O+U2bKSaie05DviysOZMKABg1yqHebTw9rZogkrr7/vtx9apf+T5ota/Ue6yuXf/7T+Xhh+3q3b4xzJ7tpSQWjaHyoHrYMFuj+yovfxUmJiq4uXVHWNg6jB7tgL/97Xe9+9NqSiqaW7duHVo6BCK6B6hUKiYVLYSJBRHdE+p7LWulv/99GJydu2LiRKdGjqjupk51xoULjfNQAwuLdsjNfQGWlsafta+8ttbauiPOnp2F8+dzldd1denSXCxYcAjff3/O6HiIGmLIkD4tHQIR/YGJBRHd195884GWDgHr109o1P6qzsQ0pspJOjOzuidx/ftb4tFHXZlYUIu4dGmu8vAQImp5TCyIiFqhmmYlRICUlL+gXbu6/YuofIrIG28ENkpsRHXVv79lS4dARFUwsSAiaoVCQ6vfO9GhQ8W/BGvrjrCz61LnviofAcKbqomIWjcmFkRErdCDD1a/Lr1vXwv89NNkhIX1q1dfzs4Vz2AfPLh3o8RGRET3JiYWRESt0PPP+xlcPmZM/3r35ebWHbduvYT27fkvhYioNWv0b94mIqK7X0Mep1wTJhVERMTEgoiIiIiIjMZTTERETeCdd4YgPb2opcMgIiJqNkwsiIiaQETEgy0dAhERUbPipVBERERERGQ0JhZERERERGQ0JhZERERERGQ0JhZERERERGQ0JhZERERERGQ0JhZERK1Ely7tAAAbNkxo4UiIiOh+xMSCiKiVsLbuCACYPNm5hSMhIqL7ERMLIqJWQkRaOgQiIrqPMbEgImolmFcQEVFTYmJBRNRK6HTMLIiIqOkwsSAiaiWcnbu2dAhERHQfM23pAIiIqHmsWTMOCQk5LR0GERHdpzhjQUTUSlhYtMPgwX1aOgwiIrpPMbEgIiIiIiKjMbEgIiIiIiKjNfgei8rnoRcUFDRaMEREREREdPeoPNavy3chNTixKCwsBAD07du3oV0QEREREdE9oLCwEF26dKmxjkoa+FWsOp0O6enp6Ny5M1QqVYMCJCIiIiKiu5eIoLCwEH369IGJSc13UTQ4sSAiIiIiIqrEm7eJiIiIiMhoTCyIiIiIiMhoTCyIiIiIiMhoTCyIiIiIiMhoTCyIiIiIiMhoTCyIiIiIiMhoTCyIiIiIiMhoTCyIiP5gb2+PM2fO1Ln+1KlT0adPH6hUKhQVFd2xXkpKCkJDQ9G
lSxcMHDiwWnlkZCQ8PT3h7u6OSZMm4ebNm0pZXl4eHn/8cTg5OcHNzQ1vvvmmUlZaWooXXngBTk5O8PDwwIwZM5SyY8eO4cEHH4S3tzd8fX2xZ88epeytt96Cm5sbfHx8EBgYqFcGABs2bICXlxc8PDzg7u6OlJQUAMCmTZuU/jw8PPD222+j8quQRASvvfYaPDw84O3tjeHDhyMpKcng9igqKsLIkSPRo0cP9OjRo1r5qlWr4OPjA09PT4SFheHq1asAgJs3b8LX11f5cXZ2hqmpKXJzcwEAs2fPhouLC3x9fREcHIz4+HilzxUrVsDLywumpqb44osvqo15p3UuLi7GrFmz4OXlBRcXF7z55pvKOu/ZswcPPPAA3N3d4enpqbc9AODq1asYP348XFxc4OrqiiVLlhjcHkRE9w0hIiIREbGzs5PTp0/XWk+r1YqIyM6dOyUzM1MASGFh4R3r5+TkyMGDB2Xbtm0SEBCgV/bLL7+Ip6enFBQUiIjIe++9J/PmzVPKJ06cKJGRkcrr9PR05feXXnpJXnzxRdHpdHplOp1ObGxsZM+ePSIicu7cObG1tZWSkhIREfn555+V3+Pj48XS0lJu3bolIiInTpwQV1dXuXbtmoiI5OfnS3FxsYiIFBQUSHl5uYiIlJaWyqBBg2Tz5s0iIvKf//xHAgMDRaPRiIjIBx98II888ojB7aFWq2XXrl0SFxcn3bt31ys7d+6c9O7dWzIyMkREJDo6WsaMGWOwn8jISBk3bpzyevPmzcp7s3XrVnFyclLK4uPjJSEhQZ544glZsmSJXj81rfNbb70lM2fOFJ1OJxqNRsLDw2XdunVKu0uXLomIyK1bt+TBBx+Uf//738p74O/vr9TV6XRy/fp1g+tBRHS/4IwFEbVKR44cwbBhw+Dj4wNvb29s3rwZQMWZ66CgIDg4OGDhwoVK/dDQULz99tsICwvDyJEjAQAjRoxAz549ax2rW7duGDp0KDp27Fit7OTJkxg2bBg6d+4MABg3bhxWrVoFAEhKSsKJEyfwyiuvKPV79+4NoOJMelRUFP72t79BpVLpleXk5CA3NxfDhw8HALi6usLS0hLbt28HAIwePRodOnQAAHh5eaG8vBw3btwAAHz88cd49dVX0adPHwCAhYUFzM3NAQCdO3eGiUnFvw21Wo3S0lLlNVAxg6JWqyEiKCgogK2trcHt0a5dO4SFhcHS0rJa2ZkzZ+Dr64tevXop22P79u3IycmpVjcqKgpz5sxRXk+YMAGmpqYAgMGDB+PKlSvQ6XQAAB8fH7i5uenFW6mmdT558iRGjx4NlUoFMzMzhIeHK++Pn58f+vfvDwBo3749fH19cfnyZQDA7t270aFDBzzyyCMAAJVKBWtra4Pbg4jofsHEgohandzcXEyaNAmLFi3CyZMnER8fj2HDhgGouNzm8OHDOHr0KCIjI3Ht2jWlXXx8PGJiYrB79+5ax5g7dy62bNlSa72BAwdi586dyMzMhIhg9erVKCwsRG5uLhISEtC3b188++yz8Pf3R3h4OOLi4gAAly5dQvfu3bFw4UIMHDgQw4YNU+Lq0aMHevXqhQ0bNgAAfv/9d1y4cEG5vKeqqKgoODo6KklAQkICrl69ipCQEPj5+WHBggUoLy9X6h8+fBje3t7o2bMnwsLCMHbsWADA+PHjMXz4cFhbW6N3797YvXs33n///VrX/3a+vr44fvy4chnVd999BxHBlStX9OodOXIEOTk5GDdunMF+Pv/8c4wZM8ZgInG7mtZ50KBBWLduHTQaDQoLC7Fp0yaD2zEjIwM//vgjxowZo/RpZWWF6dOnw8/PD5MmTVKSDiKi+xUTCyJqdY4cOQJ3d3cEBQUBAExMTNCtWzcAwOOPPw4AsLKyQv/+/ZGcnKy0e+KJJ2BmZlanMb799ltMmDCh1nqhoaF49dVXMXbsWAwZMkSZdTAzM4NWq8WRI0fw6KOP4sSJE3j11Vcxfvx4lJWVQav
V4vLly3B3d0dsbCy++OILTJ8+HdnZ2QCAzZs349tvv4W/vz+++uorDB06tFrsu3fvRkREBNasWaMs02q1OH78OGJiYnDo0CEcOXIEX3/9tVIeFBSEU6dOITU1FceOHcPBgwcBACdOnEBiYiKuXbuG9PR0hIWF4YUXXqjTtqpqwIABWLp0KZ544gkEBgaisLAQXbp0qRb7ihUr8OSTTyozFFWtXr0a69at04u7JjWt8xtvvIG+ffsiMDAQEyZMQFBQULVYCgoKMH78eLz++uvw9/dX+ty1axcWLFiAuLg4jB49GtOnT6/39iAiupcwsSAiqqJ9+/bK723atEFZWZnyulOnTk0y5rPPPovY2Fj89ttvCA4Ohq2tLTp37gw7OzvY2NgolzSNHDkSGo0GaWlpsLOzg4mJiZII+fj4wMHBAWfPngUAeHt7Y/v27Thx4gRWrlyJ9PR0uLu7K2Pu378fs2bNwtatW+Hi4qIst7Ozw5QpU9ChQweYm5tj8uTJOHr0aLWYraysMHbsWKxfvx4AEB0djeHDh8PS0hImJiZ46qmnsHfvXgDA/PnzlRuuT58+Xev2mDx5Mo4cOYKjR4/i6aefhlqthqOjo1JeXFyMtWvXYvbs2dXarl27FhEREdi5c2edLlOrbZ3bt2+PTz/9FPHx8di7dy+6deumtx0LCwsxatQoTJgwQe+SNTs7O/j5+cHDwwMAMGPGDBw/flxv9oeI6H7DxIKIWp2goCCcO3cOhw8fBgDodDrlyUIt4fr16wCAkpISvPPOO3j99dcBAAEBAbCwsMCpU6cAALGxsQAAGxsb9OjRA2FhYdixYwcA4MqVK0hOTlaShIyMDKX/5cuXo2PHjnjooYcAAAcOHMATTzyBzZs3w8fHRy+Wxx57DL/88gt0Oh3Ky8uxc+dOpc758+eVexYKCwuxbds2eHt7AwD69++P3bt3Q6vVAgC2bt0KT09PAMA///lPxMfHIz4+Hl5eXnXeHuXl5XjjjTfw/PPPK/c8AMD69evh7e0NV1dXvXbr1q3D//3f/2HXrl3o169frePUZZ0LCgpQUlICAEhOTsbSpUvx6quvAqh4utWoUaMwcuRILFiwQK/P0aNH49q1a8qldDExMfD09ESbNm3qHBcR0T2nhW8eJyJqEUeOHJGgoCDx8vISb29v2bx5c7WnQgUEBMjevXtFRCQkJES2bt2q18f48ePFxsZGAEifPn0kJCREKZszZ47yxCS1Wi02NjbSo0cPMTMzExsbG3nzzTeVup6enuLu7i4DBgyQiIgI5SlPIiLHjh2TQYMGiZeXlwwaNEgOHDiglF26dElCQkLE09NTfHx8ZOPGjUrZe++9J05OTjJgwAAZP368XL16VSkbMGCA9OzZU3x8fJSfU6dOiYhIeXm5vPzyy+Lq6ioeHh7y3HPP6T3pyc3NTby9vcXDw0PeffddJVa1Wi1z584VFxcX8fLykvDwcElOTr7j9vfz8xNra2sxMTERGxsbmTFjhlI2cuRIcXNzE0dHR3nxxRdFrVbrtR06dKisWLGiWp+mpqZia2urt143btwQEZFVq1aJjY2NmJubi6WlpdjY2MiJEydqXee4uDhxcnISNzc38fLykg0bNijjLVy4UExNTfXGW7hwoVIeExMjPj4+4u3tLcHBwXLmzJk7bg8iovuBSqTKQ7eJiIiIiIgagJdCERERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0ZhYEBERERGR0f4ff6GHcgVPT1EAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - (Attribution at position of Max positive differential saliency) -\n", + " - max_pos (rel) = 251085\n", + " - max_pos (abs) = 196943723\n", + " - chr1:196943627-196943819\n", + " - y_min = -1.78648438\n", + " - y_max = 3.45445312\n", + "liver\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuiUlEQVR4nO3dd3Qc1fnw8e8W7ap3y6q2XOQGNu4F9wYGh2YIeflBgFASIBAChEAIENMJxBBKANMCJIbQAomBYMANG5tiywYbN7kXSVZfSSuttt33j2ut2kpa1ZXt53OOzu5o7szcbTP3mdsMSimFEEIIIYQQQnSAMdgZEEIIIYQQQhz/JLAQQgghhBBCdJgEFkIIIYQQQogOk8BCCCGEEEII0WESWAghhBBCCCE6TAILIYQQQgghRIdJYCGEEEIIIYToMHN7N/R6veTm5hIVFYXBYOjMPAkhhBBCCCF6AKUUFRUVpKamYjS2XCfR7sAiNzeXjIyM9m4uhBBCCCGEOE4cOnSI9PT0FtO0O7CIioryHSQ6Orq9uxFCCHESmDcPLrkErrgi2DkRQgjRFuXl5WRkZPjK/i1pd2BR2/wpOjpaAgshhBDNKi+Hb7+F6Gi46aZg50YIIUR7BNL1QTpvCyGE6FKrV4PHA19/DUoFOzdCCCG6igQWQgghutTGjfrRZtMBhhBCiBOTBBZCCCG61P79wc6BEEKI7iCBhRBCiC518GCwcyCEEKI7SGAhhBCi7XL/B5X7A0paUtK1WRFCCNEzSGAhhBCibbwe+PpK+O76gJKXlcHgwRAX16W5EkIIEWTtHm5WCCHESapwLTgKIG8ZOIogNLHF5GVlcM89kJfXPdkTQggRHBJYCCGEaJv8z449UVCxs8XAwuvV81hMnAhFRd2TPSGEEMEhgYUQQoi2Kd8ReNJyPXdFv36QktKFeRJCCBF0ElgIIYRom8q9ASctLdWPSUlgMOg/IYQQJyYJLIQQQrRNTeBtmsrLISwMzHK1EUKIE56c6oUQQrRNTREMvhnc9laTut0QHd0NeRJCCBF0ElgIIYQInLsKPA7IWACmcPDWtJzcDVFRjf5Zvz2UUp2fRyGEEEEhgYUQQojA1TaDih0OITFg29ZicpcLIiO7IV9CCCGCTgILIYRoq8Y9kE+mu+41xWAKBcux2e6ih7aY3O0Gi6Ub8iWEECLoZOZtIYToBi+8AH/6U7Bz0QncFXVBBYDR1CSJx+vhlexXKLQX4naDqWkSIYQQJyAJLIQQootVV8O998KDD0JubmDbLFq3iO+OfOdbfnHji2wt2NpFOWwDrwss8S0mKa8p55ql17CtcBtudztGhKodl1bGphVCiOOKBBZCBMkPR3/g0bWPBjsbohusWAGFhXoW6p07A9tm0fpFfHXoK9/yPSvvYfX+1V2UwzbwusAS22KSMkeZ71FqLIQQ4uQhgYUQQfLN4W94eM3Dwc6G6AYbN7Z9mzJHma+ArpRqsBxUXhcYrS0msdXYfI9+ayxq+6ScTH1ThBDiJCCBhRBBUuYoo8JZgdvrDnZWRFvVLxgr1bDpjp/mO/v3g9UKaWmB7d7pcVL
trvYFEg63A6fH2TMCC+UCQ8ttm2wOm+/R6+2EFk3KCxt+A/nLO7gjIYQQXUkCCyE6i7MUvr0OKnYHlLzUUQrUFcLEievIEfj5z+G99wJL7yuY1/h/DCqvC4wh4CiEnOfBtqNJkvpNocxm8HjaeIzGNRr7l8CuZ2DtxQFNyieEECI4JLAQorP8cC/sXgzrLgsoef3ClzixHT4Mc+fChAnQp4//NE4nOBz6eePvRo/6rnhdYDRD1UH47gYo/rpJEluNjRBjCLYaG2azHnK2Ra111t77mn50loCjoEPZF0II0XUksBCiM3jdsO8N/bw8sN65tTUWtY/ixJWXB5mZutw8YEDT9UrBmWfCpEl6QrkyRxkx1pgGAUWUJapnBBbKA4aWe2PbHDbSotOwOWztq7FocDwFxd+1nk4IIUTQyQR5QnSGihxwlYMh8Fi9tLqUpIgkSqslsDju1fazaKYzst0OvXs3v/mnn8KqVfp5djaUJ5aREZPRILDoE9OnZwQWBpPu89ACW42NjOgMXWMRGUCNRUtqivXcGWOegpLsDuxICCFEV5MaCyE6Q8VuPVLOOXsgY0FAm5Q5yugb07dnFBZFl1FKN3NKTGw+zWef1T33evV3IyO6YWBRP9AIKmOIrqFrQf38BtQUqiWVe8EUDlnXw8g/6+MLIYTokSSwEKIzVO6BXlMgMhOGBza9cqmjlD4xfaQp1AnO6dSPoaHNp9mxA6ZPh3vu0ct+A4voIAYW9ftAGEP0yFD1eLwe7lt1HwdtBwHdFCo9Kt3Xx6K270izWhp+tuogRA3Uxw3rDeHpnfCChBBCdAUJLISo1ZHZfqtzIXqQfh7RTO/cRqTG4uRQUwNGY8uTxOXkwI036tm5k5P1dyMpIokadw1e5aXMUUZKZAqVzkpUsOd+MByrsbAkQPRgAArsBSxcvZBNeZsA3RQqPTodm8NGSAhUVHTgeG47hGd0MM8yk7cQQnQHCSyE6AzuqjYVfpRSlNeUkx6dLn0s2qOVeSN6BI8TPA6cTghppfXOgQMwYoSeSK5fP10wjwmNIdISSaWzkjJHGbGhsYSaQ6l2V3dP/ptjDAFvja6d6z0bgNyK3AaPjZtClZd34HiearDEdzDTQgghuoMEFkJ0Bk81mCMDTl7tribUHEpsaKzUWPjTnsCh6jAUftUzZnOuzodPToV/p2As3Yi3hb7OLpfug1B/GNraUaFiQmN8M27HhMYQbY0OzvelflMlY4geqKCexoGFrcZGWlQa5TXlhIbqGot2fyweB5isUJULB9+H6qPtfRVCCCG6mAQWQtRqqZ13rZoS/+s91WAKhT1/h8+ngbOsSRKbw8b2wu2AHhEqNjSW2NDYrutj4XXpTuU9oaDd1ewH4dMx8PkU2LEo2LnReajIAVcZ1op1uFzNfwxVVbqmon4fjDJHGdHWaN+Qs40DjaAymJt8v3MrckmPTq8LLBw24sPiMRgMhEXW4PHo4KK8vB0duT3VemCEkg2w9iIo+77teQ7kty2E6BYbN+q5fcSJSQILIQK1+S54PwFWzGm6zuvSBS77AShc43fUnCVbljDihRG4vW5KHaW+gmKXBBZeN6w8C5ZmwddXdv7+u1r9AqC/wmD9gqJSsPtFPSxpVJb+DIItfwUMuAZmfY41QtdkuVz+k1ZXN+3Y3biGona5NtCoroYbbtD9Mmpquvi1NBYSqWeZryevMo+RySPJq8zz5T/CEkG0NRpjuJ4tPDcXtm9vx/EMJqDl4W07W5Wrii1Ht+BtZVhdIUQrGtU8L1kCY8fC8OEBDOogjksSWAjRnPonxMp9sO0RiB4KpZubpjWFgtfZ4u52l+zG7XVzoOwAZY4yajw1fHfku665A537CRxdAUnT9J3zE13BahjxIPxkB/S5OLh58br0XfVhd0LyHMxZl2E0QmVlM8n9lF3LHGU88OUD/Fj4I2WOMmw1Nh5a8xC7indhc9h44AF4/nn4299gZ2DzMXYea6KeV8Jt9/0rtyK
Xkb1HNmgKlbIohfzKfGqwYbXCli2wYUM7jmcKA08nR0+tNLNbvnc5I14Y4RvlyumE996DI0c6NxtCnGwefRRGjYL09A4OQy16LAkshGhO/bvi+Z9D3CiYvxVmL2+a1hSm24InTgBzhN/d5ZTk0Cu8F7tLdlNaXcqOoh3cufzOrum8nfcZ9Lsc5qyGCa92/v57mvIdkPYTPUFh0tTg5sVp0/0QIvvrZWMIoaFwtJmuAeHhTWsdyhxlrDu0jgJ7ga/GYtX+VRy1H6XMUcYHH8Cf/wxr10JYWNe+nCYsCfqxfIfvX7kVuYxM1oGF0+PE4a67FWmrsREbC8uXwwcftON4plD920qaDkkzOpT1QG0v0lUrO4p2oBT85Cfw05/qR3F8sdvhZz+DIUPgm2+CnZuTgFJQ9C2U76xbPvZYWAg//ggffQSrV4PFErxsiq4jM28LEYjyXZAyTxdc40Y2XW8KBWcJpJ7V7Ag2OcU5zO4/m5ySHGKsMb7/d0mNReVeyLxUP48Z0vn770mUApdNz29QtkW3x68d+jcYXDYIidF3w9dfCb1nEhd3Bfn5MHSoDiKs1rrkYWG6mZTLVTd6VP3vRG1gUetoaSU7d8KvfgUxdV+j7mOOAKMFjnwCNUWADiyyErKwu+wU2gsbJLc5dGCxeHE7j2cKA0c+WGL0X2doZab07UXbGZI4hO2F2+njnMeaNfC//8HevXWbHzqkZ1Ov/1mKnudvf9OF2LPPhs2bYcKEYOfoBLfhRsh5Tj8/4xtIHO9bdeAADBgAqalBypvoFlJjIUQgnKUQ2lu3nd90u74rXV9ItJ7Iqxker4cjFUeY2mcqOcU5DfpVlDpKO39uAmeJnkxs7+uw8dbO3Xdn6MzhYpVbNz8yhcKXC2Djbzonj+3PEHDsNe17A4q/IS0Ntm7VzaGysxumtlj0PBf1OzPaauq+XzaHrUFgkVdQQ1xckIIK0J+XNQG23gcH3wF0YLFi3wqsJiu7S3Y3SF7mKCM2tgPHs8TpzvldqdF3cXvhdn6S9RO2F21n3TqYOxfmzdP9WjweXUjt1w/m+Olu1WVame1c+Ld0KTz2GLz6KlxxRbBzc4KrPgp7XoHZq+DMb5vcCCgthYSEoORMdCMJLFpiPwDZv4Nvr4PSdoxEIk4sBgMUfwvb/wLuRg3mIzJb/I4ctB0kNSqVrPgsckpyKHOUkRCWwOCEwXi8HqpcVZ2dWf2Qvxz2vtLJ+25eUNrMGkN0LYW7CmKHByEDjYRE6z4ISkHmZYBuT/z66/D44007LBoMkJamOzYrBYdz3VS5qrj4lIsZmTySoqoinB4nC4YuYHjScEorqzAHu67ZmgjKA4Db66HAXsAty26h1FHKvtJ9pESmcO+0exmVPEr3t0jpwLEiMvXM9o7CVpN2BqUUO4t3cnbW2Wwv2k5hIWRm1q1ftgx279aFpOef74YMVefDsknwQSp8Mb0bDnhiOXwYRo/WzxsPkiA6WelmXaOfNA3ix0JUw5pjk6lpJWF+ZT5z3pjDVwe/6rZsiq4lgUVzvB5YPgfCknVnULf9+JiUS3QNSxw4juqRfsL9zKwd2R9KN+mgw1nSZHVOSQ5Hyo9wy7JbyCnJobS6lLum3sWOG3eQEpXS+SNDWeN1gWTUnztnf8oLR1fBviVQtrXJ6q+/htNOg/nz4cUXA9mf6twhQC2xYN8HQ25pcIhuHzEJdDMod5WeVyN5FqADh40b4f77/W+SlQWvvQb//CdszbERY43h7Yve5tfjfk2hvZC40Djev/h9rh97PTWWPMrKgtzxMSLT9/SooxxF3Wd4wHaAwYmDuW/mfUzrOw2bw0bfvh04VmQmoODrX+jAvivU+y7mV+bj8rj4+vDXbC/cTkhIwxG9fvhB115ER8Opp3ZRXo6uhF3PQe7/YMcTEDcCFhyF099q/35P0muW293yrPeiE7lsuilw2Q/wlhH2vNxgdVwc5OU13GTpzqXsKt7FW1s78N0WPYoEFs2pztX
th4fcBrkfQ9WhpkNcipNHVBbkfa6bgJj8NKqOytKPm25vMFpOrZziHOwuOz8W/sj+sv2UVJf4+ll0ySR5UVmQtwysvfyvDyBItjvtdfn67nrY/08daNv3N0l7771w5536bu6113bOS2iT2BGw/y3dLApYtAjOOQf+9CdYt66b82KyQuypsOsZPe8Jum9FSwYNgnffhcsvhwq3HloWINoaTVF1kW85JjSGanMe4eGwfr0eqraoKIA8dfYNkXp3InOdLrLis/jP//sP87Pmc6j8ENHWaF/+yxxlZGU1zEabmCMgLFWfh6vzWk/fQduLtmN32blrxV0UVxcTmVDOli16nculC6ldGtTteELfoIgfo5s/VeyCXlNh17Ow5sLAR8g6SQMJ0M3Vqo5VAicnw65d+rlctruYNRGqD0PUQOjz0yarBwzQw05v26Zr/RwO+HDnhzw651GW7lra+U2CRVCc+IFFe0+u1gRQLh1QhMTooTs7c//i+JIyF0q+g/+N0kPPNhaeXm8UIAuYQlm1fxVPrn+S4qpidpfsZuH0heTdluebSCw2NBaAGGtM548MlTxXBwKfjvW/vpUag0pnJTNen8G8f87jh6M/6Bmt+18Ftm2w7bEm6Q2GumFTu+za0NJvrfdMPSndynkAPPGEDi4efRROP72L8tOSpOmw/XHYdBsAU+sNVOWvs+/0ei1cKt1lvqAzxhrTIAiNscZgc5Yyd64e6WbcOH2hbkwpPRqO3X7s8+jsCeJ6z9CPBiO5ISkMShjEuYPPZXzaePIr8hvmt8bGjGPJhwxp593jpHpvkLVrG2nvKNrBuYPP5dNLP+W03qeRMnwb69fDLbfANdfo9/y//9VN1774ogsyUJKtRzhzFMDGm3TtUOFaGHC1Homr0dwadjusXAlr1ugRd052v/413HUXfPyxfj/mzoV77oEnn4Q33/Szwf43Yd8/4MjHevCHtlJKTxhZU9wj+sHUuGt4e+vbbMzd2OZty8vhoot0rfObb+pJLdskYbweOGT3i+CubrI6OhrGj4cZM3Qn+tKqCpbvXc5f1v2F3IpcsvOym+5THHc63FJ38+bNREZGBpS2oqaCKGtUk+ctGjNGtyFojrsS3A4ITQT0Sdbp1FVuPvV6S9auj4n1UuWqItISiVKKckc5y/cv55DtEHannTMHnskY62/hhQlgNEGfn4E5u8n+apc9Xg+7S3ZT5arC5XGRGZtJhCWCCEsEHq8Hh9tBhKVuGFK3143T4yQ8JNzv+iYv0+PG5XURFhLWYNvSUl3eiohy4VZuwswN1zdRXQAmK05jGB48On29fddyup148RJqDsXlcVHlrMIaYvUtV7uqsZgtvmW3cmMxhFFZqQsO/r4SSkFZmW7naghxYMCA1WytO5bJou/wmiNwYMCIEYvZQo27BoUi1BiiT+DmcDA3HGOz2l2N2WAmxBRCjVvf0bOamx+upX56h9vhO1Zzn69+Af8Hm96EhHHw4yEwNRo/VF0K+x6A/pew/pPXeeOHN5jWZxoXrLwAs9HMuYPPJXdXLpnVmRwtOkpxfDHZjmzCi8L5ZsM3hBaGYjKaqHZVE2IMweM2U12tRw1yG+yEmvX6KlcVFpMFV40Zp1O/n00Kq94kqDgd9q2D9Auo/PpLwsxhmIwmKp2VhIeEYzQYm77eY7+3J9Y9QT9PP1KiUrjqhat4buzPMb93E1iTwOmiInI1kZZIDAYDFTUVXHpZBA89ZOSpp+CMM2Du/ArKa8oxGoyEmkOJDY2lqsrA4cMweHCjvGZnt3xuqJ+/5j4b5wQoyNQ3AsJP58ors/nlL3XfhjlzdDOt5nhVw3NBpbMysHNTS/lxnAm5b+kmUUmjwZTN2LF6JCGjyct/Vh7B4Xbg9rpJjUqlX78Y+vfXF3J75SYiiyPJzs6moKCA8v3lJEcmk52dTdHRIkr3lnL1Rdl8/rkOKvLz9d3Z+s2+7rpLv+7wcJg4EaKimubXZtOdxmvXlZXp325oeL1
zSeNzQ+32ngQ4GAbxY9i0ZQ8RRRFkZ2fjzfWSl5NHfK94srOzqTxQSV5hHo5e2SQm6kmxGrxlzb1/jZedP4F9b0HiJJy7nRyueA8jRrx4SY9Kr/vt1le6Wd85NUc23V8Ly99u+JahEUPpVd6Lwc7BfL/7M371Kwt//SvMnKnfr/HjdTOoKVMgLEr/Xs0mM9XuakyYMGKhvFyfBz2Geutd1bg8LsJDwn3LHq8Hs8mM2WDG4XEQ7jwb4xeLdC2NzQTxs+H7P8LqNLD24uDqj1ixfw0RlggiLZFU/jCXfXvMzJmjC24Nmv+19nqhwfnA7rTr86bXTEkJJCbq70h9FTUVvt++3WnH4/UQaY3EaDBSUVMBCiKten3tvr0eI3l5kJHR9GNq/NuPsERg9NSAqxxCk8BgoKYGSkrw31dnzJgGi85rNpKfr++KK6ULse+8A7ffDosWKVavr/t9VzhsRG17UQ9jXPahrm0c+rtmyx1NzhWOMqK23gvJM6HqiB7go2/dPDoer4dqd3WDc0uYKapeBWLd+ab+dbuiQte6+B30QHl9ffzcBitO5fGVI0qrS3nymycZEDuAp/Of5sKhFzI+fXzA5YyKCl0DevCgvq5szHZjDtV5cnlcONwOzEazLod43FQ6K9mcvxmXV6+blDGJRMtN8MHvISQKPBdCecNy0y9+Ab/5jR5R7c0VXzI1ZCp3nHoH72x9h9f+9xqGUYaG779Sx8oFob6h3IuL9XUx3E8RB/Q5MSoKTFY/5QxzXUeb+tf+GncNDreDMHOYbxkgxGilrEwfy28fHaWg+ghYEnUeW1B7ba89VzhdTiIsEb7l2jJJA16PboZt7UWV14XFZMFs1OcOp+fY9sZ65xZLuG85xBiC2YCu6Q1LAaMZt1u/f4mJ4PA0LVeYQR8vtDeVbofv3FBYFngfN4NqZ91TeXk5MUEblkQIIYQQQgjRXWw2G9HR0S2m6XCNxerVq4mcPr1hdN842j+2fMUHVzA0cSgGg4EtBVv454J/Npu2VV43fDYR5n4Fe/8O5kj+vekyqquhVy99N6nxWMm33abbOmdm6rumu1zL+Xzv5+wu2c1zZz9HUmRSh94Lf5RSGOo34aj3+i7996XEh8azo2gHd0+7m+mZ05t9P2wOG/PfnM85g85hR/EOxibM4M07ruCDD/RdjSpnNfP+NZPE8EQqaiq4ddKtnDfkvLr92Q/Bustg6r9h32sUmhM478sXGRg/kPzKfG6ecDPzB833pd9fup9f/PcXjEsdx87infxqzK8IMYbwxvdvMKXPFK4dcy3z35zPRUMv4tVNr/LJpZ/42oE3cGx/GzfqpimvvKLbKR915XDZB5cRGRJJpauSDyf8P1JUGfS7EvI/Y3PIAG5csZDE8EQK7YW8PeUa0m3fwsBrwbZDzxdhjfPt/52t77CndA9/mPoHbv7fzVw47EKm9Z3W7Pvp9ri56N2LuGzEZaw/vJ5FZyxq+4db/05ZIN/ZFjz85cNkxmVywdALuOBfF/Daea+RHJXsW//ixhd5f9v7xITGcLTyKCuvXFlX49A4Tx3MSyCKq4q57qPrSAhP4Lox1zEyZWSL6T/+GB58sO5Or79+GFuObsHtdTMqZVTnZ7jRXU02bmzwXv1y6S99TQcuHX4pt55+a/vPTT3cvn26ucgnn+jlnBy49VY9LGet+UvmMy5tHMv3LufV814lKyErOJltD48Lls8AjOCpgomvQlwLVVYdtO7QOm765CbMRjMY4MkRX/L+21amTtVDAs+cCdsKt7Hl6BbOzjo78NqwbvL65td5+punMRgM9I3tS//v3mfqVDj33EYJj33/b/vsNoYlDmN40nBuWXYLa69a2+Aat+bAGl7KfomB8QNJCEvAsebXlJbC3XfrGrva/ja1+7vwnQt5bM5jpEenM/216Xw9sBeM/gscXQ0FK7nuH29x/vm6w7zbTd2IaO38Pd694m425W3CrdzM6TeH2yff3vpG9Y71SvYrHCo
/xAHbAa4aeRVT+/qZkLNe+kfWPILT42RH8Q6uG3Odvs43w+vVd/PHj4ekJD2aVVJSy6/1vH+dR2VNJXaXnd9P/j0Lhi7wrVNKMfP1mb5WBbdOvJWfnfqzFvPbGrfHTXZeNn1j+9I7sndA27Tb4aVQ8CUM+S0cfI81B3/KG++m8tJLjdIdy39enm7K9fe/6xrYmP47ufyDy4mwRGB32XntvNcY2muoL/0h2yHO/9f5DIgfwJ6SPTx79rPc+MmNvt0+Pvwz/vFSAldfrT+badMaHo9D/4Hcj2D8Ylh/BQxfCFEDmr12vLXlLVbtX8W0vtNYsmUJ0/tOx2w0c/lpl3Pj/25k4fSFDfLH9kV6GPWUs+Dba/lP76t55OunsZqsuLwubh5/M8989wzhIeHYnXa+uOILrvnvNaRHpTMubRwXhRRC5X4Y+Sh8MYNHvltBaprJN+zylwe+5OE1D3Na79MorCrk1cw0nf+Us2DNBSyOv4K3tr5FeEg4zionpS8E2GRbtZPNZlOAstlsurV2fc0sX/XhVYqFKBairvjgihbTBiT7dqVWn6fUpjuVOvBOQJvY7UoVF9ct3738bvXG5jcCP2ZH1Xt9D65+UN2z4h6V/JdkVe2qbrJe1bWEVwpU6qJUFflwpDLdZ1JP/We56t+/4a5HvTBKPbj6QZXylxS1rWBbw/0d+o9Sy+coZdul1LtxSu16XqUtSlPXf3S9inkkRm0v3N4gvcfrUREPRaiJL09U1gesasvRLU1eyh+++IM6e8nZ6sK3Lwzopa9erdTjjyu1eLHef8wjMerdH99VkQ9HKpejRKlvf63UxluVyv6dqrbnKesDVvXMN8+o8IfClcvjUqpij1JHPlHq6GqlPO4G+f1o50fKfL9ZRTwUoYz3Gevy28L36+WNL6uMJzLUhiMbAsp/V3ps7WO+34blAYvyeD0N1n+a86ka8NQA9cv//lLNen1W8ztq/0+6zVbvX63e3/Z+QGmvvFKp227Tzw8f7sJMtUW99+r+Vfer+1fdry741wXqo50fNVnvd/k45fUqdccd+jO55Ral9u1TKiVFqWXLlFq7VqnKSqVuW3abuuPzO9TgZwYHO7ttV7heqbejlKouUOqL2UoVfNWlh7M77SrxsUS15sAaNf3v033/93q79LCdZsORDWrSy5PU4g2L1Y0f36gWLlTqZz9TascOpTZtqpfw2Pf/95/93neuOvW5U5vsz+P1qEHPDFIDnhqgDpQdUDNnKrV0qZ8DH9vfrNdnqYe+fEi9mv2q6vtkX6U+OlWpvM+VqspXqnK/Ou00pb7y9xG28/f45g9vqis/vFKd+9a56tOcT1vfoNF1uKCyQGX+NVMNeXZIk/O0PxtzN6rRi0erzL9mKnftdasV1dVKVVQ0ykMzrv/oevVK9itq8DOD1b7SfU3Wz18yXz3zzTNq8DOD1ea8zf530lPPbc5ypb76uS4bbPit2rk5X6WnK5WdrdS6dfXSHcv/hx8qNXt23b/dHreKfiRa/Xvbv1XUw1F173+91zvw6YHq05xP1agXRimP16MSH0tU2bnZasBTA5RSSjkcSu3erVRRUdPjqezfKfXDfUrteU2pN81KlW5tsv/6y5/v+dz32zHfb1avbXrNt8xCVFl1WcPt11+p1I5nlCpYp9S2x9XO/O9V+hPpavGGxWryK5PV9/nfqz5P9lGPrX1MTX5lslJKqR8LflSvbXpNfzdXn6/UviVKbX9CqTdN6tFH3Orii/U5ft8+pWrcNSrlLynqpk9uUk99/ZRSX8xS6vBSpXIWK7X2EvVpzqdq+HPD1e+W/U7Nf3V+XZm/FZ0zGnpzranq36k3GDhl3SKuH3s9ZqOZjGg/jS3batRjuubCU6XHjg9AeHjDdnkPzHqg4/kIVO37cexxQcE2xr88nnkD5+l2f43WN35fRyw5i99N+h03fHIDU04ZwF1H9VjqsbH6Ts7E9InEhsbi8roYkthotmVTGLgqIKIvzF0HYcm
MS1vG3P5zefvHtxmc0LDhu9FgZETvEfx13l854x9nNN0fcOXIK1nw9gKeOPOJgF7+tGn1In6MTEyfyL7SfYxJGYPZGgfjnvWlDQWG9x5OpCWS0Smj9d3AyP51HaQbyYzNxO114z7WeS4zNrPV/Fw9+mquHn11QHnvav3i+vme943p26Q2YnzaeKpcVQxJHEJCeM+YYWha32mtJzpm3z5YcOxGWlpaF2WoLRr91mYdWMvC1Qv5seBHXj//9SBmrOsZDLpTe31r18J77+nJ+saOhZ8O+ylT/z6VOybfEZxMdkTZVkiaCqG9umVwjfCQcIb1GsbT3zzN7H6zff8/Xsb1GJUyir2le1m2ZxmXDb+Mc8+Et9/WQx+feWbT9FkJWWTFZ9Enpo/fWmqjwcjinywmryKPPjF98Hpb7rCfHJnMH1f8EYAJaRNg3F9g8x26v0PSTGbNuocnntB9RywWmDz52IbtHIjgzIFncufyO3F5XMy4aEbrGzQ6Ti/g5XNeJjY01n+tcSOjU0YzKX0SI5NHYjIGNnJBW+bbOGPAGTz77bN4ldfvdW9qn6kcLj9MSXUJw3v3gHl+2iIkCk5/w7c4CD163nvvQXw8TJpEg3N59VuKiHpdSExGExPSJrCreBcT0if4ff/nDZjHXSvu4qyBZ2E0GJmcMZmnvnmKWf30UOFWqx7Nyi9zFNQUwil/gIwLGvbn8mNo4lCSIpJ49qxnuXfVvQxKGITZaCY2NBaP19P095Q6H364G4bfD8rLoN4jcHvd5FfmM6XPFE5NOhW7U49CeXqGHqVkWK9hDOs1TG9vDAGPA9IvgITx3BJt5KmndYuBs86Cn//cwhkDzmDxxsXsu3kf7NoLh/8L456DxNOZGJpBYVUhEZYIxqeN52M+bvH11ercUaHqjyVoMDSK8xWn9DqF3IpcjlQc4ZSkU/xv33g/rTGaAw4qgq7R+zG011AemvVQ3cW78b2RRoYnDWdT/iYK7YWM6t+Hq6/WVaWnnqo7cU5Kn8RL2S8xJmWMrpqu/34mjIfy7XqytOJvwX6QcanjeP371zmt92kNm2sdMzJ5JO9te4+shCxdsG9kUMIgtt6wlTMGnNGut2NyxmSe2/AckzMm+10/IW0Cizcu1hebVmTGZhIeEs7KK1aSEJZApCWwAQV6iv5x/RmXOo5lly2jf1zT4CkuLI5ISyRLdy1lfNr45nfUQ4frKyvTncV6jEa/tfFp4/nuyHdkxmb2uKYq3aF/f/j97+G3v9UX0gnpE1h6yVJ+MyHYs5i3g6sMIvpB4br2jfLTDrP7zeb97e8zu//s1hP3MEaDkdMzTmfpzqXMyJyByQT/93/wwAO6c3pjWfFZZMZmMj9rPlnx/pvIzcicwSXDLwFg2DD46CP9//37m6ZNiazrkZ0SlQJJU+CMr2D2Chh+D488ooPdf/7T/4hqbRUfFs/A+IGcnnF6iwN8tGR2/9mMSR3TesJjnj37Wa4ZfU27jtWaWf1msfrAaub2n+t3/dS+U3ll0yuMSxvnPxBqT7kriCZOhIce0k3bgQbn8T59YMsW3WypdhCD1soZ8wbOIzsvm3kD9YiCMzNn8uaWN5mZOdN/Buq/X+nnwb43dJOljbf4Jg1tTlp0Gi6PC6PByNDEoQyIH0DviN4sv3w5A+L9RC99LoLRT+qBKML072RS+iRe2PACU/pMwWgwMjplNC9mv+gLLBpInAy7F+v5RSr3Y7EauP12PeLXz3+ukzw06yFWXrGS1KhUOOWPuiP6h2mw/XFiQmNICEvg/e3vMz61hXJHI507f2srhZpTkk5hR9EOTEYTp/TyE1j00EJRV2rLhXtE7xE8v+F5hiQOwWAw8NRTevhDiwX69oVJUZP4/sPvuWDIBXqDxu/n5Lfhx4f0LMVJ0xibOpY/rfoTt0y8penBgFHJo3jgywd8P7jONrnPZO5ddS+T+/j/wU9Mn8jfvvsbt026ze/6+iIsEUSERGAxWQKqrehp+sX241D
5IY6UH6FfbD+/aSakT2DJD0tYsmBJN+eu42rvM/RUIaYQHp79MGlRx6pTWqk9PBmcOdDP7erjgatCjx5jP6BHN+kGl5x6CSaDqeWgvwf7zYTfMKXPFOLC4pqubNTyYFB5LntK97C3dC8jk0e2uu+bb9Z3lpcsgZtu0gFLfcmRyUxIm0B8WDzJEclNtrda9Rw5nWn55cs7d4dBFG2NZvWVq+kb09fv+rGpY6l0VjIlw0+UCCfUuW3iRAgJ0SOBORzw7be6nLFw9cK6wKLRze+zss6i6PYi4sPiAZg/aD7Z+dnN3yRoUq56Bw69q0egM7RepB6cOJiPcz5mWK9hJEUkUeGsYGfRTr83FAHdnzT1LN/ilD5T+HDHh77XMzljMp/v/dx/YDHwl1C0Dr6YDinzoN+lTZKkRaeRFn3suhfaC2Y0rJWYlD6J179/vU19Hzs3sGhFenQ6xdXFAPSN7SsX7zYanjSc9YfWc+3oup6vw+vVbA6MH8jDsx7m/CHn+99B6jz9d8w4UwwhxpBmawRGJo/kUPmhgC4e7TE5YzIrr1jZ7PFnZM7g2tHXMrWPn85xfvSN7cvag2ubDywa16b1IHFhcdS4a9hZvLNBs6j6zhl0DnanXd/VO87ExekhAHuyG8bdULfQw74fog1MVn2HrtcUmPIORDce37jzZSVk8cdpf+zy43SVGZkzmJE5I6C0KVEplFSXsLN4Jz89pekkaI0NHqwHCNi+Xc9d0GR/kSkM7TWUgXED8bRyx/ekFEA5yW+h8hiLyULZHWV+Wx2caIxGPUnr88/XDRIwre801l+9nlHJ/gvGRoOxQfPigfED29YcNvVM/RegoYlD+TjnY54880lAt7b48sCXDIhrrr1VQ9eNvY4FQxf4bgJcPfpqRqeMJinCz+BD5jCY8nbAefPnhnE3MLXv1AbTEbSm279pgxIG4fEeO3nIxbtNhvYaisloYkTvEc2m+cPUPwS8v7iwOKr+WNXs+uG9h3PNqGuarxLsIKvZ2uLFLD06nRfPeTHg/WXGZvLVoa+arZ7v6TJjM1lzcE2zNUgXn3IxF59ysd91Pd2gQfqEv2CBLmC0NhO1EO0WEqP7WURk6D/RMX6u07Xn2kEJg/xs0FRCgv9mVaBrLArsBURZojg16dSO5PTE1AnlpPY2+ToeZWbCn/9ct2wxWZiYPrHuH11d7mx8A7PR8rBew/j75r/7+kH0j+vPqgOruHnCzf63byQ8JLzBzdP06HTSo9O76tUwKmUUo1JGUV5eHvA23T7z9o3jbuSm8Td192FPCBaThQO/PcBVo67qmgM0ujMSag7lpXNf0sOf9USN8psZk8lXB79qvsaihf4rPUG/uH5syN3QbFOo49mcOfDyy7o/0LvvBjs34oQW3geK1uuJxGpK9CSFolMNShiEyWAiObJp06W2SolK4WjlUQrsBQ36WwhxIhqTMoahiUN9A+YMiBvAjwU/1tVY9PBySiC6vcaitkOXaJ/UqNTWE52kMmMzKa4urgssenDTJ3/6x/bH6XE239byOHb++brd66pVOrgQosskTtL9K9b+DCpyYNbnYG5mil7RLqOSR1FSXdIp+0qJTOGo/ShR1qjjspmnEA00Lms0Wp7Zbybbfr3Nt9w/rj8KdUJd90/8RnfixNXoB1vbN+F4vePfL64fsaGx/jtQHufMZvjsMz06VELPGC1XnKhCEyF5Dhx6DzDokQNFp7pzyp3cOaVzelTHhcVRXlNOfmV+p9SACHE8GZ0ymtn9ZpMRc+I02zTouTjarry8nJiYmICm9xaiO+RW5PLMN89wz/R7CA85/u5Qur1uatw1RFgiWk8sut9xUvMlgJpi+P4u6D0L+vqZaVgET+MhTZUi86+ZHLUfpfzOckJMIcHJlxCiWW0p88utHHHCSI1K5ZE5jwQ7G+1mNpoxW+QnKUSHWRNg/OJg50IEKDkyGbvLLkGFECcAKcUIIYQQImhSolKockkneyFOBBJYCCGEEKJ
7+GlOmByRjN1pD0JmhBCdTQILIYQQQgTN42c8jtvrDnY2hBCdQAILIYQQQgRNpCUy2FkQQnSSbp8gTwghjjv150RpPKqNEEIIIQAJLIQQQgghhBCdQAILIYQQQgghRIdJYCGEEEIIIYTosHZ33q6dsLu8vLzTMiOEED2SzdZwWc57QgghThK1ZX3lZ7joxtodWFRUVACQkZHR3l0IIYQQQgghjgMVFRXExMS0mMagAgk//PB6veTm5hIVFYVBRkkRQgghhBDihKOUoqKigtTUVIzGlntRtDuwEEIIIYQQQoha0nlbCCGEEEII0WESWAghhBBCCCE6TAILIYQQQgghRIdJYCGEEEIIIYToMAkshBBCCCGEEB0mgYUQQgghhBCiwySwEEIIIYQQQnSYBBZCCCGEEEKIDpPAQgghhBBCCNFhElgIIYQQQgghOkwCCyGEEEIIIUSHSWAhhBBCCCGE6LD/D+1t7S0lZaOrAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------\n", + "\n" + ] + } + ], + "source": [ + "#Enumerate and visualize attributions; liver example CFHR2\n", + "\n", + "save_index = []\n", + "\n", + "#Visualization parameters\n", + "logo_width = 192\n", + "\n", + "top_n = 1\n", + "\n", + "use_gaussian = True\n", + "min_padding = 65536\n", + "gaussian_sigma = 8\n", + "local_window = 1024\n", + "\n", + "main_tissue_ix = 0\n", + "\n", + "tissue_colors = ['darkblue']\n", + "\n", + "#Loop over examples\n", + "for example_ix in range(top_n) :\n", + " \n", + " print(\"-- Example = \" + str(example_ix)+ \" --\")\n", + " \n", + " print(\" - \" + genes[example_ix] + \"(\" + str(strands[example_ix]) + \")\")\n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]))\n", + "\n", + " #Grad analysis\n", + " \n", + " #Calculate min and max scores globally (for scales)\n", + " min_val = np.min(scores[:, example_ix, ...])\n", + " max_val = np.max(scores[:, example_ix, ...])\n", + " \n", + " print(\" -- min_val = \" + str(round(min_val, 4)))\n", + " print(\" -- max_val = \" + str(round(max_val, 4)))\n", + " \n", + " max_abs_val = max(np.abs(min_val), np.abs(max_val))\n", + "\n", + " min_val -= 0.1 * max_abs_val\n", + " max_val += 0.1 * max_abs_val\n", + "\n", + " print(\" - (Gradient score profiles per tissue) - \")\n", + " \n", + " #Gradient profiles across input sequence\n", + " f, ax = plt.subplots(len(gtex_tissues), 1, figsize=(8, len(gtex_tissues) * 1.5))\n", + " \n", + " if len(gtex_tissues) == 1 :\n", + " ax = [ax]\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + "\n", + " #Get tissue scores\n", + " score = scores[tissue_ix, example_ix, ...]\n", + "\n", + " l1 = ax[tissue_ix].plot(np.arange(seqs.shape[1]), np.sum(score, axis=-1), linewidth=1, linestyle='-', 
color=tissue_colors[tissue_ix], label=gtex_tissues[tissue_ix])\n", + " \n", + " plt.sca(ax[tissue_ix])\n", + " \n", + " plt.xlim(0, seqs.shape[1])\n", + " plt.ylim(min_val, max_val)\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + " \n", + " plt.sca(ax[0])\n", + " plt.title(\"Gradient Saliency for gene = '\" + genes[example_ix] + \"' (\" + str(strands[example_ix]) + \")\", fontsize=8)\n", + " \n", + " plt.sca(ax[len(gtex_tissues)-1])\n", + " plt.xlabel(chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]), fontsize=8)\n", + " \n", + " plt.sca(plt.gca())\n", + " plt.tight_layout()\n", + " \n", + " plt.show()\n", + "\n", + " #Apply gaussian filter\n", + " smooth_score = np.sum(scores[main_tissue_ix, example_ix, ...], axis=-1)\n", + " if use_gaussian :\n", + " smooth_score = gaussian_filter1d(smooth_score.astype('float32'), sigma=gaussian_sigma, truncate=2).astype('float16')\n", + " \n", + " #Calculate min/max positions and (differential) values\n", + " max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + "\n", + " print(\" - (Attribution at position of Max positive differential saliency) -\")\n", + "\n", + " print(\" - max_pos (rel) = \" + str(max_pos))\n", + " print(\" - max_pos (abs) = \" + str(starts[example_ix] + max_pos))\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - logo_width // 2\n", + " plot_end = max_pos + logo_width // 2\n", + " \n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix] + max_pos - logo_width // 2) + \"-\" + str(starts[example_ix] + max_pos + logo_width // 2))\n", + "\n", + " #Logo min/max value across tissues\n", + " min_logo_val = np.min(scores[:, example_ix, plot_start:plot_end, :])\n", + " max_logo_val = np.max(scores[:, example_ix, plot_start:plot_end, :])\n", + "\n", + " max_abs_logo_val = max(np.abs(min_logo_val), np.abs(max_logo_val))\n", + "\n", + " 
min_logo_val -= 0.02 * max_abs_logo_val\n", + " max_logo_val += 0.02 * max_abs_logo_val\n", + "\n", + " print(\" - y_min = \" + str(round(min_logo_val, 8)))\n", + " print(\" - y_max = \" + str(round(max_logo_val, 8)))\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + " print(gtex_tissues[tissue_ix])\n", + "\n", + " #Get tissue-specific scores\n", + " score = scores[tissue_ix, example_ix, plot_start:plot_end, :]\n", + "\n", + " #Plot scores as sequence logo\n", + " plot_seq_scores(\n", + " score,\n", + " y_min=min_logo_val,\n", + " y_max=max_logo_val,\n", + " figsize=(8, 1),\n", + " plot_y_ticks=False,\n", + " )\n", + " \n", + " print(\"--------------------\")\n", + " print(\"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67a3cf9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb b/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb new file mode 100644 index 0000000..a4f3a1c --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 2, + "id": "534495a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores.shape = (1, 524288, 4)\n" + ] + } + ], + "source": [ + "#Load scores\n", + "\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_CD99/scores_f3c0.h5', 'r')\n", + "\n", + "scores = score_file['grads'][()][:, :, :, 0]\n", + "seqs = score_file['seqs'][()][:]\n", + "genes = score_file['gene'][()][:]\n", + "genes = np.array([genes[j].decode() for j in range(genes.shape[0])])\n", + "strands = score_file['strand'][()][:]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "#Input-gate the scores\n", + "scores = scores * seqs\n", + "\n", + "print(\"scores.shape = \" + str(scores.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4dcb8667", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- 0 (+) --\n", + " - gene_id = 'ENSG00000002586.20\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAUYElEQVR4nO3df3iVdf3H8de9s8F2xjZRNpU2EWEwGBgy+WEQQVEpRlBgmaSGmJpemeZlZZfiF5LiMiuN0EvEXFj5o4QK+6FCCRGMkCCFSQwV25i4IcJ+s+2c8/3jzb37nLGNcc6ZY+v5uK5znXM+5/7xuT/35/7x/nzu+z5OKBQKCQAAAABikNDdGQAAAADQ8xFYAAAAAIgZgQUAAACAmBFYAAAAAIgZgQUAAACAmBFYAAAAAIgZgQUAAACAmCVGO2IwGFR5ebnS0tLkOE488wQAAADgNBAKhVRdXa2BAwcqIaHjPomoA4vy8nLl5OREOzoAAACAHqK0tFTZ2dkdDhN1YJGWltYyk/T09GgnAwAAAOA0VVVVpZycnJZz/45EHVi4lz+lp6cTWAAAgBPU1tbq/PPPlyTt379fqc8+K/Xpo9rZsyPTU1O7L5MAOqUztz5EHVgAAACczKFDh7wv111n7zU1kekAegWeCgUAAAAgZvRYAAAAoMcIBAJqamrq7mz0OklJSfL5fDFNg8ACAAAAPUJNTY3KysoUCoW6Oyu9juM4ys7OVr9+/aKeBoEFAAAATnuBQEBlZWXy+/3KzMzkf9TiKBQKqbKyUmVlZcrNzY2654LAAgAAAKe9pqYmhUIhZWZmKiUlpbuz0+tkZmZq//79ampqIrAAAACnl4SEBF188cUtn0+WDnQGPRVdIx7lSmABAAC6REpKirZt29bpdAA9G80EAAAAQJSampq0aNEi5eXlKT8/XxdddJFmz56tnTt3xjxtx3FUU1MjSRozZozq6+tjmt6DDz6oioqKmPPVHnosAAAAgCjNnz9fNTU12rJli/r37y9JWrt2rXbv3q0xY8ZEDBsIBKK+fyEegcqDDz6o6dOnKysrK+ZptYXAAgAAdIm6ujqNHDlSklRcXCx/e+l+fztTADpQVyft2dO188jLkzqonyUlJVqzZo1KS0tbggpJmjlzpiSpsLBQTz/9tLKyslRcXKxly5Zpy5Yteuqpp9Tc3KykpCQtW7ZMEyZMkCStXr1a3/3ud9W/f3/NmDEjYl6O46i6ulr9+vVTSUmJbrvtNlVUVKixsVE33nijbr755pbhli5dqtWrV6uiokILFy7U/PnztXjxYpWXl2vu3LlKTk5WYWHhCYFPrAgsAABAlwiFQnr77bdbPp8sHTgle/ZIBQVdO4/t26WxY9v9eceOHRo6dKjOPPPMdofZtGmTduzYodzcXEnS0KFD9c1vflOSVFRUpAULFmjXrl2qqKjQV7/6VW3evFnDhw/X/fff3+b0AoGArrrqKj355JPKy8tTXV2dJk6cqIkTJ2rs8bwmJydr69atev311zV+/HhdffXVWrhwoX7+85/rt7/9rUaNGhVtiXSIwAIAAAA9T16enfh39TxOIvxpSm+88YbmzJmj+vp6TZkyRZMmTdLkyZNbggrJgpElS5bovffeU2JiooqLi9XY2KiioiKNHTtWw4cPlyTdcMMN+va3v33C/P7zn/9o9+7duvLKK1vSqqurVVxc3BJYzJs3T5I0YsQIJSYm6uDBg8rOzo6uDE4BgQUAAAB6Hr+/w96ED8JFF12kkpISvf/+++rfv7+GDBminTt3qrCwUM8//7wkRfyTdWNjo+bMmaOXX35ZBQUFqqqqUkZGhhobGzvdexcKhTRgwIAO77lITk5u+ezz+dTc3BzdAp4ingoFAAAARCE3N1ezZs3SggULdOTIkZb02traNodvaGhQU1OTcnJyJEnLli1r+e2SSy7Rjh07tHfvXknSypUr25zG8OHD5ff7tWrVqpa0ffv26fDhwyfNb3p6uo4ePXr
S4aJFYAEAAABEqbCwUKNHj9aECRM0cuRITZo0SevWrdOdd955wrDp6elavHixxo8frylTpqhv374tv2VlZWnFihWaOXOmPvKRj7T755GJiYlau3atnn32WV144YXKz8/X9ddf36lH0d56662aP3++xowZE5enTLXmhKK8a8rtujl69KjS09PjnS8AANDD1dbWtlwGUlNTo9Tjn2traiLTU1O7LY/oORoaGvTWW29p8ODBEZf6ID7aK99TOefnHgsAANAlHMdpeaxs+A2u7aUD6NkILAAAQJfw+/3avXt3p9MB9GzcYwEAAIAeg/8+6RrxKFd6LAAAAHDaS0pKkuM4qqysVGZmJpfRxVEoFFJlZaUcx1FSUlLU0yGwAAAAXaKurk7jxo2TJG3btk3+9tL9/namAHh8Pp+ys7NVVlam/fv3d3d2eh3HcZSdnS2fzxf1NAgsAABAlwiFQiouLm75fLJ04GT69eun3NxcNTU1dXdWep2kpKSYggqJwAIAAHSF556Thg7t7lygF/L5fDGfAKNrEFgAAID4mztXSkvr7lwA+ADxVCgAANA1qqu7OwcAPkAEFgAAAABiRmABAAAAIGbcYwEAALqEI2nQoEH2Oew/BxzHaTMdQM9GYAEAALqEX2rz/wb8fj//QwD0QlwKBQAAACBmBBYAAAAAYkZgAQAAukS9pHHjxmncuHGqr6/30uvr20wH0LNxjwUAAOgSQUmvvPKKfQ4GvfRgsM10AD0bPRYAAAAAYkZgAQAAACBmBBYAAAAAYkZgAQAAACBmBBYAAAAAYsZToQAAQJcZMGDAKaUD6LkILAAAQOz+/ndp0iQpwbsYIlVSZWXlCYOmpqa2mQ6gZ+NSKAAAEJtHH5WmTJEeeqi7cwKgGxFYAACA2Nx3n71v2dK9+QDQrQgsAABAbNx/z/b5IpLrJU2dOlVTp05VfX29l15f32Y6gJ6NeywAIBa7dknDhkl9+nR3ToDO27ZNOuMMKTc3PtMLBOzdvb+itFSSFJS0YcMG+xx2mVQwGPTS3aDE9dJL0uHD0he/eOJ83n1XqqyURo2KT757u8ZG6W9/kz796e7OCf5H0GMBANE6dkwaPVr62tfiM73NmyXHkY4ejc/0eoODB6WmJvt85IiVz7e+JYVC0qZN3Zq1kyoqkoqLuzsXZutW6e23ve/jx1tAHK177pHWr/e+Hztm76GQnfifd96J49x1l/e5Xz/v88aNUmamtHOnnQh/6lPSlVfatFobPdpe6JylS6VLL5X27pU2bGi7TLtaVZV0uvVKBYMWcCHunFAoulpWVVWljIwMHX3kEaWnpFgrhePYj+4k162zA+TcubYSHccbRpKqq62iX3758dw4Nm5VlZSe7g3rjhs+7dbZducfCtkrfF47d0o5OdJZZ9lvDQ12AP/EJ6RVq6QrrpCSkyPz4DjSK69IAwdK55xjv117rb0vWiSVl9v0Roxov5DCl7W99GDQDo4ZGVLfvt6Ofv16aeJEye+PHPfAAds5P/GEVFtr47VWXS3t328tRpMnS2lpkeXVej24v23fbjfe3XijfZ88OfJ3x7Gye+opaf58r6zd38vK7L2uzobNy/OWsbTU8tVWeYWv2127pAcekL7zHSk/35bx5puln/3MOxA5jnT11dLgwbazOnjQfj9yxNbLxInSiy9K06ZFtiK7ea2qklavtmXYuFFauVL6xS+8cqmvl2644cR8StL990tZWVbfjh2zdTF7tnTokDRunLWy5edLd9xh9WrFisjxZ82SZs6Urr/evk+bZuMNHiylpNj427dLv/qV9NGP2nCPPSYVFEjPPy+98YY3rYEDbXldX/+69M9/2glENFatityGNm6U9u2zspg3L3LYCROkpCSru+efL2Vneyd5I0ZIr78ujRlj2164hx6yOvuVr0i33mqtpU88Id12m/T++9Ltt0vXXCO9847NMxiUfv97ac0a6aabbPizz5YSE+2EbfFiafly6ZZbLA9uHXT
zeMst3vYsRdb7xx6zZbzmGmnqVNtX9e9vdfyPf7STl7w8rzzclthjx2x/UVJi82/LokWWz5tuspbVm26Sfv1ry9PYsVZ/T9WXvmTbXrhLL5W+8AXpuusi0z/2Mdu3xiInR/re9+zz5s1Wly+7zMrdPUGdOlV6+eXI8WbPtu19+/a2p3vddVa2P/2p9NZb0g9+4J1wLl9urei/+53V5TPPlHbsiG05YnXHHdKPfhS/6T35pLf+w5c9N9fqVGdNmybNmWPb4cKFVm/jGZC23p6iVCvJDSFqZE+J6ij9tDBggO3TJbsh/ayzbB90KsaNs16hzrjsMunPf45Mu/xy2w/l51t+Yt2eO1JQ0P72Gm7BAum112zbvOwy20dXVdkx2nXXXXY5nHvPzVVX2b7PNXOmtHZtx/PJyrL98gMPeGmzZtmx4O677fzIvZfn4YftPKm01LaBJUss/eMft/33ww+ffLn697d8rVolDRlix9nBg23b2rvX9rOhkPTCCzb8o4/a8Tr8HKj1+VB76SUl0o9/7M378cftePeHP1g5rlol/fWv9tuiRdKgQXa89PnsWPl//ye9+Wb79cs9hrf2xBO2PP/6V+T8Jenee6ULLrDPa9ZYkP/uu9L06bYP/vKXVSUpQ9LRo0eVnp7eYXHGHlhI6ngWAADgf1GPDCyAeHIbLR3Hu2SwhzmVwCL2eywOHbLehdbXSIZClhYKWaTVXut9MOi1BLbVq+FOSzqx1dEdNhiMnH9bEVv4NNyVm5AQOf/WWrd07ttnrbM+n40fClnLaXvjnkp6+HKH57/1ModC1mLqtsS3V1Zu2Scmeq2tblm1lQd3Ou602ysXt+zCb9AL31hal334cgUClp+2ektcTU3Ws5Ge7s2/udkrZ3f6zc32PSnJW0/usiUkRPZahc8jIcFehw979TYQsGUOb2lobLT0DRustWjUKBuvb19bdndegYDNI7zHzOeznoRzzrHlcZe3ocFaVtyen6SkyB2OO51AQKqosFYydxi3vh07Zi1FS5dai4VbJs3NNmwgYC1IjY1WZklJXo/VgQNe75vjeMPX1loLfFJSZDklJlrvk89n801Ksve+fb3lray01p7mZuslKiiQPvQhb5t05xMMWk+Q3x+5/Rw7Zj2Dkyfb96YmL19uHXPL2d2PuOu/ttZaqUaP9sr5vfek1FRbn4mJNk7r+uhy8+huI+G9NW4ZhG934fXDLfM335Q+/GG7LOT222353GXo08e7PESy5U9J8b67y1Nfb3lISPB6TiUrz/HjbTnT0rxesuXL7TKRzEybRmKiVw+LiqxXxF22UMjW4f79ls+0NC8vdXW2/05MtFapxESppsbq3rBhXtm5r02brM659WLJEukb37BxDx2yVsm0NOvh8vmsjldVWU+jW7fc/LrbemOj1Se3boTvn9z5r1ljLWf9+kkzZngtt1dcIf3mN1bOr71mPR1z5lg+58+3Xs+sLK+eVVdLv/yl9VZOmyZdfLH0mc9YfqqrrZwdx5YnOdnG8fks724daW62ZUpN9bb1/HzpJz+xcq+stJ7DF1+0ZXvkESuzkSNtOn6/rcfKSpvfBRdITz9tvU6OY/NparLXm2/atrFypfUcDRli+QoGrczcV0KCt4986SXrKXznHRt/xgxvfd9xh5XLyJH2fdAgaehQuxykstJ6OkeNsv1iebl04YW2DAcOeL3XBw5Yb9WwYVaGd95pPVgHD0qFhYqLFSu8HuN777VW22uvtdby5culZ57xhv3kJ631OCHB1rW77ZWVWQv85z7nbadnnGHbXyBg68Hvl1591ZalpsZbn01Ntk9LSLDesoICb/tcutTqTFaWzcet148+Kl1yifXGnneeV4ePHLHtNBCQ/vEPm+ewYVaXm5vt5TiWr2DQ5l9fb+WekWH1sk8fL1+1tbaPS0uzPFRUWH0991yr1888Y+vo8cdteUtKrCV80SJb7+5leevXW2t+RoblwT3+JSRYWVRW2nres8d
6zaurrRW/oMCGf+MNqzvuPnL8eLs8ce9er8ciFLJlSU729jNpafbu7p+2brXpFRTYPiUlxYZpbPSO7e4xxj12u8eFPXts+EGDvHwkJNh5WnOz9TgEg/bZ57Plcvd/DzxgLf+zZ9s+wV33DQ1WFu7xyeezXkD3apfExMhjt3TiMbz1uVxbamutTDIyrHzc44m7bC+8YD3bmZn2PRCwZXHz5u43m5u942NRkW2n7rqsr7eyu+8+m8/3v+/VseZmKzt3fJ/Pm7fjWF1zz3Mk673YvdvqfifE3mPRiegFAHAaeO21yEsue5pFi+yEQLIT/7/8RfrsZ73fX33VAqji4o4vU0X8jRkj/fvfdrJSXt5yj0Wneizuvlup991nl8Lm53snZIcP20kl4mP9eguAw+9vATrhVM75CSwAAD3DsWPWSzFvXvutgege//2v3QfmXm9/fP3USso6PkiFwgKLc89V1jvvWHpNjVJTwy6Gan1PJYBuRWABAAC6z+jR1gORktL2E4H27vV6zlqfhgwebJfvEVgAp4VTOefnfywAAEB8tb7vsrX27k+U7Kk4GzfGNz8APhAEFgAAIL7a6m0oKrLHgUuRD4tobfBgewHocQgsAABAfB3vsWgIBjVn+nQpENBzZ58t97lnDY6jOcc/P9fQoOTwJ6IB6LEILAAAQHwdDywCkv60bp19dv9BXVLgjDP0J/dzD322P4ATtfMHDgAAAFFq61Ko8P9xAdArEVgAAID4+uEP7T38fyj4Twqg1yOwAAAA8TV7tvTcc/anbAD+Z3CPBQAAiL/Pf16qrY1Mu+ce6cCB7skPgC5HYAEAAD4Yixfbe+uAA0CvEHVg4f5hd1VVVdwyAwAAeo/asACiqqqq5QlQ7aUDOP245/qhth7K0IoT6sxQbSgrK1NOTk40owIAAADoQUpLS5Wdnd3hMFEHFsFgUOXl5UpLS5PjOFFlEAAAAMDpKxQKqbq6WgMHDlRCQsfPfYo6sAAAAAAAF4+bBQAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMft/1LaIsEggB2EAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAi9klEQVR4nO3deXyU1b3H8U/2PQGSEBKIgCCLGCWCKKBFccOFWrdaxaqtVWq1Xpfrdr3WXlurVr0uV0u11q0u4AIilioqSFEEgRAlQNiXJCSQfZYkM5Pk3D8OmcwkYQlJCMv3/XrllZk8z3OeM89Mnnl+zznnd0KMMQYREREREZEOCO3uCoiIiIiIyOFPgYWIiIiIiHSYAgsREREREekwBRYiIiIiItJhCixERERERKTDFFiIiIiIiEiHKbAQEREREZEOCz/QDRsbG9mxYwcJCQmEhIR0Zp1EREREROQQYIzB6XSSkZFBaOje2yQOOLDYsWMHmZmZB7q5iIiIiIgcJgoKCujXr99e1zngwCIhIcG/k8TExAMtRkREREREDlEOh4PMzEz/tf/eHHBg0dT9KTExUYGFiIiIiMgRbH+GPhxwYCEiIkeQwC8MY7qvHiIicthSVigREREREekwBRYiIiIiItJhCixERERERKTDFFiIiIiIiEiHKbAQEREREZEOU2AhIiIiIiIdpsBCREREREQ6TIGFiIiIiIh0mAILERFpnhRPk+OJiMgBUmAhIiIiIiIdpsBCREREREQ6TIGFiIiIiIh0mAILERERERHpMAUWIiIiIiLSYQosRESklXs/v5cbPrqhu6shIiKHEQUWIiLSisvrwu1zd3c1RETkMKLAQkTkKPXskmfZ6drZ5rIaXw01vpqDXCMRETmcKbAQETkKGWO467O72Fy5uc3lbp8bt1ctFiIisv8UWIiIHIXq6uswmD12d6rx1agrlIiItIsCCxGRo5DL6wr63ZLb61ZXKBERaRcFFiIiR6Gm1og9dXdSVygREWkvBRYiIkeJ6z+6nqcXPw3su8WixldDbX2t/7nT48Tb4AWgoLqAuD/FUV1X3cU1FhGRw4kCCxGRo0Sho5CdbpsFqqk1Yk/jKNxeNxGhEf7np75yKh+u+RAAh8dBja8Gp9fZxTUWEZHDiQILEZGjhNPjxOmxwcC+WiwaTANhoWE0mkbABhMOj8P/OPC3iIgIKLAQETlqODwOHF4bDLh9bnrF9NrjOApjDNHh0dTV1zVvuzuQaGqpUGAhIiKBFFiIiBwlHB5HUItFamxqmy0WTa0UsRGxuL1uGk0jTq+Tak+1vxzAX5aIiAhAeHdXQEREDg6n1+kPCtxeN73jerc5xqKuvo6YiBgbWPjcRHmjgNYBhVosREQkkAILEZGjQKNppNE0+oMBl9dFalzbLRZur5uYcBtY1PhqCA+1XxWBYytiwmM0eFtERIKoK5SIyFHA5XWRHp/uDwbcPjepsalttljU+GqIjYj1d4VyeBwkxyQHjbHISMhQi4WIiARRYCEichRweBwkxyZT67NzU7i8LnrH9W67xcLn9gcWNb4aHB5HUCDR9FxjLEREJJACCxGRo4DT4yQhMgGDAZoDi7ayQrm9uwOLcDvGomVg4fQ4SU9IV4uFiIgEUWAhInIUcHgcJEQlEBYSRn1jvb8rVFstFk1doWIiYvxdoYJaLLyOoG5VIiIioMBCROTQFhLS/ANQ+QOsfQoavO0qxuFxEB8ZT3xkPE6PE5fXRUpsSptjLNrqCtUnvo8/kHB6nKTHq8VCRESCKSuUiMjhoqEOFk6Gmu1gGuD4+/Z7U6fXdoVKiErA4XHg9rpJik6ivrG+1bpBg7d9brwNXpKikmhobABskJKekM7iwsWd9tJEROTwp8BCRKQTfLbxM9w+N5cNv6
xzCzbGtlYYAzv+ZYMKAE9Zu4pxeBw2sIhMwOm1LRZxEXEYYzDGENLUIoIdY7G1aiuVdZXERcRRW19LSmwKoSGhNDQ24PSqxULaKeDzhTHdVw8R6VLqCiUi0gneyXuHv+X8rWt3UpkLPU+GSTkQnd6uTZ0ep78rlMPjwO1z8/6a9/E0ePA0eILWdfvc/Gvjv5ieN93fFSohMsF2o9o9yZ4Gb4uISEtqsRAR6QQF1QXscu/q2p1U5cHAn0OvbEgc2q5NmwZvJ0Ql+MdYPPzVw4BtoYgOj/avW+Or8T9uygrVtK3D48DpcdoxF0o3KyIiAdRiISKyv2pLoL71YGeAQkchFbUVXbt/bzkkHW8fh8e2a1On18m05dP4cvOX/jEWTVoO4A5c1tRiMW/TPKrqqmxg4XVyyfRL1GIh+6+p+5O6QYkc0RRYiMghr64Oli2DxsZurMT29+GjTJg9EDzBAYQxhtr6WnpE9+jau/j1LojsBZ5y8Fa1a1OHx8H68vUUu4r9YyyatEw521aLxbTl09hcuZmK2gpqfDUsLli8X+lm6+vhhRfg00/t8znr5tDziZ40mu58M0VEpCsosBCRQ1p9PZx5JowZA088cRB3HJjmNSQEfvgd9L0Ykk+xF/gBKmor6Bndk4yEDAodhV1Xp/oaCIuGBRfAkl+2a9PA1oXquuqgVoqWk+S1XBa47U7XTv/jGl9Nm1mlAj34IPz2t3DFFfb5mtI1VNVVUVBd0K76d5gx0GIsiYiIdC6NsRCRQ9rChVBUZO94Ow5mz5vAbEy1JTBnCFzwPYSEgakPynJTULySjIQM0uLTKHAUMDx1eNfUKTTC7vsABLYuVNVVBQUEbbVYZPfJJio8yt8VqknLcSQur4se0T3a3Kcx8Pbb8NlnEBVl/5Zfns+AHgPIL8unf4/+B/Ra9ktgFqLanTD/XKjOg9EvwHG3dN1+RUSOYmqxOFhMI9TtUv9SkXZasgSuvRbOPx+uvLKLd1ZTaFO61pUG/72uBJKG27kjXBvt74A+44WOQvom9CUjvu0WC48Hvv7aBkgdEh5vx3gMurHdmwYGB+U15fRL7McPv/6ByUMmtx5j4XPz2NmP8ZcL/+LvCtWkrCY4ze3exllUVdlubOeeCxMm2L/ll+Vz0XEXkV+W3+7XcMDWPQ9x/eHshRCTsfd1W7ZUyZ61PFY6biJHPQUWXcQYKC8HrxdwbYU5Q2HBJPhuandXTUBfgIeRsjLIzOyiwgM/BxU58NkY2Po2rHo4eL16N4TFQNX38MkwKP8uaHFBdQEfr/+Y179/3XbxCSi3sdFeWP/P/8Bdd+27SitXwi9/Cfff38bCiARwbYGeI9v9Up0eJ+tuW8f0y6dT6amkR3QPstKyyEjIaNVi4fY2z7zt9rpxep3k3ZLH7WNup6y2jAsGX4B52DA+c/xex5RUVEBqavO/mTGGbVXbOGvAWZ0fWLT8nw4cLFy2BIbdCanjoe+P916OMXsdaNxoGnl31btdnwGsPbrrfBZ4nPZx3ETkMNAJ5xJ1hTpQDR4o/AjqdkLaROhxgn+RzwcXXwxJSZCRAc9e/yIcez2c8N9QX9t9dZZmgd1c6nZB7gN2VuPkMTDsP7q7dhIgOtre8ferzIX8Z+zjwVMhdVzwBrUl4FgHcZmQMCh4WcsLnsDPwbLb4Pj7YdBNtkUiUGRP8FZA/GB7cdpCgaPAfye/wFEQVO7KFeB02i5d+7re8vngxz+GV16BiIg2Vkg4DnYttL+hXZOOOTwOEqMSSYhKoKq2iriIOADiIuJajbEImnnb66ahsYERvUcwsOdAlu9YTnxkPIB/Tow9iY+HmuZx4Oxy76K2vpa5G+ayuWozAL/6+FeU15Yz66pZrbavq69j9a7VnJh2IhFhbR2QAIHvZUu+aohMhs9Ohdh+MGFW8HbtsLhgMTd/cjMP/egh7h1/b7u27TKBr73RZ29geSsgNBJOf6976lTvhh1zweeC9PMhdh8tRSJHgsN9Is
iW59EDCDA63mKRlOSPbnwNPuasmxN8J+dIvSO87Nfg3ABpZ9kTeYBvv7UXQu+9B88+i73QSRgCK++DLyd2S3UF2x2tsaH13/P+CEkjIOv3kHLaQa/WwVBSAlu3QkMbL/9Ql5kJS5fax9XVwDc/g2F3wbC7ISrFLmg6z5R8AV9fAb4qKF/W+i6qcyNseg02vw51LWau9jkgOg3yHoF5Y4OXxR5jt230QNyAVnUsdBRy08k3cedpd7bqClVVBX362CqG7uOMu2MHxMTYbl8T2zpVJJ0Am1+D725ufk2Bv1vYUL6BBVsWAHaMRdPM2w6vg7jI3YFFZFzrFgtfc4uFy+ciJiIGgITIBKrqqkiITLDPoxJaZYZqNI3cPOdm/rToTyQn21aL7dtty9PasrVU1VXxau6rrC1dS1lNGd8VfUd5TTkbKzbaAgK+M17JeYXL3ruMd/Pe3fuB25fo3uDeAlm/Cz5eu3/PXDuTvv/bly82f7HPombkzeCRMx/hvdXddMG+L0X/tFnDRr8AA67tvptZX11kM5f1yIJ6zXciR4nOarXbn66FgX+vd9suvG1d43RE0+uort7vTTqnK9TuL+9HFz3KzPyZXDfrulapBF0uyM2F1avtF027NR3A+lrY9HfYuRB2fW3vMncRh8fB7PzZbKnc0roeNUXQazQUzIS8PwRtl5ZmLxAcDti5E7ve1rch89JW2WT25HAMdA8l//43vPkmLFoExUUe+PIc2PIPyLkLKlYGrxwSDo114NoES3/VPRXuQldeCbNm2c9iZWV312b/vPjdi1z1wVW8kfsGF14IH3wAF1wATz/dtEYIbHzJngsCVayE5NMgIhFyW/Qlqq+FeeMg4wIbRLg2Bi9POc1etKef17pCEfG2peJfI21LZQsFjgKmjprKnafdaVssApx4IuTkwPz5zSlX7Uuw55IPPoAZM2yLRlSUPVcuW2a7RLWSNtEGyFU/QHhcGys0c3gc/OzDn/Hc0ueYs26Ov3tTUytDUIuFb+8tFolRiQD+CfISomxg0VaLxbur3qXEVcLbq94mvyKPSZPgrLPg9NPt+IrfjvktJXeXEBUexSs5r1Djq6GitoLXc18PKsfX4OOpxU9x+fDLefzrxzuWnjbjQsi9D4rmtFrU0NjAQwse4tqsa3lw/oOYvZx8GxobmJk/k/fWvMea0jXNwdBBYIztWtvYyN4vOiKT7HwnDR7Iuds+7g7urdDnbMh/Gja92v7tj9SbkiL7o0U3w4LqAv5v6f+xpHBJ63WX3QrrnoONL8OWN1stzi/L5+UVL7O50rYSf7P9Gz5Y8wHeBi+seRK2vQdb3oKq1Z1S9Q53hcoF4nNycNQ5WJ+3nlMyTiGyJpJ35r3D8am7J3LKyeGLL+wJcdAg2LXLNnQwahSsWNFcWNNz02h/CAEMhIb7y8HnhG0rINENZd9S2ft8NtTV0Se+DyWuEkZljCIsNCy4krvLdTphwQJbh+ho+ztQoaOQytpK6hvr6RPfh0/Wf8KQ5CHM/mo2Pz3+p/SO791cj4hb4YtXAENjUhZLZr/gn4m2Z3RPrrvuGC65BIYMgam/Og12fA9r74TeF9jtW772pjoUwpdfwogR9hj1G1iLs85JSEgIMeExrNq1irT4NKrrqkmLiiWjoRKik8FTSWl9NiWlEaSkQFwcJCbu480bNcr/cPmclxiaPJTIsEiWFi4lNDSU0485nWJnMVV1VUFZboyxFz3FxZCVBVsaF3JG/zNweVxsqNhA/8hRLF8OsbGQPmID0eHRZCZlsnzHcmq8NWSnZxMVFsXy4uVk9RjH0qW2zGGji3HXVzMsdRh5u/JIi0sjNS5136+hjc9Qbm7zGJeaGkOx6yRYvR1qPFC9Enqa5vey/seQ+zJ4FkHiRL5/LYeQEEhIgJ4pHtZUraBPfB/Ka8rpHdubIlcR4SHhxETE4Gv00Su6FxFhEWyt2kpISAjjMsexy7WLXTW7OCE2AapWQVRPSMqyWWlCwi
E0nAXV1Zw54ExcXhcri1cSGxnL6IzRbKzYSImrhP5J/clMymRZ0TKcXidjM8cSFRbFl5u/JC4yLng/vZu74q3auYr0hHRSYlP499Z/M278eDZuDKO6GgpKK6kO2Up2erY/YB7Yc6B/2+8Kv2NE7xFEhUXxbeG3nNH/DP+ywM9C0PvTxme5sraSbVXbGJk+kq2VWzGYoP18W/AtI9NHEhUWxaJti5gwYIK/nBJnCUvyljA2Yyzzvp5HxsgMHnwwmY8+gn79ICf+YZj9X/ZDk7H7/8n/Xo6DTa/A2leBcc3LAH5YC+E3w9wnbdeY1EToGdm8bcMo2LkC1j0AaRcFlwsQ/hvY9QeIHQDbIqCoeXmGM4PqLdU0RDYwuG4wOS22feAB+N3v7P9L794BByonhzVrIDLS3ogwBh55BO69F3r1sqlaA9cFIOIWKJ4Hx1zZqo4NAXervtzyJWdHnU1aRBrTP5/OGRFnsHLlSkrdpfR39+e46OPIyckhpiwGt9cdVOcR9SPYunYrZZFlZJtsMkIzyMnJwVfqI7U6ld6xvcnJyeEY9zGszFlJXFkcafFpLNyykBXFK7iw/4VUhVXx1qdvccMNP6WoCAYOhLJNZQyNHkrR+iImxU7ih9wfeOqEp4iLjOP13NfJ6dFch/zSfCYnTGZC5ATcEW5mzZ8V9BlqNI007p7YJCw0jJCmC9HA97zpeeMpUD0KClfCwOuCjtuG8g1MiJzASDOS8vpyZi+YzTE9jgkua/dns8hRxMXxFzM1eyqfbviU1/71GqPTR9O/R3+WFi7lhN4nEOdaD4710PdCO9h+x1wIjaAx7Ww8DV5/taLDowmpXgPOddDzZIhJh9Jv7M2yXqewoTCVsjIbbKamwuefw8iR9rw+uOn/rel/L/BzYBLBORbe+zVEnAh5W9mwZRd5eTB2LKypnc/EgRNxeV3k7/yB0XHREJUKNQX4YkewcnUSMTF2P/2bknY17cddYAPahjqI7NE8SWNbxzz+EfjwP22AkNYvqI4ul01k0KuX/ewnJcGqVbBxI5xzjj33tlkukFucS/8e/ekZ05NF2xYxtt9YwsPsNUJ5TTmFjkJO6nMS26q20WAaODa2B2ybDv1+ArEZrF0LeXm2u3JMTKvig2zZYqvwox9BnmsBZw08C6fHSX5ZPqeknwzb34O4gWwITfZ/z7U8X8/fMp+Y8BjGHzOena6dlNeWN18btdOWyi2EEMKAngNYUrCEk/qcZFsT93A90Zny8+3N4Ysu8bCqbAXjMsex07WTtaVrSU9IZ2jKUNaUriE5Jpm0+DT/dl9t+YoJAybg9DjZULGBURnN1x2bKzcTFhJG/x79/d89TS2pLRU7i6mu2//rA48HvvgCUlLg1FP3/Lpq62tZWbyScZnjKHWXstO9kxN6HWvf2+RTbdIOx3o7vi5lfKsufXV19qZRr15wWmCHh5wcqN1lA+zQcDtGL6n5OqrQUUiNt4YhKUOaj5tx2vFg6edDdKq/nNr6Wl7NeZWRfUYybcU0ykeUk56Q3ryfiuNh+2YIjwGzgZLCudTW19JoGomNiOW91e8xOmM0f/juD5za71RyS3IZ0GMADy57kKsSwwiNbAT3dogrhnRPc7kBXLm5ez6ILYSYvd2e2QuHw0FSUtKBbCoiIiIiIoeR6upqEvdx17rDLRYLFy4kPj6+o8XIIWDW2lnU1tdyTdY1fF/yPc8ufZbo8GimXTQtaL3vvoN334Vx42yrT+/Bhfz3/P+m1lfLS5P+wXVTInnsMQgLgwGD65j8zmRuGHkDeaV5PHb2Y7z43YvU1ddx97i7ufFGmDwZkpNh9Gj432WPUltfS0pMCneMvaN7DkQHPDj/QcJCwlhatJQ3xt1In8rFcPx9totO2kQIj/YPjlqwZQFLCpewrnwdj539GJ56D39e/GcuHXYp5w46l6s/vJoz+59JaEgofRP7MiNvBrGRsZyeeTrLdizD5XWxuXIzz016jqy0LP9dK5fXxf
UfXc/EARPpm9CXnwz/SVAd7//ifsZljuPvOX/no5991HynF/DWe7l+9vWEhoQy7cJpJEYHn0Ce+fYZNlVu4qS0k7hp1E17PRY/n/lzTk4/mZlrZzLv5/P8/fQBPtv4GdNXTycuIo7xmeO5Ouvqjh986TafbviUdeXruHXMrYSHdvhr5bDyxNdPsKRoCQ//6GFGVn5u5xnJuMB2LXBugOPvtV3XotMg87LmDVc9Aj2zwfhg2wzwVsO4t6B0McQPhJ5ZNDZ2PNnTiy/aQfTXX2+fP734aZYXL8dR5+DWMbdy4XEXduwAdILf/x6ys+GSS/a97uebPrfn2dAwTu17Ks9OetZ/7jPGMOH1Cf5kBO9f8jLHrrzZZlFz5PPihm9wueCee2zX0PT0Pe/nH/+wY9PuucdO0vnmD6+yrnwdRc4ibjvlNk7b8HvIvBzKl+IddCOTP/s9N468kbzSPMb2G8uM1TOIjYjl9GNOx9dgx2HOyp/FjCtmEBUeteceGy0ftzB3w1zmbZrHA6c/ENQysEcHuJ/A5TNmwLx58J//CW43fFP/HAmRCczMn8nbl73NbXNv45qsa/h4/cdM+11wxrzNX7zPE18/QWVdJW9c+gYx4cHNRI8sfITVpau5+7S7GXPJXuaXWbGC55Y8x9qytZyScQo3ntwi5XZAD4xaopkU/w3z5kF4uL0WCVze8vX/+pNfU+wqpsxdxovnPcnINXfC8Htst6KUcTZ5R+YVsPw38B/N3R9dxDI5cRGff273016T3prElKwpzF43mw9OuwpKvoQTfgc5d8LpM9pfYBdyuVxMaMoZvi/mAFVXVxvAVFdXH2gRcoibs26O2VSxaY/La2ubH09fNd3M3zzfGGNMcbEx77xjzJw5dtnlMy434/8+3ry/+v1WZTQ0GJOba8zKlcY0NhqzsXyjuenjm0yRo6gzX8pB80buG+b2ubeb454/zr6gvD8Z890txiy/s9W63nqvGfTcIHPOm+e0WdZfl/3VJD+RbIqdxabUXWpOmnaSOefNc8yaXWvMU988Zf789Z9N/2f6G1+DLzDZozHGmOtnXW96P9nbOD3OVuXOzp9t+j/T39w7794297tm1xqTW5zb5rKdrp3mmW+fMVW1Vfs8Fg/Nf8jc/dndZuIbE1stK68pNyf85QQz4bUJZn3Z+n2WJXKoqvPVmcLqQvvki7ONKfnKmPznjfnqEmNmJBjjqzFmwUXGbHg5eMMlNxmz8e/GVK8zpmKlMYX/NGbRlcYs/bUxJQs6rX4PPWTMo482P5+1dpaZOmeqOeXlU0xBdUGn7acjJk82Zt68/Vt3e9V2M+qlUeb5Jc+bPy78o/1jwKXMGa+eYf624m+m5+M9TX3Bx8Z8MsIumJlhrrzSmNmz928/d9xhzF/+0vz8h5IfzPn/ON8Mfn6w8Tg2GfNBil3w9TXGlCwwUz6cYk6adpKZs26OKXOXmROnnWjOev0sk1+ab4qdxabn4z3NLZ/csucdBl6OHfilWesy9/TTDjfcYMwrrzQ/X1a0zIx4cYSZ9NYkY4wxz377rOn7dF/z+srX29z+z1//2UxfNb3NZVW1VSZnR85+1aPMXWZeWv6ScdQ59rre5s3GDBu2HwXuPg4PL3jYTFs2zQx4doDxFH1mzKdj7PJPxxiz8CfGbP/QmK+vNuafWUGbb9myn/vZg6lzppqr3r/K3Pf5ffb/fsNLxuxc2Go/h4L2XPMfXbeWpF0uHnLxXpdHRzc/vuqEq/yP+/SBqwNuQE/JmsJ/zf8vLhh8QasyQkPhpJOanw/qNYiXJ798wHXubucPOp/f/PM3XJN1jb3NOOKBPa4bERbB3Clz/YNoW5o6eipTRzfPexIVHsX26u0MTx1Oja+GKTOnMDx1uL1D3KJH4/MXPI/T4/SnBQ00afAkrj3xWm4YeUOb+93brNG943pzx2l37HF5y/2c/ebZPHLmI62W9YrpRWJUIrvcuzgu+bj9Kk/kUBQVHkXfxL72ia
8KopJt/+zIZDuQurEOypdC/58FbzhgCiy5AfpfBaFRcOL/2HEZneyqq2zig/JyGDoUrvj5j7j383sJCw2jX2K/Tt/fgRg8GL75xs73Ul5uW7D3JDMpk4raChYXLubG7Bubm3N2/86eezsur4tBvQYR5t4Gqc3jxOrq9j2uoklKih3z2CQrLYt15evI7pNNZL3Dpnyuzoed82HwTdyYfSNPf/s05w06j8iwSGLCY9jh3MHQlKEA/PsX/yY9vo0mksDmqLYedySTSydlgamoCJ5LaHTGaMb0HcPlwy8HYMqJU/A2eLls+GVtbn/P+Hv2WHZSdBLZ6dn7VY/k2GRuHnXzPtdLTbWtTXV1dgxPq2x8LY7z6Zs+5/GvH6dPfB8i66shYSjs/MqOrUgYCp4yGPsPIPh4pqfblq/iYpsePCVlv16G3+Qhk7n43YtZ9ItFUPCiHeu04a/tK+QQpMBCutylwy/l0uGXdnc1Doq0+DQenvAwZw08a7/WH5I8ZL/LfuTMR6jx2UkBstOzKXGV8IuRv2hz3cSoRH8mn5YiwyL548Q/7vd+D9SpfU/l/vH3c+WItqfLfvq8p6mr77qsbiIHXUQSeCvh3EU2DfkHvezfLi9tvW7aBDjrU6j8HlL2Mrq0g0aMsNnxmgYh94rpRWxELKPSR+1744PkttvgjDPgpZfguefgpz/d+/rZ6dnMWTeHly5+qdXF88m5b/DCshc4uc/JUO+wM61vfBl8DtLS7ADxc89tTte/Jz/5iT1eXi8MGAC33gqfXP2JzYjmLYSwaGio2Z1HOoqzBo4NOu8/fs7j1PqaU/0GJtgIchikgGxrephXL2nO9JUSm7LX4OFgi4/Hn40uMtJm3AvS4sWc5nWxcNtC7jj1Dpu2NTzGThNgfND7R7D2SQiNtjcOht7u3y4qyk5kOmyYDSo2bWpfPc8bdB6rblllB/MXv26zho552XanPIwpsBDpZF11gj1/8Pn+x6EhoWy6fVObLRKHirDQMB4+8+E9Lj+t35E5Z4gcxRKH2XTEKeOgKg9i+0LBLEgebQONhMEt1h9qf7pY//4BGZ6A+dfPJzIsssv3u7+OPdZmYNq1y2Z+25dzBp5DVV1VmzdPstOzWb5juW3NqC+0/eNrisDUc/75cNddNh18VtbeA5gRI2D6dJg71wY9ACN6j7APqhx2TEyvk+HSHW1uf+aAM/f9Qg4TKSn2/TmcvPaaTePdp8++142PjOf5Sc9z+jGng3eDzT464Gf2xxioKbDpzQfd2Grbe++FX/5y/1vCAkWERTQHnPHHws4FMPDnUFNos64dpjqcFWp/RoiLiIgc8UoXw+fjbZrZ426xEzjm3geEwNnzIe3M7q7hEa++sZ6L37mYJ899kqzS2UAIDLgG3NtpSJnAb35ju1298UbweN528VTAR/3g8jKbPtw02LvcR6iXX4a//hUefdS2Xkya1N016kKli+18VhevsXMfHaz3tXoNzM2yCR3iBsAZHxyc/e6n9lzzK7AQERHpLDs+tbnoB99sc95v/wAwNquMJn07uNY9D67NMOrZzi977okQnW7H0Ix9C+Iy973NYaq01Hb3qaiAt96CKVO6u0ZdqMEDH/SEY38Jjnw4+4uDt+/Nr9vJ6obc1iXjrTqiPdf86golIiLSWTIm2Z8mx1zRfXU52sX2g63v2sc1RbZrWmcZfh8suc62TkUcul1SO0NqKixbZn+uONI/zmFRMOgmWP88RPfe9/qd6dgb7M9hTi0WIiIicuRxF8DsY2DwVNtyMXFe55ZftdrOkHywL0ClaxkDRZ9Ar2wbnIpaLEREROQoF5cJPU60A297ZHV++T1GdH6Z0v1CQqDf5O6uxWGrZXZfERERkSPD+BnQdzKc/Ex310TkqKAWCxERETkyJQ2DCR93dy1EjhoHHFg0Dc1wOBydVhkRERERETl0NF3r78+w7AMOLJxOJwCZmUduijUREREREbHX/klJSXtd54CzQjU2NrJjxw
4SEhIIUW5uEREREZEjjjEGp9NJRkYGoaF7H559wIGFiIiIiIhIE2WFEhERERGRDlNgISIiIiIiHabAQkREREREOkyBhYiIiIiIdJgCCxERERER6TAFFiIiIiIi0mEKLEREREREpMMUWIiIiIiISIcpsBARERERkQ5TYCEiIiIiIh2mwEJERERERDpMgYWIiIiIiHTY/wNV7sxETxqDBQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize polya-centric gradient for gene(s)\n", + "\n", + "#Find position of max saliency\n", + "max_poses = np.argmax(np.sum(scores, axis=-1), axis=-1)\n", + "\n", + "#Loop over genes\n", + "for example_ix in range(scores.shape[0]) :\n", + " \n", + " #Get max pos\n", + " max_pos = max_poses[example_ix]\n", + " \n", + " #Only visualize genes that are not extremely long\n", + " if max_pos >= 150000 and max_pos < seqs.shape[1] - 150000 :\n", + " \n", + " print(\"-- \" + str(example_ix) + \" (\" + str(strands[example_ix]) + \") --\")\n", + " print(\" - gene_id = '\" + str(genes[example_ix]))\n", + "\n", + " #Plot scores\n", + " f = plt.figure(figsize=(8, 1))\n", + "\n", + " #Annotate 4kb window\n", + " plot_start = max_pos - 2000\n", + " plot_end = max_pos + 6 + 2000\n", + "\n", + " l1 = plt.plot(np.arange(seqs.shape[1]), np.sum(scores[example_ix, ...], axis=-1), linewidth=1, linestyle='-', color='red', label='Gradient')\n", + "\n", + " plt.axvline(x=plot_start, color='black', linestyle='--')\n", + " plt.axvline(x=plot_end, color='black', linestyle='--')\n", + "\n", + " plt.xlim(0, seqs.shape[1])\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + "\n", + " plt.tight_layout()\n", + "\n", + " plt.show()\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - 100\n", + " plot_end = max_pos + 6 + 100\n", + " \n", + " #Rev-comp scores if gene is on minus strand\n", + " if strands[example_ix] == '-' :\n", + " plot_end = seqs.shape[1] - (max_pos - 100)\n", + " plot_start = seqs.shape[1] - (max_pos + 6 + 100)\n", + " \n", + " #Plot sequence logo\n", + " visualize_input_gradient_pair(\n", + " scores[example_ix, :, :] if strands[example_ix] == '+' else scores[example_ix, ::-1, ::-1],\n", + " np.zeros(scores[example_ix, ...].shape),\n", + " plot_start=plot_start,\n", + " plot_end=plot_end,\n", + 
" save_figs=False,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d7aefe0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb b/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb new file mode 100644 index 0000000..cc22f72 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "534495a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores.shape = (1, 524288, 4)\n" + ] + } + ], + "source": [ + "#Load scores\n", + "\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_GCFC2/scores_f3c0.h5', 'r')\n", + "\n", + "scores = score_file['grads'][()][:, :, :, 0]\n", + "seqs = score_file['seqs'][()][:]\n", + "genes = score_file['gene'][()][:]\n", + "genes = np.array([genes[j].decode() for j in range(genes.shape[0])])\n", + "strands = score_file['strand'][()][:]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", 
+ "\n", + "#Input-gate the scores\n", + "scores = scores * seqs\n", + "\n", + "print(\"scores.shape = \" + str(scores.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fd114809", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- 0 (-) --\n", + " - gene_id = 'ENSG00000005436.14\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAVi0lEQVR4nO3de3BU5f3H8c8SUiEJQdRgocmolRQKarkUQXDQqq2jqMhQa4tTLUO9o6WodawIir8qo61F4/RqNd7wjlYYbJFWrArhIgElEQlqlBgkgUAum4Qku8/vj8eTs5vsJtlLspvwfs3snLPPOec5z7ns7vPd5znneIwxRgAAAAAQg36JLgAAAACA3o/AAgAAAEDMCCwAAAAAxIzAAgAAAEDMCCwAAAAAxIzAAgAAAEDMCCwAAAAAxKx/tAv6/X6Vl5dr0KBB8ng88SwTAAAAgCRgjFFtba2GDx+ufv06bpOIOrAoLy9XTk5OtIsDAAAA6CX27Nmj7OzsDueJOrAYNGhQ60oyMzOjzQYAAABAkqqpqVFOTk5r3b8jUQcWTvenzMxMAgsAQNx4vV6deOKJkqTS0lKlP/OMNGSIvNOnB6enpyeukABwhOnKpQ9RBxYAAHSX/fv3u2+uu84O6+qC0wEASYW7QgEAAACIGS0WAAAA6DV8Pp+am5sTXYw+JzU1VSkpKTHlQWABAACAXqGurk5lZWUyxiS6KH2Ox+NRdna2MjIyos6DwAIAAABJz+fzqaysTGlpacrKyuI5anFkjFFlZaXKysqUm5sbdcsFgQUAAACSXnNzs4wxysrK0sCBAxNdnD4nKytLpaWlam5uJrAAAPQN/fr10/e///3W8c7SARxZaKnoHvHYrwQWAICkMnDgQG3evLnL6QCA5MBfPgAAAECUmpubdc8992jUqFEaM2aMxo0bp0svvVTbtm2LOW+Px6O6ujpJ0tixY9XQ0BBTfsuWLVNFRUXM5QqHFgsAAAAgSnPmzFFdXZ02bNigIUOGSJJWrlypoqIijR07Nmhen88X9fUL8QhUli1bpvPOO09Dhw6NOa9QCCwAAEmlvr5eo0ePliQVFxcrLVx6WlqYHAAcEerrpZ07u3cdo0ZJHXzXlJSU6NVXX9WePXtagwpJuvjiiyVJ+fn5ev755zV06FAVFxcrLy9PGzZs0HPPPaeWlhalpqYqLy9PkyZNkiStWLFCv/3tbzVkyBBdeOGFQevyeDyqra1VRkaGSkpKNH/+fFVUVKipqUnXXnutbrjhhtb5li5dqhUrVqiiokKLFi3SnDlztGTJEpWXl+vHP/6xBgwYoPz8/HaBT6wILAAAScUYo88//7x1vLN0AEeonTulCRO6dx3vvy+NHx92cmFhoUaMGKFjjjkm7DzvvvuuCgsLlZubK0kaMWKEFixYIEkqKCjQ3LlztWPHDlVUVOjqq6/W+vXrNXLkSD3wwAMh8/P5fJo9e7aefvppjRo1SvX19Zo8ebImT56s8V+XdcCAAdq4caM++ugjnX766fr5z3+uRYsW6fHHH9fLL7+sU045Jdo90iECCwAAAPQ+o0bZin93r6MTgXdT+uSTTzRr1iw1NDRo2rRp
mjp1qs4888zWoEKywcjvfvc7HThwQP3791dxcbGamppUUFCg8ePHa+TIkZKka665Rrfffnu79X388ccqKirST3/609a02tpaFRcXtwYWV1xxhSTpu9/9rvr376+vvvpK2dnZ0e2DCBBYAAAAoPdJS+uwNaEnjBs3TiUlJTp48KCGDBmik08+Wdu2bVN+fr5WrVolSUFPsm5qatKsWbO0bt06TZgwQTU1NRo8eLCampq63BJrjNFxxx3X4TUXAwYMaB1PSUlRS0tLdBsYIe4KBQAAAEQhNzdXM2bM0Ny5c3Xo0KHWdK/XG3L+xsZGNTc3KycnR5KUl5fXOu2MM85QYWGhdu3aJUl67LHHQuYxcuRIpaWl6amnnmpN2717t6qqqjotb2ZmpqqrqzudL1oEFgAAAECU8vPzdeqpp2rSpEkaPXq0pk6dqrVr1+q2225rN29mZqaWLFmi008/XdOmTdNRRx3VOm3o0KH629/+posvvlhTpkwJ+yDQ/v37a+XKlXrxxRd12mmnacyYMfrlL3/ZpVvR3nzzzZozZ47Gjh0bl7tMteUxUV4B5zTdVFdXKzMzM97lAgAcobxeb2vXgbq6OqV/Pe6tqwtOT09PWBkB9LzGxkZ99tlnOumkk4K6+iA+wu3fSOr8XGMBAEgqHo+n9baygRdFhksHACQHAgugpxUUSFlZ0sknJ7okQFJKS0tTUVFRl9MBAMmBwALoaWecYYfchx8AgIjxHJvuEY/9SmABAACApJeamiqPx6PKykplZWXRJTKOjDGqrKyUx+NRampq1PkQWAAAkkp9fb0mTpwoSdq8ebPSwqWnpYXJAUBflJKSouzsbJWVlam0tDTRxelzPB6PsrOzlZKSEnUeBBYAgKRijFFxcXHreGfpAI4cGRkZys3NVXNzc6KL0uekpqbGFFRIBBYAAADoRVJSUmKuAKN78IA8AAAAADEjsAAAAAAQMwILAAAAADEjsAAAAAAQMy7eBgAkFY/HoxNOOKF1vLN0AEByILAAACSVtLS0kPeoD5cOAEgOdIUCAAAAEDMCCwBActm1K9ElAABEgcACSJS33kp0CdDX+HzS0qW9u2K+YYMaRo7UxNxcTZw4UQ0NDa2TGhoaNHHixHbpYXm9Ek/nBYAeQ2ABJMo55yS6BH3P1q3S3XcnuhSJc/fd0h13SCNHJrok0du1S35JW3bv1pYtW+R/6aXWSf5Nm7Rlyxab7vd3nldGhnThhd1XVgBAEAKLI8XatfbfTPR99fXSmDFH5j+1EyZI99yT6FIkztatiS5B7JYuDX5/1VXu+NlnR57f2rUxFQcA0HUEFvH2r39JL7+c6FIE++gj6Yc/lH72s+SqbOblSa+9luhS9Kye2N4zzpCKi6Ubbuj+dSG59OY/D/74R+mdd6SqqsiXLSqSDhwIP72xMXR6c7OUny8ZE/k6AQDtEFjE2wUXSJddluhSBPv0Uzt86SVp2LDElsVhjHTzzdLMme2n/ehH0qxZ7dO93u4vV3dZskR67LHQ2xtvlZV2WFsb33xLSqT33gtOe/11yeOJz7pKS6VLLpGammLPKxl98on02Wfdu459+9zx116Tqqvbz1NWJhUU2Ar86tU2LdR8PW3BAmnaNKmiIvJlTzlFmjw58uUefVSaM0d6++3IlwUAtENg0RX79tnKU6J+fP1+W+mK1kUXueMd/avXkzr6Z/XNN6UVK4LTVq+2/aV37+7ecnVFY2PkLT+LF0tXX90+/ZJLurZ8U5N08KANEu+6q+N59+61w1WrIitjKMZIhw/b8e98RzrzzODpM2bY4Y4dduj322UaG20FNhI33iitXNl3KnnXXy+9+KL7fsQI6dvf7tqymzbZ7xxnv3ZV4D/zM2dKkya57z/+2J4/OTm2VesHP5CmT5fWrJGOPtr+6x+otlb6+9+D06K9GPr55+327NxpP8tOC8GhQ/b7taYm8jxffz34/e7dNpgK1foQrkXCuQA8GQIrAOgLTJSqq6uN
JFNdXe0mNjUZ4/O57/1+Y5Yvt+mOL74wprnZDnfvNmbzZvu+udmYvXuNOXw49Ar9fmNKSuzQ73fTAqcHDnfsMObYY425/353nsZGY+67z5j6evve5zPm7beN2bfPmK1b3WULC42RjLnpJruM/VmyL0d5uTFlZXbcmfaXv7jjX35pp0vGXHGFnd/vt9u4b58x771nX2vW2G0uLbXzzphh1+9sp9/v5vnnPxvz4YfuPm5psfvt/vuNue46O8+DD9rtk4xZujS47M7rr381ZssWY6ZMMaaiwt3urVvtsdq+3a7L2U/Ll7vb3txsTFWVMffea8vh9xszfLgxv/61MUVFNr/Ac6HtcamoMOaqq+yxd8rT0GCn+3zBZV6+3Jh//9seMyfttdfceevq2udfXGzLH3huOOV2lnOO/fbtNs8rr7TDiy6y6T6fPSaHD9v0hQuNqay0+3vhQrcsr7/u5uf3G5Ofb9PffNOYQ4eMeeSR0Ps/3Ovss+3wssuMufZaYx5+2Jht2+z+bjvvsGH2XHD238GDdn94vcHzHThgP2sNDcZ8/rk9n7/80k6rqQn9WXKWra11x194wR0vLHTPV+c1a1bw+3PPtcPHH7flvPFGY044wZiNG+0x3rLFmCeftPOcd57dzsDli4qMeeUVY2691Zh//CN42vLlwZ97p+w+nzuP32+P+bp19v28eXaegwfd5f77X3c7neO+YYM9Dz791JivvrLfAXv32vn27rXLVlXZ89vvt+eEMcYMGmTM6tX2s71pk93fTln277f5Ou/XrXPLXVRkz9mHHrLTsrKM+eST9sd7//7g8WHD3PdTpkR2nrV9TZvmjjufk507jcnNtWlPPWXPocBlvvENeywDz5sPPrDT/vMfY/75Tzv+1lt2HzrLDR/uni/O91SIV51k9PWrrqP0W24x5je/aZ/HqlXt0x5+2Jj//c+W+fBhu/4TT7TT/u//bHpDg/0MFRQYc/nl7nHas8eYJ56wx76y0pa/qso9lyorjXnjDXss33nHft+vWWPPo8pKm++779p8nO8tv9+eCxddZMyyZXYf79jR/rurM1VV9nNtjD0fzznHmF/8wv6+BM5TUhK8nN9vyxKoosL9PESipcWWYeNGY9avt+872o5w+bddZtcuY555xv5O9+9vl/vww9B5V1cH/+Y4+Tm/L85ntSuam+1xa7ueUPUT57smlKYm+53z3nvGfPZZ8P7u7Dg7v0OB87e02PO2s+MTrh7VttydaWx06wGRqKqyv4HOPvd67f4MpazMnnN+v1tXjPR4OcJtU1NT+3O9t4n0eyFQ289FFELW+cPwGGNMNAFJTU2NBg8erGpJmXELcwAARzqvpBO/Hi+VlN5JOrrZgAFSerptdWr7qqiwLZXo+9LTpYED3fcejx063W+zstxzoicNGhS+O67HY/9aiMSxx9phRz08jjnG3X7ns+Dsh860LdOQIbZHgmPgQNtLIfPr2vXhw/amLOFkZEipqcF5xMvX+6LG79fggwdVXV2tzMyOa/39418KSd/7nrR9e7dkHTd5ebZJf8GCRJcEABAgXVKon+hw6ehmixa5laFQr8WL47u++fOlZcs6nicrq+sVuVBGj7Y3uUgGzrWPN94oLVzYtWXGjw++C9zixfY6yki36aSTunbt17hx0k9+YsedSrEz3LlTev996YorbFq4bTjtNNvlMtKbTEyYIJ1/vnTffe2nTZwoXX65VF4uPfRQ8PYsWGArxnfeGbzMvffabrebNrXPb/Zs6dRT7fgdd9jh4MH2Zij332/fX3mlPX8CPwOSPR+d8zY93XYdzciw3WIffNCmX3ON7bKakiI98YRN+9Wvgm+TftdddtnMTJu3z+duw/Tpdt0+n91eSbr1Vhv8P/GE7Z78wANuXnPmuOuZN89eVyZJN91k68GOjAyprs6W5bjj3C7Xt91mh42NXb+Ve080i4TU0BB5c2ukbrvNHvJo1NS4442NxuTlhW9Kck4tZ94v
vrDjBw8aM26c7VbQ2bY6XZ5WrAif/y23RLwZIX8GjLFdZCRjVq7sPA+v1zbzh+J0SYrEoUN2OG+eMX/4Q/vl16835rnngtP8frcbS6yam+3559izJ/y8Tz5pu8Q4Tfs5OcFdKxz19W6XqgcftGkPP9y1rig7dhhz9dVul5SPP7bdLmbPdvNvu8zAgbZLzIwZ9hgENhu3PdahdLTNgbZutV2zAvN1jld9vT2309OD15mRETqvxsbgLnErVtj5H3nEpj/7bMfldroWhjsXA8voaLtvAn3wge3u012c7lXGGDN0aHA3rc7Mndv5MQwU7txatMh2XQvsqnH77Xaa19t5voHLOXl2pYuC32+7HQUqKLCfpd273bQ773S7EEb62rHD5vHSS/a90yXq6adt+vz57rzvvBO6nOXlxkyf3r67x4QJwft+//7u+b1qabFdx/oKp2tfZzZvjrlrBnqRggL7vYNeK5I6f+ICi75k1SpbAesuV11lf+TWrIl82aam0IEFord9uzEvvtg+vb7eVm6c/s7GdF45WrfOnbeqyl5/EOoHt+1yHVVynHmOPz76bQzlkUds+cJZs8autysVi1D8fmP+9Ce373oonVXu5s2z16okm40b7b754IOuze9cl9DR/g60bZsx3/qWe+ynTAm+hiYePv88uv7WXbF2bWRBRSgVFfbYO39A1NRE/53n9dprkQAAPXyNRRf6WyFGBw7YZs5ly6T+UfReu+ce6YUX7PMsJLfZDt1v/37bZPnNb4ae3tVj8eqrUmGhbY79xjds/85wnH6f559vn6vSk+rqbJMqYrN+vTR1qu2rnJXV9eXmzpUef9w+E2L+/G4rXrdwzltJDY8+qgvmzZMkvSHJ6dXdIOkCSTrrLL3xxhsaGNjfuy2/33Y3kPjOA4AYRFLn755rLBBfxx7r9ouLxuLF9hXww40ectxx8cln5szIn4Fx9NHxWXckCCriY8qU6CrDZ51lA4ujjop/mXqQ3+eTc9PhwMuC/ZJNf/tt+Tu7YLjf13dTT02Ne/kAAKHxHIsjybPP2ifbom+79lo7vP76xJYDPc+pbPf2PxHidZehFSsifxYIACBqBBZHktmz2z/gDD3n+ONDP1E83gYMCB7iyOG0cvTGwKK83L1LSby6Ls2caR/sCADoEQQWQE+59FLpscfc92vXds96nH7lkd7SD72f809/v1741T5smHvtUGCLhXPLRwBA0uuFvz5AL1Raav+NDez7fu653bMu5wL/lpbuyR/JqzcHFpLb0hIYWNx+uzteV9ez5QEARKSX/voAvcwJJ/TcRaS0WBy5evs1FhdeKJ19tnTZZW5aNHfCAwAkBN/YQF8zb559IuukSYkuCXrarFnSk0/ap6/2RoMHS2+9JXm9SktLCzlLuHQAQOLxHAugJzU0SGlp9ja0lZWJLg3QOzgtMDyPAgB6XCR1frpCAYlw6qmJLgEAAEBcEVgAidBb+8ADAACEQWABAEgqjY2Nmj59uqZPn67GxsZO0wEAyYGLtwEAScXn82n16tWt452lAwCSAy0WQE866ijppJOkhQsTXRIAAIC4osUC6En9+kmffproUgAAAMQdLRYAAAAAYkaLBQAguf3+99L69YkuBQCgE7RYAACS2y23SK+8kuhSAAA6EXWLhfPA7pqamrgVBgAAr9fbOl5TU9N6B6hw6QCA7uPU9Z26f0c8pitzhVBWVqacnJxoFgUAAADQi+zZs0fZ2dkdzhN1YOH3+1VeXq5BgwbJw1OEAQAAgD7HGKPa2loNHz5c/fp1fBVF1IEFAAAAADi4eBsAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMTs/wEbSMf9OrtM2wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoO0lEQVR4nO3dd3hUVfrA8e+k90IKSSAUgQBKpKsgRSxYVl0B18qu7vrT1d0V7KwrKrurrmVt6IoFVKQLSjdA6CA9hYQQICQhpCckmWRSZpKZub8/TkiB9IQM5f08T56UuffOeydTznvPOe/RaZqmIYQQQgghhBDtYGfrAIQQQgghhBCXPkkshBBCCCGEEO0miYUQQgghhBCi3SSxEEIIIYQQQrSbJBZCCCGEEEKIdpPEQgghhBBCCNFuklgIIYQQQggh2s2hrTtarVaysrLw9PREp9N1ZExCCCGEEEKIi4CmaRgMBkJCQrCza7pPos2JRVZWFqGhoW3dXQghhBBCCHGJSE9Pp3v37k1u0+bEwtPTs+ZOvLy82noYIYQQQgghxEWqpKSE0NDQmrZ/U9qcWJwd/uTl5SWJhRBCCCGEEJexlkx9aHNiIYQQQpzn3A8eTbNNHEIIITqdVIUSQgghhBBCtJskFkIIIYQQQoh2k8RCCCGEEEII0W6SWAghhBBCCCHaTRILIYQQQgghRLtJYiGEEEIIIYRoN0kshBBCCCGEEO0miYUQQgghhBCi3SSxEEII0T7lWbBzElgqaxfE0zRZHE8IIa4wklgIIYRon8zVkLEKMtfaOhIhhBA2JImFEEKI9smoTiiyI2wbhxBCCJuSxEIIIUT7FB5U301nbBuHEEIIm5LEQgghRNtVlUhCIYQQApDEQgghRHuUZ9o6AiGEEBcJSSyEEEK0XWWh+u7ka9s4hBBC2JwkFkIIIdrOVAg6O7gzFhy9bR2NEEIIG5LEQgghRNtVFoLvMHDvAUETbR2NEEIIG5LEQgghRNtVFoJnP/VzwCjbxiKEEMKmJLEQQgjRdqZCcAlUP7v3tm0sQgghbEoSCyGEEG1nKa+dW6HT2TYWIYQQNiWJhRBCiLazVoGTj62jEEIIcRGQxEIIIUTbaWZw9LJ1FEIIIS4CklgIIYRoO2sV2Dk1uUlEUgTFxuJOCkgIIYStSGIhhBCidSwm2DoRsjaoHgudQ6ObWjUrv1n8GxLyEzoxQCGEELbQ+KeBEEII0ZCU7yAnEgwnIWA02DlAVgSYzkDv39fb1GAyoKGhN+ptE6sQQohOIz0WQgghWid9pfpekQnW6h6L1AVw9P3zNi0yFgFIYiGEEFcASSyEEEK0jv5w7c+aGXT2jW9anVBIYiGEEJc/GQolhBCiWQaTAWcHZ5ywgjEXRi+CjNXqRs3a6H6SWAghxJVDeiyEEEI0a9jXw1h6ZCmUpYODO/R4EAbNBDtH1WvRCL1RT4BbAEUVRZ0YrRBCCFuQxEIIIUSz8svyyS/LV/MqPPqAnT34hKv5FdamE4se3j2kx0IIIa4AklgIYUua1mSjTIiLQaWlkmJTMfnl+WAuB9eQ2hvP9lh4X9PgvjWJhUnfOcEKIYSwGUksxCXlrZ1vsSR+ia3D6BhWM2y7A1aFQskJADYlb6LEVGLjwISo70z5GQDyyvLAagIn39obdQ5qkbx+zzS4r/RYCCHElUMSC3FJWXN8DTvTdjZ8o6apBnoTE0kvKplrIWcTGHMgdxsVVRXcvvB2DmYetHVkHULT4MsvYWcj/y5x6cgry6O7V3fVY2ExqpW2K3Ihdzvo7MBsaHRfSSyEEOLKIYlFZzm9AjaNhpytto7kkpZlyCLTkNnwjUf+Bev6w56pAFRUVVBeVd6J0bVS3k7wDIP+zwHUnFdGSYYNg+o4ixbBM8/ApEm2jk
S0V35ZPv39+qs5FhYj2DurBfK2TABrJVQWN7rvuYnF3vS93Dz/5k6KXAghRGeSxKI9Mn+B6BehIqfp7UwFsPf3cGYvnJjdObFdhixWCyWmErIMWeffaC6HhP+oK6nVNfaf3/g8j/78aCdH2QpFMTDodRj+MYTcRUZJBjp0jSdOF4GiInj6adiwofltFy2CsDDo2rVlxy4vB6OxffGJCyO/vDqxKM9XPRSaVnujvQtUNZ1YhHiGYDCpXo2E/AS2ndpW87sQQojLh00Ti7jcuJqxu5ec4kTYeQ8c+wiOfdj0tnk7wdELxq0C70GdEt7lKLcslzC/MDXO+1yFUeDkA1Py4ZrXAEjVp5JalNrwwaxVcGoJlBy/cAE3pzwTugxXP7uHklmSycCAgRd1j8Uzz8BXX8Hf/970dpoGBw/C0qWwZk3zx01JgX79oG9fMFyi7c2TJ2HJEjBfhnPx88vyCfUOVT2Ads6ql+IsRx8wNf4+rjfq8XHxwapZ0TSNU/pTAKQVp13YoIUQQnQ6myYWt/xwCwvjFtoyhLbLWAW+w2D8enDv1fS2hYeg9x+g+29h0BudEd3lQ7OCqRBQw6BCPFU1mipLFZqmsfTIUgrKC6D4CATfoRK4Xo8AkFqUSm5ZLpqmgU5X+wWw9w+w5xHYMNImpwWAuRRcg2p+zSjJYHjw8Noei3Nj7iiaBtmbQB9f/2/NqKqC9eth/nz4wx+a3ra0FCoqYPBglSw0Z/ZscHBQvRulpc1vf9Goftzy8uC66+CRR1RycbnJL8/Hz9UPJ3snKnV2agJ3wBhVdtbJG0pTGt1Xb9TzcuTLGCoNlFeVk1acRn+//qTpJbEQQojLjc0SizPlZzhTfobE/ERbhdA+BQeg/3TodldNNZT7f7yfNcfV5Vmj2UhkciRVlip1Ne9sKUZ7J1tFfOmxmGDLLfCTH0S9QGZJJsEewQS6B5JTmkNOaQ4P//Qw205tU2O86yR4FquFCnMFAW4BFFYU1j+uqRDSf4bRi6H/sx0a8okTMGgQXHttC668m8vA3g1WdIGYV8g0ZDI8eHhtj8XZxn4LGv2tEvsKbLsdfhkM5VkQ8wosc4Z9fwRg2ZFlzI2ee95ux49DQIBKKp5/vum7MBggKAjsWvgOs2ULzJsHu3aBl1drT8hG6jxuK1dC796wcSMEBjawrcUEuTvU4nKXoPyyfPzd/PFz9aO40qSGd3r0Amc/cPRW74dV+gb3LTIW8UvSLxjNRvRGPWn6NG4MvVF6LK5ElkqwWmwdhRDiArJZYpGYn8h13a4j8Ux1YpH0JazuDdvuslVIrWMqUBNvK7LBdIYSUwk/J/7MlpQtAOzL2MfEhROJz4uHqhJ1JT3uTfjVxmP+O7qReiFlrFaLcY1dCR69yDRkEuwZTLBnMFmGLOJy4/Bw8iAuN05VpXFwh/h/wo8eZBoyCfEMoZdPL1L1qfUb6YWHwH8U9HoYBr/doSH/61/Qpw/ceWcLrrzbO6khWWdPtySDIUFDyDZkd2hM9VhMcPJruDkSJmwEw0lImgPDZ4OTHwDfRH/D7P3nzwUqLKztfWiuE6W8HFxcWh5WejqMGgVubuDu3vL9Otp7u98jfE548xvqj9R73Pbvh6eegokT4fbbG9h+5yTYfgf8cmkOhcwrz+NfO/9FSlEKheZKqKgzz8nJR/VgbJ3Y4L51q0HpjXrSitO4ofsNNUOixBUi6StY7gWrQsB4iQ6BFkI0y3aJxZlEbrvqNlKKUlTjPPbvMPS/EHTrhb3jymLI2gD6hNbvW3doirkMHFxhx2/hwNPE5sRyfffricmJAeBg5kGCPYJV6VCdA2gW1YjTxwEw5+Ac1h5f25Fn1rzjs2GFL6zq0fwbe6UeEj+Eox9AZVGnhHee/F3Q768Qeh/0n0aWIYsDmQfIKMkg05BJXG4cd4fdrZI3e1fVuMEONAupRan09O5JT++e6jlWl+kMeFylngNHP4CqjhvUv2ULfP45vPeeumKfVJDETd
/fVDPhvKKqgqKK6sfTwUMlRP3+CqjEIjo7mrKqMiotlY3dRQ29XjX2W6U0Bdy6q9dZ8G1qonuvqdDvaRj6AZqmkVSYRHlVORVVFQAk5CVQWFFIWRm4urbsbtzc1FCoRlXkwPbfwJq+aCkLqahoOqGYtX0WvT/trYa1XUC/pv/KKf0pNbyuKbnb6j1uBQXQs2cj2+rjoTgeJufCbXs6POb9GfvJKW2mgEQ75ZflE5sTS7GpmByrAxiSVMEEUHMsAEqTz9vvbMGFs86Un8FsNdO3S1/psbiSmMvh8Kswfg3cML/jh3cKIS4aNu2xGOg/EC9nL0oyN6uGTsid0PPBC9dVWlUKG0dC+k8Qpyb4nig4QWRyZMv2r3vV29FTHa/rBABismO4J+weUopSsGpWDmYd5PEhj3Mw66DqrTCdgcH/VmFYqvj3zn/zwZ4POvwUG2UqUOVY74qD23aDvTPrTqzjum+uo6yyDIDMkszacc8774OyNLBzgLI0EvMTuen7mzp3XLTpDLiHwpG3YN8fyTRkEnEygticWDJLMonPi2fKwCnE58arx9iYC+Gvg5MfKUUpJBclczj38PkTuM/+H4ti1LCgJiratIamqapJ3bur33U6iDgZQUJ+Qs1z7B9b/kHY52GYrWY1hMRwErwGAKrc7AubXmi88lUdn38ON9wA992nxvc3qyJH3ZepAJwDoDBaJZiFMeDeE1IXwe77OaU/RTfPbgwKHERcbhwGk4ER34zg/V/fx8vr/OFda4+v5e+ba2dyx+bEklyYjJcX5OSApc5LOTYnlpc3vYzFalE9S5794dad6HyuxsGh/qRno9nInvQ9aighsCV1C3Y6O9X7BPyc+DO7T+9uwYm3gLUKNA1N0zice5hJAyZxIPNAg5tmG7Krhzfm13vcmsx3ihMh8CZ1UcOQBOamMq7WKTGVMP778fx3z3877JgNyS/Pr/k5vcqqHrPE/6qLQi6Nl/0qMZUQ6B7IkilLGNtjLMmFyRSUF/C3iL/Zdo7FOfOXKqoq2JW2q+b51hobN8KMGbWV0jae3MiqY6s6MNjLgOGkupgTcKOaV2bnfMHuqspSxfu/vn/Bn1+VlkpmbZ91/oUrqP/8kiRKVNM0jRc3vsjW1Mt72YHOTyyqX2SJZxKZsXkGqfpUsvUnVMMwcy2s7gHGtg8FyciAadPghRdg/371JvPQiof4MeFHVXfde5Aa/jLwZbCYeCXyFR5b9ViDlYZyS3OZMH9CzfCmely6qgnDoVMAiMmJQW/U42TvRHJhMoeyDjG+53iVWLj3gPy9NVf29mXsY0yPMeSV5VFs7JhGbbMMyeA1EDSzSjDKM5l/eD52OjsiTkYAcP/y+7lr8V1YK4tVz8rQ98ElCBw8WHJkCaWVpSw/urxz4gVw8FTDyNx7QsEhMksy8XXxxdPJs6bHYuPJjWSXZlPh3BXyf61JGlKKUojOjmbX6V3nv/E7+0PZaTXJu8sIAL6O+prfLf8d1urF9fak7yEhr+FerZWJKxn73djangdLJVQWoUPDyQlMptptI1MieXn0y0SmRKJpGhEnI7i+2/XsSd8D3ler9U20KqqsFnJKc7DTqZdkZsn5JWejs6PZm74XgH/+Uy08t2MH+Ps38zgefg0O/BlSF0BJouqBcu2m/rd2jmoSuUsA5GwlOjsaHxcf3J3cic6OJjIlkkkDJrE+aT2BgZCQAFZr7Tm+v+d9Vh1bRVJBEkazkcnLJvPEmidwdwdPT4iKUvsAfH7gc1YeW6nmxBTFQO+pcHo56OPw94ekJLWd1QpLjyzlzkV3su7EOsoqy8gyZPHwoIfZmrqV0spSXo58mec3NjPRoyGaVTVySpLAWKiG7xz4M+z/P07pTxHoHsjIkJHsz9x/3q4x2TFcNfsqvo76Wg27q/O4+fvD6dO12xpMBkI/DuXfO/6teit19uo1tecRcHRrstGRV5bH7QtvZ/up7Q2ewlNrn+L+H+8HVCP2gWseqHkNAzXHLC
hQVbymTYPVq9VN86Ln8UvSLw0eNyY7hplbZzbYW1ZQXsCah9Yw9dqp5FUUgWsIxL8J5elqFW7H6okxdg719tMb9QR7BPPQoIcIDwwnuSgZk8XE0fyjth0Kdc78paVHlnL3krtZe2JtqxqF+/bByy/DQw+Br69qOLyw6QVmbJ6ByWxqct/LrvFZngH7n4JD09Swp7rnNmKw6lUuOQ6bRqnhqBfIxuSNfHbgM76K+qrDjz0/dj6bkjcBEJEUwfex36v3g3NpWr3nmNFsZPz34/km6psOj+lyszNtZ81jfLmJzYllXdI6Ptr7ka1DuaBs1mNx7MwxBgYMpJtnN45XVKpyoaH31zT02mrGDFWd5aOPYPhwWH50OW6Obry18y0sVrP64CtPh11TyCtM5HjBcZ4c9iSL4xefd6z/HfwfQ4OG8s7ud86/I//RcPR9SF8BqMTigz0fkKpPZUfaDlL1qdyx6A4S8hIw+gyFtCVw8GlAvfEVVhRib2fPltQtaJqG3qhXV7Hbw1oFZ/ap9TWKz5kU79xFjYt28gVDMmWGFHac2sHkgZNZcXQFh3MO4+HkwbDgYWw/vVs1hiwmSJ6HlrOVZQnLeHLYkyxLWNa+GFvDeyCkr4QgNXY705BJ3DNxLJy8kIySDE4UnGBD8gY0TeOo1VM1VrfdDlV6UvWpPHf9c7w5/s2aq9w1ugyHM7+qIXFmA0azkU/2fYKrgyvrTqzjdPFppm+YztSVU6m0VFJaWco3Ud+QXJiMpmn8Z/d/GBkyUk1wTngHDv1NxZm7jYEDVXnV06ehoKiK7ae2k3gmkc0pmzmcexiLZiHYI1hd0fQdptY12f8kOaYyBvoPxPKGhT8P//N5JWfLKsv40+o/8WzEs+SW5mE0goeH+sy2s6PpBsrJb2DUfDXky94NSlPVat8evVXSm/WLuqLu4EZUdhQRJyNYHL+YqOwo1hxfg7ezt5oA3+UkmqYmbj//PJwuPk1eWR739r+XpUeWsvrYau7oewcmi4mThUlcdx387ndw771gMpuIOBnB9Ounsyh+kRqXb8xVC61lrGHUKHj9dZg1C3JzYV7MPD678zPmxczj1/RfKa0sJTIlkq2pW/k58Wdu7X0rDnYOHMk7Uv9c9fGw7wk1Z+vUOeWZrGbVqMndrobZnVoALoEQ9jdw9md/2jYOZR1i2oZp7MvYB8CWlC1EZ0cDKvn8cOKHzIuZh+Y5oN7jNnIkzJ0L27ZBZCQsiFvA1PCpLIpfRJVbTzizB4JuA59wNYG7iYn5Xxz8ggF+A3hn1/nvO2n6NOJy46gwVxCTHcO6pHW4OLigN+pJKkiqt+0nn0C3bqri1j33qF7i+YfnM2PzjAYXjpyxeQbHC46z4PCCen8/exX/nv73MKr7KLVI3tkyyaDeU32HqJ/r/h2VWHi7eAPg7eLN6eLTTBk4hc2/34xVs9YMt7O1b2O/rXm+ndsozM2FlSth1arzhx4ePgzjx8PQoXD99eqCRDfPbowIGVGbwDWWNJxzP5ekuu878f8Ev5EQOklN6q97bgm5UHwUPPtB98nq79YqVUjD0kwC1tI4qi2IW8Cb49/kx4Qfay4UdYRjZ44xN2YuL216ifKqchbGL+S1sa+16H4Wxy8mPDCcT/Z/0v7P+SZYrBbmx84nNif2gt3HhVRiKuHZiGf5x5Z/XNi5hh2tuQsD1bcvjl/M1PCpxOfFNz/c9hLm0PwmTYuNjcXDw6NV+1Ts/5XStFLeG/ceqxJXsfxwMt01E8zuqxo8ngng0pLxHefz8YEff4SyMvDz03gr9i3C/MIw5hv5dF8iN2XuhjRHyDazqHAOznnORBmjSCpMYkDlALad2oamadze93bmrp/LjaE3kpieyJJNS+jv31/dSXQ0VPSDoxkQ/wGmbveRfiydQ78/xIqjK1gWuYwJzhN4bexrzNw2kyXRaQzOCYKU5eA/mp/Tf+b2PrcTYAlgwYYFrN+xnrjcOJzsnfhg4ge4OLRi1mtdxz
5VE4IDxkHuj9B/Wu1tmgZZXeDbO6A8ncj01QQZgjgad5RtJ7dhybRQXFyMycHEu0mf4uM/Gr67E8ylnMg6hDHdyK+6X8lMzWTl1pWcKDgBwJCgIQS4B7Q4xKoqNalZp1Pj9Z2b6hGvuBr2zoRfu4HXADKSMsg6nkVxQTGxMbH01fXl+zu/5+O9H7Ny7x50paMgNRLcexFXGMfMcTPxc/RjXtw8oqNV45Cz30uug+9/A3aOrC78El22jiJjEbMOz6KHdw98q3zJL8/n9UWvcyjrEA72Drxb8C7TbphGTlIOaWVprMhawYSw/th1GQrJO6B8IRMn+vDgg2BvD+/9EE2v8l70MfbBs9CTd5a9Q2hFKPa59ixPWs6j93yB7rQzWEzEV5rxKPAgOjoaLUtj/5n99KvsVxPzdzHf4ZTnhJujGy98/zwTJ77IXXdBSAg88QT4RkWpTDoqqvYcz/6uHwIrplVfOd8EVbfCF0PUNtdPhKPZEOsO9m5sq9zGNyO+wc/VjxmbZ1BsKubBqx+kj7EPX66dwx13PMrs2XDNNeD403y8C71JS0zjUNYhAt0DcXdyx2w08+7ydxk79q+sW6dek5+t3opXoRe79+9me9p29t00DaclfwKvfqBzZPToaKZPh59+gt5D0jh55CRLC5YSlR3F3Py5THCewDCfYXy450NOJpzE380fnVHHBys+YPoN02ufM+kroTAbgivh2Kswrn/tbZZKOJoFuipIngt+10PWr1AUDicWs0Y3nOk9pjOuxzj+b+3/Mct+FiuPrURv1PP6uNdZuW0lp4NPk5+Xzw9xeYQfq33c+gyJJjERbr4ZZs3SWGj8mAH+A6AQPti2jTvyneDj7mAuBs+jte9vZ/9XZ5/y5gq+Xvc1N4beSPzpeH7w/4EViSswmo2M6TGG5MJkdIU6yuzLmLloJoeyD/HwoIfpZ+rHnLVzmHrt1JrjenioBrG/v5q/siR3Ji5WFxzLHXl98es8Gl5bSCI+N56jcUe5MfRG3o56m3BrOA7VvQ/5Zfm4F7gTHR1NyakSEjMSiQ4bBalrwOdaSMwG/WBI3Qk+A2rPKTqag5kH0bI0oqOjKT1VyvGM44zuMRpfvS/+xf5E7Iygl2+vBl/+Vs3K14e+Jqcsh17evXhsyGPoWntl/+xroDHR0ZwqOlXv+bah+wYCPQJrbn/ySXj8cRg5Up3ap5+q1eSdndU8qrfeUr1zbm4Q4/ERxnIj+Q75fHbiM3pW9Kz9PxtOqmp0AWNU71Xvx9Q8vbO3H54JgeOh/DR49IOu4+rFn1+WT7GpmO5e3dv+GYEq270haQMaGg9c8wCezp6tf9zOqvu+c2oZxC9T55e2CMZcVXtuAMXD1We8uRwyPKH0HTX8OW8bhM9q8/nUiI7GYDKwbc82zBlmrDlW5q6fy4iQ9l2sPGvm1pm4Wl0pKy9jxvwZ7Di6A7JAl62rvZ8GHjctKop3f3qXgf4DqTpTxYc/fchtfW5r9H6qLFWcKDiBnc6OAf4DWvWcn71/Nqf0pzhZeJIv7/6ypjz7pWJe9Dxc8l1wdXDlxfkv8tLol+rdnpCXQEFFAUHuQYT5h9koykbUfS9v4HlgjTrEksgl3Bh6I356Pz76+SOmXD2lk4Nsu9JW1IHXaW2cDVlSUoK3t3dbdhVCCCGEEEJcQoqLi/FqpiZ8u3ssvvtuB25uHnTvDi4pH0DaUgB+MfzM/oSe/POf6mJ5m4aQVmd9j/z0CI+GP0pGSQa5ZbmYrWb83fzZmbaTZSPuwb6yCPxuUFUnbt3W3lOyqYySDB5c/iAju42ksKKQHyb9QEWFutLv7g7v7/kPVdYqdp/ezfz75hPsGWzrkJm1fRbR2dFUVFXwwqB7ubNiLwz7BBLegj5Pge+ghq/kaBq3/HAL8+6dx/qk9bg6uPLEsCdscxJCiFpmI1RUD8dz9AUXvwt7f9XvDw89BH/9K1x1FXTp0oIqZC29sn4lqH4s4nPjeXvX20weOJmlR5
Yye+xzdD/8vCqEYecMY1o+nPXNN9Vh770XjEY1vO7aa9UwzOuvb6bHuRX+/W/o3x8eeEBVk3N1rT0fSo5D7D/gui8hajqLTn5FaronM2dCcTG05/rm/w78jxJTCbtP72buXQuZer8vzz2nCklMmtQx53au+Hg1VPu779R8ss2bYc8emDBB9UD361e77Zkz8NhjamHSdhtef5hivd6mKgPsmgxDP4bUb8G5K9jZQ7d7IG839H2i9hhRUZC6GI59CECk/wPsLTfxxvg3eCXyFUY4/p4vZoXz2GNqTaIpU6j3On33XVi+XJUj/8+inXy8/2PCuoQRlR3FxqkbsbezrxdmjiEHT2dP3J3OKRt44gs1nC7kLoibCWOWYbaY0el02NvZk5CXwOpjqwnvGo5j6j3s3AnvvKPaUo6Oatii2WpmXM9xjBunijA09H4zf74anvvKK6qMuptbB/wvWuCll9Rr7LbbVBva15eax/HAAfjiC5gzR1WHDD7bDKy+fcoU9doNDVXx7syIZE/6Hm656hbSdo3h1Cl47TX1+inWTvPO7newWC3MHvsCrkfegBGzIeo5sru8xsN/uZZXXoGyslLefXd8i2Jvd49FvexlfbhahbU8C8vIeXy7fgIVFar2/V1tWZ5CpwNN4w8r/8CCODXu9+PbP2bywMk8vupxXhz1Ir9xM6syqsM+VFVXurbsxC9m478fTx/fPowIGcFfRv6l3m0pRSncPP9mxvQYw8LJF8eq5WuOr2H9ifXE5MSwcsoCukU9od6QAPo+peZ1NGLoV0NrxoOu+N2KS6prUAjRQarf693c1Id3i/ep61Kdp9AR6jwWFZXleP7HEx8XH4xmIyX3zcKu8BCM/AK23KQqA7ZQWRmsXQslJWqNll69Oj50gEcfhbvvhocfrr4QaVfnf5u6CHK2QM+HIOFt/r7hFwJD3Hjhhfbfr96oZ+KCiUwZOIWxdjN49VVVEONCW7UK0tLUULoHH1R/M5vB4ZxLvZoGf/mLuqjo66sagxdEwUGIfhFGfA5bb4UxP6oKgqZ8VagmtDrLqn6dsnOSKi9t58wurwn8KzmGb+/9lgdWPMB1KStxMQfx3nt1jn92PyAsTJ2HnR3cNaWAfp/14/Ehj3Mk7wibft+KSdt7H1NJhc5ezdcL+2ujm2qaeh5nZakEdsKE+re/8IKqeNitG7z4ohraeNbMmerxf/XVlofWEa6+GtatUxdZalQ/jl99BYmJKtGv+Xsd7m4aBkPDC9T+/e9qEde6r5/SylIc7BxwqSyEzWNh/Dowl7JgzSC27nTlu+8aafM3ot09FjUslWA4Dvckw+FXsbeHJ5/smEMPChzE9Ounc+zMMQYFDqKHdw+2PlanXJdLsFr51bNf4we5hDw57Ele3fIq/514fgnJq3yvYsPUDXR1b7zEY2eb2Gci0yKmEeQRRLcu/eHWXaoMpb1Lk0kFwED/gYwIHkFkSiQDAwZ2UsRCiItC3Q9EnY6wwRrr16sP1a5dm7k6eCUnEueq81i4AmF+Yfww6QeejXgWu9JkVc5dH9/qw7q7q4pXF9rrr6vqZStWwNNPw211/7dFsaqS4eC3YNjHPNrVjQceUI3E8HB1Rb+tfFx8OPCkKi29davqiekM9913/t/OTSpAvTzmzLng4agS5OWnwfsatd6OZlU9Fo0pOgyjvoecrQRbnNicspken/QAYHCxFwMbWQe0okIVNnnkEdVrAH4EewYT4BbAmB5jWhdz90nqedH3KVXdsAk6nep1a8xHH6neI01T8yPrevpplfDu36/+b48/3row22roUFi0CCZPVkllSAg1r/MbboD//lfFY7HALee8F970G5W89eun9vfxqb3t0UdVz2B2NgwapF4/Hk7VT3yHEJVUpi0DBzdKywarnpJW6rjEoiJLPTndQzvskGddE3ANJwpOkFeWx6DABp6x/tepr8vE1Gun1k7CbMAA/wGdGE3zXBxceGP8G7UTxXQ6cGvZpLEB/g
MIcAsgMiWSvl36XsAohRAXnXM+EOcfVlfUQA2XEG0zOGgwS+KXMKTrEKjSqypNJzu+/GpHGTAANjV2sdp3CPT7G+y6H9x7En7jYg4cgNRUtV9HCQtTo3WKi9XV67PrEV0RPHqpoi8RQwANbo4E1yaGWVdkQJeRkLOVIJfaq9eOdo4E+LpS1Miaunq96hVwrJMHjO4+mjmH5vDdb79rXcyh94FnH9AfUdXI2qmhq/ugnge7drVjSH8bffqpKiv/1lsqiahr8GD43/9UGfHbGqgD8OOPsHBh7RD6usLD4cABSEmBgQ1dy+0yvKa6X680WFr9PlxW1vLYOy6xMBvUir6lp1TvQZ+OGys/KHAQb+96m8KKQoI8gjrsuKLj/Gnon9q03wD/AWxO2YyzgzNO9k4dHJUQ4lIyeDBERDS/nWja0KChzN4/m9fHvQ7mfaqka/gsMLeidXAx6f839VXN01PN9ehI3bvDb3+rGr5Dh6rG5BVl9A9QmgL27uDaxIgIc7laY0qzQMZKPMKexdPJkz8O+SM/Jf7EyME6XntNlbguKFDlmM8qKzt/HsONPW7k+8Pfc0P3G1ofs0+4+uoEnb3UjL8/fPZZ47dPnKi+GuLuDn/+c+P7enqq99rm3HorTJ+upm7079/89md13DoW5jJVI9+Yq1aX7UA9fXpyvOA4vXx6dehxhe0N9B9IxMmIi64XRgghLlVDgoaQachkSNAQcA5Uw5S9wsC3Ba2JK9hXX6nJ1Rs32joSG/G4qumkAlRbz8ENLEZVNhkIdA/kqeFPEeQRxJ13gp+fujIeFUVti1ynw71f8HlzqB4Jf4SM5zPOn5wtbM7RURUWeOklNVyxpTquxwIADRy9oest4NSlQ48c5hfGNQHXdOgxhe2F+YWRZcjikUGP2DoUIYS4LFzX7Treufkdru16LZADB54G955qLY3Bb9k6vIta7962juBiVz0R295ZLWrscRXBnsHE58UT5BGEoyNs367mwISEAC/W7ulLEZmZUFkJTtUDFJzsnejqcfHMGRX1+furOSYlJS3fp+MSCwcP1UXmPQBu2dy+Y50zoQ9NY8mUJbg6NFALTFzSnB2cGdV9FMOCh9k6FCGEuCz4uPjw6tjqMjZBE9Xn874/Qq9Hm95RiIbUbZN5BcAib3D0hLFqAH6Qxxzic+PrDVUPOTvNss48Kheg9wD49ls1p+GppzohdtHpOi6xcPRUVQUuEBkGdfna/afdtg5BCCEuTw6uMHEv5O1QK10L0R5VgKVclfd3UouIBLkHcST/COGBzc93mDgRnnlGzQOQxOLy1HFzLFxDoLIIDMlQngHG/LYfS9PqfwkhhBCibVz8occUNTZeiNY6t03m0QfytoOpEPRHCPIIOq/HojFvvAHjxnXccgTi4tNxPRZ2juB9Nez+HVgqYMKVOvtJCCGEEOIy5TsU9j+p1qkaOYcgjyBO6U8R7NFEidpq/v6dswihsJ2O67EAtQhPUQyUHAM6uTaXEEIIIYS4sELuUqtyG06Azp5gz2A0NFkOQAAdXRXqmn9Azma1oI3blbS6jBBCCCHEFaDXo5C3E9AgYAxB1sMAklgIoKMTCydfuDOmQw8phBBCCCEuEjodXP91za+9fHrxxNAnCPEMaWIncaXo4HUshBBCCCHElaKLaxfm3jvX1mGIi0SbEwutulpTSWtWzRBCCCGEEEJcMs629bUWVGptc2JhMBgACA0NbeshhBBCCCGEEJcAg8GAt7d3k9votJakHw2wWq1kZWXh6emJTicVoIQQQgghhLjcaJqGwWAgJCQEO7umC8q2ObEQQgghhBBCiLM6dh0LIYQQQgghxBVJEgshhBBCCCFEu0liIYQQQgghhGg3SSyEEEIIIYQQ7SaJhRBCCCGEEKLdJLEQQgghhBBCtJskFkIIIYQQQoh2k8RCCCGEEEII0W6SWAghhBBCCCHaTRILIYQQQgghRLtJYiGEEEIIIYRoN0kshBBCCCGEEO32/x
0B1BN2CAoMAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize splice-centric gradient for gene(s)\n", + "\n", + "#Find position of max saliency\n", + "max_poses = np.argmax(np.sum(scores, axis=-1), axis=-1)\n", + "\n", + "#Loop over genes\n", + "for example_ix in range(scores.shape[0]) :\n", + " \n", + " #Get max pos\n", + " max_pos = max_poses[example_ix]\n", + " \n", + " #Only visualize genes that are not extremely long\n", + " if max_pos >= 150000 and max_pos < seqs.shape[1] - 150000 :\n", + " \n", + " print(\"-- \" + str(example_ix) + \" (\" + str(strands[example_ix]) + \") --\")\n", + " print(\" - gene_id = '\" + str(genes[example_ix]))\n", + "\n", + " #Plot scores\n", + " f = plt.figure(figsize=(8, 1))\n", + "\n", + " #Annotate 4kb window\n", + " plot_start = max_pos - 2000\n", + " plot_end = max_pos + 6 + 2000\n", + "\n", + " l1 = plt.plot(np.arange(seqs.shape[1]), np.sum(scores[example_ix, ...], axis=-1), linewidth=1, linestyle='-', color='red', label='Gradient')\n", + "\n", + " plt.axvline(x=plot_start, color='black', linestyle='--')\n", + " plt.axvline(x=plot_end, color='black', linestyle='--')\n", + "\n", + " plt.xlim(0, seqs.shape[1])\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + "\n", + " plt.tight_layout()\n", + "\n", + " plt.show()\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - 100\n", + " plot_end = max_pos + 6 + 100\n", + " \n", + " #Rev-comp scores if gene is on minus strand\n", + " if strands[example_ix] == '-' :\n", + " plot_end = seqs.shape[1] - (max_pos - 100)\n", + " plot_start = seqs.shape[1] - (max_pos + 6 + 100)\n", + " \n", + " #Plot sequence logo\n", + " visualize_input_gradient_pair(\n", + " scores[example_ix, :, :] if strands[example_ix] == '+' else scores[example_ix, ::-1, ::-1],\n", + " np.zeros(scores[example_ix, ...].shape),\n", + " plot_start=plot_start,\n", + " plot_end=plot_end,\n", 
+ " save_figs=False,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d7aefe0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh b/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh new file mode 100755 index 0000000..7f1e551 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_gene.py -o ../../../examples/saved_models/gtex_CFHR2 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex_liver.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/CFHR2_example.gtf diff --git a/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh b/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh new file mode 100755 index 0000000..e1f8b94 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_polya.py -o ../../../examples/saved_models/gtex_CD99 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/CD99_example.gtf diff --git a/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh b/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh new file mode 100755 index 0000000..9fc75fb 
--- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_splice.py -o ../../../examples/saved_models/gtex_GCFC2 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/GCFC2_example.gtf diff --git a/tutorials/legacy/interpret_sequence/vis_helpers.py b/tutorials/legacy/interpret_sequence/vis_helpers.py new file mode 100644 index 0000000..00b92ef --- /dev/null +++ b/tutorials/legacy/interpret_sequence/vis_helpers.py @@ -0,0 +1,153 @@ +import sys +import os +import numpy as np + +import matplotlib.pyplot as plt + +import matplotlib.cm as cm +import matplotlib.colors as colors + +import matplotlib as mpl +from matplotlib.text import TextPath +from matplotlib.patches import PathPatch, Rectangle +from matplotlib.font_manager import FontProperties +from matplotlib import gridspec +from matplotlib.ticker import FormatStrFormatter + +#Helper function to draw a letter at a given position +def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): + + fp = FontProperties(family="DejaVu Sans", weight="bold") + globscale = 1.35 + LETTERS = { "T" : TextPath((-0.305, 0), "T", size=1, prop=fp), + "G" : TextPath((-0.384, 0), "G", size=1, prop=fp), + "A" : TextPath((-0.35, 0), "A", size=1, prop=fp), + "C" : TextPath((-0.366, 0), "C", size=1, prop=fp), + "UP" : TextPath((-0.488, 0), '$\\Uparrow$', size=1, prop=fp), + "DN" : TextPath((-0.488, 0), '$\\Downarrow$', size=1, prop=fp), + "(" : TextPath((-0.25, 0), "(", size=1, prop=fp), + "." 
: TextPath((-0.125, 0), "-", size=1, prop=fp), + ")" : TextPath((-0.1, 0), ")", size=1, prop=fp)} + COLOR_SCHEME = {'G': 'orange',#'orange', + 'A': 'green',#'red', + 'C': 'blue',#'blue', + 'T': 'red',#'darkgreen', + 'UP': 'green', + 'DN': 'red', + '(': 'black', + '.': 'black', + ')': 'black'} + + + text = LETTERS[letter] + + chosen_color = COLOR_SCHEME[letter] + if color is not None : + chosen_color = color + + t = mpl.transforms.Affine2D().scale(1*globscale, yscale*globscale) + \ + mpl.transforms.Affine2D().translate(x,y) + ax.transData + p = PathPatch(text, lw=0, fc=chosen_color, alpha=alpha, transform=t) + if ax != None: + ax.add_artist(p) + return p + +#Function to plot sequence logo +def plot_seq_scores(importance_scores, figsize=(16, 2), plot_y_ticks=True, y_min=None, y_max=None, save_figs=False, fig_name="default") : + + importance_scores = importance_scores.T + + fig = plt.figure(figsize=figsize) + + ref_seq = "" + for j in range(importance_scores.shape[1]) : + argmax_nt = np.argmax(np.abs(importance_scores[:, j])) + + if argmax_nt == 0 : + ref_seq += "A" + elif argmax_nt == 1 : + ref_seq += "C" + elif argmax_nt == 2 : + ref_seq += "G" + elif argmax_nt == 3 : + ref_seq += "T" + + ax = plt.gca() + + for i in range(0, len(ref_seq)) : + mutability_score = np.sum(importance_scores[:, i]) + color = None + dna_letter_at(ref_seq[i], i + 0.5, 0, mutability_score, ax, color=color) + + plt.sca(ax) + plt.xticks([], []) + plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.3f')) + + plt.xlim((0, len(ref_seq))) + + #plt.axis('off') + + if plot_y_ticks : + plt.yticks(fontsize=12) + else : + plt.yticks([], []) + + if y_min is not None and y_max is not None : + plt.ylim(y_min, y_max) + elif y_min is not None : + plt.ylim(y_min) + else : + plt.ylim( + np.min(importance_scores) - 0.1 * np.max(np.abs(importance_scores)), + np.max(importance_scores) + 0.1 * np.max(np.abs(importance_scores)) + ) + + plt.axhline(y=0., color='black', linestyle='-', linewidth=1) + + #for 
axis in fig.axes : + # axis.get_xaxis().set_visible(False) + # axis.get_yaxis().set_visible(False) + + plt.tight_layout() + + if save_figs : + plt.savefig(fig_name + ".png", transparent=True, dpi=300) + plt.savefig(fig_name + ".eps") + + plt.show() + +#Function to visualize a pair of sequence logos +def visualize_input_gradient_pair(att_grad_wt, att_grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name='') : + + scores_wt = att_grad_wt[plot_start:plot_end, :] + scores_mut = att_grad_mut[plot_start:plot_end, :] + + y_min = min(np.min(scores_wt), np.min(scores_mut)) + y_max = max(np.max(scores_wt), np.max(scores_mut)) + + y_max_abs = max(np.abs(y_min), np.abs(y_max)) + + y_min = y_min - 0.05 * y_max_abs + y_max = y_max + 0.05 * y_max_abs + + if np.sum(scores_mut) != 0. : + print("--- WT ---") + + plot_seq_scores( + scores_wt, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_wt', + ) + + if np.sum(scores_mut) != 0. : + + print("--- Mut ---") + plot_seq_scores( + scores_mut, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_mut', + ) diff --git a/tutorials/legacy/make_data/Makefile b/tutorials/legacy/make_data/Makefile new file mode 100644 index 0000000..f2dce79 --- /dev/null +++ b/tutorials/legacy/make_data/Makefile @@ -0,0 +1,45 @@ +FASTA_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38.ml.fa +GAPS_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed +UMAP_HUMAN=$$BORZOI_HG38/mappability/umap_k36_t10_l32.bed +BLACK_HUMAN=$$BORZOI_HG38/blacklist/blacklist_hg38_all.bed + +FASTA_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10.ml.fa +GAPS_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed +UMAP_MOUSE=$$BORZOI_MM10/mappability/umap_k36_t10_l32.bed +BLACK_MOUSE=$$BORZOI_MM10/blacklist/blacklist_mm10_all.bed + +ALIGN=$$BORZOI_HG38/align/hg38.mm10.syn.net.gz + +OUT=data + +# mini borzoi configuration +LENGTH=393216 +TSTRIDE=43691 # (393216-2*131072)/3 +CROP=98304 
+WIDTH=32 +FOLDS=8 + +AOPTS=--break 2097152 -c $(CROP) --nf 524288 --no 393216 -l $(LENGTH) --stride $(TSTRIDE) -f $(FOLDS) --umap_t 0.5 -w $(WIDTH) +DOPTS=-c $(CROP) -d 2 -f $(FOLDS) -l $(LENGTH) -p 64 -r 16 --umap_clip 0.5 -w $(WIDTH) --transform_old + +all: $(OUT)/hg38/tfrecords/train-0.tfr # $(OUT)/mm10/tfrecords/train-0.tfr + +umap_human.bed: + cat $(UMAP_HUMAN) $(BLACK_HUMAN) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_human.bed + +umap_mouse.bed: + cat $(UMAP_MOUSE) $(BLACK_MOUSE) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_mouse.bed + +# targets file is already generated in this example +#targets_human.txt targets_mouse.txt: +# ./make_targets.py + +$(OUT)/hg38/sequences.bed $(OUT)/mm10/sequences.bed: umap_human.bed umap_mouse.bed + hound_data_align.py -a hg38,mm10 -g $(GAPS_HUMAN),$(GAPS_MOUSE) -u umap_human.bed,umap_mouse.bed $(AOPTS) -o $(OUT) $(ALIGN) $(FASTA_HUMAN),$(FASTA_MOUSE) + +$(OUT)/hg38/tfrecords/train-0.tfr: $(OUT)/hg38/sequences.bed targets_human.txt + hound_data.py --restart $(DOPTS) -b $(BLACK_HUMAN) -o $(OUT)/hg38 $(FASTA_HUMAN) -u umap_human.bed targets_human.txt + +# no mouse data in this example +#$(OUT)/mm10/tfrecords/train-0.tfr: $(OUT)/mm10/sequences.bed targets_mouse.txt +# hound_data.py --restart $(DOPTS) -b $(BLACK_MOUSE) -o $(OUT)/mm10 $(FASTA_MOUSE) -u umap_mouse.bed targets_mouse.txt diff --git a/tutorials/legacy/make_data/README.md b/tutorials/legacy/make_data/README.md new file mode 100644 index 0000000..035a37d --- /dev/null +++ b/tutorials/legacy/make_data/README.md @@ -0,0 +1,3 @@ +## Data Processing + +Todo. 
diff --git a/tutorials/legacy/make_data/download_bw.sh b/tutorials/legacy/make_data/download_bw.sh new file mode 100755 index 0000000..239f004 --- /dev/null +++ b/tutorials/legacy/make_data/download_bw.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# download example data from ENCODE (ENCSR000AEL - K562 RNA-seq); 2 replicates + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define remote urls +URL_P_REP1='https://www.encodeproject.org/files/ENCFF980ZHM/@@download/ENCFF980ZHM.bigWig' +URL_M_REP1='https://www.encodeproject.org/files/ENCFF533LJF/@@download/ENCFF533LJF.bigWig' + +URL_P_REP2='https://www.encodeproject.org/files/ENCFF335LVS/@@download/ENCFF335LVS.bigWig' +URL_M_REP2='https://www.encodeproject.org/files/ENCFF257NOL/@@download/ENCFF257NOL.bigWig' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for bigwig files +mkdir -p "human/rna/encode/$ENC_ID/rep1" +mkdir -p "human/rna/encode/$ENC_ID/rep2" + + +# download bigwig files; rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 1)." +else + wget $URL_P_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" + wget $URL_M_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" +fi + +# download bigwig files; rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 2)." 
+else + wget $URL_P_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" + wget $URL_M_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" +fi diff --git a/tutorials/legacy/make_data/download_dependencies.sh b/tutorials/legacy/make_data/download_dependencies.sh new file mode 100755 index 0000000..cd23a51 --- /dev/null +++ b/tutorials/legacy/make_data/download_dependencies.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# create additional folder in borzoi data folders +mkdir -p "$BORZOI_HG38/assembly/ucsc" +mkdir -p "$BORZOI_HG38/assembly/gnomad" +mkdir -p "$BORZOI_HG38/mappability" +mkdir -p "$BORZOI_HG38/blacklist" +mkdir -p "$BORZOI_HG38/align" + +mkdir -p "$BORZOI_MM10/assembly/ucsc" +mkdir -p "$BORZOI_MM10/mappability" +mkdir -p "$BORZOI_MM10/blacklist" + + +# download and uncompress auxiliary files required for Makefile (hg38) +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" ]; then + echo "hg38_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gaps.bed.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" +fi + +if [ -f "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (hg38) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_hg38.bed.gz | gunzip -c > "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" ]; then + echo "blacklist_hg38_all.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_hg38_all.bed.gz | gunzip -c > "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" +fi + +if [ -f "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" ]; then + echo "hg38.mm10.syn.net.gz already exists."
+else + wget https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.mm10.syn.net.gz -O "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" +fi + + +# download and uncompress auxiliary files required for Makefile (mm10) +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" ]; then + echo "mm10_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10_gaps.bed.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" +fi + +if [ -f "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (mm10) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_mm10.bed.gz | gunzip -c > "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" ]; then + echo "blacklist_mm10_all.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_mm10_all.bed.gz | gunzip -c > "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" +fi + + +# download and uncompress pre-compiled umap bed files +if [ -f umap_human.bed ]; then + echo "umap_human.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_human.bed.gz | gunzip -c > umap_human.bed +fi + +if [ -f umap_mouse.bed ]; then + echo "umap_mouse.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_mouse.bed.gz | gunzip -c > umap_mouse.bed +fi + + +# download and index hg38 ml genome +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" ]; then + echo "hg38.ml.fa already exists." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" +fi + +# download and index hg38 ml genome (gnomad major alleles) +if [ -f "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" ]; then + echo "hg38.ml.fa (gnomad) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gnomad.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" +fi + +# download and index mm10 ml genome +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" ]; then + echo "mm10.ml.fa already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10.ml.fa.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" + idx_genome.py "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" +fi diff --git a/tutorials/legacy/make_data/process_w5.sh b/tutorials/legacy/make_data/process_w5.sh new file mode 100755 index 0000000..9caa697 --- /dev/null +++ b/tutorials/legacy/make_data/process_w5.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# merge bigwig replicates, generate .w5 files and run qc + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for merged replicate files +mkdir -p "human/rna/encode/$ENC_ID/summary" + + +# step 1: generate per-replicate .w5 files + +# rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 1)." 
+else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" +fi + +# rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 2)." +else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 2: merge replicates + +if [ -f "human/rna/encode/$ENC_ID/summary/coverage+.w5" ]; then + echo "example RNA-seq .w5 already exists (merged)." +else + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage+.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage-.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 3: run qc on each replicate and the merged file + +if [ -f "human/rna/encode/$ENC_ID/summary/covqc/means.txt" ]; then + echo "qc statistics already exist." 
+else + # rep1 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc_m" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" + + # rep2 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc_m" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" + + # summary + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc" "human/rna/encode/$ENC_ID/summary/coverage+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc_m" "human/rna/encode/$ENC_ID/summary/coverage-.w5" +fi + diff --git a/tutorials/legacy/make_data/targets_human.txt b/tutorials/legacy/make_data/targets_human.txt new file mode 100644 index 0000000..0baf8d7 --- /dev/null +++ b/tutorials/legacy/make_data/targets_human.txt @@ -0,0 +1,3 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +0 ENCFF980ZHM+ human/rna/encode/ENCSR000AEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 1 RNA:K562 +1 ENCFF980ZHM- human/rna/encode/ENCSR000AEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 0 RNA:K562 diff --git a/tutorials/legacy/score_variants/README.md b/tutorials/legacy/score_variants/README.md new file mode 100644 index 0000000..827434f --- /dev/null +++ b/tutorials/legacy/score_variants/README.md @@ -0,0 +1,3 @@ +## Variant Scoring + +Todo. 
diff --git a/tutorials/legacy/score_variants/run_variant_scripts.ipynb b/tutorials/legacy/score_variants/run_variant_scripts.ipynb new file mode 100644 index 0000000..828c610 --- /dev/null +++ b/tutorials/legacy/score_variants/run_variant_scripts.ipynb @@ -0,0 +1,201 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f5d0f9fb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import h5py\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a94cbf8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-specific variant effect scores\n", + "\n", + "!./score_expr_sed.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1047ff0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'logSED', snp: 'chr1_46309111_A_G_b38', gene: 'ENSG00000237090.1', track: 'RNA:adipose_tissue' => -0.2551\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (gene-specific expression)\n", + "\n", + "sed_h5 = h5py.File('snp_sed/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 63\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['logSED'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f105ecd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-agnostic variant effect scores\n", + "\n", + "!./score_expr_sad.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "96e4f7cb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'logD2', snp: 
'chr1_43120331_C_T_b38', track: 'RNA:adipose_tissue' => 0.1057\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP (gene-agnostic expression)\n", + "\n", + "sad_h5 = h5py.File('snp_sad/f3c0/sad.h5', 'r')\n", + "\n", + "snp_ix = 1\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logD2', snp: '\" + str(sad_h5['snp'][snp_ix].decode()) + \"', track: '\" + str(sad_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sad_h5['logD2'][snp_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c56efaef", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate splice variant effect scores\n", + "\n", + "!./score_splice.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "980993fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'nDi', snp: 'chr1_156236330_G_A', gene: 'ENSG00000225905.1', track: 'RNA:foreskin fibroblast male newborn' => 0.0022\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (splicing)\n", + "\n", + "sed_h5 = h5py.File('snp_splice/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 116\n", + "target_ix = 755\n", + "\n", + "print(\"score: 'nDi', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['nDi'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05cccfb6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate polyadenylation variant effect scores\n", + "\n", + "!./score_polya.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "43ac562f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"score: 'COVR', snp: 'chr16_80976052_T_G', gene: 'ENSG00000132879.14', track: 'RNA:HeLa-S3 nuclear fraction' => 0.0628\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (polyadenylation)\n", + "\n", + "sed_h5 = h5py.File('snp_polya/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 47\n", + "target_ix = 100\n", + "\n", + "print(\"score: 'COVR', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['COVR'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba23572", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/score_variants/score_expr_sad.sh b/tutorials/legacy/score_variants/score_expr_sad.sh new file mode 100755 index 0000000..0d7c74a --- /dev/null +++ b/tutorials/legacy/score_variants/score_expr_sad.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sad/f3c0 + +borzoi_sad.py -o snp_sad/f3c0 --rc --stats logD2 -u -t ../../../examples/targets_human.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_expr.vcf diff --git a/tutorials/legacy/score_variants/score_expr_sed.sh b/tutorials/legacy/score_variants/score_expr_sed.sh new file mode 100755 index 0000000..9b97e2e --- /dev/null +++ b/tutorials/legacy/score_variants/score_expr_sed.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sed/f3c0 +
+borzoi_sed.py -o snp_sed/f3c0 --rc --stats logSED,logD2 -u -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_expr.vcf diff --git a/tutorials/legacy/score_variants/score_polya.sh b/tutorials/legacy/score_variants/score_polya.sh new file mode 100755 index 0000000..7eb24a5 --- /dev/null +++ b/tutorials/legacy/score_variants/score_polya.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_polya/f3c0 + +borzoi_sed_paqtl_cov.py -o snp_polya/f3c0 --rc --stats COVR -u -t ../../../examples/targets_rna.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_polya.vcf diff --git a/tutorials/legacy/score_variants/score_splice.sh b/tutorials/legacy/score_variants/score_splice.sh new file mode 100755 index 0000000..f85779f --- /dev/null +++ b/tutorials/legacy/score_variants/score_splice.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_splice/f3c0 + +borzoi_sed.py -o snp_splice/f3c0 --span --no_untransform --rc --stats nDi -u -t ../../../examples/targets_rna.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_splice.vcf diff --git a/tutorials/legacy/score_variants/snps_expr.vcf b/tutorials/legacy/score_variants/snps_expr.vcf new file mode 100644 index 0000000..bb8d7cc --- /dev/null +++ b/tutorials/legacy/score_variants/snps_expr.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +chr1 43110773 chr1_43110773_G_A_b38 G A . . +chr1 43120331 chr1_43120331_C_T_b38 C T . . +chr1 46309111 chr1_46309111_A_G_b38 A G . . +chr1 52632886 chr1_52632886_A_C_b38 A C . . +chr1 54053434 chr1_54053434_G_A_b38 G A . . 
diff --git a/tutorials/legacy/score_variants/snps_polya.vcf b/tutorials/legacy/score_variants/snps_polya.vcf new file mode 100644 index 0000000..5be4cad --- /dev/null +++ b/tutorials/legacy/score_variants/snps_polya.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 11790946 chr1_11790946_G_C G C . . MT=ENSG00000177000.grp_2.downstream.ENST00000641805;PD=924;PI=chr1_11790946_G_C +chr1 150160094 chr1_150160094_C_G C G . . MT=ENSG00000023902.grp_1.downstream.ENST00000369126;PD=29;PI=chr1_150160094_C_G +chr16 57665101 chr16_57665101_A_G A G . . MT=ENSG00000205336.grp_1.downstream.ENST00000568908;PD=73;PI=chr16_57665101_A_G +chr16 80976052 chr16_80976052_T_G T G . . MT=ENSG00000103121.grp_2.downstream.ENST00000565925;PD=24;PI=chr16_80976052_T_G +chr16 88857261 chr16_88857261_T_C T C . . MT=ENSG00000167515.grp_2.downstream.ENST00000564547;PD=3851;PI=chr16_88857261_T_C \ No newline at end of file diff --git a/tutorials/legacy/score_variants/snps_splice.vcf b/tutorials/legacy/score_variants/snps_splice.vcf new file mode 100644 index 0000000..710eaf2 --- /dev/null +++ b/tutorials/legacy/score_variants/snps_splice.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1665061 chr1_1665061_C_T C T . . MT=ENSG00000189339.grp_2.contained.ENST00000611123;SD=959;PI=chr1_1665061_C_T +chr1 1689221 chr1_1689221_G_A G A . . MT=ENSG00000189339.grp_1.contained.ENST00000614300;SD=1753;PI=chr1_1689221_G_A +chr1 50655526 chr1_50655526_T_C T C . . MT=ENSG00000185104.grp_2.contained.ENST00000396153;SD=3;PI=chr1_50655526_T_C +chr1 109489368 chr1_109489368_C_G C G . . MT=ENSG00000143537.grp_2.contained.ENST00000360674;SD=1;PI=chr1_155060832_G_A +chr1 156236330 chr1_156236330_G_A G A . . 
MT=ENSG00000160783.grp_1.contained.ENST00000368279;SD=17;PI=chr1_156236330_G_A diff --git a/tutorials/legacy/train_model/README.md b/tutorials/legacy/train_model/README.md new file mode 100644 index 0000000..1587061 --- /dev/null +++ b/tutorials/legacy/train_model/README.md @@ -0,0 +1,3 @@ +## Model Training + +Todo. diff --git a/tutorials/legacy/train_model/params_micro.json b/tutorials/legacy/train_model/params_micro.json new file mode 100644 index 0000000..5a9c716 --- /dev/null +++ b/tutorials/legacy/train_model/params_micro.json @@ -0,0 +1,78 @@ +{ + "train": { + "batch_size": 4, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0002, + "loss": "poisson_mn", + "total_weight": 0.2, + "warmup_steps": 10000, + "global_clipnorm": 0.2, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 128, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 160, + "filters_end": 320, + "divisible_by": 8, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 32, + "heads": 4, + "num_position_features": 32, + "dropout": 0.1, + "attention_dropout": 0.01, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 4 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "Cropping1D", + "cropping": 3072 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git 
a/tutorials/legacy/train_model/params_mini.json b/tutorials/legacy/train_model/params_mini.json new file mode 100644 index 0000000..14c089c --- /dev/null +++ b/tutorials/legacy/train_model/params_mini.json @@ -0,0 +1,77 @@ +{ + "train": { + "batch_size": 2, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0001, + "loss": "poisson_mn", + "total_weight": 0.2, + "warmup_steps": 20000, + "global_clipnorm": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 320, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 384, + "filters_end": 768, + "divisible_by": 16, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 64, + "heads": 4, + "num_position_features": 32, + "dropout": 0.2, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "Cropping1D", + "cropping": 3072 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/legacy/train_model/train_micro.sh b/tutorials/legacy/train_model/train_micro.sh new file mode 100755 index 0000000..3c334ee --- /dev/null +++ b/tutorials/legacy/train_model/train_micro.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o micro_models params_micro.json ../make_data/data/hg38 diff --git 
a/tutorials/legacy/train_model/train_mini.sh b/tutorials/legacy/train_model/train_mini.sh new file mode 100755 index 0000000..2cc5aa4 --- /dev/null +++ b/tutorials/legacy/train_model/train_mini.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o mini_models params_mini.json ../make_data/data/hg38