dbujold · hitz · Feb 19, 2016 · Feb 19, 2016 · Apr 19, 2016 · Apr 19, 2016
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
 #PyCharm workdir
 .idea
-
+*.pyc
 #Scripts output directory content
-output/*
+output/*.json
diff --git a/IHEC_json_converter/bisulfite.py b/IHEC_json_converter/bisulfite.py
@@ -1,29 +1,21 @@
 __author__ = 'kelley'
 
-import json
-from general import convert_to_IHEC_format
 
 VERSION='1.6'
 
-def bisulfite_wrapper(assembly, taxon_id):
-    url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=whole-genome%20shotgun%20bisulfite%20sequencing'
+# Used to set is_main
+BISULFATE_TRACK_HIEARCHY = {'methylation_profile': ['methylation state at CpG', 'signal']}
 
-    # Used to set is_main
-    track_hierarchy = {'methylation_profile': ['methylation state at CpG', 'methylation state at CHH']}
 
-    def dataset_additions_f(experiment, json_object):
+def bisulfate_addition(experiment, json_object):
+    # Set experiment_type and assay_type
+    json_object['experiment_attributes']['experiment_type'] = 'DNA Methylation'
+    json_object['experiment_attributes']['assay_type'] = 'WGB-Seq'
 
-        #Set experiment_type
-        json_object['experiment_attributes']['experiment_type'] = 'DNA Methylation'
-        json_object['experiment_attributes']['assay_type'] = 'WGB-Seq'
+    return json_object
 
-        return json_object
 
-    return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)
-
-
-
-if __name__ == "__main__":
-    data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
-    with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
-        json.dump(data, outfile, indent=4)
+# if __name__ == "__main__":
+#     data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
+#     with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
+#         json.dump(data, outfile, indent=4)
diff --git a/IHEC_json_converter/chipseq.py b/IHEC_json_converter/chipseq.py
@@ -1,34 +1,26 @@
 __author__ = 'kelley'
 
-import json
-from general import convert_to_IHEC_format
 
 VERSION='1.6'
 
-def chip_seq_wrapper(assembly, taxon_id, target):
-    url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target
-
-    # Used to set is_main
-    track_hierarchy = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
+CHIPSEQ_TRACK_HIEARCHY = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
                                 'replicated peaks', 'peaks', 'hotspots'],
                            'signal': ['signal p-value', 'fold change over control', 'signal', 'raw signal']}
 
-    def dataset_additions_f(experiment, json_object):
-
-        #Set experiment_type
-        json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']
 
-        return json_object
+def chip_seq_addition(experiment, json_object):
 
-    return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)
+    # Set experiment_type from the experiment's target label
+    json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']
 
+    return json_object
 
 
 
 
-if __name__ == "__main__":
-    targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
-    for t in targets:
-        data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
-        with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
-            json.dump(data, outfile, indent=4)
+# if __name__ == "__main__":
+#     targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
+#     for t in targets:
+#         data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
+#         with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
+#             json.dump(data, outfile, indent=4)
diff --git a/IHEC_json_converter/fetch_all_exp_jsons.py b/IHEC_json_converter/fetch_all_exp_jsons.py
@@ -2,7 +2,7 @@
 import getopt
 import json
 from datetime import datetime
-import rnaseq, bisulfite, chipseq
+from reference_epigenome_experiments import collect_experiments
 
 def main(argv):
     opts, args = getopt.getopt(argv, "", ["assembly=", "taxon-id="])
@@ -22,43 +22,9 @@ def main(argv):
 
     date_str = datetime.now().date()
 
-    #Todo: Merge experiments as a single JSON
-
-    #Whole-Genome Bisulfite Sequencing experiments
-    print("Processing WGB-Seq...")
-    try:
-        data = bisulfite.bisulfite_wrapper(assembly='hg19', taxon_id=9606)
-        filename = 'WGB-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
-        output_file(data, filename)
-        print("Done.")
-    except Exception as e:
-        print('An error occured while fetching WGB-Seq experiments: ' + e.message)
-    print
-
-    #RNA-Sequencing experiments
-    print("Processing RNA-Seq...")
-    try:
-        data = rnaseq.rna_seq_wrapper(assembly=assembly, taxon_id=taxon_id)
-        filename = 'RNA-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
-        output_file(data, filename)
-        print("Done.")
-    except Exception as e:
-        print('An error occured while fetching RNA-Seq experiments: ' + e.message)
-    print
-
-    #ChIP-Seq experiments
-    targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
-    for t in targets:
-        print("Processing ChIP-Seq %s..." % t)
-        try:
-            data = chipseq.chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
-            filename = 'ChIP-Seq_%s_%s_%s_%s.json' % (taxon_id, assembly, t, date_str)
-            output_file(data, filename)
-            print("Done.")
-        except Exception as e:
-            print('An error occured while fetching ChIP-Seq %s experiments: ' % t + e.message)
-        print
-    print("Operation completed.")
+    filename = 'ENCODE.{}.{}.{}.json'.format(taxon_id, assembly, date_str)
+    data = collect_experiments(assembly, taxon_id)
+    output_file(data, filename)