Skip to content
This repository has been archived by the owner on Oct 4, 2023. It is now read-only.

modification to bring up to spec #1

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#PyCharm workdir
.idea

*.pyc
#Scripts output directory content
output/*
output/*.json
30 changes: 11 additions & 19 deletions IHEC_json_converter/bisulfite.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,21 @@
__author__ = 'kelley'

import json
from general import convert_to_IHEC_format

VERSION='1.6'

def bisulfite_wrapper(assembly, taxon_id):
url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=whole-genome%20shotgun%20bisulfite%20sequencing'
# Used to set is_main
BISULFATE_TRACK_HIEARCHY = {'methylation_profile': ['methylation state at CpG', 'signal']}

# Used to set is_main
track_hierarchy = {'methylation_profile': ['methylation state at CpG', 'methylation state at CHH']}

def dataset_additions_f(experiment, json_object):
def bisulfate_addition(experiment, json_object):
#Set experiment_type and assay_type
json_object['experiment_attributes']['experiment_type'] = 'DNA Methylation'
json_object['experiment_attributes']['assay_type'] = 'WGB-Seq'

#Set experiment_type and assay_type
json_object['experiment_attributes']['experiment_type'] = 'DNA Methylation'
json_object['experiment_attributes']['assay_type'] = 'WGB-Seq'
return json_object

return json_object

return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)



if __name__ == "__main__":
data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
json.dump(data, outfile, indent=4)
# if __name__ == "__main__":
# data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
# with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
# json.dump(data, outfile, indent=4)
30 changes: 11 additions & 19 deletions IHEC_json_converter/chipseq.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
__author__ = 'kelley'

import json
from general import convert_to_IHEC_format

VERSION='1.6'

def chip_seq_wrapper(assembly, taxon_id, target):
url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target

# Used to set is_main
track_hierarchy = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
CHIPSEQ_TRACK_HIEARCHY = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
'replicated peaks', 'peaks', 'hotspots'],
'signal': ['signal p-value', 'fold change over control', 'signal', 'raw signal']}

def dataset_additions_f(experiment, json_object):

#Set experiment_type
json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']

return json_object
def chip_seq_addition(experiment, json_object):

return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)
#Set experiment_type
json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']

return json_object




if __name__ == "__main__":
targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
for t in targets:
data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
json.dump(data, outfile, indent=4)
# if __name__ == "__main__":
# targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
# for t in targets:
# data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
# with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
# json.dump(data, outfile, indent=4)
42 changes: 4 additions & 38 deletions IHEC_json_converter/fetch_all_exp_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import getopt
import json
from datetime import datetime
import rnaseq, bisulfite, chipseq
from reference_epigenome_experiments import collect_experiments

def main(argv):
opts, args = getopt.getopt(argv, "", ["assembly=", "taxon-id="])
Expand All @@ -22,43 +22,9 @@ def main(argv):

date_str = datetime.now().date()

#TODO: Merge all experiments into a single JSON file

#Whole-Genome Bisulfite Sequencing experiments
print("Processing WGB-Seq...")
try:
data = bisulfite.bisulfite_wrapper(assembly='hg19', taxon_id=9606)
filename = 'WGB-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching WGB-Seq experiments: ' + e.message)
print

#RNA-Sequencing experiments
print("Processing RNA-Seq...")
try:
data = rnaseq.rna_seq_wrapper(assembly=assembly, taxon_id=taxon_id)
filename = 'RNA-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching RNA-Seq experiments: ' + e.message)
print

#ChIP-Seq experiments
targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
for t in targets:
print("Processing ChIP-Seq %s..." % t)
try:
data = chipseq.chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
filename = 'ChIP-Seq_%s_%s_%s_%s.json' % (taxon_id, assembly, t, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching ChIP-Seq %s experiments: ' % t + e.message)
print
print("Operation completed.")
filename = 'ENCODE.{}.{}.{}.json'.format(taxon_id, assembly, date_str)
data = collect_experiments(assembly, taxon_id)
output_file(data, filename)



Expand Down
Loading