Check with-from ID taxon in GPI - geneontology/gocamgen#79; pipe spli…
dustine32 committed Jun 24, 2020
1 parent 86103ce commit 02ef984
Showing 1 changed file with 52 additions and 38 deletions.
ontobio/rdfgen/gocamgen/collapsed_assoc.py (52 additions, 38 deletions)
@@ -14,6 +14,7 @@ def __init__(self, associations):
self.collapsed_associations = []
self.assoc_dict = {}
self.go_ontology = None
self.gpi_entities = None

def setup_ontologies(self):
if self.go_ontology is None:
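Note: the new gpi_entities attribute starts out as None and is presumably populated elsewhere from a parsed GPI file before collapsing runs. A minimal sketch of the shape the taxon check below relies on, with hypothetical IDs and a hypothetical instance name (not the actual gocamgen loading code):

    # Assumed structure: entity ID -> parsed GPI record carrying at least a "taxon" field
    gpi_entities = {
        "UniProtKB:P11111": {"id": "UniProtKB:P11111", "taxon": "NCBITaxon:9606"},
        "UniProtKB:Q33333": {"id": "UniProtKB:Q33333", "taxon": "NCBITaxon:10090"},
    }
    collapsed_set.gpi_entities = gpi_entities  # collapsed_set: an instance of the class above (hypothetical name)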
@@ -43,20 +44,14 @@ def collapse_annotations(self):
subj_id = a["subject"]["id"]
qualifiers = a["qualifiers"]
term = a["object"]["id"]
with_from = a["evidence"]["with_support_from"]
eco_code = a["evidence"]["type"]
extensions = get_annot_extensions(a)
with_froms = get_with_froms(a) # Handle pipe separation according to import requirements
is_protein_binding = eco_code == IPI_ECO_CODE and BINDING_ROOT in self.go_ontology.ancestors(term, reflexive=True)
if is_protein_binding:
cas = self.find_or_create_collapsed_associations(subj_id, qualifiers, term, with_froms, extensions)
with_from = None # Don't use ontobio-parsed with_from on lines
else:
cas = [self.find_or_create_collapsed_association(subj_id, qualifiers, term, None, extensions)]
for ca in cas:
# Line
association_line = CollapsedAssociationLine(a, with_from)
with_froms = self.get_with_froms(a) # Handle pipe separation according to import requirements
cas = []
for wf in with_froms:
ca = self.find_or_create_collapsed_association(subj_id, qualifiers, term, wf, extensions)
association_line = CollapsedAssociationLine(a, wf["line"])
ca.lines.append(association_line)
cas.append(ca)

def find_or_create_collapsed_association(self, subj_id, qualifiers, term, with_from, extensions):
query_header = {
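Note: each element returned by the new get_with_froms() (defined further down) is a dict that can carry a "header" part, which feeds the identity of the collapsed association, and a "line" part, which stays on the individual CollapsedAssociationLine. A rough illustration of the values the rewritten loop consumes, with made-up IDs:

    # Hypothetical get_with_froms() output for one IPI binding annotation
    with_froms = [
        {"header": ["UniProtKB:P11111"], "line": []},                    # same-taxon partner: groups lines
        {"header": ["UniProtKB:P22222"], "line": ["UniProtKB:Q33333"]},  # cross-taxon ID stays on the line
    ]
    # One collapsed association per dict (found or created), each getting a line built from wf["line"].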
@@ -69,25 +64,61 @@ def find_or_create_collapsed_association(self, subj_id, qualifiers, term, with_f
},
'object_extensions': extensions
}
if with_from:
query_header['evidence'] = {'with_support_from': sorted(with_from)}
if with_from and "header" in with_from:
query_header['evidence'] = {'with_support_from': sorted(with_from['header'])}
for ca in self.collapsed_associations:
if ca.header == query_header:
return ca
new_ca = CollapsedAssociation(query_header)
self.collapsed_associations.append(new_ca)
return new_ca
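Note: collapsed associations are matched purely by dict equality on query_header, so only the sorted "header" portion of the with/from value participates in grouping; the "line" IDs never affect which association a line lands in. A quick sketch of that behavior (collapsed_set and the IDs are hypothetical):

    wf = {"header": ["UniProtKB:P11111"], "line": []}
    ca1 = collapsed_set.find_or_create_collapsed_association("UniProtKB:P99999", ["enables"], "GO:0005515", wf, {})
    ca2 = collapsed_set.find_or_create_collapsed_association("UniProtKB:P99999", ["enables"], "GO:0005515", wf, {})
    assert ca1 is ca2  # identical headers reuse the same collapsed association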

def find_or_create_collapsed_associations(self, subj_id, qualifiers, term, with_froms, extensions):
cas = []
for wf in with_froms:
ca = self.find_or_create_collapsed_association(subj_id, qualifiers, term, wf, extensions)
cas.append(ca)
return cas

def __iter__(self):
return iter(self.collapsed_associations)

def get_with_froms(self, annot):
source_line = annot["source_line"]
vals = source_line.split("\t")
with_from_col = vals[6]
# Parse into array (by "|") of arrays (by ",")
with_from_ds = [] # The list of lists
for piped_with_from in with_from_col.split("|"):
# Will be bypassing ontobio ID validation? Let's try teaming up with ontobio functions!
split_line = SplitLine(line=source_line, values=vals, taxon="") # req'd for error reporting in ontobio?
validated_comma_with_froms = GPAD_PARSER.validate_pipe_separated_ids(piped_with_from, split_line,
empty_allowed=True, extra_delims=",")
with_from_ds.append(validated_comma_with_froms)

# Now arrange these into "header" and "line" values
eco_code = annot["evidence"]["type"]
term = annot["object"]["id"]
is_binding = eco_code == IPI_ECO_CODE and BINDING_ROOT in self.go_ontology.ancestors(term, reflexive=True)
if is_binding:
# Using GPI, check with_froms for taxon equivalency to subj_id
if self.gpi_entities:
subject_id = annot["subject"]["id"]
subject_entity = self.gpi_entities[subject_id]
values_separated = []
for wf in with_from_ds:
wf_separated = {
"header": [],
"line": []
}
for wf_id in wf:
wf_entity = self.gpi_entities.get(wf_id)
if wf_entity and wf_entity.get("taxon") == subject_entity["taxon"]:
wf_separated['header'].append(wf_id)
else:
wf_separated['line'].append(wf_id)
values_separated.append(wf_separated)
return values_separated
else:
# Everything is defaulted to header if no GPI available
return [{"header": wf} for wf in with_from_ds]
else:
# Everything is defaulted to line if not binding
return [{"line": wf} for wf in with_from_ds]
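Note: the with/from column is split on "|" into chunks (each chunk may hold comma-separated IDs, validated through ontobio), and for IPI binding annotations the GPI lookup routes IDs sharing the subject's taxon into "header" and everything else into "line". A standalone sketch of that routing with made-up IDs and taxa, skipping the ontobio validation step (not the gocamgen code itself):

    subject_taxon = "NCBITaxon:9606"
    gpi_taxa = {  # hypothetical GPI lookup: ID -> taxon
        "UniProtKB:P11111": "NCBITaxon:9606",
        "UniProtKB:P22222": "NCBITaxon:9606",
        "UniProtKB:Q33333": "NCBITaxon:10090",
    }
    with_from_col = "UniProtKB:P11111|UniProtKB:P22222,UniProtKB:Q33333"
    values_separated = []
    for chunk in with_from_col.split("|"):
        ids = chunk.split(",")
        values_separated.append({
            "header": [i for i in ids if gpi_taxa.get(i) == subject_taxon],
            "line": [i for i in ids if gpi_taxa.get(i) != subject_taxon],
        })
    # values_separated == [{"header": ["UniProtKB:P11111"], "line": []},
    #                      {"header": ["UniProtKB:P22222"], "line": ["UniProtKB:Q33333"]}]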


class CollapsedAssociation:
def __init__(self, header):
@@ -168,23 +199,6 @@ def get_annot_extensions(annot):
return {}


def get_with_froms(annot):
source_line = annot["source_line"]
vals = source_line.split("\t")
with_from_col = vals[6]
# Parse into array (by "|") of arrays (by ",")
with_from_ds = []
for piped_with_from in with_from_col.split("|"):
# Will be bypassing ontobio ID validation? Let's try teaming up with ontobio functions!
split_line = SplitLine(line=source_line, values=vals, taxon="") # req'd for error reporting in ontobio?
validated_comma_with_froms = GPAD_PARSER.validate_pipe_separated_ids(piped_with_from, split_line, empty_allowed=True, extra_delims=",")
# comma_with_froms = piped_with_from.split(",")
# validated_comma_with_froms = []
# for wf in comma_with_froms:
with_from_ds.append(validated_comma_with_froms)
return with_from_ds


def extract_properties_from_string(prop_col):
props = prop_col.split("|")
props_dict = {}