From 86103cefeccd48bab82eb865575348852798a51a Mon Sep 17 00:00:00 2001 From: dustine32 Date: Wed, 24 Jun 2020 15:34:42 -0700 Subject: [PATCH] Add GPI option to gpad2gocams for geneontology/gocamgen#83 --- bin/validate.py | 11 +++--- ontobio/rdfgen/gocamgen/gocam_builder.py | 48 +++++++++++++++++++++--- ontobio/rdfgen/gocamgen/gocamgen.py | 2 + 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/bin/validate.py b/bin/validate.py index 1c9f5612..30d0e824 100644 --- a/bin/validate.py +++ b/bin/validate.py @@ -13,6 +13,7 @@ import logging import sys import traceback +import subprocess import yamldown @@ -547,10 +548,11 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base @cli.command() @click.pass_context @click.option("--gpad_path", "-g", type=click.Path(), required=True) +@click.option("--gpi_path", "-i", type=click.Path(), required=True) @click.option("--target", "-t", type=click.Path(), required=True) @click.option("--ontology", "-o", type=click.Path(exists=True), required=False) -# Eventually will need access to GPI as well for getting taxon ID - should exist by now - get path from metadata -def gpad2gocams(ctx, gpad_path, target, ontology): +# Eventually will need access to GPI as well for getting taxon ID - should exist by now +def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology): if gpad_path.endswith(".gz"): unzipped = os.path.splitext(gpad_path)[0] unzip(gpad_path, unzipped) @@ -562,14 +564,13 @@ def gpad2gocams(ctx, gpad_path, target, ontology): absolute_target = os.path.abspath(target) gpad_basename = os.path.basename(gpad_path) gpad_basename_root, gpad_ext = os.path.splitext(gpad_basename) - if gpad_ext in ["gpad", "gpa"]: + if gpad_ext in [".gpad", ".gpa"]: output_basename = gpad_basename_root + ".nq" else: output_basename = gpad_basename + ".nq" output_path = os.path.join(absolute_target, output_basename) - ontology_graph = OntologyFactory().create(ontology, ignore_cache=True) - builder = GoCamBuilder(ontology_graph) + builder = GoCamBuilder(ontology, gpi_file=gpi_path) for gene, associations in assocs_by_gene.items(): builder.make_model_and_add_to_store(gene, annotations=associations) diff --git a/ontobio/rdfgen/gocamgen/gocam_builder.py b/ontobio/rdfgen/gocamgen/gocam_builder.py index e1f83739..630d520c 100644 --- a/ontobio/rdfgen/gocamgen/gocam_builder.py +++ b/ontobio/rdfgen/gocamgen/gocam_builder.py @@ -6,6 +6,7 @@ from ontobio.rdfgen.gocamgen.utils import ShexException from ontobio.io.gpadparser import GpadParser from ontobio.io.assocparser import AssocParserConfig +from ontobio.io.entityparser import GpiParser from ontobio.ontol_factory import OntologyFactory from ontobio.util.go_utils import GoAspector import argparse @@ -16,6 +17,7 @@ import time import click from os import path +from typing import List # from abc import ABC, abstractmethod from rdflib.graph import ConjunctiveGraph from rdflib.store import Store @@ -40,24 +42,26 @@ class GoCamBuilder: - def __init__(self, ontology_graph=None): - # self.ro_ontology = OntologyFactory().create("http://purl.obolibrary.org/obo/ro.owl") - # self.gorel_ontology = OntologyFactory().create("http://release.geneontology.org/2019-03-18/ontology/extensions/gorel.obo") - # Can't get logical_definitions w/ ont.create("go"), need to load ontology via PURL - # self.ontology = OntologyFactory().create("http://purl.obolibrary.org/obo/go.owl") - if ontology_graph is None: + def __init__(self, ontology=None, gpi_file=None): + if ontology is None: ontology_graph = OntologyFactory().create("http://purl.obolibrary.org/obo/go/extensions/go-lego.owl") + else: + ontology_graph = OntologyFactory().create(ontology, ignore_cache=True) self.ontology = ontology_graph self.ro_ontology = self.extract_relations_ontology(self.ontology) + # self.ontology = OntologyFactory().create("http://purl.obolibrary.org/obo/go.owl") + # self.ro_ontology = OntologyFactory().create("http://purl.obolibrary.org/obo/ro.owl") self.aspector = GoAspector(self.ontology) self.store = plugin.get('IOMemory', Store)() self.errors = GeneErrorSet() # Errors by gene ID + self.gpi_entities = self.parse_gpi(gpi_file) def translate_to_model(self, gene, assocs): model = AssocGoCamModel(gene, assocs, store=self.store) model.ontology = self.ontology model.ro_ontology = self.ro_ontology model.go_aspector = self.aspector + model.gpi_entities = self.gpi_entities model.translate() return model @@ -120,6 +124,38 @@ def extract_relations_ontology(ontology_graph): ro_terms = ro_terms + ontology_graph.descendants(t, reflexive=True) return ontology_graph.subontology(nodes=ro_terms) + @staticmethod + def parse_gpi(gpi_file): + # { + # "id":"MGI:MGI:87853", + # "label":"a", + # "full_name":"nonagouti", + # "synonyms":[ + # "agouti", + # "As", + # "agouti signal protein", + # "ASP" + # ], + # "type":"gene", + # "parents":[ + # + # ], + # "xrefs":[ + # "UniProtKB:Q03288" + # ], + # "taxon":{ + # "id":"NCBITaxon:10090" + # } + # } + if gpi_file is None: + return None + parser = GpiParser() + gpi_entities = {} + entities = parser.parse(gpi_file) + for entity in entities: + gpi_entities[entity['id']] = entity + return gpi_entities + class AssocExtractor: def __init__(self, gpad_file, parser_config: AssocParserConfig = None): diff --git a/ontobio/rdfgen/gocamgen/gocamgen.py b/ontobio/rdfgen/gocamgen/gocamgen.py index 0a951239..6f611bf6 100644 --- a/ontobio/rdfgen/gocamgen/gocamgen.py +++ b/ontobio/rdfgen/gocamgen/gocamgen.py @@ -409,10 +409,12 @@ def __init__(self, modeltitle, assocs, connection_relations=None, store=None): self.go_aspector = None self.default_contributor = "http://orcid.org/0000-0002-6659-0416" self.graph.bind("GOREL", GOREL) # Because GOREL isn't in context.jsonld's + self.gpi_entities = None def translate(self): self.associations.go_ontology = self.ontology + self.associations.gpi_entities = self.gpi_entities self.associations.collapse_annotations() for a in self.associations: