From 985173c0dd53f63574868284680ef12999d36e3e Mon Sep 17 00:00:00 2001 From: selewis Date: Thu, 16 Nov 2017 14:23:04 -0800 Subject: [PATCH] enhancement to filter by the provided_by information issue #545 --- ontobio/golr/golr_associations.py | 8 +++----- ontobio/golr/golr_query.py | 15 +++++++++++++- tests/test_golr_associations.py | 33 ++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/ontobio/golr/golr_associations.py b/ontobio/golr/golr_associations.py index 0eb9d9de..95ad8eda 100644 --- a/ontobio/golr/golr_associations.py +++ b/ontobio/golr/golr_associations.py @@ -118,7 +118,7 @@ def top_species(**kwargs): else: d = fcs['subject_taxon'] return sorted(d.items(), key=lambda t: -t[1]) - + def bulk_fetch(subject_category, object_category, taxon, rows=MAX_ROWS, **kwargs): @@ -178,7 +178,7 @@ def pivot_query_as_matrix(facet=None, facet_pivot_fields=[], **kwargs): xlabels=set() ylabels=set() - + for r in results: logging.info("R={}".format(r)) xtype=r['field'] @@ -204,7 +204,7 @@ def pivot_query_as_matrix(facet=None, facet_pivot_fields=[], **kwargs): 'yaxis':ylabels, 'z':z} return m - + # TODO: unify this with the monarch-specific instance @@ -294,5 +294,3 @@ def get_homologs(gene, relation=HomologyTypes.Ortholog.value): object_category='gene', relation=relation, subject=gene) - - diff --git a/ontobio/golr/golr_query.py b/ontobio/golr/golr_query.py index 58b17cde..8f782dd3 100644 --- a/ontobio/golr/golr_query.py +++ b/ontobio/golr/golr_query.py @@ -45,7 +45,7 @@ class GolrFields: """ - Enumeration of fields in Golr. + Enumeration of fields in Monarch Golr. 
Note the Monarch golr schema is taken as canonical here """ @@ -427,6 +427,11 @@ class GolrAssociationQuery(GolrAbstractQuery): compact_associations contains a more compact representation consisting of objects with (subject, relation and objects) + provided_by: String + + indicates the original source that provided the assertions + will only return assertions from this source + config : Config See :ref:`Config` for details. The config object can be used @@ -471,6 +476,7 @@ def __init__(self, facet_mincount=1, facet_pivot_fields=[], facet_on = 'on', + provided_by=None, pivot_subject_object=False, unselect_evidence=False, rows=10, @@ -519,6 +525,7 @@ def __init__(self, self.facet_mincount=facet_mincount self.facet_pivot_fields=facet_pivot_fields self.facet_on=facet_on + self.provided_by=provided_by self.pivot_subject_object=pivot_subject_object self.unselect_evidence=unselect_evidence self.max_rows=100000 @@ -708,6 +715,12 @@ def solr_params(self): else: fq['evidence_object_closure'] = e + provided_by=self.provided_by + if provided_by is not None: + # want to be able to select by whoever provided this assertion see: + # Dataset URIs in metadata and in main tll dumps are not connected #545 + fq[M.IS_DEFINED_BY] = 'https://data.monarchinitiative.org/ttl/'+provided_by+'.ttl' + if self.exclude_automatic_assertions: fq['-evidence_object_closure'] = 'ECO:0000501' diff --git a/tests/test_golr_associations.py b/tests/test_golr_associations.py index 9dc0cf39..d17532fc 100644 --- a/tests/test_golr_associations.py +++ b/tests/test_golr_associations.py @@ -3,7 +3,6 @@ """ from ontobio.golr.golr_associations import search_associations, search_associations_compact, select_distinct_subjects, get_objects_for_subject, get_subjects_for_object - HUMAN_SHH = 'NCBIGene:6469' HOLOPROSENCEPHALY = 'HP:0001360' TWIST_ZFIN = 'ZFIN:ZDB-GENE-050417-357' @@ -47,7 +46,7 @@ def test_go_assocs_negated(): # we also place NOT as a qualifier neg_assocs2 = [a for a in assocs if 'not' in 
a['qualifiers']] assert len(neg_assocs2) > 0 - + def test_go_assocs_compact(): assocs = search_associations_compact(subject=TWIST_ZFIN, object_category='function' @@ -70,8 +69,8 @@ def test_go_assocs_compact(): print(" QUERY FOR {} -> {}".format(obj,a)) m = [a for a in rassocs if a['subject'] == TWIST_ZFIN] assert len(m) == 1 - - + + def test_pheno_assocs(): payload = search_associations(subject=TWIST_ZFIN, object_category='phenotype' @@ -92,7 +91,7 @@ def test_pheno_assocs_compact(): a = assocs[0] assert a['subject'] == TWIST_ZFIN assert 'ZP:0007631' in a['objects'] - + def test_pheno_objects(): results = search_associations(subject=TWIST_ZFIN, fetch_objects=True, @@ -103,7 +102,7 @@ def test_pheno_objects(): print(str(objs)) assert len(objs) > 1 assert 'ZP:0007631' in objs - + def test_func_objects(): results = search_associations(subject=TWIST_ZFIN, fetch_objects=True, @@ -114,7 +113,7 @@ def test_func_objects(): print(objs) assert DVPF in objs assert len(objs) > 1 - + def test_pheno_objects_shh_2(): """ Equivalent to above, using convenience method @@ -136,7 +135,7 @@ def test_pheno2gene(): print(len(subjs)) assert HUMAN_SHH in subjs assert len(subjs) > 50 - + def test_disease_assocs(): payload = search_associations(subject=HUMAN_SHH, object_category='disease' @@ -153,7 +152,7 @@ def test_disease2gene(): for a in assocs: print(str(a)) assert len(assocs) > 0 - + def test_species_facet(): payload = search_associations(subject_category='gene', object_category='phenotype', @@ -162,5 +161,17 @@ def test_species_facet(): fcs = payload['facet_counts'] print(str(fcs)) assert 'Homo sapiens' in fcs['subject_taxon_label'].keys() - - + +def test_provided_by(capsys): + with capsys.disabled(): + payload = search_associations(subject_category='gene', + object_category='gene', + subject='HGNC:10848', + relation_closure='RO:HOM0000017', + homology_type='O', + provided_by='zfin', + facet_fields=['is_defined_by'], + rows=100) + fcs = payload['facet_counts'] + print('FCS: 
{}'.format(fcs)) + assert 'https://data.monarchinitiative.org/ttl/zfin.ttl' in fcs['is_defined_by'].keys()