Skip to content

Commit

Permalink
[r a] Support for HCA duos_id (#6196)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Feb 13, 2025
1 parent bbd077a commit 6cbefcc
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 47 deletions.
2 changes: 1 addition & 1 deletion lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '12.2',
'version': '12.3',
'description': fd(f'''
# Overview
Expand Down
131 changes: 123 additions & 8 deletions lambdas/service/openapi.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'is_tissue_atlas_project': 'isTissueAtlasProject',
'tissue_atlas': 'tissueAtlas',
'bionetwork_name': 'bionetworkName',
'data_use_restriction': 'dataUseRestriction'
'data_use_restriction': 'dataUseRestriction',
'duos_id': 'duosId'
},
'sequencing_protocols': {
'instrument_manufacturer_model': 'instrumentManufacturerModel',
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,8 @@ def _project_types(cls) -> FieldTypes:
'tissue_atlas': [tissue_atlas],
'bionetwork_name': [null_str],
'estimated_cell_count': null_int,
'data_use_restriction': null_str
'data_use_restriction': null_str,
'duos_id': null_str
}

def _project(self, project: api.Project) -> MutableJSON:
Expand Down Expand Up @@ -721,7 +722,8 @@ def _project(self, project: api.Project) -> MutableJSON:
'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)),
'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks),
'estimated_cell_count': project.estimated_cell_count,
'data_use_restriction': project.data_use_restriction
'data_use_restriction': project.data_use_restriction,
'duos_id': project.duos_id
}

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,8 @@ def make_projects(self, entry) -> MutableJSONs:
'isTissueAtlasProject': project['is_tissue_atlas_project'],
'tissueAtlas': project.get('tissue_atlas'),
'bionetworkName': project['bionetwork_name'],
'dataUseRestriction': project.get('data_use_restriction')
'dataUseRestriction': project.get('data_use_restriction'),
'duosId': project.get('duos_id')
}
if self.entity_type == 'projects':
translated_project['projectDescription'] = project.get('project_description', [])
Expand Down
2 changes: 2 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ class Project(Entity):
estimated_cell_count: int | None
bionetworks: OrderedSet[Bionetwork]
data_use_restriction: str | None
duos_id: str | None

def __init__(self, json: JSON) -> None:
super().__init__(json)
Expand All @@ -339,6 +340,7 @@ def __init__(self, json: JSON) -> None:
for bionetwork in content.get('hca_bionetworks', ())
if bionetwork)
self.data_use_restriction = content.get('data_use_restriction')
self.duos_id = content.get('duos_id')

def _accessions(self, namespace: str) -> set[str]:
return {a.accession for a in self.accessions if a.namespace == namespace}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions test/service/data/manifest/terra/pfb_entities.json
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@
"document_id": [
"6615efae-fca8-4dd2-a223-9cfcf30fe94d"
],
"duos_id": [
null
],
"estimated_cell_count": null,
"institutions": [
"Fake Institution"
Expand Down Expand Up @@ -864,6 +867,9 @@
"document_id": [
"e8642221-4c2c-4fd7-b926-a68bce363c88"
],
"duos_id": [
null
],
"estimated_cell_count": null,
"institutions": [
"Farmers Trucks",
Expand Down Expand Up @@ -3305,6 +3311,9 @@
"document_id": [
"90bf705c-d891-5ce2-aa54-094488b445c6"
],
"duos_id": [
null
],
"estimated_cell_count": 10000,
"institutions": [
"Newcastle University"
Expand Down
11 changes: 11 additions & 0 deletions test/service/data/manifest/terra/pfb_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1999,6 +1999,17 @@
],
"type": "array"
}
},
{
"name": "duos_id",
"namespace": "projects",
"type": {
"items": [
"null",
"string"
],
"type": "array"
}
}
],
"name": "projects",
Expand Down
3 changes: 2 additions & 1 deletion test/service/test_index_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def assert_file_type_summaries(hit):
'isTissueAtlasProject',
'bionetworkName',
'estimatedCellCount',
'dataUseRestriction'
'dataUseRestriction',
'duosId'
}
response_json = get_response_json()
self.assertIn('hits', response_json)
Expand Down
51 changes: 30 additions & 21 deletions test/service/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def test_response_stage_files(self):
"tissueAtlas": [],
"estimatedCellCount": None,
"dataUseRestriction": [None],
"duosId": [None],
}
],
"protocols": [
Expand Down Expand Up @@ -586,6 +587,7 @@ def test_response_stage_projects(self):
"isTissueAtlasProject": False,
"accessions": [],
"dataUseRestriction": None,
"duosId": None,
}
],
"protocols": [
Expand Down Expand Up @@ -850,6 +852,7 @@ def test_response_stage_projects_accessions(self):
{"namespace": "insdc_study", "accession": "PRJNA000000"},
],
"dataUseRestriction": None,
"duosId": None,
}
],
"protocols": [
Expand Down Expand Up @@ -3499,27 +3502,33 @@ def test_projects_response(self):
}
self.assertEqual({None: 2, 'Lung': 1, 'Retina': 1, 'Blood': 1}, terms)

def test_data_use_restriction(self):
    """
    Query every exposed index with a filter and sort on the
    `dataUseRestriction` field and verify the facet terms and the value
    carried by the project of each hit.
    """
    field = 'dataUseRestriction'
    value = 'NRES'
    filters = {field: {'is': [value]}}
    params = {
        'catalog': self.catalog,
        'sort': field,
        'filters': json.dumps(filters)
    }
    for entity_type in self.index_service.metadata_plugin(self.catalog).exposed_indices:
        http_response = requests.get(self.base_url.set(path=('index', entity_type),
                                                       args=params))
        http_response.raise_for_status()
        body = http_response.json()
        # The facet is expected to list exactly the filtered value and None
        observed_terms = set()
        for term in body['termFacets'][field]['terms']:
            observed_terms.add(term['term'])
        self.assertEqual({None, value}, observed_terms)
        hits = body['hits']
        self.assertGreater(len(hits), 0)
        # The `projects` index yields a bare value, every other index
        # yields the value wrapped in a list
        if entity_type == 'projects':
            expected = value
        else:
            expected = [value]
        for hit in hits:
            project = one(hit['projects'])
            self.assertEqual(expected, project[field])
def test_data_use_and_duos_id(self):
    """
    For each of the `dataUseRestriction` and `duosId` fields, query every
    exposed index with a filter and sort on that field and verify the value
    carried by the project of each hit. The facet terms are only verified
    for fields other than `duosId`.
    """
    cases = {
        'dataUseRestriction': 'GRU',
        'duosId': 'DUOS-999999'
    }
    for field, value in cases.items():
        with self.subTest(field=field):
            filters = {field: {'is': [value]}}
            params = {
                'catalog': self.catalog,
                'sort': field,
                'filters': json.dumps(filters)
            }
            indices = self.index_service.metadata_plugin(self.catalog).exposed_indices
            for entity_type in indices:
                http_response = requests.get(self.base_url.set(path=('index', entity_type),
                                                               args=params))
                http_response.raise_for_status()
                body = http_response.json()
                # NOTE(review): the facet check is skipped for `duosId`,
                # presumably because that field is not exposed as a term
                # facet — confirm against the service's facet list
                if field != 'duosId':
                    observed_terms = set()
                    for term in body['termFacets'][field]['terms']:
                        observed_terms.add(term['term'])
                    self.assertEqual({None, value}, observed_terms)
                hits = body['hits']
                self.assertGreater(len(hits), 0)
                # The `projects` index yields a bare value, every other
                # index yields the value wrapped in a list
                if entity_type == 'projects':
                    expected = value
                else:
                    expected = [value]
                for hit in hits:
                    project = one(hit['projects'])
                    self.assertEqual(expected, project[field])


class TestUnpopulatedIndexResponse(IndexResponseTestCase):
Expand Down

0 comments on commit 6cbefcc

Please sign in to comment.