Skip to content

Commit

Permalink
[r a] Support for HCA duos_id (#6196)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Feb 26, 2025
1 parent 95762ae commit c5cad83
Show file tree
Hide file tree
Showing 12 changed files with 212 additions and 48 deletions.
2 changes: 1 addition & 1 deletion lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '12.2',
'version': '12.3',
'description': fd(f'''
# Overview
Expand Down
131 changes: 123 additions & 8 deletions lambdas/service/openapi.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'is_tissue_atlas_project': 'isTissueAtlasProject',
'tissue_atlas': 'tissueAtlas',
'bionetwork_name': 'bionetworkName',
'data_use_restriction': 'dataUseRestriction'
'data_use_restriction': 'dataUseRestriction',
'duos_id': 'duosId'
},
'sequencing_protocols': {
'instrument_manufacturer_model': 'instrumentManufacturerModel',
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,8 @@ def _project_types(cls) -> FieldTypes:
'tissue_atlas': [tissue_atlas],
'bionetwork_name': [null_str],
'estimated_cell_count': null_int,
'data_use_restriction': null_str
'data_use_restriction': null_str,
'duos_id': null_str
}

def _project(self, project: api.Project) -> MutableJSON:
Expand Down Expand Up @@ -728,7 +729,8 @@ def _project(self, project: api.Project) -> MutableJSON:
'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)),
'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks),
'estimated_cell_count': project.estimated_cell_count,
'data_use_restriction': project.data_use_restriction
'data_use_restriction': project.data_use_restriction,
'duos_id': project.duos_id
}

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,8 @@ def make_projects(self, entry) -> MutableJSONs:
'isTissueAtlasProject': project['is_tissue_atlas_project'],
'tissueAtlas': project.get('tissue_atlas'),
'bionetworkName': project['bionetwork_name'],
'dataUseRestriction': project.get('data_use_restriction')
'dataUseRestriction': project.get('data_use_restriction'),
'duosId': project.get('duos_id')
}
if self.entity_type == 'projects':
translated_project['projectDescription'] = project.get('project_description', [])
Expand Down
2 changes: 2 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ class Project(Entity):
estimated_cell_count: int | None
bionetworks: OrderedSet[Bionetwork]
data_use_restriction: str | None
duos_id: str | None

def __init__(self, json: JSON) -> None:
super().__init__(json)
Expand All @@ -339,6 +340,7 @@ def __init__(self, json: JSON) -> None:
for bionetwork in content.get('hca_bionetworks', ())
if bionetwork)
self.data_use_restriction = content.get('data_use_restriction')
self.duos_id = content.get('duos_id')

def _accessions(self, namespace: str) -> set[str]:
    """
    Return the distinct accession values of those entries in
    `self.accessions` whose namespace equals the given one.
    """
    in_namespace = (
        entry
        for entry in self.accessions
        if entry.namespace == namespace
    )
    return {entry.accession for entry in in_namespace}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions test/service/data/manifest/terra/pfb_entities.json
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@
"document_id": [
"6615efae-fca8-4dd2-a223-9cfcf30fe94d"
],
"duos_id": [
null
],
"estimated_cell_count": null,
"institutions": [
"Fake Institution"
Expand Down Expand Up @@ -864,6 +867,9 @@
"document_id": [
"e8642221-4c2c-4fd7-b926-a68bce363c88"
],
"duos_id": [
null
],
"estimated_cell_count": null,
"institutions": [
"Farmers Trucks",
Expand Down Expand Up @@ -3305,6 +3311,9 @@
"document_id": [
"90bf705c-d891-5ce2-aa54-094488b445c6"
],
"duos_id": [
null
],
"estimated_cell_count": 10000,
"institutions": [
"Newcastle University"
Expand Down
11 changes: 11 additions & 0 deletions test/service/data/manifest/terra/pfb_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1999,6 +1999,17 @@
],
"type": "array"
}
},
{
"name": "duos_id",
"namespace": "projects",
"type": {
"items": [
"null",
"string"
],
"type": "array"
}
}
],
"name": "projects",
Expand Down
3 changes: 2 additions & 1 deletion test/service/test_index_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def assert_file_type_summaries(hit):
'isTissueAtlasProject',
'bionetworkName',
'estimatedCellCount',
'dataUseRestriction'
'dataUseRestriction',
'duosId'
}
response_json = get_response_json()
self.assertIn('hits', response_json)
Expand Down
51 changes: 30 additions & 21 deletions test/service/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def test_response_stage_files(self):
"tissueAtlas": [],
"estimatedCellCount": None,
"dataUseRestriction": [None],
"duosId": [None],
}
],
"protocols": [
Expand Down Expand Up @@ -588,6 +589,7 @@ def test_response_stage_projects(self):
"isTissueAtlasProject": False,
"accessions": [],
"dataUseRestriction": None,
"duosId": None,
}
],
"protocols": [
Expand Down Expand Up @@ -852,6 +854,7 @@ def test_response_stage_projects_accessions(self):
{"namespace": "insdc_study", "accession": "PRJNA000000"},
],
"dataUseRestriction": None,
"duosId": None,
}
],
"protocols": [
Expand Down Expand Up @@ -3501,27 +3504,33 @@ def test_projects_response(self):
}
self.assertEqual({None: 2, 'Lung': 1, 'Retina': 1, 'Blood': 1}, terms)

def test_data_use_restriction(self):
    """
    Request every exposed index, filtered and sorted by
    `dataUseRestriction`, and check both the reported facet terms and
    the value carried by the project of each hit.
    """
    facet_name = 'dataUseRestriction'
    facet_value = 'NRES'
    query = {
        'catalog': self.catalog,
        'sort': facet_name,
        'filters': json.dumps({facet_name: {'is': [facet_value]}})
    }
    metadata_plugin = self.index_service.metadata_plugin(self.catalog)
    for index_name in metadata_plugin.exposed_indices:
        http_response = requests.get(
            self.base_url.set(path=('index', index_name), args=query)
        )
        http_response.raise_for_status()
        body = http_response.json()
        facet_terms = body['termFacets'][facet_name]['terms']
        observed_terms = {entry['term'] for entry in facet_terms}
        self.assertEqual({None, facet_value}, observed_terms)
        hits = body['hits']
        self.assertGreater(len(hits), 0)
        # The `projects` index is expected to report the bare value, every
        # other index a single-element list.
        if index_name == 'projects':
            expected = facet_value
        else:
            expected = [facet_value]
        for hit in hits:
            self.assertEqual(expected, one(hit['projects'])[facet_name])
def test_data_use_and_duos_id(self):
    """
    For each tested project field, request every exposed index filtered
    and sorted by that field, and verify that all hits carry the
    requested value in their project. Where the field is flagged as
    having a term facet, also verify the facet's terms.

    Each (field, entity_type) combination runs in its own sub-test, so a
    failure names the offending index and does not prevent the remaining
    combinations from being checked.
    """
    # (response field, value to filter by, whether to check term facets).
    # The facet check is skipped for `duosId`, presumably because it is
    # not offered as a facet — confirm against the plugin's facet list.
    test_data = [
        ('dataUseRestriction', 'GRU', True),
        ('duosId', 'DUOS-999999', False)
    ]
    # The plugin depends only on the catalog, so look it up once instead
    # of once per tested field.
    plugin = self.index_service.metadata_plugin(self.catalog)
    for field, value, has_facet in test_data:
        params = {
            'catalog': self.catalog,
            'sort': field,
            'filters': json.dumps({field: {'is': [value]}})
        }
        for entity_type in plugin.exposed_indices:
            with self.subTest(field=field, entity_type=entity_type):
                url = self.base_url.set(path=('index', entity_type), args=params)
                response = requests.get(url)
                response.raise_for_status()
                response = response.json()
                if has_facet:
                    facets = response['termFacets']
                    terms = {term['term'] for term in facets[field]['terms']}
                    self.assertEqual({None, value}, terms)
                hits = response['hits']
                self.assertGreater(len(hits), 0)
                # The `projects` index is expected to report the bare
                # value, every other index a single-element list.
                expected = value if entity_type == 'projects' else [value]
                for hit in hits:
                    self.assertEqual(expected, one(hit['projects'])[field])


class TestUnpopulatedIndexResponse(IndexResponseTestCase):
Expand Down

0 comments on commit c5cad83

Please sign in to comment.