From 85b0fd6d8797fe436f044061e5d38ccfdb3a79f5 Mon Sep 17 00:00:00 2001 From: Daniel Sotirhos Date: Wed, 30 Oct 2024 11:03:18 -0700 Subject: [PATCH] [r] Support for AnVIL duos_id (#6620) --- src/azul/plugins/metadata/anvil/__init__.py | 5 +++++ src/azul/plugins/metadata/anvil/indexer/transform.py | 1 + src/azul/plugins/metadata/anvil/service/response.py | 1 + src/azul/plugins/repository/tdr_anvil/__init__.py | 3 ++- .../2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json | 1 + test/indexer/test_anvil.py | 8 ++++++-- test/service/data/verbatim/anvil/pfb_entities.json | 2 ++ test/service/data/verbatim/anvil/pfb_schema.json | 8 ++++++++ test/service/test_manifest.py | 6 ++++++ 9 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index b3fe44532..afd514814 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -159,6 +159,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'registered_identifier', 'title', 'data_modality', + 'duos_id', ] }, 'donors': { @@ -345,6 +346,10 @@ def verbatim_pfb_schema(self, is_polymorphic=is_duos_type) ] if is_duos_type: + field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name, + column_name='duos_id', + anvil_datatype='string', + is_polymorphic=True)) field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name, column_name='description', anvil_datatype='string', diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index 858899bd5..6cd29b6cc 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -491,6 +491,7 @@ def _duos_types(cls) -> FieldTypes: return { 'document_id': null_str, 'description': null_str, + 'duos_id': null_str, } def _duos(self, dataset: EntityReference) -> MutableJSON: diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py index 8d0be3c12..6175bd647 100644 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ b/src/azul/plugins/metadata/anvil/service/response.py @@ -210,6 +210,7 @@ def _non_pivotal_fields_by_entity_type(self) -> dict[str, set[str]]: }, 'datasets': { 'dataset_id', + 'duos_id', 'title' }, 'diagnoses': { diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 3d7afc2b9..de63bcdb0 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -328,6 +328,7 @@ def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBund def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: duos_info = self.tdr.get_duos(bundle_fqid.source) + duos_id = None if duos_info is None else one(duos_info['consentGroups'])['datasetIdentifier'] description = None if duos_info is None else duos_info.get('studyDescription') entity_id = change_version(bundle_fqid.uuid, self.bundle_uuid_version, @@ -337,7 +338,7 @@ def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: bundle = TDRAnvilBundle(fqid=bundle_fqid) bundle.add_entity(entity=entity, version=self._version, - row={'description': description}) + row={'duos_id': duos_id, 'description': description}) return bundle def _bundle_entity(self, bundle_fqid: TDRAnvilBundleFQID) -> KeyReference: diff --git a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json index 7da9d5f3c..42c50467f 100644 --- a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json +++ b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json @@ -2,6 +2,7 @@ "entities": { "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { "description": "Study description from DUOS", + "duos_id": "DUOS-000000", "version": "2022-06-01T00:00:00.000000Z" } }, diff --git a/test/indexer/test_anvil.py b/test/indexer/test_anvil.py index 263f833d1..a4d9d24e5 100644 --- a/test/indexer/test_anvil.py +++ b/test/indexer/test_anvil.py @@ -75,7 +75,7 @@ def setUpClass(cls) -> None: mock_duos_url = furl('https:://mock_duos.lan') - duos_id = 'foo' + duos_id = 'DUOS-000000' duos_description = 'Study description from DUOS' @classmethod @@ -93,6 +93,9 @@ def _patch_duos(cls) -> None: } })), Mock(spec=HTTPResponse, status=200, data=json.dumps({ + 'consentGroups': [{ + 'datasetIdentifier': cls.duos_id + }], 'studyDescription': cls.duos_description })) ])) @@ -226,8 +229,9 @@ def test_dataset_description(self): # These fields are populated only in the primary bundle self.assertEqual(dataset_ref.entity_id, contents['document_id']) self.assertEqual(['phs000693'], contents['registered_identifier']) - # This field is populated only in the DUOS bundle + # These fields are populated only in the DUOS bundle self.assertEqual('Study description from DUOS', contents['description']) + self.assertEqual('DUOS-000000', contents['duos_id']) else: self.fail(qualifier) self.assertDictEqual(doc_counts, { diff --git a/test/service/data/verbatim/anvil/pfb_entities.json b/test/service/data/verbatim/anvil/pfb_entities.json index 29d14d974..7c24f88c6 100644 --- a/test/service/data/verbatim/anvil/pfb_entities.json +++ b/test/service/data/verbatim/anvil/pfb_entities.json @@ -103,6 +103,7 @@ "datarepo_row_id": null, "dataset_id": null, "description": "Study description from DUOS", + "duos_id": "DUOS-000000", "owner": null, "principal_investigator": null, "registered_identifier": null, @@ -265,6 +266,7 @@ "datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", "description": null, + "duos_id": null, "owner": [ "Debbie Nickerson" ], diff --git a/test/service/data/verbatim/anvil/pfb_schema.json b/test/service/data/verbatim/anvil/pfb_schema.json index 07aee95c5..d27626eff 100644 --- a/test/service/data/verbatim/anvil/pfb_schema.json +++ b/test/service/data/verbatim/anvil/pfb_schema.json @@ -560,6 +560,14 @@ "string" ] }, + { + "name": "duos_id", + "namespace": "anvil_dataset", + "type": [ + "null", + "string" + ] + }, { "name": "owner", "namespace": "anvil_dataset", diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index f57b88d01..3e4aa3086 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -1799,6 +1799,12 @@ def test_compact_manifest(self): '', '' ), + ( + 'datasets.duos_id', + '', + '', + '', + ), ( 'donors.document_id', '',