From 629688bfeb7c5d325cd4f8ea6465f87a4beffad4 Mon Sep 17 00:00:00 2001
From: claravox
Date: Tue, 8 Oct 2024 11:29:14 +0200
Subject: [PATCH] check avu unit test

---
 integration_tests.py         |  8 ++---
 troubleshoot_data.py         | 56 +++++----------------------
 unit-tests/test_util_misc.py | 70 ++++++++++++++++++++++++++++++++++--
 util/misc.py                 | 56 ++++++++++++++++++++++++++++-
 4 files changed, 136 insertions(+), 54 deletions(-)

diff --git a/integration_tests.py b/integration_tests.py
index abfa859c9..f345b5c58 100644
--- a/integration_tests.py
+++ b/integration_tests.py
@@ -350,16 +350,16 @@ def _test_folder_secure_func(ctx, func):
      "test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "size", 0, 3),
      "check": lambda x: x == 3},
     {"name": "msvc.json_objops.add_notexist_empty",
-     "test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
+     "test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
      "check": lambda x: x == '{"e": "f"}'},
     {"name": "msvc.json_objops.add_notexist_nonempty",
-     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
+     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
      "check": lambda x: x == '{"a": "b", "e": "f"}'},
     {"name": "msvc.json_objops.add_exist_nonempty",
-     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
+     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
      "check": lambda x: x == '{"a": "b", "e": "g"}'},
     {"name": "msvc.json_objops.get_exist",
-     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
+     "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
      "check": lambda x: str(x) == "(['c'], ['d'])"},
     {"name": "msvc.json_objops.get_notexist",
      "test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "e", ""), 'get', 1),
diff --git a/troubleshoot_data.py b/troubleshoot_data.py
index e33fa731c..8c1a88dc5 100644
--- a/troubleshoot_data.py
+++ b/troubleshoot_data.py
@@ -75,62 +75,26 @@ def find_data_packages(ctx, write_stdout):
         return []
 
 
-def check_data_package_system_avus(ctx, data_package, write_stdout):
+def check_print_data_package_system_avus(ctx, data_package, write_stdout):
     """
     Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_').
     This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
     a successfully published data package.
+    This also prints if there are any missing or unexpected results.
 
     :param ctx: Combined type of a callback and rei struct
     :param data_package: String representing the data package collection path.
     :param write_stdout: A boolean representing whether to write to stdout or rodsLog
 
-    :returns: A tuple containing boolean results of checking results
+    :returns: A 2-tuple containing boolean results of checking results
     """
+    extracted_avus = avu.of_coll(ctx, data_package)
+    results = misc.check_data_package_system_avus(extracted_avus)
 
-    # Fetch AVUs of the data package and filter those starting with 'org_'
-    extracted_avus = {m.attr for m in avu.of_coll(ctx, data_package) if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')}
-
-    # Define the set of ground truth AVUs
-    avu_names_suffix = [
-        'publication_approval_actor', 'publication_randomId',
-        'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
-        'publication_anonymousAccess', 'publication_versionDOIMinted',
-        'publication_accessRestriction', 'publication_landingPagePath',
-        'publication_licenseUri', 'publication_publicationDate',
-        'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
-        'publication_lastModifiedDateTime', 'publication_combiJsonPath',
-        'publication_landingPageUploaded', 'publication_oaiUploaded',
-        'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
-    ]
-
-    # Define set of AVUs with more than one version of publication
-    avu_names_base_suffix = [
-        'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
-        'publication_baseDOIMinted'
-    ]
-
-    if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus:
-        combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix
-        ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
-    else:
-        ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix}
-
-    # Find missing and unexpected AVUs
-    missing_avus = ground_truth_avus - extracted_avus
-    unexpected_avus = extracted_avus - ground_truth_avus
-
-    results = {
-        'no_missing_avus': not bool(missing_avus),
-        'missing_avus': list(missing_avus),
-        'no_unexpected_avus': not bool(unexpected_avus),
-        'unexpected_avus': list(unexpected_avus)
-    }
-
-    if missing_avus:
-        log.write(ctx, "check_data_package_system_avus: There are some missing AVUs in data package <{}> - {}".format(data_package, list(missing_avus)), write_stdout)
+    if not results["no_missing_avus"]:
+        log.write(ctx, "check_data_package_system_avus: There are some missing AVUs in data package <{}> - {}".format(data_package, results["missing_avus"]), write_stdout)
 
-    if unexpected_avus:
-        log.write(ctx, "check_data_package_system_avus: There are some unexpected AVUs in data package <{}> - {}".format(data_package, list(unexpected_avus)), write_stdout)
+    if not results["no_unexpected_avus"]:
+        log.write(ctx, "check_data_package_system_avus: There are some unexpected AVUs in data package <{}> - {}".format(data_package, results["unexpected_avus"]), write_stdout)
 
     return (results["no_missing_avus"], results["no_unexpected_avus"])
@@ -410,7 +374,7 @@ def batch_troubleshoot_published_data_packages(ctx, requested_package, log_file,
         schema_check_dict = vault_metadata_matches_schema(ctx, data_package, schema_cache, "troubleshoot-publications", write_stdout)
         result['schema_check'] = schema_check_dict['match_schema'] if schema_check_dict else False
 
-        result['no_missing_AVUs_check'], result['no_unexpected_AVUs_check'] = check_data_package_system_avus(ctx, data_package, write_stdout)
+        result['no_missing_AVUs_check'], result['no_unexpected_AVUs_check'] = check_print_data_package_system_avus(ctx, data_package, write_stdout)
         result['versionDOI_check'], result['baseDOI_check'] = check_datacite_doi_registration(ctx, data_package, offline, write_stdout)
         result['landingPage_check'] = check_landingpage(ctx, data_package, offline, api_call)
         publication_config = get_publication_config(ctx)
diff --git a/unit-tests/test_util_misc.py b/unit-tests/test_util_misc.py
index cddbe5fcd..72571daa5 100644
--- a/unit-tests/test_util_misc.py
+++ b/unit-tests/test_util_misc.py
@@ -6,19 +6,83 @@
 
 import sys
 import time
-from collections import OrderedDict
+from collections import namedtuple, OrderedDict
 from unittest import TestCase
 
 sys.path.append('../util')
 
-from misc import human_readable_size, last_run_time_acceptable, remove_empty_objects
+from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects
+
+avs_success_data_package = {
+    "org_publication_accessRestriction": "Open - freely retrievable",
+    "org_publication_anonymousAccess": "yes",
+    "org_publication_approval_actor": "datamanager#tempZone",
+    "org_publication_combiJsonPath": "/tempZone/yoda/publication/ICGVFV-combi.json",
+    "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/ICGVFV-dataCite.json",
+    "org_publication_dataCiteMetadataPosted": "yes",
+    "org_publication_landingPagePath": "/tempZone/yoda/publication/ICGVFV.html",
+    "org_publication_landingPageUploaded": "yes",
+    "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/ICGVFV.html",
+    "org_publication_lastModifiedDateTime": "2024-10-04T15:32:46.000000",
+    "org_publication_license": "Creative Commons Attribution 4.0 International Public License",
+    "org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
+    "org_publication_oaiUploaded": "yes",
+    "org_publication_publicationDate": "2024-10-04T15:33:17.853806",
+    "org_publication_randomId": "ICGVFV",
+    "org_publication_status": "OK",
+    "org_publication_submission_actor": "researcher#tempZone",
+    "org_publication_vaultPackage": "/tempZone/home/vault-default-3/research-default-3[1728048679]",
+    "org_publication_versionDOI": "10.00012/UU01-ICGVFV",
+    "org_publication_versionDOIMinted": "yes",
+}
+Avu = namedtuple('Avu', list('avu'))
+Avu.attr = Avu.a
+Avu.value = Avu.v
+Avu.unit = Avu.u
 
 
 class UtilMiscTest(TestCase):
 
+    def test_check_data_package_system_avus(self):
+        # Success
+        avs = dict(avs_success_data_package)  # Work on a copy: the cases below add and delete keys
+        avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_success)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
+        # Unexpected
+        avs['org_publication_userAddedSomethingWeird'] = "yodayoda:)"
+        avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_unexpected)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertFalse(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 1)
+
+        # Missing and unexpected
+        del avs['org_publication_landingPagePath']
+        avus_missing_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_missing_unexpected)
+        self.assertFalse(result['no_missing_avus'])
+        self.assertFalse(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 1)
+        self.assertTrue(len(result['unexpected_avus']) == 1)
+
+        # Missing
+        del avs['org_publication_userAddedSomethingWeird']
+        avus_missing = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_missing)
+        self.assertFalse(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 1)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
     def test_last_run_time_acceptable(self):
         """Test the last run time for copy to vault"""
-        # No last run time (job hasn't be tried before)
+        # No last run time (job hasn't been tried before)
         found = False
         last_run = 1
         self.assertEqual(last_run_time_acceptable("b", found, last_run, 300), True)
diff --git a/util/misc.py b/util/misc.py
index 12df2a0af..57643d623 100644
--- a/util/misc.py
+++ b/util/misc.py
@@ -8,6 +8,60 @@
 import time
 from collections import OrderedDict
 
+import constants
+
+
+def check_data_package_system_avus(extracted_avus):
+    """
+    Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_').
+    This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
+    a successfully published data package.
+
+    :param extracted_avus: AVUs of the data package
+
+    :returns: Dictionary of the results of the check
+    """
+    # Keep only the attribute names that start with 'org_publication_'
+    extracted_avus = {m.attr for m in extracted_avus if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')}
+
+    # Define the set of ground truth AVUs
+    avu_names_suffix = [
+        'publication_approval_actor', 'publication_randomId',
+        'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
+        'publication_anonymousAccess', 'publication_versionDOIMinted',
+        'publication_accessRestriction', 'publication_landingPagePath',
+        'publication_licenseUri', 'publication_publicationDate',
+        'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
+        'publication_lastModifiedDateTime', 'publication_combiJsonPath',
+        'publication_landingPageUploaded', 'publication_oaiUploaded',
+        'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
+    ]
+
+    # Define set of AVUs with more than one version of publication
+    avu_names_base_suffix = [
+        'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
+        'publication_baseDOIMinted'
+    ]
+
+    if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus:
+        combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix
+        ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
+    else:
+        ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix}
+
+    # Find missing and unexpected AVUs
+    missing_avus = ground_truth_avus - extracted_avus
+    unexpected_avus = extracted_avus - ground_truth_avus
+
+    results = {
+        'no_missing_avus': not bool(missing_avus),
+        'missing_avus': list(missing_avus),
+        'no_unexpected_avus': not bool(unexpected_avus),
+        'unexpected_avus': list(unexpected_avus)
+    }
+
+    return results
+
 
 def last_run_time_acceptable(coll, found, last_run, config_backoff_time):
     """Return whether the last run time is acceptable to continue with task."""
@@ -48,5 +102,5 @@ def remove_empty_objects(d):
         # Clean lists by filtering out empty objects.
         return [remove_empty_objects(item) for item in d if remove_empty_objects(item) not in (None, '', {}, [])]
     else:
-        # Return the value abecause it is not a dict or list.
+        # Return the value because it is not a dict or list.
         return d