From 928eaa0c4722d6b3160cf1015cac46a588b270bf Mon Sep 17 00:00:00 2001 From: Galen Date: Wed, 13 Nov 2024 22:36:28 -0800 Subject: [PATCH 1/9] try to import an imported ri dt value as a single uuid --- arches/app/datatypes/datatypes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index dce4591933..a4472e13de 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2219,6 +2219,18 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): + try: + uuid.UUID(value) + return [ + { + "resourceId": str(value), + "ontologyProperty": "", + "inverseOntologyProperty": "", + "resourceXresourceId": str(uuid.uuid4()), + } + ] + except ValueError: + print("not a uuid") try: return json.loads(value) except ValueError: From 66cee1c90301ba31acbddf1af14de1429fe62153 Mon Sep 17 00:00:00 2001 From: Galen Date: Mon, 25 Nov 2024 14:08:11 -0800 Subject: [PATCH 2/9] initial logic to handle uuid, legacyid, or dict input for r-i dt, re #11597 --- arches/app/datatypes/datatypes.py | 131 +++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 21 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index a4472e13de..f6a122f75f 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2219,30 +2219,119 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): - try: - uuid.UUID(value) - return [ - { - "resourceId": str(value), + # kwargs config looks like this: + # { + # "graphs": [ + # { + # "name": "Person or Group", + # "graphid": "ccbd1537-ac5e-11e6-84a5-026d961c88e6", + # "relationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a", + # "relationshipCollection": "00000000-0000-0000-0000-000000000005", + # "useOntologyRelationship": False, + # "inverseRelationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a" + # } + # ], + # "searchDsl": "", + # "searchString": "" + # } + from arches.app.search.search_engine_factory import SearchEngineFactory + + relatable_graphs = kwargs.get("graphs", []) + default_values_lookup = dict() + for graph in relatable_graphs: + if graph.get("useOntologyRelationship", False): + default_values_lookup[graph["graphid"]] = { "ontologyProperty": "", "inverseOntologyProperty": "", - "resourceXresourceId": str(uuid.uuid4()), } - ] - except ValueError: - print("not a uuid") - try: - return json.loads(value) - except ValueError: - # do this if json (invalid) is formatted with single quotes, re #6390 - try: - return ast.literal_eval(value) - except: - return value - except TypeError: - # data should come in as json but python list is accepted as well - if isinstance(value, list): - return value + else: + default_values_lookup[graph["graphid"]] = { + "ontologyProperty": graph["relationshipConcept"], + "inverseOntologyProperty": graph["inverseRelationshipConcept"], + } + + subtypes_dict = {"uuid": uuid.UUID, "dict": dict, "str": str} + + if isinstance(value, str): + for test_method in [uuid.UUID, json.loads, ast.literal_eval]: + try: + converted_value = test_method(value) + except: + converted_value = False + if converted_value is not False: + break + if converted_value is False: + return [] + else: + converted_value = value + + value_type = None + for value_subtype_label, value_subtype_class in list(subtypes_dict.items()): + if not isinstance(converted_value, list): + converted_value = [converted_value] + + if isinstance(converted_value[0], value_subtype_class): + value_type = value_subtype_label + break + + se = SearchEngineFactory().create() + query = Query(se) + query.include("graph_id") + boolquery = Bool() + transformed_value = [] + + match value_type: + case "str": # aka legacyid + # query the graphid associated with each resourceinstance.legacyid + boolquery.filter(Terms(field="legacyid", terms=converted_value)) + query.add_query(boolquery) + results = query.search(index=RESOURCES_INDEX) + for hit in results["hits"]["hits"]: + resource_instance_object = {} + resource_instance_object["resourceId"] = hit["_id"] + resource_instance_object["ontologyProperty"] = ( + default_values_lookup[hit["_source"]["graph_id"]][ + "ontologyProperty" + ] + ) + resource_instance_object["inverseOntologyProperty"] = ( + default_values_lookup[hit["_source"]["graph_id"]][ + "inverseOntologyProperty" + ] + ) + resource_instance_object["resourceXresourceId"] = str(uuid.uuid4()) + transformed_value.append(resource_instance_object) + + case "uuid": + # query the graphid associated with each resourceinstance.resourceinstanceid + results = query.search( + index=RESOURCES_INDEX, id=[str(val) for val in converted_value] + ) + for hit in results["hits"]["hits"]: + resource_instance_object = {} + resource_instance_object["resourceId"] = hit["_id"] + resource_instance_object["ontologyProperty"] = ( + default_values_lookup[hit["_source"]["graph_id"]][ + "ontologyProperty" + ] + ) + resource_instance_object["inverseOntologyProperty"] = ( + default_values_lookup[hit["_source"]["graph_id"]][ + "inverseOntologyProperty" + ] + ) + resource_instance_object["resourceXresourceId"] = str(uuid.uuid4()) + transformed_value.append(resource_instance_object) + + case "dict": # assume data correctly formatted + for val in converted_value: + try: + uuid.UUID(val["resourceId"]) + except: + continue + transformed_value.append(val) + + return transformed_value def transform_export_values(self, value, *args, **kwargs): return json.dumps(value) From 2a19dfcbb269cc092acf57cd46f1324d8388cb9e Mon Sep 17 00:00:00 2001 From: Galen Date: Mon, 25 Nov 2024 21:13:48 -0800 Subject: [PATCH 3/9] handle for regular str objects, re #11597 --- arches/app/datatypes/datatypes.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index f6a122f75f..c97d56f1c9 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2256,11 +2256,14 @@ def transform_value_for_tile(self, value, **kwargs): for test_method in [uuid.UUID, json.loads, ast.literal_eval]: try: converted_value = test_method(value) + break except: converted_value = False - if converted_value is not False: - break - if converted_value is False: + + if converted_value is False and value != "": + converted_value = value # is a string, likely legacyid + elif converted_value is False: + logger.warning("ResourceInstanceDataType: value is empty") return [] else: converted_value = value From 2e89cf57ff1464e11291a4a96ceda804736cda51 Mon Sep 17 00:00:00 2001 From: Galen Date: Tue, 26 Nov 2024 12:29:53 -0800 Subject: [PATCH 4/9] ensure exact match on legacyid using keyword, re #11597 --- arches/app/datatypes/datatypes.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index c97d56f1c9..5fb3da4221 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2269,10 +2269,9 @@ def transform_value_for_tile(self, value, **kwargs): converted_value = value value_type = None + if not isinstance(converted_value, list): + converted_value = [converted_value] for value_subtype_label, value_subtype_class in list(subtypes_dict.items()): - if not isinstance(converted_value, list): - converted_value = [converted_value] - if isinstance(converted_value[0], value_subtype_class): value_type = value_subtype_label break @@ -2286,7 +2285,9 @@ def transform_value_for_tile(self, value, **kwargs): match value_type: case "str": # aka legacyid # query the graphid associated with each resourceinstance.legacyid - boolquery.filter(Terms(field="legacyid", terms=converted_value)) + boolquery.must( + Terms(field="legacyid.keyword", terms=converted_value) + ) # exact match on keyword query.add_query(boolquery) results = query.search(index=RESOURCES_INDEX) for hit in results["hits"]["hits"]: From 422c272d177894d8b043b56da46856ce6ed9999b Mon Sep 17 00:00:00 2001 From: Galen Date: Tue, 26 Nov 2024 15:56:53 -0800 Subject: [PATCH 5/9] ensure to split comma-separated strings, re #11597 --- arches/app/datatypes/datatypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 5fb3da4221..d0fff3a783 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2261,7 +2261,8 @@ def transform_value_for_tile(self, value, **kwargs): converted_value = False if converted_value is False and value != "": - converted_value = value # is a string, likely legacyid + converted_value = value.split(",") # is a string, likely legacyid + converted_value = [val.strip() for val in converted_value if val] elif converted_value is False: logger.warning("ResourceInstanceDataType: value is empty") return [] From 8a1521d7a90604992781c4e46110257f318ba8a8 Mon Sep 17 00:00:00 2001 From: Galen Date: Tue, 26 Nov 2024 16:22:28 -0800 Subject: [PATCH 6/9] refactor some duplicate code, re #11597 --- arches/app/datatypes/datatypes.py | 52 ++++++++++++++----------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index d0fff3a783..2e25d1bfb2 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2250,6 +2250,22 @@ def transform_value_for_tile(self, value, **kwargs): "inverseOntologyProperty": graph["inverseRelationshipConcept"], } + def build_resource_instance_object(hit): + return { + "resourceId": hit["_id"], + "ontologyProperty": ( + default_values_lookup[hit["_source"]["graph_id"]][ + "ontologyProperty" + ] + ), + "inverseOntologyProperty": ( + default_values_lookup[hit["_source"]["graph_id"]][ + "inverseOntologyProperty" + ] + ), + "resourceXresourceId": str(uuid.uuid4()), + } + subtypes_dict = {"uuid": uuid.UUID, "dict": dict, "str": str} if isinstance(value, str): @@ -2263,6 +2279,10 @@ def transform_value_for_tile(self, value, **kwargs): if converted_value is False and value != "": converted_value = value.split(",") # is a string, likely legacyid converted_value = [val.strip() for val in converted_value if val] + try: + converted_value = [uuid.UUID(val) for val in converted_value] + except: + pass elif converted_value is False: logger.warning("ResourceInstanceDataType: value is empty") return [] @@ -2292,41 +2312,15 @@ def transform_value_for_tile(self, value, **kwargs): query.add_query(boolquery) results = query.search(index=RESOURCES_INDEX) for hit in results["hits"]["hits"]: - resource_instance_object = {} - resource_instance_object["resourceId"] = hit["_id"] - resource_instance_object["ontologyProperty"] = ( - default_values_lookup[hit["_source"]["graph_id"]][ - "ontologyProperty" - ] - ) - resource_instance_object["inverseOntologyProperty"] = ( - default_values_lookup[hit["_source"]["graph_id"]][ - "inverseOntologyProperty" - ] - ) - resource_instance_object["resourceXresourceId"] = str(uuid.uuid4()) - transformed_value.append(resource_instance_object) + transformed_value.append(build_resource_instance_object(hit)) case "uuid": # query the graphid associated with each resourceinstance.resourceinstanceid results = query.search( index=RESOURCES_INDEX, id=[str(val) for val in converted_value] ) - for hit in results["hits"]["hits"]: - resource_instance_object = {} - resource_instance_object["resourceId"] = hit["_id"] - resource_instance_object["ontologyProperty"] = ( - default_values_lookup[hit["_source"]["graph_id"]][ - "ontologyProperty" - ] - ) - resource_instance_object["inverseOntologyProperty"] = ( - default_values_lookup[hit["_source"]["graph_id"]][ - "inverseOntologyProperty" - ] - ) - resource_instance_object["resourceXresourceId"] = str(uuid.uuid4()) - transformed_value.append(resource_instance_object) + for hit in results["docs"]: + transformed_value.append(build_resource_instance_object(hit)) case "dict": # assume data correctly formatted for val in converted_value: From 1c7a0bc0a06da0cf5cc1f61878cf2806c416dc2a Mon Sep 17 00:00:00 2001 From: Galen Date: Tue, 26 Nov 2024 16:25:26 -0800 Subject: [PATCH 7/9] rm comments --- arches/app/datatypes/datatypes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 2e25d1bfb2..221703cdbf 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2305,7 +2305,6 @@ def build_resource_instance_object(hit): match value_type: case "str": # aka legacyid - # query the graphid associated with each resourceinstance.legacyid boolquery.must( Terms(field="legacyid.keyword", terms=converted_value) ) # exact match on keyword @@ -2315,14 +2314,13 @@ def build_resource_instance_object(hit): transformed_value.append(build_resource_instance_object(hit)) case "uuid": - # query the graphid associated with each resourceinstance.resourceinstanceid results = query.search( index=RESOURCES_INDEX, id=[str(val) for val in converted_value] ) for hit in results["docs"]: transformed_value.append(build_resource_instance_object(hit)) - case "dict": # assume data correctly formatted + case "dict": # assume data correctly parsed via ast.literal for val in converted_value: try: uuid.UUID(val["resourceId"]) From 504798a518ef592e9a11401a4c4bbaf35fc0a0ba Mon Sep 17 00:00:00 2001 From: Galen Date: Wed, 1 Jan 2025 22:30:30 -0800 Subject: [PATCH 8/9] fill in missing access of node config graph.relationshipConcept --- arches/app/datatypes/datatypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 221703cdbf..40fcefb650 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2239,7 +2239,9 @@ def transform_value_for_tile(self, value, **kwargs): relatable_graphs = kwargs.get("graphs", []) default_values_lookup = dict() for graph in relatable_graphs: - if graph.get("useOntologyRelationship", False): + if graph.get("useOntologyRelationship", False) or not graph.get( + "relationshipConcept", None + ): default_values_lookup[graph["graphid"]] = { "ontologyProperty": "", "inverseOntologyProperty": "", From cf956a853925bba63d093daa36c012c68f1e4838 Mon Sep 17 00:00:00 2001 From: Galen Date: Wed, 1 Jan 2025 22:40:07 -0800 Subject: [PATCH 9/9] check for pre-existing legacyid keyed resources in import_csv --- arches/app/etl_modules/import_single_csv.py | 30 +++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arches/app/etl_modules/import_single_csv.py b/arches/app/etl_modules/import_single_csv.py index 41d7d1a415..d0f11a4a85 100644 --- a/arches/app/etl_modules/import_single_csv.py +++ b/arches/app/etl_modules/import_single_csv.py @@ -14,7 +14,13 @@ from django.http import HttpRequest from django.utils.translation import gettext as _ from arches.app.datatypes.datatypes import DataTypeFactory -from arches.app.models.models import ETLModule, GraphModel, Node, NodeGroup +from arches.app.models.models import ( + ETLModule, + GraphModel, + Node, + NodeGroup, + ResourceInstance, +) from arches.app.models.system_settings import settings import arches.app.tasks as tasks from arches.app.utils.betterJSONSerializer import JSONSerializer @@ -397,10 +403,24 @@ def populate_staging_table( for row in reader: if id_label in fieldnames: id_index = fieldnames.index(id_label) - try: - resourceid = uuid.UUID(row[id_index]) - legacyid = None - except (AttributeError, ValueError): + # try: + # resourceid = uuid.UUID(row[id_index]) + # legacyid = None + # except (AttributeError, ValueError): + # resourceid = uuid.uuid4() + # legacyid = row[id_index] + try: # check for pre-existing resource keyed on the legacyid + resource = ResourceInstance.objects.get( + legacyid=row[id_index] + ) + resourceid = resource.resourceinstanceid + legacyid = row[id_index] + except Exception as e: + print( + "no pre-existing resource found for legacyid", + row[id_index], + ) + print(e) resourceid = uuid.uuid4() legacyid = row[id_index] else: