Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to import a resource-instance datatype value supplied as a single UUID #11655

Draft
wants to merge 10 commits into
base: dev/7.6.x
Choose a base branch
from
124 changes: 112 additions & 12 deletions arches/app/datatypes/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2219,18 +2219,118 @@ def get_search_terms(self, nodevalue, nodeid=None):
return terms

def transform_value_for_tile(self, value, **kwargs):
    """Convert an incoming value into a list of resource-instance relation dicts.

    Accepts several input shapes:
      - an already-parsed list of relation dicts / uuid.UUID objects / strings
      - a JSON or python-literal string encoding such a list (re #6390 allows
        single-quoted pseudo-JSON via ast.literal_eval)
      - a single UUID string, or comma-separated UUID strings
      - a bare legacyid string, or comma-separated legacyids

    UUIDs and legacyids are resolved against the resources search index; each
    hit becomes a dict with resourceId / ontologyProperty /
    inverseOntologyProperty / resourceXresourceId keys. Relation dicts are
    passed through after validating their resourceId. Returns [] when nothing
    usable can be extracted.

    kwargs is the node config, e.g.:
    {
        "graphs": [
            {
                "name": "Person or Group",
                "graphid": "ccbd1537-ac5e-11e6-84a5-026d961c88e6",
                "relationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a",
                "relationshipCollection": "00000000-0000-0000-0000-000000000005",
                "useOntologyRelationship": False,
                "inverseRelationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a"
            }
        ],
        "searchDsl": "",
        "searchString": ""
    }
    """
    # Per-graph default ontology properties, derived from the node config.
    default_values_lookup = {}
    for graph in kwargs.get("graphs", []):
        if graph.get("useOntologyRelationship", False) or not graph.get(
            "relationshipConcept"
        ):
            default_values_lookup[graph["graphid"]] = {
                "ontologyProperty": "",
                "inverseOntologyProperty": "",
            }
        else:
            default_values_lookup[graph["graphid"]] = {
                "ontologyProperty": graph["relationshipConcept"],
                # .get() guards against configs missing the inverse concept;
                # fall back to the forward concept in that case
                "inverseOntologyProperty": graph.get(
                    "inverseRelationshipConcept", graph["relationshipConcept"]
                ),
            }

    def build_resource_instance_object(hit):
        # Map a search hit to the tile representation of the relation.
        defaults = default_values_lookup[hit["_source"]["graph_id"]]
        return {
            "resourceId": hit["_id"],
            "ontologyProperty": defaults["ontologyProperty"],
            "inverseOntologyProperty": defaults["inverseOntologyProperty"],
            "resourceXresourceId": str(uuid.uuid4()),
        }

    # Sentinel instead of False: json.loads("false") legitimately returns False.
    _UNPARSED = object()

    if isinstance(value, str):
        converted_value = _UNPARSED
        for parse in (uuid.UUID, json.loads, ast.literal_eval):
            try:
                converted_value = parse(value)
                break
            except (ValueError, SyntaxError, TypeError):
                continue
        if converted_value is _UNPARSED:
            if not value:
                logger.warning("ResourceInstanceDataType: value is empty")
                return []
            # Not a uuid/json/literal: treat as comma-separated legacyids.
            converted_value = [v.strip() for v in value.split(",") if v.strip()]
            try:
                # If every part is a UUID, prefer the uuid lookup path.
                converted_value = [uuid.UUID(v) for v in converted_value]
            except ValueError:
                pass
    else:
        # data should come in as json but a python list is accepted as well
        converted_value = value

    if not isinstance(converted_value, list):
        converted_value = [converted_value]
    if not converted_value:
        # Guard: the original indexed converted_value[0] and would IndexError here.
        return []

    # Classify by the first element; mixed-type lists are not supported.
    first = converted_value[0]
    transformed_value = []

    if isinstance(first, dict):
        # Assume data was correctly parsed via json/ast.literal_eval;
        # keep only entries whose resourceId is a valid UUID.
        for val in converted_value:
            try:
                uuid.UUID(str(val["resourceId"]))
            except (KeyError, TypeError, ValueError, AttributeError):
                continue
            transformed_value.append(val)
        return transformed_value

    # uuid / legacyid paths need the search index; import lazily so the
    # pure-parsing path above carries no search-engine dependency.
    from arches.app.search.search_engine_factory import SearchEngineFactory

    se = SearchEngineFactory().create()
    query = Query(se)
    query.include("graph_id")

    if isinstance(first, uuid.UUID):
        results = query.search(
            index=RESOURCES_INDEX, id=[str(v) for v in converted_value]
        )
        for hit in results["docs"]:
            # mget returns found=False stubs for missing ids; skip them,
            # they have no _source to build a relation from
            if not hit.get("found", True):
                continue
            transformed_value.append(build_resource_instance_object(hit))
    elif isinstance(first, str):  # aka legacyid
        boolquery = Bool()
        # Exact match on the keyword sub-field.
        boolquery.must(Terms(field="legacyid.keyword", terms=converted_value))
        query.add_query(boolquery)
        results = query.search(index=RESOURCES_INDEX)
        for hit in results["hits"]["hits"]:
            transformed_value.append(build_resource_instance_object(hit))

    return transformed_value

def transform_export_values(self, value, *args, **kwargs):
    """Serialize the stored tile value to a JSON string for export."""
    return json.dumps(value)
Expand Down
30 changes: 25 additions & 5 deletions arches/app/etl_modules/import_single_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from django.http import HttpRequest
from django.utils.translation import gettext as _
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.models import ETLModule, GraphModel, Node, NodeGroup
from arches.app.models.models import (
ETLModule,
GraphModel,
Node,
NodeGroup,
ResourceInstance,
)
from arches.app.models.system_settings import settings
import arches.app.tasks as tasks
from arches.app.utils.betterJSONSerializer import JSONSerializer
Expand Down Expand Up @@ -397,10 +403,24 @@ def populate_staging_table(
# NOTE(review): fragment of populate_staging_table — the enclosing def is
# outside this view. Each CSV row may carry either a resource UUID or a
# legacyid in the id column; confirm against the merged file.
for row in reader:
    if id_label in fieldnames:
        # Position of the id column in this CSV.
        id_index = fieldnames.index(id_label)
        try:
            # Fast path: the id column already holds a resource UUID.
            resourceid = uuid.UUID(row[id_index])
            legacyid = None
        except (AttributeError, ValueError):
            # try:
            #     resourceid = uuid.UUID(row[id_index])
            #     legacyid = None
            # except (AttributeError, ValueError):
            #     resourceid = uuid.uuid4()
            #     legacyid = row[id_index]
            try:  # check for pre-existing resource keyed on the legacyid
                resource = ResourceInstance.objects.get(
                    legacyid=row[id_index]
                )
                # Reuse the existing resource's id so the import updates it.
                resourceid = resource.resourceinstanceid
                legacyid = row[id_index]
            except Exception as e:
                # NOTE(review): broad except also swallows
                # MultipleObjectsReturned for duplicate legacyids — confirm
                # intended; these prints should likely be logger calls.
                print(
                    "no pre-existing resource found for legacyid",
                    row[id_index],
                )
                print(e)
                # No match: mint a fresh resource id, keep the legacyid.
                resourceid = uuid.uuid4()
                legacyid = row[id_index]
else:
Expand Down
Loading