Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to import a resource-instance datatype value supplied as a single UUID #11655

Draft
wants to merge 10 commits into
base: dev/7.6.x
Choose a base branch
from
124 changes: 112 additions & 12 deletions arches/app/datatypes/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2219,18 +2219,118 @@ def get_search_terms(self, nodevalue, nodeid=None):
return terms

def transform_value_for_tile(self, value, **kwargs):
    """Convert an incoming value into a list of resource-instance relation dicts.

    Accepts several input shapes:
      - an already-parsed list of relation dicts / uuid.UUID objects / strings
      - a JSON or python-literal string encoding such a list (re #6390 allows
        single-quoted pseudo-JSON via ast.literal_eval)
      - a single UUID string, or comma-separated UUID strings
      - a bare legacyid string, or comma-separated legacyids

    UUIDs and legacyids are resolved against the resources search index; each
    hit becomes a dict with resourceId / ontologyProperty /
    inverseOntologyProperty / resourceXresourceId keys. Relation dicts are
    passed through after validating their resourceId. Returns [] when nothing
    usable can be extracted.

    kwargs is the node config, e.g.:
    {
        "graphs": [
            {
                "name": "Person or Group",
                "graphid": "ccbd1537-ac5e-11e6-84a5-026d961c88e6",
                "relationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a",
                "relationshipCollection": "00000000-0000-0000-0000-000000000005",
                "useOntologyRelationship": False,
                "inverseRelationshipConcept": "6f26aa04-52af-4b17-a656-674c547ade2a"
            }
        ],
        "searchDsl": "",
        "searchString": ""
    }
    """
    # Per-graph default ontology properties, derived from the node config.
    default_values_lookup = {}
    for graph in kwargs.get("graphs", []):
        if graph.get("useOntologyRelationship", False) or not graph.get(
            "relationshipConcept"
        ):
            default_values_lookup[graph["graphid"]] = {
                "ontologyProperty": "",
                "inverseOntologyProperty": "",
            }
        else:
            default_values_lookup[graph["graphid"]] = {
                "ontologyProperty": graph["relationshipConcept"],
                # .get() guards against configs missing the inverse concept;
                # fall back to the forward concept in that case
                "inverseOntologyProperty": graph.get(
                    "inverseRelationshipConcept", graph["relationshipConcept"]
                ),
            }

    def build_resource_instance_object(hit):
        # Map a search hit to the tile representation of the relation.
        defaults = default_values_lookup[hit["_source"]["graph_id"]]
        return {
            "resourceId": hit["_id"],
            "ontologyProperty": defaults["ontologyProperty"],
            "inverseOntologyProperty": defaults["inverseOntologyProperty"],
            "resourceXresourceId": str(uuid.uuid4()),
        }

    # Sentinel instead of False: json.loads("false") legitimately returns False.
    _UNPARSED = object()

    if isinstance(value, str):
        converted_value = _UNPARSED
        for parse in (uuid.UUID, json.loads, ast.literal_eval):
            try:
                converted_value = parse(value)
                break
            except (ValueError, SyntaxError, TypeError):
                continue
        if converted_value is _UNPARSED:
            if not value:
                logger.warning("ResourceInstanceDataType: value is empty")
                return []
            # Not a uuid/json/literal: treat as comma-separated legacyids.
            converted_value = [v.strip() for v in value.split(",") if v.strip()]
            try:
                # If every part is a UUID, prefer the uuid lookup path.
                converted_value = [uuid.UUID(v) for v in converted_value]
            except ValueError:
                pass
    else:
        # data should come in as json but a python list is accepted as well
        converted_value = value

    if not isinstance(converted_value, list):
        converted_value = [converted_value]
    if not converted_value:
        # Guard: the original indexed converted_value[0] and would IndexError here.
        return []

    # Classify by the first element; mixed-type lists are not supported.
    first = converted_value[0]
    transformed_value = []

    if isinstance(first, dict):
        # Assume data was correctly parsed via json/ast.literal_eval;
        # keep only entries whose resourceId is a valid UUID.
        for val in converted_value:
            try:
                uuid.UUID(str(val["resourceId"]))
            except (KeyError, TypeError, ValueError, AttributeError):
                continue
            transformed_value.append(val)
        return transformed_value

    # uuid / legacyid paths need the search index; import lazily so the
    # pure-parsing path above carries no search-engine dependency.
    from arches.app.search.search_engine_factory import SearchEngineFactory

    se = SearchEngineFactory().create()
    query = Query(se)
    query.include("graph_id")

    if isinstance(first, uuid.UUID):
        results = query.search(
            index=RESOURCES_INDEX, id=[str(v) for v in converted_value]
        )
        for hit in results["docs"]:
            # mget returns found=False stubs for missing ids; skip them,
            # they have no _source to build a relation from
            if not hit.get("found", True):
                continue
            transformed_value.append(build_resource_instance_object(hit))
    elif isinstance(first, str):  # aka legacyid
        boolquery = Bool()
        # Exact match on the keyword sub-field.
        boolquery.must(Terms(field="legacyid.keyword", terms=converted_value))
        query.add_query(boolquery)
        results = query.search(index=RESOURCES_INDEX)
        for hit in results["hits"]["hits"]:
            transformed_value.append(build_resource_instance_object(hit))

    return transformed_value

def transform_export_values(self, value, *args, **kwargs):
    """Serialize the stored tile value to a JSON string for export."""
    return json.dumps(value)
Expand Down
30 changes: 25 additions & 5 deletions arches/app/etl_modules/import_single_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from django.http import HttpRequest
from django.utils.translation import gettext as _
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.models import ETLModule, GraphModel, Node, NodeGroup
from arches.app.models.models import (
ETLModule,
GraphModel,
Node,
NodeGroup,
ResourceInstance,
)
from arches.app.models.system_settings import settings
import arches.app.tasks as tasks
from arches.app.utils.betterJSONSerializer import JSONSerializer
Expand Down Expand Up @@ -397,10 +403,24 @@ def populate_staging_table(
# NOTE(review): fragment of populate_staging_table — the enclosing def is
# outside this view. Each CSV row may carry either a resource UUID or a
# legacyid in the id column; confirm against the merged file.
for row in reader:
    if id_label in fieldnames:
        # Position of the id column in this CSV.
        id_index = fieldnames.index(id_label)
        try:
            # Fast path: the id column already holds a resource UUID.
            resourceid = uuid.UUID(row[id_index])
            legacyid = None
        except (AttributeError, ValueError):
            # try:
            #     resourceid = uuid.UUID(row[id_index])
            #     legacyid = None
            # except (AttributeError, ValueError):
            #     resourceid = uuid.uuid4()
            #     legacyid = row[id_index]
            try:  # check for pre-existing resource keyed on the legacyid
                resource = ResourceInstance.objects.get(
                    legacyid=row[id_index]
                )
                # Reuse the existing resource's id so the import updates it.
                resourceid = resource.resourceinstanceid
                legacyid = row[id_index]
            except Exception as e:
                # NOTE(review): broad except also swallows
                # MultipleObjectsReturned for duplicate legacyids — confirm
                # intended; these prints should likely be logger calls.
                print(
                    "no pre-existing resource found for legacyid",
                    row[id_index],
                )
                print(e)
                # No match: mint a fresh resource id, keep the legacyid.
                resourceid = uuid.uuid4()
                legacyid = row[id_index]
else:
Expand Down
Loading