diff --git a/arches/app/utils/index_database.py b/arches/app/utils/index_database.py index c0d14bd0f17..82b14111944 100644 --- a/arches/app/utils/index_database.py +++ b/arches/app/utils/index_database.py @@ -12,6 +12,8 @@ from datetime import datetime from django.db import connection, connections from django.db.models import prefetch_related_objects, Prefetch, Q, QuerySet +from django.utils.translation import get_language + from arches.app.models import models from arches.app.models.resource import Resource from arches.app.models.system_settings import settings @@ -25,21 +27,6 @@ logger = logging.getLogger(__name__) -serialized_graphs = {} - - -def get_serialized_graph(graph): - """ - Returns the serialized version of the graph from the database - - """ - if not graph: - return None - - if graph.graphid not in serialized_graphs: - published_graph = graph.get_published_graph() - serialized_graphs[graph.graphid] = published_graph.serialized_graph - return serialized_graphs[graph.graphid] def index_db( @@ -201,15 +188,16 @@ def optimize_resource_iteration(resources: Iterable[Resource], chunk_size: int): - select related graphs - prefetch tiles (onto .prefetched_tiles) - prefetch primary descriptors (onto graph.descriptor_function) + - prefetch published graphs (onto graph.publication.published_graph_active_lang) - apply chunk_size to reduce memory footprint and spread the work of prefetching tiles across multiple queries The caller is responsible for moving the descriptor function - prefetch from the graph to the resource instance--a symptom of - this being more of a graph property--and for moving the prefetched - tiles to .tiles (because the Resource proxy model initializes - .tiles to an empty array and Django thinks that represents the - state in the db.) + and published graph prefetches from the graph to the resource instances + --a symptom of these being more like graph properties-- + and for moving the prefetched tiles to .tiles (because the Resource + proxy model initializes .tiles to an empty array and Django thinks + that represents the state in the db.) """ tiles_prefetch = Prefetch("tilemodel_set", to_attr="prefetched_tiles") # Same queryset as Resource.save_descriptors() @@ -221,18 +209,30 @@ def optimize_resource_iteration(resources: Iterable[Resource], chunk_size: int): queryset=descriptor_query, to_attr="descriptor_function", ) + published_graph_query = models.PublishedGraph.objects.filter( + language=get_language() + ) + published_graph_prefetch = Prefetch( + "graph__publication__publishedgraph_set", + queryset=published_graph_query, + to_attr="published_graph_active_lang", + ) if isinstance(resources, QuerySet): return ( resources.select_related("graph") - .prefetch_related(tiles_prefetch, descriptor_prefetch) + .prefetch_related( + tiles_prefetch, descriptor_prefetch, published_graph_prefetch + ) .iterator(chunk_size=chunk_size) ) else: # public API that arches itself does not currently use for r in resources: r.clean_fields() # ensure strings become UUIDs - prefetch_related_objects(resources, tiles_prefetch, descriptor_prefetch) + prefetch_related_objects( + resources, tiles_prefetch, descriptor_prefetch, published_graph_prefetch + ) return resources @@ -263,10 +263,19 @@ def index_resources_using_singleprocessing( for resource in optimize_resource_iteration( resources, chunk_size=batch_size // 8 ): + # Move prefetched relations to where the Proxy Model expects them. resource.tiles = resource.prefetched_tiles resource.descriptor_function = resource.graph.descriptor_function + try: + resource.serialized_graph = ( + resource.graph.publication.published_graph_active_lang[ + 0 + ].serialized_graph + ) + except IndexError: + resource.serialized_graph = None + resource.set_node_datatypes(node_datatypes) - resource.set_serialized_graph(get_serialized_graph(resource.graph)) if recalculate_descriptors: resource.save_descriptors() if quiet is False and bar is not None: diff --git a/tests/models/resource_test.py b/tests/models/resource_test.py index 452f146bb11..25afa90479b 100644 --- a/tests/models/resource_test.py +++ b/tests/models/resource_test.py @@ -422,18 +422,15 @@ def test_provisional_user_can_delete_own_resource(self): self.assertFalse(result) def test_recalculate_descriptors_prefetch_related_objects(self): + other_graph = Graph.new(name="Other graph", is_resource=True) + other_graph.publish() r1 = Resource(graph_id=self.search_model_graphid) - r2 = Resource(graph_id=self.search_model_graphid) + r2 = Resource(graph_id=other_graph.pk) r1_tile = Tile( data={self.search_model_creation_date_nodeid: "1941-01-01"}, nodegroup_id=self.search_model_creation_date_nodeid, ) r1.tiles.append(r1_tile) - r2_tile = Tile( - data={self.search_model_creation_date_nodeid: "1941-01-01"}, - nodegroup_id=self.search_model_creation_date_nodeid, - ) - r2.tiles.append(r2_tile) r1.save(index=False) r2.save(index=False) @@ -465,6 +462,13 @@ def test_recalculate_descriptors_prefetch_related_objects(self): ] self.assertEqual(len(tile_selects), 1) + published_graph_selects = [ + q + for q in queries + if q["sql"].startswith('SELECT "published_graphs"."id"') + ] + self.assertEqual(len(published_graph_selects), 1) + def test_self_referring_resource_instance_descriptor(self): # Create a nodegroup with a string node and a resource-instance node. graph = Graph.new(name="Self-referring descriptor test", is_resource=True)