Skip to content

Commit

Permalink
Preload dataset type cache in Butler server
Browse files Browse the repository at this point in the history
Pre-fetch dataset types the first time a repository is accessed in Butler server, to avoid the need to re-fetch them in most later operations.
  • Loading branch information
dhirving committed Dec 5, 2024
1 parent 85e2a80 commit 90f060b
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,35 @@ def __init__(self) -> None:
self._full = False
self._dimensions_full = False

def clone(self) -> DatasetTypeCache:
    """Make a copy of the caches that is safe to use in another thread.

    Returns
    -------
    clone : `DatasetTypeCache`
        New cache instance that shares ``tables`` with this instance and
        holds shallow copies of the by-name and by-dimensions caches.

    Notes
    -----
    After cloning, the ``tables`` cache will be shared between the new
    instance and the current instance. It is safe to read and update
    ``tables`` from multiple threads simultaneously -- the cached values
    are immutable table schemas, and they are looked up one at a time by
    name.

    The other caches are copied, because their access patterns are more
    complex.

    ``full`` and ``dimensions_full`` will initially return `False` in the
    new instance. This preserves the invariant that a Butler is able to
    see any changes to the database made before the Butler is instantiated.
    The downside is that the cloned cache will have to be re-fetched before
    it can be used for glob searches.
    """
    clone = DatasetTypeCache()
    # Share DynamicTablesCache between instances.
    clone.tables = self.tables
    # The inner key/value objects are immutable in both of these caches, so
    # we can shallow-copy the dicts.
    clone._by_name_cache = self._by_name_cache.copy()
    clone._by_dimensions_cache = self._by_dimensions_cache.copy()
    # The "full" flags are deliberately not copied: a freshly constructed
    # instance starts with both set to False.
    return clone

@property
def full(self) -> bool:
"""`True` if cache holds all known dataset types (`bool`)."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
Structure containing tables that summarize the contents of collections.
registry_schema_version : `VersionTuple` or `None`, optional
Version of registry schema.
_cache : `None`, optional
For internal use only.
"""

def __init__(
Expand All @@ -146,14 +148,15 @@ def __init__(
static: StaticDatasetTablesTuple,
summaries: CollectionSummaryManager,
registry_schema_version: VersionTuple | None = None,
_cache: DatasetTypeCache | None = None,
):
super().__init__(registry_schema_version=registry_schema_version)
self._db = db
self._collections = collections
self._dimensions = dimensions
self._static = static
self._summaries = summaries
self._cache = DatasetTypeCache()
self._cache = _cache if _cache is not None else DatasetTypeCache()
self._use_astropy_ingest_date = self.ingest_date_dtype() is ddl.AstropyTimeNsecTai
self._run_key_column = collections.getRunForeignKeyName()

Expand Down Expand Up @@ -270,6 +273,9 @@ def clone(
static=self._static,
summaries=self._summaries.clone(db=db, collections=collections, caching_context=caching_context),
registry_schema_version=self._registry_schema_version,
# See notes on DatasetTypeCache.clone() about cache behavior after
# cloning.
_cache=self._cache.clone(),
)

def refresh(self) -> None:
Expand Down Expand Up @@ -502,6 +508,9 @@ def _record_from_row(self, row: Mapping) -> _DatasetTypeRecord:
def _dataset_type_from_row(self, row: Mapping) -> DatasetType:
    """Extract the dataset type from a row mapping.

    Parameters
    ----------
    row : `Mapping`
        Row to convert; it is passed unchanged to `_record_from_row`.

    Returns
    -------
    dataset_type : `DatasetType`
        The ``dataset_type`` attribute of the record built from ``row``.
    """
    return self._record_from_row(row).dataset_type

def preload_cache(self) -> None:
    """Fetch all defined dataset types so that the dataset type cache is
    populated before later operations need it.
    """
    # Only the cache-populating side effect of the fetch is wanted here;
    # the returned list of dataset types is intentionally discarded.
    self._fetch_dataset_types()

def _fetch_dataset_types(self) -> list[DatasetType]:
"""Fetch list of all defined dataset types."""
# This is one of three places we populate the dataset type cache:
Expand Down
7 changes: 7 additions & 0 deletions python/lsst/daf/butler/registry/interfaces/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@ def clone(
"""
raise NotImplementedError()

@abstractmethod
def preload_cache(self) -> None:
    """Fetch data from the database and use it to pre-populate caches to
    speed up later operations.

    Raises
    ------
    NotImplementedError
        Raised by this base declaration; concrete managers must override
        this method.
    """
    raise NotImplementedError()

@classmethod
@abstractmethod
def initialize(
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/registry/sql_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2485,6 +2485,7 @@ def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> N
def preload_cache(self) -> None:
    """Immediately load caches that are used for common operations.

    The dimension record cache is preloaded first, followed by the caches
    of the datasets manager.
    """
    self.dimension_record_cache.preload_cache()
    self._managers.datasets.preload_cache()

@property
def obsCoreTableManager(self) -> ObsCoreTableManager | None:
Expand Down

0 comments on commit 90f060b

Please sign in to comment.