From 35bdf68db9f0e94ecda1cf2aec419c0fbc95742f Mon Sep 17 00:00:00 2001 From: Peter Webb Date: Tue, 16 Jul 2024 15:37:43 -0400 Subject: [PATCH] Add group property to records. Correct filter on file records. (#169) * Add group property to records. Correct filter on file records. * Update documentation. * Add changelog entry. --- .../Under the Hood-20240716-125753.yaml | 6 +++++ dbt_common/clients/system.py | 26 +++++++++---------- dbt_common/record.py | 9 ++++--- docs/guides/record_replay.md | 9 ++++--- 4 files changed, 29 insertions(+), 21 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240716-125753.yaml diff --git a/.changes/unreleased/Under the Hood-20240716-125753.yaml b/.changes/unreleased/Under the Hood-20240716-125753.yaml new file mode 100644 index 00000000..55b36cb3 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240716-125753.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Add record grouping mechanism to record/replay. +time: 2024-07-16T12:57:53.434099-04:00 +custom: + Author: peterallenwebb + Issue: "169" diff --git a/dbt_common/clients/system.py b/dbt_common/clients/system.py index 00a1ac69..afa7f744 100644 --- a/dbt_common/clients/system.py +++ b/dbt_common/clients/system.py @@ -38,6 +38,15 @@ c_bool = None +def _record_path(path: str) -> bool: + return ( + # TODO: The first check here obviates the next two checks but is probably too coarse? + "dbt/include" not in path + and "dbt/include/global_project" not in path + and "/plugins/postgres/dbt/include/" not in path + ) + + @dataclasses.dataclass class FindMatchingParams: root_path: str @@ -61,12 +70,7 @@ def __init__( def _include(self) -> bool: # Do not record or replay filesystem searches that were performed against # files which are actually part of dbt's implementation. - return ( - "dbt/include" - not in self.root_path # TODO: This actually obviates the next two checks but is probably too coarse? 
- and "dbt/include/global_project" not in self.root_path - and "/plugins/postgres/dbt/include/" not in self.root_path - ) + return _record_path(self.root_path) @dataclasses.dataclass @@ -150,10 +154,7 @@ class LoadFileParams: def _include(self) -> bool: # Do not record or replay file reads that were performed against files # which are actually part of dbt's implementation. - return ( - "dbt/include/global_project" not in self.path - and "/plugins/postgres/dbt/include/" not in self.path - ) + return _record_path(self.path) @dataclasses.dataclass @@ -248,10 +249,7 @@ class WriteFileParams: def _include(self) -> bool: # Do not record or replay file reads that were performed against files # which are actually part of dbt's implementation. - return ( - "dbt/include/global_project" not in self.path - and "/plugins/postgres/dbt/include/" not in self.path - ) + return _record_path(self.path) @Recorder.register_record_type diff --git a/dbt_common/record.py b/dbt_common/record.py index b33d4b5a..612ddf75 100644 --- a/dbt_common/record.py +++ b/dbt_common/record.py @@ -20,7 +20,8 @@ class Record: to the request, and the 'result' is what is returned.""" params_cls: type - result_cls: Optional[type] + result_cls: Optional[type] = None + group: Optional[str] = None def __init__(self, params, result) -> None: self.params = params @@ -309,9 +310,9 @@ def record_replay_wrapper(*args, **kwargs) -> Any: if recorder is None: return func_to_record(*args, **kwargs) - if ( - recorder.recorded_types is not None - and record_type.__name__ not in recorder.recorded_types + if recorder.recorded_types is not None and not ( + record_type.__name__ in recorder.recorded_types + or record_type.group in recorder.recorded_types ): return func_to_record(*args, **kwargs) diff --git a/docs/guides/record_replay.md b/docs/guides/record_replay.md index aff4c77c..b6dfc7b8 100644 --- a/docs/guides/record_replay.md +++ b/docs/guides/record_replay.md @@ -31,13 +31,16 @@ The final detail needed is to define the 
classes specified by `params_cls` and ` With these decorators applied and classes defined, dbt is able to record all file access during a run, and mock out the accesses during replay, isolating dbt from actually loading files. At least it would if dbt only used this function for all file access, which is only mostly true. We hope to continue improving the usefulness of this mechanism by adding more recorded functions and routing more operations through them. ## How to record/replay -If `DBT_RECORDER_MODE` is not `replay` or `record`, case insensitive, this is a no-op. Invalid values are ignored and do not throw exceptions. -`DBT_RECODER_TYPES` is optional. It indicates which types to filter the results by and expects a list of strings values for the `Record` subclasses. Any invalid types will be ignored. `all` is a valid type and behaves the same as not populating the env var. +Record/replay behavior is activated and configured via environment variables. When `DBT_RECORDER_MODE` is unset, the entire subsystem is disabled, and the decorators described above have no effect at all. This helps isolate the subsystem from core's application code, reducing the risk of performance impact or regressions. + +The record/replay subsystem is activated by setting the `DBT_RECORDER_MODE` variable to `replay`, `record`, or `diff`, case insensitive. Invalid values are ignored and do not throw exceptions. + +`DBT_RECORDER_TYPES` is optional. It indicates which types to filter the results by and expects a list of string values for the `Record` subclasses or groups of such classes. For example, all records of database/DWH interaction performed by adapters belong to the `Database` group. Any invalid type or group name will be ignored. `all` is a valid value for this variable and has the same effect as not populating the variable. 
```bash -DBT_RECORDER_MODE=record DBT_RECODER_TYPES=QueryRecord,GetEnvRecord dbt run +DBT_RECORDER_MODE=record DBT_RECORDER_TYPES=Database dbt run ``` replay need the file to replay