diff --git a/.changes/unreleased/Features-20230629-033005.yaml b/.changes/unreleased/Features-20230629-033005.yaml new file mode 100644 index 00000000000..65fda5420fa --- /dev/null +++ b/.changes/unreleased/Features-20230629-033005.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Establish framework for materialized views and materialization change management +time: 2023-06-29T03:30:05.527325-04:00 +custom: + Author: mikealfare + Issue: "6911" diff --git a/.flake8 b/.flake8 index e39b2fa4646..26e20a5d209 100644 --- a/.flake8 +++ b/.flake8 @@ -10,3 +10,5 @@ ignore = E741 E501 # long line checking is done in black exclude = test/ +per-file-ignores = + */__init__.py: F401 diff --git a/core/dbt/adapters/README.md b/core/dbt/adapters/README.md index 20ae9e7a56a..10d8eccb276 100644 --- a/core/dbt/adapters/README.md +++ b/core/dbt/adapters/README.md @@ -28,3 +28,19 @@ Defines various interfaces for various adapter objects. Helps mypy correctly res ## `reference_keys.py` Configures naming scheme for cache elements to be universal. + +## Validation + +- `ValidationMixin` +- `ValidationRule` + +These classes live in `validation.py`, outside of `relation`, because they don't pertain specifically to `Relation`; +however, they are currently only used by `Relation`. +`ValidationMixin` provides optional validation mechanics that can be applied to `Relation`, `RelationComponent`, +or `RelationChange` subclasses. To implement `ValidationMixin`, include it as a base class on your `Relation`-like +object and add a method `validation_rules()` that returns a set of `ValidationRule` objects. +A `ValidationRule` is a combination of a `validation_check`, something that should always evaluate to `True` +in expected scenarios (i.e. a `False` indicates an invalid configuration), and an optional `validation_error`, +an instance of `DbtRuntimeError` that should be raised in the event the `validation_check` fails. +While the `validation_error` is optional, it's recommended to provide one for transparency to the end user, +since the default error cannot explain why the `validation_check` failed. For example, a rule might check that an +index is defined with at least one column and raise `DbtRuntimeError("Indexes must have at least one column.")` when it isn't. diff --git a/core/dbt/adapters/base/__init__.py b/core/dbt/adapters/base/__init__.py index 07f5303992e..34685155c17 100644 --- a/core/dbt/adapters/base/__init__.py +++ b/core/dbt/adapters/base/__init__.py @@ -1,19 +1,17 @@ -# these are all just exports, #noqa them so flake8 will be happy - # TODO: Should we still include this in the `adapters` namespace?
-from dbt.contracts.connection import Credentials # noqa: F401 -from dbt.adapters.base.meta import available # noqa: F401 -from dbt.adapters.base.connections import BaseConnectionManager # noqa: F401 -from dbt.adapters.base.relation import ( # noqa: F401 +from dbt.contracts.connection import Credentials +from dbt.adapters.base.meta import available +from dbt.adapters.base.connections import BaseConnectionManager +from dbt.adapters.base.relation import ( BaseRelation, RelationType, SchemaSearchMap, ) -from dbt.adapters.base.column import Column # noqa: F401 -from dbt.adapters.base.impl import ( # noqa: F401 +from dbt.adapters.base.column import Column +from dbt.adapters.base.impl import ( AdapterConfig, BaseAdapter, PythonJobHelper, ConstraintSupport, ) -from dbt.adapters.base.plugin import AdapterPlugin # noqa: F401 +from dbt.adapters.base.plugin import AdapterPlugin diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index d18c9af7f50..8da5c1a08c3 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -3,8 +3,8 @@ from contextlib import contextmanager from datetime import datetime from enum import Enum -import time from itertools import chain +import time from typing import ( Any, Callable, @@ -20,11 +20,46 @@ Union, ) -from dbt.contracts.graph.nodes import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint - import agate import pytz +from dbt import deprecations +from dbt.adapters.base import Credentials, Column as BaseColumn +from dbt.adapters.base.connections import AdapterResponse, Connection +from dbt.adapters.base.meta import AdapterMeta, available +from dbt.adapters.base.relation import ( + ComponentName, + BaseRelation, + InformationSchema, + SchemaSearchMap, +) +from dbt.adapters.cache import RelationsCache, _make_ref_key_dict +from dbt.adapters.materialization import MaterializationFactory +from dbt.adapters.materialization.models import Materialization +from dbt.adapters.protocol import AdapterConfig, ConnectionManagerProtocol +from dbt.adapters.relation import RelationFactory +from dbt.adapters.relation.models import Relation as RelationModel, RelationChangeset, RelationRef +from dbt.clients.agate_helper import empty_table, merge_tables, table_from_rows +from dbt.clients.jinja import MacroGenerator +from dbt.contracts.graph.manifest import MacroManifest, Manifest +from dbt.contracts.graph.nodes import ( + ColumnLevelConstraint, + ConstraintType, + ModelLevelConstraint, + ParsedNode, + ResultNode, +) +from dbt.contracts.relation import RelationType +from dbt.events.functions import fire_event, warn_or_error +from dbt.events.types import ( + CacheMiss, + CatalogGenerationError, + CodeExecution, + CodeExecutionStatus, + ConstraintNotEnforced, + ConstraintNotSupported, + ListRelations, +) from dbt.exceptions import ( DbtInternalError, DbtRuntimeError, @@ -42,36 +77,8 @@ UnexpectedNonTimestampError, UnexpectedNullError, ) +from dbt.utils import AttrDict, cast_to_str, filter_null_values, executor -from dbt.adapters.protocol import AdapterConfig, ConnectionManagerProtocol -from dbt.clients.agate_helper import empty_table, merge_tables, table_from_rows -from dbt.clients.jinja import MacroGenerator -from dbt.contracts.graph.manifest import Manifest, MacroManifest -from dbt.contracts.graph.nodes import ResultNode -from dbt.events.functions import fire_event, warn_or_error -from dbt.events.types import ( - CacheMiss, - ListRelations, - CodeExecution, - CodeExecutionStatus, - CatalogGenerationError, - ConstraintNotSupported, - 
ConstraintNotEnforced, -) -from dbt.utils import filter_null_values, executor, cast_to_str, AttrDict - -from dbt.adapters.base.connections import Connection, AdapterResponse -from dbt.adapters.base.meta import AdapterMeta, available -from dbt.adapters.base.relation import ( - ComponentName, - BaseRelation, - InformationSchema, - SchemaSearchMap, -) -from dbt.adapters.base import Column as BaseColumn -from dbt.adapters.base import Credentials -from dbt.adapters.cache import RelationsCache, _make_ref_key_dict -from dbt import deprecations GET_CATALOG_MACRO_NAME = "get_catalog" FRESHNESS_MACRO_NAME = "collect_freshness" @@ -222,6 +229,26 @@ class BaseAdapter(metaclass=AdapterMeta): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } + @property + def relation_factory(self) -> RelationFactory: + """ + It's common for an adapter to override `Relation` subclasses. In those cases, this property + should also be overridden so that those new `Relation` subclasses get registered. + """ + return RelationFactory() + + @property + def materialization_factory(self) -> MaterializationFactory: + """ + It's common for an adapter to override `Relation` subclasses. In those cases, `self.relation_factory` + should also be overridden to register them; it's passed in here so that the adapter's setting + overrides the default on `MaterializationFactory`. + + It's uncommon to override `Materialization` subclasses. In those cases, the adapter should + override this property to register them on `MaterializationFactory`. + """ + return MaterializationFactory(relation_factory=self.relation_factory) + def __init__(self, config): self.config = config self.cache = RelationsCache() @@ -1177,7 +1204,7 @@ def post_model_hook(self, config: Mapping[str, Any], context: Any) -> None: available in the materialization context). It should be considered read-only. - The second parameter is the value returned by pre_mdoel_hook. + The second parameter is the value returned by pre_model_hook. """ pass @@ -1429,6 +1456,146 @@ def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[s else: return None + """ + Pass-through methods to access `MaterializationFactory` and `RelationFactory` functionality + """ + + @available + def make_materialization_from_node(self, node: ParsedNode) -> Materialization: + """ + Produce a `Materialization` instance along with whatever associated `Relation` and `RelationRef` + instances are needed. + + *Note:* The node that comes in could be any one of `ParsedNode`, `CompiledNode`, or `ModelNode`. We + need at least a `ParsedNode` to process a materialization in general, and at least a `CompiledNode` + to process a materialization that requires a query. + + Args: + node: `model` or `config.model` in the global jinja context + + Returns: + a `Materialization` instance that contains all the information required to execute the materialization + """ + existing_relation_ref = self._get_existing_relation_ref_from_node(node) + return self.materialization_factory.make_from_node(node, existing_relation_ref)
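+
+    # A sketch of how this surfaces in a materialization's jinja template
+    # (illustrative only; `create_template` is a hypothetical macro, not part of this PR):
+    #
+    #     {% set materialization = adapter.make_materialization_from_node(config.model) %}
+    #     {% if materialization.build_strategy == 'create' %}
+    #         {{ create_template(materialization.target_relation) }}
+    #     {% endif %}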
+ + def _get_existing_relation_ref_from_node(self, node: ParsedNode) -> Optional[RelationRef]: + """ + We need to get `existing_relation_ref` from `Adapter` because we need access to a bunch of `cache` + things, in particular `get_relations`. + + TODO: if we refactor the interaction between `Adapter` and `cache`, the calculation of `existing_relation_ref` + could be moved here, which is a more intuitive spot (like `target_relation`) for it + (and removes the concern of creating a `RelationRef` from `Adapter`, where it doesn't belong). + """ + existing_base_relation: Optional[BaseRelation] = self.get_relation( + database=node.database, + schema=node.schema, + identifier=node.identifier, + ) + + # mypy thinks existing_base_relation's identifiers are all optional because of IncludePolicy + if existing_base_relation: + existing_relation_ref = self.relation_factory.make_ref( + name=existing_base_relation.identifier, # type: ignore + schema_name=existing_base_relation.schema, # type: ignore + database_name=existing_base_relation.database, # type: ignore + relation_type=existing_base_relation.type, # type: ignore + ) + else: + existing_relation_ref = None + + return existing_relation_ref + + @available + def make_changeset( + self, existing_relation: RelationModel, target_relation: RelationModel + ) -> RelationChangeset: + """ + Generate a changeset between two relations. This gets used in macros like `alter_template()` to + determine what changes are needed, or if a full refresh is needed. + + Note that while this could be determined on `Materialization`, the fact that it gets called + in `alter_template()`, which only takes in two `Relation` instances, means that it needs to live + separately from `Materialization`. + + Args: + existing_relation: the current implementation of the relation in the database + target_relation: the new implementation that should exist in the database going forward + + Returns: + a `RelationChangeset` instance that collects all the changes required to turn `existing_relation` + into `target_relation` + """ + return self.relation_factory.make_changeset(existing_relation, target_relation) + + """ + Implementation of cache methods for `Relation` instances (versus `BaseRelation` instances) + """ + + @available + def cache_created_relation_model(self, relation: RelationModel) -> str: + base_relation = self.base_relation_from_relation_model(relation) + return self.cache_added(base_relation) + + @available + def cache_dropped_relation_model(self, relation: RelationModel) -> str: + base_relation = self.base_relation_from_relation_model(relation) + return self.cache_dropped(base_relation) + + @available + def cache_renamed_relation_model(self, relation: RelationModel, new_name: str) -> str: + from_relation = self.base_relation_from_relation_model(relation) + to_relation = from_relation.incorporate(path={"identifier": new_name}) + return self.cache_renamed(from_relation, to_relation) + + """ + Methods to swap back and forth between `Relation` and `BaseRelation` instances + """ + + @available + def is_base_relation(self, relation: Union[BaseRelation, RelationModel]) -> bool: + """ + Convenient for templating, given the mix of `BaseRelation` and `Relation` + """ + return isinstance(relation, BaseRelation) + + @available + def is_relation_model(self, relation: Union[BaseRelation, RelationModel]) -> bool: + """ + Convenient for templating, given the mix of `BaseRelation` and `Relation` + """ + return isinstance(relation, RelationModel) + + @available + def base_relation_from_relation_model(self, relation: RelationModel) -> BaseRelation: + """ + Produce a `BaseRelation` instance from a `Relation` instance. This is primarily done to + reuse existing functionality based on `BaseRelation` while working with `Relation` instances.
+ + Useful in combination with `is_relation_model`/`is_base_relation` + + Args: + relation: a `Relation` instance or subclass to be converted + + Returns: + a converted `BaseRelation` instance + """ + try: + relation_type = RelationType(relation.type) + except ValueError: + relation_type = RelationType.External + + base_relation: BaseRelation = self.Relation.create( + database=relation.database_name, + schema=relation.schema_name, + identifier=relation.name, + quote_policy=self.relation_factory.render_policy.quote_policy, + type=relation_type, + ) + assert isinstance(base_relation, BaseRelation) # mypy + return base_relation + COLUMNS_EQUAL_SQL = """ with diff_count as ( diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py index ae4e585d524..6ace6427bb7 100644 --- a/core/dbt/adapters/base/relation.py +++ b/core/dbt/adapters/base/relation.py @@ -1,39 +1,44 @@ from collections.abc import Hashable -from dataclasses import dataclass, field -from typing import Optional, TypeVar, Any, Type, Dict, Iterator, Tuple, Set - -from dbt.contracts.graph.nodes import SourceDefinition, ManifestNode, ResultNode, ParsedNode +import dataclasses +from typing import Any, Dict, Iterator, Optional, Set, Tuple, Type, TypeVar + +from dbt.contracts.graph.nodes import ( + SourceDefinition, + ManifestNode, + ResultNode, + ParsedNode, +) from dbt.contracts.relation import ( - RelationType, ComponentName, - HasQuoting, FakeAPIObject, - Policy, + HasQuoting, Path, + Policy, + RelationType, ) from dbt.exceptions import ( ApproximateMatchError, + CompilationError, DbtInternalError, + DbtRuntimeError, MultipleDatabasesNotAllowedError, ) from dbt.node_types import NodeType -from dbt.utils import filter_null_values, deep_merge, classproperty - -import dbt.exceptions +from dbt.utils import classproperty, deep_merge, filter_null_values, merge Self = TypeVar("Self", bound="BaseRelation") -@dataclass(frozen=True, eq=False, repr=False) +@dataclasses.dataclass(frozen=True, eq=False, repr=False) class BaseRelation(FakeAPIObject, Hashable): path: Path type: Optional[RelationType] = None quote_character: str = '"' # Python 3.11 requires that these use default_factory instead of simple default # ValueError: mutable default for field include_policy is not allowed: use default_factory - include_policy: Policy = field(default_factory=lambda: Policy()) - quote_policy: Policy = field(default_factory=lambda: Policy()) + include_policy: Policy = dataclasses.field(default_factory=lambda: Policy()) + quote_policy: Policy = dataclasses.field(default_factory=lambda: Policy()) dbt_created: bool = False def _is_exactish_match(self, field: ComponentName, value: str) -> bool: @@ -87,9 +92,7 @@ def matches( if not search: # nothing was passed in - raise dbt.exceptions.DbtRuntimeError( - "Tried to match relation, but no search path was passed!" 
- ) + raise DbtRuntimeError("Tried to match relation, but no search path was passed!") exact_match = True approximate_match = True @@ -171,10 +174,11 @@ def without_identifier(self) -> "BaseRelation": def _render_iterator(self) -> Iterator[Tuple[Optional[ComponentName], Optional[str]]]: for key in ComponentName: + component = ComponentName(key) path_part: Optional[str] = None - if self.include_policy.get_part(key): - path_part = self.path.get_part(key) - if path_part is not None and self.quote_policy.get_part(key): + if self.include_policy.get_part(component): + path_part = self.path.get_part(component) + if path_part is not None and self.quote_policy.get_part(component): path_part = self.quoted(path_part) yield key, path_part @@ -234,7 +238,7 @@ def create_from_node( if quote_policy is None: quote_policy = {} - quote_policy = dbt.utils.merge(config.quoting, quote_policy) + quote_policy = merge(config.quoting, quote_policy) return cls.create( database=node.database, @@ -259,7 +263,7 @@ def create_from( return cls.create_from_source(node, **kwargs) else: # Can't use ManifestNode here because of parameterized generics - if not isinstance(node, (ParsedNode)): + if not isinstance(node, ParsedNode): raise DbtInternalError( f"type mismatch, expected ManifestNode but got {type(node)}" ) @@ -360,15 +364,13 @@ def get_relation_type(cls) -> Type[RelationType]: Info = TypeVar("Info", bound="InformationSchema") -@dataclass(frozen=True, eq=False, repr=False) +@dataclasses.dataclass(frozen=True, eq=False, repr=False) class InformationSchema(BaseRelation): information_schema_view: Optional[str] = None def __post_init__(self): if not isinstance(self.information_schema_view, (type(None), str)): - raise dbt.exceptions.CompilationError( - "Got an invalid name: {}".format(self.information_schema_view) - ) + raise CompilationError("Got an invalid name: {}".format(self.information_schema_view)) @classmethod def get_path(cls, relation: BaseRelation, information_schema_view: Optional[str]) -> Path: diff --git a/core/dbt/adapters/materialization/README.md b/core/dbt/adapters/materialization/README.md new file mode 100644 index 00000000000..66d003ed74d --- /dev/null +++ b/core/dbt/adapters/materialization/README.md @@ -0,0 +1,23 @@ +# Materialization Models + +## MaterializationFactory +Much like `RelationFactory` to `Relation`, this factory represents the way that `Materialization` instances should +be created. It guarantees that the same `RelationFactory`, and hence the same `Relation` subclasses, are always used. An +instance of this exists on `BaseAdapter`; however, it will only need to be adjusted if a custom version of +`Materialization` is used. At the moment, this factory is sparse, with a single method for a single purpose: + +- `make_from_node` + +This method gets run at the beginning of a materialization, and that's about it. There is room for this to grow +as more complicated materializations arise. + +## Materialization +A `Materialization` model is intended to represent a single materialization and all of the information required +to execute that materialization in a database. In many cases it can be confusing to differentiate between a +`Materialization` and a `Relation`. For example, a View materialization implements a View relation in the database. +However, the connection is not always one to one. As another example, both an incremental materialization and +a table materialization implement a table relation in the database. The separation between `Materialization` +and `Relation` is intended to separate the "what" from the "how". `Relation` corresponds to the "what" +and `Materialization` corresponds to the "how". That allows `Relation` to focus on what is needed to, for instance, +create a table in the database; on the other hand, `Materialization` might need to create several `Relation` +objects to accomplish its task.
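+
+To make the "what" vs. "how" split concrete, here is a minimal sketch of two materializations that
+both target a table relation. This is illustrative only: `TableMaterialization` and
+`IncrementalMaterialization` are hypothetical, not built-ins shipped by this framework.
+
+```python
+from dataclasses import dataclass
+
+from dbt.adapters.materialization.models import (
+    Materialization,
+    MaterializationBuildStrategy,
+)
+
+
+@dataclass
+class TableMaterialization(Materialization):
+    # the "how" is simple: always build a fresh table relation
+    @property
+    def build_strategy(self) -> MaterializationBuildStrategy:
+        return MaterializationBuildStrategy.Replace
+
+
+@dataclass
+class IncrementalMaterialization(Materialization):
+    # same "what" (a table relation), different "how": reuse the existing one when possible
+    @property
+    def build_strategy(self) -> MaterializationBuildStrategy:
+        if self.existing_relation_ref is None:
+            return MaterializationBuildStrategy.Create
+        if self.is_full_refresh:
+            return MaterializationBuildStrategy.Replace
+        return MaterializationBuildStrategy.Alter
+```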
diff --git a/core/dbt/adapters/materialization/__init__.py b/core/dbt/adapters/materialization/__init__.py new file mode 100644 index 00000000000..9da35f9cde6 --- /dev/null +++ b/core/dbt/adapters/materialization/__init__.py @@ -0,0 +1 @@ +from dbt.adapters.materialization.factory import MaterializationFactory diff --git a/core/dbt/adapters/materialization/factory.py b/core/dbt/adapters/materialization/factory.py new file mode 100644 index 00000000000..e54b1e88647 --- /dev/null +++ b/core/dbt/adapters/materialization/factory.py @@ -0,0 +1,74 @@ +from typing import Dict, Optional, Type + +from dbt.adapters.materialization import models +from dbt.adapters.relation import RelationFactory +from dbt.adapters.relation.models import RelationRef +from dbt.contracts.graph.nodes import ParsedNode +from dbt.dataclass_schema import StrEnum +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.materialization.models import ( + Materialization, + MaterializationType, + MaterializedViewMaterialization, +) + + +class MaterializationFactory: + def __init__( + self, + **kwargs, + ): + self.relation_factory: RelationFactory = kwargs.get("relation_factory", RelationFactory()) + # the `StrEnum` will generally be `MaterializationType`; however, this allows for extending that Enum + self.materialization_types: Type[StrEnum] = kwargs.get( "materialization_types", MaterializationType ) + self.materialization_models: Dict[StrEnum, Type[models.Materialization]] = kwargs.get( "materialization_models", { MaterializationType.MaterializedView: MaterializedViewMaterialization, }, ) + + try: + for materialization_type in self.materialization_models.keys(): + assert materialization_type in self.materialization_types + except AssertionError: + raise DbtRuntimeError( + f"Received a model for {materialization_type} " + f"but this materialization type is not registered on this factory.\n" + f" registered materialization types: {', '.join(self.materialization_types)}\n" + ) + + def make_from_node( + self, + node: ParsedNode, + existing_relation_ref: Optional[RelationRef] = None, + ) -> models.Materialization: + materialization_type = self._get_materialization_type(node.config.materialized) + materialization = self._get_materialization_model(materialization_type) + return materialization.from_node( node=node, relation_factory=self.relation_factory, existing_relation_ref=existing_relation_ref, ) + + def _get_materialization_type(self, materialization_type: str) -> StrEnum: + try: + return self.materialization_types(materialization_type) + except ValueError: + raise DbtRuntimeError( f"This factory does not recognize this materialization type.\n" f" received: {materialization_type}\n" f" options: {', '.join(t for t in self.materialization_types)}\n" ) + + def _get_materialization_model(self, materialization_type: StrEnum) -> Type[Materialization]: + if materialization := self.materialization_models.get(materialization_type): + return materialization + raise DbtRuntimeError( + f"This factory does not have a materialization for this type.\n" + f" received: {materialization_type}\n" + f" options: {', '.join(t for t 
in self.materialization_models.keys())}\n" + ) diff --git a/core/dbt/adapters/materialization/models/__init__.py b/core/dbt/adapters/materialization/models/__init__.py new file mode 100644 index 00000000000..dbc0abcacad --- /dev/null +++ b/core/dbt/adapters/materialization/models/__init__.py @@ -0,0 +1,6 @@ +from dbt.adapters.materialization.models._materialization import ( + Materialization, + MaterializationBuildStrategy, + MaterializationType, +) +from dbt.adapters.materialization.models._materialized_view import MaterializedViewMaterialization diff --git a/core/dbt/adapters/materialization/models/_materialization.py b/core/dbt/adapters/materialization/models/_materialization.py new file mode 100644 index 00000000000..c8f38227611 --- /dev/null +++ b/core/dbt/adapters/materialization/models/_materialization.py @@ -0,0 +1,132 @@ +from abc import ABC +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from dbt.adapters.relation.factory import RelationFactory +from dbt.adapters.relation.models import DescribeRelationResults, Relation, RelationRef +from dbt.contracts.graph.model_config import OnConfigurationChangeOption +from dbt.contracts.graph.nodes import ParsedNode +from dbt.dataclass_schema import StrEnum +from dbt.flags import get_flag_obj +from dbt.utils import filter_null_values + + +class MaterializationType(StrEnum): + """ + This overlaps with `RelationType` for several values (e.g. `View`); however, they are not the same. + For example, a materialization type of `Incremental` would be associated with a relation type of `Table`. + """ + + View = "view" + Table = "table" + Incremental = "incremental" + Seed = "seed" + MaterializedView = "materialized_view" + + +class MaterializationBuildStrategy(StrEnum): + Alter = "alter" + Create = "create" + NoOp = "no_op" + Replace = "replace" + + +@dataclass +class Materialization(ABC): + + type: StrEnum # this will generally be `MaterializationType`, however this allows for extending that Enum + relation_factory: RelationFactory + target_relation: Relation + existing_relation_ref: Optional[RelationRef] = None + is_full_refresh: bool = False + grants: dict = field(default_factory=dict) + on_configuration_change: OnConfigurationChangeOption = OnConfigurationChangeOption.default() + + def __str__(self) -> str: + """ + This gets used in some error messages. + + Returns: + A user-friendly name to be used in logging, error messages, etc. 
+ """ + return str(self.target_relation) + + def existing_relation( + self, describe_relation_results: DescribeRelationResults + ) -> Optional[Relation]: + """ + Produce a full-blown `Relation` instance for `self.existing_relation_ref` using metadata from the database + + Args: + describe_relation_results: the results from the macro `describe_sql(self.existing_relation_ref)` + + Returns: + a `Relation` instance that represents `self.existing_relation_ref` in the database + """ + if self.existing_relation_ref: + relation_type = self.existing_relation_ref.type + return self.relation_factory.make_from_describe_relation_results( + describe_relation_results, relation_type + ) + return None + + @property + def intermediate_relation(self) -> Optional[Relation]: + if self.target_relation: + return self.relation_factory.make_intermediate(self.target_relation) + return None + + @property + def backup_relation_ref(self) -> Optional[RelationRef]: + if self.existing_relation_ref: + return self.relation_factory.make_backup_ref(self.existing_relation_ref) + # don't throw an exception here, that way it behaves like `existing_relation_ref`, which is a property + return None + + @property + def build_strategy(self) -> MaterializationBuildStrategy: + return MaterializationBuildStrategy.NoOp + + @property + def should_revoke_grants(self) -> bool: + """ + This attempts to mimic the macro `should_revoke()` + """ + should_revoke = { + MaterializationBuildStrategy.Alter: True, + MaterializationBuildStrategy.Create: False, + MaterializationBuildStrategy.NoOp: False, + MaterializationBuildStrategy.Replace: True, + } + return should_revoke[self.build_strategy] + + @classmethod + def from_dict(cls, config_dict) -> "Materialization": + return cls(**filter_null_values(config_dict)) + + @classmethod + def from_node( + cls, + node: ParsedNode, + relation_factory: RelationFactory, + existing_relation_ref: Optional[RelationRef] = None, + ) -> "Materialization": + config_dict = cls.parse_node(node, relation_factory, existing_relation_ref) + materialization = cls.from_dict(config_dict) + return materialization + + @classmethod + def parse_node( + cls, + node: ParsedNode, + relation_factory: RelationFactory, + existing_relation_ref: Optional[RelationRef] = None, + ) -> Dict[str, Any]: + return { + "relation_factory": relation_factory, + "target_relation": relation_factory.make_from_node(node), + "is_full_refresh": any({get_flag_obj().FULL_REFRESH, node.config.full_refresh}), + "grants": node.config.grants, + "on_configuration_change": node.config.on_configuration_change, + "existing_relation_ref": existing_relation_ref, + } diff --git a/core/dbt/adapters/materialization/models/_materialized_view.py b/core/dbt/adapters/materialization/models/_materialized_view.py new file mode 100644 index 00000000000..d370afda366 --- /dev/null +++ b/core/dbt/adapters/materialization/models/_materialized_view.py @@ -0,0 +1,49 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Any, Dict, Optional + +from dbt.adapters.relation.factory import RelationFactory +from dbt.adapters.relation.models import RelationRef +from dbt.contracts.graph.nodes import ParsedNode + +from dbt.adapters.materialization.models._materialization import ( + Materialization, + MaterializationBuildStrategy, + MaterializationType, +) + + +@dataclass +class MaterializedViewMaterialization(Materialization, ABC): + """ + This config identifies the minimal materialization parameters required for dbt to function as well + as built-ins that make macros 
more extensible. Additional parameters may be added by subclassing for your adapter. + """ + + @property + def build_strategy(self) -> MaterializationBuildStrategy: + + # this is a new relation, so just create it + if self.existing_relation_ref is None: + return MaterializationBuildStrategy.Create + + # there is an existing relation, so check if we are going to replace it before determining changes + elif self.is_full_refresh or ( + self.target_relation.type != self.existing_relation_ref.type + ): + return MaterializationBuildStrategy.Replace + + # `target_relation` and `existing_relation` both exist and are the same type, so we need to determine changes + else: + return MaterializationBuildStrategy.Alter + + @classmethod + def parse_node( + cls, + node: ParsedNode, + relation_factory: RelationFactory, + existing_relation_ref: Optional[RelationRef] = None, + ) -> Dict[str, Any]: + config_dict = super().parse_node(node, relation_factory, existing_relation_ref) + config_dict.update({"type": MaterializationType.MaterializedView}) + return config_dict diff --git a/core/dbt/adapters/relation/README.md b/core/dbt/adapters/relation/README.md new file mode 100644 index 00000000000..f1cef172b68 --- /dev/null +++ b/core/dbt/adapters/relation/README.md @@ -0,0 +1,73 @@ +# Relation Models +This package serves as an initial abstraction for managing the inspection of existing relations and determining +changes on those relations. It arose from the materialized view work and currently supports only +materialized views for Postgres, Redshift, and BigQuery as well as dynamic tables for Snowflake. There are three main +classes in this package. + +## RelationFactory +This factory is the entrypoint that should be used to consistently create `Relation` objects. An instance of this +factory exists, and is configured, on `BaseAdapter` and its subclasses. Using this ensures that if a materialized view +relation is needed, one is always created using the same subclass of `Relation`. An adapter obtains an instance +of this class from the `BaseAdapter.relation_factory` property, which can be overridden to register +adapter-specific subclasses. This factory also has some +useful shortcut methods for common operations in jinja: + +- `make_from_node` +- `make_from_describe_relation_results` +- `make_ref` +- `make_backup_ref` +- `make_intermediate` +- `make_changeset` + +In addition to being useful in its own right, this factory also gets passed to `Materialization` classes to +streamline jinja workflows. While the adapter maintainer could call `make_backup_ref` directly, it's more likely +that a process that takes a `Materialization` instance is doing that for them. +See `../materialization/README.md` for more information. + +## Relation +This class holds the primary parsing methods required for marshalling data from a user config or a database metadata +query into a `Relation` subclass. `Relation` is a good class to subclass from for things like tables, views, etc. +The expectation is that a `Relation` is something that gets used with a `Materialization`. The intention is to +have some default implementations as built-ins for basic use/prototyping. So far there is only one. + +### MaterializedViewRelation +This class is a basic materialized view that only has enough attribution to create and drop a materialized view. +There is no change management. However, as long as the required jinja templates are provided, this should just work. + +## RelationComponent +This class is a boiled-down version of `Relation` that still has some parsing functionality. `RelationComponent` +is a good class to subclass from for things like a Postgres index, a Redshift sortkey, a Snowflake target_lag, etc. +A `RelationComponent` should always be an attribute of a `Relation` or another `RelationComponent`; a minimal +sketch of one follows.
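+
+The sketch below is illustrative only: the `sort_key` config attribute and the `sort_key_column`
+metadata column are hypothetical, not part of this package.
+
+```python
+from dataclasses import dataclass
+from typing import Any, Dict
+
+from dbt.adapters.relation.models import DescribeRelationResults, RelationComponent
+from dbt.contracts.graph.nodes import ParsedNode
+
+
+@dataclass(frozen=True)
+class SortKeyRelationComponent(RelationComponent):
+    column: str
+
+    @classmethod
+    def parse_node(cls, node: ParsedNode) -> Dict[str, Any]:
+        # assumes the user sets `sort_key` in the model config, which lands in the config's extras
+        return {"column": node.config.extra.get("sort_key")}
+
+    @classmethod
+    def parse_describe_relation_results(
+        cls, describe_relation_results: DescribeRelationResults
+    ) -> Dict[str, Any]:
+        # at this grain, `describe_relation_results` is a single `agate.Row`
+        return {"column": describe_relation_results["sort_key_column"]}
+```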
There are a few built-ins that will likely be used in every `Relation`. + +### Schema +This represents a database schema. It's very basic, and generally the only reason to subclass from it is to +apply some type of validation rule (e.g. the name can only be so long). + +### Database +This represents a database. Like `Schema`, it's very basic, and generally the only reason to subclass from it is to +apply some type of validation rule (e.g. the name can only be so long). + +## RelationRef +- `RelationRef` +- `SchemaRef` +- `DatabaseRef` + +This collection of objects serves as a bare-bones reference to a database object that can be used for small tasks, +e.g. `DROP`, `RENAME`. It really serves as a bridge between relation types that are built on this framework +and relation types that still reside on the existing framework. A materialized view will need to be able to +reference a table object that is sitting in the way and rename/drop it. Additionally, this provides a way to +reference an existing materialized view without querying the database to get all of the metadata. That step +is put off as late as possible to improve performance. + +## RelationChange +This class holds the methods required for detecting and acting on changes on a `Relation`. All changes +should subclass from `RelationChange`. A `RelationChange` can be thought of as being analogous +to a web request on a `Relation`. You need to know what you're doing +(`action`: 'create' = POST, 'drop' = DELETE, etc.) and the information (`context`) needed to make the change. +In our scenarios, `context` tends to be either an instance of `RelationComponent` corresponding to the new state +or a single value if the change is simple. For example, creating an `index` would require the entire config; +whereas updating a setting like `autorefresh` for Redshift would require only the setting. + +## RelationChangeset +This class is effectively a bin for collecting instances of `RelationChange`. It comes with a few helper +methods that facilitate rolling up concepts like `requires_full_refresh` to the aggregate level; a short +sketch of a concrete change follows.
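+
+As a sketch (illustrative only; loosely modeled on the Redshift `autorefresh` setting mentioned above),
+a simple change might look like this:
+
+```python
+from dataclasses import dataclass
+
+from dbt.adapters.relation.models import RelationChange, RelationChangeAction
+
+
+@dataclass(frozen=True, eq=True, unsafe_hash=True)
+class AutoRefreshRelationChange(RelationChange):
+    @property
+    def requires_full_refresh(self) -> bool:
+        # toggling a setting is an in-place alter, not a rebuild
+        return False
+
+
+change = AutoRefreshRelationChange(action=RelationChangeAction.alter, context=True)
+```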
diff --git a/core/dbt/adapters/relation/__init__.py b/core/dbt/adapters/relation/__init__.py new file mode 100644 index 00000000000..e23bbd579a8 --- /dev/null +++ b/core/dbt/adapters/relation/__init__.py @@ -0,0 +1 @@ +from dbt.adapters.relation.factory import RelationFactory diff --git a/core/dbt/adapters/relation/factory.py b/core/dbt/adapters/relation/factory.py new file mode 100644 index 00000000000..cb93f9ef9a6 --- /dev/null +++ b/core/dbt/adapters/relation/factory.py @@ -0,0 +1,176 @@ +from dataclasses import replace +from typing import Dict, FrozenSet, Type + +from dbt.contracts.graph.nodes import ParsedNode +from dbt.contracts.relation import ComponentName, RelationType +from dbt.dataclass_schema import StrEnum +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.relation.models import ( + DescribeRelationResults, + MaterializedViewRelation, + MaterializedViewRelationChangeset, + Relation, + RelationChangeset, + RelationRef, + RenderPolicy, +) + + +class RelationFactory: + """ + Unlike other classes that get used by adapters, this class is not intended to be subclassed. Instead, + instantiate it with all the required configuration (or fall back to the defaults defined here). + """ + + # this configuration should never change + BACKUP_SUFFIX: str = "__dbt_backup" + INTERMEDIATE_SUFFIX: str = "__dbt_tmp" + + def __init__(self, **kwargs): + # the `StrEnum` class will generally be `RelationType`; however, this allows for extending that Enum + self.relation_types: Type[StrEnum] = kwargs.get("relation_types", RelationType) + self.relation_models: Dict[StrEnum, Type[Relation]] = kwargs.get( "relation_models", { RelationType.MaterializedView: MaterializedViewRelation, }, ) + self.relation_changesets: Dict[StrEnum, Type[RelationChangeset]] = kwargs.get( "relation_changesets", { RelationType.MaterializedView: MaterializedViewRelationChangeset, }, ) + self.relation_can_be_renamed: FrozenSet[StrEnum] = kwargs.get( "relation_can_be_renamed", frozenset() ) + self.render_policy: RenderPolicy = kwargs.get("render_policy", RenderPolicy()) + + try: + for relation_type in self.relation_models.keys(): + assert relation_type in self.relation_types + except AssertionError: + raise DbtRuntimeError( f"Received a model for {relation_type} " f"but this relation type is not registered on this factory.\n" f" registered relation types: {', '.join(self.relation_types)}\n" ) + + try: + for relation_type in self.relation_changesets.keys(): + assert relation_type in self.relation_types + except AssertionError: + raise DbtRuntimeError( f"Received a changeset for {relation_type} " f"but this relation type is not registered on this factory.\n" f" registered relation types: {', '.join(self.relation_types)}\n" ) + + def make_from_node(self, node: ParsedNode) -> Relation: + relation_type = self.relation_types(node.config.materialized) + parser = self._get_relation_model(relation_type) + relation = parser.from_node(node) + assert isinstance(relation, Relation) # mypy + return relation + + def make_from_describe_relation_results( + self, + describe_relation_results: DescribeRelationResults, + relation_type: str, + ) -> Relation: + model = self._get_relation_model(self.relation_types(relation_type)) + relation = model.from_describe_relation_results(describe_relation_results) + assert isinstance(relation, Relation) # mypy + return relation + + def make_ref( + self, + name: str, + schema_name: str, + database_name: str, + relation_type: str, + ) -> RelationRef: + relation_type = self._get_relation_type(relation_type) + relation_ref = RelationRef.from_dict( + { + "name": name, + "schema": { + "name": schema_name, + "database": { + "name": database_name, + }, + }, + "render": self.render_policy, + "type": relation_type, + "can_be_renamed": relation_type in self.relation_can_be_renamed, + } + ) + return relation_ref + + def make_backup_ref(self, existing_relation: Relation) -> RelationRef: + if existing_relation.can_be_renamed: + backup_name = self.render_policy.part( + ComponentName.Identifier, f"{existing_relation.name}{self.BACKUP_SUFFIX}" + ) + assert isinstance( + backup_name, str + ) # since `part` can return None in certain scenarios (not this one) + return self.make_ref( + name=backup_name, + schema_name=existing_relation.schema_name, + database_name=existing_relation.database_name, + relation_type=existing_relation.type, + ) + raise DbtRuntimeError( + f"This relation cannot be renamed, hence it cannot be backed up: \n" + f" path: {existing_relation.fully_qualified_path}\n" + f" type: {existing_relation.type}\n" + ) + + def 
make_intermediate(self, target_relation: Relation) -> Relation: + if target_relation.can_be_renamed: + intermediate_name = self.render_policy.part( + ComponentName.Identifier, f"{target_relation.name}{self.INTERMEDIATE_SUFFIX}" + ) + return replace(target_relation, name=intermediate_name) + raise DbtRuntimeError( + f"This relation cannot be renamed, hence it cannot be staged: \n" + f" path: {target_relation.fully_qualified_path}\n" + f" type: {target_relation.type}\n" + ) + + def make_changeset( + self, existing_relation: Relation, target_relation: Relation + ) -> RelationChangeset: + changeset = self._get_relation_changeset(existing_relation.type) + return changeset.from_relations(existing_relation, target_relation) + + def _get_relation_type(self, relation_type: str) -> StrEnum: + try: + return self.relation_types(relation_type) + except ValueError: + raise DbtRuntimeError( + f"This factory does not recognize this relation type.\n" + f" received: {relation_type}\n" + f" options: {', '.join(t for t in self.relation_types)}\n" + ) + + def _get_relation_model(self, relation_type: StrEnum) -> Type[Relation]: + if relation := self.relation_models.get(relation_type): + return relation + raise DbtRuntimeError( + f"This factory does not have a relation model for this type.\n" + f" received: {relation_type}\n" + f" options: {', '.join(t for t in self.relation_models.keys())}\n" + ) + + def _get_relation_changeset(self, relation_type: StrEnum) -> Type[RelationChangeset]: + if relation_changeset := self.relation_changesets.get(relation_type): + return relation_changeset + raise DbtRuntimeError( + f"This factory does not have a relation changeset for this type.\n" + f" received: {relation_type}\n" + f" options: {', '.join(t for t in self.relation_changesets.keys())}\n" + ) diff --git a/core/dbt/adapters/relation/models/__init__.py b/core/dbt/adapters/relation/models/__init__.py new file mode 100644 index 00000000000..0f2fa02c5c1 --- /dev/null +++ b/core/dbt/adapters/relation/models/__init__.py @@ -0,0 +1,22 @@ +from dbt.adapters.relation.models._change import ( + RelationChange, + RelationChangeAction, + RelationChangeset, +) +from dbt.adapters.relation.models._database import DatabaseRelation +from dbt.adapters.relation.models._materialized_view import ( + MaterializedViewRelation, + MaterializedViewRelationChangeset, +) +from dbt.adapters.relation.models._policy import IncludePolicy, QuotePolicy, RenderPolicy +from dbt.adapters.relation.models._relation import Relation +from dbt.adapters.relation.models._relation_component import ( + DescribeRelationResults, + RelationComponent, +) +from dbt.adapters.relation.models._relation_ref import ( + DatabaseRelationRef, + RelationRef, + SchemaRelationRef, +) +from dbt.adapters.relation.models._schema import SchemaRelation diff --git a/core/dbt/adapters/relation/models/_change.py b/core/dbt/adapters/relation/models/_change.py new file mode 100644 index 00000000000..3a25fc7e618 --- /dev/null +++ b/core/dbt/adapters/relation/models/_change.py @@ -0,0 +1,118 @@ +from abc import ABC, abstractmethod +from copy import deepcopy +from dataclasses import dataclass +from typing import Any, Dict, Hashable + +from dbt.dataclass_schema import StrEnum +from dbt.exceptions import DbtRuntimeError +from dbt.utils import filter_null_values + +from dbt.adapters.relation.models._relation import Relation + + +class RelationChangeAction(StrEnum): + alter = "alter" + create = "create" + drop = "drop" + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class 
RelationChange(ABC): + """ + Changes are generally "alter the thing in place" or "drop the old one in favor of the new one". In other words, + you will either wind up with a single `alter` or a pair of `drop` and `create`. In the `alter` scenario, + `context` tends to be a single value, like a setting. In the `drop` and `create` scenario, + `context` tends to be the whole object, in particular for `create`. + """ + + action: StrEnum # this will generally be `RelationChangeAction`, however this allows for extending that Enum + context: Hashable # this is usually a `RelationComponent`, e.g. an index config, or a single value, e.g. `str` + + @property + @abstractmethod + def requires_full_refresh(self) -> bool: + """ + Indicates if this change can be performed via alter logic (hence `False`), or will require a full refresh + (hence `True`). While this is generally determined by the type of change being made, which could be a + static property, this is purposely being left as a dynamic property to allow for edge cases. + + Returns: + `True` if the change requires a full refresh, `False` if the change can be applied to the object + """ + raise NotImplementedError( + "Configuration change management has not been fully configured for this adapter and/or relation type." + ) + + +@dataclass +class RelationChangeset(ABC): + existing_relation: Relation + target_relation: Relation + _requires_full_refresh_override: bool = False + + def __post_init__(self): + if self.is_empty and self.existing_relation != self.target_relation: + # we need to force a full refresh if we didn't detect any changes but the objects are not the same + self.force_full_refresh() + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "RelationChangeset": + kwargs_dict = deepcopy(config_dict) + try: + return cls(**filter_null_values(kwargs_dict)) + except TypeError: + raise DbtRuntimeError(f"Unexpected configuration received:\n" f" {config_dict}\n") + + @classmethod + def from_relations( + cls, existing_relation: Relation, target_relation: Relation + ) -> "RelationChangeset": + kwargs_dict = cls.parse_relations(existing_relation, target_relation) + + # stuff the relations in so that we can do the post init to figure out if we need a full refresh + kwargs_dict.update( + { + "existing_relation": existing_relation, + "target_relation": target_relation, + } + ) + return cls.from_dict(kwargs_dict) + + @classmethod + @abstractmethod + def parse_relations( + cls, existing_relation: Relation, target_relation: Relation + ) -> Dict[str, Any]: + raise NotImplementedError( + "Configuration change management has not been fully configured for this adapter and/or relation type." + ) + + @property + def requires_full_refresh(self) -> bool: + """ + This should be a calculation based on the changes that you stack on this class. + Remember to reference `super().requires_full_refresh` in your conditions, or at least + `self._requires_full_refresh_override` + + Returns: + `True` if any change requires a full refresh or if the override has been triggered + `False` if all changes can be made without requiring a full refresh + """ + return self._requires_full_refresh_override + + @property + def is_empty(self) -> bool: + """ + Indicates if there are any changes in this changeset. + + Returns: + `True` if there are no changes and the full refresh override has not been triggered + `False` otherwise + """ + return not self._requires_full_refresh_override + + def force_full_refresh(self): + """ + Activates the full refresh override. + """ + self._requires_full_refresh_override = True
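+
+
+# A sketch of a concrete changeset that rolls `requires_full_refresh` up from its
+# changes (illustrative only; `AutoRefreshRelationChange` is hypothetical):
+#
+#     @dataclass
+#     class MyRelationChangeset(RelationChangeset):
+#         autorefresh: Optional[AutoRefreshRelationChange] = None
+#
+#         @property
+#         def requires_full_refresh(self) -> bool:
+#             changes = [self.autorefresh]
+#             return any(
+#                 change.requires_full_refresh for change in changes if change
+#             ) or self._requires_full_refresh_override
+#
+#         @property
+#         def is_empty(self) -> bool:
+#             return self.autorefresh is None and super().is_empty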
+ """ + self._requires_full_refresh_override = True diff --git a/core/dbt/adapters/relation/models/_database.py b/core/dbt/adapters/relation/models/_database.py new file mode 100644 index 00000000000..2cbd8722ec4 --- /dev/null +++ b/core/dbt/adapters/relation/models/_database.py @@ -0,0 +1,84 @@ +from dataclasses import dataclass +from typing import Any, Dict, Optional + +from dbt.contracts.graph.nodes import ParsedNode +from dbt.contracts.relation import ComponentName + +from dbt.adapters.relation.models._relation_component import ( + DescribeRelationResults, + RelationComponent, +) + + +@dataclass(frozen=True) +class DatabaseRelation(RelationComponent): + """ + This config identifies the minimal materialization parameters required for dbt to function as well + as built-ins that make macros more extensible. Additional parameters may be added by subclassing for your adapter. + """ + + name: str + + def __str__(self) -> str: + return self.fully_qualified_path or "" + + @property + def fully_qualified_path(self) -> Optional[str]: + return self.render.part(ComponentName.Database, self.name) + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "DatabaseRelation": + """ + Parse `config_dict` into a `DatabaseRelation` instance, applying defaults + """ + database = super().from_dict(config_dict) + assert isinstance(database, DatabaseRelation) + return database + + @classmethod + def parse_node(cls, node: ParsedNode) -> Dict[str, Any]: + """ + Parse `ModelNode` into a dict representation of a `DatabaseRelation` instance + + This is generally used indirectly by calling `from_model_node()`, but there are times when the dict + version is more useful + + Args: + node: the `model` attribute in the global jinja context + + Example `model_node`: + + ModelNode({ + "database": "my_database", + ..., + }) + + Returns: a `DatabaseRelation` instance as a dict, can be passed into `from_dict` + """ + return {"name": node.database} + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> Dict[str, Any]: + """ + Parse database metadata into a dict representation of a `DatabaseRelation` instance + + This is generally used indirectly by calling `from_describe_relation_results()`, + but there are times when the dict version is more appropriate. 
+ + Args: + describe_relation_results: the results of a set of queries that fully describe an instance of this class + + Example of `describe_relation_results`: + + agate.Row({ + "database_name": "my_database", + }) + + Returns: a `DatabaseRelation` instance as a dict, can be passed into `from_dict` + """ + relation = cls._parse_single_record_from_describe_relation_results( describe_relation_results, "relation" ) + return {"name": relation["database_name"]} diff --git a/core/dbt/adapters/relation/models/_materialized_view.py b/core/dbt/adapters/relation/models/_materialized_view.py new file mode 100644 index 00000000000..2a7e7be5cea --- /dev/null +++ b/core/dbt/adapters/relation/models/_materialized_view.py @@ -0,0 +1,66 @@ +from dataclasses import dataclass, field +from typing import Any, Dict + +from dbt.contracts.relation import RelationType +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.relation.models._change import RelationChangeset +from dbt.adapters.relation.models._policy import RenderPolicy +from dbt.adapters.relation.models._relation import Relation +from dbt.adapters.relation.models._schema import SchemaRelation + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class MaterializedViewRelation(Relation): + """ + This config serves as a default implementation for materialized views. It's bare bones and only + supports the minimal attribution and drop/create (no alter). This may suffice for your needs. + However, if your adapter requires more attribution, it's recommended to subclass directly + from `Relation` and bypass this default; don't subclass from this. + + *Note:* Even if you use this, you'll still need to provide query templates for the macros + found in `include/global_project/macros/relations/atomic/*.sql` as there is no way to predict + the target database platform's data structure.
+ + The following parameters are configurable by dbt: + - name: name of the materialized view + - schema: schema that contains the materialized view + - query: the query that defines the view + """ + + # attribution + name: str + schema: SchemaRelation + query: str = field(hash=False, compare=False) + + # configuration + type = RelationType.MaterializedView + render = RenderPolicy() + SchemaParser = SchemaRelation + can_be_renamed = False + + +@dataclass +class MaterializedViewRelationChangeset(RelationChangeset): + @classmethod + def parse_relations( + cls, existing_relation: Relation, target_relation: Relation + ) -> Dict[str, Any]: + try: + assert existing_relation.type == RelationType.MaterializedView + assert target_relation.type == RelationType.MaterializedView + except AssertionError: + raise DbtRuntimeError( f"Two materialized view relations were expected, but received:\n" f" existing: {existing_relation}\n" f" new: {target_relation}\n" ) + return {} + + @property + def requires_full_refresh(self) -> bool: + return True + + @property + def is_empty(self) -> bool: + return False diff --git a/core/dbt/adapters/relation/models/_policy.py b/core/dbt/adapters/relation/models/_policy.py new file mode 100644 index 00000000000..6e673657903 --- /dev/null +++ b/core/dbt/adapters/relation/models/_policy.py @@ -0,0 +1,72 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Optional, OrderedDict + +from dbt.contracts.relation import Policy, ComponentName + + +@dataclass +class IncludePolicy(Policy, ABC): + pass + + +@dataclass +class QuotePolicy(Policy, ABC): + pass + + +class RenderPolicy: + def __init__( + self, + quote_policy: QuotePolicy = QuotePolicy(), + include_policy: IncludePolicy = IncludePolicy(), + quote_character: str = '"', + delimiter: str = ".", + ): + self.quote_policy = quote_policy + self.include_policy = include_policy + self.quote_character = quote_character + self.delimiter = delimiter + + def part(self, component: ComponentName, value: str) -> Optional[str]: + """ + Apply the include and quote policy to the value so that it may be rendered in a template. + + Args: + component: the component to be referenced in `IncludePolicy` and `QuotePolicy` + value: the value to be rendered + + Returns: + a policy-compliant value + """ + # strip any quote characters first; this is primarily done to make it easy to create the backup + # and intermediate names, e.g. suffixing the quoted name `"my_view"` should render as + # `"my_view__dbt_backup"`, not `"my_view"__dbt_backup` + unquoted_value = value.replace(self.quote_character, "") + + # if it should be included and quoted, then wrap it in quotes as-is + if self.include_policy.get_part(component) and self.quote_policy.get_part(component): + rendered_value = f"{self.quote_character}{unquoted_value}{self.quote_character}" + + # if it should be included without quotes, then apply `lower()` to make it case-insensitive + elif self.include_policy.get_part(component): + rendered_value = unquoted_value.lower() + + # if it should not be included, return `None`, so it gets excluded in `render` + else: + rendered_value = None + + return rendered_value + + def full(self, parts: OrderedDict[ComponentName, str]) -> Optional[str]: + """ + Apply `RenderPolicy.part` to each part and then concatenate in order. + + Args: + parts: an ordered dictionary mapping ComponentName to value + + Returns: + a fully rendered path ready for a jinja template + """ + rendered_parts = [self.part(*part) for part in parts.items()] + rendered_path = self.delimiter.join(part for part in rendered_parts if part is not None) + return rendered_path
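+
+
+# Usage sketch (illustrative only; assumes policies that include schema and identifier
+# and quote only the identifier):
+#
+#     render = RenderPolicy(quote_policy=my_quote_policy, include_policy=my_include_policy)
+#     render.part(ComponentName.Identifier, "My_View")   # -> '"My_View"'
+#     render.part(ComponentName.Schema, "My_Schema")     # -> 'my_schema'
+#     render.full(OrderedDict({
+#         ComponentName.Schema: "My_Schema",
+#         ComponentName.Identifier: "My_View",
+#     }))                                                # -> 'my_schema."My_View"'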
diff --git a/core/dbt/adapters/relation/models/_relation.py b/core/dbt/adapters/relation/models/_relation.py new file mode 100644 index 00000000000..b7d7611b2d9 --- /dev/null +++ b/core/dbt/adapters/relation/models/_relation.py @@ -0,0 +1,165 @@ +from abc import ABC +from collections import OrderedDict +from dataclasses import dataclass, field +from typing import Any, Dict, Optional, Type + +from dbt.contracts.graph.nodes import ParsedNode, CompiledNode +from dbt.contracts.relation import ComponentName +from dbt.dataclass_schema import StrEnum + +from dbt.adapters.relation.models._relation_component import ( + DescribeRelationResults, + RelationComponent, +) +from dbt.adapters.relation.models._schema import SchemaRelation + + +@dataclass(frozen=True) +class Relation(RelationComponent, ABC): + """ + This config identifies the minimal materialization parameters required for dbt to function as well + as built-ins that make macros more extensible. Additional parameters may be added by subclassing for your adapter. + """ + + # attribution + name: str + schema: SchemaRelation + query: str + + # configuration + type: StrEnum # this will generally be `RelationType`, however this allows for extending that Enum + can_be_renamed: bool + SchemaParser: Type[SchemaRelation] = field(default=SchemaRelation, init=False) + + def __str__(self) -> str: + return self.fully_qualified_path or "" + + @property + def fully_qualified_path(self) -> Optional[str]: + return self.render.full( + OrderedDict( + { + ComponentName.Database: self.database_name, + ComponentName.Schema: self.schema_name, + ComponentName.Identifier: self.name, + } + ) + ) + + @property + def schema_name(self) -> str: + return self.schema.name + + @property + def database_name(self) -> str: + return self.schema.database_name + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "Relation": + """ + Parse `config_dict` into a `Relation` instance, applying defaults + """ + # default configuration + kwargs_dict: Dict[str, Any] = { + "type": cls.type, + "can_be_renamed": cls.can_be_renamed, + } + + kwargs_dict.update(config_dict) + + if schema := config_dict.get("schema"): + kwargs_dict.update({"schema": cls.SchemaParser.from_dict(schema)}) + + relation = super().from_dict(kwargs_dict) + assert isinstance(relation, Relation) + return relation + + @classmethod + def from_node(cls, node: ParsedNode) -> "Relation": + # tighten the return type + relation = super().from_node(node) + assert isinstance(relation, Relation) + return relation + + @classmethod + def parse_node(cls, node: ParsedNode) -> Dict[str, Any]: + """ + Parse a `CompiledNode` into a dict representation of a `Relation` instance + + This is generally used indirectly by calling `from_node()`, but there are times when the dict + version is more useful + + Args: + node: the `model` attribute in the global jinja context + + Example `node`: + + NodeConfig({ + "compiled_code": "create view my_view as\n select * from my_table;\n", + "database": "my_database", + "identifier": "my_view", + "schema": "my_schema", + ..., + }) + + Returns: a `Relation` instance as a dict, can be passed into `from_dict` + """ + # we need a `CompiledNode` here instead of 
just `ParsedNodeMandatory` because we need access to `query` + config_dict = { + "name": node.identifier, + "schema": cls.SchemaParser.parse_node(node), + "query": cls._parse_query_from_node(node), + } + return config_dict + + @classmethod + def from_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> "Relation": + # tighten the return type + relation = super().from_describe_relation_results(describe_relation_results) + assert isinstance(relation, Relation) + return relation + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> Dict[str, Any]: + """ + Parse database metadata into a dict representation of a `Relation` instance + + This is generally used indirectly by calling `from_describe_relation_results()`, + but there are times when the dict version is more appropriate. + + Args: + describe_relation_results: the results of a set of queries that fully describe an instance of this class + + Example of `describe_relation_results`: + + { + "relation": agate.Table(agate.Row({ + "table_name": "my_materialized_view", + "query": "create materialized view my_materialized_view as select * from my_table;", + })), + } + + Returns: a `Relation` instance as a dict, can be passed into `from_dict` + """ + relation = cls._parse_single_record_from_describe_relation_results( + describe_relation_results, "relation" + ) + config_dict = { + "name": relation["name"], + "schema": cls.SchemaParser.parse_describe_relation_results(relation), + "query": relation["query"].strip(), + } + return config_dict + + @classmethod + def _parse_query_from_node(cls, node: ParsedNode) -> str: + try: + assert isinstance(node, CompiledNode) + query = node.compiled_code or "" + return query.strip() + except AssertionError: + return "" diff --git a/core/dbt/adapters/relation/models/_relation_component.py b/core/dbt/adapters/relation/models/_relation_component.py new file mode 100644 index 00000000000..5908794133d --- /dev/null +++ b/core/dbt/adapters/relation/models/_relation_component.py @@ -0,0 +1,181 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Dict, Union + +import agate + +from dbt.contracts.graph.nodes import ParsedNode +from dbt.exceptions import DbtRuntimeError +from dbt.utils import filter_null_values + +from dbt.adapters.relation.models._policy import RenderPolicy + +""" +`Relation` metadata from the database comes in the form of a collection of one or more `agate.Table`s. In order to +reference the tables, they are added to a dictionary. There can be more than one table because there can be +multiple grains of data for a single object. For example, a materialized view in Postgres has base level information, +like name. But it also can have multiple indexes, which needs to be a separate query. The metadata for +a materialized view might look like this: + +{ + "materialized_view": agate.Table( + agate.Row({"table_name": "table_abc", "query": "select * from table_def"}) + ), + "indexes": agate.Table("rows": [ + agate.Row({"name": "index_a", "columns": ["column_a"], "type": "hash", "unique": False}), + agate.Row({"name": "index_b", "columns": ["time_dim_a"], "type": "btree", "unique": False}), + ]), +} + +whereas the metadata that gets used to create an index (`RelationComponent`) may look like this: + +agate.Row({"name": "index_a", "columns": ["column_a"], "type": "hash", "unique": False}) + +Generally speaking, `Relation` instances (e.g. 
materialized view) will be described with
+an `agate.Table` and `RelationComponent` instances (e.g. index) will be described with an `agate.Row`.
+This happens simply because the `Relation` instance is the first step in processing the metadata, but the
+`RelationComponent` instance can be looped over when dispatching to it in `parse_describe_relation_results()`.
+"""
+DescribeRelationResults = Union[Dict[str, agate.Table], agate.Row]
+
+
+@dataclass(frozen=True)
+class RelationComponent(ABC):
+    """
+    This config identifies the minimal relation parameters required for dbt to function as well
+    as built-ins that make macros more extensible. Additional parameters may be added by subclassing for your adapter.
+    """
+
+    # configuration
+    render: RenderPolicy = field(compare=False)
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> "RelationComponent":
+        """
+        This assumes the subclass of `RelationComponent` is flat, in the sense that no attribute is
+        itself another subclass of `RelationComponent`. If that's not the case, this should be overridden
+        to manually manage that complexity. But remember to either call `super().from_dict()` at the end,
+        or at least use `filter_null_values()` so that defaults get applied properly for the dataclass.
+
+        Args:
+            config_dict: the dict representation of this instance
+
+        Returns: the `RelationComponent` representation associated with the provided dict
+        """
+        # default configuration
+        kwargs_dict = {"render": getattr(cls, "render", RenderPolicy())}
+
+        kwargs_dict.update(config_dict)
+
+        try:
+            relation_component = cls(**filter_null_values(kwargs_dict))
+        except TypeError:
+            raise DbtRuntimeError(f"Unexpected configuration received:\n" f"    {config_dict}\n")
+        return relation_component
+
+    @classmethod
+    def from_node(cls, node: ParsedNode) -> "RelationComponent":
+        """
+        A wrapper around `parse_node()` and `from_dict()` that pipes the results of the first into
+        the second. This shouldn't really need to be overridden; instead, the component methods should be overridden.
+
+        Args:
+            node: the `config.model` attribute in the global jinja context
+
+        Returns:
+            a validated `RelationComponent` instance specific to the adapter and relation type
+        """
+        relation_config = cls.parse_node(node)
+        relation = cls.from_dict(relation_config)
+        return relation
+
+    @classmethod
+    @abstractmethod
+    def parse_node(cls, node: ParsedNode) -> Dict[str, Any]:
+        """
+        Parse `ParsedNode` into a dict representation of a `RelationComponent` instance
+
+        In many cases this may be a one-to-one mapping; e.g. dbt calls it "schema" and the database calls it
+        "schema_name". In some cases it could require a calculation or dispatch to a lower grain object.
+
+        See `dbt/adapters/postgres/relation/index.py` to see an example implementation.
+
+        Args:
+            node: the `model` attribute in the global jinja context
+
+        Returns:
+            a non-validated dictionary version of a `RelationComponent` instance specific to the adapter and relation type
+        """
+        raise NotImplementedError(
+            "`parse_node()` needs to be implemented for this relation."
+        )
+
+    @classmethod
+    def from_describe_relation_results(
+        cls, describe_relation_results: DescribeRelationResults
+    ) -> "RelationComponent":
+        """
+        A wrapper around `parse_describe_relation_results()` and `from_dict()` that pipes the results of the
+        first into the second. This shouldn't really need to be overridden; instead, the component methods should
+        be overridden.
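+
+        For example, with a hypothetical adapter subclass and `results` shaped like the
+        metadata dictionary in the module docstring above (names here are illustrative):
+
+            my_mv = MyAdapterMaterializedViewRelation.from_describe_relation_results(results)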
+
+        Args:
+            describe_relation_results: the results of one or more queries run against the database to gather the
+                requisite metadata to describe this relation
+
+        Returns:
+            a validated `RelationComponent` instance specific to the adapter and relation type
+        """
+        config_dict = cls.parse_describe_relation_results(describe_relation_results)
+        relation = cls.from_dict(config_dict)
+        return relation
+
+    @classmethod
+    @abstractmethod
+    def parse_describe_relation_results(
+        cls, describe_relation_results: DescribeRelationResults
+    ) -> Dict[str, Any]:
+        """
+        The purpose of this method is to parse the database parlance for `from_dict` consumption.
+
+        This tends to be one-to-one except for combining grains of data. For example, a single table
+        could have multiple indexes which would result in multiple queries to the database to build one
+        `TableRelation` object. All of these pieces get knit together here.
+
+        See `dbt/adapters/postgres/relation_config/materialized_view.py` to see an example implementation.
+
+        Args:
+            describe_relation_results: the results of one or more queries run against the database to gather the
+                requisite metadata to describe this relation
+
+        Returns:
+            a non-validated dictionary version of a `RelationComponent` instance specific to the adapter and relation type
+        """
+        raise NotImplementedError(
+            "`parse_describe_relation_results()` needs to be implemented for this relation."
+        )
+
+    @classmethod
+    def _parse_single_record_from_describe_relation_results(
+        cls,
+        describe_relation_results: DescribeRelationResults,
+        table: str,
+    ) -> agate.Row:
+        try:
+            assert isinstance(describe_relation_results, agate.Row)
+            return describe_relation_results
+        except AssertionError:
+            try:
+                assert isinstance(describe_relation_results, Dict)
+                describe_relation_results_table = describe_relation_results.get(table)
+                assert isinstance(describe_relation_results_table, agate.Table)
+                assert describe_relation_results_table is not None
+                assert len(describe_relation_results_table) == 1
+                return describe_relation_results_table.rows[0]
+            except AssertionError:
+                raise DbtRuntimeError(
+                    f"This method expects either an `agate.Row` instance or a `Dict[str, agate.Table]` instance "
+                    f"where {table} is in the keys and the `agate.Table` has exactly one row. Received:\n"
+                    f"    {describe_relation_results}"
+                )
diff --git a/core/dbt/adapters/relation/models/_relation_ref.py b/core/dbt/adapters/relation/models/_relation_ref.py
new file mode 100644
index 00000000000..8d8e874a556
--- /dev/null
+++ b/core/dbt/adapters/relation/models/_relation_ref.py
@@ -0,0 +1,106 @@
+"""
+This module provides a way to store only the required metadata for a `Relation` without any parsers or actual
+relation_type-specific subclasses. It's primarily used to represent a relation that exists in the database
+without needing to query the database. This is useful with low-attribution macros (e.g. `drop_sql`, `rename_sql`)
+where the details are not needed to perform the action. It should be the case that if a macro supports execution
+with a `RelationRef` instance, then it should also support execution with a `Relation` instance. The converse
+is not true (e.g. `create_sql`).
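+
+For example, a `RelationRef` sufficient for `drop_sql` could be built from a dict like this
+(illustrative values; `RelationRef.from_dict` below is the entry point):
+
+    RelationRef.from_dict({
+        "name": "my_materialized_view",
+        "schema": {"name": "my_schema", "database": {"name": "my_database"}},
+        "render": RenderPolicy(),
+        "type": RelationType.MaterializedView,
+        "can_be_renamed": True,
+    })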
+""" +from copy import deepcopy +from dataclasses import dataclass +from typing import Any, Dict + +from dbt.contracts.graph.nodes import ParsedNode + +from dbt.adapters.relation.models._database import DatabaseRelation +from dbt.adapters.relation.models._policy import RenderPolicy +from dbt.adapters.relation.models._relation import Relation +from dbt.adapters.relation.models._relation_component import DescribeRelationResults +from dbt.adapters.relation.models._schema import SchemaRelation + + +@dataclass(frozen=True) +class DatabaseRelationRef(DatabaseRelation): + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "DatabaseRelationRef": + database_ref = cls( + **{ + "name": config_dict["name"], + "render": config_dict["render"], + } + ) + assert isinstance(database_ref, DatabaseRelationRef) + return database_ref + + @classmethod + def parse_node(cls, node: ParsedNode) -> Dict[str, Any]: + return {} + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> Dict[str, Any]: + return {} + + +@dataclass(frozen=True) +class SchemaRelationRef(SchemaRelation): + render: RenderPolicy + DatabaseParser = DatabaseRelationRef + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "SchemaRelationRef": + database_dict = deepcopy(config_dict["database"]) + database_dict.update({"render": config_dict["render"]}) + schema_ref = cls( + **{ + "name": config_dict["name"], + "database": DatabaseRelationRef.from_dict(database_dict), + "render": config_dict["render"], + } + ) + assert isinstance(schema_ref, SchemaRelationRef) + return schema_ref + + @classmethod + def parse_node(cls, node: ParsedNode) -> Dict[str, Any]: + return {} + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> Dict[str, Any]: + return {} + + +@dataclass(frozen=True) +class RelationRef(Relation): + can_be_renamed: bool + SchemaParser = SchemaRelationRef + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "RelationRef": + schema_dict = deepcopy(config_dict["schema"]) + schema_dict.update({"render": config_dict["render"]}) + relation_ref = cls( + **{ + "name": config_dict["name"], + "schema": SchemaRelationRef.from_dict(schema_dict), + "query": "", + "render": config_dict["render"], + "type": config_dict["type"], + "can_be_renamed": config_dict["can_be_renamed"], + } + ) + assert isinstance(relation_ref, RelationRef) + return relation_ref + + @classmethod + def parse_node(cls, node: ParsedNode) -> Dict[str, Any]: + return {} + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: DescribeRelationResults + ) -> Dict[str, Any]: + return {} diff --git a/core/dbt/adapters/relation/models/_schema.py b/core/dbt/adapters/relation/models/_schema.py new file mode 100644 index 00000000000..564383f8437 --- /dev/null +++ b/core/dbt/adapters/relation/models/_schema.py @@ -0,0 +1,119 @@ +from collections import OrderedDict +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Any, Dict, Type, Optional + +from dbt.contracts.graph.nodes import ParsedNode +from dbt.contracts.relation import ComponentName + +from dbt.adapters.relation.models._relation_component import ( + DescribeRelationResults, + RelationComponent, +) +from dbt.adapters.relation.models._database import DatabaseRelation + + +@dataclass(frozen=True) +class SchemaRelation(RelationComponent): + """ + This config identifies the minimal materialization 
parameters required for dbt to function as well
+    as built-ins that make macros more extensible. Additional parameters may be added by subclassing for your adapter.
+    """
+
+    name: str
+    database: DatabaseRelation
+
+    # configuration of base class
+    DatabaseParser: Type[DatabaseRelation] = field(default=DatabaseRelation, init=False)
+
+    def __str__(self) -> str:
+        return self.fully_qualified_path or ""
+
+    @property
+    def fully_qualified_path(self) -> Optional[str]:
+        return self.render.full(
+            OrderedDict(
+                {
+                    ComponentName.Database: self.database_name,
+                    ComponentName.Schema: self.name,
+                }
+            )
+        )
+
+    @property
+    def database_name(self) -> str:
+        return self.database.name
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> "SchemaRelation":
+        """
+        Parse `config_dict` into a `SchemaRelation` instance, applying defaults
+        """
+        kwargs_dict = deepcopy(config_dict)
+
+        if database := config_dict.get("database"):
+            kwargs_dict.update({"database": cls.DatabaseParser.from_dict(database)})
+
+        schema = super().from_dict(kwargs_dict)
+        assert isinstance(schema, SchemaRelation)
+        return schema
+
+    @classmethod
+    def parse_node(cls, node: ParsedNode) -> Dict[str, Any]:
+        """
+        Parse `ParsedNode` into a dict representation of a `SchemaRelation` instance
+
+        This is generally used indirectly by calling `from_node()`, but there are times when the dict
+        version is more useful
+
+        Args:
+            node: the `model` attribute in the global jinja context
+
+        Example `node`:
+
+            ModelNode({
+                "database": "my_database",
+                "schema": "my_schema",
+                ...,
+            })
+
+        Returns: a `SchemaRelation` instance as a dict, can be passed into `from_dict`
+        """
+        config_dict = {
+            "name": node.schema,
+            "database": cls.DatabaseParser.parse_node(node),
+        }
+        return config_dict
+
+    @classmethod
+    def parse_describe_relation_results(
+        cls, describe_relation_results: DescribeRelationResults
+    ) -> Dict[str, Any]:
+        """
+        Parse database metadata into a dict representation of a `SchemaRelation` instance
+
+        This is generally used indirectly by calling `from_describe_relation_results()`,
+        but there are times when the dict version is more appropriate.
+
+        Args:
+            describe_relation_results: the results of a set of queries that fully describe an instance of this class
+
+        Example of `describe_relation_results`:
+
+            agate.Row({
+                "schema_name": "my_schema",
+                "database_name": "my_database",
+            })
+
+        Returns: a `SchemaRelation` instance as a dict, can be passed into `from_dict`
+        """
+        relation = cls._parse_single_record_from_describe_relation_results(
+            describe_relation_results, "relation"
+        )
+        config_dict = {
+            "name": relation["schema_name"],
+            "database": cls.DatabaseParser.parse_describe_relation_results(
+                describe_relation_results
+            ),
+        }
+        return config_dict
diff --git a/core/dbt/adapters/relation_configs/README.md b/core/dbt/adapters/relation_configs/README.md
deleted file mode 100644
index 6be3bc59d12..00000000000
--- a/core/dbt/adapters/relation_configs/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# RelationConfig
-This package serves as an initial abstraction for managing the inspection of existing relations and determining
-changes on those relations. It arose from the materialized view work and is currently only supporting
-materialized views for Postgres and Redshift as well as dynamic tables for Snowflake. There are three main
-classes in this package.
-
-## RelationConfigBase
-This is a very small class that only has a `from_dict()` method and a default `NotImplementedError()`.
At some -point this could be replaced by a more robust framework, like `mashumaro` or `pydantic`. - -## RelationConfigChange -This class inherits from `RelationConfigBase` ; however, this can be thought of as a separate class. The subclassing -merely points to the idea that both classes would likely inherit from the same class in a `mashumaro` or -`pydantic` implementation. This class is much more restricted in attribution. It should really only -ever need an `action` and a `context`. This can be though of as being analogous to a web request. You need to -know what you're doing (`action`: 'create' = GET, 'drop' = DELETE, etc.) and the information (`context`) needed -to make the change. In our scenarios, the context tends to be an instance of `RelationConfigBase` corresponding -to the new state. - -## RelationConfigValidationMixin -This mixin provides optional validation mechanics that can be applied to either `RelationConfigBase` or -`RelationConfigChange` subclasses. A validation rule is a combination of a `validation_check`, something -that should evaluate to `True`, and an optional `validation_error`, an instance of `DbtRuntimeError` -that should be raised in the event the `validation_check` fails. While optional, it's recommended that -the `validation_error` be provided for clearer transparency to the end user. diff --git a/core/dbt/adapters/relation_configs/__init__.py b/core/dbt/adapters/relation_configs/__init__.py deleted file mode 100644 index b8c73447a68..00000000000 --- a/core/dbt/adapters/relation_configs/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from dbt.adapters.relation_configs.config_base import ( # noqa: F401 - RelationConfigBase, - RelationResults, -) -from dbt.adapters.relation_configs.config_change import ( # noqa: F401 - RelationConfigChangeAction, - RelationConfigChange, -) -from dbt.adapters.relation_configs.config_validation import ( # noqa: F401 - RelationConfigValidationMixin, - RelationConfigValidationRule, -) diff --git a/core/dbt/adapters/relation_configs/config_base.py b/core/dbt/adapters/relation_configs/config_base.py deleted file mode 100644 index 9d0cddb0d21..00000000000 --- a/core/dbt/adapters/relation_configs/config_base.py +++ /dev/null @@ -1,44 +0,0 @@ -from dataclasses import dataclass -from typing import Union, Dict - -import agate -from dbt.utils import filter_null_values - - -""" -This is what relation metadata from the database looks like. It's a dictionary because there will be -multiple grains of data for a single object. For example, a materialized view in Postgres has base level information, -like name. But it also can have multiple indexes, which needs to be a separate query. It might look like this: - -{ - "base": agate.Row({"table_name": "table_abc", "query": "select * from table_def"}) - "indexes": agate.Table("rows": [ - agate.Row({"name": "index_a", "columns": ["column_a"], "type": "hash", "unique": False}), - agate.Row({"name": "index_b", "columns": ["time_dim_a"], "type": "btree", "unique": False}), - ]) -} -""" -RelationResults = Dict[str, Union[agate.Row, agate.Table]] - - -@dataclass(frozen=True) -class RelationConfigBase: - @classmethod - def from_dict(cls, kwargs_dict) -> "RelationConfigBase": - """ - This assumes the subclass of `RelationConfigBase` is flat, in the sense that no attribute is - itself another subclass of `RelationConfigBase`. If that's not the case, this should be overriden - to manually manage that complexity. 
- - Args: - kwargs_dict: the dict representation of this instance - - Returns: the `RelationConfigBase` representation associated with the provided dict - """ - return cls(**filter_null_values(kwargs_dict)) # type: ignore - - @classmethod - def _not_implemented_error(cls) -> NotImplementedError: - return NotImplementedError( - "This relation type has not been fully configured for this adapter." - ) diff --git a/core/dbt/adapters/relation_configs/config_change.py b/core/dbt/adapters/relation_configs/config_change.py deleted file mode 100644 index ac653fa5210..00000000000 --- a/core/dbt/adapters/relation_configs/config_change.py +++ /dev/null @@ -1,23 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Hashable - -from dbt.adapters.relation_configs.config_base import RelationConfigBase -from dbt.dataclass_schema import StrEnum - - -class RelationConfigChangeAction(StrEnum): - alter = "alter" - create = "create" - drop = "drop" - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class RelationConfigChange(RelationConfigBase, ABC): - action: RelationConfigChangeAction - context: Hashable # this is usually a RelationConfig, e.g. IndexConfig, but shouldn't be limited - - @property - @abstractmethod - def requires_full_refresh(self) -> bool: - raise self._not_implemented_error() diff --git a/core/dbt/adapters/relation_configs/config_validation.py b/core/dbt/adapters/validation.py similarity index 61% rename from core/dbt/adapters/relation_configs/config_validation.py rename to core/dbt/adapters/validation.py index 17bf74bf3e7..c38379a400d 100644 --- a/core/dbt/adapters/relation_configs/config_validation.py +++ b/core/dbt/adapters/validation.py @@ -1,16 +1,29 @@ from dataclasses import dataclass -from typing import Set, Optional +from typing import Optional, Set from dbt.exceptions import DbtRuntimeError @dataclass(frozen=True, eq=True, unsafe_hash=True) -class RelationConfigValidationRule: +class ValidationRule: + """ + A validation rule consists of two parts: + - validation_check: the thing that should be True + - validation_error: the error to raise in the event the validation check is False + """ + validation_check: bool validation_error: Optional[DbtRuntimeError] @property def default_error(self): + """ + This is a built-in stock error message. It may suffice in that it will raise an error for you, but + you should likely supply one in the rule that is more descriptive. This is akin to raising `Exception`. + + Returns: + a stock error message + """ return DbtRuntimeError( "There was a validation error in preparing this relation config." "No additional context was provided by this adapter." @@ -18,12 +31,12 @@ def default_error(self): @dataclass(frozen=True) -class RelationConfigValidationMixin: +class ValidationMixin: def __post_init__(self): self.run_validation_rules() @property - def validation_rules(self) -> Set[RelationConfigValidationRule]: + def validation_rules(self) -> Set[ValidationRule]: """ A set of validation rules to run against the object upon creation. @@ -32,6 +45,9 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]: This defaults to no validation rules if not implemented. It's recommended to override this with values, but that may not always be necessary. + *Note:* Validation rules for child attributes (e.g. a ViewRelation's SchemaRelation) will run automatically + when they are created; there's no need to call `validation_rules` on child attributes. 
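+
+        For example, an illustrative (not shipped) component might declare:
+
+            @dataclass(frozen=True)
+            class MyIndex(ValidationMixin):
+                column: str
+
+                @property
+                def validation_rules(self) -> Set[ValidationRule]:
+                    return {
+                        ValidationRule(
+                            validation_check=self.column != "",
+                            validation_error=DbtRuntimeError("An index requires a column."),
+                        )
+                    }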
+ Returns: a set of validation rules """ return set() @@ -45,13 +61,3 @@ def run_validation_rules(self): raise validation_rule.validation_error else: raise validation_rule.default_error - self.run_child_validation_rules() - - def run_child_validation_rules(self): - for attr_value in vars(self).values(): - if hasattr(attr_value, "validation_rules"): - attr_value.run_validation_rules() - if isinstance(attr_value, set): - for member in attr_value: - if hasattr(member, "validation_rules"): - member.run_validation_rules() diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 4cfaa142e25..e8ada1723b0 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -110,6 +110,7 @@ class BaseDatabaseWrapper: def __init__(self, adapter, namespace: MacroNamespace): self._adapter = adapter self.Relation = RelationProxy(adapter) + self.relation_factory = adapter.relation_factory self._namespace = namespace def __getattr__(self, name): diff --git a/core/dbt/contracts/relation.py b/core/dbt/contracts/relation.py index 2cf811f9f6c..5fce5308214 100644 --- a/core/dbt/contracts/relation.py +++ b/core/dbt/contracts/relation.py @@ -86,7 +86,7 @@ class Path(FakeAPIObject): identifier: Optional[str] = None def __post_init__(self): - # handle pesky jinja2.Undefined sneaking in here and messing up rende + # handle pesky jinja2.Undefined sneaking in here and messing up render if not isinstance(self.database, (type(None), str)): raise CompilationError("Got an invalid path database: {}".format(self.database)) if not isinstance(self.schema, (type(None), str)): diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 891d510f2e1..e83c9f50707 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -25,7 +25,7 @@ def env_set_truthy(key: str) -> Optional[str]: # this roughly follows the patten of EVENT_MANAGER in dbt/events/functions.py -# During de-globlization, we'll need to handle both similarly +# During de-globalization, we'll need to handle both similarly # Match USE_COLORS default with default in dbt.cli.params.use_colors for use in --version GLOBAL_FLAGS = Namespace(USE_COLORS=True) # type: ignore diff --git a/core/dbt/include/global_project/macros/adapters/drop_relation.sql b/core/dbt/include/global_project/macros/adapters/drop_relation.sql deleted file mode 100644 index bd254c78d51..00000000000 --- a/core/dbt/include/global_project/macros/adapters/drop_relation.sql +++ /dev/null @@ -1,44 +0,0 @@ -{% macro drop_relation(relation) -%} - {{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }} -{% endmacro %} - -{% macro default__drop_relation(relation) -%} - {% call statement('drop_relation', auto_begin=False) -%} - {%- if relation.is_table -%} - {{- drop_table(relation) -}} - {%- elif relation.is_view -%} - {{- drop_view(relation) -}} - {%- elif relation.is_materialized_view -%} - {{- drop_materialized_view(relation) -}} - {%- else -%} - drop {{ relation.type }} if exists {{ relation }} cascade - {%- endif -%} - {%- endcall %} -{% endmacro %} - - -{% macro drop_table(relation) -%} - {{ return(adapter.dispatch('drop_table', 'dbt')(relation)) }} -{%- endmacro %} - -{% macro default__drop_table(relation) -%} - drop table if exists {{ relation }} cascade -{%- endmacro %} - - -{% macro drop_view(relation) -%} - {{ return(adapter.dispatch('drop_view', 'dbt')(relation)) }} -{%- endmacro %} - -{% macro default__drop_view(relation) -%} - drop view if exists {{ relation }} cascade -{%- endmacro %} - - -{% macro drop_materialized_view(relation) -%} - {{ 
return(adapter.dispatch('drop_materialized_view', 'dbt')(relation)) }} -{%- endmacro %} - -{% macro default__drop_materialized_view(relation) -%} - drop materialized view if exists {{ relation }} cascade -{%- endmacro %} diff --git a/core/dbt/include/global_project/macros/adapters/relation.sql b/core/dbt/include/global_project/macros/adapters/relation.sql index f0dde7f20f0..1c2bd880079 100644 --- a/core/dbt/include/global_project/macros/adapters/relation.sql +++ b/core/dbt/include/global_project/macros/adapters/relation.sql @@ -43,18 +43,6 @@ {% endmacro %} -{% macro rename_relation(from_relation, to_relation) -%} - {{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }} -{% endmacro %} - -{% macro default__rename_relation(from_relation, to_relation) -%} - {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %} - {% call statement('rename_relation') -%} - alter table {{ from_relation }} rename to {{ target_name }} - {%- endcall %} -{% endmacro %} - - {% macro get_or_create_relation(database, schema, identifier, type) -%} {{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }} {% endmacro %} @@ -89,10 +77,3 @@ {% macro load_relation(relation) %} {{ return(load_cached_relation(relation)) }} {% endmacro %} - - -{% macro drop_relation_if_exists(relation) %} - {% if relation is not none %} - {{ adapter.drop_relation(relation) }} - {% endif %} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/materialization.sql b/core/dbt/include/global_project/macros/materializations/materialization.sql new file mode 100644 index 00000000000..75485cf8cfa --- /dev/null +++ b/core/dbt/include/global_project/macros/materializations/materialization.sql @@ -0,0 +1,26 @@ +{%- macro execute_no_op(materialization) -%} + {%- do store_raw_result( + name='main', + message='skip ' ~ materialization, + code='skip', + rows_affected='-1' + ) -%} +{%- endmacro -%} + + +{%- macro execute_build_sql(materialization, build_sql, pre_hooks, post_hooks) -%} + + -- `BEGIN` happens here: + {{- run_hooks(pre_hooks, inside_transaction=True) -}} + + {%- call statement(name='main') -%} + {{ build_sql }} + {%- endcall -%} + + {%- do apply_grants(materialization, materialization.grant_config, materialization.should_revoke_grants) -%} + + {{- run_hooks(post_hooks, inside_transaction=True) -}} + + {{- adapter.commit() -}} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/materializations/materialized_view.sql b/core/dbt/include/global_project/macros/materializations/materialized_view.sql new file mode 100644 index 00000000000..9fcb9ad3c56 --- /dev/null +++ b/core/dbt/include/global_project/macros/materializations/materialized_view.sql @@ -0,0 +1,68 @@ +{%- materialization materialized_view, default -%} + + {%- set materialization = adapter.make_materialization_from_node(config.model) -%} + + {%- set build_sql = materialized_view_build_sql(materialization) -%} + + {{- run_hooks(pre_hooks, inside_transaction=False) -}} + + {%- if build_sql == '' -%} + {{- execute_no_op(materialization) -}} + {%- else -%} + {{- execute_build_sql(materialization, build_sql, pre_hooks, post_hooks) -}} + {%- endif -%} + + {{- run_hooks(post_hooks, inside_transaction=False) -}} + + {%- set new_base_relation = adapter.base_relation_from_relation_model(materialization.target_relation) -%} + {{- return({'relations': [new_base_relation]}) -}} + +{%- endmaterialization -%} + + +{%- macro 
materialized_view_build_sql(materialization) -%} + + {%- if materialization.build_strategy == 'no_op' -%} + {%- set build_sql = '' -%} + + {%- elif materialization.build_strategy == 'create' -%} + {%- set build_sql = create_template(materialization.target_relation) -%} + + {%- elif materialization.build_strategy == 'replace' -%} + {%- set build_sql = replace_template( + materialization.existing_relation_ref, materialization.target_relation + ) -%} + + {%- elif materialization.build_strategy == 'alter' -%} + + {% set describe_relation_results = describe_template(materialization.existing_relation_ref ) %} + {% set existing_relation = materialization.existing_relation(describe_relation_results) %} + + {%- if materialization.on_configuration_change == 'apply' -%} + {%- set build_sql = alter_template(existing_relation, materialization.target_relation) -%} + + {%- elif materialization.on_configuration_change == 'continue' -%} + {%- set build_sql = '' -%} + {{- exceptions.warn( + "Configuration changes were identified and `on_configuration_change` " + "was set to `continue` for `" ~ materialization.target_relation ~ "`" + ) -}} + + {%- elif materialization.on_configuration_change == 'fail' -%} + {%- set build_sql = '' -%} + {{- exceptions.raise_fail_fast_error( + "Configuration changes were identified and `on_configuration_change` " + "was set to `fail` for `" ~ materialization.target_relation ~ "`" + ) -}} + + {%- endif -%} + + {%- else -%} + + {{- exceptions.raise_compiler_error("This build strategy is not supported for materialized views: " ~ materialization.build_strategy) -}} + + {%- endif -%} + + {%- do return(build_sql) -%} + +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/alter_materialized_view.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/alter_materialized_view.sql deleted file mode 100644 index b9ccdc2f141..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/alter_materialized_view.sql +++ /dev/null @@ -1,30 +0,0 @@ -{% macro get_alter_materialized_view_as_sql( - relation, - configuration_changes, - sql, - existing_relation, - backup_relation, - intermediate_relation -) %} - {{- log('Applying ALTER to: ' ~ relation) -}} - {{- adapter.dispatch('get_alter_materialized_view_as_sql', 'dbt')( - relation, - configuration_changes, - sql, - existing_relation, - backup_relation, - intermediate_relation - ) -}} -{% endmacro %} - - -{% macro default__get_alter_materialized_view_as_sql( - relation, - configuration_changes, - sql, - existing_relation, - backup_relation, - intermediate_relation -) %} - {{ exceptions.raise_compiler_error("Materialized views have not been implemented for this adapter.") }} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/create_materialized_view.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/create_materialized_view.sql deleted file mode 100644 index 4b2ebeb3aa1..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/create_materialized_view.sql +++ /dev/null @@ -1,9 +0,0 @@ -{% macro get_create_materialized_view_as_sql(relation, sql) -%} - {{- log('Applying CREATE to: ' ~ relation) -}} - {{- adapter.dispatch('get_create_materialized_view_as_sql', 'dbt')(relation, sql) -}} -{%- endmacro %} - - -{% macro default__get_create_materialized_view_as_sql(relation, sql) -%} - {{ 
exceptions.raise_compiler_error("Materialized views have not been implemented for this adapter.") }} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/get_materialized_view_configuration_changes.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/get_materialized_view_configuration_changes.sql deleted file mode 100644 index b1639b1631e..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/get_materialized_view_configuration_changes.sql +++ /dev/null @@ -1,23 +0,0 @@ -{% macro get_materialized_view_configuration_changes(existing_relation, new_config) %} - /* {# - It's recommended that configuration changes be formatted as follows: - {"": [{"action": "", "context": ...}]} - - For example: - { - "indexes": [ - {"action": "drop", "context": "index_abc"}, - {"action": "create", "context": {"columns": ["column_1", "column_2"], "type": "hash", "unique": True}}, - ], - } - - Either way, `get_materialized_view_configuration_changes` needs to align with `get_alter_materialized_view_as_sql`. - #} */ - {{- log('Determining configuration changes on: ' ~ existing_relation) -}} - {%- do return(adapter.dispatch('get_materialized_view_configuration_changes', 'dbt')(existing_relation, new_config)) -%} -{% endmacro %} - - -{% macro default__get_materialized_view_configuration_changes(existing_relation, new_config) %} - {{ exceptions.raise_compiler_error("Materialized views have not been implemented for this adapter.") }} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/materialized_view.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/materialized_view.sql deleted file mode 100644 index 015f6cb8585..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/materialized_view.sql +++ /dev/null @@ -1,121 +0,0 @@ -{% materialization materialized_view, default %} - {% set existing_relation = load_cached_relation(this) %} - {% set target_relation = this.incorporate(type=this.MaterializedView) %} - {% set intermediate_relation = make_intermediate_relation(target_relation) %} - {% set backup_relation_type = target_relation.MaterializedView if existing_relation is none else existing_relation.type %} - {% set backup_relation = make_backup_relation(target_relation, backup_relation_type) %} - - {{ materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) }} - - {% set build_sql = materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %} - - {% if build_sql == '' %} - {{ materialized_view_execute_no_op(target_relation) }} - {% else %} - {{ materialized_view_execute_build_sql(build_sql, existing_relation, target_relation, post_hooks) }} - {% endif %} - - {{ materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) }} - - {{ return({'relations': [target_relation]}) }} - -{% endmaterialization %} - - -{% macro materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) %} - - -- backup_relation and intermediate_relation should not already exist in the database - -- it's possible these exist because of a previous run that exited unexpectedly - {% set preexisting_backup_relation = load_cached_relation(backup_relation) %} - {% set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) %} - - -- drop the temp relations if 
they exist already in the database - {{ drop_relation_if_exists(preexisting_backup_relation) }} - {{ drop_relation_if_exists(preexisting_intermediate_relation) }} - - {{ run_hooks(pre_hooks, inside_transaction=False) }} - -{% endmacro %} - - -{% macro materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) %} - - -- drop the temp relations if they exist to leave the database clean for the next run - {{ drop_relation_if_exists(backup_relation) }} - {{ drop_relation_if_exists(intermediate_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=False) }} - -{% endmacro %} - - -{% macro materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %} - - {% set full_refresh_mode = should_full_refresh() %} - - -- determine the scenario we're in: create, full_refresh, alter, refresh data - {% if existing_relation is none %} - {% set build_sql = get_create_materialized_view_as_sql(target_relation, sql) %} - {% elif full_refresh_mode or not existing_relation.is_materialized_view %} - {% set build_sql = get_replace_materialized_view_as_sql(target_relation, sql, existing_relation, backup_relation, intermediate_relation) %} - {% else %} - - -- get config options - {% set on_configuration_change = config.get('on_configuration_change') %} - {% set configuration_changes = get_materialized_view_configuration_changes(existing_relation, config) %} - - {% if configuration_changes is none %} - {% set build_sql = refresh_materialized_view(target_relation) %} - - {% elif on_configuration_change == 'apply' %} - {% set build_sql = get_alter_materialized_view_as_sql(target_relation, configuration_changes, sql, existing_relation, backup_relation, intermediate_relation) %} - {% elif on_configuration_change == 'continue' %} - {% set build_sql = '' %} - {{ exceptions.warn("Configuration changes were identified and `on_configuration_change` was set to `continue` for `" ~ target_relation ~ "`") }} - {% elif on_configuration_change == 'fail' %} - {{ exceptions.raise_fail_fast_error("Configuration changes were identified and `on_configuration_change` was set to `fail` for `" ~ target_relation ~ "`") }} - - {% else %} - -- this only happens if the user provides a value other than `apply`, 'skip', 'fail' - {{ exceptions.raise_compiler_error("Unexpected configuration scenario") }} - - {% endif %} - - {% endif %} - - {% do return(build_sql) %} - -{% endmacro %} - - -{% macro materialized_view_execute_no_op(target_relation) %} - {% do store_raw_result( - name="main", - message="skip " ~ target_relation, - code="skip", - rows_affected="-1" - ) %} -{% endmacro %} - - -{% macro materialized_view_execute_build_sql(build_sql, existing_relation, target_relation, post_hooks) %} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} - - {% set grant_config = config.get('grants') %} - - {% call statement(name="main") %} - {{ build_sql }} - {% endcall %} - - {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} - {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} - - {% do persist_docs(target_relation, model) %} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - - {{ adapter.commit() }} - -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/refresh_materialized_view.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/refresh_materialized_view.sql deleted file mode 100644 index 
16345138593..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/refresh_materialized_view.sql +++ /dev/null @@ -1,9 +0,0 @@ -{% macro refresh_materialized_view(relation) %} - {{- log('Applying REFRESH to: ' ~ relation) -}} - {{- adapter.dispatch('refresh_materialized_view', 'dbt')(relation) -}} -{% endmacro %} - - -{% macro default__refresh_materialized_view(relation) %} - {{ exceptions.raise_compiler_error("Materialized views have not been implemented for this adapter.") }} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/models/materialized_view/replace_materialized_view.sql b/core/dbt/include/global_project/macros/materializations/models/materialized_view/replace_materialized_view.sql deleted file mode 100644 index 43319c5cc1b..00000000000 --- a/core/dbt/include/global_project/macros/materializations/models/materialized_view/replace_materialized_view.sql +++ /dev/null @@ -1,9 +0,0 @@ -{% macro get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) %} - {{- log('Applying REPLACE to: ' ~ relation) -}} - {{- adapter.dispatch('get_replace_materialized_view_as_sql', 'dbt')(relation, sql, existing_relation, backup_relation, intermediate_relation) -}} -{% endmacro %} - - -{% macro default__get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) %} - {{ exceptions.raise_compiler_error("Materialized views have not been implemented for this adapter.") }} -{% endmacro %} diff --git a/core/dbt/include/global_project/macros/adapters/indexes.sql b/core/dbt/include/global_project/macros/relation_components/index/create.sql similarity index 52% rename from core/dbt/include/global_project/macros/adapters/indexes.sql rename to core/dbt/include/global_project/macros/relation_components/index/create.sql index b8663a7f971..a4fdd9cee1b 100644 --- a/core/dbt/include/global_project/macros/adapters/indexes.sql +++ b/core/dbt/include/global_project/macros/relation_components/index/create.sql @@ -21,21 +21,3 @@ {% endif %} {% endfor %} {% endmacro %} - - -{% macro get_drop_index_sql(relation, index_name) -%} - {{ adapter.dispatch('get_drop_index_sql', 'dbt')(relation, index_name) }} -{%- endmacro %} - -{% macro default__get_drop_index_sql(relation, index_name) -%} - {{ exceptions.raise_compiler_error("`get_drop_index_sql has not been implemented for this adapter.") }} -{%- endmacro %} - - -{% macro get_show_indexes_sql(relation) -%} - {{ adapter.dispatch('get_show_indexes_sql', 'dbt')(relation) }} -{%- endmacro %} - -{% macro default__get_show_indexes_sql(relation) -%} - {{ exceptions.raise_compiler_error("`get_show_indexes_sql has not been implemented for this adapter.") }} -{%- endmacro %} diff --git a/core/dbt/include/global_project/macros/relation_components/index/drop.sql b/core/dbt/include/global_project/macros/relation_components/index/drop.sql new file mode 100644 index 00000000000..5085ed8555d --- /dev/null +++ b/core/dbt/include/global_project/macros/relation_components/index/drop.sql @@ -0,0 +1,7 @@ +{% macro drop_index_sql(relation, index_name) -%} + {{ adapter.dispatch('drop_index_sql', 'dbt')(relation, index_name) }} +{%- endmacro %} + +{% macro default__drop_index_sql(relation, index_name) -%} + {{ exceptions.raise_compiler_error("`drop_index_sql` has not been implemented for this adapter.") }} +{%- endmacro %} diff --git a/core/dbt/include/global_project/macros/relation_components/index/show.sql 
b/core/dbt/include/global_project/macros/relation_components/index/show.sql
new file mode 100644
index 00000000000..2923c742ff7
--- /dev/null
+++ b/core/dbt/include/global_project/macros/relation_components/index/show.sql
@@ -0,0 +1,7 @@
+{% macro show_indexes_sql(relation) -%}
+    {{ adapter.dispatch('show_indexes_sql', 'dbt')(relation) }}
+{%- endmacro %}
+
+{% macro default__show_indexes_sql(relation) -%}
+    {{ exceptions.raise_compiler_error("`show_indexes_sql` has not been implemented for this adapter.") }}
+{%- endmacro %}
diff --git a/core/dbt/include/global_project/macros/adapters/schema.sql b/core/dbt/include/global_project/macros/relation_components/schema/create.sql
similarity index 50%
rename from core/dbt/include/global_project/macros/adapters/schema.sql
rename to core/dbt/include/global_project/macros/relation_components/schema/create.sql
index 9e0c7559286..7b7be3315e7 100644
--- a/core/dbt/include/global_project/macros/adapters/schema.sql
+++ b/core/dbt/include/global_project/macros/relation_components/schema/create.sql
@@ -7,14 +7,3 @@
         create schema if not exists {{ relation.without_identifier() }}
     {% endcall %}
 {% endmacro %}
-
-
-{% macro drop_schema(relation) -%}
-    {{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
-{% endmacro %}
-
-{% macro default__drop_schema(relation) -%}
-    {%- call statement('drop_schema') -%}
-        drop schema if exists {{ relation.without_identifier() }} cascade
-    {% endcall %}
-{% endmacro %}
diff --git a/core/dbt/include/global_project/macros/relation_components/schema/drop.sql b/core/dbt/include/global_project/macros/relation_components/schema/drop.sql
new file mode 100644
index 00000000000..41ae6b3967b
--- /dev/null
+++ b/core/dbt/include/global_project/macros/relation_components/schema/drop.sql
@@ -0,0 +1,9 @@
+{% macro drop_schema(relation) -%}
+    {{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
+{% endmacro %}
+
+{% macro default__drop_schema(relation) -%}
+    {%- call statement('drop_schema') -%}
+        drop schema if exists {{ relation.without_identifier() }} cascade
+    {% endcall %}
+{% endmacro %}
diff --git a/core/dbt/include/global_project/macros/relations/README.md b/core/dbt/include/global_project/macros/relations/README.md
new file mode 100644
index 00000000000..368be417e38
--- /dev/null
+++ b/core/dbt/include/global_project/macros/relations/README.md
@@ -0,0 +1,51 @@
+# Relation Macro Templates
+
+## Composite Macro Templates
+
+Macros in `/composite/` are composites of atomic macros (e.g. `create_template`, `drop_template`,
+`rename_template`, etc.). In other words, they don't dispatch directly to a relation_type-specific macro, nor do
+they contain sql of their own. They are effectively control flow that performs transactions composed of
+atomic statements. This is done to minimize the amount of sql that is written in jinja and to remove redundancy.
+
+It's unlikely that these macros will need to be overridden; instead, the adapter maintainer is encouraged to
+override the atomic components (e.g. `create_template`, `drop_template`, `rename_template`, etc.). Not only will
+this minimize the amount of marginal maintenance within an adapter, it will also unlock all of the functionality
+in these composite macros as a result.
+
+## Atomic Macro Templates
+
+Macros in `/atomic/` represent atomic actions on the database. They aren't necessarily transactions, nor are they
+single statements; they are somewhere in between.
They should be thought of as atomic at the `Relation` level in
+the sense that you can't break down the action any further without losing a part of the relation, or a part of the
+action on the relation. For example, the `create` action for a Postgres materialized view is actually a CREATE
+statement followed by a series of CREATE INDEX statements. We wouldn't want to create the materialized view
+without also creating all of its components, so that's one atomic action. Many actions are straightforward
+(e.g. `drop` and `rename`), while others are less so (e.g. `alter` and `create`). Another way to think about it
+is that all of these actions focus on exactly one relation, hence have a single `relation_type`. Even
+`alter_template`, which takes in two `Relation` objects, is really just saying "I want `existing_relation` to
+look like `"this"`"; `"this"` just happens to be another `Relation` object that contains all of the same
+attributes, some with different values.
+
+While these actions are atomic, the macros in this directory represent `relation_type`-agnostic actions.
+For example, if you want to create a relation, execute `create_template(my_relation)`. Since `my_relation`
+has a `relation_type` of `materialized_view`, `create_template` will know to dispatch the call to
+`create_materialized_view_template`. If the maintainer looks at any macro in this directory, they will see that
+the macro merely dispatches to the `relation_type`-specific version. Hence, there are only two reasons to override
+these macros:
+
+1. The adapter supports more or fewer `relation_type`s than the default
+2. The action can be consolidated into the same statement regardless of `relation_type`
+
+## Atomic Macro Templates by Relation_Type
+
+The most likely place that the adapter maintainer should look when overriding macros with adapter-specific
+logic is in the relation-specific directories. Those are the directories in `/relations/` that have names
+corresponding to `relation_type`s (e.g. `/materialized_view/`, `/view/`, etc.). At the `dbt-core` level,
+macros in these directories will default to a version that throws an exception until implemented, much like
+an abstract method in Python. The intention is to make no assumptions about how databases work to avoid building
+dependencies between database platforms within dbt. At the `dbt-<adapter>` level, each of these files should
+correspond to a specific statement (give or take) from that database platform's documentation. For example,
+the macro `postgres__create_materialized_view_template` aligns with the documentation found here:
+https://www.postgresql.org/docs/current/sql-creatematerializedview.html. Ideally, once this macro is created,
+there is not much reason to perform maintenance on it unless the database platform deploys new functionality
+and dbt (or the adapter) has chosen to support that functionality.
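+
+For illustration, a minimal sketch of such an adapter-level override, assuming only the dispatch
+names above (this is not the actual dbt-postgres implementation):
+
+```sql
+{%- macro postgres__create_materialized_view_template(relation) -%}
+    create materialized view {{ relation }} as
+        {{ relation.query }}
+{%- endmacro -%}
+```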
diff --git a/core/dbt/include/global_project/macros/relations/atomic/alter.sql b/core/dbt/include/global_project/macros/relations/atomic/alter.sql new file mode 100644 index 00000000000..9524b82d8bc --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/alter.sql @@ -0,0 +1,19 @@ +{%- macro alter_template(existing_relation, target_relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying ALTER to: ' ~ existing_relation) -}} + {%- endif -%} + {{- adapter.dispatch('alter_template', 'dbt')(existing_relation, target_relation) -}} +{%- endmacro -%} + + +{%- macro default__alter_template(existing_relation, target_relation) -%} + + {%- if existing_relation.type == 'materialized_view' -%} + {{ alter_materialized_view_template(existing_relation, target_relation) }} + + {%- else -%} + {{- exceptions.raise_compiler_error("`alter_template` has not been implemented for: " ~ existing_relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/atomic/create.sql b/core/dbt/include/global_project/macros/relations/atomic/create.sql new file mode 100644 index 00000000000..374cc0c00d7 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/create.sql @@ -0,0 +1,21 @@ +{%- macro create_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying CREATE to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('create_template', 'dbt')(relation) -}} + + {{- adapter.cache_created_relation_model(relation) -}} +{%- endmacro -%} + + +{%- macro default__create_template(relation) -%} + + {%- if relation.type == 'materialized_view' -%} + {{ create_materialized_view_template(relation) }} + + {%- else -%} + {{- exceptions.raise_compiler_error("`create_template` has not been implemented for: " ~ relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/atomic/describe.sql b/core/dbt/include/global_project/macros/relations/atomic/describe.sql new file mode 100644 index 00000000000..80ae1fa7862 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/describe.sql @@ -0,0 +1,23 @@ +{# /* + This needs to be a {% do return(...) %} because the macro returns a dictionary, not a template. 
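+    For example, a hypothetical adapter implementation might return a dictionary shaped
+    like the `DescribeRelationResults` examples in `_relation_component.py`, e.g.
+    {"relation": agate.Table(...), "indexes": agate.Table(...)} (keys are illustrative only).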
+*/ #} + +{%- macro describe_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying DESCRIBE to: ' ~ relation) -}} + {%- endif -%} + {%- do return(adapter.dispatch('describe_template', 'dbt')(relation)) -%} +{%- endmacro -%} + + +{%- macro default__describe_template(relation) -%} + + {%- if relation.type == 'materialized_view' -%} + {%- do return(describe_materialized_view_template(relation)) -%} + + {%- else -%} + {{- exceptions.raise_compiler_error("`describe_template` has not been implemented for: " ~ relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/atomic/drop.sql b/core/dbt/include/global_project/macros/relations/atomic/drop.sql new file mode 100644 index 00000000000..ef562285de1 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/drop.sql @@ -0,0 +1,61 @@ +{%- macro drop_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying DROP to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('drop_template', 'dbt')(relation) -}} + + {{- adapter.cache_dropped_relation_model(relation) -}} +{%- endmacro -%} + + +{%- macro default__drop_template(relation) -%} + + {%- if relation.type == 'view' -%} + {{ drop_view_template(relation) }} + + {%- elif relation.type == 'table' -%} + {{ drop_table_template(relation) }} + + {%- elif relation.type == 'materialized_view' -%} + {{ drop_materialized_view_template(relation) }} + + {%- else -%} + {{- exceptions.raise_compiler_error("`drop_template` has not been implemented for: " ~ relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} + + +{# /* + These are `BaseRelation` versions. The `BaseRelation` workflows are different. +*/ #} +{% macro drop_relation_if_exists(relation) %} + {% if relation is not none %} + {{ adapter.drop_relation(relation) }} + {% endif %} +{% endmacro %} + + +{% macro drop_relation(relation) -%} + {{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }} +{% endmacro %} + +{% macro default__drop_relation(relation) -%} + {% call statement('drop_relation', auto_begin=False) -%} + + {%- if relation.is_view -%} + drop view if exists {{ relation }} cascade + + {%- elif relation.is_table -%} + drop table if exists {{ relation }} cascade + + {%- elif relation.is_materialized_view -%} + drop materialized view if exists {{ relation }} cascade + + {%- else -%} + drop {{ relation.type }} if exists {{ relation }} cascade + + {%- endif -%} + {%- endcall %} +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/relations/atomic/refresh.sql b/core/dbt/include/global_project/macros/relations/atomic/refresh.sql new file mode 100644 index 00000000000..925c3d24efc --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/refresh.sql @@ -0,0 +1,19 @@ +{%- macro refresh_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying REFRESH to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('refresh_template', 'dbt')(relation) -}} +{%- endmacro -%} + + +{%- macro default__refresh_template(relation) -%} + + {%- if relation.type == 'materialized_view' -%} + {{ refresh_materialized_view_template(relation) }} + + {%- else -%} + {{- exceptions.raise_compiler_error("`refresh_template` has not been implemented for: " ~ relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/atomic/rename.sql 
b/core/dbt/include/global_project/macros/relations/atomic/rename.sql new file mode 100644 index 00000000000..84265912c88 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/atomic/rename.sql @@ -0,0 +1,42 @@ +{%- macro rename_template(relation, new_name, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying RENAME to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('rename_template', 'dbt')(relation, new_name) -}} + + {{- adapter.cache_renamed_relation_model(relation, new_name) -}} +{%- endmacro -%} + + +{%- macro default__rename_template(relation, new_name) -%} + + {%- if relation.type == 'view' -%} + {{ rename_view_template(relation, new_name) }} + + {%- elif relation.type == 'table' -%} + {{ rename_table_template(relation, new_name) }} + + {%- elif relation.type == 'materialized_view' -%} + {{ rename_materialized_view_template(relation, new_name) }} + + {%- else -%} + {{- exceptions.raise_compiler_error("`rename_template` has not been implemented for: " ~ relation.type ) -}} + + {%- endif -%} + +{%- endmacro -%} + + +{# /* + These are `BaseRelation` versions. The `BaseRelation` workflows are different. +*/ #} +{% macro rename_relation(from_relation, to_relation) -%} + {{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }} +{% endmacro %} + +{% macro default__rename_relation(from_relation, to_relation) -%} + {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %} + {% call statement('rename_relation') -%} + alter table {{ from_relation }} rename to {{ target_name }} + {%- endcall %} +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/relations/composite/backup.sql b/core/dbt/include/global_project/macros/relations/composite/backup.sql new file mode 100644 index 00000000000..8cd395bc541 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/composite/backup.sql @@ -0,0 +1,19 @@ +{%- macro backup_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying BACKUP to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('backup_template', 'dbt')(relation) -}} +{%- endmacro -%} + + +{%- macro default__backup_template(relation) -%} + + -- get the standard backup name + {% set backup_relation_ref = adapter.relation_factory.make_backup_ref(relation) -%} + + -- drop any pre-existing backup + {{ drop_template(backup_relation_ref, called_directly=False) }}; + + {{ rename_template(relation, backup_relation_ref.name, called_directly=False) }} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/composite/deploy_stage.sql b/core/dbt/include/global_project/macros/relations/composite/deploy_stage.sql new file mode 100644 index 00000000000..5a78ff10346 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/composite/deploy_stage.sql @@ -0,0 +1,16 @@ +{%- macro deploy_stage_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying DEPLOY STAGE to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('deploy_stage_template', 'dbt')(relation) -}} +{%- endmacro -%} + + +{%- macro default__deploy_stage_template(relation) -%} + + -- get the standard intermediate name + {% set intermediate_relation = adapter.relation_factory.make_intermediate(relation) -%} + + {{ rename_template(intermediate_relation, relation.name, called_directly=False) }} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/composite/drop_backup.sql 
b/core/dbt/include/global_project/macros/relations/composite/drop_backup.sql new file mode 100644 index 00000000000..a4d0a3088d7 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/composite/drop_backup.sql @@ -0,0 +1,16 @@ +{%- macro drop_backup_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying DROP BACKUP to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('drop_backup_template', 'dbt')(relation) -}} +{%- endmacro -%} + + +{%- macro default__drop_backup_template(relation) -%} + + -- get the standard backup name + {% set backup_relation_ref = adapter.relation_factory.make_backup_ref(relation) -%} + + {{ drop_template(backup_relation_ref, called_directly=False) }} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/composite/replace.sql b/core/dbt/include/global_project/macros/relations/composite/replace.sql new file mode 100644 index 00000000000..d408b763258 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/composite/replace.sql @@ -0,0 +1,66 @@ +{%- macro replace_template(existing_relation, target_relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying REPLACE to: ' ~ target_relation) -}} + {%- endif -%} + {{- adapter.dispatch('replace_template', 'dbt')(existing_relation, target_relation) -}} +{%- endmacro -%} + +{%- macro default__replace_template(existing_relation, target_relation) -%} + + {# /* create target_relation as an intermediate relation, then swap it out with the existing one using a backup */ #} + {%- if target_relation.can_be_renamed and existing_relation.can_be_renamed -%} + {{ stage_template(target_relation, called_directly=False) }}; + {{ backup_template(existing_relation, called_directly=False) }}; + {{ deploy_stage_template(target_relation, called_directly=False) }}; + {{ drop_backup_template(existing_relation, called_directly=False) }} + + {# /* create target_relation as an intermediate relation, then swap it out with the existing one using drop */ #} + {%- elif target_relation.can_be_renamed -%} + {{ stage_template(target_relation, called_directly=False) }}; + {{ drop_template(existing_relation, called_directly=False) }}; + {{ deploy_stage_template(target_relation, called_directly=False) }} + + {# /* create target_relation in place by first backing up the existing relation */ #} + {%- elif existing_relation.can_be_renamed -%} + {{ backup_template(existing_relation, called_directly=False) }}; + {{ create_template(target_relation, called_directly=False) }}; + {{ drop_backup_template(existing_relation, called_directly=False) }} + + {# /* no renaming is allowed, so just drop and create */ #} + {%- else -%} + {{ drop_template(existing_relation, called_directly=False) }}; + {{ create_template(target_relation, called_directly=False) }} + + {%- endif -%} + +{%- endmacro -%} + + +{%- macro default__replace_sql_alt(existing_relation, target_relation) -%} + + {# /* stage the target relation if we can, otherwise we'll create it later */ #} + {%- if target_relation.can_be_renamed -%} + {{ stage_template(target_relation, called_directly=False) }}; + {%- endif -%} + + {# /* backup the existing relation if we can, otherwise just drop it */ #} + {%- if existing_relation.can_be_renamed -%} + {{ backup_template(existing_relation, called_directly=False) }}; + {%- else -%} + {{ drop_template(existing_relation, called_directly=False) }}; + {%- endif -%} + + {# /* create the target relation from the staged relation if we were able to stage it 
earlier, otherwise create it here */ #} + {%- if target_relation.can_be_renamed -%} + {{ deploy_stage_template(target_relation, called_directly=False) }} + {%- else -%} + {{ create_template(target_relation, called_directly=False) }} + {%- endif -%} + + {# /* drop the backup relation if we were able to create it earlier */ #} + {%- if existing_relation.can_be_renamed -%} + ; -- we need this here because we don't know if the last statement happens in the previous if block until here + {{ drop_backup_template(existing_relation, called_directly=False) }} + {%- endif -%} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/composite/stage.sql b/core/dbt/include/global_project/macros/relations/composite/stage.sql new file mode 100644 index 00000000000..1deb65cf164 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/composite/stage.sql @@ -0,0 +1,19 @@ +{%- macro stage_template(relation, called_directly=True) -%} + {%- if called_directly -%} + {{- log('Applying STAGE to: ' ~ relation) -}} + {%- endif -%} + {{- adapter.dispatch('stage_template', 'dbt')(relation) -}} +{%- endmacro -%} + + +{%- macro default__stage_template(relation) -%} + + -- get the standard intermediate name + {% set intermediate_relation = adapter.relation_factory.make_intermediate(relation) -%} + + -- drop any pre-existing intermediate + {{ drop_template(intermediate_relation, called_directly=False) }}; + + {{ create_template(intermediate_relation, called_directly=False) }} + +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/alter.sql b/core/dbt/include/global_project/macros/relations/materialized_view/alter.sql new file mode 100644 index 00000000000..dd4bba88e8b --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/alter.sql @@ -0,0 +1,8 @@ +{%- macro alter_materialized_view_template(existing_materialized_view, target_materialized_view) -%} + {{- adapter.dispatch('alter_materialized_view_template', 'dbt')(existing_materialized_view, target_materialized_view) -}} +{%- endmacro -%} + + +{%- macro default__alter_materialized_view_template(existing_materialized_view, target_materialized_view) -%} + {{- exceptions.raise_compiler_error("`alter_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/create.sql b/core/dbt/include/global_project/macros/relations/materialized_view/create.sql new file mode 100644 index 00000000000..d231fc8ac9b --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/create.sql @@ -0,0 +1,8 @@ +{%- macro create_materialized_view_template(materialized_view) -%} + {{- adapter.dispatch('create_materialized_view_template', 'dbt')(materialized_view) -}} +{%- endmacro -%} + + +{%- macro default__create_materialized_view_template(materialized_view) -%} + {{- exceptions.raise_compiler_error("`create_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/describe.sql b/core/dbt/include/global_project/macros/relations/materialized_view/describe.sql new file mode 100644 index 00000000000..e4ed1d39c28 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/describe.sql @@ -0,0 +1,12 @@ +{# /* + This needs to be a {% do return(...) %} because the macro returns a dictionary, not a template. 
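+ As a hedged illustration (not part of this diff), an adapter supplies the dispatched implementation, e.g.: + {% macro postgres__describe_materialized_view_template(materialized_view) %} + {% set indexes = run_query(postgres__show_indexes_sql(materialized_view)) %} + {% do return({'indexes': indexes}) %} + {% endmacro %} + Here `postgres__show_indexes_sql` is a hypothetical helper that would query pg_index; only `run_query` and `return` are real context members.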
+*/ #} + +{%- macro describe_materialized_view_template(materialized_view) -%} + {%- do return(adapter.dispatch('describe_materialized_view_template', 'dbt')(materialized_view)) -%} +{%- endmacro -%} + + +{%- macro default__describe_materialized_view_template(materialized_view) -%} + {{- exceptions.raise_compiler_error("`describe_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/drop.sql b/core/dbt/include/global_project/macros/relations/materialized_view/drop.sql new file mode 100644 index 00000000000..e4873707ba9 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/drop.sql @@ -0,0 +1,7 @@ +{%- macro drop_materialized_view_template(materialized_view) -%} + {{- adapter.dispatch('drop_materialized_view_template', 'dbt')(materialized_view) -}} +{%- endmacro -%} + +{%- macro default__drop_materialized_view_template(materialized_view) -%} + {{- exceptions.raise_compiler_error("`drop_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/refresh.sql b/core/dbt/include/global_project/macros/relations/materialized_view/refresh.sql new file mode 100644 index 00000000000..c892055599e --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/refresh.sql @@ -0,0 +1,8 @@ +{%- macro refresh_materialized_view_template(materialized_view) -%} + {{- adapter.dispatch('refresh_materialized_view_template', 'dbt')(materialized_view) -}} +{%- endmacro -%} + + +{%- macro default__refresh_materialized_view_template(materialized_view) -%} + {{- exceptions.raise_compiler_error("`refresh_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/materialized_view/rename.sql b/core/dbt/include/global_project/macros/relations/materialized_view/rename.sql new file mode 100644 index 00000000000..ad75a92cf19 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/materialized_view/rename.sql @@ -0,0 +1,7 @@ +{%- macro rename_materialized_view_template(materialized_view, new_name) -%} + {{- adapter.dispatch('rename_materialized_view_template', 'dbt')(materialized_view, new_name) -}} +{%- endmacro -%} + +{%- macro default__rename_materialized_view_template(materialized_view, new_name) -%} + {{- exceptions.raise_compiler_error("`rename_materialized_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/table/drop.sql b/core/dbt/include/global_project/macros/relations/table/drop.sql new file mode 100644 index 00000000000..61c5aae581f --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/table/drop.sql @@ -0,0 +1,7 @@ +{%- macro drop_table_template(table) -%} + {{- adapter.dispatch('drop_table_template', 'dbt')(table) -}} +{%- endmacro -%} + +{%- macro default__drop_table_template(table) -%} + {{- exceptions.raise_compiler_error("`drop_table_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/table/rename.sql b/core/dbt/include/global_project/macros/relations/table/rename.sql new file mode 100644 index 00000000000..696c80e6d75 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/table/rename.sql @@ -0,0 +1,7 @@ 
+{%- macro rename_table_template(table, new_name) -%} + {{- adapter.dispatch('rename_table_template', 'dbt')(table, new_name) -}} +{%- endmacro -%} + +{%- macro default__rename_table_template(table, new_name) -%} + {{- exceptions.raise_compiler_error("`rename_table_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/view/drop.sql b/core/dbt/include/global_project/macros/relations/view/drop.sql new file mode 100644 index 00000000000..91779e2a730 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/view/drop.sql @@ -0,0 +1,7 @@ +{%- macro drop_view_template(view) -%} + {{- adapter.dispatch('drop_view_template', 'dbt')(view) -}} +{%- endmacro -%} + +{%- macro default__drop_view_template(view) -%} + {{- exceptions.raise_compiler_error("`drop_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/include/global_project/macros/relations/view/rename.sql b/core/dbt/include/global_project/macros/relations/view/rename.sql new file mode 100644 index 00000000000..fae68327774 --- /dev/null +++ b/core/dbt/include/global_project/macros/relations/view/rename.sql @@ -0,0 +1,7 @@ +{%- macro rename_view_template(view, new_name) -%} + {{- adapter.dispatch('rename_view_template', 'dbt')(view, new_name) -}} +{%- endmacro -%} + +{%- macro default__rename_view_template(view, new_name) -%} + {{- exceptions.raise_compiler_error("`rename_view_template` has not been implemented for this adapter.") -}} +{%- endmacro -%} diff --git a/core/dbt/tests/util.py b/core/dbt/tests/util.py index 5179ceb2f04..0a84a479559 100644 --- a/core/dbt/tests/util.py +++ b/core/dbt/tests/util.py @@ -8,6 +8,7 @@ from typing import Dict, List, Optional from contextlib import contextmanager from dbt.adapters.factory import Adapter +from dbt.adapters.relation.models import Relation from dbt.cli.main import dbtRunner from dbt.logger import log_manager @@ -588,3 +589,32 @@ def __eq__(self, other): def __repr__(self): return "AnyStringWith<{!r}>".format(self.contains) + + +def assert_message_in_logs(message: str, logs: str, expected_pass: bool = True): + # if the logs are json strings, then 'jsonify' the message because of things like escape quotes + if os.environ.get("DBT_LOG_FORMAT", "") == "json": + message = message.replace(r'"', r"\"") + + if expected_pass: + assert message in logs + else: + assert message not in logs + + +def get_project_config(project): + file_yaml = read_file(project.project_root, "dbt_project.yml") + return yaml.safe_load(file_yaml) + + +def set_project_config(project, config): + config_yaml = yaml.safe_dump(config) + write_file(config_yaml, project.project_root, "dbt_project.yml") + + +def get_model_file(project, relation: Relation) -> str: + return read_file(project.project_root, "models", f"{relation.name}.sql") + + +def set_model_file(project, relation: Relation, model_sql: str): + write_file(model_sql, project.project_root, "models", f"{relation.name}.sql") diff --git a/plugins/postgres/dbt/adapters/postgres/__init__.py b/plugins/postgres/dbt/adapters/postgres/__init__.py index 38dce8bdb22..6ad0fe0a096 100644 --- a/plugins/postgres/dbt/adapters/postgres/__init__.py +++ b/plugins/postgres/dbt/adapters/postgres/__init__.py @@ -1,8 +1,7 @@ -# these are mostly just exports, #noqa them so flake8 will be happy -from dbt.adapters.postgres.connections import PostgresConnectionManager # noqa +from dbt.adapters.postgres.connections import PostgresConnectionManager from 
dbt.adapters.postgres.connections import PostgresCredentials -from dbt.adapters.postgres.column import PostgresColumn # noqa -from dbt.adapters.postgres.relation import PostgresRelation # noqa: F401 +from dbt.adapters.postgres.column import PostgresColumn +from dbt.adapters.postgres.relation import PostgresRelation from dbt.adapters.postgres.impl import PostgresAdapter from dbt.adapters.base import AdapterPlugin diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index 2fc8071efb8..6825dd1bad0 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -1,15 +1,14 @@ from datetime import datetime from dataclasses import dataclass -from typing import Optional, Set, List, Any +from typing import Any, List, Optional, Set from dbt.adapters.base.meta import available from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport +from dbt.adapters.relation import RelationFactory from dbt.adapters.sql import SQLAdapter -from dbt.adapters.postgres import PostgresConnectionManager -from dbt.adapters.postgres.column import PostgresColumn -from dbt.adapters.postgres import PostgresRelation -from dbt.dataclass_schema import dbtClassMixin, ValidationError from dbt.contracts.graph.nodes import ConstraintType +from dbt.contracts.relation import RelationType +from dbt.dataclass_schema import dbtClassMixin, ValidationError from dbt.exceptions import ( CrossDbReferenceProhibitedError, IndexConfigNotDictError, @@ -19,6 +18,10 @@ ) import dbt.utils +from dbt.adapters.postgres import PostgresConnectionManager, PostgresRelation +from dbt.adapters.postgres.column import PostgresColumn +from dbt.adapters.postgres.relation import models as relation_models + # note that this isn't an adapter macro, so just a single underscore GET_RELATIONS_MACRO_NAME = "postgres_get_relations" @@ -74,6 +77,23 @@ class PostgresAdapter(SQLAdapter): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } + @property + def relation_factory(self): + return RelationFactory( + relation_models={ + RelationType.MaterializedView: relation_models.PostgresMaterializedViewRelation, + }, + relation_changesets={ + RelationType.MaterializedView: relation_models.PostgresMaterializedViewRelationChangeset, + }, + relation_can_be_renamed={ + RelationType.MaterializedView, + RelationType.Table, + RelationType.View, + }, + render_policy=relation_models.PostgresRenderPolicy, + ) + @classmethod def date_function(cls): return "now()" @@ -144,3 +164,19 @@ def valid_incremental_strategies(self): def debug_query(self): self.execute("select 1 as id") + + @available + def generate_index_name( + self, + relation: relation_models.PostgresMaterializedViewRelation, + index: relation_models.PostgresIndexRelation, + ) -> str: + return dbt.utils.md5( + "_".join( + { + relation.fully_qualified_path, + index.fully_qualified_path, + str(datetime.utcnow().isoformat()), + } + ) + ) diff --git a/plugins/postgres/dbt/adapters/postgres/relation.py b/plugins/postgres/dbt/adapters/postgres/relation.py deleted file mode 100644 index 43822efb11f..00000000000 --- a/plugins/postgres/dbt/adapters/postgres/relation.py +++ /dev/null @@ -1,91 +0,0 @@ -from dataclasses import dataclass -from typing import Optional, Set, FrozenSet - -from dbt.adapters.base.relation import BaseRelation -from dbt.adapters.relation_configs import ( - RelationConfigChangeAction, - RelationResults, -) -from dbt.context.providers import RuntimeConfigObject -from dbt.exceptions import 
DbtRuntimeError - -from dbt.adapters.postgres.relation_configs import ( - PostgresIndexConfig, - PostgresIndexConfigChange, - PostgresMaterializedViewConfig, - PostgresMaterializedViewConfigChangeCollection, - MAX_CHARACTERS_IN_IDENTIFIER, -) - - -@dataclass(frozen=True, eq=False, repr=False) -class PostgresRelation(BaseRelation): - def __post_init__(self): - # Check for length of Postgres table/view names. - # Check self.type to exclude test relation identifiers - if ( - self.identifier is not None - and self.type is not None - and len(self.identifier) > self.relation_max_name_length() - ): - raise DbtRuntimeError( - f"Relation name '{self.identifier}' " - f"is longer than {self.relation_max_name_length()} characters" - ) - - def relation_max_name_length(self): - return MAX_CHARACTERS_IN_IDENTIFIER - - def get_materialized_view_config_change_collection( - self, relation_results: RelationResults, runtime_config: RuntimeConfigObject - ) -> Optional[PostgresMaterializedViewConfigChangeCollection]: - config_change_collection = PostgresMaterializedViewConfigChangeCollection() - - existing_materialized_view = PostgresMaterializedViewConfig.from_relation_results( - relation_results - ) - new_materialized_view = PostgresMaterializedViewConfig.from_model_node( - runtime_config.model - ) - - config_change_collection.indexes = self._get_index_config_changes( - existing_materialized_view.indexes, new_materialized_view.indexes - ) - - # we return `None` instead of an empty `PostgresMaterializedViewConfigChangeCollection` object - # so that it's easier and more extensible to check in the materialization: - # `core/../materializations/materialized_view.sql` : - # {% if configuration_changes is none %} - if config_change_collection.has_changes: - return config_change_collection - - def _get_index_config_changes( - self, - existing_indexes: FrozenSet[PostgresIndexConfig], - new_indexes: FrozenSet[PostgresIndexConfig], - ) -> Set[PostgresIndexConfigChange]: - """ - Get the index updates that will occur as a result of a new run - - There are four scenarios: - - 1. Indexes are equal -> don't return these - 2. Index is new -> create these - 3. Index is old -> drop these - 4. Indexes are not equal -> drop old, create new -> two actions - - Returns: a set of index updates in the form {"action": "drop/create", "context": } - """ - drop_changes = set( - PostgresIndexConfigChange.from_dict( - {"action": RelationConfigChangeAction.drop, "context": index} - ) - for index in existing_indexes.difference(new_indexes) - ) - create_changes = set( - PostgresIndexConfigChange.from_dict( - {"action": RelationConfigChangeAction.create, "context": index} - ) - for index in new_indexes.difference(existing_indexes) - ) - return set().union(drop_changes, create_changes) diff --git a/plugins/postgres/dbt/adapters/postgres/relation/__init__.py b/plugins/postgres/dbt/adapters/postgres/relation/__init__.py new file mode 100644 index 00000000000..9d79be56eed --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/__init__.py @@ -0,0 +1,25 @@ +from dataclasses import dataclass + +from dbt.adapters.base.relation import BaseRelation +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models import MAX_CHARACTERS_IN_IDENTIFIER + + +@dataclass(frozen=True, eq=False, repr=False) +class PostgresRelation(BaseRelation): + def __post_init__(self): + # Check for length of Postgres table/view names. 
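+ # (Postgres silently truncates identifiers longer than 63 characters, so two long + # names that agree on their first 63 characters would otherwise collide; raising + # here fails fast instead.)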
+ # Check self.type to exclude test relation identifiers + if ( + self.identifier is not None + and self.type is not None + and len(self.identifier) > self.relation_max_name_length() + ): + raise DbtRuntimeError( + f"Relation name '{self.identifier}' " + f"is longer than {self.relation_max_name_length()} characters" + ) + + def relation_max_name_length(self): + return MAX_CHARACTERS_IN_IDENTIFIER diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/__init__.py b/plugins/postgres/dbt/adapters/postgres/relation/models/__init__.py new file mode 100644 index 00000000000..d4c4ea771bd --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/__init__.py @@ -0,0 +1,17 @@ +from dbt.adapters.postgres.relation.models.database import PostgresDatabaseRelation +from dbt.adapters.postgres.relation.models.index import ( + PostgresIndexMethod, + PostgresIndexRelation, + PostgresIndexRelationChange, +) +from dbt.adapters.postgres.relation.models.materialized_view import ( + PostgresMaterializedViewRelation, + PostgresMaterializedViewRelationChangeset, +) +from dbt.adapters.postgres.relation.models.policy import ( + PostgresIncludePolicy, + PostgresQuotePolicy, + PostgresRenderPolicy, + MAX_CHARACTERS_IN_IDENTIFIER, +) +from dbt.adapters.postgres.relation.models.schema import PostgresSchemaRelation diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/database.py b/plugins/postgres/dbt/adapters/postgres/relation/models/database.py new file mode 100644 index 00000000000..d652be3e947 --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/database.py @@ -0,0 +1,46 @@ +from dataclasses import dataclass +from typing import Set + +from dbt.adapters.relation.models import DatabaseRelation +from dbt.adapters.validation import ValidationMixin, ValidationRule +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models.policy import PostgresRenderPolicy + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class PostgresDatabaseRelation(DatabaseRelation, ValidationMixin): + """ + This config follows the specs found here: + https://www.postgresql.org/docs/current/sql-createdatabase.html + + The following parameters are configurable by dbt: + - name: name of the database + """ + + # attribution + name: str + + # configuration + render = PostgresRenderPolicy + + @classmethod + def from_dict(cls, config_dict) -> "PostgresDatabaseRelation": + database = super().from_dict(config_dict) + assert isinstance(database, PostgresDatabaseRelation) + return database + + @property + def validation_rules(self) -> Set[ValidationRule]: + """ + Returns: a set of rules that should evaluate to `True` (i.e.
False == validation failure) + """ + return { + ValidationRule( + validation_check=len(self.name or "") > 0, + validation_error=DbtRuntimeError( + f"dbt-postgres requires a name to reference a database, received:\n" + f" database: {self.name}\n" + ), + ), + } diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/index.py b/plugins/postgres/dbt/adapters/postgres/relation/models/index.py new file mode 100644 index 00000000000..2c273dd51e9 --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/index.py @@ -0,0 +1,231 @@ +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Set, FrozenSet + +import agate +from dbt.adapters.relation.models import ( + RelationComponent, + RelationChangeAction, + RelationChange, +) +from dbt.adapters.validation import ValidationMixin, ValidationRule +from dbt.contracts.relation import ComponentName +from dbt.dataclass_schema import StrEnum +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models.policy import PostgresRenderPolicy + + +class PostgresIndexMethod(StrEnum): + btree = "btree" + hash = "hash" + gist = "gist" + spgist = "spgist" + gin = "gin" + brin = "brin" + + @classmethod + def default(cls) -> "PostgresIndexMethod": + return cls.btree + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class PostgresIndexRelation(RelationComponent, ValidationMixin): + """ + This config follows the specs found here: + https://www.postgresql.org/docs/current/sql-createindex.html + + The following parameters are configurable by dbt: + - column_names: the columns in the index + - unique: checks for duplicate values when the index is created and on data updates + - method: the index method to be used + + The following parameters are not configurable by dbt, but are required for certain functionality: + - name: the name of the index in the database + + Applicable defaults for non-configurable parameters: + - concurrently: `False` + - nulls_distinct: `True` + + *Note: The index does not have a name until it is created in the database. The name also must be globally + unique, not just within the materialization to which it belongs. Hence, the name is a combination of attributes + on both the index and the materialization. This is calculated with `PostgresAdapter.generate_index_name()`.
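+ + A hedged construction sketch (values illustrative, not part of this diff; `adapter` is the + adapter object exposed in the jinja context): + + index = PostgresIndexRelation.from_dict( + {"column_names": {"id"}, "unique": True, "method": "hash"} + ) + index_name = adapter.generate_index_name(materialized_view, index)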
+ """ + + column_names: FrozenSet[str] = field(hash=True) + name: str = field(default=None, hash=False, compare=False) + unique: bool = field(default=False, hash=True) + method: PostgresIndexMethod = field(default=PostgresIndexMethod.default(), hash=True) + + # configuration + render = PostgresRenderPolicy + + @property + def fully_qualified_path(self) -> str: + return "_".join( + { + *sorted( + self.render.part(ComponentName.Identifier, column) + for column in self.column_names + ), + str(self.unique), + str(self.method), + } + ).replace(self.render.quote_character, "") + + @property + def validation_rules(self) -> Set[ValidationRule]: + return { + ValidationRule( + validation_check=self.column_names != frozenset(), + validation_error=DbtRuntimeError( + "Indexes require at least one column, but none were provided" + ), + ), + } + + @classmethod + def from_dict(cls, config_dict) -> "PostgresIndexRelation": + # don't alter the incoming config + kwargs_dict = deepcopy(config_dict) + + # component-specific attributes + if column_names := config_dict.get("column_names"): + kwargs_dict.update({"column_names": frozenset(column_names)}) + + if method := config_dict.get("method"): + kwargs_dict.update({"method": PostgresIndexMethod(method)}) + + index = super().from_dict(kwargs_dict) + assert isinstance(index, PostgresIndexRelation) + return index + + @classmethod + def parse_node(cls, node_entry: dict) -> dict: + """ + Parse a `ModelNode` instance into a `PostgresIndexRelation` instance as a dict + + This is generally used indirectly by calling `from_model_node()`, but there are times when the dict + version is more appropriate. + + Args: + node_entry: an entry from the `model` attribute (e.g. `config.model`) in the jinja context + + Example `model_node`: + + ModelNode({ + "config" { + "extra": { + "indexes": [{"columns": ["id"], "type": "hash", "unique": True},...], + ..., + }, + ..., + }, + ..., + }) + + Returns: a `PostgresIndexRelation` instance as a dict, can be passed into `from_dict` + """ + config_dict = { + "column_names": set(node_entry.get("columns", set())), + "unique": node_entry.get("unique"), + "method": node_entry.get("type"), + } + return config_dict + + @classmethod + def parse_describe_relation_results(cls, describe_relation_results: agate.Row) -> dict: + config_dict = { + "name": describe_relation_results["name"], + "column_names": set(describe_relation_results["column_names"].split(",")), + "unique": describe_relation_results["unique"], + "method": describe_relation_results["method"], + } + return config_dict + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class PostgresIndexRelationChange(RelationChange, ValidationMixin): + """ + Example of an index change: + { + "action": "create", + "context": { + "name": "", # we don't know the name since it gets created as a hash at runtime + "columns": ["column_1", "column_3"], + "type": "hash", + "unique": True + } + }, + { + "action": "drop", + "context": { + "name": "index_abc", # we only need this to drop, but we need the rest to compare + "columns": ["column_1"], + "type": "btree", + "unique": True + } + } + """ + + context: PostgresIndexRelation + + @property + def requires_full_refresh(self) -> bool: + return False + + @property + def validation_rules(self) -> Set[ValidationRule]: + return { + ValidationRule( + validation_check=self.action + in {RelationChangeAction.create, RelationChangeAction.drop}, + validation_error=DbtRuntimeError( + "Invalid operation, only `drop` and `create` changes are supported for indexes." 
+ ), + ), + ValidationRule( + validation_check=not ( + self.action == RelationChangeAction.drop and self.context.name is None + ), + validation_error=DbtRuntimeError( + "Invalid operation, attempting to drop an index with no name." + ), + ), + ValidationRule( + validation_check=not ( + self.action == RelationChangeAction.create + and self.context.column_names == set() + ), + validation_error=DbtRuntimeError( + "Invalid operation, attempting to create an index with no columns." + ), + ), + } + + +def index_config_changes( + existing_indexes: FrozenSet[PostgresIndexRelation], + new_indexes: FrozenSet[PostgresIndexRelation], +) -> Set[PostgresIndexRelationChange]: + """ + Get the index updates that will occur as a result of a new run + + There are four scenarios: + + 1. Indexes are equal -> don't return these + 2. Index is new -> create these + 3. Index is old -> drop these + 4. Indexes are not equal -> drop old, create new -> two actions + + Returns: a set of index updates in the form {"action": "drop/create", "context": } + """ + drop_changes = set( + PostgresIndexRelationChange(action=RelationChangeAction.drop, context=index) + for index in existing_indexes.difference(new_indexes) + ) + create_changes = set( + PostgresIndexRelationChange(action=RelationChangeAction.create, context=index) + for index in new_indexes.difference(existing_indexes) + ) + return set().union(drop_changes, create_changes) diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/materialized_view.py b/plugins/postgres/dbt/adapters/postgres/relation/models/materialized_view.py new file mode 100644 index 00000000000..2b12ba06d18 --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/materialized_view.py @@ -0,0 +1,230 @@ +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Dict, FrozenSet, Optional, Set + +import agate +from dbt.adapters.relation.models import Relation, RelationChangeset +from dbt.adapters.validation import ValidationMixin, ValidationRule +from dbt.contracts.graph.nodes import CompiledNode +from dbt.contracts.relation import RelationType +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models.index import ( + index_config_changes, + PostgresIndexRelation, + PostgresIndexRelationChange, +) +from dbt.adapters.postgres.relation.models.policy import ( + PostgresRenderPolicy, + MAX_CHARACTERS_IN_IDENTIFIER, +) +from dbt.adapters.postgres.relation.models.schema import PostgresSchemaRelation + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class PostgresMaterializedViewRelation(Relation, ValidationMixin): + """ + This config follows the specs found here: + https://www.postgresql.org/docs/current/sql-creatematerializedview.html + + The following parameters are configurable by dbt: + - name: name of the materialized view + - schema: schema that contains the materialized view + - query: the query that defines the view + - indexes: the collection (set) of indexes on the materialized view + + Applicable defaults for non-configurable parameters: + - method: `heap` + - tablespace_name: `default_tablespace` + - with_data: `True` + """ + + # attribution + name: str + schema: PostgresSchemaRelation + query: str = field(hash=False, compare=False) + indexes: Optional[FrozenSet[PostgresIndexRelation]] = field(default_factory=frozenset) + + # configuration + type = RelationType.MaterializedView + render = PostgresRenderPolicy + SchemaParser = PostgresSchemaRelation + can_be_renamed = True + + @property + def
validation_rules(self) -> Set[ValidationRule]: + """ + Validation rules at the materialized view level. All attribute level rules get run as a result of + `ValidationMixin`. + + Returns: a set of rules that should evaluate to `True` (i.e. False == validation failure) + """ + return { + ValidationRule( + validation_check=self.name is None + or len(self.name) <= MAX_CHARACTERS_IN_IDENTIFIER, + validation_error=DbtRuntimeError( + f"The materialized view name is more than the max allowed length " + f"of {MAX_CHARACTERS_IN_IDENTIFIER} characters.\n" + f" name: {self.name}\n" + f" characters: {len(self.name)}\n" + ), + ), + ValidationRule( + validation_check=all({self.database_name, self.schema_name, self.name}), + validation_error=DbtRuntimeError( + f"dbt-postgres requires all three parts of an object's path, received:\n" + f" database: {self.database_name}\n" + f" schema: {self.schema_name}\n" + f" identifier: {self.name}\n" + ), + ), + } + + @classmethod + def from_dict(cls, config_dict: dict) -> "PostgresMaterializedViewRelation": + """ + Creates an instance of this class given the dict representation + + This is generally used indirectly by calling either `from_model_node()` or `from_relation_results()` + + Args: + config_dict: a dict that aligns with the structure of this class, and its attribute classes (e.g. indexes) + + Returns: an instance of this class + """ + # don't alter the incoming config + kwargs_dict = deepcopy(config_dict) + + # adapter-specific attributes + if indexes := config_dict.get("indexes"): + kwargs_dict.update( + { + "indexes": frozenset( + PostgresIndexRelation.from_dict(index) for index in indexes + ), + } + ) + + materialized_view = super().from_dict(kwargs_dict) + assert isinstance(materialized_view, PostgresMaterializedViewRelation) + return materialized_view + + @classmethod + def parse_node(cls, node: CompiledNode) -> dict: + """ + Parse a `ModelNode` instance into a `PostgresMaterializedViewRelation` instance as a dict + + This is generally used indirectly by calling `from_model_node()`, but there are times when the dict + version is more appropriate. + + Args: + node: the `model` attribute (e.g. `config.model`) in the jinja context + + Example `model_node`: + + ModelNode({ + "compiled_code": "create materialized view my_materialized_view as select * from my_table;", + "config": { + "extra": { + "indexes": [{"columns": ["id"], "type": "hash", "unique": True},...], + ..., + }, + ..., + }, + "database": "my_database", + "identifier": "my_materialized_view", + "schema": "my_schema", + ..., + }) + + Returns: a `PostgresMaterializedViewRelation` instance as a dict, which can be passed into `from_dict` + """ + config_dict = super().parse_node(node) + + if indexes := node.config.extra.get("indexes"): + config_dict.update( + { + "indexes": [PostgresIndexRelation.parse_node(index) for index in indexes], + } + ) + + return config_dict + + @classmethod + def parse_describe_relation_results( + cls, describe_relation_results: Dict[str, agate.Table] + ) -> dict: + """ + Parse `RelationResults` into a dict representation of a `PostgresMaterializedViewRelation` instance + + This is generally used indirectly by calling `from_relation_results()`, but there are times when the dict + version is more appropriate.
+ + Args: + describe_relation_results: the results of a set of queries that fully describe an instance of this class + + Example of `describe_relation_results`: + + { + "materialization": agate.Table(agate.Row({ + "database": "my_database", + "name": "my_materialized_view", + "query": "create materialized view my_materialized_view as select * from my_ref_table;", + "schema": "my_schema", + })), + "indexes": agate.Table([ + agate.Row({"columns": ["id"], "type": "hash", "unique": True}), + ..., + ]), + } + + Returns: a dict representation of an instance of this class that can be passed into `from_dict()` + """ + config_dict = super().parse_describe_relation_results(describe_relation_results) + + if indexes := describe_relation_results.get("indexes"): + config_dict.update( + { + "indexes": [ + PostgresIndexRelation.parse_describe_relation_results(index) + for index in indexes.rows + ], + } + ) + + return config_dict + + +@dataclass +class PostgresMaterializedViewRelationChangeset(RelationChangeset): + indexes: Set[PostgresIndexRelationChange] = field(default_factory=set) + + @classmethod + def parse_relations(cls, existing_relation: Relation, target_relation: Relation) -> dict: + try: + assert isinstance(existing_relation, PostgresMaterializedViewRelation) + assert isinstance(target_relation, PostgresMaterializedViewRelation) + except AssertionError: + raise DbtRuntimeError( + f"Two Postgres materialized view relations were expected, but received:\n" + f" existing: {existing_relation}\n" + f" new: {target_relation}\n" + ) + + config_dict = { + "indexes": index_config_changes(existing_relation.indexes, target_relation.indexes), + } + + return config_dict + + @property + def requires_full_refresh(self) -> bool: + return ( + any(index.requires_full_refresh for index in self.indexes) + or super().requires_full_refresh + ) + + @property + def is_empty(self) -> bool: + return self.indexes == set() and super().is_empty diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/policy.py b/plugins/postgres/dbt/adapters/postgres/relation/models/policy.py new file mode 100644 index 00000000000..4e30fa9bd26 --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/policy.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass + +from dbt.adapters.relation.models import IncludePolicy, QuotePolicy, RenderPolicy + + +MAX_CHARACTERS_IN_IDENTIFIER = 63 + + +class PostgresIncludePolicy(IncludePolicy): + database: bool = True + schema: bool = True + identifier: bool = True + + +@dataclass +class PostgresQuotePolicy(QuotePolicy): + database: bool = True + schema: bool = True + identifier: bool = True + + @property + def quote_character(self) -> str: + """This is a property to appeal to the `Policy` serialization.""" + return '"' + + +PostgresRenderPolicy = RenderPolicy( + quote_policy=PostgresQuotePolicy(), + include_policy=PostgresIncludePolicy(), + quote_character='"', + delimiter=".", +) diff --git a/plugins/postgres/dbt/adapters/postgres/relation/models/schema.py b/plugins/postgres/dbt/adapters/postgres/relation/models/schema.py new file mode 100644 index 00000000000..41ac6af204c --- /dev/null +++ b/plugins/postgres/dbt/adapters/postgres/relation/models/schema.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import Set + +from dbt.adapters.relation.models import SchemaRelation +from dbt.adapters.validation import ValidationMixin, ValidationRule +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models.database import PostgresDatabaseRelation
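+# A hedged usage note (behavior assumed from this diff, not verified): the +# PostgresRenderPolicy imported below quotes each path component with '"' and joins +# them with '.', so my_db.my_schema.my_view would render roughly as +# "my_db"."my_schema"."my_view".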
+from dbt.adapters.postgres.relation.models.policy import PostgresRenderPolicy + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class PostgresSchemaRelation(SchemaRelation, ValidationMixin): + """ + This config follows the specs found here: + https://www.postgresql.org/docs/15/sql-createschema.html + + The following parameters are configurable by dbt: + - name: name of the schema + - database_name: name of the database + """ + + # attribution + name: str + + # configuration + render = PostgresRenderPolicy + DatabaseParser = PostgresDatabaseRelation + + @classmethod + def from_dict(cls, config_dict) -> "PostgresSchemaRelation": + schema = super().from_dict(config_dict) + assert isinstance(schema, PostgresSchemaRelation) + return schema + + @property + def validation_rules(self) -> Set[ValidationRule]: + """ + Returns: a set of rules that should evaluate to `True` (i.e. False == validation failure) + """ + return { + ValidationRule( + validation_check=len(self.name or "") > 0, + validation_error=DbtRuntimeError( + f"dbt-postgres requires a name to reference a schema, received:\n" + f" schema: {self.name}\n" + ), + ), + } diff --git a/plugins/postgres/dbt/adapters/postgres/relation_configs/__init__.py b/plugins/postgres/dbt/adapters/postgres/relation_configs/__init__.py deleted file mode 100644 index 9fdb942bfa5..00000000000 --- a/plugins/postgres/dbt/adapters/postgres/relation_configs/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from dbt.adapters.postgres.relation_configs.constants import ( # noqa: F401 - MAX_CHARACTERS_IN_IDENTIFIER, -) -from dbt.adapters.postgres.relation_configs.index import ( # noqa: F401 - PostgresIndexConfig, - PostgresIndexConfigChange, -) -from dbt.adapters.postgres.relation_configs.materialized_view import ( # noqa: F401 - PostgresMaterializedViewConfig, - PostgresMaterializedViewConfigChangeCollection, -) diff --git a/plugins/postgres/dbt/adapters/postgres/relation_configs/constants.py b/plugins/postgres/dbt/adapters/postgres/relation_configs/constants.py deleted file mode 100644 index 9228df23043..00000000000 --- a/plugins/postgres/dbt/adapters/postgres/relation_configs/constants.py +++ /dev/null @@ -1 +0,0 @@ -MAX_CHARACTERS_IN_IDENTIFIER = 63 diff --git a/plugins/postgres/dbt/adapters/postgres/relation_configs/index.py b/plugins/postgres/dbt/adapters/postgres/relation_configs/index.py deleted file mode 100644 index 3a072ea4307..00000000000 --- a/plugins/postgres/dbt/adapters/postgres/relation_configs/index.py +++ /dev/null @@ -1,165 +0,0 @@ -from dataclasses import dataclass, field -from typing import Set, FrozenSet - -import agate -from dbt.dataclass_schema import StrEnum -from dbt.exceptions import DbtRuntimeError -from dbt.adapters.relation_configs import ( - RelationConfigBase, - RelationConfigValidationMixin, - RelationConfigValidationRule, - RelationConfigChangeAction, - RelationConfigChange, -) - - -class PostgresIndexMethod(StrEnum): - btree = "btree" - hash = "hash" - gist = "gist" - spgist = "spgist" - gin = "gin" - brin = "brin" - - @classmethod - def default(cls) -> "PostgresIndexMethod": - return cls.btree - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class PostgresIndexConfig(RelationConfigBase, RelationConfigValidationMixin): - """ - This config fallows the specs found here: - https://www.postgresql.org/docs/current/sql-createindex.html - - The following parameters are configurable by dbt: - - name: the name of the index in the database, this isn't predictable since we apply a timestamp - - unique: checks for duplicate values when
the index is created and on data updates - - method: the index method to be used - - column_names: the columns in the index - - Applicable defaults for non-configurable parameters: - - concurrently: `False` - - nulls_distinct: `True` - """ - - name: str = field(default=None, hash=False, compare=False) - column_names: FrozenSet[str] = field(default_factory=frozenset, hash=True) - unique: bool = field(default=False, hash=True) - method: PostgresIndexMethod = field(default=PostgresIndexMethod.default(), hash=True) - - @property - def validation_rules(self) -> Set[RelationConfigValidationRule]: - return { - RelationConfigValidationRule( - validation_check=self.column_names is not None, - validation_error=DbtRuntimeError( - "Indexes require at least one column, but none were provided" - ), - ), - } - - @classmethod - def from_dict(cls, config_dict) -> "PostgresIndexConfig": - # TODO: include the QuotePolicy instead of defaulting to lower() - kwargs_dict = { - "name": config_dict.get("name"), - "column_names": frozenset( - column.lower() for column in config_dict.get("column_names", set()) - ), - "unique": config_dict.get("unique"), - "method": config_dict.get("method"), - } - index: "PostgresIndexConfig" = super().from_dict(kwargs_dict) # type: ignore - return index - - @classmethod - def parse_model_node(cls, model_node_entry: dict) -> dict: - config_dict = { - "column_names": set(model_node_entry.get("columns", set())), - "unique": model_node_entry.get("unique"), - "method": model_node_entry.get("type"), - } - return config_dict - - @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: - config_dict = { - "name": relation_results_entry.get("name"), - "column_names": set(relation_results_entry.get("column_names", "").split(",")), - "unique": relation_results_entry.get("unique"), - "method": relation_results_entry.get("method"), - } - return config_dict - - @property - def as_node_config(self) -> dict: - """ - Returns: a dictionary that can be passed into `get_create_index_sql()` - """ - node_config = { - "columns": list(self.column_names), - "unique": self.unique, - "type": self.method.value, - } - return node_config - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class PostgresIndexConfigChange(RelationConfigChange, RelationConfigValidationMixin): - """ - Example of an index change: - { - "action": "create", - "context": { - "name": "", # we don't know the name since it gets created as a hash at runtime - "columns": ["column_1", "column_3"], - "type": "hash", - "unique": True - } - }, - { - "action": "drop", - "context": { - "name": "index_abc", # we only need this to drop, but we need the rest to compare - "columns": ["column_1"], - "type": "btree", - "unique": True - } - } - """ - - context: PostgresIndexConfig - - @property - def requires_full_refresh(self) -> bool: - return False - - @property - def validation_rules(self) -> Set[RelationConfigValidationRule]: - return { - RelationConfigValidationRule( - validation_check=self.action - in {RelationConfigChangeAction.create, RelationConfigChangeAction.drop}, - validation_error=DbtRuntimeError( - "Invalid operation, only `drop` and `create` changes are supported for indexes." - ), - ), - RelationConfigValidationRule( - validation_check=not ( - self.action == RelationConfigChangeAction.drop and self.context.name is None - ), - validation_error=DbtRuntimeError( - "Invalid operation, attempting to drop an index with no name." 
- ), - ), - RelationConfigValidationRule( - validation_check=not ( - self.action == RelationConfigChangeAction.create - and self.context.column_names == set() - ), - validation_error=DbtRuntimeError( - "Invalid operations, attempting to create an index with no columns." - ), - ), - } diff --git a/plugins/postgres/dbt/adapters/postgres/relation_configs/materialized_view.py b/plugins/postgres/dbt/adapters/postgres/relation_configs/materialized_view.py deleted file mode 100644 index 15e700e777a..00000000000 --- a/plugins/postgres/dbt/adapters/postgres/relation_configs/materialized_view.py +++ /dev/null @@ -1,113 +0,0 @@ -from dataclasses import dataclass, field -from typing import Set, FrozenSet, List - -import agate -from dbt.adapters.relation_configs import ( - RelationConfigBase, - RelationResults, - RelationConfigValidationMixin, - RelationConfigValidationRule, -) -from dbt.contracts.graph.nodes import ModelNode -from dbt.exceptions import DbtRuntimeError - -from dbt.adapters.postgres.relation_configs.constants import MAX_CHARACTERS_IN_IDENTIFIER -from dbt.adapters.postgres.relation_configs.index import ( - PostgresIndexConfig, - PostgresIndexConfigChange, -) - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class PostgresMaterializedViewConfig(RelationConfigBase, RelationConfigValidationMixin): - """ - This config follows the specs found here: - https://www.postgresql.org/docs/current/sql-creatematerializedview.html - - The following parameters are configurable by dbt: - - table_name: name of the materialized view - - query: the query that defines the view - - indexes: the collection (set) of indexes on the materialized view - - Applicable defaults for non-configurable parameters: - - method: `heap` - - tablespace_name: `default_tablespace` - - with_data: `True` - """ - - table_name: str = "" - query: str = "" - indexes: FrozenSet[PostgresIndexConfig] = field(default_factory=frozenset) - - @property - def validation_rules(self) -> Set[RelationConfigValidationRule]: - # index rules get run by default with the mixin - return { - RelationConfigValidationRule( - validation_check=self.table_name is None - or len(self.table_name) <= MAX_CHARACTERS_IN_IDENTIFIER, - validation_error=DbtRuntimeError( - f"The materialized view name is more than {MAX_CHARACTERS_IN_IDENTIFIER} " - f"characters: {self.table_name}" - ), - ), - } - - @classmethod - def from_dict(cls, config_dict: dict) -> "PostgresMaterializedViewConfig": - kwargs_dict = { - "table_name": config_dict.get("table_name"), - "query": config_dict.get("query"), - "indexes": frozenset( - PostgresIndexConfig.from_dict(index) for index in config_dict.get("indexes", {}) - ), - } - materialized_view: "PostgresMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore - return materialized_view - - @classmethod - def from_model_node(cls, model_node: ModelNode) -> "PostgresMaterializedViewConfig": - materialized_view_config = cls.parse_model_node(model_node) - materialized_view = cls.from_dict(materialized_view_config) - return materialized_view - - @classmethod - def parse_model_node(cls, model_node: ModelNode) -> dict: - indexes: List[dict] = model_node.config.extra.get("indexes", []) - config_dict = { - "table_name": model_node.identifier, - "query": model_node.compiled_code, - "indexes": [PostgresIndexConfig.parse_model_node(index) for index in indexes], - } - return config_dict - - @classmethod - def from_relation_results( - cls, relation_results: RelationResults - ) -> "PostgresMaterializedViewConfig": - 
materialized_view_config = cls.parse_relation_results(relation_results) - materialized_view = cls.from_dict(materialized_view_config) - return materialized_view - - @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> dict: - indexes: agate.Table = relation_results.get("indexes", agate.Table(rows={})) - config_dict = { - "indexes": [ - PostgresIndexConfig.parse_relation_results(index) for index in indexes.rows - ], - } - return config_dict - - -@dataclass -class PostgresMaterializedViewConfigChangeCollection: - indexes: Set[PostgresIndexConfigChange] = field(default_factory=set) - - @property - def requires_full_refresh(self) -> bool: - return any(index.requires_full_refresh for index in self.indexes) - - @property - def has_changes(self) -> bool: - return self.indexes != set() diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index c8bdab6eccb..71ff9f4e517 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -25,38 +25,6 @@ ); {%- endmacro %} -{% macro postgres__get_create_index_sql(relation, index_dict) -%} - {%- set index_config = adapter.parse_index(index_dict) -%} - {%- set comma_separated_columns = ", ".join(index_config.columns) -%} - {%- set index_name = index_config.render(relation) -%} - - create {% if index_config.unique -%} - unique - {%- endif %} index if not exists - "{{ index_name }}" - on {{ relation }} {% if index_config.type -%} - using {{ index_config.type }} - {%- endif %} - ({{ comma_separated_columns }}); -{%- endmacro %} - -{% macro postgres__create_schema(relation) -%} - {% if relation.database -%} - {{ adapter.verify_database(relation.database) }} - {%- endif -%} - {%- call statement('create_schema') -%} - create schema if not exists {{ relation.without_identifier().include(database=False) }} - {%- endcall -%} -{% endmacro %} - -{% macro postgres__drop_schema(relation) -%} - {% if relation.database -%} - {{ adapter.verify_database(relation.database) }} - {%- endif -%} - {%- call statement('drop_schema') -%} - drop schema if exists {{ relation.without_identifier().include(database=False) }} cascade - {%- endcall -%} -{% endmacro %} {% macro postgres__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} @@ -116,26 +84,6 @@ information_schema {%- endmacro %} -{% macro postgres__list_schemas(database) %} - {% if database -%} - {{ adapter.verify_database(database) }} - {%- endif -%} - {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} - select distinct nspname from pg_namespace - {% endcall %} - {{ return(load_result('list_schemas').table) }} -{% endmacro %} - -{% macro postgres__check_schema_exists(information_schema, schema) -%} - {% if information_schema.database -%} - {{ adapter.verify_database(information_schema.database) }} - {%- endif -%} - {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %} - select count(*) from pg_namespace where nspname = '{{ schema }}' - {% endcall %} - {{ return(load_result('check_schema_exists').table) }} -{% endmacro %} - {# Postgres tables have a maximum length of 63 characters, anything longer is silently truncated. Temp and backup relations add a lot of extra characters to the end of table names to ensure uniqueness. 
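{# /* A hedged illustration, not part of this diff: a macro-side guard against silent truncation could look like {% if (relation.identifier | length) > 63 %} {{ exceptions.raise_compiler_error(relation.identifier ~ ' exceeds 63 characters') }} {% endif %} which mirrors the Python-side check in PostgresRelation.__post_init__. */ #}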
@@ -219,34 +167,3 @@ {% macro postgres__copy_grants() %} {{ return(False) }} {% endmacro %} - - -{% macro postgres__get_show_indexes_sql(relation) %} - select - i.relname as name, - m.amname as method, - ix.indisunique as "unique", - array_to_string(array_agg(a.attname), ',') as column_names - from pg_index ix - join pg_class i - on i.oid = ix.indexrelid - join pg_am m - on m.oid=i.relam - join pg_class t - on t.oid = ix.indrelid - join pg_namespace n - on n.oid = t.relnamespace - join pg_attribute a - on a.attrelid = t.oid - and a.attnum = ANY(ix.indkey) - where t.relname = '{{ relation.identifier }}' - and n.nspname = '{{ relation.schema }}' - and t.relkind in ('r', 'm') - group by 1, 2, 3 - order by 1, 2, 3 -{% endmacro %} - - -{%- macro postgres__get_drop_index_sql(relation, index_name) -%} - drop index if exists "{{ index_name }}" -{%- endmacro -%} diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/get_catalog.sql similarity index 100% rename from plugins/postgres/dbt/include/postgres/macros/catalog.sql rename to plugins/postgres/dbt/include/postgres/macros/get_catalog.sql diff --git a/plugins/postgres/dbt/include/postgres/macros/get_relations.sql b/plugins/postgres/dbt/include/postgres/macros/get_relations.sql new file mode 100644 index 00000000000..4e20940e37a --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/get_relations.sql @@ -0,0 +1,77 @@ +{% macro postgres_get_relations () -%} + + {# /* + -- in pg_depend, objid is the dependent, refobjid is the referenced object + -- > a pg_depend entry indicates that the referenced object cannot be + -- > dropped without also dropping the dependent object. + */ #} + + {%- call statement('relations', fetch_result=True) -%} + with relation as ( + select + pg_rewrite.ev_class as class, + pg_rewrite.oid as id + from pg_rewrite + ), + class as ( + select + oid as id, + relname as name, + relnamespace as schema, + relkind as kind + from pg_class + ), + dependency as ( + select distinct + pg_depend.objid as id, + pg_depend.refobjid as ref + from pg_depend + ), + schema as ( + select + pg_namespace.oid as id, + pg_namespace.nspname as name + from pg_namespace + where nspname != 'information_schema' and nspname not like 'pg\_%' + ), + referenced as ( + select + relation.id AS id, + referenced_class.name , + referenced_class.schema , + referenced_class.kind + from relation + join class as referenced_class on relation.class=referenced_class.id + where referenced_class.kind in ('r', 'v', 'm') + ), + relationships as ( + select + referenced.name as referenced_name, + referenced.schema as referenced_schema_id, + dependent_class.name as dependent_name, + dependent_class.schema as dependent_schema_id, + referenced.kind as kind + from referenced + join dependency on referenced.id=dependency.id + join class as dependent_class on dependency.ref=dependent_class.id + where + (referenced.name != dependent_class.name or + referenced.schema != dependent_class.schema) + ) + + select + referenced_schema.name as referenced_schema, + relationships.referenced_name as referenced_name, + dependent_schema.name as dependent_schema, + relationships.dependent_name as dependent_name + from relationships + join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id + join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id + group by referenced_schema, referenced_name, dependent_schema, dependent_name + order by referenced_schema, 
referenced_name, dependent_schema, dependent_name; + + {%- endcall -%} + + {{ return(load_result('relations').table) }} + +{% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/materializations/materialized_view.sql b/plugins/postgres/dbt/include/postgres/macros/materializations/materialized_view.sql deleted file mode 100644 index 1fc7d864b5b..00000000000 --- a/plugins/postgres/dbt/include/postgres/macros/materializations/materialized_view.sql +++ /dev/null @@ -1,84 +0,0 @@ -{% macro postgres__get_alter_materialized_view_as_sql( - relation, - configuration_changes, - sql, - existing_relation, - backup_relation, - intermediate_relation -) %} - - -- apply a full refresh immediately if needed - {% if configuration_changes.requires_full_refresh %} - - {{ get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) }} - - -- otherwise apply individual changes as needed - {% else %} - - {{ postgres__update_indexes_on_materialized_view(relation, configuration_changes.indexes) }} - - {%- endif -%} - -{% endmacro %} - - -{% macro postgres__get_create_materialized_view_as_sql(relation, sql) %} - create materialized view if not exists {{ relation }} as {{ sql }}; - - {% for _index_dict in config.get('indexes', []) -%} - {{- get_create_index_sql(relation, _index_dict) -}} - {%- endfor -%} - -{% endmacro %} - - -{% macro postgres__get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) %} - {{- get_create_materialized_view_as_sql(intermediate_relation, sql) -}} - - {% if existing_relation is not none %} - alter materialized view {{ existing_relation }} rename to {{ backup_relation.include(database=False, schema=False) }}; - {% endif %} - - alter materialized view {{ intermediate_relation }} rename to {{ relation.include(database=False, schema=False) }}; - -{% endmacro %} - - -{% macro postgres__get_materialized_view_configuration_changes(existing_relation, new_config) %} - {% set _existing_materialized_view = postgres__describe_materialized_view(existing_relation) %} - {% set _configuration_changes = existing_relation.get_materialized_view_config_change_collection(_existing_materialized_view, new_config) %} - {% do return(_configuration_changes) %} -{% endmacro %} - - -{% macro postgres__refresh_materialized_view(relation) %} - refresh materialized view {{ relation }}; -{% endmacro %} - - -{%- macro postgres__update_indexes_on_materialized_view(relation, index_changes) -%} - {{- log("Applying UPDATE INDEXES to: " ~ relation) -}} - - {%- for _index_change in index_changes -%} - {%- set _index = _index_change.context -%} - - {%- if _index_change.action == "drop" -%} - - {{ postgres__get_drop_index_sql(relation, _index.name) }}; - - {%- elif _index_change.action == "create" -%} - - {{ postgres__get_create_index_sql(relation, _index.as_node_config) }} - - {%- endif -%} - - {%- endfor -%} - -{%- endmacro -%} - - -{% macro postgres__describe_materialized_view(relation) %} - -- for now just get the indexes, we don't need the name or the query yet - {% set _indexes = run_query(get_show_indexes_sql(relation)) %} - {% do return({'indexes': _indexes}) %} -{% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/relation_components/index.sql b/plugins/postgres/dbt/include/postgres/macros/relation_components/index.sql new file mode 100644 index 00000000000..d4f12cba0e0 --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/relation_components/index.sql @@ -0,0 +1,124 
@@
+{#- /*
+    This file contains DDL that gets consumed in the Postgres implementation of the materialized view materialization.
+    These macros could be used elsewhere, as they do not care that they are being called by a materialization,
+    but the original intention was to support the materialization of materialized views. These macros represent
+    the basic interactions dbt-postgres requires of indexes in Postgres:
+        - ALTER
+        - CREATE
+        - DESCRIBE
+        - DROP
+    These macros all take a `PostgresIndexRelation` instance and/or a `Relation` instance as input.
+    These classes can be found in the following files, respectively:
+        `dbt/adapters/postgres/relation/models/index.py`
+        `dbt/adapters/relation/models/_relation.py`
+
+    Used in:
+        `dbt/include/postgres/macros/relations/materialized_view.sql`
+    Uses:
+        `dbt/adapters/postgres/relation/models/index.py`
+        `dbt/adapters/postgres/relation/models/materialized_view.py`
+*/ -#}
+
+
+{% macro postgres__alter_indexes_template(relation, index_changeset) -%}
+    {{- log('Applying ALTER INDEXES to: ' ~ relation) -}}
+
+    {%- for _change in index_changeset -%}
+        {%- set _index = _change.context -%}
+
+        {% if _change.action == 'drop' -%}
+            {{ postgres__drop_index_template(relation, _index) }};
+
+        {% elif _change.action == 'create' -%}
+            {{ postgres__create_index_template(relation, _index) }};
+
+        {%- endif -%}
+    {%- endfor -%}
+
+{%- endmacro %}
+
+
+{% macro postgres__create_indexes_template(relation) -%}
+
+    {% for _index in relation.indexes -%}
+        {{- postgres__create_index_template(relation, _index) -}}
+        {%- if not loop.last %};{% endif -%}
+    {%- endfor -%}
+
+{%- endmacro %}
+
+
+{% macro postgres__create_index_template(relation, index) -%}
+
+    {%- set _index_name = adapter.generate_index_name(relation, index) -%}
+
+    create {% if index.unique -%}unique{%- endif %} index if not exists "{{ _index_name }}"
+        on {{ relation.fully_qualified_path }}
+        using {{ index.method }}
+        (
+            {{ ", ".join(index.column_names) }}
+        )
+
+{%- endmacro %}
+
+
+{% macro postgres__describe_indexes_template(relation) %}
+    {%- if adapter.is_relation_model(relation) -%}
+        {%- set _name = relation.name %}
+        {%- set _schema = relation.schema_name %}
+    {%- else -%}
+        {%- set _name = relation.identifier %}
+        {%- set _schema = relation.schema %}
+    {%- endif -%}
+    select
+        i.relname as name,
+        m.amname as method,
+        ix.indisunique as "unique",
+        array_to_string(array_agg(a.attname), ',') as column_names
+    from pg_index ix
+    join pg_class i
+        on i.oid = ix.indexrelid
+    join pg_am m
+        on m.oid=i.relam
+    join pg_class t
+        on t.oid = ix.indrelid
+    join pg_namespace n
+        on n.oid = t.relnamespace
+    join pg_attribute a
+        on a.attrelid = t.oid
+        and a.attnum = ANY(ix.indkey)
+    where t.relname ilike '{{ _name }}'
+        and n.nspname ilike '{{ _schema }}'
+        and t.relkind in ('r', 'm')
+    group by 1, 2, 3
+    order by 1, 2, 3
+{% endmacro %}
+
+
+{% macro postgres__drop_index_template(relation, index) -%}
+    drop index if exists "{{ relation.schema_name }}"."{{ index.name }}" cascade
+{%- endmacro %}
+
+
+{# /*
+    These are `BaseRelation` versions. The `BaseRelation` workflows are different.
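+
+    A rough sketch of the difference (the calling context and the `my_*` names
+    here are assumed for illustration; they are not defined in this file):
+    the template macros above take relation-model objects,
+
+        {{ postgres__create_index_template(my_materialized_view, my_index) }}
+
+    while the legacy macros below take a `BaseRelation` plus an index config dict,
+
+        {{ postgres__get_create_index_sql(relation, {'columns': ['id'], 'unique': true}) }}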
+*/ #} +{% macro postgres__get_create_index_sql(relation, index_dict) -%} + {%- set index_config = adapter.parse_index(index_dict) -%} + {%- set comma_separated_columns = ", ".join(index_config.columns) -%} + {%- set index_name = index_config.render(relation) -%} + + create {% if index_config.unique -%} + unique + {%- endif %} index if not exists + "{{ index_name }}" + on {{ relation }} {% if index_config.type -%} + using {{ index_config.type }} + {%- endif %} + ({{ comma_separated_columns }}); +{%- endmacro %} + + +{%- macro postgres__get_drop_index_sql(relation, index_name) -%} + drop index if exists "{{ index_name }}" +{%- endmacro -%} diff --git a/plugins/postgres/dbt/include/postgres/macros/relation_components/schema.sql b/plugins/postgres/dbt/include/postgres/macros/relation_components/schema.sql new file mode 100644 index 00000000000..04ec7a216aa --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/relation_components/schema.sql @@ -0,0 +1,42 @@ +{# /* + These are `BaseRelation` versions. The `BaseRelation` workflows are different. +*/ #} +{% macro postgres__create_schema(relation) -%} + {% if relation.database -%} + {{ adapter.verify_database(relation.database) }} + {%- endif -%} + {%- call statement('create_schema') -%} + create schema if not exists {{ relation.without_identifier().include(database=False) }} + {%- endcall -%} +{% endmacro %} + + +{% macro postgres__drop_schema(relation) -%} + {% if relation.database -%} + {{ adapter.verify_database(relation.database) }} + {%- endif -%} + {%- call statement('drop_schema') -%} + drop schema if exists {{ relation.without_identifier().include(database=False) }} cascade + {%- endcall -%} +{% endmacro %} + + +{% macro postgres__list_schemas(database) %} + {% if database -%} + {{ adapter.verify_database(database) }} + {%- endif -%} + {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} + select distinct nspname from pg_namespace + {% endcall %} + {{ return(load_result('list_schemas').table) }} +{% endmacro %} + +{% macro postgres__check_schema_exists(information_schema, schema) -%} + {% if information_schema.database -%} + {{ adapter.verify_database(information_schema.database) }} + {%- endif -%} + {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %} + select count(*) from pg_namespace where nspname = '{{ schema }}' + {% endcall %} + {{ return(load_result('check_schema_exists').table) }} +{% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/relations.sql b/plugins/postgres/dbt/include/postgres/macros/relations.sql deleted file mode 100644 index 9966c5db2e8..00000000000 --- a/plugins/postgres/dbt/include/postgres/macros/relations.sql +++ /dev/null @@ -1,76 +0,0 @@ -{% macro postgres_get_relations () -%} - - {# - -- in pg_depend, objid is the dependent, refobjid is the referenced object - -- > a pg_depend entry indicates that the referenced object cannot be - -- > dropped without also dropping the dependent object. 
-    #}
-
-  {%- call statement('relations', fetch_result=True) -%}
-    with relation as (
-        select
-            pg_rewrite.ev_class as class,
-            pg_rewrite.oid as id
-        from pg_rewrite
-    ),
-    class as (
-        select
-            oid as id,
-            relname as name,
-            relnamespace as schema,
-            relkind as kind
-        from pg_class
-    ),
-    dependency as (
-        select distinct
-            pg_depend.objid as id,
-            pg_depend.refobjid as ref
-        from pg_depend
-    ),
-    schema as (
-        select
-            pg_namespace.oid as id,
-            pg_namespace.nspname as name
-        from pg_namespace
-        where nspname != 'information_schema' and nspname not like 'pg\_%'
-    ),
-    referenced as (
-        select
-            relation.id AS id,
-            referenced_class.name ,
-            referenced_class.schema ,
-            referenced_class.kind
-        from relation
-        join class as referenced_class on relation.class=referenced_class.id
-        where referenced_class.kind in ('r', 'v', 'm')
-    ),
-    relationships as (
-        select
-            referenced.name as referenced_name,
-            referenced.schema as referenced_schema_id,
-            dependent_class.name as dependent_name,
-            dependent_class.schema as dependent_schema_id,
-            referenced.kind as kind
-        from referenced
-        join dependency on referenced.id=dependency.id
-        join class as dependent_class on dependency.ref=dependent_class.id
-        where
-            (referenced.name != dependent_class.name or
-            referenced.schema != dependent_class.schema)
-    )
-
-    select
-        referenced_schema.name as referenced_schema,
-        relationships.referenced_name as referenced_name,
-        dependent_schema.name as dependent_schema,
-        relationships.dependent_name as dependent_name
-    from relationships
-    join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id
-    join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id
-    group by referenced_schema, referenced_name, dependent_schema, dependent_name
-    order by referenced_schema, referenced_name, dependent_schema, dependent_name;
-
-  {%- endcall -%}
-
-  {{ return(load_result('relations').table) }}
-{% endmacro %}
diff --git a/plugins/postgres/dbt/include/postgres/macros/relations/materialized_view.sql b/plugins/postgres/dbt/include/postgres/macros/relations/materialized_view.sql
new file mode 100644
index 00000000000..07509def619
--- /dev/null
+++ b/plugins/postgres/dbt/include/postgres/macros/relations/materialized_view.sql
@@ -0,0 +1,110 @@
+{#- /*
+    This file contains DDL that gets consumed in the default materialized view materialization in `dbt-core`.
+    These macros could be used elsewhere, as they do not care that they are being called by a materialization,
+    but the original intention was to support the materialization of materialized views. These macros represent
+    the basic interactions dbt-postgres requires of materialized views in Postgres:
+        - ALTER
+        - CREATE
+        - DESCRIBE
+        - DROP
+        - REFRESH
+        - RENAME
+        - REPLACE
+    These macros all take a `PostgresMaterializedViewRelation` instance as input.
This class can be found in:
+        `dbt/adapters/postgres/relation/models/materialized_view.py`
+
+    Used in:
+        `dbt/include/global_project/macros/materializations/models/materialized_view/materialized_view.sql`
+    Uses:
+        `dbt/adapters/postgres/relation.py`
+        `dbt/adapters/postgres/relation/models/`
+*/ -#}
+
+
+{%- macro postgres__alter_materialized_view_template(existing_materialized_view, target_materialized_view) -%}
+
+    {#- /*
+        We need to get the config changeset to determine if we require a full refresh (this happens if any change
+        in the changeset requires a full refresh, or if an unmonitored change was detected),
+        or if we can get away with altering the materialized view in place.
+    */ -#}
+
+    {%- if target_materialized_view == existing_materialized_view -%}
+        {{- exceptions.warn("No changes were identified for: " ~ existing_materialized_view) -}}
+
+    {%- else -%}
+        {%- set _changeset = adapter.make_changeset(existing_materialized_view, target_materialized_view) -%}
+
+        {%- if _changeset.requires_full_refresh -%}
+            {{ replace_template(existing_materialized_view, target_materialized_view) }}
+
+        {%- else -%}
+            {{ postgres__alter_indexes_template(existing_materialized_view, _changeset.indexes) }}
+
+        {%- endif -%}
+    {%- endif -%}
+
+{%- endmacro -%}
+
+
+{%- macro postgres__create_materialized_view_template(materialized_view) -%}
+
+    create materialized view {{ materialized_view.fully_qualified_path }} as
+        {{ materialized_view.query }}
+    ;
+    {{ postgres__create_indexes_template(materialized_view) -}}
+
+{%- endmacro -%}
+
+
+{%- macro postgres__describe_materialized_view_template(materialized_view) -%}
+
+    {%- set _materialized_view_template -%}
+        select
+            v.matviewname as name,
+            v.schemaname as schema_name,
+            '{{ this.database }}' as database_name,
+            v.definition as query
+        from pg_matviews v
+        where v.matviewname ilike '{{ materialized_view.name }}'
+            and v.schemaname ilike '{{ materialized_view.schema_name }}'
+    {%- endset -%}
+    {%- set _materialized_view = run_query(_materialized_view_template) -%}
+
+    {%- set _indexes_template = postgres__describe_indexes_template(materialized_view) -%}
+    {%- set _indexes = run_query(_indexes_template) -%}
+
+    {%- do return({'relation': _materialized_view, 'indexes': _indexes}) -%}
+
+{%- endmacro -%}
+
+
+{%- macro postgres__drop_materialized_view_template(materialized_view) -%}
+    drop materialized view if exists {{ materialized_view.fully_qualified_path }} cascade
+{%- endmacro -%}
+
+
+{# /*
+    These are `BaseRelation` versions. The `BaseRelation` workflows are different.
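+
+    A rough sketch of the split (the variable names here are assumed for
+    illustration only): the template versions expect a relation model and
+    render its own fully qualified path,
+
+        {{ postgres__drop_materialized_view_template(my_materialized_view_model) }}
+
+    while the `BaseRelation` version below renders the relation object directly,
+
+        {{ postgres__drop_materialized_view(relation) }}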
+*/ #} +{%- macro postgres__drop_materialized_view(relation) -%} + drop materialized view if exists {{ relation }} cascade +{%- endmacro -%} + + +{%- macro postgres__refresh_materialized_view_template(materialized_view) -%} + refresh materialized view {{ materialized_view.fully_qualified_path }} +{%- endmacro -%} + + +{%- macro postgres__rename_materialized_view_template(materialized_view, new_name) -%} + + {%- if adapter.is_relation_model(materialized_view) -%} + {%- set fully_qualified_path = materialized_view.fully_qualified_path -%} + {%- else -%} + {%- set fully_qualified_path = materialized_view -%} + {%- endif -%} + + alter materialized view {{ fully_qualified_path }} rename to {{ new_name }} + +{%- endmacro -%} diff --git a/plugins/postgres/dbt/include/postgres/macros/relations/table.sql b/plugins/postgres/dbt/include/postgres/macros/relations/table.sql new file mode 100644 index 00000000000..11c992e68d1 --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/relations/table.sql @@ -0,0 +1,8 @@ +{%- macro postgres__drop_table_template(table) -%} + drop table if exists {{ table.fully_qualified_path }} cascade +{%- endmacro -%} + + +{%- macro postgres__rename_table_template(table, new_name) -%} + alter table {{ table.fully_qualified_path }} rename to {{ new_name }} +{%- endmacro -%} diff --git a/plugins/postgres/dbt/include/postgres/macros/relations/view.sql b/plugins/postgres/dbt/include/postgres/macros/relations/view.sql new file mode 100644 index 00000000000..96b9303f445 --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/relations/view.sql @@ -0,0 +1,8 @@ +{%- macro postgres__drop_view_template(view) -%} + drop view if exists {{ view.fully_qualified_path }} cascade +{%- endmacro -%} + + +{%- macro postgres__rename_view_template(view, new_name) -%} + alter view {{ view.fully_qualified_path }} rename to {{ new_name }} +{%- endmacro -%} diff --git a/plugins/postgres/dbt/include/postgres/macros/timestamps.sql b/plugins/postgres/dbt/include/postgres/macros/utils/timestamps.sql similarity index 100% rename from plugins/postgres/dbt/include/postgres/macros/timestamps.sql rename to plugins/postgres/dbt/include/postgres/macros/utils/timestamps.sql diff --git a/tests/adapter/dbt/tests/adapter/materialized_view/base.py b/tests/adapter/dbt/tests/adapter/materialized_view/base.py deleted file mode 100644 index 25aef3abc6f..00000000000 --- a/tests/adapter/dbt/tests/adapter/materialized_view/base.py +++ /dev/null @@ -1,69 +0,0 @@ -from typing import List, Tuple, Optional -import os - -import pytest - -from dbt.dataclass_schema import StrEnum -from dbt.tests.util import run_dbt, get_manifest, run_dbt_and_capture - - -def run_model( - model: str, - run_args: Optional[List[str]] = None, - full_refresh: bool = False, - expect_pass: bool = True, -) -> Tuple[list, str]: - args = ["--debug", "run", "--models", model] - if full_refresh: - args.append("--full-refresh") - if run_args: - args.extend(run_args) - return run_dbt_and_capture(args, expect_pass=expect_pass) - - -def assert_message_in_logs(logs: str, message: str, expected_fail: bool = False): - # if the logs are json strings, then 'jsonify' the message because of things like escape quotes - if os.environ.get("DBT_LOG_FORMAT", "") == "json": - message = message.replace(r'"', r"\"") - - if expected_fail: - assert message not in logs - else: - assert message in logs - - -def get_records(project, model: str) -> List[tuple]: - sql = f"select * from {project.database}.{project.test_schema}.{model};" - return [tuple(row) for row 
in project.run_sql(sql, fetch="all")] - - -def get_row_count(project, model: str) -> int: - sql = f"select count(*) from {project.database}.{project.test_schema}.{model};" - return project.run_sql(sql, fetch="one")[0] - - -def insert_record(project, record: tuple, model: str, columns: List[str]): - sql = f""" - insert into {project.database}.{project.test_schema}.{model} ({', '.join(columns)}) - values ({','.join(str(value) for value in record)}) - ;""" - project.run_sql(sql) - - -def assert_model_exists_and_is_correct_type(project, model: str, relation_type: StrEnum): - # In general, `relation_type` will be of type `RelationType`. - # However, in some cases (e.g. `dbt-snowflake`) adapters will have their own `RelationType`. - manifest = get_manifest(project.project_root) - model_metadata = manifest.nodes[f"model.test.{model}"] - assert model_metadata.config.materialized == relation_type - assert get_row_count(project, model) >= 0 - - -class Base: - @pytest.fixture(scope="function", autouse=True) - def setup(self, project): - run_dbt(["run"]) - - @pytest.fixture(scope="class", autouse=True) - def project(self, project): - yield project diff --git a/tests/adapter/dbt/tests/adapter/materialized_view/on_configuration_change.py b/tests/adapter/dbt/tests/adapter/materialized_view/on_configuration_change.py deleted file mode 100644 index f77d9aade03..00000000000 --- a/tests/adapter/dbt/tests/adapter/materialized_view/on_configuration_change.py +++ /dev/null @@ -1,91 +0,0 @@ -from typing import List - -import pytest -import yaml - -from dbt.tests.util import read_file, write_file, relation_from_name -from dbt.contracts.results import RunStatus - -from dbt.tests.adapter.materialized_view.base import ( - Base, - assert_message_in_logs, -) - - -def get_project_config(project): - file_yaml = read_file(project.project_root, "dbt_project.yml") - return yaml.safe_load(file_yaml) - - -def set_project_config(project, config): - config_yaml = yaml.safe_dump(config) - write_file(config_yaml, project.project_root, "dbt_project.yml") - - -def get_model_file(project, model: str) -> str: - return read_file(project.project_root, "models", f"{model}.sql") - - -def set_model_file(project, model: str, model_sql: str): - write_file(model_sql, project.project_root, "models", f"{model}.sql") - - -def assert_proper_scenario( - on_configuration_change, - results, - logs, - status: RunStatus, - messages_in_logs: List[str] = None, - messages_not_in_logs: List[str] = None, -): - assert len(results.results) == 1 - result = results.results[0] - - assert result.node.config.on_configuration_change == on_configuration_change - assert result.status == status - for message in messages_in_logs or []: - assert_message_in_logs(logs, message) - for message in messages_not_in_logs or []: - assert_message_in_logs(logs, message, expected_fail=True) - - -class OnConfigurationChangeBase(Base): - - base_materialized_view = "base_materialized_view" - - @pytest.fixture(scope="function") - def alter_message(self, project): - return f"Applying ALTER to: {relation_from_name(project.adapter, self.base_materialized_view)}" - - @pytest.fixture(scope="function") - def create_message(self, project): - return f"Applying CREATE to: {relation_from_name(project.adapter, self.base_materialized_view)}" - - @pytest.fixture(scope="function") - def refresh_message(self, project): - return f"Applying REFRESH to: {relation_from_name(project.adapter, self.base_materialized_view)}" - - @pytest.fixture(scope="function") - def replace_message(self, project): - 
return f"Applying REPLACE to: {relation_from_name(project.adapter, self.base_materialized_view)}" - - @pytest.fixture(scope="function") - def configuration_change_message(self, project): - return ( - f"Determining configuration changes on: " - f"{relation_from_name(project.adapter, self.base_materialized_view)}" - ) - - @pytest.fixture(scope="function") - def configuration_change_continue_message(self, project): - return ( - f"Configuration changes were identified and `on_configuration_change` " - f"was set to `continue` for `{relation_from_name(project.adapter, self.base_materialized_view)}`" - ) - - @pytest.fixture(scope="function") - def configuration_change_fail_message(self, project): - return ( - f"Configuration changes were identified and `on_configuration_change` " - f"was set to `fail` for `{relation_from_name(project.adapter, self.base_materialized_view)}`" - ) diff --git a/tests/functional/materializations/materialized_view_tests/conftest.py b/tests/functional/materializations/materialized_view_tests/conftest.py new file mode 100644 index 00000000000..46916652c62 --- /dev/null +++ b/tests/functional/materializations/materialized_view_tests/conftest.py @@ -0,0 +1,66 @@ +import pytest + +from dbt.adapters.relation.models import RelationRef +from dbt.adapters.relation.factory import RelationFactory +from dbt.contracts.relation import RelationType + +from dbt.adapters.postgres.relation import models as relation_models + + +@pytest.fixture(scope="class") +def relation_factory(): + return RelationFactory( + relation_types=RelationType, + relation_models={ + RelationType.MaterializedView: relation_models.PostgresMaterializedViewRelation, + }, + relation_changesets={ + RelationType.MaterializedView: relation_models.PostgresMaterializedViewRelationChangeset, + }, + relation_can_be_renamed={ + RelationType.MaterializedView, + RelationType.Table, + RelationType.View, + }, + render_policy=relation_models.PostgresRenderPolicy, + ) + + +@pytest.fixture(scope="class") +def my_materialized_view(project, relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_materialized_view", + schema_name=project.test_schema, + database_name=project.database, + relation_type=RelationType.MaterializedView, + ) + + +@pytest.fixture(scope="class") +def my_view(project, relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_view", + schema_name=project.test_schema, + database_name=project.database, + relation_type=RelationType.View, + ) + + +@pytest.fixture(scope="class") +def my_table(project, relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_table", + schema_name=project.test_schema, + database_name=project.database, + relation_type=RelationType.Table, + ) + + +@pytest.fixture(scope="class") +def my_seed(project, relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_seed", + schema_name=project.test_schema, + database_name=project.database, + relation_type=RelationType.Table, + ) diff --git a/tests/functional/materializations/materialized_view_tests/files.py b/tests/functional/materializations/materialized_view_tests/files.py new file mode 100644 index 00000000000..9bf881ef970 --- /dev/null +++ b/tests/functional/materializations/materialized_view_tests/files.py @@ -0,0 +1,31 @@ +MY_SEED = """ +id,value +1,100 +2,200 +3,300 +""".strip() + + +MY_TABLE = """ +{{ config( + materialized='table', +) }} +select * from {{ ref('my_seed') }} +""" + + +MY_VIEW = """ +{{ config( + materialized='view', +) }} 
+select * from {{ ref('my_seed') }} +""" + + +MY_MATERIALIZED_VIEW = """ +{{ config( + materialized='materialized_view', + indexes=[{'columns': ['id']}], +) }} +select * from {{ ref('my_seed') }} +""" diff --git a/tests/functional/materializations/materialized_view_tests/fixtures.py b/tests/functional/materializations/materialized_view_tests/fixtures.py deleted file mode 100644 index 0250152376f..00000000000 --- a/tests/functional/materializations/materialized_view_tests/fixtures.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest - -from dbt.tests.util import relation_from_name -from tests.adapter.dbt.tests.adapter.materialized_view.base import Base -from tests.adapter.dbt.tests.adapter.materialized_view.on_configuration_change import ( - OnConfigurationChangeBase, - get_model_file, - set_model_file, -) - - -class PostgresBasicBase(Base): - @pytest.fixture(scope="class") - def models(self): - base_table = """ - {{ config(materialized='table') }} - select 1 as base_column - """ - base_materialized_view = """ - {{ config(materialized='materialized_view') }} - select * from {{ ref('base_table') }} - """ - return {"base_table.sql": base_table, "base_materialized_view.sql": base_materialized_view} - - -class PostgresOnConfigurationChangeBase(OnConfigurationChangeBase): - @pytest.fixture(scope="class") - def models(self): - base_table = """ - {{ config( - materialized='table', - indexes=[{'columns': ['id', 'value']}] - ) }} - select - 1 as id, - 100 as value, - 42 as new_id, - 4242 as new_value - """ - base_materialized_view = """ - {{ config( - materialized='materialized_view', - indexes=[{'columns': ['id', 'value']}] - ) }} - select * from {{ ref('base_table') }} - """ - return {"base_table.sql": base_table, "base_materialized_view.sql": base_materialized_view} - - @pytest.fixture(scope="function") - def configuration_changes(self, project): - initial_model = get_model_file(project, "base_materialized_view") - - # change the index from [`id`, `value`] to [`new_id`, `new_value`] - new_model = initial_model.replace( - "indexes=[{'columns': ['id', 'value']}]", - "indexes=[{'columns': ['new_id', 'new_value']}]", - ) - set_model_file(project, "base_materialized_view", new_model) - - yield - - # set this back for the next test - set_model_file(project, "base_materialized_view", initial_model) - - @pytest.fixture(scope="function") - def update_index_message(self, project): - return f"Applying UPDATE INDEXES to: {relation_from_name(project.adapter, 'base_materialized_view')}" diff --git a/tests/functional/materializations/materialized_view_tests/test_materialized_view.py b/tests/functional/materializations/materialized_view_tests/test_materialized_view.py index 733329b42ff..b4dcf19d399 100644 --- a/tests/functional/materializations/materialized_view_tests/test_materialized_view.py +++ b/tests/functional/materializations/materialized_view_tests/test_materialized_view.py @@ -1,197 +1,312 @@ import pytest + from dbt.contracts.graph.model_config import OnConfigurationChangeOption -from dbt.contracts.results import RunStatus -from dbt.contracts.relation import RelationType -from tests.adapter.dbt.tests.adapter.materialized_view.base import ( - run_model, - assert_model_exists_and_is_correct_type, - insert_record, - get_row_count, +from dbt.tests.util import ( + assert_message_in_logs, + get_model_file, + run_dbt, + run_dbt_and_capture, + set_model_file, ) -from tests.adapter.dbt.tests.adapter.materialized_view.on_configuration_change import ( - assert_proper_scenario, +from 
tests.functional.materializations.materialized_view_tests.files import ( + MY_SEED, + MY_TABLE, + MY_MATERIALIZED_VIEW, + MY_VIEW, ) - -from tests.functional.materializations.materialized_view_tests.fixtures import ( - PostgresOnConfigurationChangeBase, - PostgresBasicBase, +from tests.functional.materializations.materialized_view_tests.utils import ( + query_indexes, + query_relation_type, + query_row_count, + swap_indexes, ) -class TestBasic(PostgresBasicBase): - def test_relation_is_materialized_view_on_initial_creation(self, project): - assert_model_exists_and_is_correct_type( - project, "base_materialized_view", RelationType.MaterializedView - ) - assert_model_exists_and_is_correct_type(project, "base_table", RelationType.Table) +@pytest.fixture(scope="class", autouse=True) +def seeds(): + return {"my_seed.csv": MY_SEED} - def test_relation_is_materialized_view_when_rerun(self, project): - run_model("base_materialized_view") - assert_model_exists_and_is_correct_type( - project, "base_materialized_view", RelationType.MaterializedView - ) - def test_relation_is_materialized_view_on_full_refresh(self, project): - run_model("base_materialized_view", full_refresh=True) - assert_model_exists_and_is_correct_type( - project, "base_materialized_view", RelationType.MaterializedView - ) +@pytest.fixture(scope="class", autouse=True) +def models(): + yield { + "my_table.sql": MY_TABLE, + "my_view.sql": MY_VIEW, + "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + } - def test_relation_is_materialized_view_on_update(self, project): - run_model("base_materialized_view", run_args=["--vars", "quoting: {identifier: True}"]) - assert_model_exists_and_is_correct_type( - project, "base_materialized_view", RelationType.MaterializedView - ) - def test_updated_base_table_data_only_shows_in_materialized_view_after_rerun(self, project): - # poll database - table_start = get_row_count(project, "base_table") - view_start = get_row_count(project, "base_materialized_view") +@pytest.fixture(scope="class", autouse=True) +def setup(project): + run_dbt(["seed"]) + yield - # insert new record in table - new_record = (2,) - insert_record(project, new_record, "base_table", ["base_column"]) - # poll database - table_mid = get_row_count(project, "base_table") - view_mid = get_row_count(project, "base_materialized_view") +def test_materialized_view_create(project, my_materialized_view): + assert query_relation_type(project, my_materialized_view) is None + run_dbt(["run", "--models", my_materialized_view.name]) + assert query_relation_type(project, my_materialized_view) == "materialized_view" - # refresh the materialized view - run_model("base_materialized_view") - # poll database - table_end = get_row_count(project, "base_table") - view_end = get_row_count(project, "base_materialized_view") +def test_materialized_view_create_idempotent(project, my_materialized_view): + assert query_relation_type(project, my_materialized_view) is None + run_dbt(["run", "--models", my_materialized_view.name]) + assert query_relation_type(project, my_materialized_view) == "materialized_view" + run_dbt(["run", "--models", my_materialized_view.name]) + assert query_relation_type(project, my_materialized_view) == "materialized_view" - # new records were inserted in the table but didn't show up in the view until it was refreshed - assert table_start < table_mid == table_end - assert view_start == view_mid < view_end +def test_materialized_view_full_refresh(project, my_materialized_view): + run_dbt(["run", "--models", my_materialized_view.name]) + 
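+    # --full-refresh should force a REPLACE rather than an in-place ALTER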
_, logs = run_dbt_and_capture(
+        ["--debug", "run", "--models", my_materialized_view.name, "--full-refresh"]
+    )
+    assert query_relation_type(project, my_materialized_view) == "materialized_view"
+    assert_message_in_logs(
+        f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs
+    )
-class TestOnConfigurationChangeApply(PostgresOnConfigurationChangeBase):
-    # we don't need to specify OnConfigurationChangeOption.Apply because it's the default
-    # this is part of the test
-    def test_full_refresh_takes_precedence_over_any_configuration_changes(
-        self, configuration_changes, replace_message, configuration_change_message
-    ):
-        results, logs = run_model("base_materialized_view", full_refresh=True)
-        assert_proper_scenario(
-            OnConfigurationChangeOption.Apply,
-            results,
-            logs,
-            RunStatus.Success,
-            messages_in_logs=[replace_message],
-            messages_not_in_logs=[configuration_change_message],
-        )
-
-    def test_model_is_refreshed_with_no_configuration_changes(
-        self, refresh_message, configuration_change_message
-    ):
-        results, logs = run_model("base_materialized_view")
-        assert_proper_scenario(
-            OnConfigurationChangeOption.Apply,
-            results,
-            logs,
-            RunStatus.Success,
-            messages_in_logs=[refresh_message, configuration_change_message],
-        )
+def test_materialized_view_replaces_table(project, my_materialized_view, my_table):
+    run_dbt(["run", "--models", my_table.name])
+    project.run_sql(
+        f"""
+        alter table {my_table.fully_qualified_path}
+        rename to {my_materialized_view.name}
+    """
+    )
+    assert query_relation_type(project, my_materialized_view) == "table"
+    run_dbt(["run", "--models", my_materialized_view.name])
+    assert query_relation_type(project, my_materialized_view) == "materialized_view"
-    def test_model_applies_changes_with_configuration_changes(
-        self, configuration_changes, alter_message, update_index_message
-    ):
-        results, logs = run_model("base_materialized_view")
-        assert_proper_scenario(
-            OnConfigurationChangeOption.Apply,
-            results,
-            logs,
-            RunStatus.Success,
-            messages_in_logs=[alter_message, update_index_message],
+
+def test_materialized_view_replaces_view(project, my_materialized_view, my_view):
+    run_dbt(["run", "--models", my_view.name])
+    project.run_sql(
+        f"""
+        alter view {my_view.fully_qualified_path}
+        rename to {my_materialized_view.name}
+    """
+    )
+    assert query_relation_type(project, my_materialized_view) == "view"
+    run_dbt(["run", "--models", my_materialized_view.name])
+    assert query_relation_type(project, my_materialized_view) == "materialized_view"
+
+
+def test_table_replaces_materialized_view(project, my_materialized_view, my_table):
+    run_dbt(["run", "--models", my_materialized_view.name])
+    project.run_sql(
+        f"""
+        alter materialized view {my_materialized_view.fully_qualified_path}
+        rename to {my_table.name}
+    """
+    )
+    assert query_relation_type(project, my_table) == "materialized_view"
+    run_dbt(["run", "--models", my_table.name])
+    assert query_relation_type(project, my_table) == "table"
+
+
+def test_view_replaces_materialized_view(project, my_materialized_view, my_view):
+    run_dbt(["run", "--models", my_materialized_view.name])
+    project.run_sql(
+        f"""
+        alter materialized view {my_materialized_view.fully_qualified_path}
+        rename to {my_view.name}
+    """
+    )
+    assert query_relation_type(project, my_view) == "materialized_view"
+    run_dbt(["run", "--models", my_view.name])
+    assert query_relation_type(project, my_view) == "view"
+
+
+def test_materialized_view_only_updates_after_refresh(project, my_materialized_view, my_seed):
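+    # materialized views are point-in-time snapshots: inserts into the base
+    # table should not be visible in the view until an explicit refresh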
run_dbt(["run", "--models", my_materialized_view.name]) + + # poll database + table_start = query_row_count(project, my_seed) + view_start = query_row_count(project, my_materialized_view) + + # insert new record in table + project.run_sql(f"insert into {my_seed.fully_qualified_path} (id, value) values (4, 400);") + + # poll database + table_mid = query_row_count(project, my_seed) + view_mid = query_row_count(project, my_materialized_view) + + # refresh the materialized view + project.run_sql(f"refresh materialized view {my_materialized_view.fully_qualified_path};") + + # poll database + table_end = query_row_count(project, my_seed) + view_end = query_row_count(project, my_materialized_view) + + # new records were inserted in the table but didn't show up in the view until it was refreshed + assert table_start < table_mid == table_end + assert view_start == view_mid < view_end + + +class OnConfigurationChangeBase: + @pytest.fixture(scope="class", autouse=True) + def models(self): + yield { + "my_table.sql": MY_TABLE, + "my_view.sql": MY_VIEW, + "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + } + + @pytest.fixture(scope="function", autouse=True) + def setup(self, project, my_materialized_view): + run_dbt(["seed"]) + run_dbt(["run", "--models", my_materialized_view.name, "--full-refresh"]) + + # the tests touch these files, store their contents in memory + initial_model = get_model_file(project, my_materialized_view) + + yield + + # and then reset them after the test runs + set_model_file(project, my_materialized_view, initial_model) + + +class TestOnConfigurationChangeApply(OnConfigurationChangeBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return {"models": {"on_configuration_change": OnConfigurationChangeOption.Apply.value}} + + def test_index_updates_are_applied_with_alter(self, project, my_materialized_view): + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" + + swap_indexes(project, my_materialized_view) + _, logs = run_dbt_and_capture(["--debug", "run", "--models", my_materialized_view.name]) + + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "value" # this changed + + assert_message_in_logs( + f"Applying ALTER to: {my_materialized_view.fully_qualified_path}", logs + ) + assert_message_in_logs( + f"Applying ALTER INDEXES to: {my_materialized_view.fully_qualified_path}", logs + ) + assert_message_in_logs( + f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs, False ) -class TestOnConfigurationChangeContinue(PostgresOnConfigurationChangeBase): +class TestOnConfigurationChangeContinue(OnConfigurationChangeBase): @pytest.fixture(scope="class") def project_config_update(self): return {"models": {"on_configuration_change": OnConfigurationChangeOption.Continue.value}} - def test_full_refresh_takes_precedence_over_any_configuration_changes( - self, configuration_changes, replace_message, configuration_change_message - ): - results, logs = run_model("base_materialized_view", full_refresh=True) - assert_proper_scenario( - OnConfigurationChangeOption.Continue, - results, - logs, - RunStatus.Success, - messages_in_logs=[replace_message], - messages_not_in_logs=[configuration_change_message], - ) - - def test_model_is_refreshed_with_no_configuration_changes( - self, refresh_message, configuration_change_message - ): - results, logs = run_model("base_materialized_view") - assert_proper_scenario( 
- OnConfigurationChangeOption.Continue, - results, + def test_index_updates_are_not_applied(self, project, my_materialized_view): + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" + + swap_indexes(project, my_materialized_view) + _, logs = run_dbt_and_capture(["--debug", "run", "--models", my_materialized_view.name]) + + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" # this did not change + + assert_message_in_logs( + f"Configuration changes were identified and `on_configuration_change` was set" + f" to `continue` for `{my_materialized_view.fully_qualified_path}`", logs, - RunStatus.Success, - messages_in_logs=[refresh_message, configuration_change_message], + ) + assert_message_in_logs( + f"Applying ALTER to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying UPDATE INDEXES to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs, False ) - def test_model_is_not_refreshed_with_configuration_changes( - self, configuration_changes, configuration_change_continue_message, refresh_message - ): - results, logs = run_model("base_materialized_view") - assert_proper_scenario( - OnConfigurationChangeOption.Continue, - results, - logs, - RunStatus.Success, - messages_in_logs=[configuration_change_continue_message], - messages_not_in_logs=[refresh_message], + def test_index_updates_are_applied_on_full_refresh(self, project, my_materialized_view): + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" + + swap_indexes(project, my_materialized_view) + _, logs = run_dbt_and_capture( + ["--debug", "run", "--models", my_materialized_view.name, "--full-refresh"] + ) + + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "value" # this changed despite `continue` + + assert_message_in_logs( + f"Applying ALTER to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying UPDATE INDEXES to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs, True ) -class TestOnConfigurationChangeFail(PostgresOnConfigurationChangeBase): +class TestOnConfigurationChangeFail(OnConfigurationChangeBase): @pytest.fixture(scope="class") def project_config_update(self): return {"models": {"on_configuration_change": OnConfigurationChangeOption.Fail.value}} - def test_full_refresh_takes_precedence_over_any_configuration_changes( - self, configuration_changes, replace_message, configuration_change_message - ): - results, logs = run_model("base_materialized_view", full_refresh=True) - assert_proper_scenario( - OnConfigurationChangeOption.Fail, - results, - logs, - RunStatus.Success, - messages_in_logs=[replace_message], - messages_not_in_logs=[configuration_change_message], - ) - - def test_model_is_refreshed_with_no_configuration_changes( - self, refresh_message, configuration_change_message - ): - results, logs = run_model("base_materialized_view") - assert_proper_scenario( - OnConfigurationChangeOption.Fail, - results, - logs, - RunStatus.Success, - messages_in_logs=[refresh_message, configuration_change_message], + def 
test_index_updates_are_not_applied(self, project, my_materialized_view): + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" + + swap_indexes(project, my_materialized_view) + _, logs = run_dbt_and_capture( + ["--debug", "run", "--models", my_materialized_view.name], expect_pass=False ) - def test_run_fails_with_configuration_changes( - self, configuration_changes, configuration_change_fail_message - ): - results, logs = run_model("base_materialized_view", expect_pass=False) - assert_proper_scenario( - OnConfigurationChangeOption.Fail, - results, + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" # this did not change + + assert_message_in_logs( + f"Configuration changes were identified and `on_configuration_change` was set" + f" to `fail` for `{my_materialized_view.fully_qualified_path}`", logs, - RunStatus.Error, - messages_in_logs=[configuration_change_fail_message], + ) + assert_message_in_logs( + f"Applying ALTER to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying UPDATE INDEXES to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs, False + ) + + def test_index_updates_are_applied_on_full_refresh(self, project, my_materialized_view): + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "id" + + swap_indexes(project, my_materialized_view) + _, logs = run_dbt_and_capture( + ["--debug", "run", "--models", my_materialized_view.name, "--full-refresh"] + ) + + indexes = query_indexes(project, my_materialized_view) + assert len(indexes) == 1 + assert indexes[0]["column_names"] == "value" # this changed despite `fail` + + assert_message_in_logs( + f"Applying ALTER to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying UPDATE INDEXES to: {my_materialized_view.fully_qualified_path}", logs, False + ) + assert_message_in_logs( + f"Applying REPLACE to: {my_materialized_view.fully_qualified_path}", logs, True ) diff --git a/tests/functional/materializations/materialized_view_tests/utils.py b/tests/functional/materializations/materialized_view_tests/utils.py new file mode 100644 index 00000000000..373c57ed7ae --- /dev/null +++ b/tests/functional/materializations/materialized_view_tests/utils.py @@ -0,0 +1,81 @@ +from typing import Dict, List, Optional + +from dbt.adapters.relation.models import Relation +from dbt.tests.util import get_model_file, set_model_file + + +def swap_indexes(project, my_materialized_view): + initial_model = get_model_file(project, my_materialized_view) + new_model = initial_model.replace( + "indexes=[{'columns': ['id']}]", + "indexes=[{'columns': ['value']}]", + ) + set_model_file(project, my_materialized_view, new_model) + + +def query_relation_type(project, relation: Relation) -> Optional[str]: + sql = f""" + select 'table' as relation_type + from pg_tables + where schemaname = '{relation.schema_name}' + and tablename = '{relation.name}' + union all + select 'view' as relation_type + from pg_views + where schemaname = '{relation.schema_name}' + and viewname = '{relation.name}' + union all + select 'materialized_view' as relation_type + from pg_matviews + where schemaname = '{relation.schema_name}' + and matviewname = '{relation.name}' + """ + 
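+    # a name should match at most one of pg_tables, pg_views, or pg_matviews;
+    # more than one match indicates a stale or duplicated test relation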
results = project.run_sql(sql, fetch="all") + if len(results) == 0: + return None + elif len(results) > 1: + raise ValueError(f"More than one instance of {relation.name} found!") + else: + return results[0][0] + + +def query_row_count(project, relation: Relation) -> int: + sql = f"select count(*) from {relation.fully_qualified_path};" + return project.run_sql(sql, fetch="one")[0] + + +def query_indexes(project, relation: Relation) -> List[Dict[str, str]]: + # pulled directly from `postgres__describe_indexes_template` and manually verified + sql = f""" + select + i.relname as name, + m.amname as method, + ix.indisunique as "unique", + array_to_string(array_agg(a.attname), ',') as column_names + from pg_index ix + join pg_class i + on i.oid = ix.indexrelid + join pg_am m + on m.oid=i.relam + join pg_class t + on t.oid = ix.indrelid + join pg_namespace n + on n.oid = t.relnamespace + join pg_attribute a + on a.attrelid = t.oid + and a.attnum = ANY(ix.indkey) + where t.relname ilike '{ relation.name }' + and n.nspname ilike '{ relation.schema_name }' + and t.relkind in ('r', 'm') + group by 1, 2, 3 + order by 1, 2, 3 + """ + raw_indexes = project.run_sql(sql, fetch="all") + indexes = [ + { + header: value + for header, value in zip(["name", "method", "unique", "column_names"], index) + } + for index in raw_indexes + ] + return indexes diff --git a/tests/unit/dbt_postgres_tests/conftest.py b/tests/unit/dbt_postgres_tests/conftest.py new file mode 100644 index 00000000000..8c4c48ae26f --- /dev/null +++ b/tests/unit/dbt_postgres_tests/conftest.py @@ -0,0 +1,174 @@ +from typing import Dict + +import agate +import pytest + +from dbt.adapters.materialization.factory import MaterializationFactory +from dbt.adapters.materialization.models import ( + MaterializationType, + MaterializedViewMaterialization, +) +from dbt.adapters.relation.factory import RelationFactory +from dbt.adapters.relation.models import Relation, RelationRef +from dbt.contracts.files import FileHash +from dbt.contracts.graph.nodes import CompiledNode, DependsOn, NodeConfig +from dbt.contracts.relation import RelationType +from dbt.node_types import NodeType + +from dbt.adapters.postgres.relation import models + + +@pytest.fixture +def relation_factory() -> RelationFactory: + return RelationFactory( + relation_types=RelationType, + relation_models={ + RelationType.MaterializedView: models.PostgresMaterializedViewRelation, + }, + relation_changesets={ + RelationType.MaterializedView: models.PostgresMaterializedViewRelationChangeset, + }, + relation_can_be_renamed={ + RelationType.MaterializedView, + RelationType.Table, + RelationType.View, + }, + render_policy=models.PostgresRenderPolicy, + ) + + +@pytest.fixture +def materialization_factory(relation_factory) -> MaterializationFactory: + return MaterializationFactory( + relation_factory=relation_factory, + materialization_map={ + MaterializationType.MaterializedView: MaterializedViewMaterialization, + }, + ) + + +@pytest.fixture +def materialized_view_ref(relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_materialized_view", + schema_name="my_schema", + database_name="my_database", + relation_type=RelationType.MaterializedView, + ) + + +@pytest.fixture +def view_ref(relation_factory) -> RelationRef: + return relation_factory.make_ref( + name="my_view", + schema_name="my_schema", + database_name="my_database", + relation_type=RelationType.View, + ) + + +@pytest.fixture +def materialized_view_compiled_node() -> CompiledNode: + return CompiledNode( + 
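+        # a minimal compiled node for a `materialized_view` model; the `indexes`
+        # entry in `config` below is what the index parsing exercises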
alias="my_materialized_view", + name="my_materialized_view", + database="my_database", + schema="my_schema", + resource_type=NodeType.Model, + unique_id="model.root.my_materialized_view", + fqn=["root", "my_materialized_view"], + package_name="root", + original_file_path="my_materialized_view.sql", + refs=[], + sources=[], + depends_on=DependsOn(), + config=NodeConfig.from_dict( + { + "enabled": True, + "materialized": "materialized_view", + "persist_docs": {}, + "post-hook": [], + "pre-hook": [], + "vars": {}, + "quoting": {}, + "column_types": {}, + "tags": [], + "indexes": [ + {"columns": ["id", "value"], "type": "hash"}, + {"columns": ["id"], "unique": True}, + ], + # "full_refresh": False, -- purposely excluding to test default + "on_configuration_change": "continue", + } + ), + tags=[], + path="my_materialized_view.sql", + language="sql", + raw_code="select 42 from meaning_of_life", + compiled_code="select 42 from meaning_of_life", + description="", + columns={}, + checksum=FileHash.from_contents(""), + ) + + +@pytest.fixture +def materialized_view_describe_relation_results() -> Dict[str, agate.Table]: + materialized_view_agate = agate.Table.from_object( + [ + { + "name": "my_materialized_view", + "schema_name": "my_schema", + "database_name": "my_database", + "query": "select 42 from meaning_of_life", + } + ] + ) + indexes_agate = agate.Table.from_object( + [ + {"name": "index_1", "column_names": "id,value", "method": "hash", "unique": None}, + {"name": "index_2", "column_names": "id", "method": None, "unique": True}, + ] + ) + return {"relation": materialized_view_agate, "indexes": indexes_agate} + + +@pytest.fixture +def materialized_view_relation( + relation_factory, materialized_view_describe_relation_results +) -> Relation: + return relation_factory.make_from_describe_relation_results( + materialized_view_describe_relation_results, RelationType.MaterializedView + ) + + +""" +Make sure the fixtures at least work, more thorough testing is done elsewhere +""" + + +def test_relation_factory(relation_factory): + assert ( + relation_factory._get_relation_model(RelationType.MaterializedView) + == models.PostgresMaterializedViewRelation + ) + + +def test_materialization_factory(materialization_factory): + relation_model = materialization_factory.relation_factory._get_relation_model( + RelationType.MaterializedView + ) + assert relation_model == models.PostgresMaterializedViewRelation + + +def test_materialized_view_ref(materialized_view_ref): + assert materialized_view_ref.name == "my_materialized_view" + + +def test_materialized_view_model_node(materialized_view_compiled_node): + assert materialized_view_compiled_node.name == "my_materialized_view" + + +def test_materialized_view_relation(materialized_view_relation): + assert materialized_view_relation.type == RelationType.MaterializedView + assert materialized_view_relation.name == "my_materialized_view" diff --git a/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_factory_postgres.py b/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_factory_postgres.py new file mode 100644 index 00000000000..e94039953ed --- /dev/null +++ b/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_factory_postgres.py @@ -0,0 +1,36 @@ +from dbt.adapters.materialization.models import MaterializationType +from dbt.contracts.relation import RelationType + +from dbt.adapters.postgres.relation import models as relation_models + + +def test_make_from_node(materialization_factory, 
materialized_view_compiled_node): + materialization = materialization_factory.make_from_node( + node=materialized_view_compiled_node, + existing_relation_ref=None, + ) + assert materialization.type == MaterializationType.MaterializedView + + materialized_view = materialization.target_relation + assert materialized_view.type == RelationType.MaterializedView + assert isinstance(materialized_view, relation_models.PostgresMaterializedViewRelation) + + assert materialized_view.name == "my_materialized_view" + assert materialized_view.schema_name == "my_schema" + assert materialized_view.database_name == "my_database" + assert materialized_view.query == "select 42 from meaning_of_life" + + index_1 = relation_models.PostgresIndexRelation( + column_names=frozenset({"id", "value"}), + method=relation_models.PostgresIndexMethod.hash, + unique=False, + render=relation_models.PostgresRenderPolicy, + ) + index_2 = relation_models.PostgresIndexRelation( + column_names=frozenset({"id"}), + method=relation_models.PostgresIndexMethod.btree, + unique=True, + render=relation_models.PostgresRenderPolicy, + ) + assert index_1 in materialized_view.indexes + assert index_2 in materialized_view.indexes diff --git a/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_postgres.py b/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_postgres.py new file mode 100644 index 00000000000..10e81d46fab --- /dev/null +++ b/tests/unit/dbt_postgres_tests/materialization_tests/test_materialization_postgres.py @@ -0,0 +1,35 @@ +from dataclasses import replace + +from dbt.adapters.materialization.models import ( + MaterializedViewMaterialization, + MaterializationBuildStrategy, +) + + +def test_materialized_view_create(materialized_view_compiled_node, relation_factory): + materialization = MaterializedViewMaterialization.from_node( + materialized_view_compiled_node, relation_factory + ) + assert materialization.build_strategy == MaterializationBuildStrategy.Create + assert materialization.should_revoke_grants is False + + +def test_materialized_view_replace(materialized_view_compiled_node, relation_factory, view_ref): + + materialization = MaterializedViewMaterialization.from_node( + materialized_view_compiled_node, relation_factory, view_ref + ) + assert materialization.build_strategy == MaterializationBuildStrategy.Replace + assert materialization.should_revoke_grants is True + + +def test_materialized_view_alter( + materialized_view_compiled_node, relation_factory, materialized_view_relation +): + altered_materialized_view = replace(materialized_view_relation, indexes={}) + + materialization = MaterializedViewMaterialization.from_node( + materialized_view_compiled_node, relation_factory, altered_materialized_view + ) + assert materialization.build_strategy == MaterializationBuildStrategy.Alter + assert materialization.should_revoke_grants is True diff --git a/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_database_postgres.py b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_database_postgres.py new file mode 100644 index 00000000000..ce4792b2f31 --- /dev/null +++ b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_database_postgres.py @@ -0,0 +1,24 @@ +from typing import Type + +import pytest +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models import PostgresDatabaseRelation + + +@pytest.mark.parametrize( + "config_dict,exception", + [ + ({"name": "my_database"}, None), + ({"name": ""}, DbtRuntimeError), 
+ ({"wrong_name": "my_database"}, DbtRuntimeError), + ({}, DbtRuntimeError), + ], +) +def test_make_database(config_dict: dict, exception: Type[Exception]): + if exception: + with pytest.raises(exception): + PostgresDatabaseRelation.from_dict(config_dict) + else: + my_database = PostgresDatabaseRelation.from_dict(config_dict) + assert my_database.name == config_dict.get("name") diff --git a/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_index_postgres.py b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_index_postgres.py new file mode 100644 index 00000000000..8885f3301a1 --- /dev/null +++ b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_index_postgres.py @@ -0,0 +1,24 @@ +from typing import Type + +import pytest +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models import PostgresIndexRelation + + +@pytest.mark.parametrize( + "config_dict,exception", + [ + ({"column_names": frozenset({"id", "value"}), "method": "hash", "unique": False}, None), + ({"column_names": frozenset("id"), "method": "btree", "unique": True}, None), + ({}, DbtRuntimeError), + ({"method": "btree", "unique": True}, DbtRuntimeError), + ], +) +def test_create_index(config_dict: dict, exception: Type[Exception]): + if exception: + with pytest.raises(exception): + PostgresIndexRelation.from_dict(config_dict) + else: + my_index = PostgresIndexRelation.from_dict(config_dict) + assert my_index.column_names == config_dict.get("column_names") diff --git a/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_materialized_view_postgres.py b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_materialized_view_postgres.py new file mode 100644 index 00000000000..4d24889d498 --- /dev/null +++ b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_materialized_view_postgres.py @@ -0,0 +1,112 @@ +from dataclasses import replace +from typing import Type + +import pytest + +from dbt.adapters.relation.models import RelationChangeAction +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models import ( + PostgresIndexRelation, + PostgresIndexRelationChange, + PostgresMaterializedViewRelation, + PostgresMaterializedViewRelationChangeset, +) + + +@pytest.mark.parametrize( + "config_dict,exception", + [ + ( + { + "name": "my_materialized_view", + "schema": { + "name": "my_schema", + "database": {"name": "my_database"}, + }, + "query": "select 1 from my_favoriate_table", + }, + None, + ), + ( + { + "name": "my_indexed_materialized_view", + "schema": { + "name": "my_schema", + "database": {"name": "my_database"}, + }, + "query": "select 42 from meaning_of_life", + "indexes": [ + { + "column_names": frozenset({"id", "value"}), + "method": "hash", + "unique": False, + }, + {"column_names": frozenset({"id"}), "method": "btree", "unique": True}, + ], + }, + None, + ), + ( + { + "my_name": "my_materialized_view", + "schema": { + "name": "my_schema", + "database": {"name": "my_database"}, + }, + }, + DbtRuntimeError, + ), + ], +) +def test_create_materialized_view(config_dict: dict, exception: Type[Exception]): + if exception: + with pytest.raises(exception): + PostgresMaterializedViewRelation.from_dict(config_dict) + else: + my_materialized_view = PostgresMaterializedViewRelation.from_dict(config_dict) + assert my_materialized_view.name == config_dict.get("name") + assert my_materialized_view.schema_name == config_dict.get("schema").get("name") + assert my_materialized_view.database_name == 
config_dict.get("schema").get("database").get( + "name" + ) + assert my_materialized_view.query == config_dict.get("query") + if indexes := config_dict.get("indexes"): + parsed = {(index.method, index.unique) for index in my_materialized_view.indexes} + raw = {(index.get("method"), index.get("unique")) for index in indexes} + assert parsed == raw + + +def test_create_materialized_view_changeset(materialized_view_relation): + existing_materialized_view = replace(materialized_view_relation) + + # pulled from `./dbt_postgres_tests/conftest.py` + original_index_1 = PostgresIndexRelation.from_dict( + { + "name": "index_1", + "column_names": frozenset({"id", "value"}), + "method": "hash", + "unique": False, + } + ) + original_index_2 = PostgresIndexRelation.from_dict( + {"name": "index_2", "column_names": frozenset({"id"}), "method": "btree", "unique": True}, + ) + + new_index = PostgresIndexRelation.from_dict( + {"column_names": frozenset({"id", "value"}), "method": "btree", "unique": False} + ) + + target_materialized_view = replace( + existing_materialized_view, indexes=frozenset({new_index, original_index_2}) + ) + + changeset = PostgresMaterializedViewRelationChangeset.from_relations( + existing_materialized_view, target_materialized_view + ) + assert changeset.is_empty is False + assert changeset.requires_full_refresh is False + assert changeset.indexes == { + PostgresIndexRelationChange(action=RelationChangeAction.drop, context=original_index_1), + PostgresIndexRelationChange(action=RelationChangeAction.create, context=new_index), + } diff --git a/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_schema_postgres.py b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_schema_postgres.py new file mode 100644 index 00000000000..25864f9e3d5 --- /dev/null +++ b/tests/unit/dbt_postgres_tests/relation_tests/model_tests/test_schema_postgres.py @@ -0,0 +1,31 @@ +from typing import Type + +import pytest +from dbt.exceptions import DbtRuntimeError + +from dbt.adapters.postgres.relation.models import PostgresSchemaRelation + + +@pytest.mark.parametrize( + "config_dict,exception", + [ + ({"name": "my_schema", "database": {"name": "my_database"}}, None), + ({"name": "my_schema", "database": None}, DbtRuntimeError), + ({"name": "my_schema"}, DbtRuntimeError), + ({"name": "", "database": {"name": "my_database"}}, DbtRuntimeError), + ({"wrong_name": "my_database", "database": {"name": "my_database"}}, DbtRuntimeError), + ( + {"name": "my_schema", "database": {"name": "my_database"}, "meaning_of_life": 42}, + DbtRuntimeError, + ), + ({}, DbtRuntimeError), + ], +) +def test_make_schema(config_dict: dict, exception: Type[Exception]): + if exception: + with pytest.raises(exception): + PostgresSchemaRelation.from_dict(config_dict) + else: + my_schema = PostgresSchemaRelation.from_dict(config_dict) + assert my_schema.name == config_dict.get("name") + assert my_schema.database_name == config_dict.get("database").get("name") diff --git a/tests/unit/dbt_postgres_tests/relation_tests/test_relation_factory_postgres.py b/tests/unit/dbt_postgres_tests/relation_tests/test_relation_factory_postgres.py new file mode 100644 index 00000000000..1b78c94eb44 --- /dev/null +++ b/tests/unit/dbt_postgres_tests/relation_tests/test_relation_factory_postgres.py @@ -0,0 +1,81 @@ +""" +Uses the following fixtures in `unit/dbt_postgres_tests/conftest.py`: +- `relation_factory` +- `materialized_view_ref` +""" + +from dbt.contracts.relation import RelationType + +from dbt.adapters.postgres.relation import models + 
+
+
+def test_make_ref(materialized_view_ref):
+    assert materialized_view_ref.name == "my_materialized_view"
+    assert materialized_view_ref.schema_name == "my_schema"
+    assert materialized_view_ref.database_name == "my_database"
+    assert materialized_view_ref.type == "materialized_view"
+    assert materialized_view_ref.can_be_renamed is True
+
+
+def test_make_backup_ref(relation_factory, materialized_view_ref):
+    backup_ref = relation_factory.make_backup_ref(materialized_view_ref)
+    assert backup_ref.name == '"my_materialized_view__dbt_backup"'
+
+
+def test_make_intermediate(relation_factory, materialized_view_ref):
+    intermediate_relation = relation_factory.make_intermediate(materialized_view_ref)
+    assert intermediate_relation.name == '"my_materialized_view__dbt_tmp"'
+
+
+def test_make_from_describe_relation_results(
+    relation_factory, materialized_view_describe_relation_results
+):
+    materialized_view = relation_factory.make_from_describe_relation_results(
+        materialized_view_describe_relation_results, RelationType.MaterializedView
+    )
+    assert isinstance(materialized_view, models.PostgresMaterializedViewRelation)
+
+    assert materialized_view.name == "my_materialized_view"
+    assert materialized_view.schema_name == "my_schema"
+    assert materialized_view.database_name == "my_database"
+    assert materialized_view.query == "select 42 from meaning_of_life"
+
+    index_1 = models.PostgresIndexRelation(
+        column_names=frozenset({"id", "value"}),
+        method=models.PostgresIndexMethod.hash,
+        unique=False,
+        render=models.PostgresRenderPolicy,
+    )
+    index_2 = models.PostgresIndexRelation(
+        column_names=frozenset({"id"}),
+        method=models.PostgresIndexMethod.btree,
+        unique=True,
+        render=models.PostgresRenderPolicy,
+    )
+    assert index_1 in materialized_view.indexes
+    assert index_2 in materialized_view.indexes
+
+
+def test_make_from_node(relation_factory, materialized_view_compiled_node):
+    materialized_view = relation_factory.make_from_node(materialized_view_compiled_node)
+    assert isinstance(materialized_view, models.PostgresMaterializedViewRelation)
+
+    assert materialized_view.name == "my_materialized_view"
+    assert materialized_view.schema_name == "my_schema"
+    assert materialized_view.database_name == "my_database"
+    assert materialized_view.query == "select 42 from meaning_of_life"
+
+    index_1 = models.PostgresIndexRelation(
+        column_names=frozenset({"id", "value"}),
+        method=models.PostgresIndexMethod.hash,
+        unique=False,
+        render=models.PostgresRenderPolicy,
+    )
+    index_2 = models.PostgresIndexRelation(
+        column_names=frozenset({"id"}),
+        method=models.PostgresIndexMethod.btree,
+        unique=True,
+        render=models.PostgresRenderPolicy,
+    )
+    assert index_1 in materialized_view.indexes
+    assert index_2 in materialized_view.indexes
diff --git a/tests/unit/materialization_tests/conftest.py b/tests/unit/materialization_tests/conftest.py
new file mode 100644
index 00000000000..b5d6b9a7edc
--- /dev/null
+++ b/tests/unit/materialization_tests/conftest.py
@@ -0,0 +1,163 @@
+from typing import Dict
+
+import agate
+import pytest
+
+from dbt.adapters.materialization import MaterializationFactory
+from dbt.adapters.materialization.models import (
+    MaterializationType,
+    MaterializedViewMaterialization,
+)
+from dbt.adapters.relation import RelationFactory
+from dbt.adapters.relation.models import (
+    MaterializedViewRelation,
+    MaterializedViewRelationChangeset,
+    Relation,
+    RelationRef,
+    RenderPolicy,
+)
+from dbt.contracts.files import FileHash
+from dbt.contracts.graph.nodes import CompiledNode, DependsOn, NodeConfig
+from dbt.contracts.relation import RelationType
+from dbt.node_types import NodeType
+
+
+@pytest.fixture
+def relation_factory() -> RelationFactory:
+    return RelationFactory(
+        relation_models={
+            RelationType.MaterializedView: MaterializedViewRelation,
+        },
+        relation_changesets={
+            RelationType.MaterializedView: MaterializedViewRelationChangeset,
+        },
+        relation_can_be_renamed=set(),
+        render_policy=RenderPolicy(),
+    )
+
+
+@pytest.fixture
+def materialization_factory(relation_factory) -> MaterializationFactory:
+    return MaterializationFactory(
+        relation_factory=relation_factory,
+        materialization_map={
+            MaterializationType.MaterializedView: MaterializedViewMaterialization,
+        },
+    )
+
+
+@pytest.fixture
+def materialized_view_ref(relation_factory) -> RelationRef:
+    return relation_factory.make_ref(
+        name="my_materialized_view",
+        schema_name="my_schema",
+        database_name="my_database",
+        relation_type=RelationType.MaterializedView,
+    )
+
+
+@pytest.fixture
+def view_ref(relation_factory) -> RelationRef:
+    return relation_factory.make_ref(
+        name="my_view",
+        schema_name="my_schema",
+        database_name="my_database",
+        relation_type=RelationType.View,
+    )
+
+
+@pytest.fixture
+def materialized_view_compiled_node() -> CompiledNode:
+    return CompiledNode(
+        alias="my_materialized_view",
+        name="my_materialized_view",
+        database="my_database",
+        schema="my_schema",
+        unique_id="model.root.my_materialized_view",
+        resource_type=NodeType.Model,
+        fqn=["root", "my_materialized_view"],
+        package_name="root",
+        original_file_path="my_materialized_view.sql",
+        refs=[],
+        sources=[],
+        depends_on=DependsOn(),
+        config=NodeConfig.from_dict(
+            {
+                "enabled": True,
+                "materialized": "materialized_view",
+                "persist_docs": {},
+                "post-hook": [],
+                "pre-hook": [],
+                "vars": {},
+                "quoting": {},
+                "column_types": {},
+                "tags": [],
+                "full_refresh": True,
+                "on_configuration_change": "continue",
+            }
+        ),
+        tags=[],
+        path="my_materialized_view.sql",
+        language="sql",
+        raw_code="select 42 from meaning_of_life",
+        compiled_code="select 42 from meaning_of_life",
+        description="",
+        columns={},
+        checksum=FileHash.from_contents(""),
+    )
+
+
+@pytest.fixture
+def materialized_view_describe_relation_results() -> Dict[str, agate.Table]:
+    materialized_view_agate = agate.Table.from_object(
+        [
+            {
+                "name": "my_materialized_view",
+                "schema_name": "my_schema",
+                "database_name": "my_database",
+                "query": "select 42 from meaning_of_life",
+            }
+        ]
+    )
+    return {"relation": materialized_view_agate}
+
+
+@pytest.fixture
+def materialized_view_relation(
+    relation_factory, materialized_view_describe_relation_results
+) -> Relation:
+    return relation_factory.make_from_describe_relation_results(
+        materialized_view_describe_relation_results, RelationType.MaterializedView
+    )
+
+
+"""
+Make sure the fixtures at least work; more thorough testing is done elsewhere.
+"""
+
+
+def test_relation_factory(relation_factory):
+    assert (
+        relation_factory._get_relation_model(RelationType.MaterializedView)
+        == MaterializedViewRelation
+    )
+
+
+def test_materialization_factory(materialization_factory):
+    relation_model = materialization_factory.relation_factory._get_relation_model(
+        RelationType.MaterializedView
+    )
+    assert relation_model == MaterializedViewRelation
+
+
+def test_materialized_view_ref(materialized_view_ref):
+    assert materialized_view_ref.name == "my_materialized_view"
+
+
+def test_materialized_view_model_node(materialized_view_compiled_node):
+    assert materialized_view_compiled_node.name == "my_materialized_view"
+
+
+def test_materialized_view_relation(materialized_view_relation):
+    assert materialized_view_relation.type == RelationType.MaterializedView
+    assert materialized_view_relation.name == "my_materialized_view"
diff --git a/tests/unit/materialization_tests/test_materialization.py b/tests/unit/materialization_tests/test_materialization.py
new file mode 100644
index 00000000000..80821264855
--- /dev/null
+++ b/tests/unit/materialization_tests/test_materialization.py
@@ -0,0 +1,35 @@
+from dataclasses import replace
+
+from dbt.adapters.materialization.models import (
+    MaterializedViewMaterialization,
+    MaterializationBuildStrategy,
+)
+
+
+def test_materialized_view_create(materialized_view_compiled_node, relation_factory):
+    materialization = MaterializedViewMaterialization.from_node(
+        materialized_view_compiled_node, relation_factory
+    )
+    assert materialization.build_strategy == MaterializationBuildStrategy.Create
+    assert materialization.should_revoke_grants is False
+
+
+def test_materialized_view_replace(materialized_view_compiled_node, relation_factory, view_ref):
+    materialization = MaterializedViewMaterialization.from_node(
+        materialized_view_compiled_node, relation_factory, view_ref
+    )
+    assert materialization.build_strategy == MaterializationBuildStrategy.Replace
+    assert materialization.should_revoke_grants is True
+
+
+def test_materialized_view_alter(
+    materialized_view_compiled_node, relation_factory, materialized_view_ref
+):
+    # Alter is Replace for the built-in because there is no change monitoring
+    altered_materialized_view_ref = replace(materialized_view_ref)
+
+    materialization = MaterializedViewMaterialization.from_node(
+        materialized_view_compiled_node, relation_factory, altered_materialized_view_ref
+    )
+    assert materialization.build_strategy == MaterializationBuildStrategy.Replace
+    assert materialization.should_revoke_grants is True
diff --git a/tests/unit/materialization_tests/test_materialization_factory.py b/tests/unit/materialization_tests/test_materialization_factory.py
new file mode 100644
index 00000000000..4f073efa96e
--- /dev/null
+++ b/tests/unit/materialization_tests/test_materialization_factory.py
@@ -0,0 +1,18 @@
+from dbt.adapters.materialization.models import MaterializationType
+from dbt.contracts.relation import RelationType
+
+
+def test_make_from_node(materialization_factory, materialized_view_compiled_node):
+    materialization = materialization_factory.make_from_node(
+        node=materialized_view_compiled_node,
+        existing_relation_ref=None,
+    )
+    assert materialization.type == MaterializationType.MaterializedView
+
+    materialized_view = materialization.target_relation
+    assert materialized_view.type == RelationType.MaterializedView
+
+    assert materialized_view.name == "my_materialized_view"
+    assert materialized_view.schema_name == "my_schema"
+    assert materialized_view.database_name == "my_database"
+    assert materialized_view.query == "select 42 from meaning_of_life"
diff --git a/tests/unit/relation_tests/conftest.py b/tests/unit/relation_tests/conftest.py
new file mode 100644
index 00000000000..5e037e22e0b
--- /dev/null
+++ b/tests/unit/relation_tests/conftest.py
@@ -0,0 +1,171 @@
+from typing import Dict
+
+import agate
+import pytest
+
+from dbt.adapters.materialization import MaterializationFactory
+from dbt.adapters.materialization.models import (
+    MaterializationType,
+    MaterializedViewMaterialization,
+)
+from dbt.adapters.relation import RelationFactory
+from dbt.adapters.relation.models import (
+    MaterializedViewRelation,
+    MaterializedViewRelationChangeset,
+    Relation,
+    RelationRef,
+    RenderPolicy,
+)
+from dbt.contracts.files import FileHash
+from dbt.contracts.graph.nodes import CompiledNode, DependsOn, NodeConfig
+from dbt.contracts.relation import RelationType
+from dbt.node_types import NodeType
+
+
+@pytest.fixture
+def relation_factory() -> RelationFactory:
+    return RelationFactory(
+        relation_types=RelationType,
+        relation_models={
+            RelationType.MaterializedView: MaterializedViewRelation,
+        },
+        relation_changesets={
+            RelationType.MaterializedView: MaterializedViewRelationChangeset,
+        },
+        relation_can_be_renamed=set(),
+        render_policy=RenderPolicy(),
+    )
+
+
+@pytest.fixture
+def materialization_factory(relation_factory) -> MaterializationFactory:
+    return MaterializationFactory(
+        relation_factory=relation_factory,
+        materialization_map={
+            MaterializationType.MaterializedView: MaterializedViewMaterialization,
+        },
+    )
+
+
+@pytest.fixture
+def materialized_view_ref(relation_factory) -> RelationRef:
+    return relation_factory.make_ref(
+        name="my_materialized_view",
+        schema_name="my_schema",
+        database_name="my_database",
+        relation_type=RelationType.MaterializedView,
+    )
+
+
+@pytest.fixture
+def view_ref(relation_factory) -> RelationRef:
+    return relation_factory.make_ref(
+        name="my_view",
+        schema_name="my_schema",
+        database_name="my_database",
+        relation_type=RelationType.View,
+    )
+
+
+@pytest.fixture
+def materialized_view_compiled_node() -> CompiledNode:
+    return CompiledNode(
+        alias="my_materialized_view",
+        name="my_materialized_view",
+        database="my_database",
+        schema="my_schema",
+        unique_id="model.root.my_materialized_view",
+        resource_type=NodeType.Model,
+        fqn=["root", "my_materialized_view"],
+        package_name="root",
+        original_file_path="my_materialized_view.sql",
+        refs=[],
+        sources=[],
+        depends_on=DependsOn(),
+        config=NodeConfig.from_dict(
+            {
+                "enabled": True,
+                "materialized": "materialized_view",
+                "persist_docs": {},
+                "post-hook": [],
+                "pre-hook": [],
+                "vars": {},
+                "quoting": {},
+                "column_types": {},
+                "tags": [],
+                "full_refresh": True,
+                "on_configuration_change": "continue",
+            }
+        ),
+        tags=[],
+        path="my_materialized_view.sql",
+        language="sql",
+        raw_code="select 42 from meaning_of_life",
+        compiled_code="select 42 from meaning_of_life",
+        description="",
+        columns={},
+        checksum=FileHash.from_contents(""),
+    )
+
+
+@pytest.fixture
+def materialized_view_describe_relation_results() -> Dict[str, agate.Table]:
+    materialized_view_agate = agate.Table.from_object(
+        [
+            {
+                "name": "my_materialized_view",
+                "schema_name": "my_schema",
+                "database_name": "my_database",
+                "query": "select 42 from meaning_of_life",
+            }
+        ]
+    )
+    return {"relation": materialized_view_agate}
+
+
+@pytest.fixture
+def materialized_view_relation(
+    relation_factory, materialized_view_describe_relation_results
+) -> Relation:
+    return relation_factory.make_from_describe_relation_results(
+        materialized_view_describe_relation_results, RelationType.MaterializedView
+    )
+
+
+"""
+Make sure the fixtures at least work; more thorough testing is done elsewhere.
+"""
+
+
+def test_relation_factory(relation_factory):
+    assert (
+        relation_factory._get_relation_model(RelationType.MaterializedView)
+        == MaterializedViewRelation
+    )
+
+
+def test_materialization_factory(materialization_factory):
+    relation_model = materialization_factory.relation_factory._get_relation_model(
+        RelationType.MaterializedView
+    )
+    assert relation_model == MaterializedViewRelation
+
+
+def test_materialized_view_ref(materialized_view_ref):
+    assert materialized_view_ref.name == "my_materialized_view"
+
+
+def test_view_ref(view_ref):
+    assert view_ref.name == "my_view"
+
+
+def test_materialized_view_compiled_node(materialized_view_compiled_node):
+    assert materialized_view_compiled_node.name == "my_materialized_view"
+    assert materialized_view_compiled_node.config.full_refresh is True
+    assert materialized_view_compiled_node.config.on_configuration_change == "continue"
+
+
+def test_materialized_view_relation(materialized_view_relation):
+    # covers `materialized_view_describe_relation_results` as a dependent fixture
+    assert materialized_view_relation.type == RelationType.MaterializedView
+    assert materialized_view_relation.name == "my_materialized_view"
diff --git a/tests/unit/relation_tests/model_tests/test_database.py b/tests/unit/relation_tests/model_tests/test_database.py
new file mode 100644
index 00000000000..c65b4dc0ec5
--- /dev/null
+++ b/tests/unit/relation_tests/model_tests/test_database.py
@@ -0,0 +1,25 @@
+from typing import Type
+
+import pytest
+from dbt.exceptions import DbtRuntimeError
+
+from dbt.adapters.relation.models import DatabaseRelation
+
+
+@pytest.mark.parametrize(
+    "config_dict,exception",
+    [
+        ({"name": "my_database"}, None),
+        # there are no validation rules, so "" is a valid name for the default
+        ({"name": ""}, None),
+        ({"wrong_name": "my_database"}, DbtRuntimeError),
+        ({}, DbtRuntimeError),
+    ],
+)
+def test_make_database(config_dict: dict, exception: Type[Exception]):
+    if exception:
+        with pytest.raises(exception):
+            DatabaseRelation.from_dict(config_dict)
+    else:
+        my_database = DatabaseRelation.from_dict(config_dict)
+        assert my_database.name == config_dict.get("name")
diff --git a/tests/unit/relation_tests/model_tests/test_materialized_view.py b/tests/unit/relation_tests/model_tests/test_materialized_view.py
new file mode 100644
index 00000000000..375615fa644
--- /dev/null
+++ b/tests/unit/relation_tests/model_tests/test_materialized_view.py
@@ -0,0 +1,63 @@
+from dataclasses import replace
+from typing import Type
+
+import pytest
+
+from dbt.adapters.relation.models import (
+    MaterializedViewRelation,
+    MaterializedViewRelationChangeset,
+)
+from dbt.exceptions import DbtRuntimeError
+
+
+@pytest.mark.parametrize(
+    "config_dict,exception",
+    [
+        (
+            {
+                "name": "my_materialized_view",
+                "schema": {
+                    "name": "my_schema",
+                    "database": {"name": "my_database"},
+                },
+                "query": "select 1 from my_favorite_table",
+            },
+            None,
+        ),
+        (
+            {
+                "my_name": "my_materialized_view",
+                "schema": {
+                    "name": "my_schema",
+                    "database": {"name": "my_database"},
+                },
+                # missing `query`
+            },
+            DbtRuntimeError,
+        ),
+    ],
+)
+def test_create_materialized_view(config_dict: dict, exception: Type[Exception]):
+    if exception:
+        with pytest.raises(exception):
+            MaterializedViewRelation.from_dict(config_dict)
+    else:
+        my_materialized_view = MaterializedViewRelation.from_dict(config_dict)
+        assert my_materialized_view.name == config_dict.get("name")
+        assert my_materialized_view.schema_name == config_dict.get("schema").get("name")
+        assert my_materialized_view.database_name == config_dict.get("schema").get("database").get(
+            "name"
+        )
+        assert my_materialized_view.query == config_dict.get("query")
+
+
+def test_create_materialized_view_changeset(materialized_view_relation):
+    existing_materialized_view = replace(materialized_view_relation)
+    target_materialized_view = replace(existing_materialized_view)
+
+    changeset = MaterializedViewRelationChangeset.from_relations(
+        existing_materialized_view, target_materialized_view
+    )
+    assert changeset.is_empty is False
+    # the default is a full refresh, even with no change
+    assert changeset.requires_full_refresh is True
diff --git a/tests/unit/relation_tests/model_tests/test_schema.py b/tests/unit/relation_tests/model_tests/test_schema.py
new file mode 100644
index 00000000000..b63e14e6aa2
--- /dev/null
+++ b/tests/unit/relation_tests/model_tests/test_schema.py
@@ -0,0 +1,32 @@
+from typing import Type
+
+import pytest
+from dbt.exceptions import DbtRuntimeError
+
+from dbt.adapters.relation.models import SchemaRelation
+
+
+@pytest.mark.parametrize(
+    "config_dict,exception",
+    [
+        ({"name": "my_schema", "database": {"name": "my_database"}}, None),
+        ({"name": "my_schema", "database": None}, DbtRuntimeError),
+        ({"name": "my_schema"}, DbtRuntimeError),
+        # there are no validation rules, so "" is a valid name for the default
+        ({"name": "", "database": {"name": "my_database"}}, None),
+        ({"wrong_name": "my_database", "database": {"name": "my_database"}}, DbtRuntimeError),
+        (
+            {"name": "my_schema", "database": {"name": "my_database"}, "meaning_of_life": 42},
+            DbtRuntimeError,
+        ),
+        ({}, DbtRuntimeError),
+    ],
+)
+def test_make_schema(config_dict: dict, exception: Type[Exception]):
+    if exception:
+        with pytest.raises(exception):
+            SchemaRelation.from_dict(config_dict)
+    else:
+        my_schema = SchemaRelation.from_dict(config_dict)
+        assert my_schema.name == config_dict.get("name")
+        assert my_schema.database_name == config_dict.get("database").get("name")
diff --git a/tests/unit/relation_tests/test_relation_factory.py b/tests/unit/relation_tests/test_relation_factory.py
new file mode 100644
index 00000000000..5bdc73eff50
--- /dev/null
+++ b/tests/unit/relation_tests/test_relation_factory.py
@@ -0,0 +1,56 @@
+"""
+Uses the following fixtures in `unit/relation_tests/conftest.py`:
+- `relation_factory`
+- `materialized_view_ref`
+"""
+import pytest
+
+from dbt.contracts.relation import RelationType
+from dbt.exceptions import DbtRuntimeError
+
+
+def test_make_ref(materialized_view_ref):
+    assert materialized_view_ref.name == "my_materialized_view"
+    assert materialized_view_ref.schema_name == "my_schema"
+    assert materialized_view_ref.database_name == "my_database"
+    assert materialized_view_ref.type == "materialized_view"
+    assert materialized_view_ref.can_be_renamed is False
+
+
+def test_make_backup_ref(relation_factory, materialized_view_ref):
+    if materialized_view_ref.can_be_renamed:
+        backup_ref = relation_factory.make_backup_ref(materialized_view_ref)
+        assert backup_ref.name == '"my_materialized_view__dbt_backup"'
+    else:
+        with pytest.raises(DbtRuntimeError):
+            relation_factory.make_backup_ref(materialized_view_ref)
+
+
+def test_make_intermediate(relation_factory, materialized_view_ref):
+    if materialized_view_ref.can_be_renamed:
+        intermediate_relation = relation_factory.make_intermediate(materialized_view_ref)
+        assert intermediate_relation.name == '"my_materialized_view__dbt_tmp"'
+    else:
+        with pytest.raises(DbtRuntimeError):
+            relation_factory.make_intermediate(materialized_view_ref)
+
+
+def test_make_from_describe_relation_results(
+    relation_factory, materialized_view_describe_relation_results
+):
+    materialized_view = relation_factory.make_from_describe_relation_results(
+        materialized_view_describe_relation_results, RelationType.MaterializedView
+    )
+
+    assert materialized_view.name == "my_materialized_view"
+    assert materialized_view.schema_name == "my_schema"
+    assert materialized_view.database_name == "my_database"
+    assert materialized_view.query == "select 42 from meaning_of_life"
+
+
+def test_make_from_node(relation_factory, materialized_view_compiled_node):
+    materialized_view = relation_factory.make_from_node(materialized_view_compiled_node)
+
+    assert materialized_view.name == "my_materialized_view"
+    assert materialized_view.schema_name == "my_schema"
+    assert materialized_view.database_name == "my_database"
+    assert materialized_view.query == "select 42 from meaning_of_life"
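
Read together, the fixtures and factory tests above imply the end-to-end usage below. This is a minimal sketch that assumes only the constructor arguments and the `make_ref` signature exercised in these tests; it is not a documented entry point.

from dbt.adapters.relation import RelationFactory
from dbt.adapters.relation.models import (
    MaterializedViewRelation,
    MaterializedViewRelationChangeset,
    RenderPolicy,
)
from dbt.contracts.relation import RelationType

# build a factory the same way the `relation_factory` fixture does
factory = RelationFactory(
    relation_types=RelationType,
    relation_models={RelationType.MaterializedView: MaterializedViewRelation},
    relation_changesets={RelationType.MaterializedView: MaterializedViewRelationChangeset},
    relation_can_be_renamed=set(),
    render_policy=RenderPolicy(),
)

# a ref carries identity (name / schema / database / type) without a full config
ref = factory.make_ref(
    name="my_materialized_view",
    schema_name="my_schema",
    database_name="my_database",
    relation_type=RelationType.MaterializedView,
)
assert ref.type == RelationType.MaterializedView
assert ref.can_be_renamed is False  # `relation_can_be_renamed` is empty above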