diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 6e4fcc6d..6094f06f 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,14 +1,12 @@ --- name: "PyPI Release" -# yamllint disable-line rule:truthy on: push: tags: - 'v*' workflow_dispatch: - jobs: publish: name: PyPI Release diff --git a/.github/workflows/test_cloud.yml b/.github/workflows/test_cloud.yml index d6403b1d..d11a2d11 100644 --- a/.github/workflows/test_cloud.yml +++ b/.github/workflows/test_cloud.yml @@ -17,16 +17,15 @@ jobs: DBT_CH_TEST_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST }} DBT_CH_TEST_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD }} DBT_CH_TEST_CLUSTER_MODE: true - DBT_CH_TEST_CLOUD: true steps: - name: Checkout uses: actions/checkout@v3 - - name: Setup Python 3.11 + - name: Setup Python 3.10 uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.10' - name: Install requirements run: pip3 install -r dev_requirements.txt diff --git a/.github/workflows/test_matrix.yml b/.github/workflows/test_matrix.yml index 3da76748..d204477c 100644 --- a/.github/workflows/test_matrix.yml +++ b/.github/workflows/test_matrix.yml @@ -23,14 +23,15 @@ jobs: strategy: matrix: python-version: + - '3.8' - '3.9' - '3.10' - '3.11' clickhouse-version: + - '22.8' - '23.3' - - '23.8' - - '23.9' - - '23.10' + - '23.5' + - '23.6' - latest steps: @@ -43,10 +44,16 @@ jobs: echo "TEST_SETTINGS_FILE=22_3" >> $GITHUB_ENV echo "DBT_CH_TEST_CH_VERSION=22.3" >> $GITHUB_ENV - - name: Run ClickHouse Cluster Containers - env: - PROJECT_ROOT: ${{ github.workspace }}/tests/integration - run: REPLICA_NUM=1 docker-compose -f ${{ github.workspace }}/tests/integration/docker-compose.yml up -d + - name: Run ClickHouse Container + run: docker run + -d + -p 8123:8123 + -p 9000:9000 + --name clickhouse + -v /var/lib/clickhouse + -v ${{ github.workspace 
}}/tests/integration/test_settings_$TEST_SETTINGS_FILE.xml:/etc/clickhouse-server/users.d/test_settings.xml + --ulimit nofile=262144:262144 + clickhouse/clickhouse-server:${{ matrix.clickhouse-version }} - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -57,8 +64,6 @@ jobs: run: pip3 install -r dev_requirements.txt - name: Run HTTP tests - env: - DBT_CH_TEST_CLUSTER: test_shard run: | PYTHONPATH="${PYTHONPATH}:dbt" pytest tests @@ -66,7 +71,6 @@ jobs: - name: Run Native tests env: DBT_CH_TEST_PORT: 9000 - DBT_CH_TEST_CLUSTER: test_shard run: | PYTHONPATH="${PYTHONPATH}:dbt" pytest tests diff --git a/.gitignore b/.gitignore index 583c17ae..745da238 100644 --- a/.gitignore +++ b/.gitignore @@ -96,4 +96,3 @@ dbt-tut # local development stuff dev/ .python-version -*_project/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 35f64950..e66caf96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,120 +1,3 @@ -### Release [1.7.1], 2023-12-13 -#### Bug Fixes -- Some models with LIMIT clauses were broken in recent releases. This has been fixed. Thanks to -[ptemarvelde](https://github.com/ptemarvelde) for the PR! -- It was possible for incremental models with the delete+insert strategy to fail if ClickHouse "light weight deletes" were -not enabled or the required setting `allow_nondetermistic_mutations` was not enabled and the user did not have permission -to apply it. This condition is now detected on startup, and an exception will be thrown if `use_lw_deletes` is configured -in the profile. Otherwise, a warning will be logged that incremental models will be slower (because such models will -be downgraded to use the `legacy` incremental strategy). This should prevent the confusing behavior in -https://github.com/ClickHouse/dbt-clickhouse/issues/197 by throwing an early exception for an unsupported configuration. - -### Release [1.7.0], 2023-12-07 -#### Improvements -- Minimal compatibility with dbt 1.7.x. 
The date_spine macro and additional automated tests have not been implemented, -but are planned for a future patch release. -- DBT 1.7 introduces a (complex) optimization mechanism for retrieving a dbt catalog which is overkill for ClickHouse -(which has no separate schema/database level), so this release includes some internal catalog changes to simplify that process. - -### Release [1.6.2], 2023-12-06 -#### Bug Fix -- The dbt `on_schema_change` configuration value for incremental models was effectively being ignored. This has been fixed -with a very limited implementation. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/199. Because of the way that -ORDER BY/SORT BY/PARTITION BY/PRIMARY KEYS work in ClickHouse, plus the complexities of correctly transforming ClickHouse data types, -`sync_all_columns` is not currently supported (although an implementation that works for non-key columns is theoretically possible, -such an enhancement is not currently planned). Accordingly, only `ignore`, `fail`, and `append_new_columns` values are supported -for `on_schema_change`. It is also not currently supported for Distributed tables. - -Note that actually appending new columns requires a fallback to the `legacy` incremental strategy, which is quite inefficient, -so while theoretically possible, using `append_new_columns` is not recommended except for very small data volumes. - -### Release [1.6.1], 2023-12-04 -#### Bug Fixes -- Identifier quoting was disabled for tables/databases etc. This would cause failures for schemas or tables using reserved words -or containing special characters. This has been fixed and some macros have been updated to correctly handle such identifiers. -Note that there still may be untested edge cases where nonstandard identifiers cause issues, so they are still not recommended. -Closes https://github.com/ClickHouse/dbt-clickhouse/issues/144. Thanks to [Alexandru Pisarenco](https://github.com/apisarenco) for the -report and initial PR! 
-- The new `allow_automatic_deduplication` setting was not being correctly propagated to the adapter, so setting it to `True` -did not have the intended affect. In addition, this setting is now ignored for older ClickHouse versions that -do not support `CREATE TABLE AS SELECT ... EMPTY`, since the automatic deduplication window is required to allow correct -inserts in Replicated tables on those older versions. Fixes https://github.com/ClickHouse/dbt-clickhouse/issues/216. - -### Release [1.6.0], 2023-11-30 -#### Improvements -- Compatible with dbt 1.6.x. Note that dbt new `clone` feature is not supported, as ClickHouse has no native "light weight" -clone functionality, and copying tables without actual data transfer is not possible in ClickHouse (barring file manipulation -outside ClickHouse itself). -- A new ClickHouse specific Materialized View materialization contributed by [Rory Sawyer](https://github.com/SoryRawyer). -This creates a ClickHouse Materialized view using the `TO` form with the name `_mv` and the associated target -table ``. It's highly recommended to fully understand how ClickHouse materialized views work before using -this materialization. - -### Release [1.5.2], 2023-11-28 -#### Bug Fixes -- The `ON CLUSTER` clause was in the incorrect place for legacy incremental materializations. This has been fixed. Thanks to -[Steven Reitsma](https://github.com/StevenReitsma) for the fix! -- The `ON CLUSTER` DDL for drop tables did not include a SYNC modifier, which might be the cause of some "table already exists" -errors. The `SYNC` modifier has been added to the `on_cluster` macro when dropping relations. -- Fixed a bug where using table settings such as `allow_nullable_key` would break "legacy" incremental materializations. Closes -https://github.com/ClickHouse/dbt-clickhouse/issues/209. Also see the new model `config` property `insert_settings` described -below. 
-- Fixed an issue where incremental materializations would incorrectly exclude duplicated inserted elements due to "automatic" -ClickHouse deduplication on replicated tables. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/213. The fix consists -of always sending a `replicated_deduplication_window=0` table setting when creating the incremental relations. This -behavior can be overridden by setting the new profile parameter `allow_automatic_deduplication` to `True`, although for -general dbt operations this is probably not necessary and not recommended. Finally thanks to Andy(https://github.com/andy-miracl) -for the report and debugging help! - -#### Improvements -- Added a new profile property `allow_automatic_deduplication`, which defaults to `False`. ClickHouse Replicated deduplication is -now disable for incremental inserts, but this property can be set to true if for some reason the default ClickHouse behavior -for inserted blocks is desired. -- Added a new model `config` property `query_settings` for any ClickHouse settings that should be sent with the `INSERT INTO` -or `DELETE_FROM` queries used with materializations. Note this is distinct from the existing property `settings` which is -used for ClickHouse "table" settings in DDL statements like `CREATE TABLE ... AS`. - -### Release [1.5.1], 2023-11-27 -#### Bug Fix -- Fix table materialization for compatibility with SQLFluff. Thanks to [Kristof Szaloki](https://github.com/kris947) for the PR! - -### Release [1.5.0], 2023-11-23 -#### Improvements -- Compatible with dbt 1.5.x -- Contract support (using exact column data types) - -#### Bug Fix -- Fix s3 macro when bucket includes `https://` prefix. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/192. - -### Release [1.4.9], 2023-10-27 -#### Improvement -- Lots of work on Distributed table materializations. 
Big thanks to [gfunc](https://github.com/gfunc) for the additional PR -and [Zhenbang](https://github.com/zli06160) for code review and suggestions. See the README for details on how to -use the new functionality. -#### Bug Fix -- dbt would fail if a cluster name contained a dash. This has been fixed. Thanks to [Andy](https://github.com/the4thamigo-uk -for the PR - -### Release [1.4.8], 2023-08-22 -#### Bug Fix -- Fixed issues with experimental Distributed table materializations. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/179. -Thanks to [Zhebnang](https://github.com/zli06160) for the report and for contributing to the fix with [gfunc](https://github.com/gfunc). - -### Release [1.4.7], 2023-08-09 -#### Bug Fix -- Fixed an exception in "legacy" incremental materializations that are not distributed - -### Release [1.4.6], 2023-07-27 -#### Bug fix -- Lightweight deletes could fail in environments where the HTTP session was not preserved (such as clusters behind a non-sticky -load balancer). This has been fixed by sending the required settings with every request instead of relying on a SET statement. -A similar approach has been used to persist the 'insert_distributed_sync' setting for Distributed table materializations. - -### Release [1.4.5], 2023-07-27 -#### Improvement -- Adds additional experimental support for Distributed table engine models and incremental materialization. See the README for -details. Thanks to [gladkikhtutu](https://github.com/gladkikhtutu) for the contribution! 
- ### Release [1.4.4], 2023-07-19 #### Bug Fixes - Fixed two logging/exception handling issues that would cause exception on startup or when handling some exceptions diff --git a/README.md b/README.md index 8022f214..030df42e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,6 @@ pip install dbt-clickhouse - [x] Table materialization - [x] View materialization - [x] Incremental materialization -- [x] Materialized View materializations (uses the `TO` form of MATERIALIZED VIEW, experimental) - [x] Seeds - [x] Sources - [x] Docs generate @@ -30,20 +29,9 @@ pip install dbt-clickhouse - [x] Snapshots - [x] Most dbt-utils macros (now included in dbt-core) - [x] Ephemeral materialization -- [x] Distributed table materialization (experimental) -- [x] Distributed incremental materialization (experimental) -- [x] Contracts # Usage Notes -## SET Statement Warning -In many environments, using the SET statement to persist a ClickHouse setting across all DBT queries is not reliable -and can cause unexpected failures. This is particularly true when using HTTP connections through a load balancer that -distributes queries across multiple nodes (such as ClickHouse cloud), although in some circumstances this can also -happen with native ClickHouse connections. Accordingly, we recommend configuring any required ClickHouse settings in the -"custom_settings" property of the DBT profile as a best practice, instead of relying on a prehook "SET" statement as -has been occasionally suggested. - ## Database The dbt model relation identifier `database.schema.table` is not compatible with Clickhouse because Clickhouse does not support a `schema`. @@ -67,7 +55,7 @@ your_profile_name: port: [8123] # If not set, defaults to 8123, 8443, 9000, 9440 depending on the secure and driver settings user: [default] # User for all database operations password: [] # Password for the user - cluster: [] If set, certain DDL/table operations will be executed with the `ON CLUSTER` clause using this cluster. 
Distributed materializations require this setting to work. See the following ClickHouse Cluster section for more details. + cluster: [] If set, DDL/table operations will be executed with the `ON CLUSTER` clause using this cluster verify: [True] # Validate TLS certificate if using TLS/SSL secure: [False] # Use TLS (native protocol) or HTTPS (http protocol) retries: [1] # Number of times to retry a "retriable" database exception (such as a 503 'Service Unavailable' error) @@ -77,9 +65,7 @@ your_profile_name: cluster_mode: [False] # Use specific settings designed to improve operation on Replicated databases (recommended for ClickHouse Cloud) use_lw_deletes: [False] Use the strategy `delete+insert` as the default incremental strategy. check_exchange: [True] # Validate that clickhouse support the atomic EXCHANGE TABLES command. (Not needed for most ClickHouse versions) - local_suffix [_local] # Table suffix of local tables on shards for distributed materializations. - allow_automatic_deduplication [False] # Enable ClickHouse automatic deduplication for Replicated tables - custom_settings: [{}] # A dictionary/mapping of custom ClickHouse settings for the connection - default is empty. + custom_settings: [{}] # A dictionary/mapping of custom ClickHouse settings for the connection - default is empty. # Native (clickhouse-driver) connection settings sync_request_timeout: [5] Timeout for server ping @@ -89,52 +75,19 @@ your_profile_name: ## Model Configuration -| Option | Description | Default if any | -|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| -| engine | The table engine (type of table) to use when creating tables | `MergeTree()` | -| order_by | A tuple of column names or arbitrary expressions. 
This allows you to create a small sparse index that helps find data faster. | `tuple()` | -| partition_by | A partition is a logical combination of records in a table by a specified criterion. The partition key can be any expression from the table columns. | | -| sharding_key | Sharding key determines the destination server when inserting into distributed engine table. The sharding key can be random or as an output of a hash function | `rand()`) | -| primary_key | Like order_by, a ClickHouse primary key expression. If not specified, ClickHouse will use the order by expression as the primary key | | -| unique_key | A tuple of column names that uniquely identify rows. Used with incremental models for updates. | | -| inserts_only | If set to True for an incremental model, incremental updates will be inserted directly to the target table without creating intermediate table. It has been deprecated in favor of the `append` incremental `strategy`, which operates in the same way | | -| incremental_strategy | Incremental model update strategy of `delete+insert` or `append`. See the following Incremental Model Strategies | `default` | -| incremental_predicates | Additional conditions to be applied to the incremental materialization (only applied to `delete+insert` strategy | | -| settings | A map/dictionary of "TABLE" settings to be used to DDL statements like 'CREATE TABLE' with this model | | -| query_settings | A map/dictionary of ClickHouse user level settings to be used with `INSERT` or `DELETE` statements in conjunction with this model | | - -## ClickHouse Cluster - -The `cluster` setting in profile enables dbt-clickhouse to run against a ClickHouse cluster. 
- -### Effective Scope - - -if `cluster` is set in profile, `on_cluster_clause` now will return cluster info for: -- Database creation -- View materialization -- Distributed materializations -- Models with Replicated engines - -table and incremental materializations with non-replicated engine will not be affected by `cluster` setting (model would be created on the connected node only). - -### Compatibility - - -If a model has been created without a `cluster` setting, dbt-clickhouse will detect the situation and run all DDL/DML without `on cluster` clause for this model. - - -## A Note on Model Settings - -ClickHouse has several types/levels of "settings". In the model configuration above, two types of these are configurable. `settings` means the `SETTINGS` -clause used in `CREATE TABLE/VIEW` types of DDL statements, so this is generally settings that are specific to the specific ClickHouse table engine. The new -`query_settings` is use to add a `SETTINGS` clause to the `INSERT` and `DELETE` queries used for model materialization (including incremental materializations). -There are hundreds of ClickHouse settings, and it's not always clear which is a "table" setting and which is a "user" setting (although the latter are generally -available in the `system.settings` table.) In general the defaults are recommended, and any use of these properties should be carefully researched and tested. - - +| Option | Description | Required? | +|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------| +| engine | The table engine (type of table) to use when creating tables | Optional (default: `MergeTree()`) | +| order_by | A tuple of column names or arbitrary expressions. 
This allows you to create a small sparse index that helps find data faster. | Optional (default: `tuple()`) | +| partition_by | A partition is a logical combination of records in a table by a specified criterion. The partition key can be any expression from the table columns. | Optional | +| primary_key | Like order_by, a ClickHouse primary key expression. If not specified, ClickHouse will use the order by expression as the primary key | +| unique_key | A tuple of column names that uniquely identify rows. Used with incremental models for updates. | Optional | +| inserts_only | If set to True for an incremental model, incremental updates will be inserted directly to the target table without creating intermediate table. It has been deprecated in favor of the `append` incremental `strategy`, which operates in the same way | Optional | +| incremental_strategy | Incremental model update strategy of `delete+insert` or `append`. See the following Incremental Model Strategies | Optional (default: `default`) | +| incremental_predicates | Additional conditions to be applied to the incremental materialization (only applied to `delete+insert` strategy | ## Known Limitations +* Replicated tables (combined with the `cluster` profile setting) are available using the `on_cluster_clause` macro but are not included in the test suite and not formally tested. * Ephemeral models/CTEs don't work if placed before the "INSERT INTO" in a ClickHouse insert statement, see https://github.com/ClickHouse/ClickHouse/issues/30323. This should not affect most models, but care should be taken where an ephemeral model is placed in model definitions and other SQL statements. @@ -179,7 +132,7 @@ The following macros are included to facilitate creating ClickHouse specific tab - `partition_cols` -- Uses the `partition_by` model configuration property to assign a ClickHouse partition key. No partition key is assigned by default. 
- `order_cols` -- Uses the `order_by` model configuration to assign a ClickHouse order by/sorting key. If not specified ClickHouse will use an empty tuple() and the table will be unsorted - `primary_key_clause` -- Uses the `primary_key` model configuration property to assign a ClickHouse primary key. By default, primary key is set and ClickHouse will use the order by clause as the primary key. -- `on_cluster_clause` -- Uses the `cluster` profile property to add an `ON CLUSTER` clause to certain dbt-operations: distributed materializations, views creation, database creation. +- `on_cluster_clause` -- Uses the `cluster` profile property to add an `ON CLUSTER` clause to all dbt-operations ### s3Source Helper Macro @@ -195,123 +148,10 @@ keys used to populate the parameters of the S3 table function: | fmt | The expected ClickHouse input format (such as `TSV` or `CSVWithNames`) of the referenced S3 objects. | | structure | The column structure of the data in bucket, as a list of name/datatype pairs, such as `['id UInt32', 'date DateTime', 'value String']` If not provided ClickHouse will infer the structure. | | aws_access_key_id | The S3 access key id. | -| aws_secret_access_key | The S3 secret key. | +| aws_secret_access_key | The S3 secret key. | | compression | The compression method used with the S3 objects. If not provided ClickHouse will attempt to determine compression based on the file name. | -See the [S3 test file](https://github.com/ClickHouse/dbt-clickhouse/blob/main/tests/integration/adapter/clickhouse/test_clickhouse_s3.py) for examples of how to use this macro. - -# Contracts and Constraints - -Only exact column type contracts are supported. For example, a contract with a UInt32 column type will fail if the model returns a UInt64 or other integer type. -ClickHouse also support _only_ `CHECK` constraints on the entire table/model. Primary key, foreign key, unique, and column level CHECK constraints are not supported. 
-(See ClickHouse documentation on primary/order by keys.) - -# Materialized Views (Experimental) -A `materialized_view` materialization should be a `SELECT` from an existing (source) table. The adapter will create a target table with the model name -and a ClickHouse MATERIALIZED VIEW with the name `_mv`. Unlike PostgreSQL, a ClickHouse materialized view is not "static" (and has -no corresponding REFRESH operation). Instead, it acts as an "insert trigger", and will insert new rows into the target table using the defined `SELECT` -"transformation" in the view definition on rows inserted into the source table. See the [test file] -(https://github.com/ClickHouse/dbt-clickhouse/blob/main/tests/integration/adapter/materialized_view/test_materialized_view.py) for an introductory example -of how to use this functionality. - -# Distributed materializations - -Notes: - -- dbt-clickhouse queries now automatically include the setting `insert_distributed_sync = 1` in order to ensure that downstream incremental -materialization operations execute correctly. This could cause some distributed table inserts to run more slowly than expected. - -## Distributed table materialization - -Distributed table created with following steps: -1. Creates temp view with sql query to get right structure -2. Create empty local tables based on view -3. Create distributed table based on local tables. -4. Data inserts into distributed table, so it is distributed across shards without duplicating. 
- -### Distributed table model example -```sql -{{ - config( - materialized='distributed_table', - order_by='id, created_at', - sharding_key='cityHash64(id)', - engine='ReplacingMergeTree' - ) -}} - -select id, created_at, item from {{ source('db', 'table') }} -``` - -### Generated migrations - -```sql -CREATE TABLE db.table_local on cluster cluster -( - `id` UInt64, - `created_at` DateTime, - `item` String -) -ENGINE = ReplacingMergeTree -ORDER BY (id, created_at) -SETTINGS index_granularity = 8192; - - -CREATE TABLE db.table on cluster cluster -( - `id` UInt64, - `created_at` DateTime, - `item` String -) -ENGINE = Distributed('cluster', 'db', 'table_local', cityHash64(id)); -``` - -## Distributed incremental materialization - -Incremental model based on the same idea as distributed table, the main difficulty is to process all incremental strategies correctly. - -1. _The Append Strategy_ just insert data into distributed table. -2. _The Delete+Insert_ Strategy creates distributed temp table to work with all data on every shard. -3. _The Default (Legacy) Strategy_ creates distributed temp and intermediate tables for the same reason. - -Only shard tables are replacing, because distributed table does not keep data. -The distributed table reloads only when the full_refresh mode is enabled or the table structure may have changed. 
- -### Distributed incremental model example -```sql -{{ - config( - materialized='distributed_incremental', - engine='MergeTree', - incremental_strategy='append', - unique_key='id,created_at' - ) -}} - -select id, created_at, item from {{ source('db', 'table') }} -``` - -### Generated migrations - -```sql -CREATE TABLE db.table_local on cluster cluster -( - `id` UInt64, - `created_at` DateTime, - `item` String -) -ENGINE = MergeTree -SETTINGS index_granularity = 8192; - - -CREATE TABLE db.table on cluster cluster -( - `id` UInt64, - `created_at` DateTime, - `item` String -) -ENGINE = Distributed('cluster', 'db', 'table_local', cityHash64(id)); -``` +See the [S3 test file](https://github.com/ClickHouse/dbt-clickhouse/blob/main/tests/integration/adapter/test_s3.py) for examples of how to use this macro. # Running Tests @@ -330,7 +170,6 @@ configuration file (this file should not be checked into git). The following en 8. DBT_CH_TEST_CH_VERSION - ClickHouse docker image to use. Defaults to `latest` 9. DBT_CH_TEST_INCLUDE_S3 - Include S3 tests. Default=False since these are currently dependent on a specific ClickHouse S3 bucket/test dataset 10. DBT_CH_TEST_CLUSTER_MODE - Use the profile value -11. 
DBT_CH_TEST_CLUSTER - ClickHouse cluster name, if DBT_CH_TEST_USE_DOCKER set to true, only `test_replica` and `test_shard` is valid (see tests/test_config.xml for cluster settings) ## Original Author diff --git a/dbt/adapters/clickhouse/__version__.py b/dbt/adapters/clickhouse/__version__.py index 1f796f9b..f91302bf 100644 --- a/dbt/adapters/clickhouse/__version__.py +++ b/dbt/adapters/clickhouse/__version__.py @@ -1 +1 @@ -version = '1.7.1' +version = '1.4.4' diff --git a/dbt/adapters/clickhouse/cache.py b/dbt/adapters/clickhouse/cache.py deleted file mode 100644 index 28d9fa21..00000000 --- a/dbt/adapters/clickhouse/cache.py +++ /dev/null @@ -1,432 +0,0 @@ -import threading -from collections import namedtuple -from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple - -from dbt.events.functions import fire_event, fire_event_if -from dbt.events.types import CacheAction, CacheDumpGraph -from dbt.exceptions import ( - NewNameAlreadyInCacheError, - NoneRelationFoundError, - TruncatedModelNameCausedCollisionError, -) -from dbt.flags import get_flags - -ReferenceKey = namedtuple("ReferenceKey", "schema identifier") - - -def dot_separated(key: ReferenceKey) -> str: - """Return the key in dot-separated string form. - - :param _ReferenceKey key: The key to stringify. - """ - return ".".join(map(str, key)) - - -class CachedRelation: - """Nothing about _CachedRelation is guaranteed to be thread-safe! - - :attr str database: The schema of this relation. - :attr str identifier: The identifier of this relation. - :attr Dict[ReferenceKey, CachedRelation] referenced_by: The relations - that refer to this relation. - :attr BaseRelation inner: The underlying dbt relation. 
- """ - - def __init__(self, inner): - self.referenced_by = {} - self.inner = inner - - def __str__(self) -> str: - return "CachedRelation(schema={}, identifier={}, inner={})".format( - self.schema, self.identifier, self.inner - ) - - @property - def schema(self) -> Optional[str]: - return self.inner.schema - - @property - def identifier(self) -> Optional[str]: - return self.inner.identifier - - def __copy__(self): - new = self.__class__(self.inner) - new.__dict__.update(self.__dict__) - return new - - def __deepcopy__(self, memo): - new = self.__class__(self.inner.incorporate()) - new.__dict__.update(self.__dict__) - new.referenced_by = deepcopy(self.referenced_by, memo) - - def is_referenced_by(self, key): - return key in self.referenced_by - - def key(self): - """Get the _ReferenceKey that represents this relation - - :return _ReferenceKey: A key for this relation. - """ - return ReferenceKey(self.schema, self.identifier) - - def add_reference(self, referrer: "CachedRelation"): - """Add a reference from referrer to self, indicating that if this node - were drop...cascaded, the referrer would be dropped as well. - - :param _CachedRelation referrer: The node that refers to this node. - """ - self.referenced_by[referrer.key()] = referrer - - def collect_consequences(self): - """Recursively collect a set of _ReferenceKeys that would - consequentially get dropped if this were dropped via - "drop ... cascade". - - :return Set[_ReferenceKey]: All the relations that would be dropped - """ - consequences = {self.key()} - for relation in self.referenced_by.values(): - consequences.update(relation.collect_consequences()) - return consequences - - def release_references(self, keys): - """Non-recursively indicate that an iterable of _ReferenceKey no longer - exist. Unknown keys are ignored. - - :param Iterable[_ReferenceKey] keys: The keys to drop. 
- """ - keys = set(self.referenced_by) & set(keys) - for key in keys: - self.referenced_by.pop(key) - - def rename(self, new_relation): - """Rename this cached relation to new_relation. - Note that this will change the output of key(), all refs must be - updated! - - :param _CachedRelation new_relation: The new name to apply to the - relation - """ - # Relations store this stuff inside their `path` dict. But they - # also store a table_name, and usually use it in their .render(), - # so we need to update that as well. It doesn't appear that - # table_name is ever anything but the identifier (via .create()) - self.inner = self.inner.incorporate( - path={"identifier": new_relation.inner.identifier}, - ) - - def rename_key(self, old_key, new_key): - """Rename a reference that may or may not exist. Only handles the - reference itself, so this is the other half of what `rename` does. - - If old_key is not in referenced_by, this is a no-op. - - :param _ReferenceKey old_key: The old key to be renamed. - :param _ReferenceKey new_key: The new key to rename to. - :raises InternalError: If the new key already exists. - """ - if new_key in self.referenced_by: - raise NewNameAlreadyInCacheError(old_key, new_key) - - if old_key not in self.referenced_by: - return - value = self.referenced_by.pop(old_key) - self.referenced_by[new_key] = value - - def dump_graph_entry(self): - """Return a key/value pair representing this key and its referents. - - return List[str]: The dot-separated form of all referent keys. - """ - return [dot_separated(r) for r in self.referenced_by] - - -class ClickHouseRelationsCache: - """A cache of the relations known to dbt. Keeps track of relationships - declared between tables and handles renames/drops as a real database would. - - :attr Dict[_ReferenceKey, _CachedRelation] relations: The known relations. - :attr threading.RLock lock: The lock around relations, held during updates. - The adapters also hold this lock while filling the cache. 
- :attr Set[str] schemas: The set of known/cached schemas - """ - - def __init__(self) -> None: - self.relations: Dict[ReferenceKey, CachedRelation] = {} - self.lock = threading.RLock() - self.schemas: Set[Optional[str]] = set() - - def add_schema( - self, - _database: Optional[str], - schema: Optional[str], - ) -> None: - """Add a schema to the set of known schemas (case-insensitive) - - :param _database: The database name to add (not used in ClickHouse) - :param schema: The schema name to add. - """ - self.schemas.add(schema) - - def drop_schema( - self, - _database: Optional[str], - schema: Optional[str], - ) -> None: - """Drop the given schema and remove it from the set of known schemas. - - Then remove all its contents (and their dependents, etc) as well. - """ - key = schema - if key not in self.schemas: - return - - # avoid iterating over self.relations while removing things by - # collecting the list first. - - with self.lock: - to_remove = self._list_relations_in_schema(schema) - self._remove_all(to_remove) - # handle a drop_schema race by using discard() over remove() - self.schemas.discard(key) - - def update_schemas(self, schemas: Iterable[Tuple[Optional[str], str]]): - """Add multiple schemas to the set of known schemas - - :param schemas: An iterable of the schema names to add. - """ - self.schemas.update(s[1] for s in schemas) - - def __contains__(self, schema_id: Tuple[Optional[str], str]): - """A schema is 'in' the relations cache if it is in the set of cached - schemas. - - :param schema_id: The db name and schema name to look up. - """ - return schema_id[1] in self.schemas - - def dump_graph(self): - """Dump a key-only representation of the schema to a dictionary. Every - known relation is a key with a value of a list of keys it is referenced - by. - """ - # we have to hold the lock for the entire dump, if other threads modify - # self.relations or any cache entry's referenced_by during iteration - # it's a runtime error! 
- with self.lock: - return {dot_separated(k): str(v.dump_graph_entry()) for k, v in self.relations.items()} - - def _setdefault(self, relation: CachedRelation): - """Add a relation to the cache, or return it if it already exists. - - :param CachedRelation relation: The relation to set or get. - :return CachedRelation: The relation stored under the given relation's - key - """ - self.add_schema(None, relation.schema) - key = relation.key() - return self.relations.setdefault(key, relation) - - def add(self, relation): - """Add the relation inner to the cache - - :param BaseRelation relation: The underlying relation. - """ - flags = get_flags() - cached = CachedRelation(relation) - fire_event_if( - flags.LOG_CACHE_EVENTS, - lambda: CacheDumpGraph(before_after="before", action="adding", dump=self.dump_graph()), - ) - fire_event(CacheAction(action="add_relation", ref_key=_make_ref_key_dict(cached))) - - with self.lock: - self._setdefault(cached) - fire_event_if( - flags.LOG_CACHE_EVENTS, - lambda: CacheDumpGraph(before_after="after", action="adding", dump=self.dump_graph()), - ) - - def _remove_refs(self, keys): - """Removes all references to all entries in keys. This does not - cascade! - - :param Iterable[_ReferenceKey] keys: The keys to remove. - """ - # remove direct refs - for key in keys: - del self.relations[key] - # then remove all entries from each child - for cached in self.relations.values(): - cached.release_references(keys) - - def drop(self, relation): - """Drop the named relation and cascade it appropriately to all - dependent relations. - - Because dbt proactively does many `drop relation if exist ... cascade` - that are noops, nonexistent relation drops cause a debug log and no - other actions. - - :param relation relation: The relation to drop. 
- - """ - dropped_key = _make_ref_key(relation) - dropped_key_msg = _make_ref_key_dict(relation) - fire_event(CacheAction(action="drop_relation", ref_key=dropped_key_msg)) - with self.lock: - if dropped_key not in self.relations: - fire_event(CacheAction(action="drop_missing_relation", ref_key=dropped_key_msg)) - return - consequences = self.relations[dropped_key].collect_consequences() - # convert from a list of _ReferenceKeys to a list of ReferenceKeyMsgs - consequence_msgs = [key._asdict() for key in consequences] - fire_event( - CacheAction( - action="drop_cascade", ref_key=dropped_key_msg, ref_list=consequence_msgs - ) - ) - self._remove_refs(consequences) - - def _rename_relation(self, old_key, new_relation): - """Rename a relation named old_key to new_key, updating references. - Return whether here was a key to rename. - - :param _ReferenceKey old_key: The existing key, to rename from. - :param _CachedRelation new_relation: The new relation, to rename to. - """ - # On the database level, a rename updates all values that were - # previously referenced by old_name to be referenced by new_name. - # basically, the name changes but some underlying ID moves. Kind of - # like an object reference! - relation = self.relations.pop(old_key) - new_key = new_relation.key() - - # relation has to rename its innards, so it needs the _CachedRelation. - relation.rename(new_relation) - # update all the relations that refer to it - for cached in self.relations.values(): - if cached.is_referenced_by(old_key): - fire_event( - CacheAction( - action="update_reference", - ref_key=_make_ref_key_dict(old_key), - ref_key_2=_make_ref_key_dict(new_key), - ref_key_3=_make_ref_key_dict(cached.key()), - ) - ) - - cached.rename_key(old_key, new_key) - - self.relations[new_key] = relation - # also fixup the schemas! 
- self.add_schema(None, new_key.schema) - - return True - - def _check_rename_constraints(self, old_key, new_key): - """Check the rename constraints, and return whether the rename can proceed. - - If the new key is already present, that is an error. - If the old key is absent, we debug log and return False, assuming it's - a temp table being renamed. - - :param _ReferenceKey old_key: The existing key, to rename from. - :param _ReferenceKey new_key: The new key, to rename to. - :return bool: If the old relation exists for renaming. - :raises InternalError: If the new key is already present. - """ - if new_key in self.relations: - # Tell user when collision caused by model names truncated during - # materialization. - raise TruncatedModelNameCausedCollisionError(new_key, self.relations) - - if old_key not in self.relations: - fire_event(CacheAction(action="temporary_relation", ref_key=old_key._asdict())) - return False - return True - - def rename(self, old, new): - """Rename the old schema/identifier to the new schema/identifier and - update references. - - If the new schema/identifier is already present, that is an error. - If the schema/identifier key is absent, we only debug log and return, - assuming it's a temp table being renamed. - - :param BaseRelation old: The existing relation name information. - :param BaseRelation new: The new relation name information. - :raises InternalError: If the new key is already present. 
- """ - old_key = _make_ref_key(old) - new_key = _make_ref_key(new) - fire_event( - CacheAction( - action="rename_relation", - ref_key=old_key._asdict(), - ref_key_2=new_key._asdict(), - ) - ) - flags = get_flags() - fire_event_if( - flags.LOG_CACHE_EVENTS, - lambda: CacheDumpGraph(before_after="before", action="rename", dump=self.dump_graph()), - ) - - with self.lock: - if self._check_rename_constraints(old_key, new_key): - self._rename_relation(old_key, CachedRelation(new)) - else: - self._setdefault(CachedRelation(new)) - - fire_event_if( - flags.LOG_CACHE_EVENTS, - lambda: CacheDumpGraph(before_after="after", action="rename", dump=self.dump_graph()), - ) - - def get_relations(self, _database: Optional[str], schema: Optional[str]) -> List[Any]: - """Yield all relations matching the given schema (ClickHouse database).""" - with self.lock: - results = [r.inner for r in self.relations.values() if r.schema == schema] - - if None in results: - raise NoneRelationFoundError() - return results - - def clear(self): - """Clear the cache""" - with self.lock: - self.relations.clear() - self.schemas.clear() - - def _list_relations_in_schema(self, schema: Optional[str]) -> List[CachedRelation]: - """Get the relations in a schema. Callers should hold the lock.""" - key = schema - - to_remove: List[CachedRelation] = [] - for cachekey, relation in self.relations.items(): - if cachekey.schema == key: - to_remove.append(relation) - return to_remove - - def _remove_all(self, to_remove: List[CachedRelation]): - """Remove all the listed relations. Ignore relations that have been - cascaded out. 
- """ - for relation in to_remove: - # it may have been cascaded out already - drop_key = _make_ref_key(relation) - if drop_key in self.relations: - self.drop(drop_key) - - -def _make_ref_key(relation: Any) -> ReferenceKey: - return ReferenceKey(relation.schema, relation.identifier) - - -def _make_ref_key_dict(relation: Any): - return { - "schema": relation.schema, - "identifier": relation.identifier, - } diff --git a/dbt/adapters/clickhouse/connections.py b/dbt/adapters/clickhouse/connections.py index dcb411f8..85c141be 100644 --- a/dbt/adapters/clickhouse/connections.py +++ b/dbt/adapters/clickhouse/connections.py @@ -1,7 +1,7 @@ import re import time from contextlib import contextmanager -from typing import Any, Optional, Tuple, Union +from typing import Any, Optional, Tuple import agate import dbt.exceptions @@ -73,7 +73,7 @@ def get_table_from_response(cls, response, column_names) -> agate.Table: return dbt.clients.agate_helper.table_from_data_flat(data, column_names) def execute( - self, sql: str, auto_begin: bool = False, fetch: bool = False, limit: Optional[int] = None + self, sql: str, auto_begin: bool = False, fetch: bool = False ) -> Tuple[AdapterResponse, agate.Table]: # Don't try to fetch result of clustered DDL responses, we don't know what to do with them if fetch and ddl_re.match(sql): @@ -141,8 +141,3 @@ def begin(self): def commit(self): pass - - @classmethod - def data_type_code_to_name(cls, type_code: Union[int, str]) -> str: - assert isinstance(type_code, int) - return '' diff --git a/dbt/adapters/clickhouse/credentials.py b/dbt/adapters/clickhouse/credentials.py index d0775c6a..4ccf2d63 100644 --- a/dbt/adapters/clickhouse/credentials.py +++ b/dbt/adapters/clickhouse/credentials.py @@ -16,7 +16,7 @@ class ClickHouseCredentials(Credentials): port: Optional[int] = None user: Optional[str] = 'default' retries: int = 1 - database: Optional[str] = '' + database: Optional[str] = None schema: Optional[str] = 'default' password: str = '' cluster: 
Optional[str] = None @@ -33,7 +33,6 @@ class ClickHouseCredentials(Credentials): custom_settings: Optional[Dict[str, Any]] = None use_lw_deletes: bool = False local_suffix: str = 'local' - allow_automatic_deduplication: bool = False @property def type(self): @@ -44,7 +43,7 @@ def unique_field(self): return self.host def __post_init__(self): - if self.database and self.database != self.schema: + if self.database is not None and self.database != self.schema: raise DbtRuntimeError( f' schema: {self.schema} \n' f' database: {self.database} \n' @@ -52,7 +51,7 @@ def __post_init__(self): f'On Clickhouse, database must be omitted or have the same value as' f' schema.' ) - self.database = '' + self.database = None def _connection_keys(self): return ( @@ -74,5 +73,4 @@ def _connection_keys(self): 'check_exchange', 'custom_settings', 'use_lw_deletes', - 'allow_automatic_deduplication', ) diff --git a/dbt/adapters/clickhouse/dbclient.py b/dbt/adapters/clickhouse/dbclient.py index c693a82e..2051d635 100644 --- a/dbt/adapters/clickhouse/dbclient.py +++ b/dbt/adapters/clickhouse/dbclient.py @@ -1,23 +1,10 @@ import uuid from abc import ABC, abstractmethod -from typing import Dict -from dbt.exceptions import DbtConfigError, DbtDatabaseError, FailedToConnectError +from dbt.exceptions import DbtDatabaseError, FailedToConnectError from dbt.adapters.clickhouse.credentials import ClickHouseCredentials -from dbt.adapters.clickhouse.errors import ( - lw_deletes_not_enabled_error, - lw_deletes_not_enabled_warning, - nd_mutations_not_enabled_error, - nd_mutations_not_enabled_warning, -) from dbt.adapters.clickhouse.logger import logger -from dbt.adapters.clickhouse.query import quote_identifier -from dbt.adapters.clickhouse.util import compare_versions - -LW_DELETE_SETTING = 'allow_experimental_lightweight_delete' -ND_MUTATION_SETTING = 'allow_nondeterministic_mutations' -DEDUP_WINDOW_SETTING = 'replicated_deduplication_window' def get_db_client(credentials: ClickHouseCredentials): @@ 
-76,7 +63,6 @@ def __init__(self, credentials: ClickHouseCredentials): self._conn_settings['database_replicated_enforce_synchronous_settings'] = '1' self._conn_settings['insert_quorum'] = 'auto' self._conn_settings['mutations_sync'] = '2' - self._conn_settings['insert_distributed_sync'] = '1' self._client = self._create_client(credentials) check_exchange = credentials.check_exchange and not credentials.cluster_mode try: @@ -89,12 +75,6 @@ def __init__(self, credentials: ClickHouseCredentials): except Exception as ex: self.close() raise ex - self._model_settings = {} - if ( - not credentials.allow_automatic_deduplication - and compare_versions(self._server_version(), '22.7.1.2484') >= 0 - ): - self._model_settings[DEDUP_WINDOW_SETTING] = '0' @abstractmethod def query(self, sql: str, **kwargs): @@ -104,10 +84,6 @@ def query(self, sql: str, **kwargs): def command(self, sql: str, **kwargs): pass - @abstractmethod - def columns_in_query(self, sql: str, **kwargs): - pass - @abstractmethod def get_ch_setting(self, setting_name): pass @@ -131,66 +107,39 @@ def _set_client_database(self): def _server_version(self): pass - def update_model_settings(self, model_settings: Dict[str, str]): - for key, value in self._model_settings.items(): - if key not in model_settings: - model_settings[key] = value - def _check_lightweight_deletes(self, requested: bool): - lw_deletes, lw_read_only = self.get_ch_setting(LW_DELETE_SETTING) - nd_mutations, nd_mutations_read_only = self.get_ch_setting(ND_MUTATION_SETTING) - if lw_deletes is None or nd_mutations is None: + lw_deletes = self.get_ch_setting('allow_experimental_lightweight_delete') + if lw_deletes is None: if requested: - logger.warning(lw_deletes_not_enabled_error) + logger.warning( + 'use_lw_deletes requested but are not available on this ClickHouse server' + ) return False, False - lw_deletes = int(lw_deletes) > 0 - if not lw_deletes: - if lw_read_only: - lw_deletes = False - if requested: - raise 
DbtConfigError(lw_deletes_not_enabled_error) - logger.warning(lw_deletes_not_enabled_warning) - else: - try: - self.command(f'SET {LW_DELETE_SETTING} = 1') - self._conn_settings[LW_DELETE_SETTING] = '1' - lw_deletes = True - except DbtDatabaseError: - logger.warning(lw_deletes_not_enabled_warning) - nd_mutations = int(nd_mutations) > 0 - if lw_deletes and not nd_mutations: - if nd_mutations_read_only: - nd_mutations = False - if requested: - raise DbtConfigError(nd_mutations_not_enabled_error) - logger.warning(nd_mutations_not_enabled_warning) - else: - try: - self.command(f'SET {ND_MUTATION_SETTING} = 1') - self._conn_settings[ND_MUTATION_SETTING] = '1' - nd_mutations = True - except DbtDatabaseError: - logger.warning(nd_mutations_not_enabled_warning) - if lw_deletes and nd_mutations: + lw_deletes = int(lw_deletes) + if lw_deletes == 1: return True, requested - return False, False + if not requested: + return False, False + try: + self.command('SET allow_experimental_lightweight_delete = 1') + self.command('SET allow_nondeterministic_mutations = 1') + return True, True + except DbtDatabaseError as ex: + logger.warning( + 'use_lw_deletes requested but cannot enable on this ClickHouse server %s', str(ex) + ) + return False, False def _ensure_database(self, database_engine, cluster_name) -> None: if not self.database: return - check_db = f'EXISTS DATABASE {quote_identifier(self.database)}' + check_db = f'EXISTS DATABASE {self.database}' try: db_exists = self.command(check_db) if not db_exists: engine_clause = f' ENGINE {database_engine} ' if database_engine else '' - cluster_clause = ( - f' ON CLUSTER "{cluster_name}" ' - if cluster_name is not None and cluster_name.strip() != '' - else '' - ) - self.command( - f'CREATE DATABASE IF NOT EXISTS {quote_identifier(self.database)}{cluster_clause}{engine_clause}' - ) + cluster_clause = f' ON CLUSTER {cluster_name} ' if cluster_name is not None else '' + self.command(f'CREATE DATABASE 
{self.database}{cluster_clause}{engine_clause}') db_exists = self.command(check_db) if not db_exists: raise FailedToConnectError( @@ -213,7 +162,7 @@ def _check_atomic_exchange(self) -> bool: table_id = str(uuid.uuid1()).replace('-', '') swap_tables = [f'__dbt_exchange_test_{x}_{table_id}' for x in range(0, 2)] for table in swap_tables: - self.command(create_cmd.format(quote_identifier(table))) + self.command(create_cmd.format(table)) try: self.command('EXCHANGE TABLES {} AND {}'.format(*swap_tables)) return True diff --git a/dbt/adapters/clickhouse/errors.py b/dbt/adapters/clickhouse/errors.py deleted file mode 100644 index bfcd5f95..00000000 --- a/dbt/adapters/clickhouse/errors.py +++ /dev/null @@ -1,45 +0,0 @@ -schema_change_fail_error = """ -The source and target schemas on this incremental model are out of sync. - They can be reconciled in several ways: - - set the `on_schema_change` config to `append_new_columns`. (ClickHouse does not support `sync_all_columns`) - - Re-run the incremental model with `full_refresh: True` to update the target schema. - - update the schema manually and re-run the process. - - Additional troubleshooting context: - Source columns not in target: {0} - Target columns not in source: {1} - New column types: {2} -""" - -schema_change_datatype_error = """ -The source and target schemas on this incremental model contain different data types. This is not supported. - - Changed column types: {0} -""" - -schema_change_missing_source_error = """ -The target schema in on this incremental model contains a column not in the source schema. This is not supported. - - Source columns not in target: {0} -""" - -lw_deletes_not_enabled_error = """ -Attempting to apply the configuration `use_lw_deletes` to enable the delete+insert incremental strategy, but -`light weight deletes` are either not available or not enabled on this ClickHouse server. 
-""" - -lw_deletes_not_enabled_warning = """ -`light weight deletes` are either not available or not enabled on this ClickHouse server. This prevents the use -of the delete+insert incremental strategy, which may negatively affect performance for incremental models. -""" - -nd_mutations_not_enabled_error = """ -Attempting to apply the configuration `use_lw_deletes` to enable the delete+insert incremental strategy, but -the required `allow_nondeterministic_mutations` is not enabled and is `read_only` for this user -""" - -nd_mutations_not_enabled_warning = """ -The setting `allow_nondeterministic_mutations` is not enabled and is `read_only` for this user` This prevents the use -of `light weight deletes` and therefore the delete+insert incremental strategy. This may negatively affect performance -for incremental models -""" diff --git a/dbt/adapters/clickhouse/httpclient.py b/dbt/adapters/clickhouse/httpclient.py index 17795e44..6a1991e9 100644 --- a/dbt/adapters/clickhouse/httpclient.py +++ b/dbt/adapters/clickhouse/httpclient.py @@ -1,11 +1,8 @@ -from typing import List - import clickhouse_connect from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError from dbt.exceptions import DbtDatabaseError from dbt.version import __version__ as dbt_version -from dbt.adapters.clickhouse import ClickHouseColumn from dbt.adapters.clickhouse.__version__ import version as dbt_clickhouse_version from dbt.adapters.clickhouse.dbclient import ChClientWrapper, ChRetryableException @@ -23,19 +20,9 @@ def command(self, sql, **kwargs): except DatabaseError as ex: raise DbtDatabaseError(str(ex).strip()) from ex - def columns_in_query(self, sql: str, **kwargs) -> List[ClickHouseColumn]: - try: - query_result = self._client.query(f"SELECT * FROM ({sql}) LIMIT 0", **kwargs) - return [ - ClickHouseColumn.create(name, ch_type.name) - for name, ch_type in zip(query_result.column_names, query_result.column_types) - ] - except DatabaseError as ex: - raise 
DbtDatabaseError(str(ex).strip()) from ex - def get_ch_setting(self, setting_name): setting = self._client.server_settings.get(setting_name) - return (setting.value, setting.readonly) if setting else (None, 0) + return setting.value if setting else None def database_dropped(self, database: str): # This is necessary for the http client to avoid exceptions when ClickHouse doesn't recognize the database diff --git a/dbt/adapters/clickhouse/impl.py b/dbt/adapters/clickhouse/impl.py index f5b9b0cf..f482ddb4 100644 --- a/dbt/adapters/clickhouse/impl.py +++ b/dbt/adapters/clickhouse/impl.py @@ -1,35 +1,25 @@ import csv import io +from concurrent.futures import Future from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Callable, Dict, List, Optional, Set, Union import agate +import dbt.exceptions from dbt.adapters.base import AdapterConfig, available -from dbt.adapters.base.impl import BaseAdapter, ConstraintSupport +from dbt.adapters.base.impl import catch_as_completed from dbt.adapters.base.relation import BaseRelation, InformationSchema -from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support from dbt.adapters.sql import SQLAdapter +from dbt.clients.agate_helper import table_from_rows from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.nodes import ConstraintType, ModelLevelConstraint -from dbt.contracts.relation import Path, RelationType -from dbt.events.functions import warn_or_error -from dbt.events.types import ConstraintNotSupported +from dbt.contracts.relation import RelationType from dbt.exceptions import DbtInternalError, DbtRuntimeError, NotImplementedError -from dbt.utils import filter_null_values +from dbt.utils import executor, filter_null_values -import dbt -from dbt.adapters.clickhouse.cache import ClickHouseRelationsCache from dbt.adapters.clickhouse.column import ClickHouseColumn from dbt.adapters.clickhouse.connections 
import ClickHouseConnectionManager -from dbt.adapters.clickhouse.errors import ( - schema_change_datatype_error, - schema_change_fail_error, - schema_change_missing_source_error, -) from dbt.adapters.clickhouse.logger import logger -from dbt.adapters.clickhouse.query import quote_identifier from dbt.adapters.clickhouse.relation import ClickHouseRelation -from dbt.adapters.clickhouse.util import NewColumnDataType, compare_versions GET_CATALOG_MACRO_NAME = 'get_catalog' LIST_SCHEMAS_MACRO_NAME = 'list_schemas' @@ -40,7 +30,6 @@ class ClickHouseConfig(AdapterConfig): engine: str = 'MergeTree()' order_by: Optional[Union[List[str], str]] = 'tuple()' partition_by: Optional[Union[List[str], str]] = None - sharding_key: Optional[Union[List[str], str]] = 'rand()' class ClickHouseAdapter(SQLAdapter): @@ -49,25 +38,6 @@ class ClickHouseAdapter(SQLAdapter): ConnectionManager = ClickHouseConnectionManager AdapterSpecificConfigs = ClickHouseConfig - CONSTRAINT_SUPPORT = { - ConstraintType.check: ConstraintSupport.ENFORCED, - ConstraintType.not_null: ConstraintSupport.NOT_SUPPORTED, - ConstraintType.unique: ConstraintSupport.NOT_SUPPORTED, - ConstraintType.primary_key: ConstraintSupport.NOT_SUPPORTED, - ConstraintType.foreign_key: ConstraintSupport.NOT_SUPPORTED, - } - - _capabilities: CapabilityDict = CapabilityDict( - { - Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Unsupported), - Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Unsupported), - } - ) - - def __init__(self, config): - BaseAdapter.__init__(self, config) - self.cache = ClickHouseRelationsCache() - @classmethod def date_function(cls): return 'now()' @@ -107,11 +77,8 @@ def get_clickhouse_cluster_name(self): @available.parse(lambda *a, **k: {}) def get_clickhouse_local_suffix(self): conn = self.connections.get_if_exists() - suffix = conn.credentials.local_suffix - if suffix: - if suffix.startswith('_'): - return f'{suffix}' - return f'_{suffix}' + if 
conn.credentials.local_suffix: + return f'{conn.credentials.local_suffix}' @available def clickhouse_db_engine_clause(self): @@ -140,13 +107,6 @@ def can_exchange(self, schema: str, rel_type: str) -> bool: ch_db = self.get_ch_database(schema) return ch_db and ch_db.engine in ('Atomic', 'Replicated') - @available.parse_none - def should_on_cluster(self, materialized: str = '', engine: str = '') -> bool: - conn = self.connections.get_if_exists() - if conn and conn.credentials.cluster: - return ClickHouseRelation.get_on_cluster(conn.credentials.cluster, materialized, engine) - return ClickHouseRelation.get_on_cluster('', materialized, engine) - @available.parse_none def calculate_incremental_strategy(self, strategy: str) -> str: conn = self.connections.get_if_exists() @@ -164,39 +124,6 @@ def calculate_incremental_strategy(self, strategy: str) -> str: strategy = 'legacy' return strategy - @available.parse_none - def check_incremental_schema_changes( - self, on_schema_change, existing, target_sql - ) -> List[ClickHouseColumn]: - if on_schema_change not in ('fail', 'ignore', 'append_new_columns'): - raise DbtRuntimeError( - "Only `fail`, `ignore`, and `append_new_columns` supported for `on_schema_change`" - ) - source = self.get_columns_in_relation(existing) - source_map = {column.name: column for column in source} - target = self.get_column_schema_from_query(target_sql) - target_map = {column.name: column for column in source} - source_not_in_target = [column for column in source if column.name not in target_map.keys()] - target_not_in_source = [column for column in target if column.name not in source_map.keys()] - new_column_data_types = [] - for target_column in target: - source_column = source_map.get(target_column.name) - if source_column and source_column.dtype != target_column.dtype: - new_column_data_types.append( - NewColumnDataType(source_column.name, target_column.dtype) - ) - if new_column_data_types: - raise 
DbtRuntimeError(schema_change_datatype_error.format(new_column_data_types)) - if source_not_in_target: - raise DbtRuntimeError(schema_change_missing_source_error.format(source_not_in_target)) - if target_not_in_source and on_schema_change == 'fail': - raise DbtRuntimeError( - schema_change_fail_error.format( - source_not_in_target, target_not_in_source, new_column_data_types - ) - ) - return target_not_in_source - @available.parse_none def s3source_clause( self, @@ -225,12 +152,13 @@ def s3source_clause( fmt = fmt or s3config.get('fmt') bucket = bucket or s3config.get('bucket', '') path = path or s3config.get('path', '') - url = bucket.replace('https://', '') + url = bucket if path: if bucket and path and not bucket.endswith('/') and not bucket.startswith('/'): path = f'/{path}' url = f'{url}{path}'.replace('//', '/') - url = f'https://{url}' + if not url.startswith('http'): + url = f'https://{url}' access = '' if aws_access_key_id and not aws_secret_access_key: raise DbtRuntimeError('S3 aws_access_key_id specified without aws_secret_access_key') @@ -270,28 +198,26 @@ def list_relations_without_caching( relations = [] for row in results: - name, schema, type_info, db_engine, on_cluster = row + name, schema, type_info, db_engine = row rel_type = RelationType.View if 'view' in type_info else RelationType.Table can_exchange = ( conn_supports_exchange and rel_type == RelationType.Table and db_engine in ('Atomic', 'Replicated') ) - relation = self.Relation.create( - database='', + database=None, schema=schema, identifier=name, type=rel_type, can_exchange=can_exchange, - can_on_cluster=(on_cluster >= 1), ) relations.append(relation) return relations def get_relation(self, database: Optional[str], schema: str, identifier: str): - return super().get_relation('', schema, identifier) + return super().get_relation(None, schema, identifier) @available.parse_none def get_ch_database(self, schema: str): @@ -303,29 +229,47 @@ def get_ch_database(self, schema: str): except 
DbtRuntimeError: return None - def get_catalog(self, manifest) -> Tuple[agate.Table, List[Exception]]: - relations = self._get_catalog_relations(manifest) - schemas = set(relation.schema for relation in relations) - if schemas: - catalog = self._get_one_catalog(InformationSchema(Path()), schemas, manifest) - else: - catalog = dbt.clients.agate_helper.empty_table() - return catalog, [] - - def get_filtered_catalog( - self, manifest: Manifest, relations: Optional[Set[BaseRelation]] = None - ): - catalog, exceptions = self.get_catalog(manifest) - if relations and catalog: - relation_map = {(r.schema, r.identifier) for r in relations} + def get_catalog(self, manifest): + schema_map = self._get_catalog_schemas(manifest) + + with executor(self.config) as tpe: + futures: List[Future[agate.Table]] = [] + for info, schemas in schema_map.items(): + for schema in schemas: + futures.append( + tpe.submit_connected( + self, + schema, + self._get_one_catalog, + info, + [schema], + manifest, + ) + ) + catalogs, exceptions = catch_as_completed(futures) + return catalogs, exceptions + + def _get_one_catalog( + self, + information_schema: InformationSchema, + schemas: Set[str], + manifest: Manifest, + ) -> agate.Table: + if len(schemas) != 1: + dbt.exceptions.raise_compiler_error( + f'Expected only one schema in clickhouse _get_one_catalog, found ' f'{schemas}' + ) - def in_map(row: agate.Row): - s = _expect_row_value("table_schema", row) - i = _expect_row_value("table_name", row) - return (s, i) in relation_map + return super()._get_one_catalog(information_schema, schemas, manifest) - catalog = catalog.where(in_map) - return catalog, exceptions + @classmethod + def _catalog_filter_table(cls, table: agate.Table, manifest: Manifest) -> agate.Table: + table = table_from_rows( + table.rows, + table.column_names, + text_only_columns=['table_schema', 'table_name'], + ) + return table.where(_catalog_filter_schemas(manifest)) def get_rows_different_sql( self, @@ -406,49 +350,12 @@ def 
run_sql_for_tests(self, sql, fetch, conn): @available def get_model_settings(self, model): - settings = model['config'].get('settings', {}) - conn = self.connections.get_if_exists() - conn.handle.update_model_settings(settings) + settings = model['config'].get('settings', dict()) res = [] for key in settings: res.append(f' {key}={settings[key]}') return '' if len(res) == 0 else 'SETTINGS ' + ', '.join(res) + '\n' - @available - def get_model_query_settings(self, model): - settings = model['config'].get('query_settings', {}) - res = [] - for key in settings: - res.append(f' {key}={settings[key]}') - return '' if len(res) == 0 else 'SETTINGS ' + ', '.join(res) + '\n' - - @available.parse_none - def get_column_schema_from_query(self, sql: str, *_) -> List[ClickHouseColumn]: - """Get a list of the Columns with names and data types from the given sql.""" - conn = self.connections.get_if_exists() - return conn.handle.columns_in_query(sql) - - @available.parse_none - def format_columns(self, columns) -> List[Dict]: - return [{'name': column.name, 'data_type': column.dtype} for column in columns] - - @classmethod - def render_raw_columns_constraints(cls, raw_columns: Dict[str, Dict[str, Any]]) -> List: - rendered_columns = [] - for v in raw_columns.values(): - rendered_columns.append(f"{quote_identifier(v['name'])} {v['data_type']}") - if v.get("constraints"): - warn_or_error(ConstraintNotSupported(constraint='column', adapter='clickhouse')) - return rendered_columns - - @classmethod - def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[str]: - if constraint.type == ConstraintType.check and constraint.expression: - if not constraint.name: - raise DbtRuntimeError("CHECK Constraint 'name' is required") - return f"CONSTRAINT {constraint.name} CHECK ({constraint.expression})" - return None - @dataclass class ClickHouseDatabase: @@ -477,6 +384,18 @@ def test(row: agate.Row) -> bool: return test +def compare_versions(v1: str, v2: str) -> int: + 
v1_parts = v1.split('.') + v2_parts = v2.split('.') + for part1, part2 in zip(v1_parts, v2_parts): + try: + if int(part1) != int(part2): + return 1 if int(part1) > int(part2) else -1 + except ValueError: + raise DbtRuntimeError("Version must consist of only numbers separated by '.'") + return 0 + + COLUMNS_EQUAL_SQL = ''' SELECT row_count_diff.difference as row_count_difference, diff --git a/dbt/adapters/clickhouse/nativeclient.py b/dbt/adapters/clickhouse/nativeclient.py index aaec97f9..0f400a09 100644 --- a/dbt/adapters/clickhouse/nativeclient.py +++ b/dbt/adapters/clickhouse/nativeclient.py @@ -1,12 +1,10 @@ -from typing import List - import clickhouse_driver import pkg_resources from clickhouse_driver.errors import NetworkError, SocketTimeoutError from dbt.exceptions import DbtDatabaseError from dbt.version import __version__ as dbt_version -from dbt.adapters.clickhouse import ClickHouseColumn, ClickHouseCredentials +from dbt.adapters.clickhouse import ClickHouseCredentials from dbt.adapters.clickhouse.__version__ import version as dbt_clickhouse_version from dbt.adapters.clickhouse.dbclient import ChClientWrapper, ChRetryableException from dbt.adapters.clickhouse.logger import logger @@ -32,24 +30,15 @@ def command(self, sql, **kwargs): except clickhouse_driver.errors.Error as ex: raise DbtDatabaseError(str(ex).strip()) from ex - def columns_in_query(self, sql: str, **kwargs) -> List[ClickHouseColumn]: - try: - _, columns = self._client.execute( - f"SELECT * FROM ({sql}) LIMIT 0", with_column_types=True - ) - return [ClickHouseColumn.create(column[0], column[1]) for column in columns] - except clickhouse_driver.errors.Error as ex: - raise DbtDatabaseError(str(ex).strip()) from ex - def get_ch_setting(self, setting_name): try: result = self._client.execute( - f"SELECT value, readonly FROM system.settings WHERE name = '{setting_name}'" + f"SELECT value FROM system.settings WHERE name = '{setting_name}'" ) except clickhouse_driver.errors.Error as ex: 
logger.warn('Unexpected error retrieving ClickHouse server setting', ex) return None - return (result[0][0], result[0][1]) if result else (None, 0) + return result[0][0] if result else None def close(self): self._client.disconnect() diff --git a/dbt/adapters/clickhouse/query.py b/dbt/adapters/clickhouse/query.py deleted file mode 100644 index f69222e0..00000000 --- a/dbt/adapters/clickhouse/query.py +++ /dev/null @@ -1,14 +0,0 @@ -BS = '\\' -must_escape = (BS, '\'', '`') - - -def quote_identifier(identifier: str): - first_char = identifier[0] - if first_char in ('`', '"') and identifier[-1] == first_char: - # Identifier is already quoted, assume that it's valid - return identifier - return f'`{escape_str(identifier)}`' - - -def escape_str(value: str): - return ''.join(f'{BS}{c}' if c in must_escape else c for c in value) diff --git a/dbt/adapters/clickhouse/relation.py b/dbt/adapters/clickhouse/relation.py index cc2865f4..f7a044e5 100644 --- a/dbt/adapters/clickhouse/relation.py +++ b/dbt/adapters/clickhouse/relation.py @@ -1,20 +1,17 @@ from dataclasses import dataclass, field -from typing import Any, Dict, Optional, Type +from typing import Any, Optional, Type from dbt.adapters.base.relation import BaseRelation, Policy, Self -from dbt.contracts.graph.nodes import ManifestNode, SourceDefinition -from dbt.contracts.relation import HasQuoting, Path, RelationType +from dbt.contracts.graph.nodes import SourceDefinition from dbt.exceptions import DbtRuntimeError -from dbt.utils import deep_merge, merge - -from dbt.adapters.clickhouse.query import quote_identifier +from dbt.utils import deep_merge @dataclass class ClickHouseQuotePolicy(Policy): - database: bool = True - schema: bool = True - identifier: bool = True + database: bool = False + schema: bool = False + identifier: bool = False @dataclass @@ -28,26 +25,24 @@ class ClickHouseIncludePolicy(Policy): class ClickHouseRelation(BaseRelation): quote_policy: Policy = field(default_factory=lambda: 
ClickHouseQuotePolicy()) include_policy: Policy = field(default_factory=lambda: ClickHouseIncludePolicy()) - quote_character: str = '`' + quote_character: str = '' can_exchange: bool = False - can_on_cluster: bool = False def __post_init__(self): if self.database != self.schema and self.database: raise DbtRuntimeError(f'Cannot set database {self.database} in clickhouse!') - self.path.database = '' - - def render(self) -> str: - return ".".join(quote_identifier(part) for _, part in self._render_iterator() if part) - def derivative(self, suffix: str, relation_type: Optional[str] = None) -> BaseRelation: - path = Path(schema=self.path.schema, database='', identifier=self.path.identifier + suffix) - derivative_type = RelationType[relation_type] if relation_type else self.type - return ClickHouseRelation(type=derivative_type, path=path) + def render(self): + if self.include_policy.database and self.include_policy.schema: + raise DbtRuntimeError( + 'Got a clickhouse relation with schema and database set to ' + 'include, but only one can be set' + ) + return super().render() def matches( self, - database: Optional[str] = '', + database: Optional[str] = None, schema: Optional[str] = None, identifier: Optional[str] = None, ): @@ -55,23 +50,6 @@ def matches( raise DbtRuntimeError(f'Passed unexpected schema value {schema} to Relation.matches') return self.database == database and self.identifier == identifier - @property - def should_on_cluster(self) -> bool: - if self.include_policy.identifier: - return self.can_on_cluster - else: - # create database/schema on cluster by default - return True - - @classmethod - def get_on_cluster( - cls: Type[Self], cluster: str = '', materialized: str = '', engine: str = '' - ) -> bool: - if cluster.strip(): - return 'view' == materialized or 'distributed' in materialized or 'Replicated' in engine - else: - return False - @classmethod def create_from_source(cls: Type[Self], source: SourceDefinition, **kwargs: Any) -> Self: source_quoting = 
source.quoting.to_dict(omit_none=True) @@ -89,36 +67,9 @@ def create_from_source(cls: Type[Self], source: SourceDefinition, **kwargs: Any) schema = source.database return cls.create( - database='', + database=source.database, schema=schema, identifier=source.identifier, quote_policy=quote_policy, **kwargs, ) - - @classmethod - def create_from_node( - cls: Type[Self], - config: HasQuoting, - node: ManifestNode, - quote_policy: Optional[Dict[str, bool]] = None, - **kwargs: Any, - ) -> Self: - if quote_policy is None: - quote_policy = {} - - quote_policy = merge(config.quoting, quote_policy) - - cluster = config.credentials.cluster if config.credentials.cluster else '' - materialized = node.get_materialization() if node.get_materialization() else '' - engine = node.config.get('engine') if node.config.get('engine') else '' - can_on_cluster = cls.get_on_cluster(cluster, materialized, engine) - - return cls.create( - database='', - schema=node.schema, - identifier=node.alias, - quote_policy=quote_policy, - can_on_cluster=can_on_cluster, - **kwargs, - ) diff --git a/dbt/adapters/clickhouse/util.py b/dbt/adapters/clickhouse/util.py deleted file mode 100644 index 7114dbde..00000000 --- a/dbt/adapters/clickhouse/util.py +++ /dev/null @@ -1,21 +0,0 @@ -from dataclasses import dataclass - -from dbt.exceptions import DbtRuntimeError - - -def compare_versions(v1: str, v2: str) -> int: - v1_parts = v1.split('.') - v2_parts = v2.split('.') - for part1, part2 in zip(v1_parts, v2_parts): - try: - if int(part1) != int(part2): - return 1 if int(part1) > int(part2) else -1 - except ValueError: - raise DbtRuntimeError("Version must consist of only numbers separated by '.'") - return 0 - - -@dataclass -class NewColumnDataType: - column_name: str - new_type: str diff --git a/dbt/include/clickhouse/macros/adapters.sql b/dbt/include/clickhouse/macros/adapters.sql index 718c775a..8ef23409 100644 --- a/dbt/include/clickhouse/macros/adapters.sql +++ b/dbt/include/clickhouse/macros/adapters.sql 
@@ -1,3 +1,14 @@ +{% macro clickhouse__create_view_as(relation, sql) -%} + {%- set sql_header = config.get('sql_header', none) -%} + + {{ sql_header if sql_header is not none }} + + create view {{ relation.include(database=False) }} {{ on_cluster_clause()}} + as ( + {{ sql }} + ) +{%- endmacro %} + {% macro clickhouse__list_schemas(database) %} {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} select name from system.databases @@ -8,14 +19,14 @@ {% macro clickhouse__create_schema(relation) -%} {%- call statement('create_schema') -%} create database if not exists {{ relation.without_identifier().include(database=False) }} - {{ on_cluster_clause(relation)}} + {{ on_cluster_clause()}} {{ adapter.clickhouse_db_engine_clause() }} {% endcall %} {% endmacro %} {% macro clickhouse__drop_schema(relation) -%} {%- call statement('drop_schema') -%} - drop database if exists {{ relation.without_identifier().include(database=False) }} {{ on_cluster_clause(relation)}} + drop database if exists {{ relation.without_identifier().include(database=False) }} {{ on_cluster_clause()}} {%- endcall -%} {% endmacro %} @@ -25,19 +36,9 @@ t.name as name, t.database as schema, if(engine not in ('MaterializedView', 'View'), 'table', 'view') as type, - db.engine as db_engine, - {%- if adapter.get_clickhouse_cluster_name() -%} - count(distinct _shard_num) > 1 as is_on_cluster - from clusterAllReplicas({{ adapter.get_clickhouse_cluster_name() }}, system.tables) as t - join system.databases as db on t.database = db.name - where schema = '{{ schema_relation.schema }}' - group by name, schema, type, db_engine - {%- else -%} - 0 as is_on_cluster - from system.tables as t join system.databases as db on t.database = db.name - where schema = '{{ schema_relation.schema }}' - {% endif %} - + db.engine as db_engine + from system.tables as t JOIN system.databases as db on t.database = db.name + where schema = '{{ schema_relation.schema }}' {% endcall %} {{ 
return(load_result('list_relations_without_caching').table) }} {% endmacro %} @@ -55,23 +56,22 @@ {% macro clickhouse__drop_relation(relation, obj_type='table') -%} {% call statement('drop_relation', auto_begin=False) -%} - {# drop relation on cluster by default if cluster is set #} - drop {{ obj_type }} if exists {{ relation }} {{ on_cluster_clause(relation.without_identifier(), True)}} + drop {{ obj_type }} if exists {{ relation }} {{ on_cluster_clause()}} {%- endcall %} {% endmacro %} {% macro clickhouse__rename_relation(from_relation, to_relation, obj_type='table') -%} {% call statement('drop_relation') %} - drop {{ obj_type }} if exists {{ to_relation }} {{ on_cluster_clause(to_relation.without_identifier())}} + drop {{ obj_type }} if exists {{ to_relation }} {{ on_cluster_clause()}} {% endcall %} {% call statement('rename_relation') %} - rename {{ obj_type }} {{ from_relation }} to {{ to_relation }} {{ on_cluster_clause(from_relation)}} + rename {{ obj_type }} {{ from_relation }} to {{ to_relation }} {{ on_cluster_clause()}} {% endcall %} {% endmacro %} {% macro clickhouse__truncate_relation(relation) -%} {% call statement('truncate_relation') -%} - truncate table {{ relation }} {{ on_cluster_clause(relation)}} + truncate table {{ relation }} {{ on_cluster_clause()}} {%- endcall %} {% endmacro %} @@ -84,7 +84,7 @@ {% macro clickhouse__generate_database_name(custom_database_name=none, node=none) -%} - {% do return('') %} + {% do return(None) %} {%- endmacro %} {% macro clickhouse__get_columns_in_query(select_sql) %} @@ -100,18 +100,17 @@ {% macro clickhouse__alter_column_type(relation, column_name, new_column_type) -%} {% call statement('alter_column_type') %} - alter table {{ relation }} {{ on_cluster_clause(relation)}} modify column {{ adapter.quote(column_name) }} {{ new_column_type }} + alter table {{ relation }} {{ on_cluster_clause()}} modify column {{ adapter.quote(column_name) }} {{ new_column_type }} {% endcall %} {% endmacro %} {% macro 
exchange_tables_atomic(old_relation, target_relation, obj_types='TABLES') %} - {%- if adapter.get_clickhouse_cluster_name() is not none and obj_types == 'TABLES' and 'Replicated' in engine_clause() %} - {%- call statement('exchange_table_sync_replica') -%} - SYSTEM SYNC REPLICA {{ on_cluster_clause(target_relation) }} {{ target_relation.schema }}.{{ target_relation.identifier }} - {% endcall %} + {%- if adapter.get_clickhouse_cluster_name() is not none and obj_types == 'TABLES' %} + {% do run_query("SYSTEM SYNC REPLICA " + on_cluster_clause() + target_relation.schema + '.' + target_relation.identifier) %} {%- endif %} + {%- call statement('exchange_tables_atomic') -%} - EXCHANGE {{ obj_types }} {{ old_relation }} AND {{ target_relation }} {{ on_cluster_clause(target_relation)}} + EXCHANGE {{ obj_types }} {{ old_relation }} AND {{ target_relation }} {{ on_cluster_clause()}} {% endcall %} {% endmacro %} diff --git a/dbt/include/clickhouse/macros/adapters/apply_grants.sql b/dbt/include/clickhouse/macros/adapters/apply_grants.sql index 387b333b..cd9732d4 100644 --- a/dbt/include/clickhouse/macros/adapters/apply_grants.sql +++ b/dbt/include/clickhouse/macros/adapters/apply_grants.sql @@ -1,5 +1,5 @@ {% macro clickhouse__get_show_grant_sql(relation) %} - SELECT access_type as privilege_type, COALESCE(user_name, role_name) as grantee from system.grants where table = '{{ relation.name }}' + SELECT access_type as privilege_type, COALESCE(user_name, role_name) as grantee FROM system.grants WHERE table = '{{ relation.name }}' AND database = '{{ relation.schema }}' {%- endmacro %} @@ -13,9 +13,9 @@ {%- macro clickhouse__get_grant_sql(relation, privilege, grantees) -%} - grant {{ on_cluster_clause(relation)}} {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }} + grant {{ on_cluster_clause()}} {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }} {%- endmacro -%} {%- macro clickhouse__get_revoke_sql(relation, privilege, grantees) -%} - revoke {{ 
on_cluster_clause(relation)}} {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }} + revoke {{ on_cluster_clause()}} {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }} {%- endmacro -%} diff --git a/dbt/include/clickhouse/macros/adapters/relation.sql b/dbt/include/clickhouse/macros/adapters/relation.sql index 59ce37ab..d6ec3d0f 100644 --- a/dbt/include/clickhouse/macros/adapters/relation.sql +++ b/dbt/include/clickhouse/macros/adapters/relation.sql @@ -5,14 +5,12 @@ {% endif %} {%- set can_exchange = adapter.can_exchange(schema, type) %} - {%- set should_on_cluster = adapter.should_on_cluster(config.get('materialized'), engine_clause()) %} {%- set new_relation = api.Relation.create( database=None, schema=schema, identifier=identifier, type=type, - can_exchange=can_exchange, - can_on_cluster=should_on_cluster + can_exchange=can_exchange ) -%} {% do return([false, new_relation]) %} {% endmacro %} diff --git a/dbt/include/clickhouse/macros/catalog.sql b/dbt/include/clickhouse/macros/catalog.sql index b2c55999..16b3987e 100644 --- a/dbt/include/clickhouse/macros/catalog.sql +++ b/dbt/include/clickhouse/macros/catalog.sql @@ -1,7 +1,7 @@ {% macro clickhouse__get_catalog(information_schema, schemas) -%} {%- call statement('catalog', fetch_result=True) -%} select - '' as table_database, + null as table_database, columns.database as table_schema, columns.table as table_name, if(tables.engine not in ('MaterializedView', 'View'), 'table', 'view') as table_type, diff --git a/dbt/include/clickhouse/macros/column_spec_ddl.sql b/dbt/include/clickhouse/macros/column_spec_ddl.sql deleted file mode 100644 index 24194d91..00000000 --- a/dbt/include/clickhouse/macros/column_spec_ddl.sql +++ /dev/null @@ -1,40 +0,0 @@ -{% macro clickhouse__get_assert_columns_equivalent(sql) -%} - {%- set user_defined_columns = model['columns'] -%} - - {%- if not user_defined_columns -%} - {{ exceptions.raise_contract_error([], []) }} - {%- endif -%} - - {%- set 
yaml_columns = user_defined_columns.values() -%} - - {%- set sql_file_provided_columns = adapter.get_column_schema_from_query(sql) -%} - {%- set sql_columns = adapter.format_columns(sql_file_provided_columns) -%} - - {%- if sql_columns|length != yaml_columns|length -%} - {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} - {%- endif -%} - - {%- if sql_columns|length != yaml_columns|length -%} - {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} - {%- endif -%} - - {%- for sql_col in sql_columns -%} - {%- set yaml_col = [] -%} - {%- for this_col in yaml_columns -%} - {%- if this_col['name'] == sql_col['name'] -%} - {%- do yaml_col.append(this_col) -%} - {%- break -%} - {%- endif -%} - {%- endfor -%} - {%- if not yaml_col -%} - {#-- Column with name not found in yaml #} - {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} - {%- endif -%} - {%- if sql_col['data_type'] != yaml_col[0]['data_type'] -%} - {#-- Column data types don't match #} - {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%} - {%- endif -%} - {%- endfor -%} - -{% endmacro %} - diff --git a/dbt/include/clickhouse/macros/materializations/distributed_table.sql b/dbt/include/clickhouse/macros/materializations/distributed_table.sql index 9f920ad9..44eeb740 100644 --- a/dbt/include/clickhouse/macros/materializations/distributed_table.sql +++ b/dbt/include/clickhouse/macros/materializations/distributed_table.sql @@ -1,21 +1,11 @@ {% materialization distributed_table, adapter='clickhouse' %} - {% set insert_distributed_sync = run_query("SELECT value FROM system.settings WHERE name = 'insert_distributed_sync'")[0][0] %} - {% if insert_distributed_sync != '1' %} - {% do exceptions.raise_compiler_error('To use distributed materialization setting insert_distributed_sync should be set to 1') %} - {% endif %} - {%- set local_suffix = adapter.get_clickhouse_local_suffix() -%} {%- set existing_relation = load_cached_relation(this) -%} {%- set 
target_relation = this.incorporate(type='table') -%} - {% set on_cluster = on_cluster_clause(target_relation) %} - {% if on_cluster.strip() == '' %} - {% do exceptions.raise_compiler_error('To use distributed materialization cluster setting in dbt profile must be set') %} - {% endif %} - - {% set existing_relation_local = existing_relation.incorporate(path={"identifier": this.identifier + local_suffix}) if existing_relation is not none else none %} - {% set target_relation_local = target_relation.incorporate(path={"identifier": this.identifier + local_suffix}) if target_relation is not none else none %} + {% set existing_relation_local = existing_relation.incorporate(path={"identifier": model['name'] + local_suffix}) if existing_relation is not none else none %} + {% set target_relation_local = target_relation.incorporate(path={"identifier": model['name'] + local_suffix}) if target_relation is not none else none %} {%- set backup_relation = none -%} {%- set preexisting_backup_relation = none -%} @@ -47,24 +37,23 @@ {{ run_hooks(pre_hooks, inside_transaction=True) }} {% if backup_relation is none %} - {{ create_distributed_local_table(target_relation, target_relation_local, view_relation) }} + {% do run_query(create_empty_table_from_relation(target_relation_local, view_relation)) or '' %} + {% do run_query(create_distributed_table(target_relation, target_relation_local)) or '' %} {% elif existing_relation.can_exchange %} -- We can do an atomic exchange, so no need for an intermediate {% call statement('main') -%} - {{ create_empty_table_from_relation(backup_relation, view_relation) }} + {% do run_query(create_empty_table_from_relation(backup_relation, view_relation)) or '' %} {%- endcall %} - {% do exchange_tables_atomic(backup_relation, existing_relation_local) %} + {% do exchange_tables_atomic(backup_relation, existing_relation) %} {% else %} {% do run_query(create_empty_table_from_relation(intermediate_relation, view_relation)) or '' %} {{ 
adapter.rename_relation(existing_relation_local, backup_relation) }} {{ adapter.rename_relation(intermediate_relation, target_relation_local) }} - {{ create_distributed_table(target_relation, target_relation_local) }} {% endif %} {% do run_query(clickhouse__insert_into(target_relation, sql)) or '' %} {{ drop_relation_if_exists(view_relation) }} -- cleanup {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} - {% do apply_grants(target_relation_local, grant_config, should_revoke=should_revoke) %} {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} {% do persist_docs(target_relation, model) %} @@ -77,21 +66,14 @@ {% endmaterialization %} {% macro create_distributed_table(relation, local_relation) %} - {%- set cluster = adapter.get_clickhouse_cluster_name() -%} - {% if cluster is none %} - {% do exceptions.raise_compiler_error('Cluster name should be defined for using distributed materializations, current is None') %} - {% endif %} - - {%- set cluster = cluster[1:-1] -%} + {%- set cluster = adapter.get_clickhouse_cluster_name()[1:-1] -%} {%- set sharding = config.get('sharding_key') -%} - create table {{ relation }} {{ on_cluster_clause(relation) }} as {{ local_relation }} + CREATE TABLE {{ relation }} {{ on_cluster_clause() }} AS {{ local_relation }} ENGINE = Distributed('{{ cluster}}', '{{ relation.schema }}', '{{ local_relation.name }}' - {%- if sharding is not none and sharding.strip() != '' -%} + {% if sharding is not none %} , {{ sharding }} - {%- else %} - , rand() - {% endif -%} + {% endif %} ) {% endmacro %} @@ -106,7 +88,7 @@ {{ sql_header if sql_header is not none }} create table {{ relation.include(database=False) }} - {{ on_cluster_clause(relation) }} ( + {{ on_cluster_clause() }} ( {{col_list | join(', ')}} ) @@ -116,13 +98,3 @@ {{ partition_cols(label="partition by") }} {{ adapter.get_model_settings(model) }} {%- endmacro %} - -{% macro create_distributed_local_table(distributed_relation, 
shard_relation, structure_relation, sql_query=none) -%} - {{ drop_relation_if_exists(shard_relation) }} - {{ drop_relation_if_exists(distributed_relation) }} - {% do run_query(create_empty_table_from_relation(shard_relation, structure_relation)) or '' %} - {% do run_query(create_distributed_table(distributed_relation, shard_relation)) or '' %} - {% if sql_query is not none %} - {% do run_query(clickhouse__insert_into(distributed_relation, sql_query)) or '' %} - {% endif %} -{%- endmacro %} diff --git a/dbt/include/clickhouse/macros/materializations/incremental/incremental.sql b/dbt/include/clickhouse/macros/materializations/incremental.sql similarity index 64% rename from dbt/include/clickhouse/macros/materializations/incremental/incremental.sql rename to dbt/include/clickhouse/macros/materializations/incremental.sql index 023b3beb..2687d13e 100644 --- a/dbt/include/clickhouse/macros/materializations/incremental/incremental.sql +++ b/dbt/include/clickhouse/macros/materializations/incremental.sql @@ -65,27 +65,24 @@ {% endcall %} {% else %} - {% set column_changes = none %} {% if config.get('distributed') %} {% do clickhouse__incremental_create_distributed(target_relation) %} {% endif %} {% set schema_changes = none %} {% set incremental_strategy = adapter.calculate_incremental_strategy(config.get('incremental_strategy')) %} {% set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) %} - {%- if on_schema_change != 'ignore' %} - {%- set column_changes = adapter.check_incremental_schema_changes(on_schema_change, existing_relation, sql) -%} - {%- if column_changes %} - {%- if incremental_strategy in ('append', 'delete_insert') %} - {% set incremental_strategy = 'legacy' %} - {{ log('Schema changes detected, switching to legacy incremental strategy') }} - {%- endif %} + {% if on_schema_change != 'ignore' %} + {%- set schema_changes = check_for_schema_changes(existing_relation, target_relation) -%} + {% if 
schema_changes['schema_changed'] and incremental_strategy in ('append', 'delete_insert') %} + {% set incremental_strategy = 'legacy' %} + {% do log('Schema changes detected, switching to legacy incremental strategy') %} {% endif %} {% endif %} {% if incremental_strategy != 'delete_insert' and incremental_predicates %} {% do exceptions.raise_compiler_error('Cannot apply incremental predicates with ' + incremental_strategy + ' strategy.') %} {% endif %} {% if incremental_strategy == 'legacy' %} - {% do clickhouse__incremental_legacy(existing_relation, intermediate_relation, column_changes, unique_key) %} + {% do clickhouse__incremental_legacy(existing_relation, intermediate_relation, schema_changes, unique_key) %} {% set need_swap = true %} {% elif incremental_strategy == 'delete_insert' %} {% do clickhouse__incremental_delete_insert(existing_relation, unique_key, incremental_predicates) %} @@ -130,6 +127,7 @@ {%- endmaterialization %} + {% macro process_schema_changes(on_schema_change, source_relation, target_relation) %} {%- set schema_changes_dict = check_for_schema_changes(source_relation, target_relation) -%} @@ -153,144 +151,81 @@ {% endmacro %} -{% macro clickhouse__incremental_legacy(existing_relation, intermediate_relation, on_schema_change, unique_key, is_distributed=False) %} - -- First create a temporary table for all of the new data - {% set new_data_relation = existing_relation.incorporate(path={"identifier": existing_relation.identifier + '__dbt_new_data'}) %} - {{ drop_relation_if_exists(new_data_relation) }} - - {%- set inserted_relation = intermediate_relation -%} - {%- set inserting_relation = new_data_relation -%} +{% macro clickhouse__incremental_legacy(existing_relation, intermediate_relation, on_schema_change, unique_key) %} -- First create a temporary table for all of the new data - {% if is_distributed %} - {% if column_changes %} - {% do exceptions.raise_compiler_error('Schema changes not supported with Distributed tables ') %} - {% endif %} 
- -- Need to use distributed table to have data on all shards - {%- set distributed_new_data_relation = existing_relation.incorporate(path={"identifier": existing_relation.identifier + '__dbt_distributed_new_data'}) -%} - {%- set inserting_relation = distributed_new_data_relation -%} - {{ create_distributed_local_table(distributed_new_data_relation, new_data_relation, existing_relation, sql) }} - {% elif column_changes %} - {% call statement('create_new_data_temp') %} - {{ get_create_table_as_sql(False, new_data_relation, sql) }} - {% endcall %} - {% else %} - {% call statement('create_new_data_temp') %} + {% set new_data_relation = existing_relation.incorporate(path={"identifier": existing_relation.name + '__dbt_new_data'}) %} + {{ drop_relation_if_exists(new_data_relation) }} + {% call statement('create_new_data_temp') %} {{ get_create_table_as_sql(False, new_data_relation, sql) }} - {% endcall %} - {% endif %} + {% endcall %} -- Next create another temporary table that will eventually be used to replace the existing table. 
We can't -- use the table just created in the previous step because we don't want to override any updated rows with -- old rows when we insert the old data - {% if is_distributed %} - {%- set distributed_intermediate_relation = make_intermediate_relation(existing_relation) -%} - {%- set inserted_relation = distributed_intermediate_relation -%} - {{ create_distributed_local_table(distributed_intermediate_relation, intermediate_relation, existing_relation) }} - {% else %} - {% call statement('main') %} - create table {{ intermediate_relation }} - {% set active_cluster = adapter.get_clickhouse_cluster_name() %} - {%- if active_cluster is not none %} - ON CLUSTER {{ active_cluster }} - {% endif %} - as {{ new_data_relation }} - {% endcall %} - {% endif %} + {% call statement('main') %} + create table {{ intermediate_relation }} + {% set active_cluster = adapter.get_clickhouse_cluster_name() %} + {%- if active_cluster is not none %} + ON CLUSTER {{ active_cluster }} + {% endif %} + as {{ new_data_relation }} + {% endcall %} -- Insert all the existing rows into the new temporary table, ignoring any rows that have keys in the "new data" -- table. 
- {%- set source_columns = adapter.get_columns_in_relation(existing_relation) -%} - {%- set source_columns_csv = source_columns | map(attribute='quoted') | join(', ') -%} + {%- set dest_columns = adapter.get_columns_in_relation(existing_relation) -%} + {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} {% call statement('insert_existing_data') %} - insert into {{ inserted_relation }} ({{ source_columns_csv }}) - select {{ source_columns_csv }} + insert into {{ intermediate_relation }} ({{ dest_cols_csv }}) + select {{ dest_cols_csv }} from {{ existing_relation }} where ({{ unique_key }}) not in ( select {{ unique_key }} - from {{ inserting_relation }} + from {{ new_data_relation }} ) - {{ adapter.get_model_query_settings(model) }} + {{ adapter.get_model_settings(model) }} {% endcall %} -- Insert all of the new data into the temporary table - {% if column_changes %} - {%- set dest_columns = adapter.get_columns_in_relation(new_data_relation) -%} - {%- set dest_columns_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - {% else %} - {%- set dest_columns_csv = source_columns_csv %} - {% endif %} {% call statement('insert_new_data') %} - insert into {{ inserted_relation }} ({{ dest_columns_csv }}) - select {{ dest_columns_csv }} - from {{ inserting_relation }} - {{ adapter.get_model_query_settings(model) }} + insert into {{ intermediate_relation }} ({{ dest_cols_csv }}) + select {{ dest_cols_csv }} + from {{ new_data_relation }} + {{ adapter.get_model_settings(model) }} {% endcall %} {% do adapter.drop_relation(new_data_relation) %} - {% if is_distributed %} - {{ drop_relation_if_exists(distributed_new_data_relation) }} - {{ drop_relation_if_exists(distributed_intermediate_relation) }} - {% endif %} - {% endmacro %} -{% macro clickhouse__incremental_delete_insert(existing_relation, unique_key, incremental_predicates, is_distributed=False) %} - {% set new_data_relation = existing_relation.incorporate(path={"identifier": 
existing_relation.identifier + +{% macro clickhouse__incremental_delete_insert(existing_relation, unique_key, incremental_predicates) %} + {% set new_data_relation = existing_relation.incorporate(path={"identifier": existing_relation.name + '__dbt_new_data_' + invocation_id.replace('-', '_')}) %} {{ drop_relation_if_exists(new_data_relation) }} - {%- set distributed_new_data_relation = existing_relation.incorporate(path={"identifier": existing_relation.identifier + '__dbt_distributed_new_data'}) -%} - - {%- set inserting_relation = new_data_relation -%} - - {% if is_distributed %} - -- Need to use distributed table to have data on all shards - {%- set inserting_relation = distributed_new_data_relation -%} - {{ create_distributed_local_table(distributed_new_data_relation, new_data_relation, existing_relation, sql) }} - {% else %} - {% call statement('main') %} + {% call statement('main') %} {{ get_create_table_as_sql(False, new_data_relation, sql) }} - {% endcall %} - {% endif %} - + {% endcall %} {% call statement('delete_existing_data') %} - {% if is_distributed %} - {%- set existing_local = existing_relation.derivative(adapter.get_clickhouse_local_suffix()) %} - delete from {{ existing_local }} {{ on_cluster_clause(existing_relation) }} where ({{ unique_key }}) in (select {{ unique_key }} - from {{ inserting_relation }}) - {% else %} - delete from {{ existing_relation }} {{ on_cluster_clause() }} where ({{ unique_key }}) in (select {{ unique_key }} - from {{ new_data_relation }}) - {% endif %} + delete from {{ existing_relation }} {{ on_cluster_clause() }} where ({{ unique_key }}) in (select {{ unique_key }} + from {{ new_data_relation }}) {%- if incremental_predicates %} {% for predicate in incremental_predicates %} and {{ predicate }} {% endfor %} -<<<<<<< HEAD:dbt/include/clickhouse/macros/materializations/incremental/incremental.sql - {%- endif -%} - {{ adapter.get_model_query_settings(model) }} -======= {%- endif %} SETTINGS mutations_sync = 2, 
allow_nondeterministic_mutations = 1 ->>>>>>> bd0556f (feat(incremental): distrubted append):dbt/include/clickhouse/macros/materializations/incremental.sql {% endcall %} {%- set dest_columns = adapter.get_columns_in_relation(existing_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} {% call statement('insert_new_data') %} -<<<<<<< HEAD:dbt/include/clickhouse/macros/materializations/incremental/incremental.sql - insert into {{ existing_relation }} {{ adapter.get_model_query_settings(model) }} select {{ dest_cols_csv }} from {{ inserting_relation }} - {% endcall %} - {% do adapter.drop_relation(new_data_relation) %} - {{ drop_relation_if_exists(distributed_new_data_relation) }} -======= insert into {{ model['name'] }} select {{ dest_cols_csv}} from {{ new_data_relation }} SETTINGS mutations_sync = 2, insert_distributed_sync = 1 {% endcall %} {% do adapter.drop_relation(new_data_relation) %} {% call statement('optimize_table') %} optimize table {{ existing_relation }} {{ on_cluster_clause() }} FINAL DEDUPLICATE {% endcall %} ->>>>>>> bd0556f (feat(incremental): distrubted append):dbt/include/clickhouse/macros/materializations/incremental.sql {% endmacro %} {% macro clickhouse__incremental_create_distributed(relation) %} diff --git a/dbt/include/clickhouse/macros/materializations/incremental/distributed_incremental.sql b/dbt/include/clickhouse/macros/materializations/incremental/distributed_incremental.sql deleted file mode 100644 index 568ada36..00000000 --- a/dbt/include/clickhouse/macros/materializations/incremental/distributed_incremental.sql +++ /dev/null @@ -1,146 +0,0 @@ -{% materialization distributed_incremental, adapter='clickhouse' %} - {% set insert_distributed_sync = run_query("SELECT value FROM system.settings WHERE name = 'insert_distributed_sync'")[0][0] %} - {% if insert_distributed_sync != '1' %} - {% do exceptions.raise_compiler_error('To use distributed materialization setting insert_distributed_sync 
should be set to 1') %} - {% endif %} - - {%- set local_suffix = adapter.get_clickhouse_local_suffix() -%} - - {%- set existing_relation = load_cached_relation(this) -%} - {%- set target_relation = this.incorporate(type='table') -%} - - {% set on_cluster = on_cluster_clause(target_relation) %} - {% if on_cluster.strip() == '' %} - {% do exceptions.raise_compiler_error('To use distributed materializations cluster setting in dbt profile must be set') %} - {% endif %} - - {% set existing_relation_local = existing_relation.incorporate(path={"identifier": this.identifier + local_suffix}) if existing_relation is not none else none %} - {% set target_relation_local = target_relation.incorporate(path={"identifier": this.identifier + local_suffix}) if target_relation is not none else none %} - - {%- set unique_key = config.get('unique_key') -%} - {% if unique_key is not none and unique_key|length == 0 %} - {% set unique_key = none %} - {% endif %} - {% if unique_key is iterable and (unique_key is not string and unique_key is not mapping) %} - {% set unique_key = unique_key|join(', ') %} - {% endif %} - {%- set inserts_only = config.get('inserts_only') -%} - {%- set grant_config = config.get('grants') -%} - {%- set full_refresh_mode = (should_full_refresh() or existing_relation.is_view) -%} - {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%} - - {%- set intermediate_relation = make_intermediate_relation(target_relation_local)-%} - {%- set distributed_intermediate_relation = make_intermediate_relation(target_relation)-%} - {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%} - {%- set backup_relation = make_backup_relation(target_relation_local, backup_relation_type) -%} - {%- set distributed_backup_relation = make_backup_relation(target_relation, backup_relation_type) -%} - {%- set preexisting_intermediate_relation = 
load_cached_relation(intermediate_relation)-%} - {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} - {%- set view_relation = default__make_temp_relation(target_relation, '__dbt_view_tmp') -%} - - {{ drop_relation_if_exists(preexisting_intermediate_relation) }} - {{ drop_relation_if_exists(preexisting_backup_relation) }} - {{ drop_relation_if_exists(view_relation) }} - {{ drop_relation_if_exists(distributed_intermediate_relation) }} - - {{ run_hooks(pre_hooks, inside_transaction=False) }} - {{ run_hooks(pre_hooks, inside_transaction=True) }} - {% set to_drop = [] %} - {% set schema_changes = none %} - - {% call statement('main') %} - {{ create_view_as(view_relation, sql) }} - {% endcall %} - - {% if existing_relation is none %} - -- No existing table, simply create a new one - {{ create_distributed_local_table(target_relation, target_relation_local, view_relation, sql) }} - - {% elif full_refresh_mode %} - -- Completely replacing the old table, so create a temporary table and then swap it - {{ create_distributed_local_table(distributed_intermediate_relation, intermediate_relation, view_relation, sql) }} - {% do adapter.drop_relation(distributed_intermediate_relation) or '' %} - {% set need_swap = true %} - - {% elif inserts_only or unique_key is none -%} - -- There are no updates/deletes or duplicate keys are allowed. Simply add all of the new rows to the existing - -- table. It is the user's responsibility to avoid duplicates. Note that "inserts_only" is a ClickHouse adapter - -- specific configurable that is used to avoid creating an expensive intermediate table. 
- {% call statement('main') %} - {{ clickhouse__insert_into(target_relation, sql) }} - {% endcall %} - - {% else %} - {% set incremental_strategy = adapter.calculate_incremental_strategy(config.get('incremental_strategy')) %} - {% set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) %} - {% if on_schema_change != 'ignore' %} - {%- set schema_changes = check_for_schema_changes(existing_relation, target_relation) -%} - {% if schema_changes['schema_changed'] and incremental_strategy in ('append', 'delete_insert') %} - {% set incremental_strategy = 'legacy' %} - {% do log('Schema changes detected, switching to legacy incremental strategy') %} - {% endif %} - {% endif %} - {% if incremental_strategy != 'delete_insert' and incremental_predicates %} - {% do exceptions.raise_compiler_error('Cannot apply incremental predicates with ' + incremental_strategy + ' strategy.') %} - {% endif %} - {% if incremental_strategy == 'legacy' %} - {% do clickhouse__incremental_legacy(existing_relation, intermediate_relation, schema_changes, unique_key, True) %} - {% set need_swap = true %} - {% elif incremental_strategy == 'delete_insert' %} - {% do clickhouse__incremental_delete_insert(existing_relation, unique_key, incremental_predicates, True) %} - {% elif incremental_strategy == 'append' %} - {% call statement('main') %} - {{ clickhouse__insert_into(target_relation, sql) }} - {% endcall %} - {% endif %} - {% endif %} - - {% if need_swap %} - {% if False %} - {% do adapter.rename_relation(intermediate_relation, backup_relation) %} - {% do exchange_tables_atomic(backup_relation, target_relation_local) %} - {% else %} - {% do adapter.rename_relation(target_relation_local, backup_relation) %} - {% do adapter.rename_relation(intermediate_relation, target_relation_local) %} - {% endif %} - - -- Structure could have changed, need to update distributed table from replaced local table - {% set target_relation_new = 
target_relation.incorporate(path={"identifier": target_relation.identifier + '_temp'}) %} - {{ drop_relation_if_exists(target_relation_new) }} - {% do run_query(create_distributed_table(target_relation_new, target_relation_local)) %} - - {% if False %} - {% do adapter.rename_relation(target_relation_new, distributed_backup_relation) %} - {% do exchange_tables_atomic(distributed_backup_relation, target_relation) %} - {% else %} - {% do adapter.rename_relation(target_relation, distributed_backup_relation) %} - {% do adapter.rename_relation(target_relation_new, target_relation) %} - {% endif %} - - {% do to_drop.append(backup_relation) %} - {% do to_drop.append(distributed_backup_relation) %} - {% endif %} - - {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %} - {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} - {% do apply_grants(target_relation_local, grant_config, should_revoke=should_revoke) %} - - {% do persist_docs(target_relation, model) %} - - {% if existing_relation is none or existing_relation.is_view or should_full_refresh() %} - {% do create_indexes(target_relation) %} - {% endif %} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - - {% do adapter.commit() %} - - {% for rel in to_drop %} - {% do adapter.drop_relation(rel) %} - {% endfor %} - - {{ run_hooks(post_hooks, inside_transaction=False) }} - - {{ return({'relations': [target_relation]}) }} - -{%- endmaterialization %} \ No newline at end of file diff --git a/dbt/include/clickhouse/macros/materializations/incremental/is_incremental.sql b/dbt/include/clickhouse/macros/materializations/incremental/is_incremental.sql deleted file mode 100644 index 552e0ac5..00000000 --- a/dbt/include/clickhouse/macros/materializations/incremental/is_incremental.sql +++ /dev/null @@ -1,13 +0,0 @@ - -{% macro is_incremental() %} - {#-- do not run introspective queries in parsing #} - {% if not execute %} - {{ return(False) }} - {% else %} - {% set 
relation = adapter.get_relation(this.database, this.schema, this.table) %} - {{ return(relation is not none - and relation.type == 'table' - and (model.config.materialized == 'incremental' or model.config.materialized == 'distributed_incremental' ) - and not should_full_refresh()) }} - {% endif %} -{% endmacro %} diff --git a/dbt/include/clickhouse/macros/materializations/materialized_view.sql b/dbt/include/clickhouse/macros/materializations/materialized_view.sql deleted file mode 100644 index 293cc41b..00000000 --- a/dbt/include/clickhouse/macros/materializations/materialized_view.sql +++ /dev/null @@ -1,119 +0,0 @@ -{#- - Create or update a materialized view in ClickHouse. - This involves creating both the materialized view itself and a - target table that the materialized view writes to. --#} -{%- materialization materialized_view, adapter='clickhouse' -%} - - {%- set target_relation = this.incorporate(type='table') -%} - {%- set mv_relation = target_relation.derivative('_mv', 'MaterializedView') -%} - {%- set cluster_clause = on_cluster_clause(target_relation) -%} - - {# look for an existing relation for the target table and create backup relations if necessary #} - {%- set existing_relation = load_cached_relation(this) -%} - {%- set backup_relation = none -%} - {%- set preexisting_backup_relation = none -%} - {%- set preexisting_intermediate_relation = none -%} - {% if existing_relation is not none %} - {%- set backup_relation_type = existing_relation.type -%} - {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%} - {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} - {% if not existing_relation.can_exchange %} - {%- set intermediate_relation = make_intermediate_relation(target_relation) -%} - {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%} - {% endif %} - {% endif %} - - {% set grant_config = config.get('grants') %} - - {{ run_hooks(pre_hooks, 
inside_transaction=False) }} - - -- drop the temp relations if they exist already in the database - {{ drop_relation_if_exists(preexisting_intermediate_relation) }} - {{ drop_relation_if_exists(preexisting_backup_relation) }} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} - - {% if backup_relation is none %} - {{ log('Creating new materialized view ' + target_relation.name )}} - {% call statement('main') -%} - {{ clickhouse__get_create_materialized_view_as_sql(target_relation, sql) }} - {%- endcall %} - {% elif existing_relation.can_exchange %} - {{ log('Replacing existing materialized view' + target_relation.name) }} - {% call statement('drop existing materialized view') %} - drop view if exists {{ mv_relation }} {{ cluster_clause }} - {% endcall %} - {% call statement('main') -%} - {{ get_create_table_as_sql(False, backup_relation, sql) }} - {%- endcall %} - {% do exchange_tables_atomic(backup_relation, existing_relation) %} - {% call statement('create new materialized view') %} - {{ clickhouse__create_mv_sql(mv_relation, existing_relation.name, cluster_clause, sql) }} - {% endcall %} - {% else %} - {{ log('Replacing existing materialized view' + target_relation.name) }} - {{ clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql) }} - {% endif %} - - -- cleanup - {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} - {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} - - {% do persist_docs(target_relation, model) %} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - - {{ adapter.commit() }} - - {{ drop_relation_if_exists(backup_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=False) }} - - {{ return({'relations': [target_relation, mv_relation]}) }} - -{%- endmaterialization -%} - - -{# - There are two steps to creating a materialized view: - 1. Create a new table based on the SQL in the model - 2. 
Create a materialized view using the SQL in the model that inserts - data into the table creating during step 1 -#} -{% macro clickhouse__get_create_materialized_view_as_sql(relation, sql) -%} - {% call statement('create_target_table') %} - {{ get_create_table_as_sql(False, relation, sql) }} - {% endcall %} - {%- set cluster_clause = on_cluster_clause(relation) -%} - {%- set mv_relation = relation.derivative('_mv', 'MaterializedView') -%} - {{ clickhouse__create_mv_sql(mv_relation, relation, cluster_clause, sql) }} -{%- endmacro %} - - -{% macro clickhouse__create_mv_sql(mv_relation, target_table, cluster_clause, sql) -%} - create materialized view if not exists {{ mv_relation }} {{ cluster_clause }} - to {{ target_table }} - as {{ sql }} -{%- endmacro %} - - -{% macro clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql) %} - {# drop existing materialized view while we recreate the target table #} - {%- set cluster_clause = on_cluster_clause(target_relation) -%} - {%- set mv_relation = target_relation.derivative('_mv', 'MaterializedView') -%} - {% call statement('drop existing mv') -%} - drop view if exists {{ mv_relation }} {{ cluster_clause }} - {%- endcall %} - - {# recreate the target table #} - {% call statement('main') -%} - {{ get_create_table_as_sql(False, intermediate_relation, sql) }} - {%- endcall %} - {{ adapter.rename_relation(existing_relation, backup_relation) }} - {{ adapter.rename_relation(intermediate_relation, target_relation) }} - - {# now that the target table is recreated, we can finally create our new view #} - {{ clickhouse__create_mv_sql(mv_relation, target_relation, cluster_clause, sql) }} -{% endmacro %} diff --git a/dbt/include/clickhouse/macros/materializations/seed.sql b/dbt/include/clickhouse/macros/materializations/seed.sql index f05a5ac4..c5f7a02b 100644 --- a/dbt/include/clickhouse/macros/materializations/seed.sql +++ b/dbt/include/clickhouse/macros/materializations/seed.sql @@ 
-4,7 +4,7 @@ {% set sql -%} insert into {{ this.render() }} ({{ cols_sql }}) - {{ adapter.get_model_query_settings(model) }} + {{ adapter.get_model_settings(model) }} format CSV {{ data_sql }} {%- endset %} @@ -17,7 +17,7 @@ {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} {% set sql %} - create table {{ this.render() }} {{ on_cluster_clause(this) }} ( + create table {{ this.render() }} {{ on_cluster_clause() }} ( {%- for col_name in agate_table.column_names -%} {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%} {%- set type = column_override.get(col_name, inferred_type) -%} diff --git a/dbt/include/clickhouse/macros/materializations/snapshot.sql b/dbt/include/clickhouse/macros/materializations/snapshot.sql index 71e5acc5..2a317736 100644 --- a/dbt/include/clickhouse/macros/materializations/snapshot.sql +++ b/dbt/include/clickhouse/macros/materializations/snapshot.sql @@ -25,7 +25,7 @@ {%- set insert_cols_csv = insert_cols | join(', ') -%} {%- set valid_to_col = adapter.quote('dbt_valid_to') -%} - {%- set upsert = target.derivative('__snapshot_upsert') -%} + {%- set upsert = target ~ '__snapshot_upsert' -%} {% call statement('create_upsert_relation') %} create table if not exists {{ upsert }} as {{ target }} {% endcall %} diff --git a/dbt/include/clickhouse/macros/materializations/table.sql b/dbt/include/clickhouse/macros/materializations/table.sql index 3cb95cff..4304ac55 100644 --- a/dbt/include/clickhouse/macros/materializations/table.sql +++ b/dbt/include/clickhouse/macros/materializations/table.sql @@ -121,67 +121,52 @@ {%- endif %} {%- endmacro -%} -{% macro on_cluster_clause(relation, force_sync) %} +{% macro on_cluster_clause(label) %} {% set active_cluster = adapter.get_clickhouse_cluster_name() %} - {%- if active_cluster is not none and relation.should_on_cluster %} + {%- if active_cluster is not none %} {# Add trailing whitespace to avoid problems when this clause is not last #} ON CLUSTER {{ 
active_cluster + ' ' }} - {%- if force_sync %} - SYNC - {%- endif %} {%- endif %} {%- endmacro -%} {% macro clickhouse__create_table_as(temporary, relation, sql) -%} - {% set has_contract = config.get('contract').enforced %} - {% set create_table = create_table_or_empty(temporary, relation, sql, has_contract) %} + {% set create_table = create_table_or_empty(temporary, relation, sql) %} {% if adapter.is_before_version('22.7.1.2484') -%} {{ create_table }} {%- else %} {% call statement('create_table_empty') %} {{ create_table }} {% endcall %} - {{ clickhouse__insert_into(relation, sql, has_contract) }} + {{ clickhouse__insert_into(relation.include(database=False), sql) }} {%- endif %} {%- endmacro %} -{% macro create_table_or_empty(temporary, relation, sql, has_contract) -%} +{% macro create_table_or_empty(temporary, relation, sql) -%} {%- set sql_header = config.get('sql_header', none) -%} {{ sql_header if sql_header is not none }} {% if temporary -%} - create temporary table {{ relation }} + create temporary table {{ relation.name }} engine Memory {{ order_cols(label="order by") }} {{ partition_cols(label="partition by") }} {{ adapter.get_model_settings(model) }} - as ( - {{ sql }} - ) {%- else %} - create table {{ relation }} - {{ on_cluster_clause(relation)}} - {%- if has_contract%} - {{ get_assert_columns_equivalent(sql) }} - {{ get_table_columns_and_constraints() }} - {%- endif %} + create table {{ relation.include(database=False) }} + {{ on_cluster_clause()}} {{ engine_clause() }} {{ order_cols(label="order by") }} {{ primary_key_clause(label="primary key") }} {{ partition_cols(label="partition by") }} {{ adapter.get_model_settings(model) }} - - {%- if not has_contract %} - {%- if not adapter.is_before_version('22.7.1.2484') %} + {% if not adapter.is_before_version('22.7.1.2484') -%} empty - {%- endif %} - as ( - {{ sql }} - ) {%- endif %} {%- endif %} - + as ( + {{ sql }} + ) {%- endmacro %} {% macro clickhouse__insert_into(target_relation, sql, 
override_name) %} @@ -189,13 +174,7 @@ {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} {%- set target_name = override_name or target_relation.name -%} - insert into {{ target_relation }} - ({{ dest_cols_csv }}) - {%- if has_contract -%} - -- Use a subquery to get columns in the right order - SELECT {{ dest_cols_csv }} FROM ( {{ sql }} ) - {%- else -%} - {{ sql }} - {{ adapter.get_model_query_settings(model) }} - {%- endif -%} + insert into {{ target_name }} ({{ dest_cols_csv }}) + {{ sql }} + SETTINGS mutations_sync = 2, insert_distributed_sync = 1 {%- endmacro %} diff --git a/dbt/include/clickhouse/macros/materializations/view.sql b/dbt/include/clickhouse/macros/materializations/view.sql index 735ec973..01ea6dcf 100644 --- a/dbt/include/clickhouse/macros/materializations/view.sql +++ b/dbt/include/clickhouse/macros/materializations/view.sql @@ -65,19 +65,3 @@ {{ return({'relations': [target_relation]}) }} {%- endmaterialization -%} - - -{% macro clickhouse__create_view_as(relation, sql) -%} - {%- set sql_header = config.get('sql_header', none) -%} - {{ sql_header if sql_header is not none }} - - create view {{ relation.include(database=False) }} {{ on_cluster_clause(relation)}} - {% set contract_config = config.get('contract') %} - {% if contract_config.enforced %} - {{ get_assert_columns_equivalent(sql) }} - {%- endif %} - as ( - {{ sql }} - ) -{%- endmacro %} - diff --git a/dbt/include/clickhouse/macros/persist_docs.sql b/dbt/include/clickhouse/macros/persist_docs.sql index 5e175fd1..5d9db873 100644 --- a/dbt/include/clickhouse/macros/persist_docs.sql +++ b/dbt/include/clickhouse/macros/persist_docs.sql @@ -1,13 +1,13 @@ {% macro one_alter_relation(relation, alter_comments) %} - alter table {{ relation }} {{ on_cluster_clause(relation) }} {{ alter_comments }} + alter table {{ relation }} {{ on_cluster_clause() }} {{ alter_comments }} {% endmacro %} {% macro one_alter_column_comment(relation, column_name, comment) %} - alter table 
{{ relation }} {{ on_cluster_clause(relation) }} comment column {{ column_name }} '{{ comment }}' + alter table {{ relation }} {{ on_cluster_clause() }} comment column {{ column_name }} '{{ comment }}' {% endmacro %} {% macro clickhouse__alter_relation_comment(relation, comment) %} - alter table {{ relation }} {{ on_cluster_clause(relation) }} modify comment '{{ comment }}' + alter table {{ relation }} {{ on_cluster_clause() }} modify comment '{{ comment }}' {% endmacro %} {% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) %} diff --git a/dev_requirements.txt b/dev_requirements.txt index fcadbaaf..4f5a9403 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,16 +1,16 @@ -dbt-core~=1.7.3 -clickhouse-connect>=0.6.22 -clickhouse-driver>=0.2.6 +dbt-core~=1.4.1 +clickhouse-connect>=0.5.24 +clickhouse-driver>=0.2.3 pytest>=7.2.0 pytest-dotenv==0.5.2 -dbt-tests-adapter~=1.7.3 -black==23.11.0 +dbt-tests-adapter~=1.4.1 +black==22.3.0 isort==5.10.1 mypy==0.991 yamllint==1.26.3 flake8==4.0.1 types-requests==2.27.29 -agate~=1.7.1 +agate~=1.6.3 requests~=2.27.1 setuptools~=65.3.0 types-setuptools==67.1.0.0 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 68570715..34c3848d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.black] line-length = 100 skip-string-normalization = true -target-version = ['py310', 'py311'] +target-version = ['py38', 'py39'] exclude = '(\.eggs|\.git|\.mypy_cache|\.venv|venv|env|_build|build|build|dist|)' [tool.isort] diff --git a/setup.py b/setup.py index 7beb9ba9..1a2d2359 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def _dbt_clickhouse_version(): package_version = _dbt_clickhouse_version() description = '''The Clickhouse plugin for dbt (data build tool)''' -dbt_version = '1.7.0' +dbt_version = '1.4.0' dbt_minor = '.'.join(dbt_version.split('.')[0:2]) if not package_version.startswith(dbt_minor): @@ -55,10 +55,10 @@ def _dbt_clickhouse_version(): }, 
install_requires=[ f'dbt-core~={dbt_version}', - 'clickhouse-connect>=0.6.22', - 'clickhouse-driver>=0.2.6', + 'clickhouse-connect>=0.5.24', + 'clickhouse-driver>=0.2.3', ], - python_requires=">=3.8", + python_requires=">=3.7", platforms='any', classifiers=[ 'Development Status :: 5 - Production/Stable', @@ -66,10 +66,10 @@ def _dbt_clickhouse_version(): 'Operating System :: Microsoft :: Windows', 'Operating System :: MacOS :: MacOS X', 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', ], ) diff --git a/tests/conftest.py b/tests/conftest.py index 89fc9395..a04b964b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,3 @@ -import os -import time - -os.environ['TZ'] = 'UTC' -time.tzset() - - # Import the standard integration fixtures as a plugin # Note: fixtures with session scope need to be local pytest_plugins = ["dbt.tests.fixtures.project"] diff --git a/tests/integration/adapter/aliases/test_aliases.py b/tests/integration/adapter/aliases/test_aliases.py deleted file mode 100644 index a9a3d585..00000000 --- a/tests/integration/adapter/aliases/test_aliases.py +++ /dev/null @@ -1,101 +0,0 @@ -import os - -import pytest -from dbt.tests.adapter.aliases.fixtures import ( - MODELS__ALIAS_IN_PROJECT_SQL, - MODELS__ALIAS_IN_PROJECT_WITH_OVERRIDE_SQL, - MODELS__SCHEMA_YML, -) -from dbt.tests.adapter.aliases.test_aliases import ( - BaseAliasErrors, - BaseAliases, - BaseSameAliasDifferentDatabases, - BaseSameAliasDifferentSchemas, -) -from dbt.tests.util import relation_from_name, run_dbt - -MODELS__DISTRIBUTED_FOO_ALIAS_SQL = """ - -{{ - config( - alias='foo', - materialized='distributed_table' - ) -}} - -select {{ string_literal(this.name) }} as tablename - -""" - -MODELS__DISTRIBUTED_REF_FOO_ALIAS_SQL = """ - -{{ - 
config( - materialized='distributed_table' - ) -}} - -with trigger_ref as ( - - -- we should still be able to ref a model by its filepath - select * from {{ ref('foo_alias') }} - -) - --- this name should still be the filename -select {{ string_literal(this.name) }} as tablename - -""" - - -class TestAliases(BaseAliases): - pass - - -class TestAliasErrors(BaseAliasErrors): - pass - - -class TestSameAliasDifferentSchemas(BaseSameAliasDifferentSchemas): - pass - - -class TestSameAliasDifferentDatabases(BaseSameAliasDifferentDatabases): - pass - - -class TestDistributedAliases(BaseAliases): - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": MODELS__SCHEMA_YML, - "foo_alias.sql": MODELS__DISTRIBUTED_FOO_ALIAS_SQL, - "alias_in_project.sql": MODELS__ALIAS_IN_PROJECT_SQL, - "alias_in_project_with_override.sql": MODELS__ALIAS_IN_PROJECT_WITH_OVERRIDE_SQL, - "ref_foo_alias.sql": MODELS__DISTRIBUTED_REF_FOO_ALIAS_SQL, - } - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_alias_model_name(self, project): - results = run_dbt(["run"]) - assert len(results) == 4 - - cluster = project.test_config['cluster'] - local_relation = relation_from_name(project.adapter, "foo_local") - - result = project.run_sql( - f"select max(tablename) AS tablename From clusterAllReplicas('{cluster}', {local_relation}) ", - fetch="one", - ) - assert result[0] == "foo" - - local_relation = relation_from_name(project.adapter, "ref_foo_alias_local") - result = project.run_sql( - f"select max(tablename) AS tablename From clusterAllReplicas('{cluster}', {local_relation}) ", - fetch="one", - ) - assert result[0] == "ref_foo_alias" - - run_dbt(["test"]) diff --git a/tests/integration/adapter/basic/test_adapter_methods.py b/tests/integration/adapter/basic/test_adapter_methods.py deleted file mode 100644 index 8a70c3c6..00000000 --- a/tests/integration/adapter/basic/test_adapter_methods.py +++ /dev/null @@ 
-1,9 +0,0 @@ -from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod - - -class TestBaseAdapterMethod(BaseAdapterMethod): - pass - - -class TestBaseCaching(BaseAdapterMethod): - pass diff --git a/tests/integration/adapter/basic/test_base.py b/tests/integration/adapter/basic/test_base.py deleted file mode 100644 index e5ef1a69..00000000 --- a/tests/integration/adapter/basic/test_base.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations - - -class TestBaseSimpleMaterializations(BaseSimpleMaterializations): - pass diff --git a/tests/integration/adapter/basic/test_basic.py b/tests/integration/adapter/basic/test_basic.py deleted file mode 100644 index 75936f0b..00000000 --- a/tests/integration/adapter/basic/test_basic.py +++ /dev/null @@ -1,97 +0,0 @@ -import os - -import pytest -from dbt.tests.util import run_dbt - -# CSV content with boolean column type. -seeds_boolean_csv = """ -key,value -abc,true -def,false -hij,true -klm,false -""".lstrip() - -# CSV content with empty fields. 
-seeds_empty_csv = """ -key,val1,val2,str1 -abc,1,1,some_str -abc,1,0,"another string" -def,1,0, -hij,1,1,Caps -hij,1,,"second string" -klm,1,0,"test" -klm,1,,"test4" -""".lstrip() - -seeds_schema_yml = """ -version: 2 - -seeds: - - name: empty - config: - column_types: - val2: Nullable(UInt32) - str1: Nullable(String) - settings: - allow_nullable_key: 1 -""" - -replicated_seeds_schema_yml = """ -version: 2 - -seeds: - - name: empty - config: - engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) - column_types: - val2: Nullable(UInt32) - str1: Nullable(String) -""" - -base_seeds_schema_yml = """ -version: 2 - -seeds: - - name: base - config: - engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) -""" - - -class TestCSVSeed: - @pytest.fixture(scope="class") - def seeds(self): - return { - "schema.yml": seeds_schema_yml, - "boolean.csv": seeds_boolean_csv, - "empty.csv": seeds_empty_csv, - } - - def test_seed(self, project): - # seed command - results = run_dbt(["seed"]) - assert len(results) == 2 - columns = project.run_sql("DESCRIBE TABLE empty", fetch='all') - assert columns[2][1] == 'Nullable(UInt32)' - assert columns[3][1] == 'Nullable(String)' - - -class TestReplicatedCSVSeed: - @pytest.fixture(scope="class") - def seeds(self): - return { - "schema.yml": replicated_seeds_schema_yml, - "empty.csv": seeds_empty_csv, - } - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_seed(self, project): - # seed command - results = run_dbt(["seed"]) - assert len(results) == 1 - columns = project.run_sql("DESCRIBE TABLE empty", fetch='all') - assert columns[2][1] == 'Nullable(UInt32)' - assert columns[3][1] == 'Nullable(String)' diff --git a/tests/integration/adapter/basic/test_empty.py b/tests/integration/adapter/basic/test_empty.py deleted file mode 100644 index 4ef30c3f..00000000 --- a/tests/integration/adapter/basic/test_empty.py +++ 
/dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_empty import BaseEmpty - - -class TestEmpty(BaseEmpty): - pass diff --git a/tests/integration/adapter/basic/test_ephemeral.py b/tests/integration/adapter/basic/test_ephemeral.py deleted file mode 100644 index c04caa0b..00000000 --- a/tests/integration/adapter/basic/test_ephemeral.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral - - -class TestEphemeral(BaseEphemeral): - pass diff --git a/tests/integration/adapter/basic/test_generic_tests.py b/tests/integration/adapter/basic/test_generic_tests.py deleted file mode 100644 index 63246ea9..00000000 --- a/tests/integration/adapter/basic/test_generic_tests.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests - - -class TestGenericTests(BaseGenericTests): - pass diff --git a/tests/integration/adapter/basic/test_incremental.py b/tests/integration/adapter/basic/test_incremental.py deleted file mode 100644 index c50d477a..00000000 --- a/tests/integration/adapter/basic/test_incremental.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest -from dbt.tests.adapter.basic.test_incremental import BaseIncremental, BaseIncrementalNotSchemaChange - - -class TestIncremental(BaseIncremental): - pass - - -incremental_not_schema_change_sql = """ -{{ config(materialized="incremental", unique_key="user_id_current_time",on_schema_change="append_new_columns") }} -select - toString(1) || '-' || toString(now64()) as user_id_current_time, - {% if is_incremental() %} - 'thisis18characters' as platform - {% else %} - 'okthisis20characters' as platform - {% endif %} -""" - - -class TestIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): - @pytest.fixture(scope="class") - def models(self): - return {"incremental_not_schema_change.sql": incremental_not_schema_change_sql} diff --git a/tests/integration/adapter/basic/test_singular_tests.py 
b/tests/integration/adapter/basic/test_singular_tests.py deleted file mode 100644 index 2e5d7917..00000000 --- a/tests/integration/adapter/basic/test_singular_tests.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests - - -class TestSingularTests(BaseSingularTests): - pass diff --git a/tests/integration/adapter/basic/test_snapshot_check_cols.py b/tests/integration/adapter/basic/test_snapshot_check_cols.py deleted file mode 100644 index 3a57d7f4..00000000 --- a/tests/integration/adapter/basic/test_snapshot_check_cols.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols - - -class TestSnapshotCheckCols(BaseSnapshotCheckCols): - pass diff --git a/tests/integration/adapter/basic/test_snapshot_timestamp.py b/tests/integration/adapter/basic/test_snapshot_timestamp.py deleted file mode 100644 index d9ebf373..00000000 --- a/tests/integration/adapter/basic/test_snapshot_timestamp.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp - - -class TestSnapshotTimestamp(BaseSnapshotTimestamp): - pass diff --git a/tests/integration/adapter/basic/test_table_materialization.py b/tests/integration/adapter/basic/test_table_materialization.py deleted file mode 100644 index 4664f189..00000000 --- a/tests/integration/adapter/basic/test_table_materialization.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_table_materialization import BaseTableMaterialization - - -class TestTableMat(BaseTableMaterialization): - pass diff --git a/tests/integration/adapter/basic/test_validate_connection.py b/tests/integration/adapter/basic/test_validate_connection.py deleted file mode 100644 index e1389e65..00000000 --- a/tests/integration/adapter/basic/test_validate_connection.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection - - -class 
TestValidateConnection(BaseValidateConnection): - pass diff --git a/tests/integration/adapter/caching/test_caching.py b/tests/integration/adapter/caching/test_caching.py deleted file mode 100644 index f71dffd0..00000000 --- a/tests/integration/adapter/caching/test_caching.py +++ /dev/null @@ -1,99 +0,0 @@ -import pytest -from dbt.tests.util import run_dbt - -model_sql = """ -{{ - config( - materialized='table' - ) -}} -select 1 as id -""" - -another_schema_model_sql = """ -{{ - config( - materialized='table', - schema='another_schema' - ) -}} -select 1 as id -""" - - -class BaseCachingTest: - @pytest.fixture(scope="class") - def project_config_update(self): - return { - "config-version": 2, - "quoting": { - "identifier": False, - "schema": False, - }, - } - - def run_and_inspect_cache(self, project, run_args=None): - run_dbt(run_args) - - # the cache was empty at the start of the run. - # the model materialization returned a relation and added to the cache. - adapter = project.adapter - assert len(adapter.cache.relations) == 1 - relation = list(adapter.cache.relations).pop() - assert relation.schema == project.test_schema - - # on the second run, dbt will find a relation in the database during cache population. 
- run_dbt(run_args) - adapter = project.adapter - assert len(adapter.cache.relations) == 1 - second_relation = list(adapter.cache.relations).pop() - - for key in ["schema", "identifier"]: - assert getattr(relation, key) == getattr(second_relation, key) - - def test_cache(self, project): - self.run_and_inspect_cache(project, run_args=["run"]) - - -class TestNoPopulateCache(BaseCachingTest): - @pytest.fixture(scope="class") - def models(self): - return { - "model.sql": model_sql, - } - - def test_cache(self, project): - # --no-populate-cache still allows the cache to populate all relations - # under a schema, so the behavior here remains the same as other tests - run_args = ["--no-populate-cache", "run"] - self.run_and_inspect_cache(project, run_args) - - -class TestCachingLowerCaseModel(BaseCachingTest): - @pytest.fixture(scope="class") - def models(self): - return { - "model.sql": model_sql, - } - - -class TestCachingUppercaseModel(BaseCachingTest): - @pytest.fixture(scope="class") - def models(self): - return { - "MODEL.sql": model_sql, - } - - -class TestCachingSelectedSchemaOnly(BaseCachingTest): - @pytest.fixture(scope="class") - def models(self): - return { - "model.sql": model_sql, - "another_schema_model.sql": another_schema_model_sql, - } - - def test_cache(self, project): - # this should only cache the schema containing the selected model - run_args = ["--cache-selected-only", "run", "--select", "model"] - self.run_and_inspect_cache(project, run_args) diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_errors.py b/tests/integration/adapter/clickhouse/test_clickhouse_errors.py deleted file mode 100644 index cd841e83..00000000 --- a/tests/integration/adapter/clickhouse/test_clickhouse_errors.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest -from dbt.tests.util import run_dbt - -oom_table_sql = """ -SELECT a FROM system.numbers_mt GROUP BY repeat(toString(number), 100000) as a -""" - -schema_yaml = """ -version: 2 - -models: - - name: oom_table 
- description: Table that generates OOM - config: - materialized: table - order_by: a -""" - - -class TestOOMError: - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": schema_yaml, - "oom_table.sql": oom_table_sql, - } - - def test_oom(self, project): - res = run_dbt(["run"], expect_pass=False) - assert 'exceeded' in res.results[0].message diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_sql_header.py b/tests/integration/adapter/clickhouse/test_clickhouse_sql_header.py deleted file mode 100644 index 0b135a96..00000000 --- a/tests/integration/adapter/clickhouse/test_clickhouse_sql_header.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -from dbt.tests.util import run_dbt_and_capture - -my_model_sql_header_sql = """ -{{ - config( - materialized = "table", - ) -}} - -{% call set_sql_header(config) %} -set log_comment = 'TEST_LOG_COMMENT'; -{%- endcall %} -select getSettings('log_comment') as column_name -""" - - -class TestSQLHeader: - @pytest.fixture(scope="class") - def models(self): - return { - "my_model_sql_header.sql": my_model_sql_header_sql, - } - - def test__sql_header(self, project): - _, log_output = run_dbt_and_capture(["run", "-s", "my_model_sql_header"], expect_pass=False) - - assert 'Multi-statements' in log_output diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_table_materializations.py b/tests/integration/adapter/clickhouse/test_clickhouse_table_materializations.py deleted file mode 100644 index ff6e2efb..00000000 --- a/tests/integration/adapter/clickhouse/test_clickhouse_table_materializations.py +++ /dev/null @@ -1,251 +0,0 @@ -import os - -import pytest -from dbt.tests.adapter.basic.files import model_base, schema_base_yml, seeds_base_csv -from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations -from dbt.tests.util import ( - check_relation_types, - check_relations_equal, - check_result_nodes_by_name, - relation_from_name, - run_dbt, -) - -from 
tests.integration.adapter.basic.test_basic import base_seeds_schema_yml - - -class TestMergeTreeTableMaterialization(BaseSimpleMaterializations): - @pytest.fixture(scope="class") - def models(self): - config_materialized_table = """ - {{ config( - order_by='(some_date, id, name)', - engine='MergeTree()', - materialized='table', - settings={'allow_nullable_key': 1}, - query_settings={'allow_nondeterministic_mutations': 1}) - }} - """ - base_table_sql = config_materialized_table + model_base - return { - "table_model.sql": base_table_sql, - "schema.yml": schema_base_yml, - } - - def test_base(self, project): - # seed command - results = run_dbt(["seed"]) - # seed result length - assert len(results) == 1 - - # run command - results = run_dbt() - # run result length - assert len(results) == 1 - - check_relation_types(project.adapter, {"table_model": "table"}) - - # base table rowcount - relation = relation_from_name(project.adapter, "table_model") - result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") - assert result[0] == 10 - - -class TestDistributedMaterializations(BaseSimpleMaterializations): - '''Test distributed materializations and check if data is properly distributed/replicated''' - - @pytest.fixture(scope="class") - def models(self): - config_distributed_table = """ - {{ config( - order_by='(some_date, id, name)', - engine='MergeTree()', - materialized='distributed_table', - settings={'allow_nullable_key': 1}) - }} - """ - return { - "distributed.sql": config_distributed_table + model_base, - "schema.yml": schema_base_yml, - } - - @pytest.fixture(scope="class") - def seeds(self): - return { - "schema.yml": base_seeds_schema_yml, - "base.csv": seeds_base_csv, - } - - def assert_total_count_correct(self, project): - # Check if data is properly distributed - cluster = project.test_config['cluster'] - table_relation = relation_from_name(project.adapter, "distributed_local") - cluster_info = project.run_sql( - f"select 
shard_num,max(host_name) as host_name, count(distinct replica_num) as replica_counts " - f"from system.clusters where cluster='{cluster}' group by shard_num", - fetch="all", - ) - sum_count = project.run_sql( - f"select count() From clusterAllReplicas('{cluster}',{table_relation})", - fetch="one", - ) - total_count = 0 - # total count should be equal to sum(count of each shard * replica_counts) - for shard_num, host_name, replica_counts in cluster_info: - count = project.run_sql( - f"select count() From remote('{host_name}',{table_relation})", - fetch="one", - ) - total_count += count[0] * replica_counts - assert total_count == sum_count[0] - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_base(self, project): - # cluster setting must exist - cluster = project.test_config['cluster'] - assert cluster - - # seed command - results = run_dbt(["seed"]) - # seed result length - assert len(results) == 1 - - # run command - results = run_dbt() - # run result length - assert len(results) == 1 - - # names exist in result nodes - check_result_nodes_by_name(results, ["distributed"]) - - # check relation types - expected = { - "base": "table", - "distributed": "table", - } - check_relation_types(project.adapter, expected) - - relation = relation_from_name(project.adapter, "base") - # table rowcount - result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") - assert result[0] == 10 - - # relations_equal - check_relations_equal(project.adapter, ["base", "distributed"]) - - # check result - self.assert_total_count_correct(project) - - # run full-refresh - results = run_dbt(['run', '--full-refresh']) - # run result length - assert len(results) == 1 - # check result - self.assert_total_count_correct(project) - - # check relations in catalog - catalog = run_dbt(["docs", "generate"]) - assert len(catalog.nodes) == 2 - assert len(catalog.sources) == 1 - - @pytest.mark.skipif( - 
os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() != '', reason='Not on a cluster' - ) - def test_no_cluster_setting(self, project): - result = run_dbt(['run', '--select', 'distributed'], False) - assert result[0].status == 'error' - assert 'Compilation Error' in result[0].message - - -class TestReplicatedTableMaterialization(BaseSimpleMaterializations): - '''Test ReplicatedMergeTree table with table materialization''' - - @pytest.fixture(scope="class") - def models(self): - config_replicated_table = """ - {{ config( - order_by='(some_date, id, name)', - engine="ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' )", - materialized='table', - settings={'allow_nullable_key': 1}) - }} - """ - - return { - "replicated.sql": config_replicated_table + model_base, - "schema.yml": schema_base_yml, - } - - def assert_total_count_correct(self, project): - '''Check if table is created on cluster and data is properly replicated''' - cluster = project.test_config['cluster'] - # check if data is properly distributed/replicated - table_relation = relation_from_name(project.adapter, "replicated") - # ClickHouse cluster in the docker-compose file - # under tests/integration is configured with 3 nodes - host_count = project.run_sql( - f"select count(host_name) as host_count from system.clusters where cluster='{cluster}'", - fetch="one", - ) - assert host_count[0] > 1 - - table_count = project.run_sql( - f"select count() From clusterAllReplicas('{cluster}', system.tables) " - f"where database='{table_relation.schema}' and name='{table_relation.identifier}'", - fetch="one", - ) - assert table_count[0] == host_count[0] - - sum_count = project.run_sql( - f"select count() From clusterAllReplicas('{cluster}',{table_relation})", - fetch="one", - ) - - assert sum_count[0] >= 20 - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_base(self, project): - # cluster setting must exist - cluster = 
project.test_config['cluster'] - assert cluster - - # seed command - results = run_dbt(["seed"]) - # seed result length - assert len(results) == 1 - - # run command - results = run_dbt() - # run result length - assert len(results) == 1 - - # names exist in result nodes - check_result_nodes_by_name(results, ["replicated"]) - - # check relation types - expected = { - "base": "table", - "replicated": "table", - } - check_relation_types(project.adapter, expected) - - relation = relation_from_name(project.adapter, "base") - # table rowcount - result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") - assert result[0] == 10 - - # relations_equal - check_relations_equal(project.adapter, ["base", "replicated"]) - - self.assert_total_count_correct(project) - - # run full refresh - results = run_dbt(['--debug', 'run', '--full-refresh']) - # run result length - assert len(results) == 1 - - self.assert_total_count_correct(project) diff --git a/tests/integration/adapter/constraints/fixtures_contraints.py b/tests/integration/adapter/constraints/fixtures_contraints.py deleted file mode 100644 index 508b25b1..00000000 --- a/tests/integration/adapter/constraints/fixtures_contraints.py +++ /dev/null @@ -1,258 +0,0 @@ -contract_model_schema_yml = """ -version: 2 -models: - - name: my_model - config: - contract: - enforced: true - columns: - - name: id - data_type: Int32 - description: hello - - name: color - data_type: String - - name: date_day - data_type: Date - - name: my_model_error - config: - contract: - enforced: true - columns: - - name: id - data_type: Int32 - description: hello - tests: - - unique - - name: color - data_type: String - - name: date_day - data_type: Date - - name: my_model_wrong_order - config: - contract: - enforced: true - columns: - - name: id - data_type: UInt32 - description: hello - tests: - - unique - - name: color - data_type: String - - name: date_day - data_type: Date - - name: my_model_wrong_name - config: - contract: - 
enforced: true - columns: - - name: id - data_type: Int32 - description: hello - - name: color - data_type: String - - name: date_day - data_type: Date -""" - - -# model columns in a different order to schema definitions -my_model_wrong_order_sql = """ -{{ - config( - materialized = "table" - ) -}} - -select - 'blue' as color, - 1::UInt32 as id, - toDate('2019-01-01') as date_day -""" - - -# model columns name different to schema definitions -my_model_wrong_name_sql = """ -{{ - config( - materialized = "table" - ) -}} - -select - 'blue' as color, - 1 as error, - '2019-01-01' as date_day -""" - - -my_model_data_type_sql = """ -{{{{ - config( - materialized = "table" - ) -}}}} - -select - {sql_value} as wrong_data_type_column_name -""" - - -model_data_type_schema_yml = """ -version: 2 -models: - - name: my_model_data_type - config: - contract: - enforced: true - columns: - - name: wrong_data_type_column_name - data_type: {data_type} -""" - -my_model_view_wrong_name_sql = """ -{{ - config( - materialized = "view" - ) -}} - -select - 'blue' as color, - 1 as error, - toDate('2019-01-01') as date_day -""" - -my_model_view_wrong_order_sql = """ -{{ - config( - materialized = "view" - ) -}} - -select - 'blue' as color, - 1::UInt32 as id, - toDate('2019-01-01') as date_day -""" - - -my_model_incremental_wrong_order_sql = """ -{{ - config( - materialized = "incremental", - on_schema_change='append_new_columns' - ) -}} - -select - 'blue' as color, - 1::UInt32 as id, - toDate('2019-01-01') as date_day -""" - -my_model_incremental_wrong_name_sql = """ -{{ - config( - materialized = "incremental", - on_schema_change='append_new_columns' - ) -}} - -select - 'blue' as color, - 1 as error, - '2019-01-01' as date_day -""" - -constraint_model_schema_yml = """ -version: 2 -models: - - name: bad_column_constraint_model - materialized: table - config: - contract: - enforced: true - columns: - - name: id - data_type: Int32 - constraints: - - type: check - expression: '> 0' - - name: 
color - data_type: String - - name: date_day - data_type: Date - - name: bad_foreign_key_model - config: - contract: - enforced: true - constraints: - - type: foreign_key - columns: [ id ] - expression: 'foreign_key_model (id)' - columns: - - name: id - data_type: Int32 - - name: check_constraints_model - config: - contract: - enforced: true - constraints: - - type: check - name: valid_id - expression: 'id > 100 and id < 200' - columns: - - name: id - data_type: Int32 - - name: color - data_type: String - - name: date_day - data_type: Date -""" - -bad_column_constraint_model_sql = """ -{{ - config( - materialized = "table" - ) -}} - -SELECT 5::Int32 as id, 'black' as color, toDate('2023-01-01') as date_day -""" - -bad_foreign_key_model_sql = """ -{{ - config( - materialized = "table" - ) -}} - -SELECT 1::Int32 as id -""" - -check_constraints_model_sql = """ -{{ - config( - materialized = "table", - ) -}} - -select - 'blue' as color, - 101::Int32 as id, - toDate('2019-01-01') as date_day -""" - -check_constraints_model_fail_sql = """ -{{ - config( - materialized = "table", - ) -}} - -select - 'blue' as color, - 1::Int32 as id, - toDate('2019-01-01') as date_day -""" diff --git a/tests/integration/adapter/constraints/test_constraints.py b/tests/integration/adapter/constraints/test_constraints.py deleted file mode 100644 index f18a7ca9..00000000 --- a/tests/integration/adapter/constraints/test_constraints.py +++ /dev/null @@ -1,190 +0,0 @@ -import pytest -from dbt.tests.util import get_manifest, run_dbt, run_dbt_and_capture, write_file -from fixtures_contraints import ( - bad_column_constraint_model_sql, - bad_foreign_key_model_sql, - check_constraints_model_fail_sql, - check_constraints_model_sql, - constraint_model_schema_yml, - contract_model_schema_yml, - model_data_type_schema_yml, - my_model_data_type_sql, - my_model_incremental_wrong_name_sql, - my_model_incremental_wrong_order_sql, - my_model_view_wrong_name_sql, - my_model_view_wrong_order_sql, - 
my_model_wrong_name_sql, - my_model_wrong_order_sql, -) - - -class ClickHouseContractColumnsEqual: - """ - dbt should catch these mismatches during its "preflight" checks. - """ - - @pytest.fixture - def data_types(self): - # sql_column_value, schema_data_type, error_data_type - return [ - ["1::Int32", "Int32", "Int32"], - ["'1'", "String", "String"], - ["true", "Bool", "Bool"], - ["'2013-11-03'::DateTime", "DateTime", "DateTime"], - ["['a','b','c']", "Array(String)", "Array(String)"], - ["[1::Int32,2::Int32,3::Int32]", "Array(Int32)", "Array(Int32)"], - ["'1'::Float64", "Float64", "Float64"], - ] - - def test__contract_wrong_column_order(self, project): - # This no longer causes an error, since we enforce yaml column order - run_dbt(["run", "-s", "my_model_wrong_order"], expect_pass=True) - manifest = get_manifest(project.project_root) - model_id = "model.test.my_model_wrong_order" - my_model_config = manifest.nodes[model_id].config - contract_actual_config = my_model_config.contract - - assert contract_actual_config.enforced is True - - def test__contract_wrong_column_names(self, project): - _, log_output = run_dbt_and_capture(["run", "-s", "my_model_wrong_name"], expect_pass=False) - run_dbt(["run", "-s", "my_model_wrong_name"], expect_pass=False) - manifest = get_manifest(project.project_root) - model_id = "model.test.my_model_wrong_name" - my_model_config = manifest.nodes[model_id].config - contract_actual_config = my_model_config.contract - - assert contract_actual_config.enforced is True - - expected = ["id", "error", "missing in definition", "missing in contract"] - assert all([(exp in log_output or exp.upper() in log_output) for exp in expected]) - - def test__contract_wrong_column_data_types(self, project, data_types): - for sql_column_value, schema_data_type, error_data_type in data_types: - # Write parametrized data_type to sql file - write_file( - my_model_data_type_sql.format(sql_value=sql_column_value), - "models", - "my_model_data_type.sql", - ) - 
write_file( - model_data_type_schema_yml.format(data_type='Int128'), - "models", - "contract_schema.yml", - ) - - results, log_output = run_dbt_and_capture( - ["run", "-s", "my_model_data_type"], expect_pass=False - ) - manifest = get_manifest(project.project_root) - model_id = "model.test.my_model_data_type" - my_model_config = manifest.nodes[model_id].config - contract_actual_config = my_model_config.contract - - assert contract_actual_config.enforced is True - expected = [ - "wrong_data_type_column_name", - error_data_type, - "Int128", - "data type mismatch", - ] - assert all([(exp in log_output or exp.upper() in log_output) for exp in expected]) - - def test__contract_correct_column_data_types(self, project, data_types): - for sql_column_value, schema_data_type, _ in data_types: - # Write parametrized data_type to sql file - write_file( - my_model_data_type_sql.format(sql_value=sql_column_value), - "models", - "my_model_data_type.sql", - ) - # Write correct data_type to corresponding schema file - write_file( - model_data_type_schema_yml.format(data_type=schema_data_type), - "models", - "contract_schema.yml", - ) - - run_dbt(["run", "-s", "my_model_data_type"]) - - manifest = get_manifest(project.project_root) - model_id = "model.test.my_model_data_type" - my_model_config = manifest.nodes[model_id].config - contract_actual_config = my_model_config.contract - - assert contract_actual_config.enforced is True - - -class TestTableContractColumnsEqual(ClickHouseContractColumnsEqual): - @pytest.fixture(scope="class") - def models(self): - return { - "my_model_wrong_order.sql": my_model_wrong_order_sql, - "my_model_wrong_name.sql": my_model_wrong_name_sql, - "contract_schema.yml": contract_model_schema_yml, - } - - -class TestViewContractColumnsEqual(ClickHouseContractColumnsEqual): - @pytest.fixture(scope="class") - def models(self): - return { - "my_model_wrong_order.sql": my_model_view_wrong_order_sql, - "my_model_wrong_name.sql": my_model_view_wrong_name_sql, - 
"contract_schema.yml": contract_model_schema_yml, - } - - -class TestIncrementalContractColumnsEqual(ClickHouseContractColumnsEqual): - @pytest.fixture(scope="class") - def models(self): - return { - "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql, - "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql, - "contract_schema.yml": contract_model_schema_yml, - } - - -class TestBadConstraints: - @pytest.fixture(scope="class") - def models(self): - return { - "bad_column_constraint_model.sql": bad_column_constraint_model_sql, - "bad_foreign_key_model.sql": bad_foreign_key_model_sql, - "constraints_schema.yml": constraint_model_schema_yml, - } - - def test_invalid_column_constraint(self, project): - _, log_output = run_dbt_and_capture(["run", "-s", "bad_column_constraint_model"]) - assert "not supported" in log_output - - def test_invalid_fk_constraint(self, project): - _, log_output = run_dbt_and_capture(["run", "-s", "bad_foreign_key_model"]) - assert "not supported" in log_output - - -class TestModelConstraints: - @pytest.fixture(scope="class") - def models(self): - return { - "check_constraints_model.sql": check_constraints_model_sql, - "constraints_schema.yml": constraint_model_schema_yml, - } - - def test_model_constraints_ddl(self, project): - run_dbt(["run", "-s", "check_constraints_model"]) - - -class TestModelConstraintApplied: - @pytest.fixture(scope="class") - def models(self): - return { - "check_constraints_model.sql": check_constraints_model_fail_sql, - "constraints_schema.yml": constraint_model_schema_yml, - } - - def test_model_constraints_fail_ddl(self, project): - _, log_output = run_dbt_and_capture( - ["run", "-s", "check_constraints_model"], expect_pass=False - ) - assert 'violated' in log_output.lower() diff --git a/tests/integration/adapter/dbt_clone/test_dbt_clone.py b/tests/integration/adapter/dbt_clone/test_dbt_clone.py deleted file mode 100644 index 0252a2f7..00000000 --- 
a/tests/integration/adapter/dbt_clone/test_dbt_clone.py +++ /dev/null @@ -1,7 +0,0 @@ -import pytest -from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible - - -@pytest.mark.skip("clone not supported") -class TestBaseClonePossible(BaseClonePossible): - pass diff --git a/tests/integration/adapter/dbt_debug/test_dbt_debug.py b/tests/integration/adapter/dbt_debug/test_dbt_debug.py deleted file mode 100644 index 044d0634..00000000 --- a/tests/integration/adapter/dbt_debug/test_dbt_debug.py +++ /dev/null @@ -1,22 +0,0 @@ -import re - -from dbt.tests.adapter.dbt_debug.test_dbt_debug import BaseDebug -from dbt.tests.util import run_dbt - - -class TestDebugClickHouse(BaseDebug): - def test_ok(self, project): - run_dbt(["debug"]) - assert "ERROR" not in self.capsys.readouterr().out - - def test_nopass(self, project): - run_dbt(["debug", "--target", "nopass"], expect_pass=False) - self.assertGotValue(re.compile(r"\s+profiles\.yml file"), "ERROR invalid") - - def test_wronguser(self, project): - run_dbt(["debug", "--target", "wronguser"], expect_pass=False) - self.assertGotValue(re.compile(r"\s+Connection test"), "ERROR") - - def test_empty_target(self, project): - run_dbt(["debug", "--target", "none_target"], expect_pass=False) - self.assertGotValue(re.compile(r"\s+output 'none_target'"), "misconfigured") diff --git a/tests/integration/adapter/dbt_show/test_dbt_show.py b/tests/integration/adapter/dbt_show/test_dbt_show.py deleted file mode 100644 index 98d60315..00000000 --- a/tests/integration/adapter/dbt_show/test_dbt_show.py +++ /dev/null @@ -1,9 +0,0 @@ -from dbt.tests.adapter.dbt_show.test_dbt_show import BaseShowLimit, BaseShowSqlHeader - - -class TestShowLimit(BaseShowLimit): - pass - - -class TestShowSqlHeader(BaseShowSqlHeader): - pass diff --git a/tests/integration/adapter/grants/test_distributed_grants.py b/tests/integration/adapter/grants/test_distributed_grants.py deleted file mode 100644 index 4f2aca32..00000000 --- 
a/tests/integration/adapter/grants/test_distributed_grants.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - -import pytest -from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants -from dbt.tests.util import get_manifest, run_dbt_and_capture, write_file - -distributed_table_model_schema_yml = """ -version: 2 -models: - - name: my_model - config: - materialized: distributed_table - grants: - select: ["{{ env_var('DBT_TEST_USER_1') }}"] - insert: ["{{ env_var('DBT_TEST_USER_2') }}"] -""" - - -class TestDistributedTableModelGrants(BaseModelGrants): - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_view_table_grants(self, project, get_test_users): - # we want the test to fail, not silently skip - test_users = get_test_users - select_privilege_name = self.privilege_grantee_name_overrides()["select"] - insert_privilege_name = self.privilege_grantee_name_overrides()["insert"] - assert len(test_users) == 3 - # Distributed Table materialization, single select grant - updated_yaml = self.interpolate_name_overrides(distributed_table_model_schema_yml) - write_file(updated_yaml, project.project_root, "models", "schema.yml") - (results, log_output) = run_dbt_and_capture(["--debug", "run"]) - assert len(results) == 1 - manifest = get_manifest(project.project_root) - model_id = "model.test.my_model" - model = manifest.nodes[model_id] - assert model.config.materialized == "distributed_table" - expected = {select_privilege_name: [test_users[0]], insert_privilege_name: [test_users[1]]} - self.assert_expected_grants_match_actual(project, "my_model", expected) - - def assert_expected_grants_match_actual(self, project, relation_name, expected_grants): - super().assert_expected_grants_match_actual(project, relation_name, expected_grants) - - # also needs grants for local table - actual_local_grants = self.get_grants_on_relation(project, relation_name + "_local") - from dbt.context.base import 
BaseContext - - diff_a_local = BaseContext.diff_of_two_dicts(actual_local_grants, expected_grants) - diff_b_local = BaseContext.diff_of_two_dicts(expected_grants, actual_local_grants) - assert diff_a_local == diff_b_local == {} diff --git a/tests/integration/adapter/grants/test_incremental_grants.py b/tests/integration/adapter/grants/test_incremental_grants.py deleted file mode 100644 index 06c1aad8..00000000 --- a/tests/integration/adapter/grants/test_incremental_grants.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants - - -class TestIncrementalGrants(BaseIncrementalGrants): - pass diff --git a/tests/integration/adapter/grants/test_invalid_grants.py b/tests/integration/adapter/grants/test_invalid_grants.py deleted file mode 100644 index 2f54e290..00000000 --- a/tests/integration/adapter/grants/test_invalid_grants.py +++ /dev/null @@ -1,10 +0,0 @@ -from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants - - -class TestInvalidGrants(BaseInvalidGrants): - def grantee_does_not_exist_error(self): - return "511" - - # ClickHouse doesn't give a very specific error for an invalid privilege - def privilege_does_not_exist_error(self): - return "Syntax error" diff --git a/tests/integration/adapter/grants/test_model_grants.py b/tests/integration/adapter/grants/test_model_grants.py deleted file mode 100644 index a6db5924..00000000 --- a/tests/integration/adapter/grants/test_model_grants.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants - - -class TestModelGrants(BaseModelGrants): - pass diff --git a/tests/integration/adapter/grants/test_seed_grants.py b/tests/integration/adapter/grants/test_seed_grants.py deleted file mode 100644 index e08361b0..00000000 --- a/tests/integration/adapter/grants/test_seed_grants.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants - - -class 
TestSeedGrants(BaseSeedGrants): - pass diff --git a/tests/integration/adapter/grants/test_snapshot_grants.py b/tests/integration/adapter/grants/test_snapshot_grants.py deleted file mode 100644 index 098a996b..00000000 --- a/tests/integration/adapter/grants/test_snapshot_grants.py +++ /dev/null @@ -1,5 +0,0 @@ -from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants - - -class TestSnapshotGrants(BaseSnapshotGrants): - pass diff --git a/tests/integration/adapter/hooks/test_model_hooks.py b/tests/integration/adapter/hooks/test_model_hooks.py deleted file mode 100644 index 3df77579..00000000 --- a/tests/integration/adapter/hooks/test_model_hooks.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest -from dbt.exceptions import CompilationError -from dbt.tests.adapter.hooks.fixtures import models__hooks_error -from dbt.tests.util import run_dbt - - -class TestDuplicateHooksInConfigs: - @pytest.fixture(scope="class") - def models(self): - return {"hooks.sql": models__hooks_error} - - def test_run_duplicate_hook_defs(self, project): - with pytest.raises(CompilationError) as exc: - run_dbt() - assert "pre_hook" in str(exc.value) - assert "pre-hook" in str(exc.value) diff --git a/tests/integration/adapter/incremental/test_distributed_incremental.py b/tests/integration/adapter/incremental/test_distributed_incremental.py deleted file mode 100644 index f132933d..00000000 --- a/tests/integration/adapter/incremental/test_distributed_incremental.py +++ /dev/null @@ -1,205 +0,0 @@ -import os - -import pytest -from dbt.tests.adapter.basic.files import ( - model_incremental, - schema_base_yml, - seeds_added_csv, - seeds_base_csv, -) -from dbt.tests.adapter.basic.test_incremental import BaseIncremental, BaseIncrementalNotSchemaChange -from dbt.tests.util import run_dbt - -from tests.integration.adapter.incremental.test_base_incremental import uniq_schema - -uniq_source_model = """ -{{config( - materialized='distributed_table', - engine='MergeTree()', - order_by=['ts'], 
- unique_key=['impid'] - ) -}} -SELECT now() - toIntervalHour(number) as ts, toInt32(number) as impid, concat('value', toString(number)) as value1 - FROM numbers(100) -""" - -uniq_incremental_model = """ -{{ - config( - materialized='distributed_incremental', - engine='MergeTree()', - order_by=['ts'], - unique_key=['impid'] - ) -}} -select ts, impid from unique_source_one -{% if is_incremental() %} -where ts >= now() - toIntervalHour(1) -{% endif %} -""" - - -class TestSimpleDistributedIncremental: - @pytest.fixture(scope="class") - def models(self): - return { - "unique_source_one.sql": uniq_source_model, - "unique_incremental_one.sql": uniq_incremental_model, - "schema.yml": uniq_schema, - } - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_simple_incremental(self, project): - run_dbt(["run", "--select", "unique_source_one"]) - run_dbt(["run", "--select", "unique_incremental_one"]) - - -lw_delete_schema = """ -version: 2 - -models: - - name: "lw_delete_inc" - description: "Incremental table" -""" - -lw_delete_inc = """ -{{ config( - materialized='distributed_incremental', - order_by=['key1'], - unique_key='key1', - incremental_strategy='delete+insert' - ) -}} -{% if is_incremental() %} - WITH (SELECT max(key1) - 20 FROM lw_delete_inc) as old_max - SELECT assumeNotNull(toUInt64(number + old_max + 1)) as key1, toInt64(-(number + old_max)) as key2, toString(number + 30) as value FROM numbers(100) -{% else %} - SELECT toUInt64(number) as key1, toInt64(-number) as key2, toString(number) as value FROM numbers(100) -{% endif %} -""" - - -class TestLWDeleteDistributedIncremental: - @pytest.fixture(scope="class") - def models(self): - return {"lw_delete_inc.sql": lw_delete_inc} - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_lw_delete(self, project): - run_dbt() - result = project.run_sql("select count(*) as num_rows from 
lw_delete_inc", fetch="one") - assert result[0] == 100 - run_dbt() - result = project.run_sql("select count(*) as num_rows from lw_delete_inc", fetch="one") - assert result[0] == 180 - - -compound_key_schema = """ -version: 2 - -models: - - name: "compound_key_inc" - description: "Incremental table" -""" - -compound_key_inc = """ -{{ config( - materialized='distributed_incremental', - order_by=['key1', 'key2'], - unique_key='key1, key2', - incremental_strategy='delete+insert' - ) -}} -{% if is_incremental() %} - WITH (SELECT max(key1) - 20 FROM compound_key_inc) as old_max - SELECT assumeNotNull(toUInt64(number + old_max + 1)) as key1, toInt64(-key1) as key2, toString(number + 30) as value FROM numbers(100) -{% else %} - SELECT toUInt64(number) as key1, toInt64(-number) as key2, toString(number) as value FROM numbers(100) -{% endif %} -""" - - -class TestDistributedIncrementalCompoundKey: - @pytest.fixture(scope="class") - def models(self): - return {"compound_key_inc.sql": compound_key_inc} - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_compound_key(self, project): - run_dbt() - result = project.run_sql("select count(*) as num_rows from compound_key_inc", fetch="one") - assert result[0] == 100 - run_dbt() - result = project.run_sql("select count(*) as num_rows from compound_key_inc", fetch="one") - assert result[0] == 180 - - -replicated_seed_schema_yml = """ -version: 2 - -seeds: - - name: base - config: - engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) - - name: added - config: - engine: ReplicatedMergeTree('/clickhouse/tables/{uuid}/one_shard', '{server_index}' ) -""" - - -class TestInsertsOnlyDistributedIncrementalMaterialization(BaseIncremental): - @pytest.fixture(scope="class") - def models(self): - config_materialized_incremental = """ - {{ config(order_by='(some_date, id, name)', inserts_only=True, materialized='distributed_incremental', 
unique_key='id') }} - """ - incremental_sql = config_materialized_incremental + model_incremental - return { - "incremental.sql": incremental_sql, - "schema.yml": schema_base_yml, - } - - @pytest.fixture(scope="class") - def seeds(self): - return { - "base.csv": seeds_base_csv, - "added.csv": seeds_added_csv, - "schema.yml": replicated_seed_schema_yml, - } - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_incremental(self, project): - super().test_incremental(project) - - -incremental_not_schema_change_sql = """ -{{ config(materialized="distributed_incremental", unique_key="user_id_current_time",on_schema_change="sync_all_columns") }} -select - toString(1) || '-' || toString(now64()) as user_id_current_time, - {% if is_incremental() %} - 'thisis18characters' as platform - {% else %} - 'okthisis20characters' as platform - {% endif %} -""" - - -class TestDistributedIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): - @pytest.fixture(scope="class") - def models(self): - return {"incremental_not_schema_change.sql": incremental_not_schema_change_sql} - - @pytest.mark.skipif( - os.environ.get('DBT_CH_TEST_CLUSTER', '').strip() == '', reason='Not on a cluster' - ) - def test_incremental_not_schema_change(self, project): - super().test_incremental_not_schema_change(project) diff --git a/tests/integration/adapter/incremental/test_base_incremental.py b/tests/integration/adapter/incremental/test_incremental.py similarity index 70% rename from tests/integration/adapter/incremental/test_base_incremental.py rename to tests/integration/adapter/incremental/test_incremental.py index 24635db5..bfa97fab 100644 --- a/tests/integration/adapter/incremental/test_base_incremental.py +++ b/tests/integration/adapter/incremental/test_incremental.py @@ -1,6 +1,6 @@ import pytest from dbt.tests.adapter.basic.files import model_incremental, schema_base_yml -from dbt.tests.adapter.basic.test_incremental import 
BaseIncremental +from dbt.tests.adapter.basic.test_incremental import BaseIncremental, BaseIncrementalNotSchemaChange from dbt.tests.util import run_dbt uniq_schema = """ @@ -33,8 +33,7 @@ materialized='incremental', engine='MergeTree()', order_by=['ts'], - unique_key=['impid'], - settings={'allow_nullable_key':'1'} + unique_key=['impid'] ) }} select ts, impid from unique_source_one @@ -58,18 +57,25 @@ def test_simple_incremental(self, project): run_dbt(["run", "--select", "unique_incremental_one"]) +lw_delete_schema = """ +version: 2 + +models: + - name: "lw_delete_inc" + description: "Incremental table" +""" + lw_delete_inc = """ {{ config( materialized='incremental', order_by=['key1'], unique_key='key1', - incremental_strategy='delete+insert', - settings={'allow_nullable_key':1} + incremental_strategy='delete+insert' ) }} {% if is_incremental() %} - select 2 as key1, 500 as key2, 'test' as value UNION ALL - select 102 as key1, 400 as key2, 'test2' as value + WITH (SELECT max(key1) - 20 FROM lw_delete_inc) as old_max + SELECT assumeNotNull(toUInt64(number + old_max + 1)) as key1, toInt64(-(number + old_max)) as key2, toString(number + 30) as value FROM numbers(100) {% else %} SELECT toUInt64(number) as key1, toInt64(-number) as key2, toString(number) as value FROM numbers(100) {% endif %} @@ -87,45 +93,7 @@ def test_lw_delete(self, project): assert result[0] == 100 run_dbt() result = project.run_sql("select count(*) as num_rows from lw_delete_inc", fetch="one") - assert result[0] == 101 - run_dbt() - result = project.run_sql("select count(*) as num_rows from lw_delete_inc", fetch="one") - assert result[0] == 101 - - -legacy_inc = """ -{{ config( - materialized='incremental', - order_by=['key1'], - unique_key='key1', - incremental_strategy='legacy', - settings={'allow_nullable_key':1} - ) -}} -{% if is_incremental() %} - select 2 as key1, 500 as key2, 'test' as value UNION ALL - select 102 as key1, 400 as key2, 'test2' as value -{% else %} - SELECT 
toUInt64(number) as key1, toInt64(-number) as key2, toString(number) as value FROM numbers(100) -{% endif %} -""" - - -class TestLegacyIncremental: - @pytest.fixture(scope="class") - def models(self): - return {"legacy_inc.sql": legacy_inc} - - def test_legacy(self, project): - run_dbt() - result = project.run_sql("select count(*) as num_rows from legacy_inc", fetch="one") - assert result[0] == 100 - run_dbt() - result = project.run_sql("select count(*) as num_rows from legacy_inc", fetch="one") - assert result[0] == 101 - run_dbt() - result = project.run_sql("select count(*) as num_rows from legacy_inc", fetch="one") - assert result[0] == 101 + assert result[0] == 180 compound_key_schema = """ @@ -165,9 +133,6 @@ def test_compound_key(self, project): run_dbt() result = project.run_sql("select count(*) as num_rows from compound_key_inc", fetch="one") assert result[0] == 180 - run_dbt() - result = project.run_sql("select count(*) as num_rows from compound_key_inc", fetch="one") - assert result[0] == 260 class TestInsertsOnlyIncrementalMaterialization(BaseIncremental): @@ -181,3 +146,21 @@ def models(self): "incremental.sql": incremental_sql, "schema.yml": schema_base_yml, } + + +incremental_not_schema_change_sql = """ +{{ config(materialized="incremental", unique_key="user_id_current_time",on_schema_change="sync_all_columns") }} +select + toString(1) || '-' || toString(now64()) as user_id_current_time, + {% if is_incremental() %} + 'thisis18characters' as platform + {% else %} + 'okthisis20characters' as platform + {% endif %} +""" + + +class TestIncrementalNotSchemaChange(BaseIncrementalNotSchemaChange): + @pytest.fixture(scope="class") + def models(self): + return {"incremental_not_schema_change.sql": incremental_not_schema_change_sql} diff --git a/tests/integration/adapter/incremental/test_schema_change.py b/tests/integration/adapter/incremental/test_schema_change.py deleted file mode 100644 index 9bccaf4e..00000000 --- 
a/tests/integration/adapter/incremental/test_schema_change.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest -from dbt.tests.util import run_dbt, run_dbt_and_capture - -schema_change_sql = """ -{{ - config( - materialized='incremental', - unique_key='col_1', - on_schema_change='%schema_change%' - ) -}} - -{% if not is_incremental() %} -select - number as col_1, - number + 1 as col_2 -from numbers(3) -{% else %} -select - number as col_1, - number + 1 as col_2, - number + 2 as col_3 -from numbers(2, 3) -{% endif %} -""" - - -class TestOnSchemaChange: - @pytest.fixture(scope="class") - def models(self): - return { - "schema_change_ignore.sql": schema_change_sql.replace("%schema_change%", "ignore"), - "schema_change_fail.sql": schema_change_sql.replace("%schema_change%", "fail"), - "schema_change_append.sql": schema_change_sql.replace( - "%schema_change%", "append_new_columns" - ), - } - - def test_ignore(self, project): - run_dbt(["run", "--select", "schema_change_ignore"]) - result = project.run_sql("select * from schema_change_ignore order by col_1", fetch="all") - assert len(result) == 3 - assert result[0][1] == 1 - run_dbt(["run", "--select", "schema_change_ignore"]) - result = project.run_sql("select * from schema_change_ignore", fetch="all") - assert len(result) == 5 - - def test_fail(self, project): - run_dbt(["run", "--select", "schema_change_fail"]) - result = project.run_sql("select * from schema_change_fail order by col_1", fetch="all") - assert len(result) == 3 - assert result[0][1] == 1 - _, log_output = run_dbt_and_capture( - [ - "run", - "--select", - "schema_change_fail", - ], - expect_pass=False, - ) - assert 'out of sync' in log_output.lower() - - def test_append(self, project): - run_dbt(["run", "--select", "schema_change_append"]) - result = project.run_sql("select * from schema_change_append order by col_1", fetch="all") - assert len(result) == 3 - assert result[0][1] == 1 - run_dbt(["--debug", "run", "--select", "schema_change_append"]) - result 
= project.run_sql("select * from schema_change_append order by col_1", fetch="all") - assert result[0][2] == 0 - assert result[3][2] == 5 diff --git a/tests/integration/adapter/materialized_view/test_materialized_view.py b/tests/integration/adapter/materialized_view/test_materialized_view.py deleted file mode 100644 index 9305d064..00000000 --- a/tests/integration/adapter/materialized_view/test_materialized_view.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -test materialized view creation. This is ClickHouse specific, which has a significantly different implementation -of materialized views from PostgreSQL or Oracle -""" - -import json - -import pytest -from dbt.tests.util import check_relation_types, run_dbt - -from dbt.adapters.clickhouse.query import quote_identifier - -PEOPLE_SEED_CSV = """ -id,name,age,department -1231,Dade,33,engineering -6666,Ksenia,48,engineering -8888,Kate,50,engineering -""".lstrip() - -# This model is parameterized, in a way, by the "run_type" dbt project variable -# This is to be able to switch between different model definitions within -# the same test run and allow us to test the evolution of a materialized view -MV_MODEL = """ -{{ config( - materialized='materialized_view', - engine='MergeTree()', - order_by='(id)', -) }} - -{% if var('run_type', '') == '' %} -select - id, - name, - case - when name like 'Dade' then 'crash_override' - when name like 'Kate' then 'acid burn' - else 'N/A' - end as hacker_alias -from {{ source('raw', 'people') }} -where department = 'engineering' - -{% else %} - -select - id, - name, - case - -- Dade wasn't always known as 'crash override'! 
- when name like 'Dade' and age = 11 then 'zero cool' - when name like 'Dade' and age != 11 then 'crash override' - when name like 'Kate' then 'acid burn' - else 'N/A' - end as hacker_alias -from {{ source('raw', 'people') }} -where department = 'engineering' - -{% endif %} -""" - - -SEED_SCHEMA_YML = """ -version: 2 - -sources: - - name: raw - schema: "{{ target.schema }}" - tables: - - name: people -""" - - -class TestBasicMV: - @pytest.fixture(scope="class") - def seeds(self): - """ - we need a base table to pull from - """ - return { - "people.csv": PEOPLE_SEED_CSV, - "schema.yml": SEED_SCHEMA_YML, - } - - @pytest.fixture(scope="class") - def models(self): - return { - "hackers.sql": MV_MODEL, - } - - def test_create(self, project): - """ - 1. create a base table via dbt seed - 2. create a model as a materialized view, selecting from the table created in (1) - 3. insert data into the base table and make sure it's there in the target table created in (2) - """ - results = run_dbt(["seed"]) - assert len(results) == 1 - columns = project.run_sql("DESCRIBE TABLE people", fetch="all") - assert columns[0][1] == "Int32" - - # create the model - results = run_dbt() - assert len(results) == 1 - - columns = project.run_sql("DESCRIBE TABLE hackers", fetch="all") - assert columns[0][1] == "Int32" - - columns = project.run_sql("DESCRIBE hackers_mv", fetch="all") - assert columns[0][1] == "Int32" - - check_relation_types( - project.adapter, - { - "hackers_mv": "view", - "hackers": "table", - }, - ) - - # insert some data and make sure it reaches the target table - project.run_sql( - f""" - insert into {quote_identifier(project.test_schema)}.people ("id", "name", "age", "department") - values (1232,'Dade',16,'engineering'), (9999,'eugene',40,'malware'); - """ - ) - - result = project.run_sql("select count(*) from hackers", fetch="all") - assert result[0][0] == 4 - - -class TestUpdateMV: - @pytest.fixture(scope="class") - def seeds(self): - """ - we need a base table to pull 
from - """ - return { - "people.csv": PEOPLE_SEED_CSV, - "schema.yml": SEED_SCHEMA_YML, - } - - @pytest.fixture(scope="class") - def models(self): - return { - "hackers.sql": MV_MODEL, - } - - def test_update(self, project): - # create our initial materialized view - run_dbt(["seed"]) - run_dbt() - - # re-run dbt but this time with the new MV SQL - run_vars = {"run_type": "extended_schema"} - run_dbt(["run", "--vars", json.dumps(run_vars)]) - - project.run_sql( - f""" - insert into {quote_identifier(project.test_schema)}.people ("id", "name", "age", "department") - values (1232,'Dade',11,'engineering'), (9999,'eugene',40,'malware'); - """ - ) - - # assert that we now have both of Dade's aliases in our hackers table - result = project.run_sql( - "select distinct hacker_alias from hackers where name = 'Dade'", fetch="all" - ) - assert len(result) == 2 diff --git a/tests/integration/adapter/persist_docs/test_persist_docs.py b/tests/integration/adapter/persist_docs/test_persist_docs.py index a9129a3b..710ce611 100644 --- a/tests/integration/adapter/persist_docs/test_persist_docs.py +++ b/tests/integration/adapter/persist_docs/test_persist_docs.py @@ -101,16 +101,14 @@ def project_config_update(self): } } - def test_has_comments_pg_like(self): - if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): - pytest.skip('Not running comment test for cloud') + def test_has_comments_pglike(self, project): run_dbt(["docs", "generate"]) with open("target/catalog.json") as fp: catalog_data = json.load(fp) assert "nodes" in catalog_data assert len(catalog_data["nodes"]) == 4 table_node = catalog_data["nodes"]["model.test.table_model"] - self._assert_has_table_comments(table_node) + view_node = self._assert_has_table_comments(table_node) view_node = catalog_data["nodes"]["model.test.view_model"] self._assert_has_view_comments(view_node) diff --git a/tests/integration/adapter/test_basic.py b/tests/integration/adapter/test_basic.py new file mode 100644 index 
00000000..fc11e146 --- /dev/null +++ b/tests/integration/adapter/test_basic.py @@ -0,0 +1,130 @@ +import pytest +from dbt.tests.adapter.basic.files import model_base, schema_base_yml +from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod +from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations +from dbt.tests.adapter.basic.test_empty import BaseEmpty +from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral +from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests +from dbt.tests.adapter.basic.test_incremental import BaseIncremental +from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests +from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols +from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp +from dbt.tests.util import check_relation_types, relation_from_name, run_dbt + +# CSV content with boolean column type. +seeds_boolean_csv = """ +key,value +abc,true +def,false +hij,true +klm,false +""".lstrip() + +# CSV content with empty fields. 
+seeds_empty_csv = """ +key,val1,val2,str1 +abc,1,1,some_str +abc,1,0,"another string" +def,1,0, +hij,1,1,Caps +hij,1,,"second string" +klm,1,0,"test" +klm,1,,"test4" +""".lstrip() + +seeds_schema_yml = """ +version: 2 + +seeds: + - name: empty + config: + column_types: + val2: Nullable(UInt32) + str1: Nullable(String) +""" + + +class TestBaseSimpleMaterializations(BaseSimpleMaterializations): + pass + + +class TestEmpty(BaseEmpty): + pass + + +class TestIncremental(BaseIncremental): + pass + + +class TestEphemeral(BaseEphemeral): + pass + + +class TestSnapshotTimestamp(BaseSnapshotTimestamp): + pass + + +class TestSnapshotCheckCols(BaseSnapshotCheckCols): + pass + + +class TestSingularTests(BaseSingularTests): + pass + + +class TestGenericTests(BaseGenericTests): + pass + + +class TestBaseAdapterMethod(BaseAdapterMethod): + pass + + +class TestMergeTreeTableMaterialization(BaseSimpleMaterializations): + @pytest.fixture(scope="class") + def models(self): + config_materialized_table = """ + {{ config(order_by='(some_date, id, name)', engine='MergeTree()', materialized='table', + settings={'allow_nullable_key': 1}) }} + """ + base_table_sql = config_materialized_table + model_base + return { + "table_model.sql": base_table_sql, + "schema.yml": schema_base_yml, + } + + def test_base(self, project): + # seed command + results = run_dbt(["seed"]) + # seed result length + assert len(results) == 1 + + # run command + results = run_dbt() + # run result length + assert len(results) == 1 + + check_relation_types(project.adapter, {"table_model": "table"}) + + # base table rowcount + relation = relation_from_name(project.adapter, "table_model") + result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one") + assert result[0] == 10 + + +class TestCSVSeed: + @pytest.fixture(scope="class") + def seeds(self): + return { + "schema.yml": seeds_schema_yml, + "boolean.csv": seeds_boolean_csv, + "empty.csv": seeds_empty_csv, + } + + def test_seed(self, 
project): + # seed command + results = run_dbt(["seed"]) + assert len(results) == 2 + columns = project.run_sql("DESCRIBE TABLE empty", fetch='all') + assert columns[2][1] == 'Nullable(UInt32)' + assert columns[3][1] == 'Nullable(String)' diff --git a/tests/integration/adapter/relations/test_changing_relation_type.py b/tests/integration/adapter/test_changing_relation_type.py similarity index 100% rename from tests/integration/adapter/relations/test_changing_relation_type.py rename to tests/integration/adapter/test_changing_relation_type.py diff --git a/tests/integration/adapter/column_types/test_column_types.py b/tests/integration/adapter/test_column.py similarity index 100% rename from tests/integration/adapter/column_types/test_column_types.py rename to tests/integration/adapter/test_column.py diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py b/tests/integration/adapter/test_comments.py similarity index 95% rename from tests/integration/adapter/clickhouse/test_clickhouse_comments.py rename to tests/integration/adapter/test_comments.py index 5179954a..2e310c0c 100644 --- a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py +++ b/tests/integration/adapter/test_comments.py @@ -67,7 +67,7 @@ def models(self): ['table_comment', 'view_comment'], ) def test_comment(self, project, model_name): - if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): + if '_cloud' in os.environ.get('GITHUB_REF', ''): pytest.skip('Not running comment test for cloud') run_dbt(["run"]) run_dbt(["docs", "generate"]) diff --git a/tests/integration/adapter/concurrency/test_concurrency.py b/tests/integration/adapter/test_concurrency.py similarity index 100% rename from tests/integration/adapter/concurrency/test_concurrency.py rename to tests/integration/adapter/test_concurrency.py diff --git a/tests/integration/adapter/basic/test_docs_generate.py b/tests/integration/adapter/test_docs.py similarity index 100% rename from 
tests/integration/adapter/basic/test_docs_generate.py rename to tests/integration/adapter/test_docs.py diff --git a/tests/integration/adapter/test_grants.py b/tests/integration/adapter/test_grants.py new file mode 100644 index 00000000..418264aa --- /dev/null +++ b/tests/integration/adapter/test_grants.py @@ -0,0 +1,30 @@ +from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants +from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants +from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants +from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants +from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants + + +class TestModelGrants(BaseModelGrants): + pass + + +class TestIncrementalGrants(BaseIncrementalGrants): + pass + + +class TestSeedGrants(BaseSeedGrants): + pass + + +class TestInvalidGrants(BaseInvalidGrants): + def grantee_does_not_exist_error(self): + return "511" + + # ClickHouse doesn't give a very specific error for an invalid privilege + def privilege_does_not_exist_error(self): + return "Syntax error" + + +class TestSnapshotGrants(BaseSnapshotGrants): + pass diff --git a/tests/integration/adapter/query_comment/test_query_comment.py b/tests/integration/adapter/test_query_comments.py similarity index 100% rename from tests/integration/adapter/query_comment/test_query_comment.py rename to tests/integration/adapter/test_query_comments.py diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_source_schema.py b/tests/integration/adapter/test_relations.py similarity index 100% rename from tests/integration/adapter/clickhouse/test_clickhouse_source_schema.py rename to tests/integration/adapter/test_relations.py diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_s3.py b/tests/integration/adapter/test_s3.py similarity index 74% rename from tests/integration/adapter/clickhouse/test_clickhouse_s3.py rename to tests/integration/adapter/test_s3.py 
index 10f1289e..8fb8727f 100644 --- a/tests/integration/adapter/clickhouse/test_clickhouse_s3.py +++ b/tests/integration/adapter/test_s3.py @@ -27,10 +27,6 @@ select * from {{ clickhouse_s3source('taxi_s3', path='/trips_4.gz') }} LIMIT 5000 """ -s3_taxis_full_source = """ -select * from {{ clickhouse_s3source('taxi_s3', path='/trips_5.gz') }} LIMIT 1000 -""" - s3_taxis_inc = """ {{ config( materialized='incremental', @@ -88,28 +84,3 @@ def test_s3_incremental(self, project): ) assert 5000 < result[0] < 10000 assert result[1] > 0 - - -class TestS3Bucket: - @pytest.fixture(scope="class") - def project_config_update(self): - return { - 'vars': { - 'taxi_s3': { - 'bucket': 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/', - 'fmt': 'TabSeparatedWithNames', - } - } - } - - @pytest.fixture(scope="class") - def models(self): - return { - "s3_taxis_source.sql": s3_taxis_full_source, - "schema.yml": schema_yaml, - } - - def test_read(self, project): - run_dbt(["run", "--select", "s3_taxis_source.sql"]) - result = project.run_sql("select count() as num_rows from s3_taxis_source", fetch="one") - assert result[0] == 1000 diff --git a/tests/integration/adapter/basic/test_singular_tests_ephemeral.py b/tests/integration/adapter/test_singular.py similarity index 56% rename from tests/integration/adapter/basic/test_singular_tests_ephemeral.py rename to tests/integration/adapter/test_singular.py index 89919591..c81bafe1 100644 --- a/tests/integration/adapter/basic/test_singular_tests_ephemeral.py +++ b/tests/integration/adapter/test_singular.py @@ -1,5 +1,10 @@ +from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests from dbt.tests.adapter.basic.test_singular_tests_ephemeral import BaseSingularTestsEphemeral +class TestSingularTests(BaseSingularTests): + pass + + class TestSingularTestsEphemeral(BaseSingularTestsEphemeral): pass diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_upper_case.py 
b/tests/integration/adapter/test_upper_case.py similarity index 100% rename from tests/integration/adapter/clickhouse/test_clickhouse_upper_case.py rename to tests/integration/adapter/test_upper_case.py diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 50b1af6a..5e79e256 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -32,13 +32,9 @@ def test_config(ch_test_users, ch_test_version): compose_file = f'{Path(__file__).parent}/docker-compose.yml' test_host = os.environ.get('DBT_CH_TEST_HOST', 'localhost') test_port = int(os.environ.get('DBT_CH_TEST_PORT', 8123)) - client_port = int(os.environ.get('DBT_CH_TEST_CLIENT_PORT', 0)) - test_driver = os.environ.get('DBT_CH_TEST_DRIVER', '').lower() - if test_driver == '': - test_driver = 'native' if test_port in (10900, 9000, 9440) else 'http' + test_driver = 'native' if test_port in (10900, 9000, 9440) else 'http' test_user = os.environ.get('DBT_CH_TEST_USER', 'default') test_password = os.environ.get('DBT_CH_TEST_PASSWORD', '') - test_cluster = os.environ.get('DBT_CH_TEST_CLUSTER', '') test_db_engine = os.environ.get('DBT_CH_TEST_DB_ENGINE', '') test_secure = test_port in (8443, 9440) test_cluster_mode = os.environ.get('DBT_CH_TEST_CLUSTER_MODE', '').lower() in ( @@ -52,12 +48,11 @@ def test_config(ch_test_users, ch_test_version): docker = os.environ.get('DBT_CH_TEST_USE_DOCKER', '').lower() in ('1', 'true', 'yes') if docker: - client_port = client_port or 10723 + client_port = 10723 test_port = 10900 if test_driver == 'native' else client_port try: run_cmd(['docker-compose', '-f', compose_file, 'down', '-v']) sys.stderr.write('Starting docker compose') - os.environ['PROJECT_ROOT'] = '.' 
up_result = run_cmd(['docker-compose', '-f', compose_file, 'up', '-d']) if up_result[0]: raise Exception(f'Failed to start docker: {up_result[2]}') @@ -65,7 +60,7 @@ def test_config(ch_test_users, ch_test_version): wait_until_responsive(timeout=30.0, pause=0.5, check=lambda: is_responsive(url)) except Exception as e: raise Exception('Failed to run docker-compose: {}', str(e)) - elif not client_port: + else: if test_driver == 'native': client_port = 8443 if test_port == 9440 else 8123 else: @@ -79,12 +74,8 @@ def test_config(ch_test_users, ch_test_version): secure=test_secure, ) for dbt_user in ch_test_users: - cmd = 'CREATE USER IF NOT EXISTS %s IDENTIFIED WITH sha256_hash BY %s' - if test_cluster != '': - cmd = f'CREATE USER IF NOT EXISTS %s ON CLUSTER "{test_cluster}" IDENTIFIED WITH sha256_hash BY %s' - test_client.command( - cmd, + 'CREATE USER IF NOT EXISTS %s IDENTIFIED WITH sha256_hash BY %s', (dbt_user, '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8'), ) yield { @@ -93,11 +84,9 @@ def test_config(ch_test_users, ch_test_version): 'port': test_port, 'user': test_user, 'password': test_password, - 'cluster': test_cluster, 'db_engine': test_db_engine, 'secure': test_secure, 'cluster_mode': test_cluster_mode, - 'database': '', } if docker: @@ -122,7 +111,6 @@ def dbt_profile_target(test_config): 'user': test_config['user'], 'password': test_config['password'], 'port': test_config['port'], - 'cluster': test_config['cluster'], 'database_engine': test_config['db_engine'], 'cluster_mode': test_config['cluster_mode'], 'secure': test_config['secure'], diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml index e7810f0f..d3a90fa1 100644 --- a/tests/integration/docker-compose.yml +++ b/tests/integration/docker-compose.yml @@ -1,51 +1,15 @@ --- version: '3' -x-ch-common: &ch-common - volumes: - - /var/lib/clickhouse - - type: bind - source: ${PROJECT_ROOT:-.}/test_settings_${DBT_CH_TEST_SETTINGS:-latest}.xml - target: 
/etc/clickhouse-server/users.d/test_settings.xml - - type: bind - source: ${PROJECT_ROOT:-.}/test_config.xml - target: /etc/clickhouse-server/config.d/test_config.xml - ulimits: - nofile: - soft: 262144 - hard: 262144 - services: - ch0: + ch_server: image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} - environment: - - SERVER_INDEX=1 - - SHARD_NUM=${SHARD_NUM:-1} - - REPLICA_NUM=${REPLICA_NUM:-1} ports: - - "8123:8123" - - "8443:8443" - - "9000:9000" - # for local docker tests - "10723:8123" - "10743:8443" - "10900:9000" - <<: *ch-common - ch1: - image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} - environment: - - SERVER_INDEX=2 - - SHARD_NUM=${SHARD_NUM:-2} - - REPLICA_NUM=${REPLICA_NUM:-2} - <<: *ch-common - ch2: - image: clickhouse/clickhouse-server:${DBT_CH_TEST_CH_VERSION:-latest} - environment: - - SERVER_INDEX=3 - - SHARD_NUM=${SHARD_NUM:-3} - - REPLICA_NUM=${REPLICA_NUM:-3} - <<: *ch-common - -networks: - default: - name: integration-test + volumes: + - /var/lib/clickhouse + - type: bind + source: ./test_settings_${DBT_CH_TEST_SETTINGS:-latest}.xml + target: /etc/clickhouse-server/users.d/test_settings.xml diff --git a/tests/integration/test_config.xml b/tests/integration/test_config.xml deleted file mode 100644 index 9f2aec4f..00000000 --- a/tests/integration/test_config.xml +++ /dev/null @@ -1,89 +0,0 @@ - - 8123 - 9000 - 9009 - - - - - - - - - - ch0 - 9000 - - - - - ch1 - 9000 - - - - - ch2 - 9000 - - - - - - - ch0 - 9000 - - - ch1 - 9000 - - - ch2 - 9000 - - - - - - 9181 - - - - 10000 - 30000 - - - - - 1 - ch0 - 9234 - - - 2 - ch1 - 9234 - - - 3 - ch2 - 9234 - - - - - - ch0 - 9181 - - - ch1 - 9181 - - - ch2 - 9181 - - - diff --git a/tests/unit/test_util.py b/tests/unit/test_adapter.py similarity index 89% rename from tests/unit/test_util.py rename to tests/unit/test_adapter.py index d87d2e57..0faf9dbe 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_adapter.py @@ -1,4 +1,4 @@ -from 
dbt.adapters.clickhouse.util import compare_versions +from dbt.adapters.clickhouse.impl import compare_versions def test_is_before_version():