Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug] Fix scenario where dbt attempts to add existing columns to relations when using the SDK for column metadata #919

Closed
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240927-171725.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix scenario where dbt attempts to add existing columns to relations when using the SDK for column metadata
time: 2024-09-27T17:17:25.584838-04:00
custom:
Author: mikealfare
Issue: "914"
9 changes: 9 additions & 0 deletions dbt/adapters/redshift/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,15 @@ def _parse_column_results(record: Tuple[Any, ...]) -> Dict[str, Any]:
char_dtypes = [1, 12]
num_dtypes = [2, 3, 4, 5, 6, 7, 8, -5, 2003]

# the results from `get_columns` vary slightly from the pg_catalog tables for dtype names
dtype_alias = {
"bool": "boolean",
"int4": "integer",
"timestamp": "timestamp without time zone",
"varchar": "character varying",
}
dtype_name = dtype_alias.get(dtype_name, dtype_name)

if dtype_code in char_dtypes:
return {"column": column_name, "dtype": dtype_name, "char_size": column_size}
elif dtype_code in num_dtypes:
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@ def models(self):
def setup(self, project):
run_dbt(["run"])

@pytest.fixture(scope="class")
def expected_columns(self):
return []

def test_columns_in_relation(self, project, expected_columns):
def test_columns_in_relation(self, project):
my_relation = RedshiftRelation.create(
database=project.database,
schema=project.test_schema,
Expand All @@ -28,32 +24,20 @@ def test_columns_in_relation(self, project, expected_columns):
)
with project.adapter.connection_named("_test"):
actual_columns = project.adapter.get_columns_in_relation(my_relation)
expected_columns = [
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are aliasing dtypes back to the original names, we don't need to specify the expected column dtypes based on whether the flag is enabled.

Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="character varying", char_size=1),
]
assert actual_columns == expected_columns


class TestColumnsInRelationBehaviorFlagOff(ColumnsInRelation):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {}}

@pytest.fixture(scope="class")
def expected_columns(self):
# the SDK query returns "varchar" whereas our custom query returns "character varying"
return [
Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="character varying", char_size=1),
]
return {"flags": {"restrict_direct_pg_catalog_access": False}}


class TestColumnsInRelationBehaviorFlagOn(ColumnsInRelation):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {"restrict_direct_pg_catalog_access": True}}

@pytest.fixture(scope="class")
def expected_columns(self):
# the SDK query returns "varchar" whereas our custom query returns "character varying"
return [
Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="varchar", char_size=1),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from dbt.tests.util import run_dbt
import pytest

from tests.functional.utils import update_model


# CSV for the initial seed (registered as `my_seed.csv`): three rows, all dated 2024-01-01.
SEED = """
id,col7,col6,occurred_at
1,a,green,'2024-01-01'
2,b,green,'2024-01-01'
3,c,green,'2024-01-01'
""".strip()


# CSV for the follow-up seed (registered as `my_seed_updates.csv`): two rows dated
# 2024-02-01 and one row dated 2024-03-01.
SEED_UPDATES = """
id,col7,col6,occurred_at
4,b,red,'2024-02-01'
5,c,red,'2024-02-01'
6,c,blue,'2024-03-01'
""".strip()


# Incremental model used for the first run: keeps seed rows whose `occurred_at`
# falls in the half-open window [2024-01-01, 2024-02-01).
MODEL = """
{{ config(materialized='incremental') }}
select * from {{ ref('my_seed') }}
where occurred_at::timestamptz >= '2024-01-01'::timestamptz
and occurred_at::timestamptz < '2024-02-01'::timestamptz
"""


# Replacement model body for the second run: same shape, but the window moves to
# [2024-02-01, 2024-03-01), so a row dated exactly 2024-03-01 is excluded.
MODEL_UPDATES = """
{{ config(materialized='incremental') }}
select * from {{ ref('my_seed') }}
where occurred_at::timestamptz >= '2024-02-01'::timestamptz
and occurred_at::timestamptz < '2024-03-01'::timestamptz
"""


class TestIncrementalUpdates:
    """
    Regression test for https://github.com/dbt-labs/dbt-redshift/issues/914

    This class runs with `restrict_direct_pg_catalog_access` turned off, which acts as the
    control case: the bug only emerges when the flag is on, so the scenario is exercised
    with the flag both off and on.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # control configuration: the behavior flag is explicitly disabled
        flags = {"restrict_direct_pg_catalog_access": False}
        return {"flags": flags}

    @pytest.fixture(scope="class")
    def seeds(self):
        seed_files = {}
        seed_files["my_seed.csv"] = SEED
        seed_files["my_seed_updates.csv"] = SEED_UPDATES
        return seed_files

    @pytest.fixture(scope="class")
    def models(self):
        model_files = {"my_model.sql": MODEL}
        return model_files

    def test_columns_in_relation(self, project):
        # both row-count checks hit the same relation, so build the query once
        schema_path = f"{project.database}.{project.test_schema}"
        count_sql = f"select count(*) as row_count from {schema_path}.my_model"

        # first pass: load the seeds and build the table from scratch
        run_dbt(["seed"])
        run_dbt(["run"])

        # the freshly built table should hold only the initial records
        initial_count = project.run_sql(count_sql, fetch="one")[0]
        assert initial_count == 3

        # simulate time passing: append the new source rows, then swap in the model
        # whose date window covers them
        append_sql = (
            "\n"
            f"insert into {schema_path}.my_seed\n"
            f"select * from {schema_path}.my_seed_updates\n"
        )
        project.run_sql(append_sql)
        update_model(project, "my_model", MODEL_UPDATES)

        # second pass: this run is the incremental one
        run_dbt(["run"])

        # the rows inside the new window should now be present alongside the originals
        final_count = project.run_sql(count_sql, fetch="one")[0]
        assert final_count == 5


class TestIncrementalUpdatesFlagOn(TestIncrementalUpdates):
    """Run the inherited scenario with `restrict_direct_pg_catalog_access` enabled."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        flags = {"restrict_direct_pg_catalog_access": True}
        return {"flags": flags}
8 changes: 8 additions & 0 deletions tests/functional/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from dbt.tests.util import get_model_file, relation_from_name, set_model_file


def update_model(project, name: str, model: str) -> str:
    """
    Overwrite a model file with new contents and hand back what was there before.

    Args:
        project: the test project object; it supplies the adapter used to resolve the
            relation and is passed through to the model-file helpers
        name: the model name, resolved to a relation via `relation_from_name`
        model: the new contents written with `set_model_file`

    Returns:
        The value `get_model_file` returned for the model before it was overwritten.
    """
    # NOTE: this was copy/pasted from other adapters; it should eventually move to
    # dbt-tests-adapter as a separate exercise.
    target = relation_from_name(project.adapter, name)

    # read the current contents first so the caller can restore them later if needed
    previous_contents = get_model_file(project, target)
    set_model_file(project, target, model)
    return previous_contents
Loading