From e9428c1a2c24c253e760ace0fb45f2c09994fded Mon Sep 17 00:00:00 2001
From: Mike Alfare
Date: Wed, 6 Dec 2023 14:20:16 -0500
Subject: [PATCH] provide test case for dbt-bigquery#1044

---
Review note: `_run_dbt` now builds a fresh argument list instead of extending
the caller's `commands` in place, so a reused list never accumulates repeated
`--vars` arguments. Line counts are unchanged; the blob hash on the index line
of test_source_freshness.py predates this edit.

 .../adapter/sources_tests/project_files.py    | 38 ++++++++++++
 .../sources_tests/test_source_freshness.py    | 61 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 tests/functional/adapter/sources_tests/project_files.py
 create mode 100644 tests/functional/adapter/sources_tests/test_source_freshness.py

diff --git a/tests/functional/adapter/sources_tests/project_files.py b/tests/functional/adapter/sources_tests/project_files.py
new file mode 100644
index 000000000..c4dda197a
--- /dev/null
+++ b/tests/functional/adapter/sources_tests/project_files.py
@@ -0,0 +1,38 @@
+SCHEMA__YML = """
+version: 2
+
+sources:
+  - name: test_source
+    loader: custom
+    freshness:
+      warn_after: {count: 12, period: hour}
+      error_after: {count: 24, period: hour}
+    schema: "{{ var('test_run_schema') }}"
+    tables:
+      - name: source_with_loaded_at_field
+        identifier: source_with_loaded_at_field
+        loaded_at_field: "{{ var('test_loaded_at') | as_text }}"
+      - name: source_without_loaded_at_field
+        identifier: source_without_loaded_at_field
+"""
+
+
+MODEL__WITH_LOADED_AT_FIELD__SQL = """
+select * from {{ source('test_source', 'source_with_loaded_at_field') }}
+"""
+
+
+MODEL__WITHOUT_LOADED_AT_FIELD__SQL = """
+select * from {{ source('test_source', 'source_without_loaded_at_field') }}
+"""
+
+
+SEED__SOURCE__CSV = """id,first_name,email,ip_address,updated_at
+1,Larry,lking0@miitbeian.gov.cn,'69.135.206.194',2008-09-12 19:08:31
+2,Larry,lperkins1@toplist.cz,'64.210.133.162',1978-05-09 04:15:14
+3,Anna,amontgomery2@miitbeian.gov.cn,'168.104.64.114',2011-10-16 04:07:57
+4,Sandra,sgeorge3@livejournal.com,'229.235.252.98',1973-07-19 10:52:43
+5,Fred,fwoods4@google.cn,'78.229.170.124',2012-09-30 16:38:29
+6,Stephen,shanson5@livejournal.com,'182.227.157.105',1995-11-07 21:40:50
+7,William,wmartinez6@upenn.edu,'135.139.249.50',1982-09-05 03:11:59
+""".lstrip()
diff --git a/tests/functional/adapter/sources_tests/test_source_freshness.py b/tests/functional/adapter/sources_tests/test_source_freshness.py
new file mode 100644
index 000000000..d68a99787
--- /dev/null
+++ b/tests/functional/adapter/sources_tests/test_source_freshness.py
@@ -0,0 +1,61 @@
+from typing import List
+import yaml
+
+import pytest
+
+from dbt.tests.util import run_dbt
+
+from tests.functional.adapter.sources_tests import project_files
+
+
+class TestSourceFreshness:
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        # these are the same files, but the schema doesn't specify the loaded at field for the second one
+        return {
+            "source_with_loaded_at_field.csv": project_files.SEED__SOURCE__CSV,
+            "source_without_loaded_at_field.csv": project_files.SEED__SOURCE__CSV,
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "schema.yml": project_files.SCHEMA__YML,
+            "with_loaded_at_field.sql": project_files.MODEL__WITH_LOADED_AT_FIELD__SQL,
+            "without_loaded_at_field.sql": project_files.MODEL__WITHOUT_LOADED_AT_FIELD__SQL,
+        }
+
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, project):
+        self._run_dbt(project, ["seed"])
+
+    @staticmethod
+    def _run_dbt(project, commands: List[str], *args, **kwargs):
+        vars_dict = {
+            "test_run_schema": project.test_schema,
+            "test_loaded_at": project.adapter.quote("updated_at"),
+        }
+        # build a new list rather than extending `commands`, so the caller's list is never mutated
+        return run_dbt([*commands, "--vars", yaml.safe_dump(vars_dict)], *args, **kwargs)
+
+    @pytest.mark.parametrize(
+        "source,max_loaded_at,expect_pass",
+        [
+            ("source_with_loaded_at_field", "2012-09-30 16:38:29", False),
+            # ("source_without_loaded_at_field", "2012-09-30 16:38:29", False),
+        ],
+    )
+    def test_source_freshness(self, project, source, max_loaded_at, expect_pass):
+        """
+        This test case addresses https://github.com/dbt-labs/dbt-bigquery/issues/1044
+
+        The first scenario above passes as expected. The second scenario is not currently testable.
+        `dbt source freshness` never returns because `loaded_at_field` is missing.
+        The expected behavior is that it should at least return, likely with an error or warning message
+        indicating that no `loaded_at_field` was provided for the source. Consult with Product and DX.
+        """
+        commands = ["source", "freshness", "--select", f"source:test_source.{source}"]
+        results = self._run_dbt(project, commands, expect_pass=expect_pass)
+        assert len(results) == 1
+        result = results[0]
+        assert result.max_loaded_at.strftime("%Y-%m-%d %H:%M:%S") == max_loaded_at