From b6fd2808f5f6d3f0471548471299afb7a721ccdf Mon Sep 17 00:00:00 2001 From: Victoria Perez Mola Date: Mon, 3 Jun 2024 17:52:12 +0200 Subject: [PATCH 1/3] add is_freshness_enabled --- macros/unpack/get_source_values.sql | 2 ++ models/staging/graph/stg_sources.sql | 1 + 2 files changed, 3 insertions(+) diff --git a/macros/unpack/get_source_values.sql b/macros/unpack/get_source_values.sql index b8d3abad..7c026dd3 100644 --- a/macros/unpack/get_source_values.sql +++ b/macros/unpack/get_source_values.sql @@ -24,6 +24,8 @@ "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", "cast(" ~ node.config.enabled ~ " as boolean)", wrap_string_with_quotes(node.loaded_at_field | replace("'", "_")), + "cast(" ~ (node.freshness is not none and (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count) + or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count))) | trim ~ " as boolean)", wrap_string_with_quotes(node.database), wrap_string_with_quotes(node.schema), wrap_string_with_quotes(node.package_name), diff --git a/models/staging/graph/stg_sources.sql b/models/staging/graph/stg_sources.sql index ed018971..a790534b 100644 --- a/models/staging/graph/stg_sources.sql +++ b/models/staging/graph/stg_sources.sql @@ -26,6 +26,7 @@ select cast(True as boolean) as is_described, cast(True as boolean) as is_enabled, cast(null as {{ dbt.type_string() }}) as loaded_at_field, + cast(True as boolean) as is_freshness_enabled, cast(null as {{ dbt.type_string() }}) as database, cast(null as {{ dbt.type_string() }}) as schema, cast(null as {{ dbt.type_string() }}) as package_name, From e9e2812e6993ea4d104ba01507a39fd48875a9d3 Mon Sep 17 00:00:00 2001 From: Victoria Perez Mola Date: Mon, 3 Jun 2024 18:37:01 +0200 Subject: [PATCH 2/3] Add test for source freshness --- docs/rules/testing.md | 12 +++++++++++ models/marts/core/int_all_graph_resources.sql | 1 + .../tests/fct_sources_without_freshness.sql | 21 +++++++++++++++++++ 
models/marts/tests/testing.yml | 7 ++++++- 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 models/marts/tests/fct_sources_without_freshness.sql diff --git a/docs/rules/testing.md b/docs/rules/testing.md index e1c1b654..fea8d884 100644 --- a/docs/rules/testing.md +++ b/docs/rules/testing.md @@ -27,6 +27,18 @@ You can optionally extend this test to apply to more node types (`source`,`snaps Snapshots should always have a multi-field primary key in order to function, while sources and seeds may not. Depending on your expectations for duplicates and null values, different kinds of primary key tests may be appropriate. Consider your use case carefully. +--- +## Missing Source Freshness + +`fct_sources_without_freshness` ([source](https://github.com/dbt-labs/dbt-project-evaluator/tree/main/models/marts/tests/fct_sources_without_freshness.sql)) lists every source that does not have a source freshness threshold defined. Any source that defines neither `warn_after` nor `error_after` will be flagged by this model. + +**Reason to Flag** + +Source freshness is useful for understanding if your data pipelines are in a healthy state and is a critical component of defining SLAs for your warehouse. Enabling freshness for sources also facilitates [referencing the source freshness results in the selectors](https://docs.getdbt.com/reference/node-selection/methods#the-source_status-method) for a more efficient execution. + +**How to Remediate** + +Apply a [source freshness block](https://docs.getdbt.com/docs/build/sources#declaring-source-freshness) to the source definition. This can be implemented at either the source name or table name level. 
--- ## Test Coverage diff --git a/models/marts/core/int_all_graph_resources.sql b/models/marts/core/int_all_graph_resources.sql index 9ee98fba..ed4603d8 100644 --- a/models/marts/core/int_all_graph_resources.sql +++ b/models/marts/core/int_all_graph_resources.sql @@ -109,6 +109,7 @@ joined as ( unioned_with_calc.source_name, -- NULL for non-source resources unioned_with_calc.is_source_described, unioned_with_calc.loaded_at_field, + unioned_with_calc.is_freshness_enabled, unioned_with_calc.loader, unioned_with_calc.identifier, unioned_with_calc.hard_coded_references, -- NULL for non-model resources diff --git a/models/marts/tests/fct_sources_without_freshness.sql b/models/marts/tests/fct_sources_without_freshness.sql new file mode 100644 index 00000000..464db3d3 --- /dev/null +++ b/models/marts/tests/fct_sources_without_freshness.sql @@ -0,0 +1,21 @@ +with + +sources_in_scope as ( + select resource_name, is_freshness_enabled + from {{ ref('int_all_graph_resources') }} + where resource_type = 'source' and not is_excluded +), + +final as ( + + select distinct + resource_name + + from sources_in_scope + where not is_freshness_enabled + +) + +select * from final + +{{ filter_exceptions() }} \ No newline at end of file diff --git a/models/marts/tests/testing.yml b/models/marts/tests/testing.yml index 0448e805..30eab8e3 100644 --- a/models/marts/tests/testing.yml +++ b/models/marts/tests/testing.yml @@ -20,4 +20,9 @@ models: - name: fct_missing_primary_key_tests description: this model has one record for every model without unique and not null tests configured on a single column tests: - - is_empty \ No newline at end of file + - is_empty + + - name: fct_sources_without_freshness + description: This table shows each source that does not have a source freshness defined, either as a warn or an error + tests: + - is_empty \ No newline at end of file From 1a89805d07595bdaaf4a12222ef9c4f4da459659 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:36:56 
+0200 Subject: [PATCH 3/3] Add integration test for source freshness rule --- integration_tests/models/staging/source_1/source.yml | 4 ++++ .../seeds/tests/test_fct_sources_without_freshness.csv | 3 +++ integration_tests/seeds/tests/tests_seeds.yml | 6 ++++++ 3 files changed, 13 insertions(+) create mode 100644 integration_tests/seeds/tests/test_fct_sources_without_freshness.csv diff --git a/integration_tests/models/staging/source_1/source.yml b/integration_tests/models/staging/source_1/source.yml index 4e55fa28..35f5eb02 100644 --- a/integration_tests/models/staging/source_1/source.yml +++ b/integration_tests/models/staging/source_1/source.yml @@ -4,6 +4,8 @@ sources: - name: source_1 description: this is source 1. schema: real_schema + freshness: # default freshness + warn_after: {count: 12, period: hour} # database: real_database tables: - name: table_1 @@ -14,6 +16,8 @@ sources: - name: table_2 - name: table_4 - name: table_5 + freshness: # unset the inherited warn_after so table_5 has no freshness threshold + warn_after: null - name: raw_table_5 identifier: table_5 diff --git a/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv b/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv new file mode 100644 index 00000000..b8700e06 --- /dev/null +++ b/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv @@ -0,0 +1,3 @@ +resource_name +source_2.table_3 +source_1.table_5 diff --git a/integration_tests/seeds/tests/tests_seeds.yml b/integration_tests/seeds/tests/tests_seeds.yml index 91aeabff..7d5b0933 100644 --- a/integration_tests/seeds/tests/tests_seeds.yml +++ b/integration_tests/seeds/tests/tests_seeds.yml @@ -32,3 +32,9 @@ seeds: - intermediate_test_coverage_pct - marts_test_coverage_pct - other_test_coverage_pct + + - name: test_fct_sources_without_freshness + tests: + - dbt_utils.equality: + name: equality_fct_sources_without_freshness + compare_model: ref('fct_sources_without_freshness')