diff --git a/docs/rules/testing.md b/docs/rules/testing.md index 3c27e894..7b784f06 100644 --- a/docs/rules/testing.md +++ b/docs/rules/testing.md @@ -28,6 +28,18 @@ You can optionally extend this test to apply to more node types (`source`,`snaps Snapshots should always have a multi-field primary key in order to function, while sources and seeds may not. Depending on your expectations for duplicates and null values, different kinds of primary key tests may be appropriate. Consider your use case carefully. +--- +## Missing Source Freshness + +`fct_sources_without_freshness` ([source](https://github.com/dbt-labs/dbt-project-evaluator/tree/main/models/marts/tests/fct_sources_without_freshness.sql)) lists every source that does not have a source freshness threshold defined. Any source that has neither `warn_after` nor `error_after` defined will be flagged by this model; defining at least one of them is enough to pass. + +**Reason to Flag** + +Source freshness is useful for understanding if your data pipelines are in a healthy state and is a critical component of defining SLAs for your warehouse. Enabling freshness for sources also facilitates [referencing the source freshness results in the selectors](https://docs.getdbt.com/reference/node-selection/methods#the-source_status-method) for a more efficient execution. + +**How to Remediate** + +Apply a [source freshness block](https://docs.getdbt.com/docs/build/sources#declaring-source-freshness) to the source definition. This can be implemented at either the source name or table name level. --- ## Test Coverage diff --git a/integration_tests/models/staging/source_1/source.yml b/integration_tests/models/staging/source_1/source.yml index 4e55fa28..35f5eb02 100644 --- a/integration_tests/models/staging/source_1/source.yml +++ b/integration_tests/models/staging/source_1/source.yml @@ -4,6 +4,8 @@ sources: - name: source_1 description: this is source 1. 
schema: real_schema + freshness: # default freshness + warn_after: {count: 12, period: hour} # database: real_database tables: - name: table_1 @@ -14,6 +16,8 @@ sources: - name: table_2 - name: table_4 - name: table_5 + freshness: # override the source-level default so this table has no freshness + warn_after: null - name: raw_table_5 identifier: table_5 diff --git a/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv b/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv new file mode 100644 index 00000000..b8700e06 --- /dev/null +++ b/integration_tests/seeds/tests/test_fct_sources_without_freshness.csv @@ -0,0 +1,3 @@ +resource_name +source_2.table_3 +source_1.table_5 diff --git a/integration_tests/seeds/tests/tests_seeds.yml b/integration_tests/seeds/tests/tests_seeds.yml index 91aeabff..7d5b0933 100644 --- a/integration_tests/seeds/tests/tests_seeds.yml +++ b/integration_tests/seeds/tests/tests_seeds.yml @@ -32,3 +32,9 @@ seeds: - intermediate_test_coverage_pct - marts_test_coverage_pct - other_test_coverage_pct + + - name: test_fct_sources_without_freshness + tests: + - dbt_utils.equality: + name: equality_fct_sources_without_freshness + compare_model: ref('fct_sources_without_freshness') diff --git a/macros/unpack/get_source_values.sql b/macros/unpack/get_source_values.sql index b8d3abad..7c026dd3 100644 --- a/macros/unpack/get_source_values.sql +++ b/macros/unpack/get_source_values.sql @@ -24,6 +24,8 @@ "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", "cast(" ~ node.config.enabled ~ " as boolean)", wrap_string_with_quotes(node.loaded_at_field | replace("'", "_")), + "cast(" ~ (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count) + or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count)) | trim ~ " as boolean)", wrap_string_with_quotes(node.database), wrap_string_with_quotes(node.schema), wrap_string_with_quotes(node.package_name), diff --git 
a/models/marts/core/int_all_graph_resources.sql b/models/marts/core/int_all_graph_resources.sql index f72dcb48..1f080107 100644 --- a/models/marts/core/int_all_graph_resources.sql +++ b/models/marts/core/int_all_graph_resources.sql @@ -111,6 +111,7 @@ joined as ( unioned_with_calc.source_name, -- NULL for non-source resources unioned_with_calc.is_source_described, unioned_with_calc.loaded_at_field, + unioned_with_calc.is_freshness_enabled, unioned_with_calc.loader, unioned_with_calc.identifier, unioned_with_calc.hard_coded_references, -- NULL for non-model resources diff --git a/models/marts/tests/fct_sources_without_freshness.sql b/models/marts/tests/fct_sources_without_freshness.sql new file mode 100644 index 00000000..464db3d3 --- /dev/null +++ b/models/marts/tests/fct_sources_without_freshness.sql @@ -0,0 +1,21 @@ +with + +all_resources as ( + select * from {{ ref('int_all_graph_resources') }} + where not is_excluded + +), + +final as ( + + select distinct + resource_name + + from all_resources + where not is_freshness_enabled and resource_type = 'source' + +) + +select * from final + +{{ filter_exceptions() }} \ No newline at end of file diff --git a/models/marts/tests/testing.yml b/models/marts/tests/testing.yml index 0448e805..30eab8e3 100644 --- a/models/marts/tests/testing.yml +++ b/models/marts/tests/testing.yml @@ -20,4 +20,9 @@ models: - name: fct_missing_primary_key_tests description: this model has one record for every model without unique and not null tests configured on a single column tests: - - is_empty \ No newline at end of file + - is_empty + + - name: fct_sources_without_freshness + description: This table shows each source that does not have a source freshness defined, either as a warn or an error + tests: + - is_empty \ No newline at end of file diff --git a/models/staging/graph/stg_sources.sql b/models/staging/graph/stg_sources.sql index 4c6caf4d..328a3c39 100644 --- a/models/staging/graph/stg_sources.sql +++ 
b/models/staging/graph/stg_sources.sql @@ -26,6 +26,7 @@ select cast(True as boolean) as is_described, cast(True as boolean) as is_enabled, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as loaded_at_field, + cast(True as boolean) as is_freshness_enabled, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as database, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as schema, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as package_name,