feat: Inspect columns #386

Merged · 20 commits · Feb 1, 2024
Changes from 14 commits
3 changes: 3 additions & 0 deletions dbt_project.yml
@@ -34,6 +34,9 @@ models:
int_all_dag_relationships:
# required for BigQuery, Redshift, and Databricks for performance/memory reasons
+materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
int_all_columns:
# required for BigQuery, Redshift, and Databricks for performance/memory reasons
+materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
dag:
+materialized: table
staging:
16 changes: 16 additions & 0 deletions docs/customization/querying-columns.md
@@ -0,0 +1,16 @@
# Querying columns with SQL

The model `int_all_columns` ([source](https://github.com/dbt-labs/dbt-project-evaluator/tree/main/models/marts/core/int_all_columns.sql)), created by the package, lists all the columns from all the dbt nodes (models, sources, tests, snapshots).

You can use this model to help answer questions such as:

- Are there columns with the same name in different nodes?
- Do any columns in the YAML configuration lack descriptions?
- Do any columns share the same name but have different descriptions?
- Are there columns with names that match a specific pattern (regex)?
- Have any prohibited names been used for columns?
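
For example, a minimal sketch of a query for the second question — columns declared in YAML without a description. The column names (`node_resource_type`, `name`, `description`) come from `int_all_columns`; adjust the filters to your project:

```sql
-- columns on models that have no description
select
    node_unique_id,
    name
from {{ ref('int_all_columns') }}
where node_resource_type = 'model'
  and (description is null or description = '')
```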


## Defining additional tests that match your exact requirements

You can create a custom test against `{{ ref('int_all_columns') }}` to check for your specific requirement. When running the package, make sure to also include the children of the package's models by using the `package:dbt_project_evaluator+` selector.
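
A minimal sketch of such a singular test (the file name and the list of prohibited names below are hypothetical):

```sql
-- tests/assert_no_prohibited_column_names.sql
-- fails if any column uses a prohibited name
select
    node_unique_id,
    name
from {{ ref('int_all_columns') }}
where lower(name) in ('id_old', 'tmp')  -- replace with your prohibited names
```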
@@ -9,7 +9,7 @@ models:
tests:
- unique
- not_null

- name: int_all_graph_resources
description: "This table shows one record for each enabled resource in the graph and information about that resource."
columns:
@@ -34,4 +34,12 @@ models:
- name: unique_id
tests:
- unique
- not_null
- not_null

- name: int_all_columns
description: "This table shows one record for each column for all nodes and sources in the graph."
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- node_unique_id
- name
@@ -49,6 +49,14 @@ models:
- unique
- not_null

- name: stg_columns
description: "Staging model from the graph variable, one record per column resource."
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- node_unique_id
- name

- name: stg_sources
description: "Staging model from the graph variable, one record per source resource."
columns:
24 changes: 24 additions & 0 deletions integration_tests/seeds/core/core_seeds.yml
@@ -0,0 +1,24 @@
version: 2

seeds:
- name: test_int_all_columns
config:
column_types:
quote: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
tests:
- dbt_utils.equality:
name: equality_int_all_columns
compare_model: ref('int_all_columns')
compare_columns:
- node_unique_id
- node_name
- node_resource_type
- node_file_path
- node_is_enabled
- node_schema
- node_package_name
- node_alias
- name
- description
- data_type
- quote
35 changes: 35 additions & 0 deletions integration_tests/seeds/core/test_int_all_columns.csv
@@ -0,0 +1,35 @@
node_unique_id,node_name,node_resource_type,node_file_path,node_is_enabled,node_database,node_schema,node_package_name,node_alias,name,description,data_type,quote
model.exclude_package.excluded_model,excluded_model,model,models/staging/excluded_model.sql,true,database,integration_tests,exclude_package,excluded_model,id,,integer,
model.dbt_project_evaluator.stg_nodes,stg_nodes,model,models/staging/graph/stg_nodes.sql,true,database,integration_tests,dbt_project_evaluator,stg_nodes,unique_id,,,
model.dbt_project_evaluator.stg_metrics,stg_metrics,model,models/staging/graph/stg_metrics.sql,true,database,integration_tests,dbt_project_evaluator,stg_metrics,unique_id,,,
model.dbt_project_evaluator.stg_sources,stg_sources,model,models/staging/graph/stg_sources.sql,true,database,integration_tests,dbt_project_evaluator,stg_sources,unique_id,,,
model.dbt_project_evaluator.stg_exposures,stg_exposures,model,models/staging/graph/stg_exposures.sql,true,database,integration_tests,dbt_project_evaluator,stg_exposures,unique_id,,,
model.dbt_project_evaluator.fct_test_coverage,fct_test_coverage,model,models/marts/tests/fct_test_coverage.sql,true,database,integration_tests,dbt_project_evaluator,fct_test_coverage,test_to_model_ratio,the total number of tests in the project divided by the total number of models in the project,,
model.dbt_project_evaluator.fct_test_coverage,fct_test_coverage,model,models/marts/tests/fct_test_coverage.sql,true,database,integration_tests,dbt_project_evaluator,fct_test_coverage,test_coverage_pct,the number of models in the project with at least one test configured divided by the total number of models in the project,,
model.dbt_project_evaluator.int_all_graph_resources,int_all_graph_resources,model,models/marts/core/int_all_graph_resources.sql,true,database,integration_tests,dbt_project_evaluator,int_all_graph_resources,resource_id,,,
model.dbt_project_evaluator.int_all_graph_resources,int_all_graph_resources,model,models/marts/core/int_all_graph_resources.sql,true,database,integration_tests,dbt_project_evaluator,int_all_graph_resources,on_schema_change,"this contains the on_schema_change setting for incremental models. This column was sometimes an empty string, so should be tested to detect regressions",,
model.dbt_project_evaluator.int_all_graph_resources,int_all_graph_resources,model,models/marts/core/int_all_graph_resources.sql,true,database,integration_tests,dbt_project_evaluator,int_all_graph_resources,model_type,,,
model.dbt_project_evaluator.int_direct_relationships,int_direct_relationships,model,models/marts/core/int_direct_relationships.sql,true,database,integration_tests,dbt_project_evaluator,int_direct_relationships,unique_id,,,
model.dbt_project_evaluator.int_all_dag_relationships,int_all_dag_relationships,model,models/marts/core/int_all_dag_relationships.sql,true,database,integration_tests,dbt_project_evaluator,int_all_dag_relationships,path,unique identifier representing a distinct path from each resource to another resource,,
model.dbt_project_evaluator.stg_node_relationships,stg_node_relationships,model,models/staging/graph/stg_node_relationships.sql,true,database,integration_tests,dbt_project_evaluator,stg_node_relationships,unique_id,,,
model.dbt_project_evaluator.stg_metric_relationships,stg_metric_relationships,model,models/staging/graph/stg_metric_relationships.sql,true,database,integration_tests,dbt_project_evaluator,stg_metric_relationships,unique_id,,,
model.dbt_project_evaluator.stg_exposure_relationships,stg_exposure_relationships,model,models/staging/graph/stg_exposure_relationships.sql,true,database,integration_tests,dbt_project_evaluator,stg_exposure_relationships,unique_id,,,
model.dbt_project_evaluator.fct_documentation_coverage,fct_documentation_coverage,model,models/marts/documentation/fct_documentation_coverage.sql,true,database,integration_tests,dbt_project_evaluator,fct_documentation_coverage,documentation_coverage_pct,the number of models in the project with a description divided by the total number of models in the project,,
model.dbt_project_evaluator.int_model_test_summary,int_model_test_summary,model,models/marts/tests/intermediate/int_model_test_summary.sql,true,database,integration_tests,dbt_project_evaluator,int_model_test_summary,resource_name,,,
model.dbt_project_evaluator.stg_naming_convention_folders,stg_naming_convention_folders,model,models/staging/variables/stg_naming_convention_folders.sql,true,database,integration_tests,dbt_project_evaluator,stg_naming_convention_folders,model_type,,,
model.dbt_project_evaluator.stg_naming_convention_folders,stg_naming_convention_folders,model,models/staging/variables/stg_naming_convention_folders.sql,true,database,integration_tests,dbt_project_evaluator,stg_naming_convention_folders,folder_name,,,
model.dbt_project_evaluator.fct_chained_views_dependencies,fct_chained_views_dependencies,model,models/marts/performance/fct_chained_views_dependencies.sql,true,database,integration_tests,dbt_project_evaluator,fct_chained_views_dependencies,distance,,,
model.dbt_project_evaluator.stg_naming_convention_prefixes,stg_naming_convention_prefixes,model,models/staging/variables/stg_naming_convention_prefixes.sql,true,database,integration_tests,dbt_project_evaluator,stg_naming_convention_prefixes,model_type,,,
model.dbt_project_evaluator.stg_naming_convention_prefixes,stg_naming_convention_prefixes,model,models/staging/variables/stg_naming_convention_prefixes.sql,true,database,integration_tests,dbt_project_evaluator,stg_naming_convention_prefixes,unique_id,surrogate key of model_type and prefix value,,
model.dbt_project_evaluator_integration_tests.report_1,report_1,model,models/reports/report_1.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,report_1,col,col me maybe,boolean,
model.dbt_project_evaluator_integration_tests.report_2,report_2,model,models/reports/report_2.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,report_2,id,,integer,
model.dbt_project_evaluator_integration_tests.int_model_4,int_model_4,model,models/marts/int_model_4.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,int_model_4,id,,,
model.dbt_project_evaluator_integration_tests.stg_model_4,stg_model_4,model,models/staging/stg_model_4.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,stg_model_4,id,,,
model.dbt_project_evaluator_integration_tests.stg_model_4,stg_model_4,model,models/staging/stg_model_4.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,stg_model_4,"concat(coalesce('fake_column', ' '),'for_testing')",,,
model.dbt_project_evaluator_integration_tests.metricflow_time_spine,metricflow_time_spine,model,models/utils/metricflow_time_spine.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,metricflow_time_spine,date_day,,,
model.dbt_project_evaluator_integration_tests.stg_model_1,stg_model_1,model,models/staging/source_1/stg_model_1.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,stg_model_1,id,hocus pocus,,
model.dbt_project_evaluator_integration_tests.stg_model_2,stg_model_2,model,models/staging/source_1/stg_model_2.sql,true,database,integration_tests,dbt_project_evaluator_integration_tests,stg_model_2,id,,,
seed.dbt_project_evaluator_integration_tests.dbt_project_evaluator_exceptions,dbt_project_evaluator_exceptions,seed,seeds/dbt_project_evaluator_exceptions.csv,true,database,integration_tests,dbt_project_evaluator_integration_tests,dbt_project_evaluator_exceptions,id_to_exclude,Values (or like pattern) to exclude for column_name.,,
seed.dbt_project_evaluator_integration_tests.dbt_project_evaluator_exceptions,dbt_project_evaluator_exceptions,seed,seeds/dbt_project_evaluator_exceptions.csv,true,database,integration_tests,dbt_project_evaluator_integration_tests,dbt_project_evaluator_exceptions,comment,Field to document why a given exception is legitimate.,,
seed.dbt_project_evaluator_integration_tests.dbt_project_evaluator_exceptions,dbt_project_evaluator_exceptions,seed,seeds/dbt_project_evaluator_exceptions.csv,true,database,integration_tests,dbt_project_evaluator_integration_tests,dbt_project_evaluator_exceptions,fct_name,Name of the fact table to define exceptions.,,
seed.dbt_project_evaluator_integration_tests.dbt_project_evaluator_exceptions,dbt_project_evaluator_exceptions,seed,seeds/dbt_project_evaluator_exceptions.csv,true,database,integration_tests,dbt_project_evaluator_integration_tests,dbt_project_evaluator_exceptions,column_name,Column name from fct_name to define exceptions.,,
4 changes: 2 additions & 2 deletions macros/insert_resources_from_graph.sql
@@ -1,5 +1,5 @@
{% macro insert_resources_from_graph(relation, resource_type='nodes', relationships=False, batch_size=var('insert_batch_size') | int) %}
{%- set values = get_resource_values(resource_type, relationships) -%}
{% macro insert_resources_from_graph(relation, resource_type='nodes', relationships=False, columns=False, batch_size=var('insert_batch_size') | int) %}
{%- set values = get_resource_values(resource_type, relationships, columns) -%}
{%- set values_length = values | length -%}
{%- set loop_count = (values_length / batch_size) | round(0, 'ceil') | int -%}

39 changes: 39 additions & 0 deletions macros/unpack/get_column_values.sql
@@ -0,0 +1,39 @@
{%- macro get_column_values(node_type) -%}
{{ return(adapter.dispatch('get_column_values', 'dbt_project_evaluator')(node_type)) }}
{%- endmacro -%}

{%- macro default__get_column_values(node_type) -%}

{%- if execute -%}
{%- if node_type == 'nodes' %}
{% set nodes_list = graph.nodes.values() %}
{%- elif node_type == 'sources' -%}
{% set nodes_list = graph.sources.values() %}
{%- else -%}
{{ exceptions.raise_compiler_error("node_type needs to be either nodes or sources, got " ~ node_type) }}
{% endif -%}

{%- set values = [] -%}

{%- for node in nodes_list -%}
{%- for column in node.columns.values() -%}

{%- set values_line =
[
wrap_string_with_quotes(node.unique_id),
wrap_string_with_quotes(dbt.escape_single_quotes(column.name)),
wrap_string_with_quotes(dbt.escape_single_quotes(column.description)),
'null' if not column.data_type else wrap_string_with_quotes(dbt.escape_single_quotes(column.data_type)),
Collaborator comment: I believe we can remove the `'null' if not column.data_type else` because the `wrap_string_with_quotes` macro automatically handles NULLs.

Suggested change:
'null' if not column.data_type else wrap_string_with_quotes(dbt.escape_single_quotes(column.data_type)),
wrap_string_with_quotes(dbt.escape_single_quotes(column.data_type)),

'null' if not column.quote else wrap_string_with_quotes(dbt.escape_single_quotes(column.quote))
]
%}

{%- do values.append(values_line) -%}

{%- endfor -%}
{%- endfor -%}
{{ return(values) }}

{%- endif -%}

{%- endmacro -%}
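
The reviewer's suggestion above relies on `wrap_string_with_quotes` returning the literal `null` for empty values. A minimal sketch of such a NULL-aware macro (an assumption — the package's actual implementation may differ):

```sql
{% macro wrap_string_with_quotes(string_to_wrap) %}
    {#- emit a quoted SQL string literal, or the keyword null when the value is empty -#}
    {%- if string_to_wrap -%}
        '{{ string_to_wrap }}'
    {%- else -%}
        null
    {%- endif -%}
{% endmacro %}
```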
4 changes: 3 additions & 1 deletion macros/unpack/get_resource_values.sql
@@ -1,6 +1,8 @@
{% macro get_resource_values(resource=None, relationships=None) %}
{% macro get_resource_values(resource=None, relationships=None, columns=None) %}
{% if relationships %}
{{ return(adapter.dispatch('get_relationship_values', 'dbt_project_evaluator')(node_type=resource)) }}
{% elif columns %}
{{ return(adapter.dispatch('get_column_values', 'dbt_project_evaluator')(node_type=resource)) }}
{% elif resource == 'exposures' %}
{{ return(adapter.dispatch('get_exposure_values', 'dbt_project_evaluator')()) }}
{% elif resource == 'sources' %}
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -81,6 +81,7 @@ nav:
- Configuring exceptions to the rules: customization/exceptions.md
- Excluding packages and models/sources based on path: customization/excluding-packages-and-paths.md
- Display issues in the logs: customization/issues-in-log.md
- Querying columns: customization/querying-columns.md
- Run in CI Check: ci-check.md
- Querying the DAG: querying-the-dag.md
- Contributing: contributing.md
59 changes: 59 additions & 0 deletions models/marts/core/int_all_columns.sql
@@ -0,0 +1,59 @@
-- one row for each column in a node or source

with

stg_columns as (

select
node_unique_id,
name,
description,
data_type,
quote

from {{ ref('stg_columns') }}

),

stg_nodes as (

select
unique_id,
name,
resource_type,
file_path,
is_enabled,
database,
schema,
package_name,
alias

from {{ ref('stg_nodes') }}

),

final as (

select
stg_nodes.unique_id as node_unique_id,
stg_nodes.name as node_name,
stg_nodes.resource_type as node_resource_type,
stg_nodes.file_path as node_file_path,
stg_nodes.is_enabled as node_is_enabled,
stg_nodes.database as node_database,
stg_nodes.schema as node_schema,
stg_nodes.package_name as node_package_name,
stg_nodes.alias as node_alias,
stg_columns.name as name,
stg_columns.description as description,
stg_columns.data_type as data_type,
stg_columns.quote as quote

from stg_columns

inner join stg_nodes
on stg_nodes.unique_id = stg_columns.node_unique_id

)

select * from final
24 changes: 24 additions & 0 deletions models/staging/graph/base/base_node_columns.sql
@@ -0,0 +1,24 @@
{{
config(
materialized='table',
post_hook="{{ insert_resources_from_graph(this, resource_type='nodes', columns=True) }}"
)
}}

{% if execute %}
{{ check_model_is_table(model) }}
{% endif %}
/* Bigquery won't let us `where` without `from` so we use this workaround */
with dummy_cte as (
select 1 as foo
)

select
cast(null as {{ dbt.type_string() }}) as node_unique_id,
cast(null as {{ dbt.type_string() }}) as name,
cast(null as {{ dbt.type_string() }}) as description,
cast(null as {{ dbt.type_string() }}) as data_type,
cast(null as {{ dbt.type_string() }}) as quote

from dummy_cte
where false
24 changes: 24 additions & 0 deletions models/staging/graph/base/base_source_columns.sql
@@ -0,0 +1,24 @@
{{
config(
materialized='table',
post_hook="{{ insert_resources_from_graph(this, resource_type='sources', columns=True) }}"
)
}}

{% if execute %}
{{ check_model_is_table(model) }}
{% endif %}
/* Bigquery won't let us `where` without `from` so we use this workaround */
with dummy_cte as (
select 1 as foo
)

select
cast(null as {{ dbt.type_string() }}) as node_unique_id,
cast(null as {{ dbt.type_string() }}) as name,
cast(null as {{ dbt.type_string() }}) as description,
cast(null as {{ dbt.type_string() }}) as data_type,
cast(null as {{ dbt.type_string() }}) as quote

from dummy_cte
where false
11 changes: 11 additions & 0 deletions models/staging/graph/stg_columns.sql
@@ -0,0 +1,11 @@
with

final as (

{{ dbt_utils.union_relations([
ref('base_node_columns'),
ref('base_source_columns')
])}}
)

select * from final
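
For reference, `dbt_utils.union_relations` here compiles to roughly the following shape (a simplified sketch — the real macro fully qualifies the relations, casts each column, and adds a `_dbt_source_relation` column):

```sql
select
    node_unique_id, name, description, data_type, quote
from {{ ref('base_node_columns') }}

union all

select
    node_unique_id, name, description, data_type, quote
from {{ ref('base_source_columns') }}
```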