From 707cdf83e32f696c4a25581b645ec2405c08cfce Mon Sep 17 00:00:00 2001 From: Ofek Weiss Date: Sun, 12 Jan 2025 18:41:52 +0200 Subject: [PATCH] fixes to common test configs --- macros/utils/common_test_configs.sql | 200 +++++++++++++++++++-------- 1 file changed, 141 insertions(+), 59 deletions(-) diff --git a/macros/utils/common_test_configs.sql b/macros/utils/common_test_configs.sql index 22eac6cae..d64e2a4a1 100644 --- a/macros/utils/common_test_configs.sql +++ b/macros/utils/common_test_configs.sql @@ -81,12 +81,12 @@ "failed_row_count_calc": "count(*)" }, "expect_column_values_to_be_increasing": { - "description": "Expect column values to be increasing. If `strictly: True`, then this expectation is only satisfied if each consecutive value is strictly increasing \u2013 equal values are treated as failures.", + "description": "Expect column values to be increasing.\nIf strictly: True, then this expectation is only satisfied if each consecutive value is strictly increasing \u2013 equal values are treated as failures.", "quality_dimension": "accuracy", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_be_decreasing": { - "description": "Expect column values to be decreasing. If `strictly=True`, then this expectation is only satisfied if each consecutive value is strictly decreasing \u2013 equal values are treated as failures.", + "description": "Expect column values to be decreasing.\nIf strictly: True, then this expectation is only satisfied if each consecutive value is strictly decreasing \u2013 equal values are treated as failures.", "quality_dimension": "accuracy", "failed_row_count_calc": "count(*)" }, @@ -101,42 +101,42 @@ "failed_row_count_calc": "count(*)" }, "expect_column_values_to_match_regex": { - "description": "Expect column entries to be strings that match a given regular expression. 
Valid matches can be found anywhere in the string, for example \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\". Optionally, `is_raw` indicates the `regex` pattern is a \"raw\" string and should be escaped. The default is `False`.", + "description": "Expect column entries to be strings that match a given regular expression.\nValid matches can be found anywhere in the string.\nFor example, \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\".", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_not_match_regex": { - "description": "Expect column entries to be strings that do NOT match a given regular expression. The regex must not match any portion of the provided string. For example, \"[at]+\" would identify the following strings as expected: \"fish\u201d, \"dog\u201d, and the following as unexpected: \"cat\u201d, \"hat\u201d. Optionally, `is_raw` indicates the `regex` pattern is a \"raw\" string and should be escaped. The default is `False`.", + "description": "Expect column entries to be strings that do NOT match a given regular expression.\nThe regex must not match any portion of the provided string.\nFor example, \"[at]+\" would identify the following strings as expected: \"fish\", \"dog\", and the following as unexpected: \"cat\", \"hat\".", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_match_regex_list": { - "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions. Matches can be anywhere in the string. Optionally, `is_raw` indicates the `regex` patterns are \"raw\" strings and should be escaped. 
The default is `False`.", + "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions. Matches can be anywhere in the string.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_not_match_regex_list": { - "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string. Optionally, `is_raw` indicates the `regex` patterns are \"raw\" strings and should be escaped. The default is `False`.", + "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_match_like_pattern": { - "description": "Expect column entries to be strings that match a given SQL like pattern.", + "description": "Expect column entries to be strings that match a given SQL LIKE pattern.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_not_match_like_pattern": { - "description": "Expect column entries to be strings that do not match a given SQL like pattern.", + "description": "Expect column entries to be strings that do not match a given SQL LIKE pattern.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_match_like_pattern_list": { - "description": "Expect the column entries to be strings that match any of a list of SQL like patterns.", + "description": "Expect column entries to be strings that match any of a list of SQL LIKE patterns.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_not_match_like_pattern_list": { - "description": "Expect the column entries to be strings that do not match any of a list of SQL like patterns.", + "description": "Expect column 
entries to be strings that do not match any of a list of SQL LIKE patterns.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, @@ -151,125 +151,159 @@ "failed_row_count_calc": "count(*)" }, "expect_column_pair_values_to_be_in_set": { - "description": "Expect paired values from columns A and B to belong to a set of valid pairs. Note: value pairs are expressed as lists within lists", + "description": "Expect paired values from columns A and B to belong to a set of valid pairs.\nNote: value pairs are expressed as lists within lists", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_select_column_values_to_be_unique_within_record": { - "description": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated.", + "description": "Expect the values for each record to be unique across the columns listed.\nNote that records can be duplicated.", "quality_dimension": "validity", "failed_row_count_calc": "count(*)" }, "expect_column_values_to_be_unique": { - "description": "Expect each column value to be unique." + "description": "Expect each column value to be unique.", + "quality_dimension": "uniqueness" }, "expect_compound_columns_to_be_unique": { - "description": "Expect that the columns are unique together, e.g. a multi-column primary key." + "description": "Expect that the columns are unique together, e.g. a multi-column primary key.", + "quality_dimension": "uniqueness" }, "expect_column_to_exist": { "description": "Expect the specified column to exist." }, "expect_row_values_to_have_recent_data": { - "description": "Expect the model to have rows that are at least as recent as the defined interval prior to the current timestamp. Optionally gives the possibility to apply filters on the results." 
+ "description": "Expect the model to have rows that are at least as recent as the defined interval prior to the current timestamp.\nOptionally gives the possibility to apply filters on the results.", + "quality_dimension": "freshness" }, "expect_grouped_row_values_to_have_recent_data": { - "description": "Expect the model to have grouped rows that are at least as recent as the defined interval prior to the current timestamp. Use this to test whether there is recent data for each grouped row defined by `group_by` (which is a list of columns) and a `timestamp_column`. Optionally gives the possibility to apply filters on the results." + "description": "Expect the model to have grouped rows that are at least as recent as the defined interval prior to the current timestamp.\nUse this to test whether there is recent data for each grouped row defined by group_by (which is a list of columns) and a timestamp_column.\nOptionally gives the possibility to apply filters on the results.", + "quality_dimension": "freshness" }, "expect_table_column_count_to_be_between": { - "description": "Expect the number of columns in a model to be between two values." + "description": "Expect the number of columns in a model to be between two values.", + "quality_dimension": "consistency" }, "expect_table_column_count_to_equal_other_table": { - "description": "Expect the number of columns in a model to match another model." + "description": "Expect the number of columns in a model to match another model.", + "quality_dimension": "consistency" }, "expect_table_columns_to_not_contain_set": { - "description": "Expect the columns in a model not to contain a given list." + "description": "Expect the columns in a model not to contain a given list.", + "quality_dimension": "validity" }, "expect_table_columns_to_contain_set": { - "description": "Expect the columns in a model to contain a given list." 
+ "description": "Expect the columns in a model to contain a given list.", + "quality_dimension": "consistency" }, "expect_table_column_count_to_equal": { - "description": "Expect the number of columns in a model to be equal to `expected_number_of_columns`." + "description": "Expect the number of columns in a model to be equal to `expected_number_of_columns`.", + "quality_dimension": "consistency" }, "expect_table_columns_to_match_ordered_list": { - "description": "Expect the columns to exactly match a specified list." + "description": "Expect the columns to exactly match a specified list.", + "quality_dimension": "validity" }, "expect_table_columns_to_match_set": { - "description": "Expect the columns in a model to match a given list." + "description": "Expect the columns in a model to match a given set.", + "quality_dimension": "validity" }, "expect_table_row_count_to_be_between": { - "description": "Expect the number of rows in a model to be between two values." + "description": "Expect the number of rows in a model to be between two values.", + "quality_dimension": "accuracy" }, "expect_table_row_count_to_equal_other_table": { - "description": "Expect the number of rows in a model match another model." + "description": "Expect the number of rows in a model to match another model.", + "quality_dimension": "consistency" }, "expect_table_row_count_to_equal_other_table_times_factor": { - "description": "Expect the number of rows in a model to match another model times a preconfigured factor." + "description": "Expect the number of rows in a model to match another model times a preconfigured factor.", + "quality_dimension": "consistency" }, "expect_table_row_count_to_equal": { - "description": "Expect the number of rows in a model to be equal to expected_number_of_rows." + "description": "Expect the number of rows in a model to be equal to `expected_number_of_rows`." 
}, "expect_column_values_to_be_of_type": { - "description": "Expect a column to be of a specified data type." + "description": "Expect a column to be of a specified data type.", + "quality_dimension": "validity" }, "expect_column_values_to_be_in_type_list": { "description": "Expect a column to be one of a specified type list." }, "expect_column_values_to_have_consistent_casing": { - "description": "Expect a column to have consistent casing. By setting `display_inconsistent_columns` to true, the number of inconsistent values in the column will be displayed in the terminal whereas the inconsistent values themselves will be returned if the SQL compiled test is run." + "description": "Expect a column to have consistent casing.\nBy setting display_inconsistent_columns to true, the number of inconsistent values in the column will be displayed in the terminal\nwhereas the inconsistent values themselves will be returned if the SQL compiled test is run.", + "quality_dimension": "validity" }, "expect_column_distinct_count_to_equal": { - "description": "Expect the number of distinct column values to be equal to a given value." + "description": "Expect the number of distinct column values to be equal to a given value.", + "quality_dimension": "accuracy" }, "expect_column_distinct_count_to_be_greater_than": { - "description": "Expect the number of distinct column values to be greater than a given value." + "description": "Expect the number of distinct column values to be greater than a given value.", + "quality_dimension": "accuracy" }, "expect_column_distinct_count_to_be_less_than": { - "description": "Expect the number of distinct column values to be less than a given value." + "description": "Expect the number of distinct column values to be less than a given value.", + "quality_dimension": "accuracy" }, "expect_column_distinct_values_to_be_in_set": { - "description": "Expect the set of distinct column values to be contained by a given set." 
+ "description": "Expect the set of distinct column values to be contained by a given set.", + "quality_dimension": "validity" }, "expect_column_distinct_values_to_contain_set": { - "description": "Expect the set of distinct column values to contain a given set. In contrast to `expect_column_values_to_be_in_set` this ensures not that all column values are members of the given set but that values from the set must be present in the column." + "description": "Expect the set of distinct column values to contain a given set.\nIn contrast to expect_column_values_to_be_in_set this ensures not that all column values are members of the given set\nbut that values from the set must be present in the column.", + "quality_dimension": "validity" }, "expect_column_distinct_values_to_equal_set": { - "description": "Expect the set of distinct column values to equal a given set. In contrast to `expect_column_distinct_values_to_contain_set` this ensures not only that a certain set of values are present in the column but that these and only these values are present." + "description": "Expect the set of distinct column values to equal a given set.\nIn contrast to expect_column_distinct_values_to_contain_set this ensures not only that a certain set of values are present in the column\nbut that these and only these values are present.", + "quality_dimension": "validity" }, "expect_column_distinct_count_to_equal_other_table": { - "description": "Expect the number of distinct column values to be equal to number of distinct values in another model." + "description": "Expect the number of distinct column values to be equal to number of distinct values in another model.", + "quality_dimension": "accuracy" }, "expect_column_mean_to_be_between": { - "description": "Expect the column mean to be between a min_value value and a max_value value (inclusive)." 
+ "description": "Expect the column mean to be between a min_value value and a max_value value (inclusive).", + "quality_dimension": "validity" }, "expect_column_median_to_be_between": { - "description": "Expect the column median to be between a min_value value and a max_value value (inclusive)." + "description": "Expect the column median to be between a min_value value and a max_value value (inclusive).", + "quality_dimension": "validity" }, "expect_column_quantile_values_to_be_between": { - "description": "Expect specific provided column quantiles to be between provided min_value and max_value values." + "description": "Expect specific provided column quantiles to be between provided min_value and max_value values.", + "quality_dimension": "validity" }, "expect_column_stdev_to_be_between": { - "description": "Expect the column standard deviation to be between a min_value value and a max_value value. Uses sample standard deviation (normalized by N-1)." + "description": "Expect the column standard deviation to be between a min_value value and a max_value value. Uses sample standard deviation (normalized by N-1).", + "quality_dimension": "validity" }, "expect_column_unique_value_count_to_be_between": { - "description": "Expect the number of unique values to be between a min_value value and a max_value value." + "description": "Expect the number of unique values to be between a min_value value and a max_value value.", + "quality_dimension": "validity" }, "expect_column_proportion_of_unique_values_to_be_between": { - "description": "Expect the proportion of unique values to be between a min_value value and a max_value value. For example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4." 
+ "description": "Expect the proportion of unique values to be between a min_value value and a max_value value.\nFor example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4.", + "quality_dimension": "uniqueness" }, "expect_column_most_common_value_to_be_in_set": { - "description": "Expect the most common value to be within the designated value set." + "description": "Expect the most common value to be within the designated value set.", + "quality_dimension": "accuracy" }, "expect_column_max_to_be_between": { - "description": "Expect the column max to be between a min and max value." + "description": "Expect the column max to be between a min and max value.", + "quality_dimension": "validity" }, "expect_column_min_to_be_between": { - "description": "Expect the column min to be between a min and max value." + "description": "Expect the column min to be between a min and max value.", + "quality_dimension": "validity" }, "expect_column_sum_to_be_between": { - "description": "Expect the column to sum to be between a min and max value." + "description": "Expect the column to sum to be between a min and max value.", + "quality_dimension": "validity" }, "expect_multicolumn_sum_to_equal": { - "description": "Expects that sum of all rows for a set of columns is equal to a specific value" + "description": "Expects that sum of all rows for a set of columns is equal to a specific value", + "quality_dimension": "consistency" }, "expect_column_values_to_be_within_n_moving_stdevs": { "description": "A simple anomaly test based on the assumption that differences between periods in a given time series follow a log-normal distribution. Thus, we would expect the logged differences (vs N periods ago) in metric values to be within Z sigma away from a moving average. By applying a list of columns in the `group_by` parameter, you can also test for deviations within a group." 
@@ -279,40 +313,88 @@ }, "expect_row_values_to_have_data_for_every_n_datepart": { "description": "Expects model to have values for every grouped `date_part`." + }, + "expect_column_values_to_be_of_type_list": { + "description": "Expect a column to be one of a specified type list.", + "quality_dimension": "validity" + }, + "expect_table_aggregation_to_equal_other_table": { + "description": "Expect an (optionally grouped) expression to match the same (or optionally other) expression in a different table.", + "quality_dimension": "consistency" } }, "dbt_utils": { "equal_rowcount": { "quality_dimension": "consistency", - "failed_row_count_calc": "sum(diff_count)" + "failed_row_count_calc": "sum(diff_count)", + "description": "Asserts that two relations have the same number of rows." }, "fewer_rows_than": { "quality_dimension": "consistency", - "failed_row_count_calc": "sum(row_count_delta)" + "failed_row_count_calc": "sum(row_count_delta)", + "description": "Asserts that the respective model has fewer rows than the model being compared." }, "expression_is_true": { "quality_dimension": "accuracy", - "failed_row_count_calc": "count(*)" + "failed_row_count_calc": "count(*)", + "description": "Asserts that a valid SQL expression is true for all records." }, "not_empty_string": { "quality_dimension": "completeness", - "failed_row_count_calc": "count(*)" + "failed_row_count_calc": "count(*)", + "description": "Asserts that a column does not have any values equal to ''." }, "cardinality_equality": { "quality_dimension": "consistency", - "failed_row_count_calc": "sum(num_rows)" + "failed_row_count_calc": "sum(num_rows)", + "description": "Asserts that values in a given column have exactly the same cardinality as values from a different column in a different model." }, "sequential_values": { "quality_dimension": "accuracy", - "failed_row_count_calc": "count(*)" + "failed_row_count_calc": "count(*)", + "description": "Confirms that a column contains sequential values." 
}, "accepted_range": { "quality_dimension": "validity", - "failed_row_count_calc": "count(*)" + "failed_row_count_calc": "count(*)", + "description": "Asserts that a column's values fall inside an expected range." }, "unique_combination_of_columns": { "quality_dimension": "uniqueness", - "failed_row_count_calc": "count(*)" + "failed_row_count_calc": "count(*)", + "description": "Asserts that the combination of columns is unique." + }, + "at_least_one": { + "description": "Asserts that a column has at least one value.", + "quality_dimension": "completeness" + }, + "equality": { + "description": "Asserts the equality of two relations.\nOptionally specify a subset of columns to compare or exclude, and a precision to compare numeric columns on.", + "quality_dimension": "consistency" + }, + "mutually_exclusive_ranges": { + "description": "Asserts that for a given lower_bound_column and upper_bound_column, the ranges between the lower and upper bounds do not overlap with the ranges of another row.", + "quality_dimension": "accuracy" + }, + "not_accepted_values": { + "description": "Asserts that there are no rows that match the given values.", + "quality_dimension": "validity" + }, + "not_constant": { + "description": "Asserts that a column does not have the same value in all rows.", + "quality_dimension": "validity" + }, + "not_null_proportion": { + "description": "Asserts that the proportion of non-null values present in a column is between a specified range [at_least, at_most].", + "quality_dimension": "completeness" + }, + "recency": { + "description": "Asserts that a timestamp column in the reference model contains data that is at least as recent as the defined date interval.", + "quality_dimension": "freshness" + }, + "relationships_where": { + "description": "Asserts the referential integrity between two relations with an added predicate to filter out some rows from the test.", + "quality_dimension": "consistency" } }, "elementary": { @@ -340,17 +422,17 @@ 
"description": "Monitors the frequency of values in the configured dimensions over time." }, "all_columns_anomalies": { - "description": "Column-level anomaly monitors (null_count, null_percent, zero_count, string_length, variance, etc.) on all the columns of the table. The test checks the data type of each column and only executes monitors that are relevant to it." + "description": "Executes column level monitors and anomaly detection on all of the columns in the table." }, "column_anomalies": { - "description": "Column-level anomaly monitors (null_count, null_percent, zero_count, string_length, variance, etc.) on the column according to its data type." + "description": "Executes column level monitors and anomaly detection on the column" }, "exposure_schema_validity": { "description": "Column level exposure validation according to the meta.columns property in exposures.yml" }, "collect_metrics": { - "description": "Collects metrics for the specified column or table. The test will always pass.", - }, + "description": "Collects metrics for the specified column or table. The test will always pass." + } } } %} {% do return(common_tests_configs_mapping) %}