feat: lint sources (#78)

# Overview Extends rules so that they can be used against dbt [sources](https://docs.getdbt.com/docs/build/sources) in addition to models. # Usage A rule defines what resource-type it acts against in the type signature of the function it wraps or in a class-based `evaluate` method: ```python from dbt_score import Model, Source, rule, Rule, RuleViolation # decorator-based # for a Model @rule def model_has_description(model: Model) -> RuleViolation | None: """A model should have a description.""" if not model.description: return RuleViolation(message="Model lacks a description.") # for a Source @rule def has_description(source: Source) -> RuleViolation | None: """A source should have a loader defined.""" if not source.loader: return RuleViolation(message="Source lacks a loader.") # class-based class ExampleSource(Rule): """Example class-based rule.""" description = "A source should have a loader defined." def evaluate(self, source: Source) -> RuleViolation | None: """Evaluate source.""" if not source.loader: return RuleViolation(message="Source lacks a loader.") ``` The `Evaluation` handler is then responsible for applying source-rules to Source objects and model-rules to Model objects. --- closes #76 --------- Co-authored-by: Jochem van Dooren <[email protected]> Co-authored-by: Jochem van Dooren <[email protected]> Co-authored-by: Matthieu Caneill <[email protected]>
PicnicSupermarket · Nov 12, 2024 · 8aa8aad · 8aa8aad
1 parent b0bb6f3
commit 8aa8aad
Show file tree

Hide file tree

Showing 44 changed files with 1,452 additions and 491 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,16 @@ and this project adheres to
 
 ## [Unreleased]
 
+- Support linting of sources.
+- **Breaking**: Renamed modules: `dbt_score.model_filter` becomes
+  `dbt_score.rule_filter`
+- **Breaking**: Renamed filter class and decorator: `@model_filter` becomes
+  `@rule_filter` and `ModelFilter` becomes `RuleFilter`.
+- **Breaking**: Config option `model_filter_names` becomes `rule_filter_names`.
+- **Breaking**: CLI flag naming fixes: `--fail_any_model_under` becomes
+  `--fail-any-item-under` and `--fail_project_under` becomes
+  `--fail-project-under`.
+
 ## [0.7.1] - 2024-11-01
 
 - Fix mkdocs.

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 
 ## What is `dbt-score`?
 
-`dbt-score` is a linter for dbt model metadata.
+`dbt-score` is a linter for dbt metadata.
 
 [dbt][dbt] (Data Build Tool) is a great framework for creating, building,
 organizing, testing and documenting _data models_, i.e. data sets living in a

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -18,7 +18,7 @@ rule_namespaces = ["dbt_score.rules", "dbt_score_rules", "custom_rules"]
 disabled_rules = ["dbt_score.rules.generic.columns_have_description"]
 inject_cwd_in_python_path = true
 fail_project_under = 7.5
-fail_any_model_under = 8.0
+fail_any_item_under = 8.0
 
 [tool.dbt-score.badges]
 first.threshold = 10.0
@@ -51,8 +51,8 @@ The following options can be set in the `pyproject.toml` file:
 - `disabled_rules`: A list of rules to disable.
 - `fail_project_under` (default: `5.0`): If the project score is below this
   value the command will fail with return code 1.
-- `fail_any_model_under` (default: `5.0`): If any model scores below this value
-  the command will fail with return code 1.
+- `fail_any_item_under` (default: `5.0`): If any model or source scores below
+  this value the command will fail with return code 1.
 
 #### Badges configuration
 
@@ -70,7 +70,7 @@ All badges except `wip` can be configured with the following option:
 
 - `threshold`: The threshold for the badge. A decimal number between `0.0` and
   `10.0` that will be used to compare to the score. The threshold is the minimum
-  score required for a model to be rewarded with a certain badge.
+  score required for a model or source to be rewarded with a certain badge.
 
 The default values can be found in the
 [BadgeConfig](reference/config.md#dbt_score.config.BadgeConfig).
@@ -86,7 +86,7 @@ Every rule can be configured with the following option:
 - `severity`: The severity of the rule. Rules have a default severity and can be
   overridden. It's an integer with a minimum value of 1 and a maximum value
   of 4.
-- `model_filter_names`: Filters used by the rule. Takes a list of names that can
+- `rule_filter_names`: Filters used by the rule. Takes a list of names that can
   be found in the same namespace as the rules (see
   [Package rules](package_rules.md)).
 

diff --git a/docs/create_rules.md b/docs/create_rules.md
@@ -1,9 +1,9 @@
 # Create rules
 
-In order to lint and score models, `dbt-score` uses a set of rules that are
-applied to each model. A rule can pass or fail when it is run. Based on the
-severity of the rule, models are scored with the weighted average of the rules
-results. Note that `dbt-score` comes bundled with a
+In order to lint and score models or sources, `dbt-score` uses a set of rules
+that are applied to each item. A rule can pass or fail when it is run. Based on
+the severity of the rule, items are scored with the weighted average of the
+rules results. Note that `dbt-score` comes bundled with a
 [set of default rules](rules/generic.md).
 
 On top of the generic rules, it's possible to add your own rules. Two ways exist
@@ -21,7 +21,7 @@ The `@rule` decorator can be used to easily create a new rule:
 from dbt_score import Model, rule, RuleViolation
 
 @rule
-def has_description(model: Model) -> RuleViolation | None:
+def model_has_description(model: Model) -> RuleViolation | None:
     """A model should have a description."""
     if not model.description:
         return RuleViolation(message="Model lacks a description.")
@@ -31,6 +31,21 @@ The name of the function is the name of the rule and the docstring of the
 function is its description. Therefore, it is important to use a
 self-explanatory name for the function and document it well.
 
+The type annotation for the rule's argument dictates whether the rule should be
+applied to dbt models or sources.
+
+Here is the same example rule, applied to sources:
+
+```python
+from dbt_score import rule, RuleViolation, Source
+
+@rule
+def source_has_description(source: Source) -> RuleViolation | None:
+   """A source should have a description."""
+   if not source.description:
+      return RuleViolation(message="Source lacks a description.")
+```
+
 The severity of a rule can be set using the `severity` argument:
 
 ```python
@@ -45,15 +60,23 @@ For more advanced use cases, a rule can be created by inheriting from the `Rule`
 class:
 
 ```python
-from dbt_score import Model, Rule, RuleViolation
+from dbt_score import Model, Rule, RuleViolation, Source
 
-class HasDescription(Rule):
+class ModelHasDescription(Rule):
     description = "A model should have a description."
 
     def evaluate(self, model: Model) -> RuleViolation | None:
         """Evaluate the rule."""
         if not model.description:
             return RuleViolation(message="Model lacks a description.")
+
+class SourceHasDescription(Rule):
+   description = "A source should have a description."
+
+   def evaluate(self, source: Source) -> RuleViolation | None:
+      """Evaluate the rule."""
+      if not source.description:
+         return RuleViolation(message="Source lacks a description.")
 ```
 
 ### Rules location
@@ -91,30 +114,48 @@ def sql_has_reasonable_number_of_lines(model: Model, max_lines: int = 200) -> Ru
         )
 ```
 
-### Filtering models
+### Filtering rules
 
-Custom and standard rules can be configured to have model filters. Filters allow
-models to be ignored by one or multiple rules.
+Custom and standard rules can be configured to have filters. Filters allow
+models or sources to be ignored by one or multiple rules if the item doesn't
+satisfy the filter criteria.
 
 Filters are created using the same discovery mechanism and interface as custom
 rules, except they do not accept parameters. Similar to Python's built-in
-`filter` function, when the filter evaluation returns `True` the model should be
+`filter` function, when the filter evaluation returns `True` the item should be
 evaluated, otherwise it should be ignored.
 
 ```python
-from dbt_score import ModelFilter, model_filter
+from dbt_score import Model, RuleFilter, rule_filter
 
-@model_filter
+@rule_filter
 def only_schema_x(model: Model) -> bool:
     """Only applies a rule to schema X."""
     return model.schema.lower() == 'x'
 
-class SkipSchemaY(ModelFilter):
+class SkipSchemaY(RuleFilter):
     description = "Applies a rule to every schema but Y."
     def evaluate(self, model: Model) -> bool:
       return model.schema.lower() != 'y'
 ```
 
+Filters also rely on type-annotations to dictate whether they apply to models or
+sources:
+
+```python
+from dbt_score import RuleFilter, rule_filter, Source
+
+@rule_filter
+def only_from_source_a(source: Source) -> bool:
+   """Only applies a rule to source tables from source A."""
+   return source.source_name.lower() == 'a'
+
+class SkipSourceDatabaseB(RuleFilter):
+   description = "Applies a rule to every source except Database B."
+   def evaluate(self, source: Source) -> bool:
+      return source.database.lower() != 'b'
+```
+
 Similar to setting a rule severity, standard rules can have filters set in the
 [configuration file](configuration.md/#tooldbt-scorerulesrule_namespacerule_name),
 while custom rules accept the configuration file or a decorator parameter.
@@ -123,7 +164,7 @@ while custom rules accept the configuration file or a decorator parameter.
 from dbt_score import Model, rule, RuleViolation
 from my_project import only_schema_x
 
-@rule(model_filters={only_schema_x()})
+@rule(rule_filters={only_schema_x()})
 def models_in_x_follow_naming_standard(model: Model) -> RuleViolation | None:
     """Models in schema X must follow the naming standard."""
     if some_regex_fails(model.name):

diff --git a/docs/get_started.md b/docs/get_started.md
@@ -40,8 +40,8 @@ It's also possible to automatically run `dbt parse`, to generate the
 dbt-score lint --run-dbt-parse
 ```
 
-To lint only a selection of models, the argument `--select` can be used. It
-accepts any
+To lint only a selection of models or sources, the argument `--select` can be
+used. It accepts any
 [dbt node selection syntax](https://docs.getdbt.com/reference/node-selection/syntax):
 
 ```shell

diff --git a/docs/index.md b/docs/index.md
@@ -2,8 +2,9 @@
 
 `dbt-score` is a linter for [dbt](https://www.getdbt.com/) metadata.
 
-dbt allows data practitioners to organize their data in to _models_. Those
-models have metadata associated with them: documentation, tests, types, etc.
+dbt allows data practitioners to organize their data into _models_ and
+_sources_. Those models and sources have metadata associated with them:
+documentation, tests, types, etc.
 
 `dbt-score` allows to lint and score this metadata, in order to enforce (or
 encourage) good practices.
@@ -12,7 +13,7 @@ encourage) good practices.
 
 ```
 > dbt-score lint
-🥇 customers (score: 10.0)
+🥇 M: customers (score: 10.0)
     OK   dbt_score.rules.generic.has_description
     OK   dbt_score.rules.generic.has_owner
     OK   dbt_score.rules.generic.sql_has_reasonable_number_of_lines
@@ -25,17 +26,17 @@ score.
 
 ## Philosophy
 
-dbt models are often used as metadata containers: either in YAML files or
-through the use of `{{ config() }}` blocks, they are associated with a lot of
+dbt models/sources are often used as metadata containers: either in YAML files
+or through the use of `{{ config() }}` blocks, they are associated with a lot of
 information. At scale, it becomes tedious to enforce good practices in large
-data teams dealing with many models.
+data teams dealing with many models/sources.
 
 To that end, `dbt-score` has 2 main features:
 
-- It runs rules on models, and displays rule violations. Those can be used in
-  interactive environments or in CI.
-- Using those run results, it scores models, as to give them a measure of their
-  maturity. This score can help gamify model metadata improvements, and be
+- It runs rules on dbt models and sources, and displays any rule violations.
+  These can be used in interactive environments or in CI.
+- Using those run results, it scores items, to ascribe them a measure of their
+  maturity. This score can help gamify metadata improvements/coverage, and be
   reflected in data catalogs.
 
 `dbt-score` aims to:

diff --git a/docs/programmatic_invocations.md b/docs/programmatic_invocations.md
@@ -61,9 +61,9 @@ When `dbt-score` terminates, it exits with one of the following exit codes:
   project being linted either doesn't raise any warning, or the warnings are
   small enough to be above the thresholds. This generally means "successful
   linting".
-- `1` in case of linting errors. This is the unhappy case: some models in the
-  project raise enough warnings to have a score below the defined thresholds.
-  This generally means "linting doesn't pass".
+- `1` in case of linting errors. This is the unhappy case: some models or
+  sources in the project raise enough warnings to have a score below the defined
+  thresholds. This generally means "linting doesn't pass".
 - `2` in case of an unexpected error. This happens for example if something is
   misconfigured (for example a faulty dbt project), or the wrong parameters are
   given to the CLI. This generally means "setup needs to be fixed".
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 name = "dbt-score"
 dynamic = ["version"]
 
-description = "Linter for dbt model metadata."
+description = "Linter for dbt metadata."
 authors = [
     {name = "Picnic Analyst Development Platform", email = "[email protected]"}
 ]
@@ -101,6 +101,7 @@ max-args = 9
 [tool.ruff.lint.per-file-ignores]
 "tests/**/*.py" = [
     "PLR2004",  # Magic value comparisons
+    "PLR0913",  # Too many args in func def
 ]
 
 ### Coverage ###
@@ -114,3 +115,7 @@ source = [
 [tool.coverage.report]
 show_missing = true
 fail_under = 80
+exclude_also = [
+    "@overload"
+]
+
diff --git a/src/dbt_score/__init__.py b/src/dbt_score/__init__.py
@@ -1,15 +1,16 @@
 """Init dbt_score package."""
 
-from dbt_score.model_filter import ModelFilter, model_filter
-from dbt_score.models import Model
+from dbt_score.models import Model, Source
 from dbt_score.rule import Rule, RuleViolation, Severity, rule
+from dbt_score.rule_filter import RuleFilter, rule_filter
 
 __all__ = [
     "Model",
-    "ModelFilter",
+    "Source",
+    "RuleFilter",
     "Rule",
     "RuleViolation",
     "Severity",
-    "model_filter",
+    "rule_filter",
     "rule",
 ]
diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py
@@ -81,15 +81,15 @@ def cli() -> None:
     default=False,
 )
 @click.option(
-    "--fail_project_under",
+    "--fail-project-under",
     help="Fail if the project score is under this value.",
     type=float,
     is_flag=False,
     default=None,
 )
 @click.option(
-    "--fail_any_model_under",
-    help="Fail if any model is under this value.",
+    "--fail-any-item-under",
+    help="Fail if any evaluable item is under this value.",
     type=float,
     is_flag=False,
     default=None,
@@ -104,9 +104,9 @@ def lint(
     manifest: Path,
     run_dbt_parse: bool,
     fail_project_under: float,
-    fail_any_model_under: float,
+    fail_any_item_under: float,
 ) -> None:
-    """Lint dbt models metadata."""
+    """Lint dbt metadata."""
     manifest_provided = (
         click.get_current_context().get_parameter_source("manifest")
         != ParameterSource.DEFAULT
@@ -122,8 +122,8 @@ def lint(
         config.overload({"disabled_rules": disabled_rule})
     if fail_project_under:
         config.overload({"fail_project_under": fail_project_under})
-    if fail_any_model_under:
-        config.overload({"fail_any_model_under": fail_any_model_under})
+    if fail_any_item_under:
+        config.overload({"fail_any_item_under": fail_any_item_under})
 
     try:
         if run_dbt_parse:
@@ -148,7 +148,7 @@ def lint(
         ctx.exit(2)
 
     if (
-        any(x.value < config.fail_any_model_under for x in evaluation.scores.values())
+        any(x.value < config.fail_any_item_under for x in evaluation.scores.values())
         or evaluation.project_score.value < config.fail_project_under
     ):
         ctx.exit(1)

diff --git a/src/dbt_score/config.py b/src/dbt_score/config.py
@@ -56,7 +56,7 @@ class Config:
         "disabled_rules",
         "inject_cwd_in_python_path",
         "fail_project_under",
-        "fail_any_model_under",
+        "fail_any_item_under",
     ]
     _rules_section: Final[str] = "rules"
     _badges_section: Final[str] = "badges"
@@ -70,7 +70,7 @@ def __init__(self) -> None:
         self.config_file: Path | None = None
         self.badge_config: BadgeConfig = BadgeConfig()
         self.fail_project_under: float = 5.0
-        self.fail_any_model_under: float = 5.0
+        self.fail_any_item_under: float = 5.0
 
     def set_option(self, option: str, value: Any) -> None:
         """Set an option in the config."""

diff --git a/src/dbt_score/dbt_utils.py b/src/dbt_score/dbt_utils.py
@@ -69,7 +69,7 @@ def dbt_parse() -> "dbtRunnerResult":
 @dbt_required
 def dbt_ls(select: Iterable[str] | None) -> Iterable[str]:
     """Run dbt ls."""
-    cmd = ["ls", "--resource-type", "model", "--output", "name"]
+    cmd = ["ls", "--resource-types", "model", "source", "--output", "name"]
     if select:
         cmd += ["--select", *select]