Skip to content

Commit

Permalink
POST REVIEW: add attributes to dataset.CheckAttributes model
Browse files Browse the repository at this point in the history
  • Loading branch information
sf-dcp committed Jan 14, 2025
1 parent 999d7e8 commit 727426f
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
6 changes: 6 additions & 0 deletions dcpy/lifecycle/validate/pandera_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def create_check(check: str | dict[str, CheckAttributes]) -> pa.Check:
**check_args.args,
raise_warning=check_args.warn_only,
description=check_args.description,
name=check_args.name,
title=check_args.title,
n_failure_cases=check_args.n_failure_cases,
groups=check_args.groups,
groupby=check_args.groupby,
ignore_na=check_args.ignore_na,
)
if check_args
else check_constructor()
Expand Down
16 changes: 14 additions & 2 deletions dcpy/models/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dcpy.models.base import SortedSerializedBase
from typing import Literal, Any
from typing import Literal, Any, Callable

from pydantic import field_validator

Expand All @@ -24,12 +24,24 @@ class Checks(SortedSerializedBase):

class CheckAttributes(SortedSerializedBase, extra="forbid"):
    """
    Settings and parameters for a single column data check.

    This class mirrors the `pandera.Check` constructor: the `args` property
    holds parameters specific to the individual check (e.g., thresholds or
    conditions), while the remaining fields configure constructor options
    such as whether to raise warnings or how to handle missing data.

    `create_check` in `dcpy/lifecycle/validate/pandera_utils.py` unpacks
    `args` into the check constructor and forwards every other field as a
    keyword argument.
    """
    # NOTE(review): `extra="forbid"` presumably makes unknown keys a
    # validation error (pydantic model config) - confirm against
    # SortedSerializedBase.

    # Check-specific parameters; unpacked as `**args` into the check constructor.
    args: dict[str, Any]
    # Forwarded as the `description` keyword.
    description: str | None = None
    # Forwarded as the `raise_warning` keyword (note the different name).
    warn_only: bool = False
    # The fields below are forwarded under the same keyword name.
    name: str | None = None
    title: str | None = None
    n_failure_cases: int | None = None
    groups: str | list[str] | None = None
    # NOTE(review): a Callable cannot be expressed in a YAML recipe, so that
    # variant is presumably only for models built in Python - confirm.
    groupby: str | list[str] | Callable | None = None
    ignore_na: bool = True


class Column(SortedSerializedBase, extra="forbid"):
Expand Down
12 changes: 12 additions & 0 deletions dcpy/test/lifecycle/validate/resources/valid_data_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,15 @@
args:
min_value: abc # adding this for visibility. Pandera sees this as a valid check
warn_only: false

# case exercising the optional pandera.Check properties (name, title, n_failure_cases, ignore_na, groups, groupby)
- greater_than:
args:
min_value: 1
warn_only: true
name: greater than
title: My greater than check
n_failure_cases: 1
ignore_na: false
groups: col_a
groupby: ["col_a", "col_b"]
15 changes: 14 additions & 1 deletion dcpy/test/lifecycle/validate/test_pandera_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,20 @@ def get_invalid_checks():
),
(
valid_data_checks[2],
pa.Check.greater_than(min_value="abc"),
pa.Check.greater_than(min_value="abc", raise_warning=False),
),
(
valid_data_checks[3],
pa.Check.greater_than(
min_value=1,
raise_warning=True,
name="greater than",
title="My greater than check",
n_failure_cases=1,
ignore_na=False,
groups="col_a",
groupby=["col_a", "col_b"],
),
),
# TODO: add custom registered check
],
Expand Down

0 comments on commit 727426f

Please sign in to comment.