POST REVIEW: Appease mypy (socrata/metadata files)
sf-dcp committed Jan 14, 2025
1 parent f874c8c commit 3f87188
Showing 3 changed files with 42 additions and 32 deletions.
dcpy/connectors/socrata/publish.py (9 changes: 7 additions & 2 deletions)
@@ -23,6 +23,7 @@
 from dcpy.utils.logging import logger
 
 import dcpy.models.product.dataset.metadata as md
+import dcpy.models.dataset as dataset
 from .utils import SOCRATA_USER, SOCRATA_PASSWORD, _socrata_request
 
 SOCRATA_REVISION_APPLY_TIMEOUT_SECS = 10 * 60  # Ten Mins
@@ -176,7 +177,9 @@ def __init__(self, col: md.DatasetColumn):
         self.display_name = col.name
         self.description = col.description
         self.is_primary_key = (
-            bool(col.checks.is_primary_key) if col.checks else False
+            bool(col.checks.is_primary_key)
+            if isinstance(col.checks, dataset.Checks)
+            else False
         )
 
     class Attachment(TypedDict):
@@ -298,7 +301,9 @@ def calculate_pushed_col_metadata(self, our_columns: list[md.DatasetColumn]):
             new_col["initial_output_column_id"] = new_col["id"]
 
             new_col["is_primary_key"] = (
-                True if (our_col.checks and our_col.checks.is_primary_key) else False
+                bool(our_col.checks.is_primary_key)
+                if isinstance(our_col.checks, dataset.Checks)
+                else False
             )
 
             new_col["display_name"] = our_col.name
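
The change above is the core of the mypy fix: a bare truthiness test like `if col.checks` removes only `None` from a union type, while `isinstance` narrows it to one concrete type. A minimal sketch of the pattern, assuming `checks` is annotated as a union of the new `dataset.Checks` model, the legacy list format, and `None` (the real annotation lives in `dcpy.models` and is not shown in this diff; the names below are illustrative):

    from dataclasses import dataclass


    @dataclass
    class Checks:  # stand-in for dcpy.models.dataset.Checks
        is_primary_key: bool | None = None


    def primary_key_flag(checks: Checks | list[dict] | None) -> bool:
        # `if checks:` narrows the union only to `Checks | list[dict]`, so
        # `checks.is_primary_key` still fails mypy on the `list` member.
        # `isinstance` narrows it to exactly `Checks`.
        if isinstance(checks, Checks):
            return bool(checks.is_primary_key)
        return False
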
dcpy/lifecycle/package/validate.py (3 changes: 3 additions & 0 deletions)
@@ -10,6 +10,7 @@
 import typer
 
 import dcpy.models.product.dataset.metadata as dataset_md
+import dcpy.models.dataset as dataset
 from dcpy.utils.logging import logger
 
 
@@ -218,6 +219,8 @@ def validate_df(
             )
         )
 
+        if isinstance(col.checks, list):  # TODO: delete after refactoring
+            raise NotImplementedError("Must be old dataset.Checks format to run checks")
         # Check Nulls
         if col.checks and col.checks.non_nullable:
             if not df_only_col_nulls.empty:
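
This guard narrows by elimination rather than selection: because the `isinstance(col.checks, list)` branch unconditionally raises, mypy treats `col.checks` as the remaining union members in everything below it, so the existing truthiness check on the next lines type-checks cleanly. A hedged sketch of that behavior, reusing the illustrative types from the previous example plus an assumed `non_nullable` field:

    from dataclasses import dataclass


    @dataclass
    class Checks:  # stand-in for dcpy.models.dataset.Checks
        non_nullable: bool = False


    def column_is_non_nullable(checks: Checks | list[dict] | None) -> bool:
        # Raising on the legacy list format removes `list[dict]` from the
        # union; after this branch mypy sees `checks` as `Checks | None`.
        if isinstance(checks, list):
            raise NotImplementedError("legacy list-style checks are not supported")
        return bool(checks and checks.non_nullable)
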
dcpy/test/lifecycle/package/test_column_validation.py (62 changes: 32 additions & 30 deletions)
@@ -8,6 +8,7 @@
 from dcpy.test.lifecycle.package.conftest import TEST_METADATA_YAML_PATH
 
 import dcpy.models.product.dataset.metadata as md
+import dcpy.models.dataset as dataset
 from dcpy.lifecycle.package import validate
 
 rd = random.Random()
@@ -82,12 +83,13 @@ def _fake_row(columns: list[md.DatasetColumn]):
     )
 
     for c in columns:
-        if c.checks and not c.checks.non_nullable and random.choice([True, False]):
-            # adding some extra chaos
-            if random.choice([True, False]):
-                del row[c.name]
-            else:
-                row[c.name] = ""
+        if isinstance(c.checks, dataset.Checks):
+            if not c.checks.non_nullable and random.choice([True, False]):
+                # adding some extra chaos
+                if random.choice([True, False]):
+                    del row[c.name]
+                else:
+                    row[c.name] = ""
     return row
 
 
@@ -118,21 +120,21 @@ def test_invalid_standardized_values():
     fake_ds.loc[2:, "Owner type"] = INVALID_OWNERSHIP_VALUES[2]
 
     errors = validate.validate_df(fake_ds, metadata.columns)
-    assert len(errors) == 1, (
-        "One error should have been found for invalid standardized values"
-    )
+    assert (
+        len(errors) == 1
+    ), "One error should have been found for invalid standardized values"
 
     # Assert that the error message should mention the invalid values with their counts
     result_msg = errors[0].message
-    assert f"'{INVALID_OWNERSHIP_VALUES[0]}': 1," in result_msg, (
-        "The error message should include the invalid value and count"
-    )
-    assert f"'{INVALID_OWNERSHIP_VALUES[1]}': 1," in result_msg, (
-        "The error message should include the invalid value and count"
-    )
-    assert f"'{INVALID_OWNERSHIP_VALUES[2]}': {ROW_COUNT - 2}" in result_msg, (
-        "The error message should include the invalid value and count"
-    )
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[0]}': 1," in result_msg
+    ), "The error message should include the invalid value and count"
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[1]}': 1," in result_msg
+    ), "The error message should include the invalid value and count"
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[2]}': {ROW_COUNT - 2}" in result_msg
+    ), "The error message should include the invalid value and count"
 
 
 def test_standardized_values_with_nulls():
@@ -142,9 +144,9 @@ def test_standardized_values_with_nulls():
     fake_ds.loc[0, "Nullable Owner type"] = ""
 
     errors = validate.validate_df(fake_ds, metadata.columns)
-    assert not errors, (
-        "No errors should have been found for invalid standardized values"
-    )
+    assert (
+        not errors
+    ), "No errors should have been found for invalid standardized values"
 
 
 def test_non_nullable_bbls():
@@ -156,9 +158,9 @@
     errors = validate.validate_df(fake_ds, metadata.columns)
     assert len(errors) == 1, "One error should have been found"
 
-    assert errors[0].error_type == validate.ErrorType.NULLS_FOUND, (
-        "The error type should be NULLS_FOUND"
-    )
+    assert (
+        errors[0].error_type == validate.ErrorType.NULLS_FOUND
+    ), "The error type should be NULLS_FOUND"
 
 
 # TODO: revisit after determining how to specify geometry types in md
@@ -189,10 +191,10 @@ def test_additional_cols_in_source():
     errors = validate.validate_df(fake_ds, metadata.columns)
     assert len(errors) == 1, "One error should have been found"
 
-    assert errors[0].error_type == validate.ErrorType.COLUMM_MISMATCH, (
-        "The correct error type should be returned"
-    )
+    assert (
+        errors[0].error_type == validate.ErrorType.COLUMM_MISMATCH
+    ), "The correct error type should be returned"
 
-    assert FAKE_COL_NAME in errors[0].message, (
-        "The fake column name should be mentioned in the error message"
-    )
+    assert (
+        FAKE_COL_NAME in errors[0].message
+    ), "The fake column name should be mentioned in the error message"