POST REVIEW: Appease mypy (socrata/metadata files)
sf-dcp committed Jan 14, 2025
1 parent f874c8c commit 3f87188
Showing 3 changed files with 42 additions and 32 deletions.
dcpy/connectors/socrata/publish.py (9 changes: 7 additions & 2 deletions)
@@ -23,6 +23,7 @@
 from dcpy.utils.logging import logger
 
 import dcpy.models.product.dataset.metadata as md
+import dcpy.models.dataset as dataset
 from .utils import SOCRATA_USER, SOCRATA_PASSWORD, _socrata_request
 
 SOCRATA_REVISION_APPLY_TIMEOUT_SECS = 10 * 60  # Ten Mins
@@ -176,7 +177,9 @@ def __init__(self, col: md.DatasetColumn):
         self.display_name = col.name
         self.description = col.description
         self.is_primary_key = (
-            bool(col.checks.is_primary_key) if col.checks else False
+            bool(col.checks.is_primary_key)
+            if isinstance(col.checks, dataset.Checks)
+            else False
         )
 
     class Attachment(TypedDict):
@@ -298,7 +301,9 @@ def calculate_pushed_col_metadata(self, our_columns: list[md.DatasetColumn]):
             new_col["initial_output_column_id"] = new_col["id"]
 
             new_col["is_primary_key"] = (
-                True if (our_col.checks and our_col.checks.is_primary_key) else False
+                bool(our_col.checks.is_primary_key)
+                if isinstance(our_col.checks, dataset.Checks)
+                else False
             )
 
             new_col["display_name"] = our_col.name
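
The change above is the core of the mypy fix: a bare truthiness test like `if col.checks` removes only `None` from a union type, while `isinstance` narrows it to one concrete type. A minimal sketch of the pattern, assuming `checks` is annotated as a union of the new `dataset.Checks` model, the legacy list format, and `None` (the real annotation lives in `dcpy.models` and is not shown in this diff; the names below are illustrative):

    from dataclasses import dataclass


    @dataclass
    class Checks:  # stand-in for dcpy.models.dataset.Checks
        is_primary_key: bool | None = None


    def primary_key_flag(checks: Checks | list[dict] | None) -> bool:
        # `if checks:` narrows the union only to `Checks | list[dict]`, so
        # `checks.is_primary_key` still fails mypy on the `list` member.
        # `isinstance` narrows it to exactly `Checks`.
        if isinstance(checks, Checks):
            return bool(checks.is_primary_key)
        return False
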
dcpy/lifecycle/package/validate.py (3 changes: 3 additions & 0 deletions)
@@ -10,6 +10,7 @@
 import typer
 
 import dcpy.models.product.dataset.metadata as dataset_md
+import dcpy.models.dataset as dataset
 from dcpy.utils.logging import logger
 
 
@@ -218,6 +219,8 @@ def validate_df(
             )
         )
 
+        if isinstance(col.checks, list):  # TODO: delete after refactoring
+            raise NotImplementedError("Must be old dataset.Checks format to run checks")
         # Check Nulls
         if col.checks and col.checks.non_nullable:
             if not df_only_col_nulls.empty:
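
This guard narrows by elimination rather than selection: because the `isinstance(col.checks, list)` branch unconditionally raises, mypy treats `col.checks` as the remaining union members in everything below it, so the existing truthiness check on the next lines type-checks cleanly. A hedged sketch of that behavior, reusing the illustrative types from the previous example plus an assumed `non_nullable` field:

    from dataclasses import dataclass


    @dataclass
    class Checks:  # stand-in for dcpy.models.dataset.Checks
        non_nullable: bool = False


    def column_is_non_nullable(checks: Checks | list[dict] | None) -> bool:
        # Raising on the legacy list format removes `list[dict]` from the
        # union; after this branch mypy sees `checks` as `Checks | None`.
        if isinstance(checks, list):
            raise NotImplementedError("legacy list-style checks are not supported")
        return bool(checks and checks.non_nullable)
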
dcpy/test/lifecycle/package/test_column_validation.py (62 changes: 32 additions & 30 deletions)
@@ -8,6 +8,7 @@
 from dcpy.test.lifecycle.package.conftest import TEST_METADATA_YAML_PATH
 
 import dcpy.models.product.dataset.metadata as md
+import dcpy.models.dataset as dataset
 from dcpy.lifecycle.package import validate
 
 rd = random.Random()
@@ -82,12 +83,13 @@ def _fake_row(columns: list[md.DatasetColumn]):
     )
 
     for c in columns:
-        if c.checks and not c.checks.non_nullable and random.choice([True, False]):
-            # adding some extra chaos
-            if random.choice([True, False]):
-                del row[c.name]
-            else:
-                row[c.name] = ""
+        if isinstance(c.checks, dataset.Checks):
+            if not c.checks.non_nullable and random.choice([True, False]):
+                # adding some extra chaos
+                if random.choice([True, False]):
+                    del row[c.name]
+                else:
+                    row[c.name] = ""
     return row
 
 
@@ -118,21 +120,21 @@ def test_invalid_standardized_values():
     fake_ds.loc[2:, "Owner type"] = INVALID_OWNERSHIP_VALUES[2]
 
     errors = validate.validate_df(fake_ds, metadata.columns)
-    assert len(errors) == 1, (
-        "One error should have been found for invalid standardized values"
-    )
+    assert (
+        len(errors) == 1
+    ), "One error should have been found for invalid standardized values"
 
     # Assert that the error message should mention the invalid values with their counts
     result_msg = errors[0].message
-    assert f"'{INVALID_OWNERSHIP_VALUES[0]}': 1," in result_msg, (
-        "The error message should include the invalid value and count"
-    )
-    assert f"'{INVALID_OWNERSHIP_VALUES[1]}': 1," in result_msg, (
-        "The error message should include the invalid value and count"
-    )
-    assert f"'{INVALID_OWNERSHIP_VALUES[2]}': {ROW_COUNT - 2}" in result_msg, (
-        "The error message should include the invalid value and count"
-    )
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[0]}': 1," in result_msg
+    ), "The error message should include the invalid value and count"
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[1]}': 1," in result_msg
+    ), "The error message should include the invalid value and count"
+    assert (
+        f"'{INVALID_OWNERSHIP_VALUES[2]}': {ROW_COUNT - 2}" in result_msg
+    ), "The error message should include the invalid value and count"
 
 
 def test_standardized_values_with_nulls():
@@ -142,9 +144,9 @@ def test_standardized_values_with_nulls():
     fake_ds.loc[0, "Nullable Owner type"] = ""
 
     errors = validate.validate_df(fake_ds, metadata.columns)
-    assert not errors, (
-        "No errors should have been found for invalid standardized values"
-    )
+    assert (
+        not errors
+    ), "No errors should have been found for invalid standardized values"
 
 
 def test_non_nullable_bbls():
@@ -156,9 +158,9 @@
     errors = validate.validate_df(fake_ds, metadata.columns)
     assert len(errors) == 1, "One error should have been found"
 
-    assert errors[0].error_type == validate.ErrorType.NULLS_FOUND, (
-        "The error type should be NULLS_FOUND"
-    )
+    assert (
+        errors[0].error_type == validate.ErrorType.NULLS_FOUND
+    ), "The error type should be NULLS_FOUND"
 
 
 # TODO: revisit after determining how to specify geometry types in md
@@ -189,10 +191,10 @@ def test_additional_cols_in_source():
     errors = validate.validate_df(fake_ds, metadata.columns)
     assert len(errors) == 1, "One error should have been found"
 
-    assert errors[0].error_type == validate.ErrorType.COLUMM_MISMATCH, (
-        "The correct error type should be returned"
-    )
+    assert (
+        errors[0].error_type == validate.ErrorType.COLUMM_MISMATCH
+    ), "The correct error type should be returned"
 
-    assert FAKE_COL_NAME in errors[0].message, (
-        "The fake column name should be mentioned in the error message"
-    )
+    assert (
+        FAKE_COL_NAME in errors[0].message
+    ), "The fake column name should be mentioned in the error message"