Skip to content

Commit

Permalink
Ruff linting fix (#365)
Browse files Browse the repository at this point in the history
* ruff fixes for typing

* bump version: v0.16.0
  • Loading branch information
fdosani authored Jan 6, 2025
1 parent e62247a commit 23e3ab1
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 58 deletions.
2 changes: 1 addition & 1 deletion datacompy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Then extended to carry that functionality over to Spark Dataframes.
"""

__version__ = "0.15.0"
__version__ = "0.16.0"

import platform
from warnings import warn
Expand Down
4 changes: 2 additions & 2 deletions datacompy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"""

from abc import ABC, abstractmethod
from typing import Any, Optional
from typing import Any

from ordered_set import OrderedSet

Expand Down Expand Up @@ -154,7 +154,7 @@ def report(
self,
sample_count: int = 10,
column_count: int = 10,
html_file: Optional[str] = None,
html_file: str | None = None,
) -> str:
"""Return a string representation of a report."""
pass
Expand Down
10 changes: 5 additions & 5 deletions datacompy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"""

import os
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Dict, List, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -84,7 +84,7 @@ def __init__(
self,
df1: pd.DataFrame,
df2: pd.DataFrame,
join_columns: Optional[Union[List[str], str]] = None,
join_columns: List[str] | str | None = None,
on_index: bool = False,
abs_tol: float = 0,
rel_tol: float = 0,
Expand All @@ -100,7 +100,7 @@ def __init__(
elif on_index:
self.on_index = True
self.join_columns = []
elif isinstance(join_columns, (str, int, float)):
elif isinstance(join_columns, str | int | float):
self.join_columns = [
str(join_columns).lower()
if self.cast_column_names_lower
Expand Down Expand Up @@ -564,7 +564,7 @@ def report(
self,
sample_count: int = 10,
column_count: int = 10,
html_file: Optional[str] = None,
html_file: str | None = None,
) -> str:
"""Return a string representation of a report.
Expand Down Expand Up @@ -728,7 +728,7 @@ def df_to_str(pdf: pd.DataFrame) -> str:
return report


def render(filename: str, *fields: Union[int, float, str]) -> str:
def render(filename: str, *fields: int | float | str) -> str:
"""Render out an individual template.
This basically just reads in a
Expand Down
24 changes: 12 additions & 12 deletions datacompy/fugue.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import pickle
from collections import defaultdict
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
from typing import Any, Callable, Dict, Iterable, List, Tuple, cast

import pandas as pd
from ordered_set import OrderedSet
Expand Down Expand Up @@ -105,15 +105,15 @@ def all_columns_match(df1: "AnyDataFrame", df2: "AnyDataFrame") -> bool:
def is_match(
df1: "AnyDataFrame",
df2: "AnyDataFrame",
join_columns: Union[str, List[str]],
join_columns: str | List[str],
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: str = "df1",
df2_name: str = "df2",
ignore_spaces: bool = False,
ignore_case: bool = False,
cast_column_names_lower: bool = True,
parallelism: Optional[int] = None,
parallelism: int | None = None,
strict_schema: bool = False,
) -> bool:
"""Check whether two dataframes match.
Expand Down Expand Up @@ -204,15 +204,15 @@ def is_match(
def all_rows_overlap(
df1: "AnyDataFrame",
df2: "AnyDataFrame",
join_columns: Union[str, List[str]],
join_columns: str | List[str],
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: str = "df1",
df2_name: str = "df2",
ignore_spaces: bool = False,
ignore_case: bool = False,
cast_column_names_lower: bool = True,
parallelism: Optional[int] = None,
parallelism: int | None = None,
strict_schema: bool = False,
) -> bool:
"""Check if the rows are all present in both dataframes.
Expand Down Expand Up @@ -300,15 +300,15 @@ def all_rows_overlap(
def count_matching_rows(
df1: "AnyDataFrame",
df2: "AnyDataFrame",
join_columns: Union[str, List[str]],
join_columns: str | List[str],
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: str = "df1",
df2_name: str = "df2",
ignore_spaces: bool = False,
ignore_case: bool = False,
cast_column_names_lower: bool = True,
parallelism: Optional[int] = None,
parallelism: int | None = None,
strict_schema: bool = False,
) -> int:
"""Count the number of rows that match (on overlapping fields).
Expand Down Expand Up @@ -395,7 +395,7 @@ def count_matching_rows(
def report(
df1: "AnyDataFrame",
df2: "AnyDataFrame",
join_columns: Union[str, List[str]],
join_columns: str | List[str],
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: str = "df1",
Expand All @@ -405,8 +405,8 @@ def report(
cast_column_names_lower: bool = True,
sample_count: int = 10,
column_count: int = 10,
html_file: Optional[str] = None,
parallelism: Optional[int] = None,
html_file: str | None = None,
parallelism: int | None = None,
) -> str:
"""Return a string representation of a report.
Expand Down Expand Up @@ -648,7 +648,7 @@ def _any(col: str) -> int:
def _distributed_compare(
df1: "AnyDataFrame",
df2: "AnyDataFrame",
join_columns: Union[str, List[str]],
join_columns: str | List[str],
return_obj_func: Callable[[Compare], Any],
abs_tol: float = 0,
rel_tol: float = 0,
Expand All @@ -657,7 +657,7 @@ def _distributed_compare(
ignore_spaces: bool = False,
ignore_case: bool = False,
cast_column_names_lower: bool = True,
parallelism: Optional[int] = None,
parallelism: int | None = None,
strict_schema: bool = False,
) -> List[Any]:
"""Compare the data distributively using the core Compare class.
Expand Down
12 changes: 6 additions & 6 deletions datacompy/polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import os
from copy import deepcopy
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Dict, List, cast

import numpy as np
import polars as pl
Expand Down Expand Up @@ -85,7 +85,7 @@ def __init__(
self,
df1: "pl.DataFrame",
df2: "pl.DataFrame",
join_columns: Union[List[str], str],
join_columns: List[str] | str,
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: str = "df1",
Expand Down Expand Up @@ -327,8 +327,8 @@ def _intersect_compare(self, ignore_spaces: bool, ignore_case: bool) -> None:
creates a column column_match which is True for matches, False
otherwise.
"""
match_cnt: Union[int, float]
null_diff: Union[int, float]
match_cnt: int | float
null_diff: int | float

LOG.debug("Comparing intersection")
row_cnt = len(self.intersect_rows)
Expand Down Expand Up @@ -571,7 +571,7 @@ def report(
self,
sample_count: int = 10,
column_count: int = 10,
html_file: Optional[str] = None,
html_file: str | None = None,
) -> str:
"""Return a string representation of a report.
Expand Down Expand Up @@ -734,7 +734,7 @@ def df_to_str(pdf: "pl.DataFrame") -> str:
return report


def render(filename: str, *fields: Union[int, float, str]) -> str:
def render(filename: str, *fields: int | float | str) -> str:
"""Render out an individual template.
This basically just reads in a
Expand Down
20 changes: 11 additions & 9 deletions datacompy/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from copy import deepcopy
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Dict, List, Union, cast

import pandas as pd
from ordered_set import OrderedSet
Expand Down Expand Up @@ -115,11 +115,11 @@ def __init__(
session: "sp.Session",
df1: Union[str, "sp.DataFrame"],
df2: Union[str, "sp.DataFrame"],
join_columns: Optional[Union[List[str], str]],
join_columns: List[str] | str | None,
abs_tol: float = 0,
rel_tol: float = 0,
df1_name: Optional[str] = None,
df2_name: Optional[str] = None,
df1_name: str | None = None,
df2_name: str | None = None,
ignore_spaces: bool = False,
) -> None:
if join_columns is None:
Expand All @@ -128,7 +128,7 @@ def __init__(
elif not join_columns:
errmsg = "join_columns is empty"
raise ValueError(errmsg)
elif isinstance(join_columns, (str, int, float)):
elif isinstance(join_columns, str | int | float):
self.join_columns = [str(join_columns).replace('"', "").upper()]
else:
self.join_columns = [
Expand All @@ -155,7 +155,7 @@ def df1(self) -> "sp.DataFrame":
return self._df1

@df1.setter
def df1(self, df1: tuple[Union[str, "sp.DataFrame"], Optional[str]]) -> None:
def df1(self, df1: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
"""Check that df1 is either a Snowpark DF or the name of a valid Snowflake table."""
(df, df_name) = df1
if isinstance(df, str):
Expand All @@ -176,7 +176,7 @@ def df2(self) -> "sp.DataFrame":
return self._df2

@df2.setter
def df2(self, df2: tuple[Union[str, "sp.DataFrame"], Optional[str]]) -> None:
def df2(self, df2: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
"""Check that df2 is either a Snowpark DF or the name of a valid Snowflake table."""
(df, df_name) = df2
if isinstance(df, str):
Expand Down Expand Up @@ -215,6 +215,7 @@ def _validate_dataframe(self, df_name: str, index: str) -> None:
zip(
self._df1.columns,
[str(c).replace('"', "").upper() for c in self._df1.columns],
strict=False,
)
)
self._df1 = self._df1.rename(col_map)
Expand All @@ -223,6 +224,7 @@ def _validate_dataframe(self, df_name: str, index: str) -> None:
zip(
self._df2.columns,
[str(c).replace('"', "").upper() for c in self._df2.columns],
strict=False,
)
)
self._df2 = self._df2.rename(dict(col_map))
Expand Down Expand Up @@ -711,7 +713,7 @@ def report(
self,
sample_count: int = 10,
column_count: int = 10,
html_file: Optional[str] = None,
html_file: str | None = None,
) -> str:
"""Return a string representation of a report.
Expand Down Expand Up @@ -876,7 +878,7 @@ def report(
return report


def render(filename: str, *fields: Union[int, float, str]) -> str:
def render(filename: str, *fields: int | float | str) -> str:
"""Render out an individual template.
This basically just reads in a
Expand Down
26 changes: 13 additions & 13 deletions datacompy/spark/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import sys
from enum import Enum
from itertools import chain
from typing import Any, Dict, List, Optional, Set, TextIO, Tuple, Union
from typing import Any, Dict, List, Optional, Set, TextIO, Tuple
from warnings import warn

try:
Expand Down Expand Up @@ -160,10 +160,10 @@ def __init__(
spark_session: "pyspark.sql.SparkSession",
base_df: "pyspark.sql.DataFrame",
compare_df: "pyspark.sql.DataFrame",
join_columns: List[Union[str, Tuple[str, str]]],
column_mapping: Optional[List[Tuple[str, str]]] = None,
join_columns: List[str | Tuple[str, str]],
column_mapping: List[Tuple[str, str]] | None = None,
cache_intermediates: bool = False,
known_differences: Optional[List[Dict[str, Any]]] = None,
known_differences: List[Dict[str, Any]] | None = None,
rel_tol: float = 0,
abs_tol: float = 0,
show_all_columns: bool = False,
Expand Down Expand Up @@ -198,14 +198,14 @@ def __init__(

self.spark = spark_session
self.base_unq_rows = self.compare_unq_rows = None
self._base_row_count: Optional[int] = None
self._compare_row_count: Optional[int] = None
self._common_row_count: Optional[int] = None
self._joined_dataframe: Optional[pyspark.sql.DataFrame] = None
self._rows_only_base: Optional[pyspark.sql.DataFrame] = None
self._rows_only_compare: Optional[pyspark.sql.DataFrame] = None
self._all_matched_rows: Optional[pyspark.sql.DataFrame] = None
self._all_rows_mismatched: Optional[pyspark.sql.DataFrame] = None
self._base_row_count: int | None = None
self._compare_row_count: int | None = None
self._common_row_count: int | None = None
self._joined_dataframe: pyspark.sql.DataFrame | None = None
self._rows_only_base: pyspark.sql.DataFrame | None = None
self._rows_only_compare: pyspark.sql.DataFrame | None = None
self._all_matched_rows: pyspark.sql.DataFrame | None = None
self._all_rows_mismatched: pyspark.sql.DataFrame | None = None
self.columns_match_dict: Dict[str, Any] = {}

# drop the duplicates before actual comparison made.
Expand All @@ -219,7 +219,7 @@ def __init__(
self._compare_row_count = self.compare_df.count()

def _tuplizer(
self, input_list: List[Union[str, Tuple[str, str]]]
self, input_list: List[str | Tuple[str, str]]
) -> List[Tuple[str, str]]:
join_columns: List[Tuple[str, str]] = []
for val in input_list:
Expand Down
Loading

0 comments on commit 23e3ab1

Please sign in to comment.