From c9c2398ee66d231ec8dc3cdc323c7f428a94e14d Mon Sep 17 00:00:00 2001 From: Yasin Tatar Date: Thu, 27 Jul 2023 16:34:16 +0200 Subject: [PATCH 1/3] update: use newly added formatter for diff_color --- docs/source/getting_started.rst | 4 ++-- src/datajudge/constraints/nrows.py | 16 ++++++++++++---- src/datajudge/formatter.py | 7 ++++++- src/datajudge/utils.py | 26 ++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 src/datajudge/utils.py diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 0faa61fc..09c78ede 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -150,9 +150,9 @@ The following table lists all the supported codes, along with their descriptions * - `numMatch` - Indicates the part of a number that matches the expected value. - `[numMatch]3.141[/numMatch]` - * - `numDifference` + * - `numDiff` - Indicates the part of a number that differs. - - `[numDifference]6[/numDifference]` + - `[numDiff]6[/numDiff]` Alternative DataSources --------------------------- diff --git a/src/datajudge/constraints/nrows.py b/src/datajudge/constraints/nrows.py index 4e7bad1c..1b3ee591 100644 --- a/src/datajudge/constraints/nrows.py +++ b/src/datajudge/constraints/nrows.py @@ -3,6 +3,8 @@ import sqlalchemy as sa +from datajudge.utils import diff_color + from .. import db_access from ..db_access import DataReference from .base import Constraint, OptionalSelections, TestResult, ToleranceGetter @@ -47,9 +49,12 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: class NRowsMax(NRows): def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: result = n_rows_factual <= n_rows_target + n_rows_factual_fmt, n_rows_target_fmt = diff_color( + n_rows_factual, n_rows_target + ) assertion_text = ( - f"{self.ref} has {n_rows_factual} " - f"> {self.target_prefix} {n_rows_target} rows. " + f"{self.ref} has {n_rows_factual_fmt} " + f"> {self.target_prefix} {n_rows_target_fmt} rows. " f"{self.condition_string}" ) return result, assertion_text @@ -58,9 +63,12 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: class NRowsEquality(NRows): def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: result = n_rows_factual == n_rows_target + n_rows_factual_fmt, n_rows_target_fmt = diff_color( + n_rows_factual, n_rows_target + ) assertion_text = ( - f"{self.ref} has {n_rows_factual} row(s) " - f"instead of {self.target_prefix} {n_rows_target}. " + f"{self.ref} has {n_rows_factual_fmt} row(s) " + f"instead of {self.target_prefix} {n_rows_target_fmt}. " f"{self.condition_string}" ) return result, assertion_text diff --git a/src/datajudge/formatter.py b/src/datajudge/formatter.py index 32c2b53e..cfd21d41 100644 --- a/src/datajudge/formatter.py +++ b/src/datajudge/formatter.py @@ -1,8 +1,9 @@ import abc import re -from colorama import Back +from colorama import Back, just_fix_windows_console +# example: match = [numMatch]...[/numMatch] STYLING_CODES = r"\[(numMatch|numDiff)\](.*?)\[/\1\]" @@ -24,6 +25,10 @@ def fmt_str(self, string: str) -> str: class AnsiColorFormatter(Formatter): + def __init__(self): + super().__init__() + just_fix_windows_console() + def apply_formatting(self, code: str, inner: str) -> str: if code == "numDiff": return f"{Back.CYAN}{inner}{Back.RESET}" diff --git a/src/datajudge/utils.py b/src/datajudge/utils.py new file mode 100644 index 00000000..7c57307d --- /dev/null +++ b/src/datajudge/utils.py @@ -0,0 +1,26 @@ +from typing import Tuple, Union + +from colorama import just_fix_windows_console + +just_fix_windows_console() + + +def diff_color(n1: Union[float, int], n2: Union[float, int]) -> Tuple[str, str]: + """ + Given two numbers, returns a tuple of strings where the numbers are colored based on their difference. + Examples: + 123, 123.0 -> 123, 123[.0] # the part in squared brackets is colored + 122593859432, 122593859432347 -> 122,593,859,432 and 122,593,859,432[,347] + """ + s1, s2 = f"{n1:,}", f"{n2:,}" + + min_len = min(len(s1), len(s2)) + dif_idx = next( + (i for i in range(min_len) if s1[i] != s2[i]), + min_len, + ) + + return ( + f"[numDiff]{s1[:dif_idx]}[/numDiff]{s1[dif_idx:]}", + f"[numDiff]{s2[:dif_idx]}[/numDiff]{s2[dif_idx:]}", + ) From c4640fa1233cc075ef60bf70b64f51fff0485fe0 Mon Sep 17 00:00:00 2001 From: Yasin Tatar Date: Thu, 27 Jul 2023 17:22:56 +0200 Subject: [PATCH 2/3] add: tests --- src/datajudge/constraints/nrows.py | 7 ++-- src/datajudge/utils.py | 38 ++++++++++++++++----- tests/unit/test_utils.py | 55 ++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 tests/unit/test_utils.py diff --git a/src/datajudge/constraints/nrows.py b/src/datajudge/constraints/nrows.py index 1b3ee591..0f556364 100644 --- a/src/datajudge/constraints/nrows.py +++ b/src/datajudge/constraints/nrows.py @@ -3,10 +3,9 @@ import sqlalchemy as sa -from datajudge.utils import diff_color - from .. import db_access from ..db_access import DataReference +from ..utils import format_difference from .base import Constraint, OptionalSelections, TestResult, ToleranceGetter @@ -49,7 +48,7 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: class NRowsMax(NRows): def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: result = n_rows_factual <= n_rows_target - n_rows_factual_fmt, n_rows_target_fmt = diff_color( + n_rows_factual_fmt, n_rows_target_fmt = format_difference( n_rows_factual, n_rows_target ) assertion_text = ( @@ -63,7 +62,7 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: class NRowsEquality(NRows): def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]: result = n_rows_factual == n_rows_target - n_rows_factual_fmt, n_rows_target_fmt = diff_color( + n_rows_factual_fmt, n_rows_target_fmt = format_difference( n_rows_factual, n_rows_target ) assertion_text = ( diff --git a/src/datajudge/utils.py b/src/datajudge/utils.py index 7c57307d..6e920198 100644 --- a/src/datajudge/utils.py +++ b/src/datajudge/utils.py @@ -5,22 +5,44 @@ just_fix_windows_console() -def diff_color(n1: Union[float, int], n2: Union[float, int]) -> Tuple[str, str]: +def _fmt_diff_part(s, d): + return f"[numDiff]{s[d:]}[/numDiff]" if d < len(s) else "" + + +def format_difference( + n1: Union[float, int], n2: Union[float, int], decimal_separator: bool = True +) -> Tuple[str, str]: """ - Given two numbers, returns a tuple of strings where the numbers are colored based on their difference. + Given two numbers, n1 and n2, return a tuple of two strings, + each representing one of the input numbers with the differing part highlighted. + Highlighting is done using BBCode-like tags, which are replaced by the formatter. + Examples: - 123, 123.0 -> 123, 123[.0] # the part in squared brackets is colored - 122593859432, 122593859432347 -> 122,593,859,432 and 122,593,859,432[,347] + 123, 123.0 + -> 123, 123[numDiff].0[/numDiff] + 122593859432, 122593859432347 + -> 122593859432, 122593859432[numDiff]347[/numDiff] + + Args: + - n1: The first number to compare. + - n2: The second number to compare. + - decimal_separator: Whether to separate the decimal part of the numbers with commas. + + Returns: + - A tuple of two strings, each representing one of the input numbers with the differing part highlighted. """ - s1, s2 = f"{n1:,}", f"{n2:,}" + if decimal_separator: + s1, s2 = f"{n1:,}", f"{n2:,}" + else: + s1, s2 = str(n1), str(n2) min_len = min(len(s1), len(s2)) - dif_idx = next( + diff_idx = next( (i for i in range(min_len) if s1[i] != s2[i]), min_len, ) return ( - f"[numDiff]{s1[:dif_idx]}[/numDiff]{s1[dif_idx:]}", - f"[numDiff]{s2[:dif_idx]}[/numDiff]{s2[dif_idx:]}", + f"{s1[:diff_idx]}{_fmt_diff_part(s1, diff_idx)}", + f"{s2[:diff_idx]}{_fmt_diff_part(s2, diff_idx)}", ) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 00000000..c56eb6ca --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,55 @@ +import pytest + +from datajudge.utils import format_difference + + +@pytest.mark.parametrize( + "n1, n2", + [ + (123, 123.0), + (122593859432347, 122593859432347 // 1000), # one group less + (1.2, 1234567), + (1.2, 1.3), + ], +) +@pytest.mark.xfail +def test_print_diff_color(n1, n2): + format_n1, format_n2 = format_difference(n1, n2) + + assert True, f"{format_n1} vs {format_n2}" + + +@pytest.mark.parametrize( + "n1, n2, sep_decimal, expected_n1, expected_n2", + [ + (123, 123.0, False, "123", "123[numDiff].0[/numDiff]"), + ( + 122593859432, + 122593859432347, + False, + "122593859432", + "122593859432[numDiff]347[/numDiff]", + ), + ( + 122593859432, + 122593859432347, + True, + "122,593,859,432", + "122,593,859,432[numDiff],347[/numDiff]", + ), + (0, 0, False, "0", "0"), + (1, 2, False, "[numDiff]1[/numDiff]", "[numDiff]2[/numDiff]"), + ( + 123456789, + 987654321, + False, + "[numDiff]123456789[/numDiff]", + "[numDiff]987654321[/numDiff]", + ), + ], +) +def test_diff_color(n1, n2, sep_decimal, expected_n1, expected_n2): + assert format_difference(n1, n2, decimal_separator=sep_decimal) == ( + expected_n1, + expected_n2, + ) From b3e0ee708ce2cd495dee22410a1caff3b20244b7 Mon Sep 17 00:00:00 2001 From: Yasin Tatar Date: Fri, 25 Aug 2023 18:04:52 +0200 Subject: [PATCH 3/3] remove just_fix_windows_console duplicate --- src/datajudge/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/datajudge/utils.py b/src/datajudge/utils.py index 6e920198..aa968e32 100644 --- a/src/datajudge/utils.py +++ b/src/datajudge/utils.py @@ -1,9 +1,5 @@ from typing import Tuple, Union -from colorama import just_fix_windows_console - -just_fix_windows_console() - def _fmt_diff_part(s, d): return f"[numDiff]{s[d:]}[/numDiff]" if d < len(s) else ""