Quantco · ivergara · Aug 28, 2023 · Jul 27, 2023 · Jul 27, 2023 · Aug 22, 2023
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
@@ -150,9 +150,9 @@ The following table lists all the supported codes, along with their descriptions
    * - `numMatch`
      - Indicates the part of a number that matches the expected value.
      - `[numMatch]3.141[/numMatch]`
-   * - `numDifference`
+   * - `numDiff`
      - Indicates the part of a number that differs.
-     - `[numDifference]6[/numDifference]`
+     - `[numDiff]6[/numDiff]`
 
 Alternative DataSources
 ---------------------------

diff --git a/src/datajudge/constraints/nrows.py b/src/datajudge/constraints/nrows.py
@@ -5,6 +5,7 @@
 
 from .. import db_access
 from ..db_access import DataReference
+from ..utils import format_difference
 from .base import Constraint, OptionalSelections, TestResult, ToleranceGetter
 
 
@@ -47,9 +48,12 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]:
 class NRowsMax(NRows):
     def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]:
         result = n_rows_factual <= n_rows_target
+        n_rows_factual_fmt, n_rows_target_fmt = format_difference(
+            n_rows_factual, n_rows_target
+        )
         assertion_text = (
-            f"{self.ref} has {n_rows_factual} "
-            f"> {self.target_prefix} {n_rows_target} rows. "
+            f"{self.ref} has {n_rows_factual_fmt} "
+            f"> {self.target_prefix} {n_rows_target_fmt} rows. "
             f"{self.condition_string}"
         )
         return result, assertion_text
@@ -58,9 +62,12 @@ def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]:
 class NRowsEquality(NRows):
     def compare(self, n_rows_factual: int, n_rows_target: int) -> Tuple[bool, str]:
         result = n_rows_factual == n_rows_target
+        n_rows_factual_fmt, n_rows_target_fmt = format_difference(
+            n_rows_factual, n_rows_target
+        )
         assertion_text = (
-            f"{self.ref} has {n_rows_factual} row(s) "
-            f"instead of {self.target_prefix} {n_rows_target}. "
+            f"{self.ref} has {n_rows_factual_fmt} row(s) "
+            f"instead of {self.target_prefix} {n_rows_target_fmt}. "
             f"{self.condition_string}"
         )
         return result, assertion_text

diff --git a/src/datajudge/formatter.py b/src/datajudge/formatter.py
@@ -1,8 +1,9 @@
 import abc
 import re
 
-from colorama import Back
+from colorama import Back, just_fix_windows_console
 
+# Matches BBCode-like styling tags, e.g. [numMatch]...[/numMatch]; group 1 is the code, group 2 the inner text.
 STYLING_CODES = r"\[(numMatch|numDiff)\](.*?)\[/\1\]"
 
 
@@ -24,6 +25,10 @@ def fmt_str(self, string: str) -> str:
 
 
 class AnsiColorFormatter(Formatter):
+    def __init__(self):
+        super().__init__()
+        just_fix_windows_console()
+
     def apply_formatting(self, code: str, inner: str) -> str:
         if code == "numDiff":
             return f"{Back.CYAN}{inner}{Back.RESET}"

diff --git a/src/datajudge/utils.py b/src/datajudge/utils.py
@@ -0,0 +1,44 @@
+from typing import Tuple, Union
+
+
+def _fmt_diff_part(s, d):
+    return f"[numDiff]{s[d:]}[/numDiff]" if d < len(s) else ""
+
+
+def format_difference(
+    n1: Union[float, int], n2: Union[float, int], decimal_separator: bool = True
+) -> Tuple[str, str]:
+    """
+    Given two numbers, n1 and n2, return a tuple of two strings,
+    each representing one of the input numbers with the differing part highlighted.
+    Highlighting is done using BBCode-like tags, which are replaced by the formatter.
+
+    Examples (with decimal_separator=False):
+        123, 123.0
+        -> 123, 123[numDiff].0[/numDiff]
+        122593859432, 122593859432347
+        -> 122593859432, 122593859432[numDiff]347[/numDiff]
+
+    Args:
+    - n1: The first number to compare.
+    - n2: The second number to compare.
+    - decimal_separator: Whether to format the numbers with commas as thousands separators (e.g. 1,234,567).
+
+    Returns:
+    - A tuple of two strings, each representing one of the input numbers with the differing part highlighted.
+    """
+    if decimal_separator:
+        s1, s2 = f"{n1:,}", f"{n2:,}"
+    else:
+        s1, s2 = str(n1), str(n2)
+
+    min_len = min(len(s1), len(s2))
+    diff_idx = next(
+        (i for i in range(min_len) if s1[i] != s2[i]),
+        min_len,
+    )
+
+    return (
+        f"{s1[:diff_idx]}{_fmt_diff_part(s1, diff_idx)}",
+        f"{s2[:diff_idx]}{_fmt_diff_part(s2, diff_idx)}",
+    )
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
@@ -0,0 +1,55 @@
+import pytest
+
+from datajudge.utils import format_difference
+
+
+@pytest.mark.parametrize(
+    "n1, n2",
+    [
+        (123, 123.0),
+        (122593859432347, 122593859432347 // 1000),  # one group less
+        (1.2, 1234567),
+        (1.2, 1.3),
+    ],
+)
+@pytest.mark.xfail
+def test_print_diff_color(n1, n2):
+    format_n1, format_n2 = format_difference(n1, n2)
+
+    assert True, f"{format_n1} vs {format_n2}"
+
+
+@pytest.mark.parametrize(
+    "n1, n2, sep_decimal, expected_n1, expected_n2",
+    [
+        (123, 123.0, False, "123", "123[numDiff].0[/numDiff]"),
+        (
+            122593859432,
+            122593859432347,
+            False,
+            "122593859432",
+            "122593859432[numDiff]347[/numDiff]",
+        ),
+        (
+            122593859432,
+            122593859432347,
+            True,
+            "122,593,859,432",
+            "122,593,859,432[numDiff],347[/numDiff]",
+        ),
+        (0, 0, False, "0", "0"),
+        (1, 2, False, "[numDiff]1[/numDiff]", "[numDiff]2[/numDiff]"),
+        (
+            123456789,
+            987654321,
+            False,
+            "[numDiff]123456789[/numDiff]",
+            "[numDiff]987654321[/numDiff]",
+        ),
+    ],
+)
+def test_diff_color(n1, n2, sep_decimal, expected_n1, expected_n2):
+    assert format_difference(n1, n2, decimal_separator=sep_decimal) == (
+        expected_n1,
+        expected_n2,
+    )