From 424f5c1c400b6d41b3371414123cfe8eca724dfe Mon Sep 17 00:00:00 2001
From: axif <muhamadasif570@gmail.com>
Date: Tue, 15 Oct 2024 23:06:57 +0600
Subject: [PATCH 1/2] fix  lists of arguments to be validated

---
 src/scribe_data/cli/cli_utils.py | 142 +++++++++++++++++--------------
 src/scribe_data/cli/main.py      |  13 ++-
 tests/cli/test_utils.py          |  42 ++++++++-
 3 files changed, 127 insertions(+), 70 deletions(-)

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index e3e62485c..8de5c7dec 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -23,7 +23,7 @@
 import difflib
 import json
 from pathlib import Path
-from typing import Union
+from typing import Union, List
 
 from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
 
@@ -155,79 +155,91 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None:
 # MARK: Validate
 
 
-def validate_language_and_data_type(language: str, data_type: str):
+def validate_language_and_data_type(
+    language: Union[str, List[str], bool, None],
+    data_type: Union[str, List[str], bool, None],
+):
     """
     Validates that the language and data type QIDs are not None.
 
     Parameters
     ----------
-        language : str
-            The language to validate.
-
-        data_type : str
-            The data type to validate.
+    language : str or list
+        The language(s) to validate.
+    data_type : str or list
+        The data type(s) to validate.
 
     Raises
     ------
-        ValueError
-            If either the language or data type is invalid (None).
+    ValueError
+        If any of the languages or data types is invalid, with all errors reported together.
     """
-    # Not functional for lists of arguments yet.
-    if isinstance(language, list) or isinstance(data_type, list):
-        return
-
-    language_is_valid = True
-    data_type_is_valid = True
-
-    value_error = ""
-    closest_language_match_string = ""
-    closest_data_type_match_string = ""
-
-    if (
-        isinstance(language, str)
-        and language.lower() not in language_to_qid.keys()
-        and not language.startswith("Q")
-        and not language[1:].isdigit()
-    ):
-        language_is_valid = False
-        if closest_language_match := difflib.get_close_matches(
-            language, language_map.keys(), n=1
-        ):
-            closest_language_match_cap = closest_language_match[0].capitalize()
-            closest_language_match_string = (
-                f" The closest matching language is {closest_language_match_cap}."
-            )
-
-    if (
-        isinstance(data_type, str)
-        and data_type not in data_type_metadata.keys()
-        and not data_type.startswith("Q")
-        and not data_type[1:].isdigit()
-    ):
-        data_type_is_valid = False
 
-        if closest_data_type_match := difflib.get_close_matches(
-            data_type, data_type_metadata.keys(), n=1
+    def validate_single_item(item, valid_options, item_type):
+        """
+        Validates a single item against a list of valid options, providing error messages and suggestions.
+
+        Parameters
+        ----------
+        item : str
+            The item to validate.
+        valid_options : list
+            A list of valid options against which the item will be validated.
+        item_type : str
+            A description of the item type (e.g., "language", "data-type") used in error messages.
+
+        Returns
+        -------
+        str or None
+            Returns an error message if the item is invalid, or None if the item is valid.
+        """
+        if (
+            isinstance(item, str)
+            and item.lower().strip() not in valid_options
+            and not item.startswith("Q")
+            and not item[1:].isdigit()
         ):
-            closest_data_type_match_string = (
-                f" The closest matching data-type is {closest_data_type_match[0]}."
+            closest_match = difflib.get_close_matches(item, valid_options, n=1)
+            closest_match_str = (
+                f" The closest matching {item_type} is {closest_match[0]}"
+                if closest_match
+                else ""
             )
-
-    if not language_is_valid and data_type_is_valid:
-        value_error = (
-            f"Invalid language {language} passed.{closest_language_match_string}"
-        )
-
-        raise ValueError(value_error)
-
-    elif language_is_valid and not data_type_is_valid:
-        value_error = (
-            f"Invalid data-type {data_type} passed.{closest_data_type_match_string}"
-        )
-
-        raise ValueError(value_error)
-
-    elif not language_is_valid and not data_type_is_valid:
-        value_error = f"Invalid language {language} and data-type {data_type} passed.{closest_language_match_string}{closest_data_type_match_string}"
-
-        raise ValueError(value_error)
+            return f"Invalid {item_type} {item}{closest_match_str}"
+        return None
+
+    errors = []
+
+    # Handle language validation
+    if language is None or isinstance(language, bool):
+        pass
+    elif isinstance(language, str):
+        language = [language]
+    elif not isinstance(language, list):
+        errors.append("Language must be a string or a list of strings.")
+
+    if language is not None and isinstance(language, list):
+        for lang in language:
+            error = validate_single_item(lang, language_to_qid.keys(), "language")
+            if error:
+                errors.append(error)
+
+    # Handle data type validation
+    if data_type is None or isinstance(data_type, bool):
+        pass
+    elif isinstance(data_type, str):
+        data_type = [data_type]
+    elif not isinstance(data_type, list):
+        errors.append("Data type must be a string or a list of strings.")
+
+    if data_type is not None and isinstance(data_type, list):
+        for dt in data_type:
+            error = validate_single_item(dt, data_type_metadata.keys(), "data-type")
+            if error:
+                errors.append(error)
+
+    # Raise ValueError with the combined error message
+    if errors:
+        raise ValueError(" and ".join(errors) + " passed.")
+    else:
+        return True
diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
index 7c88485a2..1cf4758a0 100644
--- a/src/scribe_data/cli/main.py
+++ b/src/scribe_data/cli/main.py
@@ -201,10 +201,15 @@ def main() -> None:
     # MARK: Setup CLI
 
     args = parser.parse_args()
-    if args.language or args.data_type:
-        validate_language_and_data_type(
-            language=args.language, data_type=args.data_type
-        )
+
+    try:
+        if args.language or args.data_type:
+            validate_language_and_data_type(
+                language=args.language, data_type=args.data_type
+            )
+    except ValueError as e:
+        print(e)
+        return
 
     if args.upgrade:
         upgrade_cli()
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index 149716c2d..32ab82262 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -216,5 +216,45 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
 
         self.assertEqual(
             str(context.exception),
-            "Invalid language InvalidLanguage and data-type InvalidDataType passed.",
+            "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.",
         )
+
+    def test_validate_language_and_data_type_with_list(self):
+        """Test validation with lists of languages and data types."""
+        languages = ["English", "Spanish"]
+        data_types = ["nouns", "verbs"]
+        try:
+            validate_language_and_data_type(languages, data_types)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid lists!"
+            )
+
+    def test_validate_language_and_data_type_with_qids(self):
+        """Test validation directly with QIDs."""
+        language_qid = "Q1860"  # QID for English
+        data_type_qid = "Q1084"  # QID for nouns
+        try:
+            validate_language_and_data_type(language_qid, data_type_qid)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
+            )
+
+    def test_validate_language_and_data_type_invalid_list(self):
+        """Test validation with invalid lists."""
+        languages = ["English", "Klingon"]
+        data_types = ["nouns", "alienverbs"]
+        with self.assertRaises(ValueError) as context:
+            validate_language_and_data_type(languages, data_types)
+        self.assertIn("Invalid language Klingon", str(context.exception))
+        self.assertIn("Invalid data-type alienverbs", str(context.exception))
+
+    def test_validate_language_and_data_type_mixed_validity_in_lists(self):
+        """Test validation with mixed valid and invalid entries in lists."""
+        languages = ["English", "InvalidLanguage"]
+        data_types = ["nouns", "InvalidDataType"]
+        with self.assertRaises(ValueError) as context:
+            validate_language_and_data_type(languages, data_types)
+        self.assertIn("Invalid language InvalidLanguage", str(context.exception))
+        self.assertIn("Invalid data-type InvalidDataType", str(context.exception))

From ad8d2b01c66c051d3cb9cf265480145c439389da Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 22:06:17 +0200
Subject: [PATCH 2/2] Minor formatting and edits to outputs

---
 src/scribe_data/cli/cli_utils.py | 52 +++++++++++++++++++-------------
 src/scribe_data/cli/main.py      |  3 +-
 tests/cli/test_utils.py          | 24 +++++----------
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index 8de5c7dec..4f59a65ef 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -23,7 +23,7 @@
 import difflib
 import json
 from pathlib import Path
-from typing import Union, List
+from typing import List, Union
 
 from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
 
@@ -164,15 +164,16 @@ def validate_language_and_data_type(
 
     Parameters
     ----------
-    language : str or list
-        The language(s) to validate.
-    data_type : str or list
-        The data type(s) to validate.
+        language : str or list
+            The language(s) to validate.
+
+        data_type : str or list
+            The data type(s) to validate.
 
     Raises
     ------
-    ValueError
-        If any of the languages or data types is invalid, with all errors reported together.
+        ValueError
+            If any of the languages or data types is invalid, with all errors reported together.
     """
 
     def validate_single_item(item, valid_options, item_type):
@@ -181,17 +182,17 @@ def validate_single_item(item, valid_options, item_type):
 
         Parameters
         ----------
-        item : str
-            The item to validate.
-        valid_options : list
-            A list of valid options against which the item will be validated.
-        item_type : str
-            A description of the item type (e.g., "language", "data-type") used in error messages.
+            item : str
+                The item to validate.
+            valid_options : list
+                A list of valid options against which the item will be validated.
+            item_type : str
+                A description of the item type (e.g., "language", "data-type") used in error messages.
 
         Returns
         -------
-        str or None
-            Returns an error message if the item is invalid, or None if the item is valid.
+            str or None
+                Returns an error message if the item is invalid, or None if the item is valid.
         """
         if (
             isinstance(item, str)
@@ -201,45 +202,54 @@ def validate_single_item(item, valid_options, item_type):
         ):
             closest_match = difflib.get_close_matches(item, valid_options, n=1)
             closest_match_str = (
-                f" The closest matching {item_type} is {closest_match[0]}"
+                f" The closest matching {item_type} is {closest_match[0]}."
                 if closest_match
                 else ""
             )
-            return f"Invalid {item_type} {item}{closest_match_str}"
+
+            return f"Invalid {item_type} {item}.{closest_match_str}"
+
         return None
 
     errors = []
 
-    # Handle language validation
+    # Handle language validation.
     if language is None or isinstance(language, bool):
         pass
+
     elif isinstance(language, str):
         language = [language]
+
     elif not isinstance(language, list):
         errors.append("Language must be a string or a list of strings.")
 
     if language is not None and isinstance(language, list):
         for lang in language:
             error = validate_single_item(lang, language_to_qid.keys(), "language")
+
             if error:
                 errors.append(error)
 
-    # Handle data type validation
+    # Handle data type validation.
     if data_type is None or isinstance(data_type, bool):
         pass
+
     elif isinstance(data_type, str):
         data_type = [data_type]
+
     elif not isinstance(data_type, list):
         errors.append("Data type must be a string or a list of strings.")
 
     if data_type is not None and isinstance(data_type, list):
         for dt in data_type:
             error = validate_single_item(dt, data_type_metadata.keys(), "data-type")
+
             if error:
                 errors.append(error)
 
-    # Raise ValueError with the combined error message
+    # Raise ValueError with the combined error message.
     if errors:
-        raise ValueError(" and ".join(errors) + " passed.")
+        raise ValueError("\n".join(errors))
+
     else:
         return True
diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
index 1cf4758a0..506bbcdd1 100644
--- a/src/scribe_data/cli/main.py
+++ b/src/scribe_data/cli/main.py
@@ -207,8 +207,9 @@ def main() -> None:
             validate_language_and_data_type(
                 language=args.language, data_type=args.data_type
             )
+
     except ValueError as e:
-        print(e)
+        print(f"Input validation failed with error: {e}")
         return
 
     if args.upgrade:
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index 32ab82262..a827666a2 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -29,6 +29,8 @@
     validate_language_and_data_type,
 )
 
+# MARK: Utils
+
 
 class TestCLIUtils(unittest.TestCase):
     def test_correct_data_type(self):
@@ -145,6 +147,9 @@ def test_print_formatted_data_unknown_type(self):
             mock_print.assert_called_once_with("unknown data type")
 
 
+# MARK: Validate
+
+
 class TestValidateLanguageAndDataType(unittest.TestCase):
     def setUp(self):
         self.qid_mapping = {
@@ -182,9 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid language InvalidLanguage passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
@@ -198,9 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid data-type InvalidDataType passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
@@ -216,7 +217,7 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
 
         self.assertEqual(
             str(context.exception),
-            "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.",
+            "Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.",
         )
 
     def test_validate_language_and_data_type_with_list(self):
@@ -241,15 +242,6 @@ def test_validate_language_and_data_type_with_qids(self):
                 "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
             )
 
-    def test_validate_language_and_data_type_invalid_list(self):
-        """Test validation with invalid lists."""
-        languages = ["English", "Klingon"]
-        data_types = ["nouns", "alienverbs"]
-        with self.assertRaises(ValueError) as context:
-            validate_language_and_data_type(languages, data_types)
-        self.assertIn("Invalid language Klingon", str(context.exception))
-        self.assertIn("Invalid data-type alienverbs", str(context.exception))
-
     def test_validate_language_and_data_type_mixed_validity_in_lists(self):
         """Test validation with mixed valid and invalid entries in lists."""
         languages = ["English", "InvalidLanguage"]