Merge pull request #375 from axif0/validation

fix lists of arguments to be validated
scribe-org · Oct 15, 2024 · 362dea4
2 parents 4bfce7c + ad8d2b0
commit 362dea4
Show file tree

Hide file tree

Showing 3 changed files with 126 additions and 66 deletions.
diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
@@ -23,7 +23,7 @@
 import difflib
 import json
 from pathlib import Path
-from typing import Union
+from typing import List, Union
 
 from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
 
@@ -155,79 +155,101 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None:
 # MARK: Validate
 
 
-def validate_language_and_data_type(language: str, data_type: str):
+def validate_language_and_data_type(
+    language: Union[str, List[str], bool, None],
+    data_type: Union[str, List[str], bool, None],
+):
     """
     Validates that the language and data type QIDs are not None.
 
     Parameters
     ----------
-        language : str
-            The language to validate.
+        language : str or list
+            The language(s) to validate.
 
-        data_type : str
-            The data type to validate.
+        data_type : str or list
+            The data type(s) to validate.
 
     Raises
     ------
         ValueError
-            If either the language or data type is invalid (None).
+            If any of the languages or data types is invalid, with all errors reported together.
     """
-    # Not functional for lists of arguments yet.
-    if isinstance(language, list) or isinstance(data_type, list):
-        return
 
-    language_is_valid = True
-    data_type_is_valid = True
-
-    value_error = ""
-    closest_language_match_string = ""
-    closest_data_type_match_string = ""
-
-    if (
-        isinstance(language, str)
-        and language.lower() not in language_to_qid.keys()
-        and not language.startswith("Q")
-        and not language[1:].isdigit()
-    ):
-        language_is_valid = False
-        if closest_language_match := difflib.get_close_matches(
-            language, language_map.keys(), n=1
+    def validate_single_item(item, valid_options, item_type):
+        """
+        Validates a single item against a list of valid options, providing error messages and suggestions.
+
+        Parameters
+        ----------
+            item : str
+                The item to validate.
+            valid_options : list
+                A list of valid options against which the item will be validated.
+            item_type : str
+                A description of the item type (e.g., "language", "data-type") used in error messages.
+
+        Returns
+        -------
+            str or None
+                Returns an error message if the item is invalid, or None if the item is valid.
+        """
+        if (
+            isinstance(item, str)
+            and item.lower().strip() not in valid_options
+            and not item.startswith("Q")
+            and not item[1:].isdigit()
         ):
-            closest_language_match_cap = closest_language_match[0].capitalize()
-            closest_language_match_string = (
-                f" The closest matching language is {closest_language_match_cap}."
+            closest_match = difflib.get_close_matches(item, valid_options, n=1)
+            closest_match_str = (
+                f" The closest matching {item_type} is {closest_match[0]}."
+                if closest_match
+                else ""
             )
 
-    if (
-        isinstance(data_type, str)
-        and data_type not in data_type_metadata.keys()
-        and not data_type.startswith("Q")
-        and not data_type[1:].isdigit()
-    ):
-        data_type_is_valid = False
+            return f"Invalid {item_type} {item}.{closest_match_str}"
 
-        if closest_data_type_match := difflib.get_close_matches(
-            data_type, data_type_metadata.keys(), n=1
-        ):
-            closest_data_type_match_string = (
-                f" The closest matching data-type is {closest_data_type_match[0]}."
-            )
+        return None
+
+    errors = []
+
+    # Handle language validation.
+    if language is None or isinstance(language, bool):
+        pass
 
-    if not language_is_valid and data_type_is_valid:
-        value_error = (
-            f"Invalid language {language} passed.{closest_language_match_string}"
-        )
+    elif isinstance(language, str):
+        language = [language]
 
-        raise ValueError(value_error)
+    elif not isinstance(language, list):
+        errors.append("Language must be a string or a list of strings.")
 
-    elif language_is_valid and not data_type_is_valid:
-        value_error = (
-            f"Invalid data-type {data_type} passed.{closest_data_type_match_string}"
-        )
+    if language is not None and isinstance(language, list):
+        for lang in language:
+            error = validate_single_item(lang, language_to_qid.keys(), "language")
 
-        raise ValueError(value_error)
+            if error:
+                errors.append(error)
 
-    elif not language_is_valid and not data_type_is_valid:
-        value_error = f"Invalid language {language} and data-type {data_type} passed.{closest_language_match_string}{closest_data_type_match_string}"
+    # Handle data type validation.
+    if data_type is None or isinstance(data_type, bool):
+        pass
 
-        raise ValueError(value_error)
+    elif isinstance(data_type, str):
+        data_type = [data_type]
+
+    elif not isinstance(data_type, list):
+        errors.append("Data type must be a string or a list of strings.")
+
+    if data_type is not None and isinstance(data_type, list):
+        for dt in data_type:
+            error = validate_single_item(dt, data_type_metadata.keys(), "data-type")
+
+            if error:
+                errors.append(error)
+
+    # Raise ValueError with the combined error message.
+    if errors:
+        raise ValueError("\n".join(errors))
+
+    else:
+        return True
diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
@@ -201,10 +201,16 @@ def main() -> None:
     # MARK: Setup CLI
 
     args = parser.parse_args()
-    if args.language or args.data_type:
-        validate_language_and_data_type(
-            language=args.language, data_type=args.data_type
-        )
+
+    try:
+        if args.language or args.data_type:
+            validate_language_and_data_type(
+                language=args.language, data_type=args.data_type
+            )
+
+    except ValueError as e:
+        print(f"Input validation failed with error: {e}")
+        return
 
     if args.upgrade:
         upgrade_cli()

diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
@@ -29,6 +29,8 @@
     validate_language_and_data_type,
 )
 
+# MARK: Utils
+
 
 class TestCLIUtils(unittest.TestCase):
     def test_correct_data_type(self):
@@ -145,6 +147,9 @@ def test_print_formatted_data_unknown_type(self):
             mock_print.assert_called_once_with("unknown data type")
 
 
+# MARK: Validate
+
+
 class TestValidateLanguageAndDataType(unittest.TestCase):
     def setUp(self):
         self.qid_mapping = {
@@ -182,9 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid language InvalidLanguage passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
@@ -198,9 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid data-type InvalidDataType passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
@@ -216,5 +217,36 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
 
         self.assertEqual(
             str(context.exception),
-            "Invalid language InvalidLanguage and data-type InvalidDataType passed.",
+            "Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.",
         )
+
+    def test_validate_language_and_data_type_with_list(self):
+        """Test validation with lists of languages and data types."""
+        languages = ["English", "Spanish"]
+        data_types = ["nouns", "verbs"]
+        try:
+            validate_language_and_data_type(languages, data_types)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid lists!"
+            )
+
+    def test_validate_language_and_data_type_with_qids(self):
+        """Test validation directly with QIDs."""
+        language_qid = "Q1860"  # QID for English
+        data_type_qid = "Q1084"  # QID for nouns
+        try:
+            validate_language_and_data_type(language_qid, data_type_qid)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
+            )
+
+    def test_validate_language_and_data_type_mixed_validity_in_lists(self):
+        """Test validation with mixed valid and invalid entries in lists."""
+        languages = ["English", "InvalidLanguage"]
+        data_types = ["nouns", "InvalidDataType"]
+        with self.assertRaises(ValueError) as context:
+            validate_language_and_data_type(languages, data_types)
+        self.assertIn("Invalid language InvalidLanguage", str(context.exception))
+        self.assertIn("Invalid data-type InvalidDataType", str(context.exception))