From 424f5c1c400b6d41b3371414123cfe8eca724dfe Mon Sep 17 00:00:00 2001 From: axif <muhamadasif570@gmail.com> Date: Tue, 15 Oct 2024 23:06:57 +0600 Subject: [PATCH 1/2] fix lists of arguments to be validated --- src/scribe_data/cli/cli_utils.py | 142 +++++++++++++++++-------------- src/scribe_data/cli/main.py | 13 ++- tests/cli/test_utils.py | 42 ++++++++- 3 files changed, 127 insertions(+), 70 deletions(-) diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py index e3e62485c..8de5c7dec 100644 --- a/src/scribe_data/cli/cli_utils.py +++ b/src/scribe_data/cli/cli_utils.py @@ -23,7 +23,7 @@ import difflib import json from pathlib import Path -from typing import Union +from typing import Union, List from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR @@ -155,79 +155,91 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None: # MARK: Validate -def validate_language_and_data_type(language: str, data_type: str): +def validate_language_and_data_type( + language: Union[str, List[str], bool, None], + data_type: Union[str, List[str], bool, None], +): """ Validates that the language and data type QIDs are not None. Parameters ---------- - language : str - The language to validate. - - data_type : str - The data type to validate. + language : str or list + The language(s) to validate. + data_type : str or list + The data type(s) to validate. Raises ------ - ValueError - If either the language or data type is invalid (None). + ValueError + If any of the languages or data types is invalid, with all errors reported together. """ - # Not functional for lists of arguments yet. - if isinstance(language, list) or isinstance(data_type, list): - return - - language_is_valid = True - data_type_is_valid = True - - value_error = "" - closest_language_match_string = "" - closest_data_type_match_string = "" - - if ( - isinstance(language, str) - and language.lower() not in language_to_qid.keys() - and not language.startswith("Q") - and not language[1:].isdigit() - ): - language_is_valid = False - if closest_language_match := difflib.get_close_matches( - language, language_map.keys(), n=1 - ): - closest_language_match_cap = closest_language_match[0].capitalize() - closest_language_match_string = ( - f" The closest matching language is {closest_language_match_cap}." - ) - - if ( - isinstance(data_type, str) - and data_type not in data_type_metadata.keys() - and not data_type.startswith("Q") - and not data_type[1:].isdigit() - ): - data_type_is_valid = False - if closest_data_type_match := difflib.get_close_matches( - data_type, data_type_metadata.keys(), n=1 + def validate_single_item(item, valid_options, item_type): + """ + Validates a single item against a list of valid options, providing error messages and suggestions. + + Parameters + ---------- + item : str + The item to validate. + valid_options : list + A list of valid options against which the item will be validated. + item_type : str + A description of the item type (e.g., "language", "data-type") used in error messages. + + Returns + ------- + str or None + Returns an error message if the item is invalid, or None if the item is valid. + """ + if ( + isinstance(item, str) + and item.lower().strip() not in valid_options + and not item.startswith("Q") + and not item[1:].isdigit() ): - closest_data_type_match_string = ( - f" The closest matching data-type is {closest_data_type_match[0]}." + closest_match = difflib.get_close_matches(item, valid_options, n=1) + closest_match_str = ( + f" The closest matching {item_type} is {closest_match[0]}" + if closest_match + else "" ) - - if not language_is_valid and data_type_is_valid: - value_error = ( - f"Invalid language {language} passed.{closest_language_match_string}" - ) - - raise ValueError(value_error) - - elif language_is_valid and not data_type_is_valid: - value_error = ( - f"Invalid data-type {data_type} passed.{closest_data_type_match_string}" - ) - - raise ValueError(value_error) - - elif not language_is_valid and not data_type_is_valid: - value_error = f"Invalid language {language} and data-type {data_type} passed.{closest_language_match_string}{closest_data_type_match_string}" - - raise ValueError(value_error) + return f"Invalid {item_type} {item}{closest_match_str}" + return None + + errors = [] + + # Handle language validation + if language is None or isinstance(language, bool): + pass + elif isinstance(language, str): + language = [language] + elif not isinstance(language, list): + errors.append("Language must be a string or a list of strings.") + + if language is not None and isinstance(language, list): + for lang in language: + error = validate_single_item(lang, language_to_qid.keys(), "language") + if error: + errors.append(error) + + # Handle data type validation + if data_type is None or isinstance(data_type, bool): + pass + elif isinstance(data_type, str): + data_type = [data_type] + elif not isinstance(data_type, list): + errors.append("Data type must be a string or a list of strings.") + + if data_type is not None and isinstance(data_type, list): + for dt in data_type: + error = validate_single_item(dt, data_type_metadata.keys(), "data-type") + if error: + errors.append(error) + + # Raise ValueError with the combined error message + if errors: + raise ValueError(" and ".join(errors) + " passed.") + else: + return True diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index 7c88485a2..1cf4758a0 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -201,10 +201,15 @@ def main() -> None: # MARK: Setup CLI args = parser.parse_args() - if args.language or args.data_type: - validate_language_and_data_type( - language=args.language, data_type=args.data_type - ) + + try: + if args.language or args.data_type: + validate_language_and_data_type( + language=args.language, data_type=args.data_type + ) + except ValueError as e: + print(e) + return if args.upgrade: upgrade_cli() diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py index 149716c2d..32ab82262 100644 --- a/tests/cli/test_utils.py +++ b/tests/cli/test_utils.py @@ -216,5 +216,45 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid): self.assertEqual( str(context.exception), - "Invalid language InvalidLanguage and data-type InvalidDataType passed.", + "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.", ) + + def test_validate_language_and_data_type_with_list(self): + """Test validation with lists of languages and data types.""" + languages = ["English", "Spanish"] + data_types = ["nouns", "verbs"] + try: + validate_language_and_data_type(languages, data_types) + except ValueError: + self.fail( + "validate_language_and_data_type raised ValueError unexpectedly with valid lists!" + ) + + def test_validate_language_and_data_type_with_qids(self): + """Test validation directly with QIDs.""" + language_qid = "Q1860" # QID for English + data_type_qid = "Q1084" # QID for nouns + try: + validate_language_and_data_type(language_qid, data_type_qid) + except ValueError: + self.fail( + "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!" + ) + + def test_validate_language_and_data_type_invalid_list(self): + """Test validation with invalid lists.""" + languages = ["English", "Klingon"] + data_types = ["nouns", "alienverbs"] + with self.assertRaises(ValueError) as context: + validate_language_and_data_type(languages, data_types) + self.assertIn("Invalid language Klingon", str(context.exception)) + self.assertIn("Invalid data-type alienverbs", str(context.exception)) + + def test_validate_language_and_data_type_mixed_validity_in_lists(self): + """Test validation with mixed valid and invalid entries in lists.""" + languages = ["English", "InvalidLanguage"] + data_types = ["nouns", "InvalidDataType"] + with self.assertRaises(ValueError) as context: + validate_language_and_data_type(languages, data_types) + self.assertIn("Invalid language InvalidLanguage", str(context.exception)) + self.assertIn("Invalid data-type InvalidDataType", str(context.exception)) From ad8d2b01c66c051d3cb9cf265480145c439389da Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com> Date: Tue, 15 Oct 2024 22:06:17 +0200 Subject: [PATCH 2/2] Minor formatting and edits to outputs --- src/scribe_data/cli/cli_utils.py | 52 +++++++++++++++++++------------- src/scribe_data/cli/main.py | 3 +- tests/cli/test_utils.py | 24 +++++---------- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py index 8de5c7dec..4f59a65ef 100644 --- a/src/scribe_data/cli/cli_utils.py +++ b/src/scribe_data/cli/cli_utils.py @@ -23,7 +23,7 @@ import difflib import json from pathlib import Path -from typing import Union, List +from typing import List, Union from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR @@ -164,15 +164,16 @@ def validate_language_and_data_type( Parameters ---------- - language : str or list - The language(s) to validate. - data_type : str or list - The data type(s) to validate. + language : str or list + The language(s) to validate. + + data_type : str or list + The data type(s) to validate. Raises ------ - ValueError - If any of the languages or data types is invalid, with all errors reported together. + ValueError + If any of the languages or data types is invalid, with all errors reported together. """ def validate_single_item(item, valid_options, item_type): @@ -181,17 +182,17 @@ def validate_single_item(item, valid_options, item_type): Parameters ---------- - item : str - The item to validate. - valid_options : list - A list of valid options against which the item will be validated. - item_type : str - A description of the item type (e.g., "language", "data-type") used in error messages. + item : str + The item to validate. + valid_options : list + A list of valid options against which the item will be validated. + item_type : str + A description of the item type (e.g., "language", "data-type") used in error messages. Returns ------- - str or None - Returns an error message if the item is invalid, or None if the item is valid. + str or None + Returns an error message if the item is invalid, or None if the item is valid. """ if ( isinstance(item, str) @@ -201,45 +202,54 @@ def validate_single_item(item, valid_options, item_type): ): closest_match = difflib.get_close_matches(item, valid_options, n=1) closest_match_str = ( - f" The closest matching {item_type} is {closest_match[0]}" + f" The closest matching {item_type} is {closest_match[0]}." if closest_match else "" ) - return f"Invalid {item_type} {item}{closest_match_str}" + + return f"Invalid {item_type} {item}.{closest_match_str}" + return None errors = [] - # Handle language validation + # Handle language validation. if language is None or isinstance(language, bool): pass + elif isinstance(language, str): language = [language] + elif not isinstance(language, list): errors.append("Language must be a string or a list of strings.") if language is not None and isinstance(language, list): for lang in language: error = validate_single_item(lang, language_to_qid.keys(), "language") + if error: errors.append(error) - # Handle data type validation + # Handle data type validation. if data_type is None or isinstance(data_type, bool): pass + elif isinstance(data_type, str): data_type = [data_type] + elif not isinstance(data_type, list): errors.append("Data type must be a string or a list of strings.") if data_type is not None and isinstance(data_type, list): for dt in data_type: error = validate_single_item(dt, data_type_metadata.keys(), "data-type") + if error: errors.append(error) - # Raise ValueError with the combined error message + # Raise ValueError with the combined error message. if errors: - raise ValueError(" and ".join(errors) + " passed.") + raise ValueError("\n".join(errors)) + else: return True diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index 1cf4758a0..506bbcdd1 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -207,8 +207,9 @@ def main() -> None: validate_language_and_data_type( language=args.language, data_type=args.data_type ) + except ValueError as e: - print(e) + print(f"Input validation failed with error: {e}") return if args.upgrade: diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py index 32ab82262..a827666a2 100644 --- a/tests/cli/test_utils.py +++ b/tests/cli/test_utils.py @@ -29,6 +29,8 @@ validate_language_and_data_type, ) +# MARK: Utils + class TestCLIUtils(unittest.TestCase): def test_correct_data_type(self): @@ -145,6 +147,9 @@ def test_print_formatted_data_unknown_type(self): mock_print.assert_called_once_with("unknown data type") +# MARK: Validate + + class TestValidateLanguageAndDataType(unittest.TestCase): def setUp(self): self.qid_mapping = { @@ -182,9 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid): language=language_qid, data_type=data_type_qid ) - self.assertEqual( - str(context.exception), "Invalid language InvalidLanguage passed." - ) + self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.") @patch("scribe_data.cli.total.get_qid_by_input") def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid): @@ -198,9 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid): language=language_qid, data_type=data_type_qid ) - self.assertEqual( - str(context.exception), "Invalid data-type InvalidDataType passed." - ) + self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.") @patch("scribe_data.cli.total.get_qid_by_input") def test_validate_language_and_data_type_both_invalid(self, mock_get_qid): @@ -216,7 +217,7 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid): self.assertEqual( str(context.exception), - "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.", + "Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.", ) def test_validate_language_and_data_type_with_list(self): @@ -241,15 +242,6 @@ def test_validate_language_and_data_type_with_qids(self): "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!" ) - def test_validate_language_and_data_type_invalid_list(self): - """Test validation with invalid lists.""" - languages = ["English", "Klingon"] - data_types = ["nouns", "alienverbs"] - with self.assertRaises(ValueError) as context: - validate_language_and_data_type(languages, data_types) - self.assertIn("Invalid language Klingon", str(context.exception)) - self.assertIn("Invalid data-type alienverbs", str(context.exception)) - def test_validate_language_and_data_type_mixed_validity_in_lists(self): """Test validation with mixed valid and invalid entries in lists.""" languages = ["English", "InvalidLanguage"]