Skip to content

Commit

Permalink
Merge pull request #375 from axif0/validation
Browse files Browse the repository at this point in the history
fix lists of arguments to be validated
  • Loading branch information
andrewtavis authored Oct 15, 2024
2 parents 4bfce7c + ad8d2b0 commit 362dea4
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 66 deletions.
132 changes: 77 additions & 55 deletions src/scribe_data/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import difflib
import json
from pathlib import Path
from typing import Union
from typing import List, Union

from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR

Expand Down Expand Up @@ -155,79 +155,101 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None:
# MARK: Validate


def _is_qid(item: str) -> bool:
    """
    Checks whether a string has the shape of a QID: the letter "Q" followed only by digits.

    Parameters
    ----------
    item : str
        The string to check.

    Returns
    -------
    bool
        True if the item looks like a QID (e.g. "Q1860"), False otherwise.
    """
    # Both conditions must hold together: "Qwerty" and "X123" are not QIDs.
    return item.startswith("Q") and item[1:].isdigit()


def _validate_single_item(item, valid_options, item_type: str) -> Union[str, None]:
    """
    Validates a single item against the valid options, suggesting the closest match on failure.

    Parameters
    ----------
    item : str
        The item to validate.

    valid_options : iterable of str
        The valid options against which the item will be validated.

    item_type : str
        A description of the item type (e.g., "language", "data-type") used in error messages.

    Returns
    -------
    str or None
        An error message if the item is invalid, or None if the item is valid.
    """
    # Non-string items are not checked here and are treated as valid.
    if not isinstance(item, str):
        return None

    # Names are matched case-insensitively and ignoring surrounding whitespace.
    if item.lower().strip() in valid_options or _is_qid(item):
        return None

    closest_match = difflib.get_close_matches(item, valid_options, n=1)
    closest_match_str = (
        f" The closest matching {item_type} is {closest_match[0]}."
        if closest_match
        else ""
    )

    return f"Invalid {item_type} {item}.{closest_match_str}"


def _normalize_argument(value, label: str):
    """
    Converts a CLI argument value into the list of items that need validating.

    Parameters
    ----------
    value : str, list, bool or None
        The raw argument value.

    label : str
        The capitalized argument name used in the type error message.

    Returns
    -------
    tuple of (list, str or None)
        The items to validate and an error message if the value has an unsupported type.
    """
    # None and booleans carry nothing to validate (presumably unset or flag-style arguments).
    if value is None or isinstance(value, bool):
        return [], None

    if isinstance(value, str):
        return [value], None

    if isinstance(value, list):
        return value, None

    return [], f"{label} must be a string or a list of strings."


def validate_language_and_data_type(
    language: Union[str, List[str], bool, None],
    data_type: Union[str, List[str], bool, None],
) -> bool:
    """
    Validates that the passed language(s) and data type(s) are known names or QIDs.

    Parameters
    ----------
    language : str or list
        The language(s) to validate.

    data_type : str or list
        The data type(s) to validate.

    Returns
    -------
    bool
        True if every language and data type is valid.

    Raises
    ------
    ValueError
        If any of the languages or data types is invalid, with all errors reported together
        (one per line).
    """
    errors: List[str] = []

    # Handle language validation.
    languages, language_type_error = _normalize_argument(language, "Language")
    if language_type_error:
        errors.append(language_type_error)

    for lang in languages:
        error = _validate_single_item(lang, language_to_qid.keys(), "language")

        if error:
            errors.append(error)

    # Handle data type validation.
    data_types, data_type_type_error = _normalize_argument(data_type, "Data type")
    if data_type_type_error:
        errors.append(data_type_type_error)

    for dt in data_types:
        error = _validate_single_item(dt, data_type_metadata.keys(), "data-type")

        if error:
            errors.append(error)

    # Raise ValueError with the combined error message.
    if errors:
        raise ValueError("\n".join(errors))

    return True
14 changes: 10 additions & 4 deletions src/scribe_data/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,16 @@ def main() -> None:
# MARK: Setup CLI

args = parser.parse_args()
if args.language or args.data_type:
validate_language_and_data_type(
language=args.language, data_type=args.data_type
)

try:
if args.language or args.data_type:
validate_language_and_data_type(
language=args.language, data_type=args.data_type
)

except ValueError as e:
print(f"Input validation failed with error: {e}")
return

if args.upgrade:
upgrade_cli()
Expand Down
46 changes: 39 additions & 7 deletions tests/cli/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
validate_language_and_data_type,
)

# MARK: Utils


class TestCLIUtils(unittest.TestCase):
def test_correct_data_type(self):
Expand Down Expand Up @@ -145,6 +147,9 @@ def test_print_formatted_data_unknown_type(self):
mock_print.assert_called_once_with("unknown data type")


# MARK: Validate


class TestValidateLanguageAndDataType(unittest.TestCase):
def setUp(self):
self.qid_mapping = {
Expand Down Expand Up @@ -182,9 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid):
language=language_qid, data_type=data_type_qid
)

self.assertEqual(
str(context.exception), "Invalid language InvalidLanguage passed."
)
self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.")

@patch("scribe_data.cli.total.get_qid_by_input")
def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
Expand All @@ -198,9 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
language=language_qid, data_type=data_type_qid
)

self.assertEqual(
str(context.exception), "Invalid data-type InvalidDataType passed."
)
self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.")

@patch("scribe_data.cli.total.get_qid_by_input")
def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
Expand All @@ -216,5 +217,36 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):

self.assertEqual(
str(context.exception),
"Invalid language InvalidLanguage and data-type InvalidDataType passed.",
"Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.",
)

def test_validate_language_and_data_type_with_list(self):
    """Valid lists of languages and data types should pass validation without raising."""
    valid_languages = ["English", "Spanish"]
    valid_data_types = ["nouns", "verbs"]

    try:
        validate_language_and_data_type(valid_languages, valid_data_types)

    except ValueError:
        # Report an unexpected rejection as a test failure rather than an error.
        self.fail(
            "validate_language_and_data_type raised ValueError unexpectedly with valid lists!"
        )

def test_validate_language_and_data_type_with_qids(self):
    """QIDs passed directly in place of names should pass validation without raising."""
    english_qid = "Q1860"  # QID for English
    nouns_qid = "Q1084"  # QID for nouns

    try:
        validate_language_and_data_type(english_qid, nouns_qid)

    except ValueError:
        # Report an unexpected rejection as a test failure rather than an error.
        self.fail(
            "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
        )

def test_validate_language_and_data_type_mixed_validity_in_lists(self):
    """Lists mixing valid and invalid entries should raise, naming each invalid entry."""
    with self.assertRaises(ValueError) as context:
        validate_language_and_data_type(
            ["English", "InvalidLanguage"], ["nouns", "InvalidDataType"]
        )

    # The combined message must mention the invalid language and the invalid data type.
    message = str(context.exception)
    for expected_fragment in (
        "Invalid language InvalidLanguage",
        "Invalid data-type InvalidDataType",
    ):
        self.assertIn(expected_fragment, message)

0 comments on commit 362dea4

Please sign in to comment.