Skip to content

Commit

Permalink
Instrument regress-backed pattern evaluation
Browse files Browse the repository at this point in the history
When an alternate RegexImplementation is selected, it will be passed
down to the code which builds a Validator. The Validator is extended
with the keyword validator provided by the RegexImplementation.

Because this uses the `extend()` interface, a test which subclassed a
validator broke -- this is documented in `jsonschema` as unsupported
usage, so the test simply had to be updated to use supported
interfaces.
  • Loading branch information
sirosen committed Nov 2, 2024
1 parent 5469baf commit 7bf7e19
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 26 deletions.
7 changes: 5 additions & 2 deletions src/check_jsonschema/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .formats import FormatOptions
from .instance_loader import InstanceLoader
from .parsers import ParseError
from .regex_variants import RegexImplementation
from .reporter import Reporter
from .result import CheckResult
from .schema_loader import SchemaLoaderBase, SchemaParseError, UnsupportedUrlScheme
Expand All @@ -29,14 +30,16 @@ def __init__(
reporter: Reporter,
*,
format_opts: FormatOptions,
regex_impl: RegexImplementation,
traceback_mode: str = "short",
fill_defaults: bool = False,
) -> None:
self._schema_loader = schema_loader
self._instance_loader = instance_loader
self._reporter = reporter

self._format_opts = format_opts if format_opts is not None else FormatOptions()
self._format_opts = format_opts
self._regex_impl = regex_impl
self._traceback_mode = traceback_mode
self._fill_defaults = fill_defaults

Expand All @@ -51,7 +54,7 @@ def get_validator(
) -> jsonschema.protocols.Validator:
try:
return self._schema_loader.get_validator(
path, doc, self._format_opts, self._fill_defaults
path, doc, self._format_opts, self._regex_impl, self._fill_defaults
)
except SchemaParseError as e:
self._fail("Error: schemafile could not be parsed as JSON", e)
Expand Down
4 changes: 3 additions & 1 deletion src/check_jsonschema/cli/main_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@

from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG
from ..checker import SchemaChecker
from ..formats import KNOWN_FORMATS, RegexVariantName
from ..formats import KNOWN_FORMATS
from ..instance_loader import InstanceLoader
from ..parsers import SUPPORTED_FILE_FORMATS
from ..regex_variants import RegexImplementation, RegexVariantName
from ..reporter import REPORTER_BY_NAME, Reporter
from ..schema_loader import (
BuiltinSchemaLoader,
Expand Down Expand Up @@ -340,6 +341,7 @@ def build_checker(args: ParseResult) -> SchemaChecker:
instance_loader,
reporter,
format_opts=args.format_opts,
regex_impl=RegexImplementation(args.regex_variant),
traceback_mode=args.traceback_mode,
fill_defaults=args.fill_defaults,
)
Expand Down
4 changes: 2 additions & 2 deletions src/check_jsonschema/cli/parse_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import jsonschema

from ..formats import FormatOptions
from ..regex_variants import RegexVariantName
from ..regex_variants import RegexImplementation, RegexVariantName
from ..transforms import Transform

if sys.version_info >= (3, 8):
Expand Down Expand Up @@ -99,7 +99,7 @@ def set_validator(
@property
def format_opts(self) -> FormatOptions:
return FormatOptions(
regex_impl=RegexImplementation(self.regex_variant),
enabled=not self.disable_all_formats,
regex_variant=self.regex_variant,
disabled_formats=self.disable_formats,
)
9 changes: 4 additions & 5 deletions src/check_jsonschema/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import jsonschema
import jsonschema.validators

from ..regex_variants import RegexImplementation, RegexVariantName
from ..regex_variants import RegexImplementation
from .implementations import validate_rfc3339, validate_time

# all known format strings except for a selection from draft3 which have either
Expand Down Expand Up @@ -40,12 +40,12 @@ class FormatOptions:
def __init__(
self,
*,
regex_impl: RegexImplementation,
enabled: bool = True,
regex_variant: RegexVariantName = RegexVariantName.default,
disabled_formats: tuple[str, ...] = (),
) -> None:
self.enabled = enabled
self.regex_variant = regex_variant
self.regex_impl = regex_impl
self.disabled_formats = disabled_formats


Expand All @@ -72,8 +72,7 @@ def make_format_checker(

# replace the regex check
del checker.checkers["regex"]
regex_impl = RegexImplementation(opts.regex_variant)
checker.checks("regex")(regex_impl.check_format)
checker.checks("regex")(opts.regex_impl.check_format)
checker.checks("date-time")(validate_rfc3339)
checker.checks("time")(validate_time)

Expand Down
30 changes: 30 additions & 0 deletions src/check_jsonschema/regex_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import typing as t

import jsonschema
import regress


Expand Down Expand Up @@ -29,3 +30,32 @@ def check_format(self, instance: t.Any) -> bool:
return False

return True

def pattern_keyword(
    self, validator: t.Any, pattern: str, instance: str, schema: t.Any
) -> t.Iterator[jsonschema.ValidationError]:
    """Keyword validator for ``pattern`` using the selected regex variant.

    Non-string instances are ignored. When ``self.variant`` is
    ``RegexVariantName.default`` the pattern is compiled with ``regress``;
    otherwise it is compiled with the stdlib ``re`` module.

    Yields a ``jsonschema.ValidationError`` when the pattern fails to
    compile, or when it compiles but is not found anywhere in ``instance``.
    """
    if not validator.is_type(instance, "string"):
        return

    if self.variant == RegexVariantName.default:
        try:
            regress_pattern = regress.Regex(pattern)
        except regress.RegressError:  # type: ignore[attr-defined]
            yield jsonschema.ValidationError(
                f"pattern {pattern!r} failed to compile"
            )
        else:
            # Only attempt a match when compilation succeeded; otherwise
            # the compiled pattern name is unbound and resuming the
            # generator would raise UnboundLocalError.
            if not regress_pattern.find(instance):
                yield jsonschema.ValidationError(
                    f"{instance!r} does not match {pattern!r}"
                )
    else:
        try:
            re_pattern = re.compile(pattern)
        except re.error:
            yield jsonschema.ValidationError(
                f"pattern {pattern!r} failed to compile"
            )
        else:
            # Same guard as above: match only with a compiled pattern.
            if not re_pattern.search(instance):
                yield jsonschema.ValidationError(
                    f"{instance!r} does not match {pattern!r}"
                )
20 changes: 19 additions & 1 deletion src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..builtin_schemas import get_builtin_schema
from ..formats import FormatOptions, make_format_checker
from ..parsers import ParserSet
from ..regex_variants import RegexImplementation
from ..utils import is_url_ish
from .errors import UnsupportedUrlScheme
from .readers import HttpSchemaReader, LocalSchemaReader, StdinSchemaReader
Expand Down Expand Up @@ -45,12 +46,23 @@ def set_defaults_then_validate(
)


def _extend_with_pattern_implementation(
    validator_class: type[jsonschema.protocols.Validator],
    regex_impl: RegexImplementation,
) -> type[jsonschema.Validator]:
    """Derive a validator class whose ``pattern`` keyword uses ``regex_impl``.

    The new class is built with ``jsonschema.validators.extend`` from
    ``validator_class``, with ``regex_impl.pattern_keyword`` registered as
    the handler for the ``pattern`` keyword.
    """
    keyword_overrides = {"pattern": regex_impl.pattern_keyword}
    return jsonschema.validators.extend(validator_class, keyword_overrides)


class SchemaLoaderBase:
def get_validator(
    self,
    path: pathlib.Path | str,
    instance_doc: dict[str, t.Any],
    format_opts: FormatOptions,
    regex_impl: RegexImplementation,
    fill_defaults: bool,
) -> jsonschema.protocols.Validator:
    # Base-class placeholder: this implementation always raises
    # NotImplementedError. The loader classes further down define
    # get_validator with this same parameter list.
    raise NotImplementedError
Expand Down Expand Up @@ -130,14 +142,16 @@ def get_validator(
path: pathlib.Path | str,
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
regex_impl: RegexImplementation,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
return self._get_validator(format_opts, fill_defaults)
return self._get_validator(format_opts, regex_impl, fill_defaults)

@functools.lru_cache
def _get_validator(
self,
format_opts: FormatOptions,
regex_impl: RegexImplementation,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
retrieval_uri = self.get_schema_retrieval_uri()
Expand Down Expand Up @@ -174,6 +188,9 @@ def _get_validator(
if fill_defaults:
validator_cls = _extend_with_default(validator_cls)

# set the regex variant for 'pattern' keywords
validator_cls = _extend_with_pattern_implementation(validator_cls, regex_impl)

# now that we know it's safe to try to create the validator instance, do it
validator = validator_cls(
schema,
Expand Down Expand Up @@ -212,6 +229,7 @@ def get_validator(
path: pathlib.Path | str,
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
regex_impl: RegexImplementation,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
schema_validator = jsonschema.validators.validator_for(instance_doc)
Expand Down
38 changes: 23 additions & 15 deletions tests/acceptance/test_custom_validator_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,32 @@ def _foo_module(mock_module):
"""\
import jsonschema
class MyValidator:
def __init__(self, schema, *args, **kwargs):
self.schema = schema
self.real_validator = jsonschema.validators.Draft7Validator(
schema, *args, **kwargs
)
def iter_errors(self, data, *args, **kwargs):
yield from self.real_validator.iter_errors(data, *args, **kwargs)
for event in data["events"]:
if "Occult" in event["title"]:
def check_occult_properties(validator, properties, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, subschema in properties.items():
if property in instance:
if property == "title" and "Occult" in instance["title"]:
yield jsonschema.exceptions.ValidationError(
"Error! Occult event detected! Run!",
validator=None,
validator=validator,
validator_value=None,
instance=event,
schema=self.schema,
instance=instance,
schema=schema,
)
yield from validator.descend(
instance[property],
subschema,
path=property,
schema_path=property,
)
MyValidator = jsonschema.validators.extend(
jsonschema.validators.Draft7Validator,
{"properties": check_occult_properties},
)
""",
)

Expand Down Expand Up @@ -115,7 +123,7 @@ def test_custom_validator_class_can_detect_custom_conditions(run_line, tmp_path)
str(doc),
],
)
assert result.exit_code == 1 # fail
assert result.exit_code == 1, result.stdout # fail
assert "Occult event detected" in result.stdout, result.stdout


Expand Down

0 comments on commit 7bf7e19

Please sign in to comment.