Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix caching issues which render remote ref caching ineffective #466

Merged
merged 2 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
[flake8]
exclude = .git,.tox,__pycache__,.eggs,dist,.venv*,docs,build,_build
# we enforce 80 char width with `black` "loosely", so flake8 should be set to
# not fail on up to 90 chars of width
exclude = .git,.tox,__pycache__,dist,.venv*,docs,build
max-line-length = 90

# based on the flake8 conf for `black` itself:
# https://github.com/ambv/black/blob/master/.flake8
#
# W503/W504 conflict, black causes E203
ignore = W503,W504,E203,
# black related: W503/W504 conflict, black causes E203
ignore = W503,W504,E203,B019
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ repos:
rev: 7.1.0
hooks:
- id: flake8
args: ['--config', '.flake8']
additional_dependencies:
- 'flake8-bugbear==24.1.17'
- 'flake8-typing-as-t==0.0.3'
Expand Down
11 changes: 11 additions & 0 deletions src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import functools
import pathlib
import typing as t
import urllib.error
Expand Down Expand Up @@ -130,11 +131,21 @@ def get_validator(
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
return self._get_validator(format_opts, fill_defaults)

@functools.lru_cache
def _get_validator(
self,
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
retrieval_uri = self.get_schema_retrieval_uri()
schema = self.get_schema()

schema_dialect = schema.get("$schema")
if schema_dialect is not None and not isinstance(schema_dialect, str):
schema_dialect = None

# format checker (which may be None)
format_checker = make_format_checker(format_opts, schema_dialect)
Expand Down
8 changes: 4 additions & 4 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
else:
full_uri = uri

if full_uri in cache._cache:
return cache[uri]
if full_uri in cache:
return cache[full_uri]

full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
if full_uri_scheme in ("http", "https"):
Expand All @@ -100,8 +100,8 @@ def validation_callback(content: bytes) -> None:
else:
parsed_object = get_local_file(full_uri)

cache[uri] = parsed_object
return cache[uri]
cache[full_uri] = parsed_object
return cache[full_uri]

return retrieve_reference

Expand Down
57 changes: 57 additions & 0 deletions tests/acceptance/test_remote_ref_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,60 @@ def test_ref_resolution_with_custom_base_uri(run_line, tmp_path, check_passes):
assert result.exit_code == 0, output
else:
assert result.exit_code == 1, output


@pytest.mark.parametrize("num_instances", (1, 2, 10))
@pytest.mark.parametrize("check_passes", (True, False))
def test_remote_ref_resolution_callout_count_is_scale_free_in_instancefiles(
run_line, tmp_path, num_instances, check_passes
):
"""
Test that for any N > 1, validation of a schema with a ref against N instance files
has exactly the same number of callouts as validation when N=1

This proves that the validator and caching are working correctly, and we aren't
repeating callouts to rebuild state.
"""
schema_uri = "https://example.org/schemas/main.json"
ref_uri = "https://example.org/schemas/title_schema.json"

main_schema = {
"$id": schema_uri,
"$schema": "http://json-schema.org/draft-07/schema",
"properties": {
"title": {"$ref": "./title_schema.json"},
},
"additionalProperties": False,
}
title_schema = {"type": "string"}
responses.add("GET", schema_uri, json=main_schema)
responses.add("GET", ref_uri, json=title_schema)

# write N documents
instance_doc = {"title": "doc one" if check_passes else 2}
instance_paths = []
for i in range(num_instances):
instance_path = tmp_path / f"instance{i}.json"
instance_path.write_text(json.dumps(instance_doc))
instance_paths.append(str(instance_path))

result = run_line(
[
"check-jsonschema",
"--schemafile",
schema_uri,
]
+ instance_paths
)
output = f"\nstdout:\n{result.stdout}\n\nstderr:\n{result.stderr}"
if check_passes:
assert result.exit_code == 0, output
else:
assert result.exit_code == 1, output

# this is the moment of the "real" test run here:
# no matter how many instances there were, there should only have been two calls
# (one for the schema and one for the $ref)
assert len(responses.calls) == 2
assert len([c for c in responses.calls if c.request.url == schema_uri]) == 1
assert len([c for c in responses.calls if c.request.url == ref_uri]) == 1
Loading