From c6b80e94aca96a25d80d470fb74db383ac84bd72 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 5 Feb 2025 00:05:02 +0100 Subject: [PATCH] perf: Fewer Value clones Signed-off-by: Dmitry Dygalo --- crates/jsonschema/src/compiler.rs | 52 +++++---- crates/jsonschema/src/keywords/ref_.rs | 109 +++++++++++++----- .../src/keywords/unevaluated_properties.rs | 6 +- 3 files changed, 113 insertions(+), 54 deletions(-) diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index 3fddfa4b..759b1e29 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -15,16 +15,20 @@ use crate::{ }; use ahash::{AHashMap, AHashSet}; use referencing::{ - Draft, List, Registry, Resolved, Resolver, Resource, ResourceRef, Uri, Vocabulary, - VocabularySet, + Draft, List, Registry, Resolved, Resolver, ResourceRef, Uri, Vocabulary, VocabularySet, }; use serde_json::Value; -use std::{borrow::Cow, cell::RefCell, iter::once, rc::Rc, sync::Arc}; +use std::{ + borrow::Cow, + iter::once, + rc::Rc, + sync::{Arc, Mutex}, +}; const DEFAULT_SCHEME: &str = "json-schema"; pub(crate) const DEFAULT_ROOT_URL: &str = "json-schema:///"; type BaseUri = Uri; -type ResolverComponents = (Arc, List, Resource); +type ResolverComponents<'a> = (Arc, List, Resolved<'a>); /// Container for information required to build a tree. /// @@ -37,7 +41,7 @@ pub(crate) struct Context<'a> { vocabularies: VocabularySet, location: Location, pub(crate) draft: Draft, - seen: Rc>>>>, + seen: Arc>>>>, } impl<'a> Context<'a> { @@ -48,6 +52,7 @@ impl<'a> Context<'a> { vocabularies: VocabularySet, draft: Draft, location: Location, + seen: Arc>>>>, ) -> Self { Context { config, @@ -56,7 +61,7 @@ impl<'a> Context<'a> { location, vocabularies, draft, - seen: Rc::new(RefCell::new(AHashSet::new())), + seen, } } pub(crate) fn draft(&self) -> Draft { @@ -79,7 +84,7 @@ impl<'a> Context<'a> { vocabularies: self.vocabularies.clone(), draft: resource.draft(), location: self.location.clone(), - seen: Rc::clone(&self.seen), + seen: Arc::clone(&self.seen), }) } pub(crate) fn as_resource_ref<'r>(&'a self, contents: &'r Value) -> ResourceRef<'r> { @@ -99,7 +104,7 @@ impl<'a> Context<'a> { vocabularies: self.vocabularies.clone(), location, draft: self.draft, - seen: Rc::clone(&self.seen), + seen: Arc::clone(&self.seen), } } @@ -110,7 +115,9 @@ impl<'a> Context<'a> { pub(crate) fn scopes(&self) -> List> { self.resolver.dynamic_scope() } - + pub(crate) fn full_base_uri(&self) -> Arc> { + self.resolver.base_uri() + } pub(crate) fn base_uri(&self) -> Option> { let base_uri = self.resolver.base_uri(); if base_uri.scheme().as_str() == DEFAULT_SCHEME { @@ -151,7 +158,7 @@ impl<'a> Context<'a> { draft, vocabularies, location, - seen: Rc::clone(&self.seen), + seen: Arc::clone(&self.seen), } } pub(crate) fn get_content_media_type_check( @@ -186,13 +193,13 @@ impl<'a> Context<'a> { let uri = self .resolver .resolve_against(&self.resolver.base_uri().borrow(), reference)?; - Ok(self.seen.borrow().contains(&*uri)) + Ok(self.seen.lock().unwrap().contains(&*uri)) } pub(crate) fn mark_seen(&self, reference: &str) -> Result<(), referencing::Error> { let uri = self .resolver .resolve_against(&self.resolver.base_uri().borrow(), reference)?; - self.seen.borrow_mut().insert(uri); + self.seen.lock().unwrap().insert(uri); Ok(()) } @@ -204,25 +211,23 @@ impl<'a> Context<'a> { pub(crate) fn lookup_maybe_recursive( &self, reference: &str, - is_recursive: bool, + maybe_recursive: bool, ) -> Result, ValidationError<'static>> { let resolved = if self.is_circular_reference(reference)? { // Otherwise we need to manually check whether this location has already been explored self.resolver.lookup(reference)? } else { // This is potentially recursive, but it is unknown yet - if !is_recursive { + if !maybe_recursive { self.mark_seen(reference)?; } return Ok(None); }; - let resource = self.draft().create_resource(resolved.contents().clone()); - let mut base_uri = resolved.resolver().base_uri(); - let scopes = resolved.resolver().dynamic_scope(); - if let Some(id) = resource.id() { - base_uri = self.registry.resolve_against(&base_uri.borrow(), id)?; - }; - Ok(Some((base_uri, scopes, resource))) + Ok(Some(( + self.resolver.base_uri(), + self.resolver.dynamic_scope(), + resolved, + ))) } pub(crate) fn location(&self) -> &Location { @@ -240,6 +245,10 @@ impl<'a> Context<'a> { self.vocabularies.contains(vocabulary) } } + + pub(crate) fn seen(&self) -> Arc>>>> { + Arc::clone(&self.seen) + } } pub(crate) fn build_validator( @@ -275,6 +284,7 @@ pub(crate) fn build_validator( vocabularies, draft, Location::new(), + Arc::new(Mutex::new(AHashSet::new())), ); // Validate the schema itself diff --git a/crates/jsonschema/src/keywords/ref_.rs b/crates/jsonschema/src/keywords/ref_.rs index 74a3c777..a4a42677 100644 --- a/crates/jsonschema/src/keywords/ref_.rs +++ b/crates/jsonschema/src/keywords/ref_.rs @@ -1,4 +1,7 @@ -use std::{rc::Rc, sync::Arc}; +use std::{ + rc::Rc, + sync::{Arc, Mutex}, +}; use crate::{ compiler, @@ -10,8 +13,9 @@ use crate::{ validator::{PartialApplication, Validate}, ValidationError, ValidationOptions, }; +use ahash::AHashSet; use once_cell::sync::OnceCell; -use referencing::{Draft, List, Registry, Resource, Uri, VocabularySet}; +use referencing::{Draft, List, Registry, Uri, VocabularySet}; use serde_json::{Map, Value}; pub(crate) enum RefValidator { @@ -24,19 +28,19 @@ impl RefValidator { pub(crate) fn compile<'a>( ctx: &compiler::Context, reference: &str, - is_recursive: bool, + maybe_recursive: bool, keyword: &str, ) -> Option> { let location = ctx.location().join(keyword); Some( - if let Some((base_uri, scopes, resource)) = { - match ctx.lookup_maybe_recursive(reference, is_recursive) { + if let Some((base_uri, scopes, resolved)) = { + match ctx.lookup_maybe_recursive(reference, maybe_recursive) { Ok(resolved) => resolved, Err(error) => return Some(Err(error)), } } { // NOTE: A better approach would be to compare the absolute locations - if let Value::Object(contents) = resource.contents() { + if let Value::Object(contents) = resolved.contents() { if let Some(Some(resolved)) = contents.get(keyword).map(Value::as_str) { if resolved == reference { return None; @@ -44,11 +48,14 @@ impl RefValidator { } } Ok(Box::new(RefValidator::Lazy(LazyRefValidator { - resource, + reference: Reference::Default { + reference: reference.to_string(), + }, config: Arc::clone(ctx.config()), registry: Arc::clone(&ctx.registry), base_uri, scopes, + seen: ctx.seen(), location, vocabularies: ctx.vocabularies().clone(), draft: ctx.draft(), @@ -79,6 +86,11 @@ impl RefValidator { } } +enum Reference { + Default { reference: String }, + Recursive, +} + /// Lazily evaluated validator used for recursive references. /// /// The validator tree nodes can't be arbitrary looked up in the current @@ -87,11 +99,12 @@ impl RefValidator { /// representation for the validation tree may allow building cycles easier and /// lazy evaluation won't be needed. pub(crate) struct LazyRefValidator { - resource: Resource, + reference: Reference, config: Arc, registry: Arc, scopes: List>, base_uri: Arc>, + seen: Arc>>>>, vocabularies: VocabularySet, location: Location, draft: Draft, @@ -101,20 +114,15 @@ pub(crate) struct LazyRefValidator { impl LazyRefValidator { #[inline] pub(crate) fn compile<'a>(ctx: &compiler::Context) -> CompilationResult<'a> { - let scopes = ctx.scopes(); - let resolved = ctx.lookup_recursive_reference()?; - let resource = ctx.draft().create_resource(resolved.contents().clone()); - let resolver = resolved.resolver(); - let mut base_uri = resolver.base_uri(); - if let Some(id) = resource.id() { - base_uri = resolver.resolve_against(&base_uri.borrow(), id)?; - }; + // Verify that the reference is resolvable + ctx.lookup_recursive_reference()?; Ok(Box::new(LazyRefValidator { - resource, + reference: Reference::Recursive, config: Arc::clone(ctx.config()), registry: Arc::clone(&ctx.registry), - base_uri, - scopes, + base_uri: ctx.full_base_uri(), + scopes: ctx.scopes(), + seen: ctx.seen(), vocabularies: ctx.vocabularies().clone(), location: ctx.location().join("$recursiveRef"), draft: ctx.draft(), @@ -123,21 +131,58 @@ impl LazyRefValidator { } fn lazy_compile(&self) -> &SchemaNode { self.inner.get_or_init(|| { - let resolver = self - .registry - .resolver_from_raw_parts(self.base_uri.clone(), self.scopes.clone()); - - let ctx = compiler::Context::new( - Arc::clone(&self.config), - Arc::clone(&self.registry), - Rc::new(resolver), - self.vocabularies.clone(), - self.draft, - self.location.clone(), - ); // INVARIANT: This schema was already used during compilation before detecting a // reference cycle that lead to building this validator. - compiler::compile(&ctx, self.resource.as_ref()).expect("Invalid schema") + match &self.reference { + Reference::Default { reference } => { + let resolver = self + .registry + .resolver_from_raw_parts(self.base_uri.clone(), self.scopes.clone()); + let resolved = resolver.lookup(reference).unwrap(); + let resource = self.draft.create_resource_ref(resolved.contents()); + let mut base_uri = resolved.resolver().base_uri(); + let scopes = resolved.resolver().dynamic_scope(); + if let Some(id) = resource.id() { + base_uri = self + .registry + .resolve_against(&base_uri.borrow(), id) + .unwrap(); + }; + + let resolver = self.registry.resolver_from_raw_parts(base_uri, scopes); + + let ctx = compiler::Context::new( + Arc::clone(&self.config), + Arc::clone(&self.registry), + Rc::new(resolver), + self.vocabularies.clone(), + self.draft, + self.location.clone(), + self.seen.clone(), + ); + + compiler::compile(&ctx, resource).expect("Invalid schema") + } + Reference::Recursive => { + let resolver = self + .registry + .resolver_from_raw_parts(self.base_uri.clone(), self.scopes.clone()); + let resolved = resolver + .lookup_recursive_ref() + .expect("Failed to resolve a recursive reference"); + let ctx = compiler::Context::new( + Arc::clone(&self.config), + Arc::clone(&self.registry), + Rc::new(resolver), + self.vocabularies.clone(), + self.draft, + self.location.clone(), + self.seen.clone(), + ); + let resource = ctx.draft().create_resource_ref(resolved.contents()); + compiler::compile(&ctx, resource).expect("Invalid schema") + } + } }) } } diff --git a/crates/jsonschema/src/keywords/unevaluated_properties.rs b/crates/jsonschema/src/keywords/unevaluated_properties.rs index 5ee48795..58c9670c 100644 --- a/crates/jsonschema/src/keywords/unevaluated_properties.rs +++ b/crates/jsonschema/src/keywords/unevaluated_properties.rs @@ -1,4 +1,7 @@ -use std::{rc::Rc, sync::Arc}; +use std::{ + rc::Rc, + sync::{Arc, Mutex}, +}; use ahash::AHashSet; use fancy_regex::Regex; @@ -185,6 +188,7 @@ impl LazyReference { self.vocabularies.clone(), self.draft, self.location.clone(), + Arc::new(Mutex::new(AHashSet::new())), ); Box::new(