diff --git a/generators/rust/treeldr-rs-macros/src/generate/de.rs b/generators/rust/treeldr-rs-macros/src/generate/de.rs index e00fc09..a75b62f 100644 --- a/generators/rust/treeldr-rs-macros/src/generate/de.rs +++ b/generators/rust/treeldr-rs-macros/src/generate/de.rs @@ -214,9 +214,13 @@ pub fn generate(input: DeriveInput) -> Result { } }); - let unwrap_fields = layout.fields.keys().map(|name| { + let unwrap_fields = layout.fields.iter().map(|(name, f)| { let ident = syn::Ident::new(name, Span::call_site()); - quote!(#ident: data.#ident.ok_or_else(|| ::treeldr::DeserializeError::MissingField(#name.to_owned()))?) + if f.required { + quote!(#ident: data.#ident.ok_or_else(|| ::treeldr::DeserializeError::MissingField(#name.to_owned()))?) + } else { + quote!(#ident: data.#ident) + } }); quote! { @@ -715,7 +719,7 @@ fn generate_data( for l in rdf.interpretation.literals_of(&resource) { has_literal = true; let literal = rdf.vocabulary.literal(l).unwrap(); - let ty_iri = match literal.type_() { + let ty_iri = match &literal.type_ { ::treeldr::rdf_types::LiteralType::Any(i) => { rdf.vocabulary.iri(i).unwrap() }, @@ -725,7 +729,7 @@ fn generate_data( }; if ty_iri == expected_ty_iri { - if let Ok(value) = ::treeldr::de::FromRdfLiteral::from_rdf_literal(literal.value().as_str()) { + if let Ok(value) = ::treeldr::de::FromRdfLiteral::from_rdf_literal(&literal.value) { if result.replace(value).is_some() { return Err(::treeldr::DeserializeError::AmbiguousLiteralValue) } diff --git a/generators/rust/treeldr-rs-macros/src/generate/ser.rs b/generators/rust/treeldr-rs-macros/src/generate/ser.rs index 915392b..bdf7108 100644 --- a/generators/rust/treeldr-rs-macros/src/generate/ser.rs +++ b/generators/rust/treeldr-rs-macros/src/generate/ser.rs @@ -59,12 +59,12 @@ pub fn generate(input: DeriveInput) -> Result { match ::treeldr::AsId::as_id(self) { ::treeldr::rdf_types::Id::Iri(value) => { let id = rdf.vocabulary.insert(value); - rdf.interpretation.assign_iri(inputs[0].clone(), id); + 
rdf.interpretation.assign_iri(&inputs[0], id); Ok(()) } ::treeldr::rdf_types::Id::Blank(value) => { let id = rdf.vocabulary.insert_blank_id(value); - rdf.interpretation.assign_blank_id(inputs[0].clone(), id); + rdf.interpretation.assign_blank_id(&inputs[0], id); Ok(()) } } @@ -179,17 +179,33 @@ pub fn generate(input: DeriveInput) -> Result { let m = field.value.input.len(); - quote! { - { - let env = env.intro(rdf, #field_intro); - env.instantiate_dataset(&#field_dataset, output); - <#field_layout as ::treeldr::SerializeLd<#m, V, I>>::serialize_ld_with( - &self.#field_ident, - rdf, - &env.instantiate_patterns(&#field_inputs), - #field_graph.as_ref(), - output - )?; + if field.required { + quote! { + { + let env = env.intro(rdf, #field_intro); + env.instantiate_dataset(&#field_dataset, output); + <#field_layout as ::treeldr::SerializeLd<#m, V, I>>::serialize_ld_with( + &self.#field_ident, + rdf, + &env.instantiate_patterns(&#field_inputs), + #field_graph.as_ref(), + output + )?; + } + } + } else { + quote! 
{ + if let Some(value) = &self.#field_ident { + let env = env.intro(rdf, #field_intro); + env.instantiate_dataset(&#field_dataset, output); + <#field_layout as ::treeldr::SerializeLd<#m, V, I>>::serialize_ld_with( + value, + rdf, + &env.instantiate_patterns(&#field_inputs), + #field_graph.as_ref(), + output + )?; + } } } }); @@ -457,9 +473,12 @@ fn pattern_interpretation(pattern: &Pattern) -> TokenStream { match pattern { Pattern::Var(i) => { let i = *i as usize; - quote!(inputs[#i].clone()) + quote!(&inputs[#i]) + } + Pattern::Resource(term) => { + let term = term_interpretation(term); + quote!(&#term) } - Pattern::Resource(term) => term_interpretation(term), } } diff --git a/generators/rust/treeldr-rs-macros/src/parse.rs b/generators/rust/treeldr-rs-macros/src/parse.rs index 3bd5d44..29981bb 100644 --- a/generators/rust/treeldr-rs-macros/src/parse.rs +++ b/generators/rust/treeldr-rs-macros/src/parse.rs @@ -213,15 +213,18 @@ pub fn parse(input: syn::DeriveInput) -> Result { .map(|f| { let name = f.ident.unwrap().to_string(); let attrs = ComponentAttributes::parse(f.attrs)?; + let ty = FieldType::new(f.ty); + let required = ty.is_required(); let field = Field { intro: attrs.intro.map(Into::into).unwrap_or_default(), dataset: attrs.dataset.unwrap_or_default(), property: attrs.property, value: ValueFormatOrLayout::Format(ValueFormat { - layout: type_map.insert(f.ty).into(), + layout: type_map.insert(ty.into_type()).into(), input: attrs.input.map(Into::into).unwrap_or_default(), graph: attrs.graph.unwrap_or_default().into(), }), + required, }; Ok((name, field)) @@ -637,55 +640,61 @@ impl ComponentAttributes { } } -// pub enum FieldType { -// Optional(syn::Type), -// Required(syn::Type) -// } - -// impl FieldType { -// pub fn new(ty: syn::Type) -> Self { -// if is_option_type(&ty) { -// let syn::Type::Path(path) = ty else { unreachable!() }; -// let syn::PathArguments::AngleBracketed(args) = path.path.segments.into_iter().next().unwrap().arguments else { unreachable!() 
}; -// let syn::GenericArgument::Type(item) = args.args.into_iter().next().unwrap() else { unreachable!() }; -// Self::Optional(item) -// } else { -// Self::Required(ty) -// } -// } - -// pub fn is_required(&self) -> bool { -// matches!(self, Self::Required(_)) -// } - -// pub fn into_type(self) -> syn::Type { -// match self { -// Self::Required(ty) => ty, -// Self::Optional(ty) => ty -// } -// } -// } - -// fn is_option_type(ty: &syn::Type) -> bool { -// if let syn::Type::Path(path) = ty { -// if path.qself.is_none() { -// if path.path.segments.len() == 1 { -// let segment = path.path.segments.iter().next().unwrap(); -// if segment.ident == "Option" { -// if let syn::PathArguments::AngleBracketed(args) = &segment.arguments { -// if args.args.len() == 1 { -// if let syn::GenericArgument::Type(_) = args.args.iter().next().unwrap() { -// return true -// } -// } -// } -// } -// } -// } -// } - -// false -// } +pub enum FieldType { + Optional(syn::Type), + Required(syn::Type), +} + +impl FieldType { + pub fn new(ty: syn::Type) -> Self { + if is_option_type(&ty) { + let syn::Type::Path(path) = ty else { + unreachable!() + }; + let syn::PathArguments::AngleBracketed(args) = + path.path.segments.into_iter().next().unwrap().arguments + else { + unreachable!() + }; + let syn::GenericArgument::Type(item) = args.args.into_iter().next().unwrap() else { + unreachable!() + }; + Self::Optional(item) + } else { + Self::Required(ty) + } + } + + pub fn is_required(&self) -> bool { + matches!(self, Self::Required(_)) + } + + pub fn into_type(self) -> syn::Type { + match self { + Self::Required(ty) => ty, + Self::Optional(ty) => ty, + } + } +} + +fn is_option_type(ty: &syn::Type) -> bool { + if let syn::Type::Path(path) = ty { + if path.qself.is_none() && path.path.segments.len() == 1 { + let segment = path.path.segments.iter().next().unwrap(); + if segment.ident == "Option" { + if let syn::PathArguments::AngleBracketed(args) = &segment.arguments { + if args.args.len() == 1 { + if let 
syn::GenericArgument::Type(_) = args.args.iter().next().unwrap() { + return true; + } + } + } + } + } + } + + false +} fn extract_vec_item(ty: syn::Type) -> Result { let span = ty.span(); diff --git a/generators/rust/treeldr-rs/tests/id.rs b/generators/rust/treeldr-rs/tests/id.rs index 7667a93..6c9d492 100644 --- a/generators/rust/treeldr-rs/tests/id.rs +++ b/generators/rust/treeldr-rs/tests/id.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn id() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -7,7 +7,7 @@ fn id() { impl treeldr::AsId for Id { fn as_id(&self) -> rdf_types::Id<&iref::Iri, &rdf_types::BlankId> { - self.0.as_id_ref() + self.0.as_lexical_id_ref() } } diff --git a/generators/rust/treeldr-rs/tests/list.rs b/generators/rust/treeldr-rs/tests/list.rs index f10c0e4..a0987f4 100644 --- a/generators/rust/treeldr-rs/tests/list.rs +++ b/generators/rust/treeldr-rs/tests/list.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn list_unordered() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -6,7 +6,7 @@ fn list_unordered() { pub struct UnorderedList(Vec); } -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn list_ordered() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -14,7 +14,7 @@ fn list_ordered() { pub struct UnorderedList(Vec); } -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn list_sized() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] diff --git a/generators/rust/treeldr-rs/tests/literal.rs b/generators/rust/treeldr-rs/tests/literal.rs index 64c16b6..16ed04a 100644 --- a/generators/rust/treeldr-rs/tests/literal.rs +++ b/generators/rust/treeldr-rs/tests/literal.rs @@ -1,11 +1,11 @@ -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn literal_unit() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] pub struct Unit; } -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn 
literal_boolean() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -13,7 +13,7 @@ fn literal_boolean() { pub struct Boolean(bool); } -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn literal_i32() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -22,7 +22,7 @@ fn literal_i32() { pub struct I32(i32); } -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn literal_string() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] diff --git a/generators/rust/treeldr-rs/tests/record.rs b/generators/rust/treeldr-rs/tests/record.rs index a445721..7288b4f 100644 --- a/generators/rust/treeldr-rs/tests/record.rs +++ b/generators/rust/treeldr-rs/tests/record.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn record() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] @@ -6,5 +6,8 @@ fn record() { pub struct Record { #[tldr("ex:foo")] foo: String, + + #[tldr("ex:bar")] + optional: Option, } } diff --git a/generators/rust/treeldr-rs/tests/sum.rs b/generators/rust/treeldr-rs/tests/sum.rs index 3f738e1..57bfb5f 100644 --- a/generators/rust/treeldr-rs/tests/sum.rs +++ b/generators/rust/treeldr-rs/tests/sum.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "derive")] +#[cfg(feature = "macros")] #[test] fn sum() { #[derive(treeldr::SerializeLd, treeldr::DeserializeLd)] diff --git a/layouts/book/book.toml b/layouts/book/book.toml index 99bb19e..9943281 100644 --- a/layouts/book/book.toml +++ b/layouts/book/book.toml @@ -2,4 +2,7 @@ title = "TreeLDR Layouts" [build] -build-dir = "target" \ No newline at end of file +build-dir = "target" + +[preprocessor.graphviz] +command = "mdbook-graphviz" \ No newline at end of file diff --git a/layouts/book/src/README.md b/layouts/book/src/README.md index 39c0ebb..62067b5 100644 --- a/layouts/book/src/README.md +++ b/layouts/book/src/README.md @@ -2,13 +2,13 @@ TreeLDR Layouts are a data serialization and deserialization tool for the Resource Description Framework 
(RDF). -It can be used to convert RDF datasets into tree-like values (such as JSON), +It can be used to convert RDF graphs into tree-like values (such as JSON), and back. The idea behind layouts is simple: each layout describes the expected shape of a tree value. This shape can be either a record (sometimes called object, in JSON for instance), a list, a number, etc. Each part of this shape is then associated to -a fraction of the represented RDF dataset. +a subset of the represented RDF dataset. ## Basic layout diff --git a/layouts/book/src/SUMMARY.md b/layouts/book/src/SUMMARY.md index 692e79e..0ae5c92 100644 --- a/layouts/book/src/SUMMARY.md +++ b/layouts/book/src/SUMMARY.md @@ -4,19 +4,21 @@ - [RDF Basics](rdf-basics.md) - [Data Model](data-model/README.md) - - [Values](data-model/values.md) + - [Literals](data-model/literals.md) + - [Records (Objects)](data-model/records.md) + - [Lists](data-model/lists.md) - [Types](data-model/types.md) - - [Paths](data-model/paths.md) + - [Layouts](layouts/README.md) + - [Literals](layouts/literals.md) - [Product (Record)](layouts/record.md) - [Sum](layouts/sum.md) - - [Literals](layouts/literals.md) - [Lists](layouts/lists.md) - - [Functional Layouts](layouts/functional-layouts.md) -- [Abstract Layouts](abstract-layouts/README.md) + + + - [Algorithms](algorithms/README.md) - [Serialization](algorithms/serialization.md) - [Deserialization](algorithms/deserialization.md) diff --git a/layouts/book/src/data-model/README.md b/layouts/book/src/data-model/README.md index 346bd44..732d4c8 100644 --- a/layouts/book/src/data-model/README.md +++ b/layouts/book/src/data-model/README.md @@ -1,8 +1,25 @@ # Data Model -Layouts define a mapping from RDF datasets to structured data. +Layouts define a mapping from RDF datasets to tree data. The RDF dataset model is already specified by the RDF specification. -This chapter details the data model for structured values. 
+ +This section specifies all the *structured values* that can be processed and/or produced using TreeLDR layouts. A value can be either: + - a **literal** value, representing any atomic value; + - a **record**, representing a collection of key-value pairs; + - a **list**, representing a sequence of values. + +This data-model is close to the JSON data model, with some notable exceptions: + - The value space of numbers is all the rational numbers, and not just decimal numbers; + - Surrogate Unicode code points are not allowed in the lexical representation of text strings; + - There is a dedicated datatype for binary strings. + +This chapter details the data model for tree values. + +# Syntax In addition, to write the formal specification of layouts, we also define -a syntax for values, along with a type system. \ No newline at end of file +a syntax for values, along with a type system. + +```abnf +value = literal | record | list +``` \ No newline at end of file diff --git a/layouts/book/src/data-model/lists.md b/layouts/book/src/data-model/lists.md new file mode 100644 index 0000000..0a7a3d0 --- /dev/null +++ b/layouts/book/src/data-model/lists.md @@ -0,0 +1,10 @@ +# Lists + +The list datatype contains all the finite sequences of values. + +## Syntax + +```abnf +list = "[" [items] ws "]" +items = ws value | ws value ws "," ws items +``` \ No newline at end of file diff --git a/layouts/book/src/data-model/literals.md b/layouts/book/src/data-model/literals.md new file mode 100644 index 0000000..f618acb --- /dev/null +++ b/layouts/book/src/data-model/literals.md @@ -0,0 +1,100 @@ +# Literals + +A literal value can be: + - the unit value **unit** written `()` + - a **boolean** value, either `true` or `false`, + - a **number**, written as a decimal number (e.g. `12.9`), + - a **binary string** written as a hexadecimal value preceded by a `#` character, + - a **text string**, written between double quotes `"`.
+ +## Syntax + +The ABNF grammar of literals is as follows: + +```abnf +literal = unit | boolean | number | bytes | string +``` + +# Unit + +Unit is a singleton datatype. It contains one unique value, the unit value. +This value is very similar to JSON's `null` value. + +## Syntax + +The unit value is written using a pair of parentheses `()`. + +```abnf +unit = "()" +``` + +# Boolean + +The boolean datatype contains the two values `true` and `false`. + +## Syntax + +The ABNF grammar of boolean values is as follows: + +```abnf +boolean = "true" | "false" +``` + +# Number + +The number datatype contains all the [rational numbers (ℚ)](https://en.wikipedia.org/wiki/Rational_number). + +## Syntax + +Numbers are written either as decimal numbers, or as fractions of two integer +numbers. + +```abnf +number = decimal | fraction +decimal = +DIGIT [ "." DIGIT ] +fraction = +DIGIT "\" NZDIGIT +DIGIT + +NZDIGIT = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +``` + +# Binary String + +The binary string datatype contains any string of bytes. + +## Syntax + +```abnf +bytes = "#" *( HEXDIG HEXDIG ) +``` + +# Text String + +The text string datatype contains any string of [Unicode scalar values](https://www.unicode.org/glossary/#unicode_scalar_value), each of which is any [Unicode code point](https://www.unicode.org/glossary/#code_point) other than a [surrogate code point](https://www.unicode.org/glossary/#surrogate_code_point). + +## Syntax + +A string is written as a sequence of characters between double quotes. Any +Unicode scalar value is allowed starting from U+0020 inclusive (the whitespace +character) except for U+0022 (the quotation mark) and U+005C (the reverse +solidus) which must be escaped along with control characters before U+0020.
+ +```abnf +string = quotation-mark *char quotation-mark + +char = unescaped + / escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 "{" 1*6HEXDIG "}" ; u{XXXX} U+XXXX + ) + +escape = %x5C ; \ +quotation-mark = %x22 ; " +unescaped = %x20-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF +``` \ No newline at end of file diff --git a/layouts/book/src/data-model/records.md b/layouts/book/src/data-model/records.md new file mode 100644 index 0000000..f4b0204 --- /dev/null +++ b/layouts/book/src/data-model/records.md @@ -0,0 +1,13 @@ +# Records + +The record datatype contains all finite [partial functions](https://en.wikipedia.org/wiki/Partial_function) from keys to values, where keys are text string literals. + +## Syntax + +```abnf +record = "{" [bindings] ws "}" +bindings = ws binding | ws binding ws "," ws bindings +binding = key ws ":" ws value +key = string +ws = *WS +``` \ No newline at end of file diff --git a/layouts/book/src/data-model/types.md b/layouts/book/src/data-model/types.md index 9575a03..01bb613 100644 --- a/layouts/book/src/data-model/types.md +++ b/layouts/book/src/data-model/types.md @@ -1,28 +1,33 @@ # Types -A type is a set of values. +The data-model presented so far in this chapter is fundamentally *untyped*. +However, it can (and will in the next chapter) be useful to formally describe +a subset of values sharing a given *shape*. +In this section we define types as sets of tree values. -## Syntax +There are three primary sorts of types: + - Data-types, describing sets of literal values; + - Record types: describing sets of record values; + - List types: describing sets of list values. -```abnf -type = record-type | list-type | value -``` +In addition, it is possible to compose new types by union or intersection. 
-## Type reference +## Syntax -A type reference is a name referring to a type definition. +Just like for the data-model itself, we define a syntax for types. ```abnf +type = datatype | record-type | list-type type-ref = ALPHA *(ALPHA | DIGIT) +type-expr = type-ref | type ``` -## Type expression +### Type references and expressions -A type expression is either a type reference or definition. - -```abnf -type-expr = type-ref | type -``` +A type reference, corresponding to the `type-ref` production in the above +grammar, is a name referring to a type definition. +A type expression (`type-expr` production) is either a type reference or +definition. ## Datatype @@ -45,7 +50,7 @@ binding-type = key ws ":" ws type-expr For example: -``` +```ts { "id": string, "name": string diff --git a/layouts/book/src/data-model/values.md b/layouts/book/src/data-model/values.md index 87d8305..89a9695 100644 --- a/layouts/book/src/data-model/values.md +++ b/layouts/book/src/data-model/values.md @@ -1,135 +1 @@ # Values - -This section specifies all the *structured values* that can be processed and/or produced using TreeLDR layouts. A value can be either: - - a **literal** value, representing any atomic value; - - a **record**, representing a collection of key-value pairs; - - a **list**, representing a sequence of values. - -This data-model is close to the JSON data model, with some notable exceptions: - - The value space of numbers is all the rational numbers, and not just decimal numbers; - - Surrogate Unicode code points are not allowed in the lexical representation of text strings; - - There is a dedicated datatype for binary strings. - -## Syntax - -```abnf -value = literal | record | list -``` - -## Literals - -A literal value can be: - - the unit value **unit** written `()` - - a **boolean** value, either `true` or `false`, - - a **number**, written as a decimal number (e.g. 
`12.9`), - - a **binary string** written as an hexadecimal value preceded by a `#` character, - - a **text string**, written between double quotes `"`. - -### Syntax - -```abnf -literal = unit | boolean | number | bytes | string -``` - -## Unit - -Unit is a singleton datatype. - -### Syntax - -The unit value is written using a pair of parentheses `()`. - -```abnf -unit = "()" -``` - -## Boolean - -The boolean datatype contains the two values `true` and `false`. - -### Syntax - -```abnf -boolean = "true" | "false" -``` - -## Number - -The number datatype contains all the [rational numbers (ℚ)](https://en.wikipedia.org/wiki/Rational_number). - -### Syntax - -```abnf -number = decimal | fraction -decimal = +DIGIT [ "." DIGIT ] -fraction = +DIGIT \ NZDIGIT +DIGIT - -NZDIGIT = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -``` - -## Binary String - -The binary string datatype contains any string of bytes. - -### Syntax - -```abnf -bytes = "#" *( HEXDIGIT HEXDIGIT ) -``` - -## Text String - -The text string datatype contains any string of [Unicode scalar value](https://www.unicode.org/glossary/#unicode_scalar_value), which is any [Unicode code point](https://www.unicode.org/glossary/#code_point) other than a [surrogate code point](https://www.unicode.org/glossary/#surrogate_code_point). - -### Syntax - -A string is written as a sequence of characters between double quotes. Any -Unicode scalar value is allowed starting from U+0020 inclusive (the whitespace -character) except for U+0022 (the quotation mark) and U+005C (the reverse -solidus) which must be escaped along with control characters before U+0020. 
- -```abnf -string = quotation-mark *char quotation-mark - -char = unescaped - / escape ( - %x22 / ; " quotation mark U+0022 - %x5C / ; \ reverse solidus U+005C - %x2F / ; / solidus U+002F - %x62 / ; b backspace U+0008 - %x66 / ; f form feed U+000C - %x6E / ; n line feed U+000A - %x72 / ; r carriage return U+000D - %x74 / ; t tab U+0009 - %x75 "{" 1*6HEXDIG "}" ; u{XXXX} U+XXXX - ) - -escape = %x5C ; \ -quotation-mark = %x22 ; " -unescaped = %x20-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF -``` - -## Records - -The record datatype contains all finite [partial functions](https://en.wikipedia.org/wiki/Partial_function) from keys to values, where keys are text string literals. - -### Syntax - -```abnf -record = "{" [bindings] ws "}" -bindings = ws binding | ws binding ws "," ws bindings -binding = key ws ":" ws value -key = string -ws = *WS -``` - -## Lists - -The list datatype contains all the finite sequences of values. - -### Syntax - -```abnf -list-type = "[" [items] ws "]" -items = ws value | ws value ws "," ws items -``` \ No newline at end of file diff --git a/layouts/book/src/layouts/README.md b/layouts/book/src/layouts/README.md index 88fc3dd..f1b4c20 100644 --- a/layouts/book/src/layouts/README.md +++ b/layouts/book/src/layouts/README.md @@ -1,7 +1,7 @@ # Layouts A layout defines a bidirectional transformation from/to RDF datasets and -structured values (as defined in the [Values](/data-model/values.md) section). +tree values (as defined in the [Values](/data-model/values.md) section). Using a layout to transform an RDF dataset to a value is called *serialization*. The inverse transformation, from value to RDF dataset, is called *deserialization*. 
@@ -22,5 +22,5 @@ TODO ## Type Definition ```ts -type Layout = Never | LiteralLayout | ProductLayout | SumLayout | ListLayout | Always ; +type Layout = LiteralLayout | ProductLayout | SumLayout | ListLayout | Always | Never ; ``` \ No newline at end of file diff --git a/layouts/book/src/layouts/literals.md b/layouts/book/src/layouts/literals.md index 46a84db..beebe3b 100644 --- a/layouts/book/src/layouts/literals.md +++ b/layouts/book/src/layouts/literals.md @@ -1,19 +1,82 @@ # Literal Layouts +A literal layout matches any literal tree value and any RDF literal lexical +representation satisfying a given set of constraints. +Literal layouts are refined further into five categories corresponding to the +five primitive data-types defined by TreeLDR's data-model (unit, boolean, +number, binary string and text string). +The following table summarizes what matches a literal layout in the tree space, +and RDF space. + +| Literal layout type | Tree space | RDF space | +| ------------------- | ---------- | --------- | +| Unit | Unit, or any predefined constant | Any resource | +| Boolean | Any boolean | Any resource with a literal representation of type | +| Number | Any number | Any resource with a literal representation of type | +| Binary string | Any binary string | Any resource with a literal representation of type or | +| Text string | Any text string | Any resource with a literal representation | + +Literal layouts are represented by values of the following type: + +```ts +type LiteralLayout = + UnitLayout + | BooleanLayout + | NumberLayout + | BinaryStringLayout + | TextStringLayout +``` + ## Unit -TODO +The unit layout matches, in the tree space, the unit value (or any given +constant) and in the RDF space, any resource. + +Unit layouts are represented by values of the following type: + +```ts +type UnitLayout = LayoutDefinition & { + "type": "unit", + "const"?: Any +} +``` + +The optional `const` attribute specifies which tree value matches the layout. 
+The default value for the `const` attribute is the unit value `()`. ## Boolean -TODO +```ts +type BooleanLayout = LayoutDefinition & { + "type": "boolean", + "resource": Resource +} +``` ## Number -TODO +```ts +type NumberLayout = LayoutDefinition & { + "type": "number", + "resource": Resource +} +``` ## Binary String -TODO +```ts +type BinaryStringLayout = LayoutDefinition & { + "type": "bytes", + "resource": Resource +} +``` + +## Text String -## Text String \ No newline at end of file +```ts +type TextStringLayout = LayoutDefinition & { + "type": "string", + "resource": Resource, + "pattern"?: Regex +} +``` \ No newline at end of file diff --git a/layouts/book/src/layouts/record.md b/layouts/book/src/layouts/record.md new file mode 100644 index 0000000..c074218 --- /dev/null +++ b/layouts/book/src/layouts/record.md @@ -0,0 +1 @@ +# Product (Record) diff --git a/layouts/book/src/rdf-basics.md b/layouts/book/src/rdf-basics.md index 152e332..9e69c2a 100644 --- a/layouts/book/src/rdf-basics.md +++ b/layouts/book/src/rdf-basics.md @@ -5,48 +5,105 @@ In this model, every piece of data is a node in a labeled directed graph. Each node is called a **Resource**, and resources are connected together using *properties*, which are resources themselves. -TODO example +
+ +```dot process +digraph { + a -> b [label = "d"] + a -> c [label = "e"] +} +``` +
+ +In this example, `a`, `b`, `c`, `d` and `e` are all resources. ## Lexical Representations Resources are given *lexical representations* that uniquely identify them across the Web and give them meaning. There are three kind of lexical representations a resource can have: - - International Resource Identifiers (IRI), similar to URLs but with + - [International Resource Identifiers (IRI)][iri], similar to URLs but with international characters; Example: `https://example.org/Ῥόδος` - - Literal values (a text string, a number, etc.); - - Blank node identifiers, that locally identifies a resource in a give RDF - document. Blank node identifiers do not cross document boundaries and are - solely used to give a temporary name to nameless resources. + - [Literal values][literals] (a text string, a number, etc.); + - [Blank node identifiers][blank-ids], which locally identify a resource in a + given RDF document. Blank node identifiers do not cross document boundaries + and are solely used to give a temporary name to nameless resources. Such identifiers are similar to IRIs with `_` as scheme. Example: `_:SomeAnonymousResource`. - -This lexical representation allows the definition of some -proper textual syntaxes for RDF, such as N-Triples or RDF-Turtle. The former is -a simple enumeration of the graph's edges, called triples. -A triple has three parameters: the subject resource, the property, and the -object (value) of the property. -TODO example +[iri]: +[literals]: +[blank-ids]: + +
+ +```dot process +digraph { + "https://example.org/#Alice" -> "_:Bob" [label = "https://example.org/#knows"] + "https://example.org/#Alice" -> "29" [label = "https://example.org/#age"] + "_:Bob" -> "https://example.org/#Alice" +} +``` +
+ +Lexical representations are also used to define some proper textual syntaxes +for RDF, such as [N-Triples][n-triples] or [RDF-Turtle][rdf-turtle]. +In this document, we will primarily use the N-Triples syntax to write RDF +datasets. +In this syntax, a dataset is described by enumerating every edge of the graph, +a **triple** of the form `subject predicate object`, in sequence. +The `subject` is the starting point of the edge, the predicate the label of +the edge, and the object the endpoint of the edge. + +[n-triples]: +[rdf-turtle]: + +Here is the above graph written as N-Triples: +``` + _:Bob . + "29"^^http://www.w3.org/2001/XMLSchema#integer . +_:Bob . +``` ## Interpretations The mapping from lexical representation to resource is called an **interpretation**. It is a partial function mapping the set of lexical representations to the set of resources. +For instance, the following lexical dataset: + +
+ +```dot process +digraph { + rankdir="LR" + "_:Superman" -> "_:ClarkKent" [label = "http://www.w3.org/2002/07/owl#sameAs"] +} +``` +
-TODO illustration +can be *interpreted* into the following interpreted resource graph when the +terms `_:Superman` and `_:ClarkKent` are interpreted as the same resource +following the semantics of `http://www.w3.org/2002/07/owl#sameAs`: +
-As shown above, a resource may have more than one lexical representation. +```dot process +digraph { + rankdir="LR" + a [label=""] + a -> a +} +``` +
+ +As shown in this example, the same resource may have more than one lexical +representation. In this case the shape of the lexical representation of a graph may differ from its actual shape. -In the following graph, `_:A` and `_:B` have the same -interpretation (by definition of `http://www.w3.org/2002/07/owl#sameAs`). -As a result, its lexical form (on the left) contains two nodes, while its -interpreted form (on the right) contains only a single node. - -TODO example +Here since `_:Superman` and `_:ClarkKent` are interpreted as the same resource, +the lexical form of the graph (on top) contains two nodes, while its interpreted +form (on the bottom) contains only a single node. ## Datasets diff --git a/layouts/src/abs.rs b/layouts/src/abs.rs index 21d0692..30109ee 100644 --- a/layouts/src/abs.rs +++ b/layouts/src/abs.rs @@ -110,3 +110,7 @@ pub struct BuilderWithInterpretationMut<'a, V, I: Interpretation> { interpretation: &'a mut I, builder: &'a mut Builder, } + +fn is_false(b: &bool) -> bool { + !*b +} diff --git a/layouts/src/abs/syntax.rs b/layouts/src/abs/syntax.rs index 76fb334..ddcbde0 100644 --- a/layouts/src/abs/syntax.rs +++ b/layouts/src/abs/syntax.rs @@ -1697,6 +1697,7 @@ where intro: field.intro.len() as u32, value: field.value.build(context, &scope)?, dataset, + required: field.required, }, ); } @@ -1759,6 +1760,9 @@ pub struct Field { #[serde(default, skip_serializing_if = "Option::is_none")] pub property: Option, + + #[serde(default, skip_serializing_if = "super::is_false")] + pub required: bool, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] diff --git a/layouts/src/distill/de.rs b/layouts/src/distill/de.rs index faa7765..3af2150 100644 --- a/layouts/src/distill/de.rs +++ b/layouts/src/distill/de.rs @@ -1,10 +1,10 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap, HashSet}, hash::Hash, }; use crate::{ - layout::{DataLayout, LayoutType, ListLayout, LiteralLayout}, + 
layout::{DataLayout, LayoutType, ListLayout, LiteralLayout, ProductLayoutType}, Layout, Layouts, Literal, Pattern, Ref, Value, ValueFormat, }; use iref::IriBuf; @@ -45,6 +45,13 @@ pub enum Error { #[error("layout {0} not found")] LayoutNotFound(Ref), + + #[error("missing required field `{field_name}`")] + MissingRequiredField { + layout: Ref, + field_name: String, + value: BTreeMap, + }, } impl Error { @@ -58,6 +65,15 @@ impl Error { Self::DataAmbiguity => Error::DataAmbiguity, Self::TermAmbiguity(a) => Error::TermAmbiguity(a), Self::LayoutNotFound(layout_ref) => Error::LayoutNotFound(layout_ref.map(f)), + Self::MissingRequiredField { + layout, + field_name, + value, + } => Error::MissingRequiredField { + layout: layout.map(f), + field_name, + value, + }, } } } @@ -678,6 +694,16 @@ where } } + for (name, field) in &layout.fields { + if field.required && !value.contains_key(name) { + return Err(Error::MissingRequiredField { + layout: layout_ref.clone().cast(), + field_name: name.clone(), + value: value.clone(), + }); + } + } + Ok(()) } _ => Err(Error::IncompatibleLayout), diff --git a/layouts/src/distill/hy.rs b/layouts/src/distill/hy.rs index 653823a..b558770 100644 --- a/layouts/src/distill/hy.rs +++ b/layouts/src/distill/hy.rs @@ -1,7 +1,8 @@ use std::collections::BTreeMap; use crate::{ - layout::{LayoutType, ListLayout, LiteralLayout}, + abs::syntax::{OrderedListLayoutType, SizedListLayoutType}, + layout::{LayoutType, ListLayout, LiteralLayout, ProductLayoutType, SumLayoutType}, matching, pattern::Substitution, utils::QuadsExt, @@ -27,11 +28,11 @@ pub enum Error { #[error("invalid input count (expected {expected}, found {found})")] InvalidInputCount { expected: u32, found: u32 }, - #[error("data ambiguity")] - DataAmbiguity, + #[error("ambiguous {0}")] + DataAmbiguity(Box>), - #[error("missing data")] - MissingData, + #[error("missing required {0}")] + MissingData(Box>), #[error("unknown number datatype")] UnknownNumberDatatype(IriBuf), @@ -43,15 +44,60 @@ pub 
enum Error { LayoutNotFound(Ref), } -impl From for Error { - fn from(value: matching::Error) -> Self { +#[derive(Debug, thiserror::Error)] +pub enum DataFragment { + #[error("layout discriminant")] + Discriminant(Ref), + + #[error("variant `{variant_name}`")] + Variant { + layout: Ref, + variant_name: String, + }, + + #[error("field `{field_name}`")] + Field { + layout: Ref, + field_name: String, + }, + + #[error("list node")] + OrderedListNode { + layout: Ref, + head: R, + tail: R, + }, + + #[error("list item")] + SizedListItem { + layout: Ref, + index: usize, + }, +} + +impl Error { + fn from_matching_error(value: matching::Error, f: DataFragment) -> Self { match value { - matching::Error::Ambiguity => Self::DataAmbiguity, - matching::Error::Empty => Self::MissingData, + matching::Error::Ambiguity => Self::DataAmbiguity(Box::new(f)), + matching::Error::Empty => Self::MissingData(Box::new(f)), } } } +trait MatchingForFragment { + type Ok; + + fn for_fragment(self, f: impl FnOnce() -> DataFragment) -> Result>; +} + +impl MatchingForFragment for Result { + type Ok = T; + + fn for_fragment(self, f: impl FnOnce() -> DataFragment) -> Result> { + self.map_err(|e| Error::from_matching_error(e, f())) + } +} + /// Serialize the given RDF `dataset` using the provided `layout`. 
/// /// This is a simplified version of [`hydrate_with`] using the basic unit `()` @@ -131,7 +177,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone()))?; let resource = layout .resource @@ -171,7 +218,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone()))?; let mut failures = Vec::new(); let mut selected = None; @@ -185,7 +233,11 @@ where substitution.clone(), variant.dataset.quads().with_default_graph(current_graph), ) - .into_unique()?; + .into_unique() + .for_fragment(|| DataFragment::Variant { + layout: layout_ref.clone().cast(), + variant_name: variant.name.clone(), + })?; match variant_substitution { Some(variant_substitution) => { @@ -244,7 +296,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone()))?; let mut record = BTreeMap::new(); @@ -257,7 +310,11 @@ where field_substitution, field.dataset.quads().with_default_graph(current_graph), ) - .into_unique()?; + .into_unique() + .for_fragment(|| DataFragment::Field { + layout: layout_ref.clone().cast(), + field_name: name.clone(), + })?; match field_substitution { Some(field_substitution) => { @@ -279,7 +336,12 @@ where record.insert(name.clone(), value); } None => { - // TODO check required fields + if field.required { + return Err(Error::MissingData(Box::new(DataFragment::Field { + layout: layout_ref.clone().cast(), + field_name: name.clone(), + }))); + } } } } @@ -297,7 +359,8 @@ where substitution, layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| 
DataFragment::Discriminant(layout_ref.clone()))?; item_substitution.intro(layout.item.intro); let matching = Matching::new( @@ -348,7 +411,8 @@ where substitution, layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone()))?; let mut head = layout.head.apply(&substitution).into_resource().unwrap(); let tail = layout.tail.apply(&substitution).into_resource().unwrap(); @@ -357,7 +421,7 @@ where while head != tail { let mut item_substitution = substitution.clone(); - item_substitution.push(Some(head)); // the head + item_substitution.push(Some(head.clone())); // the head let rest = item_substitution.intro(1 + layout.node.intro); // the rest, and other intro variables. let item_substitution = Matching::new( @@ -369,7 +433,12 @@ where .quads() .with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::OrderedListNode { + layout: layout_ref.clone().cast(), + head, + tail: tail.clone(), + })?; let item_inputs = select_inputs(&layout.node.value.input, &item_substitution); @@ -406,11 +475,12 @@ where substitution, layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone()))?; let mut items = Vec::with_capacity(layout.items.len()); - for item in &layout.items { + for (index, item) in layout.items.iter().enumerate() { let mut item_substitution = substitution.clone(); item_substitution.intro(item.intro); @@ -419,7 +489,11 @@ where item_substitution, item.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::SizedListItem { + layout: layout_ref.clone().cast(), + index, + })?; let item_inputs = select_inputs(&item.value.input, &item_substitution); let item_graph = diff --git 
a/layouts/src/distill/hy/data.rs b/layouts/src/distill/hy/data.rs index 5ad586e..e41e7c8 100644 --- a/layouts/src/distill/hy/data.rs +++ b/layouts/src/distill/hy/data.rs @@ -13,7 +13,7 @@ use rdf_types::{ }; use xsd_types::{lexical::Lexical, ParseXsd}; -use super::Error; +use super::{DataFragment, Error, MatchingForFragment}; pub fn hydrate_data( vocabulary: &V, @@ -41,7 +41,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone().cast()))?; Ok(TypedLiteral::Unit(layout.const_.clone(), layout_ref.cast())) } @@ -54,7 +55,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone().cast()))?; let resource = layout .resource @@ -97,7 +99,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone().cast()))?; let resource = layout .resource @@ -140,7 +143,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone().cast()))?; let resource = layout .resource @@ -185,7 +189,8 @@ where substitution.clone(), layout.dataset.quads().with_default_graph(current_graph), ) - .into_required_unique()?; + .into_required_unique() + .for_fragment(|| DataFragment::Discriminant(layout_ref.clone().cast()))?; let resource = layout .resource diff --git a/layouts/src/layout/product.rs b/layouts/src/layout/product.rs index 9aa33a4..865b8c3 100644 --- a/layouts/src/layout/product.rs +++ b/layouts/src/layout/product.rs @@ -64,6 +64,9 @@ pub struct Field { /// Dataset. 
pub dataset: Dataset, + + /// Whether or not the field is required. + pub required: bool, } impl PartialOrd for Field { diff --git a/layouts/tests/distill.rs b/layouts/tests/distill.rs index 0ddf5e2..0f08ab2 100644 --- a/layouts/tests/distill.rs +++ b/layouts/tests/distill.rs @@ -137,6 +137,28 @@ macro_rules! test { }; } +macro_rules! negative_test { + ($(#[$meta:meta])* $name:ident ($($e:expr),*)) => { + paste! { + $(#[$meta])* + #[test] + #[should_panic] + fn [] () { + hydrate(stringify!($name), [$($e),*]) + } + } + + paste! { + $(#[$meta])* + #[test] + #[should_panic] + fn [] () { + dehydrate(stringify!($name), [$($e),*]) + } + } + }; +} + test! { /// Simple record layout. t01 (Term::blank(BlankIdBuf::new("_:john_smith".to_string()).unwrap())) @@ -206,3 +228,8 @@ test! { /// Layout reference. t14 (Term::blank(BlankIdBuf::new("_:subject".to_string()).unwrap())) } + +negative_test! { + /// Missing required field. + e01 (Term::blank(BlankIdBuf::new("_:john_smith".to_string()).unwrap())) +} diff --git a/layouts/tests/distill/e01-in.nq b/layouts/tests/distill/e01-in.nq new file mode 100644 index 0000000..0ac30da --- /dev/null +++ b/layouts/tests/distill/e01-in.nq @@ -0,0 +1 @@ +_:john_smith "John Smith" . 
\ No newline at end of file diff --git a/layouts/tests/distill/e01-layout.json b/layouts/tests/distill/e01-layout.json new file mode 100644 index 0000000..d2364da --- /dev/null +++ b/layouts/tests/distill/e01-layout.json @@ -0,0 +1,16 @@ +{ + "type": "record", + "input": ["self"], + "fields": { + "name": { + "value": { "type": "string" }, + "property": "https://schema.org/name", + "required": true + }, + "email": { + "value": { "type": "string" }, + "property": "https://schema.org/email", + "required": true + } + } +} \ No newline at end of file diff --git a/layouts/tests/distill/e01-out.json b/layouts/tests/distill/e01-out.json new file mode 100644 index 0000000..7c3f615 --- /dev/null +++ b/layouts/tests/distill/e01-out.json @@ -0,0 +1,3 @@ +{ + "name": "John Smith" +} \ No newline at end of file