diff --git a/datapet_codegen_macro/Cargo.toml b/datapet_codegen_macro/Cargo.toml
index ef8a3d5..6d3e702 100644
--- a/datapet_codegen_macro/Cargo.toml
+++ b/datapet_codegen_macro/Cargo.toml
@@ -10,6 +10,7 @@ proc-macro = true
 [dependencies]
 annotate-snippets = { version = "0.9", features = ["color"] }
 datapet_codegen = { path = "../datapet_codegen" }
+datapet_lang = { path = "../datapet_lang" }
 proc-macro2 = "1"
 proc-macro-error = "1"
 quote = "1"
diff --git a/datapet_codegen_macro/src/lib.rs b/datapet_codegen_macro/src/lib.rs
index b01bce2..91c970a 100644
--- a/datapet_codegen_macro/src/lib.rs
+++ b/datapet_codegen_macro/src/lib.rs
@@ -1,8 +1,6 @@
-use annotate_snippets::{
-    display_list::{DisplayList, FormatOptions},
-    snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
-};
+use annotate_snippets::display_list::DisplayList;
 use datapet_codegen::ParseError;
+use datapet_lang::snippet::snippet_for_input_and_part;
 use proc_macro2::Ident;
 use proc_macro_error::{abort_if_dirty, emit_error, proc_macro_error};
 use quote::format_ident;
@@ -12,89 +10,6 @@ use syn::{
     Error, LitStr,
 };
 
-// Copied from https://github.com/botika/yarte
-fn lines_offsets(s: &str) -> Vec<usize> {
-    let mut lines = vec![0];
-    let mut prev = 0;
-    while let Some(len) = s[prev..].find('\n') {
-        prev += len + 1;
-        lines.push(prev);
-    }
-    lines
-}
-
-// Inspired by from https://github.com/botika/yarte
-fn slice_spans<'a>(input: &'a str, part: &'a str) -> (usize, (usize, usize), (usize, usize)) {
-    let (lo, hi) = {
-        let a = input.as_ptr();
-        let b = part.as_ptr();
-        if a <= b {
-            (
-                b as usize - a as usize,
-                (b as usize - a as usize + part.len()).min(input.len()),
-            )
-        } else {
-            panic!("not a part of input");
-        }
-    };
-
-    let lines = lines_offsets(input);
-
-    const CONTEXT: usize = 3;
-
-    let lo_index = match lines.binary_search(&lo) {
-        Ok(index) => index,
-        Err(index) => index - 1,
-    }
-    .saturating_sub(CONTEXT);
-    let lo_line = lines[lo_index];
-    let hi_index = match lines.binary_search(&hi) {
-        Ok(index) => index,
-        Err(index) => index,
-    };
-    let hi_line = lines
-        .get(hi_index + CONTEXT)
-        .copied()
-        .unwrap_or(input.len());
-    (
-        lo_index + 1,
-        (lo_line, hi_line),
-        (lo - lo_line, hi - lo_line),
-    )
-}
-
-fn snippet_for_input_and_part<'a>(label: &'a str, input: &'a str, part: &'a str) -> Snippet<'a> {
-    let (line_start, (lo_line, hi_line), (lo, hi)) = slice_spans(input, part);
-
-    let slice = Slice {
-        source: &input[lo_line..hi_line],
-        line_start,
-        origin: None,
-        annotations: vec![SourceAnnotation {
-            label,
-            range: (lo, hi),
-            annotation_type: AnnotationType::Error,
-        }],
-        fold: false,
-    };
-
-    Snippet {
-        title: Some(Annotation {
-            id: None,
-            label: None,
-            annotation_type: AnnotationType::Error,
-        }),
-        footer: vec![],
-        slices: vec![slice],
-        opt: FormatOptions {
-            // No color until https://github.com/rust-lang/rust-analyzer/issues/15443
-            // has a proper fix.
-            color: false,
-            ..Default::default()
-        },
-    }
-}
-
 struct ProcMacroErrorEmitter<'a> {
     def_type: &'a Ident,
     input: &'a str,
diff --git a/datapet_lang/Cargo.toml b/datapet_lang/Cargo.toml
index bb9fb25..91b332a 100644
--- a/datapet_lang/Cargo.toml
+++ b/datapet_lang/Cargo.toml
@@ -5,7 +5,12 @@ edition = "2021"
 rust-version = "1.65.0"
 
 [dependencies]
+annotate-snippets = { version = "0.9", features = ["color"] }
 nom = "7"
 
 [dev-dependencies]
+antinom = { git = "https://github.com/arnodb/antinom.git" }
 assert_matches = "1"
+itertools = "0.10"
+rand_chacha = { version = "0.3" }
+rstest = "0.18.2"
diff --git a/datapet_lang/src/lib.rs b/datapet_lang/src/lib.rs
index 707076f..67bdf49 100644
--- a/datapet_lang/src/lib.rs
+++ b/datapet_lang/src/lib.rs
@@ -6,3 +6,4 @@ pub use nom;
 
 pub mod ast;
 pub mod parser;
+pub mod snippet;
diff --git a/datapet_lang/src/parser/fuzzer.rs b/datapet_lang/src/parser/fuzzer.rs
new file mode 100644
index 0000000..5d8d40f
--- /dev/null
+++ b/datapet_lang/src/parser/fuzzer.rs
@@ -0,0 +1,105 @@
+use antinom::{
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::{alpha1, alphanumeric1, multispace0},
+    combinator::{cut, opt, recognize},
+    multi::{many0, separated_list0},
+    rng::AntiNomRng,
+    sequence::{delimited, pair, preceded, terminated, tuple},
+    Buffer, Generator,
+};
+
+const MAX_FILTER_STREAMS: u8 = 5;
+const MAX_IDENTIFIER_FRAGMENT_LENGTH: u8 = 3;
+const MAX_IDENTIFIER_FRAGMENTS: u8 = 3;
+const MAX_PARAMS: u8 = 5;
+const MAX_SPACES: u8 = 3;
+
+pub fn graph_definition_signature<R>(rng: &mut R, buffer: &mut String)
+where
+    R: AntiNomRng,
+{
+    tuple((
+        opt_streams0,
+        ps(identifier),
+        cut(pair(ps(params), ps(opt_streams0))),
+    ))
+    .gen(rng, buffer)
+}
+
+pub fn params<R>(rng: &mut R, buffer: &mut String)
+where
+    R: AntiNomRng,
+{
+    delimited(
+        token("("),
+        ps(separated_list0(token(","), ds(identifier), MAX_PARAMS)),
+        token(")"),
+    )
+    .gen(rng, buffer)
+}
+
+pub fn opt_streams0<R>(rng: &mut R, buffer: &mut String)
+where
+    R: AntiNomRng,
+{
+    opt(preceded(token("["), cut(opened_streams0))).gen(rng, buffer)
+}
+
+pub fn opened_streams0<R>(rng: &mut R, buffer: &mut String)
+where
+    R: AntiNomRng,
+{
+    terminated(
+        ps(separated_list0(
+            token(","),
+            ds(identifier),
+            MAX_FILTER_STREAMS,
+        )),
+        token("]"),
+    )
+    .gen(rng, buffer)
+}
+
+pub fn identifier<R>(rng: &mut R, buffer: &mut String)
+where
+    R: AntiNomRng,
+{
+    recognize(tuple((
+        alt((alpha1(MAX_IDENTIFIER_FRAGMENT_LENGTH), tag("_"))),
+        many0(
+            alt((alphanumeric1(MAX_IDENTIFIER_FRAGMENT_LENGTH), tag("_"))),
+            MAX_IDENTIFIER_FRAGMENTS - 1,
+        ),
+    )))
+    .gen(rng, buffer)
+}
+
+fn token<R, B, T>(token: T) -> impl Generator<R, B>
+where
+    R: AntiNomRng,
+    B: Buffer,
+    T: AsRef<str> + Clone,
+{
+    tag(token)
+}
+
+fn ds<R, B, F>(f: F) -> impl Generator<R, B>
+where
+    R: AntiNomRng,
+    B: Buffer,
+    B::Char: From<char>,
+    F: Generator<R, B>,
+{
+    delimited(multispace0(MAX_SPACES), f, multispace0(MAX_SPACES))
+}
+
+fn ps<R, B, F>(f: F) -> impl Generator<R, B>
+where
+    R: AntiNomRng,
+    B: Buffer,
+    B::Char: From<char>,
+    F: Generator<R, B>,
+{
+    preceded(multispace0(MAX_SPACES), f)
+}
diff --git a/datapet_lang/src/parser.rs b/datapet_lang/src/parser/mod.rs
similarity index 79%
rename from datapet_lang/src/parser.rs
rename to datapet_lang/src/parser/mod.rs
index 517e95e..cf1202d 100644
--- a/datapet_lang/src/parser.rs
+++ b/datapet_lang/src/parser/mod.rs
@@ -16,6 +16,9 @@ use crate::ast::{
     StreamLineInput, StreamLineOutput, UseDeclaration,
 };
 
+#[cfg(test)]
+pub mod fuzzer;
+
 #[derive(Debug)]
 pub struct SpannedError {
     pub kind: SpannedErrorKind,
@@ -70,8 +73,8 @@ pub fn module(input: &str) -> SpannedResult<&str, Module> {
 
 fn use_declaration(input: &str) -> SpannedResult<&str, UseDeclaration> {
     preceded(
-        terminated(tag("use"), multispace1),
-        cut(terminated(code, ps(tag(";")))),
+        terminated(token("use"), multispace1),
+        cut(terminated(use_tree, ps(token(";")))),
     )
     .map(|use_tree| UseDeclaration {
         use_tree: use_tree.into(),
@@ -79,6 +82,46 @@ fn use_declaration(input: &str) -> SpannedResult<&str, UseDeclaration> {
     .parse(input)
 }
 
+fn use_tree(input: &str) -> SpannedResult<&str, &str> {
+    recognize(alt((
+        recognize(pair(
+            simple_path,
+            cut(alt((
+                recognize(pair(ds(token("::")), ts(use_sub_tree))),
+                recognize(opt(tuple((
+                    multispace1,
+                    token("as"),
+                    cut(tuple((
+                        multispace1,
+                        alt((identifier, tag("_"))),
+                        multispace0,
+                    ))),
+                )))),
+            ))),
+        )),
+        recognize(pair(opt(ts(token("::"))), ts(use_sub_tree))),
+    )))
+    .parse(input)
+}
+
+fn use_sub_tree(input: &str) -> SpannedResult<&str, &str> {
+    recognize(alt((
+        recognize(token("*")),
+        recognize(tuple((
+            ts(token("{")),
+            cut(pair(
+                opt(tuple((
+                    ts(use_tree),
+                    many0(pair(ts(token(",")), ts(use_tree))),
+                    opt(ts(token(","))),
+                ))),
+                token("}"),
+            )),
+        ))),
+    )))
+    .parse(input)
+}
+
 fn graph_definition(input: &str) -> SpannedResult<&str, GraphDefinition> {
     tuple((
         opt(tag("pub")),
@@ -476,3 +519,75 @@ mod tests {
         assert_eq!(ident, "caf");
     }
 }
+
+#[cfg(test)]
+mod fuzz_tests {
+    use annotate_snippets::display_list::DisplayList;
+    use antinom::rng::AntiNomRandRng;
+    use itertools::Itertools;
+    use rand_chacha::rand_core::SeedableRng;
+    use rstest::rstest;
+
+    use crate::{ast::GraphDefinitionSignature, snippet::snippet_for_input_and_part};
+
+    use super::{fuzzer, SpannedResult};
+
+    const FUZZ_ITERATIONS: usize = 1000;
+
+    macro_rules! fuzz_test {
+        ($seed: expr, $fuzzer:path, $parser:path) => {
+            for _ in 0..FUZZ_ITERATIONS {
+                let mut rng = AntiNomRandRng {
+                    rng: if let Some(seed) = $seed {
+                        rand_chacha::ChaCha8Rng::from_seed(seed)
+                    } else {
+                        rand_chacha::ChaCha8Rng::from_entropy()
+                    },
+                };
+                println!(
+                    "Seed: [{}]",
+                    rng.rng
+                        .get_seed()
+                        .iter()
+                        .map(|b| format!("0x{:02x}", b))
+                        .join(", ")
+                );
+                let mut dtpt = String::new();
+                $fuzzer(&mut rng, &mut dtpt);
+                if let Err(err) = $parser(&dtpt) {
+                    let error = match &err {
+                        nom::Err::Incomplete(_) => "incomplete".into(),
+                        nom::Err::Error(err) | nom::Err::Failure(err) => err.kind.description(),
+                    };
+                    let part = match &err {
+                        nom::Err::Incomplete(_) => &dtpt,
+                        nom::Err::Error(err) | nom::Err::Failure(err) => err.span,
+                    };
+                    eprintln!(
+                        "{}",
+                        DisplayList::from(snippet_for_input_and_part(&error, &dtpt, part))
+                    );
+                    panic!("parse error {}", error);
+                }
+            }
+        };
+    }
+
+    #[rstest]
+    #[case(crate::parser::graph_definition_signature, None)]
+    fn fuzz_graph_definition_signature<F>(#[case] parser: F, #[case] seed: Option<[u8; 32]>)
+    where
+        F: Fn(&str) -> SpannedResult<&str, GraphDefinitionSignature>,
+    {
+        fuzz_test!(seed, fuzzer::graph_definition_signature, parser);
+    }
+
+    #[rstest]
+    #[case(crate::parser::identifier, None)]
+    fn fuzz_identifier<F>(#[case] parser: F, #[case] seed: Option<[u8; 32]>)
+    where
+        F: Fn(&str) -> SpannedResult<&str, &str>,
+    {
+        fuzz_test!(seed, fuzzer::identifier, parser);
+    }
+}
diff --git a/datapet_lang/src/snippet.rs b/datapet_lang/src/snippet.rs
new file mode 100644
index 0000000..70a43f9
--- /dev/null
+++ b/datapet_lang/src/snippet.rs
@@ -0,0 +1,91 @@
+use annotate_snippets::{
+    display_list::FormatOptions,
+    snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
+};
+
+// Copied from https://github.com/botika/yarte
+fn lines_offsets(s: &str) -> Vec<usize> {
+    let mut lines = vec![0];
+    let mut prev = 0;
+    while let Some(len) = s[prev..].find('\n') {
+        prev += len + 1;
+        lines.push(prev);
+    }
+    lines
+}
+
+// Inspired by https://github.com/botika/yarte
+fn slice_spans<'a>(input: &'a str, part: &'a str) -> (usize, (usize, usize), (usize, usize)) {
+    let (lo, hi) = {
+        let a = input.as_ptr();
+        let b = part.as_ptr();
+        if a <= b {
+            (
+                b as usize - a as usize,
+                (b as usize - a as usize + part.len()).min(input.len()),
+            )
+        } else {
+            panic!("not a part of input");
+        }
+    };
+
+    let lines = lines_offsets(input);
+
+    const CONTEXT: usize = 3;
+
+    let lo_index = match lines.binary_search(&lo) {
+        Ok(index) => index,
+        Err(index) => index - 1,
+    }
+    .saturating_sub(CONTEXT);
+    let lo_line = lines[lo_index];
+    let hi_index = match lines.binary_search(&hi) {
+        Ok(index) => index,
+        Err(index) => index,
+    };
+    let hi_line = lines
+        .get(hi_index + CONTEXT)
+        .copied()
+        .unwrap_or(input.len());
+    (
+        lo_index + 1,
+        (lo_line, hi_line),
+        (lo - lo_line, hi - lo_line),
+    )
+}
+
+pub fn snippet_for_input_and_part<'a>(
+    label: &'a str,
+    input: &'a str,
+    part: &'a str,
+) -> Snippet<'a> {
+    let (line_start, (lo_line, hi_line), (lo, hi)) = slice_spans(input, part);
+
+    let slice = Slice {
+        source: &input[lo_line..hi_line],
+        line_start,
+        origin: None,
+        annotations: vec![SourceAnnotation {
+            label,
+            range: (lo, hi),
+            annotation_type: AnnotationType::Error,
+        }],
+        fold: false,
+    };
+
+    Snippet {
+        title: Some(Annotation {
+            id: None,
+            label: Some(label),
+            annotation_type: AnnotationType::Error,
+        }),
+        footer: vec![],
+        slices: vec![slice],
+        opt: FormatOptions {
+            // No color until https://github.com/rust-lang/rust-analyzer/issues/15443
+            // has a proper fix.
+            color: false,
+            ..Default::default()
+        },
+    }
+}