Skip to content

Commit

Permalink
Introduce fuzzers with antinom
Browse files Browse the repository at this point in the history
  • Loading branch information
arnodb committed Nov 8, 2023
1 parent a6b0849 commit 2696db7
Show file tree
Hide file tree
Showing 8 changed files with 390 additions and 89 deletions.
1 change: 1 addition & 0 deletions datapet_codegen_macro/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ proc-macro = true
[dependencies]
annotate-snippets = { version = "0.9", features = ["color"] }
datapet_codegen = { path = "../datapet_codegen" }
datapet_lang = { path = "../datapet_lang" }
proc-macro2 = "1"
proc-macro-error = "1"
quote = "1"
Expand Down
89 changes: 2 additions & 87 deletions datapet_codegen_macro/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use annotate_snippets::{
display_list::{DisplayList, FormatOptions},
snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
};
use annotate_snippets::display_list::DisplayList;
use datapet_codegen::ParseError;
use datapet_lang::snippet::snippet_for_input_and_part;
use proc_macro2::Ident;
use proc_macro_error::{abort_if_dirty, emit_error, proc_macro_error};
use quote::format_ident;
Expand All @@ -12,89 +10,6 @@ use syn::{
Error, LitStr,
};

// Adapted from https://github.com/botika/yarte
/// Returns the byte offset at which every line of `s` starts.
///
/// The first entry is always `0`; each `'\n'` found in `s` contributes the
/// offset of the byte that follows it, so a trailing newline yields a final
/// entry equal to `s.len()`.
fn lines_offsets(s: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(s.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}

// Inspired by https://github.com/botika/yarte
/// Locates `part` inside `input` and computes the window of source text to
/// display around it.
///
/// `part` is located by pointer arithmetic, not by searching for its text, so
/// it must be a sub-slice borrowed from `input`.
///
/// Returns `(line_start, (lo_line, hi_line), (lo, hi))` where:
/// * `line_start` is the 1-based number of the first line of the window,
/// * `lo_line..hi_line` is the byte range of the window within `input`
///   (up to `CONTEXT` line starts before and after `part`),
/// * `lo..hi` is the byte range of `part` relative to `lo_line`.
///
/// The end of `part` is clamped to `input.len()`.
///
/// # Panics
///
/// Panics with `"not a part of input"` when `part` starts before `input` or
/// past its end.
fn slice_spans<'a>(input: &'a str, part: &'a str) -> (usize, (usize, usize), (usize, usize)) {
    const CONTEXT: usize = 3;

    let input_addr = input.as_ptr() as usize;
    let part_addr = part.as_ptr() as usize;
    let lo = match part_addr.checked_sub(input_addr) {
        // `lo == input.len()` is legitimate: an empty part at the very end.
        Some(lo) if lo <= input.len() => lo,
        // Starting before the input or beyond its end cannot be a sub-slice;
        // continuing would yield an inverted (lo > hi) range.
        _ => panic!("not a part of input"),
    };
    let hi = (lo + part.len()).min(input.len());

    let lines = lines_offsets(input);

    let lo_index = match lines.binary_search(&lo) {
        Ok(index) => index,
        // `lines` always starts with 0, so the insertion point is at least 1.
        Err(index) => index - 1,
    }
    .saturating_sub(CONTEXT);
    let lo_line = lines[lo_index];

    // Either the line starting exactly at `hi` or the first line starting after it.
    let hi_index = lines.binary_search(&hi).unwrap_or_else(|index| index);
    let hi_line = lines
        .get(hi_index + CONTEXT)
        .copied()
        .unwrap_or(input.len());

    (
        lo_index + 1,
        (lo_line, hi_line),
        (lo - lo_line, hi - lo_line),
    )
}

/// Builds an error snippet showing where `part` sits inside `input`.
///
/// The displayed source is the window computed by `slice_spans`; `part` is
/// underlined with a single error annotation carrying `label`. The snippet has
/// an error-typed title without id or label, and no footer.
fn snippet_for_input_and_part<'a>(label: &'a str, input: &'a str, part: &'a str) -> Snippet<'a> {
    let (line_start, (window_start, window_end), range) = slice_spans(input, part);

    let highlight = SourceAnnotation {
        label,
        range,
        annotation_type: AnnotationType::Error,
    };

    let title = Annotation {
        id: None,
        label: None,
        annotation_type: AnnotationType::Error,
    };

    // No color until https://github.com/rust-lang/rust-analyzer/issues/15443
    // has a proper fix.
    let opt = FormatOptions {
        color: false,
        ..Default::default()
    };

    Snippet {
        title: Some(title),
        footer: Vec::new(),
        slices: vec![Slice {
            source: &input[window_start..window_end],
            line_start,
            origin: None,
            annotations: vec![highlight],
            fold: false,
        }],
        opt,
    }
}

struct ProcMacroErrorEmitter<'a> {
def_type: &'a Ident,
input: &'a str,
Expand Down
5 changes: 5 additions & 0 deletions datapet_lang/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ edition = "2021"
rust-version = "1.65.0"

[dependencies]
annotate-snippets = { version = "0.9", features = ["color"] }
nom = "7"

[dev-dependencies]
antinom = { git = "https://github.com/arnodb/antinom.git" }
assert_matches = "1"
itertools = "0.10"
rand_chacha = { version = "0.3" }
rstest = "0.18.2"
1 change: 1 addition & 0 deletions datapet_lang/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ pub use nom;

pub mod ast;
pub mod parser;
pub mod snippet;
68 changes: 68 additions & 0 deletions datapet_lang/src/parser/fuzz_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use annotate_snippets::display_list::DisplayList;
use antinom::rng::AntiNomRandRng;
use itertools::Itertools;
use rand_chacha::rand_core::SeedableRng;
use rstest::rstest;

use crate::{ast::GraphDefinitionSignature, snippet::snippet_for_input_and_part};

use super::{fuzzer, SpannedResult};

/// Iteration budget of the `fuzz_test!` loop; each iteration generates one
/// input and parses it.
const FUZZ_ITERATIONS: usize = 1000;

/// Generates input with `$fuzzer` and asserts that `$parser` accepts it.
///
/// * `$seed` - an `Option` holding a ChaCha8 seed. `None` draws a fresh seed
///   from entropy on each of the `FUZZ_ITERATIONS` iterations. `Some(seed)`
///   replays that seed; as the generator is re-created from the same seed and
///   only receives `rng` and an empty buffer, one iteration is run instead of
///   repeating identical work.
/// * `$fuzzer` - path of a function called as `$fuzzer(&mut rng, &mut String)`.
/// * `$parser` - path of a function called as `$parser(&str)` returning a
///   `Result` whose error is a `nom::Err`.
///
/// The seed of every iteration is printed so that a failing run can be
/// replayed by passing it back as `$seed`. On a parse error the offending part
/// of the input is rendered to stderr and the macro panics.
macro_rules! fuzz_test {
    ($seed: expr, $fuzzer:path, $parser:path) => {{
        // Evaluate the seed expression once, outside of the loop.
        let fixed_seed = $seed;
        let iterations = if fixed_seed.is_some() {
            1
        } else {
            FUZZ_ITERATIONS
        };
        for _ in 0..iterations {
            let mut rng = AntiNomRandRng {
                rng: match fixed_seed {
                    Some(seed) => rand_chacha::ChaCha8Rng::from_seed(seed),
                    None => rand_chacha::ChaCha8Rng::from_entropy(),
                },
            };
            // Printed before generating anything so the seed is available
            // even if the fuzzer or the parser panics.
            println!(
                "Seed: [{}]",
                rng.rng
                    .get_seed()
                    .iter()
                    .map(|b| format!("0x{:02x}", b))
                    .join(", ")
            );
            let mut dtpt = String::new();
            $fuzzer(&mut rng, &mut dtpt);
            if let Err(err) = $parser(&dtpt) {
                let error = match &err {
                    nom::Err::Incomplete(_) => "incomplete".into(),
                    nom::Err::Error(err) | nom::Err::Failure(err) => err.kind.description(),
                };
                // An incomplete parse has no span: highlight the whole input.
                let part = match &err {
                    nom::Err::Incomplete(_) => &dtpt,
                    nom::Err::Error(err) | nom::Err::Failure(err) => err.span,
                };
                eprintln!(
                    "{}",
                    DisplayList::from(snippet_for_input_and_part(&error, &dtpt, part))
                );
                panic!("parse error {}", error);
            }
        }
    }};
}

/// Feeds text produced by `fuzzer::graph_definition_signature` to the parser
/// given by the case and fails on the first input it rejects.
///
/// Replace the `None` seed of the case with `Some([...])` to replay a seed
/// printed by a previous run.
#[rstest]
#[case(crate::parser::graph_definition_signature, None)]
fn fuzz_graph_definition_signature<F: Fn(&str) -> SpannedResult<&str, GraphDefinitionSignature>>(
    #[case] parser: F,
    #[case] seed: Option<[u8; 32]>,
) {
    fuzz_test!(seed, fuzzer::graph_definition_signature, parser);
}

/// Feeds text produced by `fuzzer::identifier` to the parser given by the case
/// and fails on the first input it rejects.
///
/// Replace the `None` seed of the case with `Some([...])` to replay a seed
/// printed by a previous run.
#[rstest]
#[case(crate::parser::identifier, None)]
fn fuzz_identifier<F: Fn(&str) -> SpannedResult<&str, &str>>(
    #[case] parser: F,
    #[case] seed: Option<[u8; 32]>,
) {
    fuzz_test!(seed, fuzzer::identifier, parser);
}
105 changes: 105 additions & 0 deletions datapet_lang/src/parser/fuzzer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use antinom::{
branch::alt,
bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, multispace0},
combinator::{cut, opt, recognize},
multi::{many0, separated_list0},
rng::AntiNomRng,
sequence::{delimited, pair, preceded, terminated, tuple},
Buffer, Generator,
};

/// Limit given to `separated_list0` for the stream list built in `opened_streams0`.
const MAX_FILTER_STREAMS: u8 = 5;
/// Limit given to `alpha1` / `alphanumeric1` for each fragment built in `identifier`.
const MAX_IDENTIFIER_FRAGMENT_LENGTH: u8 = 3;
/// Fragment budget of `identifier`: one mandatory leading fragment, then
/// `many0` is called with this value minus one.
const MAX_IDENTIFIER_FRAGMENTS: u8 = 3;
/// Limit given to `separated_list0` for the list built in `params`.
const MAX_PARAMS: u8 = 5;
/// Limit given to every `multispace0` call made by `ds` and `ps`.
const MAX_SPACES: u8 = 3;

/// Appends a generated graph definition signature to `buffer`, drawing
/// randomness from `rng`.
///
/// The sequence is: an optional stream list (`opt_streams0`), an identifier,
/// a parameter list (`params`) and a second optional stream list. Every
/// element but the first is wrapped in `ps`.
pub fn graph_definition_signature<R: AntiNomRng>(rng: &mut R, buffer: &mut String) {
    let name = ps(identifier);
    // NOTE(review): `cut` presumably mirrors the nom combinator used by the
    // parser and does not alter the generated text - confirm against antinom.
    let params_then_streams = cut(pair(ps(params), ps(opt_streams0)));
    tuple((opt_streams0, name, params_then_streams)).gen(rng, buffer)
}

/// Appends a generated parameter list to `buffer`: `(`, a `,`-separated list
/// of identifiers limited by `MAX_PARAMS`, then `)`.
///
/// Each identifier is wrapped in `ds` and the whole list in `ps`.
pub fn params<R: AntiNomRng>(rng: &mut R, buffer: &mut String) {
    let opening = token("(");
    let names = ps(separated_list0(token(","), ds(identifier), MAX_PARAMS));
    let closing = token(")");
    delimited(opening, names, closing).gen(rng, buffer)
}

/// Appends an optional stream list to `buffer`: when generated, it is `[`
/// followed by whatever `opened_streams0` produces.
pub fn opt_streams0<R: AntiNomRng>(rng: &mut R, buffer: &mut String) {
    let bracketed_streams = preceded(token("["), cut(opened_streams0));
    opt(bracketed_streams).gen(rng, buffer)
}

/// Appends the remainder of a stream list whose `[` was already emitted: a
/// `,`-separated list of identifiers limited by `MAX_FILTER_STREAMS`, then `]`.
///
/// Each identifier is wrapped in `ds` and the whole list in `ps`.
pub fn opened_streams0<R: AntiNomRng>(rng: &mut R, buffer: &mut String) {
    let names = ps(separated_list0(
        token(","),
        ds(identifier),
        MAX_FILTER_STREAMS,
    ));
    let closing = token("]");
    terminated(names, closing).gen(rng, buffer)
}

/// Appends a generated identifier to `buffer`.
///
/// The leading fragment is either alphabetic (`alpha1`) or `_`; it is followed
/// by `many0` of fragments that are either alphanumeric (`alphanumeric1`) or
/// `_`, called with `MAX_IDENTIFIER_FRAGMENTS - 1`. Character fragments are
/// limited by `MAX_IDENTIFIER_FRAGMENT_LENGTH`.
pub fn identifier<R: AntiNomRng>(rng: &mut R, buffer: &mut String) {
    let leading_fragment = alt((alpha1(MAX_IDENTIFIER_FRAGMENT_LENGTH), tag("_")));
    let following_fragments = many0(
        alt((alphanumeric1(MAX_IDENTIFIER_FRAGMENT_LENGTH), tag("_"))),
        MAX_IDENTIFIER_FRAGMENTS - 1,
    );
    recognize(tuple((leading_fragment, following_fragments))).gen(rng, buffer)
}

/// Generator for a fixed token; currently a plain alias of `tag`.
fn token<R: AntiNomRng, B: Buffer, T: AsRef<B::Slice> + Clone>(token: T) -> impl Generator<R, B> {
    tag(token)
}

/// Wraps `f` between two `multispace0(MAX_SPACES)` generators, one before and
/// one after it.
fn ds<R: AntiNomRng, B: Buffer, F: Generator<R, B>>(f: F) -> impl Generator<R, B>
where
    B::Char: From<u8>,
{
    let spaces_before = multispace0(MAX_SPACES);
    let spaces_after = multispace0(MAX_SPACES);
    delimited(spaces_before, f, spaces_after)
}

/// Wraps `f` so that a `multispace0(MAX_SPACES)` generator runs before it.
fn ps<R: AntiNomRng, B: Buffer, F: Generator<R, B>>(f: F) -> impl Generator<R, B>
where
    B::Char: From<u8>,
{
    let spaces_before = multispace0(MAX_SPACES);
    preceded(spaces_before, f)
}
Loading

0 comments on commit 2696db7

Please sign in to comment.