From 282bf5ec3bdfe10970494cc23565708acd27984b Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 1 Jan 2025 22:27:37 +0000 Subject: [PATCH] =?UTF-8?q?Overhauled=20error=20system=20to=20make=20label?= =?UTF-8?q?s=20a=20first-class=20abstraction=20and=20=E2=80=A6=20(#712)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Overhauled error system to make labels a first-class abstraction and minimise missing labels * Handle keywords more elegantly * Fixed tests * Fixed broken tests * Appease clippy * Added MSRV to Cargo.toml * Fixed MSRV violation --- Cargo.toml | 9 +- benches/cbor.rs | 3 +- examples/io.rs | 12 +- src/combinator.rs | 18 ++- src/error.rs | 379 +++++++++++++++++++++++----------------------- src/extension.rs | 12 +- src/input.rs | 56 +++++-- src/label.rs | 46 +++++- src/lib.rs | 138 ++++++++++------- src/number.rs | 6 +- src/pratt.rs | 6 +- src/primitive.rs | 24 +-- src/regex.rs | 2 +- src/stream.rs | 2 +- src/text.rs | 200 ++++++++++++++++++++---- 15 files changed, 598 insertions(+), 315 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0feb3bd0..87616f53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ exclude = [ "/benches/samples/*", ] build = "build.rs" +rust-version = "1.65" [features] default = ["std", "stacker"] @@ -35,9 +36,6 @@ memoization = [] # Allows extending chumsky by writing your own parser implementations. extension = [] -# Enable support for parser labelling -label = [] - # Make builtin parsers such as `Boxed` use atomic instead of non-atomic internals. sync = ["spin"] @@ -65,7 +63,7 @@ docsrs = ["dep:vergen-gix"] # An alias of all features that work with the stable compiler. # Do not use this feature, its removal is not considered a breaking change and its behaviour may change. # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list. 
-_test_stable = ["std", "stacker", "memoization", "extension", "label", "sync"] +_test_stable = ["std", "stacker", "memoization", "extension", "sync"] [package.metadata.docs.rs] all-features = true @@ -129,7 +127,6 @@ harness = false [[example]] name = "nano_rust" -required-features = ["label"] [[example]] name = "json" @@ -145,4 +142,4 @@ required-features = ["std"] [[example]] name = "mini_ml" -required-features = ["pratt", "label"] +required-features = ["pratt"] diff --git a/benches/cbor.rs b/benches/cbor.rs index 55df4e32..c5f34c71 100644 --- a/benches/cbor.rs +++ b/benches/cbor.rs @@ -1,5 +1,4 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use std::hint::black_box; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; mod utils; diff --git a/examples/io.rs b/examples/io.rs index 403743c0..a8905f5a 100644 --- a/examples/io.rs +++ b/examples/io.rs @@ -1,8 +1,5 @@ -use chumsky::extra::ParserExtra; -use chumsky::input::IoInput; -use chumsky::prelude::*; -use std::env; -use std::fs::File; +use chumsky::{error::LabelError, extra::ParserExtra, input::IoInput, prelude::*, util::MaybeRef}; +use std::{env, fs::File}; #[allow(unused)] #[derive(Debug)] @@ -29,7 +26,10 @@ fn digits<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput>>() -> impl Parser<'a, IoInput, Vec, E> { +fn parser<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, Vec, E> +where + E::Error: LabelError<'a, IoInput, MaybeRef<'a, u8>>, +{ group((ident(), just(b':').padded(), digits())) .map(|(name, _, digits)| Foo { name, diff --git a/src/combinator.rs b/src/combinator.rs index 39e4b762..f88d12f7 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -254,7 +254,7 @@ where Ok(M::bind(|| out)) } else { let err_span = inp.span_since(&before); - inp.add_alt(None, None, err_span); + inp.add_alt([DefaultExpected::SomethingElse], None, err_span); Err(()) } }) @@ -814,7 +814,8 @@ where inp.add_alt_err(&before.inner /*&err.pos*/, err.err); } else { 
let err_span = inp.span_since(&before); - inp.add_alt(None, None, err_span); + // TODO: Is this an appropriate way to handle infinite recursion? + inp.add_alt([], None, err_span); } return Err(()); } @@ -1992,7 +1993,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { - let before = inp.cursor(); + // let before = inp.cursor(); let mut output = M::bind(|| C::uninit()); let mut iter_state = self.parser.make_iter::(inp)?; for idx in 0..C::LEN { @@ -2001,8 +2002,9 @@ where M::combine_mut(&mut output, out, |c, out| C::write(c, idx, out)); } Ok(None) => { - let span = inp.span_since(&before); - inp.add_alt(None, None, span); + // let span = inp.span_since(&before); + // We don't add an alt here because we assume the inner parser will. Is this safe to assume? + // inp.add_alt([ExpectedMoreElements(Some(C::LEN - idx))], None, span); // SAFETY: We're guaranteed to have initialized up to `idx` values M::map(output, |mut output| unsafe { C::drop_before(&mut output, idx) @@ -2131,7 +2133,11 @@ where match result { Ok(()) => { let found = inp.next_inner(); - inp.add_alt(None, found.map(|f| f.into()), result_span); + inp.add_alt( + [DefaultExpected::SomethingElse], + found.map(|f| f.into()), + result_span, + ); Err(()) } Err(()) => Ok(M::bind(|| ())), diff --git a/src/error.rs b/src/error.rs index 8670e07d..30e53d11 100644 --- a/src/error.rs +++ b/src/error.rs @@ -7,8 +7,9 @@ //! like [`Cheap`], [`Simple`] or [`Rich`]. use super::*; -#[cfg(not(feature = "std"))] -use alloc::string::ToString; +use alloc::{borrow::Cow, string::ToString}; + +pub use label::LabelError; /// A trait that describes parser error types. 
/// @@ -19,7 +20,7 @@ use alloc::string::ToString; /// # Examples /// /// ``` -/// use chumsky::{prelude::*, error::Error, util::MaybeRef}; +/// use chumsky::{prelude::*, error::{Error, LabelError}, util::MaybeRef, DefaultExpected}; /// type Span = SimpleSpan; /// /// // A custom error type @@ -27,25 +28,13 @@ use alloc::string::ToString; /// enum MyError { /// ExpectedFound { /// span: Span, -/// expected: Vec>, +/// expected: Vec>, /// found: Option, /// }, /// NotADigit(Span, char), /// } /// /// impl<'a> Error<'a, &'a str> for MyError { -/// fn expected_found>>>( -/// expected: Iter, -/// found: Option>, -/// span: Span, -/// ) -> Self { -/// Self::ExpectedFound { -/// span, -/// expected: expected.into_iter().map(|e| e.as_deref().copied()).collect(), -/// found: found.as_deref().copied(), -/// } -/// } -/// /// fn merge(mut self, mut other: Self) -> Self { /// if let (Self::ExpectedFound { expected, .. }, Self::ExpectedFound { expected: expected_other, .. }) = ( /// &mut self, @@ -57,6 +46,23 @@ use alloc::string::ToString; /// } /// } /// +/// impl<'a> LabelError<'a, &'a str, DefaultExpected<'a, char>> for MyError { +/// fn expected_found>>( +/// expected: Iter, +/// found: Option>, +/// span: Span, +/// ) -> Self { +/// Self::ExpectedFound { +/// span, +/// expected: expected +/// .into_iter() +/// .map(|e| e.into_owned()) +/// .collect(), +/// found: found.as_deref().copied(), +/// } +/// } +/// } +/// /// let numeral = any::<_, extra::Err>().try_map(|c: char, span| match c.to_digit(10) { /// Some(x) => Ok(x), /// None => Err(MyError::NotADigit(span, c)), @@ -67,46 +73,15 @@ use alloc::string::ToString; /// assert_eq!(numeral.parse("f").into_errors(), vec![MyError::NotADigit((0..1).into(), 'f')]); /// ``` // TODO: Add support for more specialised kinds of error: unclosed delimiters, and more -pub trait Error<'a, I: Input<'a>>: Sized { - /// Create a new error describing a conflict between expected inputs and that which was actually found. 
- /// - /// `found` having the value `None` indicates that the end of input was reached, but was not expected. - /// - /// An expected input having the value `None` indicates that the end of input was expected. - fn expected_found>>>( - expected: E, - found: Option>, - span: I::Span, - ) -> Self; - +pub trait Error<'a, I: Input<'a>>: + Sized + LabelError<'a, I, DefaultExpected<'a, I::Token>> +{ /// Merge two errors that point to the same input together, combining their information. #[inline(always)] fn merge(self, other: Self) -> Self { #![allow(unused_variables)] self } - - /// Fast path for `a.merge(Error::expected_found(...))` that may incur less overhead by, for example, reusing allocations. - #[inline(always)] - fn merge_expected_found>>>( - self, - expected: E, - found: Option>, - span: I::Span, - ) -> Self { - self.merge(Self::expected_found(expected, found, span)) - } - - /// Fast path for `a = Error::expected_found(...)` that may incur less overhead by, for example, reusing allocations. - #[inline(always)] - fn replace_expected_found>>>( - self, - expected: E, - found: Option>, - span: I::Span, - ) -> Self { - Self::expected_found(expected, found, span) - } } /// A ZST error type that tracks only whether a parse error occurred at all. 
This type is for when @@ -115,9 +90,11 @@ pub trait Error<'a, I: Input<'a>>: Sized { #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone, Default)] pub struct EmptyErr(()); -impl<'a, I: Input<'a>> Error<'a, I> for EmptyErr { +impl<'a, I: Input<'a>> Error<'a, I> for EmptyErr {} + +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for EmptyErr { #[inline(always)] - fn expected_found>>>( + fn expected_found>( _: E, _: Option>, _: I::Span, @@ -147,9 +124,11 @@ impl Cheap { } } -impl<'a, I: Input<'a>> Error<'a, I> for Cheap { +impl<'a, I: Input<'a>> Error<'a, I> for Cheap {} + +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Cheap { #[inline] - fn expected_found>>>( + fn expected_found>( _expected: E, _found: Option>, span: I::Span, @@ -214,9 +193,11 @@ impl<'a, T, S> Simple<'a, T, S> { } } -impl<'a, I: Input<'a>> Error<'a, I> for Simple<'a, I::Token, I::Span> { +impl<'a, I: Input<'a>> Error<'a, I> for Simple<'a, I::Token, I::Span> {} + +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Simple<'a, I::Token, I::Span> { #[inline] - fn expected_found>>>( + fn expected_found>( _expected: E, found: Option>, span: I::Span, @@ -251,39 +232,105 @@ where /// An expected pattern for a [`Rich`] error. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum RichPattern<'a, T, L = &'static str> { - /// A specific token was expected. +pub enum RichPattern<'a, T> { + /// A specific token. Token(MaybeRef<'a, T>), - /// A labelled pattern was expected. - Label(L), - /// The end of input was expected. + /// A labelled pattern. + Label(Cow<'a, str>), + /// A specific keyword. + Identifier(String), + /// Anything other than the end of input. + Any, + /// Something other than the provided input. + SomethingElse, + /// The end of input. 
EndOfInput, } -impl<'a, T, L> RichPattern<'a, T, L> { +impl<'a, T> From> for RichPattern<'a, T> { + fn from(expected: DefaultExpected<'a, T>) -> Self { + match expected { + DefaultExpected::Token(tok) => Self::Token(tok), + DefaultExpected::Any => Self::Any, + DefaultExpected::SomethingElse => Self::SomethingElse, + DefaultExpected::EndOfInput => Self::EndOfInput, + } + } +} + +impl<'a, I: StrInput<'a>, T> From> for RichPattern<'a, T> +where + I::Token: Char, +{ + fn from(expected: text::TextExpected<'a, I>) -> Self { + match expected { + text::TextExpected::Whitespace => Self::Label(Cow::Borrowed("whitespace")), + text::TextExpected::InlineWhitespace => Self::Label(Cow::Borrowed("inline whitespace")), + text::TextExpected::Newline => Self::Label(Cow::Borrowed("newline")), + text::TextExpected::Digit(r) if r.start > 0 => { + Self::Label(Cow::Borrowed("non-zero digit")) + } + text::TextExpected::Digit(_) => Self::Label(Cow::Borrowed("digit")), + text::TextExpected::IdentifierPart => Self::Label(Cow::Borrowed("identifier")), + text::TextExpected::Identifier(i) => Self::Identifier(I::stringify(i)), + } + } +} + +impl<'a, T> From> for RichPattern<'a, T> { + fn from(tok: MaybeRef<'a, T>) -> Self { + Self::Token(tok) + } +} + +impl From<&'static str> for RichPattern<'_, T> { + fn from(label: &'static str) -> Self { + Self::Label(Cow::Borrowed(label)) + } +} + +impl From for RichPattern<'_, T> { + fn from(label: String) -> Self { + Self::Label(Cow::Owned(label)) + } +} + +impl From for RichPattern<'_, char> { + fn from(c: char) -> Self { + Self::Token(MaybeRef::Val(c)) + } +} + +impl<'a, T> RichPattern<'a, T> { /// Transform this pattern's tokens using the given function. /// /// This is useful when you wish to combine errors from multiple compilation passes (lexing and parsing, say) where /// the token type for each pass is different (`char` vs `MyToken`, say). 
- pub fn map_token U>(self, mut f: F) -> RichPattern<'a, U, L> + pub fn map_token U>(self, mut f: F) -> RichPattern<'a, U> where T: Clone, { match self { Self::Token(t) => RichPattern::Token(f(t.into_inner()).into()), - Self::Label(s) => RichPattern::Label(s), + Self::Label(l) => RichPattern::Label(l), + Self::Identifier(i) => RichPattern::Identifier(i), + Self::Any => RichPattern::Any, + Self::SomethingElse => RichPattern::SomethingElse, Self::EndOfInput => RichPattern::EndOfInput, } } /// Convert this pattern into an owned version of itself by cloning any borrowed internal tokens, if necessary. - pub fn into_owned<'b>(self) -> RichPattern<'b, T, L> + pub fn into_owned<'b>(self) -> RichPattern<'b, T> where T: Clone, { match self { Self::Token(tok) => RichPattern::Token(tok.into_owned()), - Self::Label(label) => RichPattern::Label(label), + Self::Label(l) => RichPattern::Label(Cow::Owned(l.into_owned())), + Self::Identifier(i) => RichPattern::Identifier(i), + Self::Any => RichPattern::Any, + Self::SomethingElse => RichPattern::SomethingElse, Self::EndOfInput => RichPattern::EndOfInput, } } @@ -292,7 +339,6 @@ impl<'a, T, L> RichPattern<'a, T, L> { &self, f: &mut fmt::Formatter, mut fmt_token: impl FnMut(&T, &mut fmt::Formatter<'_>) -> fmt::Result, - mut fmt_label: impl FnMut(&L, &mut fmt::Formatter<'_>) -> fmt::Result, ) -> fmt::Result { match self { Self::Token(tok) => { @@ -300,49 +346,41 @@ impl<'a, T, L> RichPattern<'a, T, L> { fmt_token(tok, f)?; write!(f, "'") } - Self::Label(label) => fmt_label(label, f), + Self::Label(l) => write!(f, "{l}"), + Self::Identifier(i) => write!(f, "'{i}'"), + Self::Any => write!(f, "any"), + Self::SomethingElse => write!(f, "something else"), Self::EndOfInput => write!(f, "end of input"), } } } -impl fmt::Debug for RichPattern<'_, T, L> +impl fmt::Debug for RichPattern<'_, T> where T: fmt::Debug, - L: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Token(t) => write!(f, "{t:?}"), - 
Self::Label(label) => write!(f, "{label:?}"), - Self::EndOfInput => write!(f, "end of input"), - } + self.write(f, |t, f| write!(f, "{t:?}")) } } -impl fmt::Display for RichPattern<'_, T, L> +impl fmt::Display for RichPattern<'_, T> where T: fmt::Display, - L: fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Token(t) => write!(f, "'{}'", &**t), - Self::Label(s) => write!(f, "{s}"), - Self::EndOfInput => write!(f, "end of input"), - } + self.write(f, |t, f| write!(f, "'{t}'")) } } -// TODO: Maybe should make ExpectedFound encapsulated a bit more /// The reason for a [`Rich`] error. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum RichReason<'a, T, L = &'static str> { +pub enum RichReason<'a, T> { /// An unexpected input was found ExpectedFound { /// The tokens expected - expected: Vec>, + expected: Vec>, /// The tokens found found: Option>, }, @@ -350,7 +388,7 @@ pub enum RichReason<'a, T, L = &'static str> { Custom(String), } -impl<'a, T, L> RichReason<'a, T, L> { +impl<'a, T> RichReason<'a, T> { /// Return the token that was found by this error reason. `None` implies that the end of input was expected. pub fn found(&self) -> Option<&T> { match self { @@ -360,7 +398,7 @@ impl<'a, T, L> RichReason<'a, T, L> { } /// Convert this reason into an owned version of itself by cloning any borrowed internal tokens, if necessary. - pub fn into_owned<'b>(self) -> RichReason<'b, T, L> + pub fn into_owned<'b>(self) -> RichReason<'b, T> where T: Clone, { @@ -373,7 +411,6 @@ impl<'a, T, L> RichReason<'a, T, L> { } } - #[cfg(feature = "label")] fn take_found(&mut self) -> Option> { match self { RichReason::ExpectedFound { found, .. 
} => found.take(), @@ -385,7 +422,7 @@ impl<'a, T, L> RichReason<'a, T, L> { /// /// This is useful when you wish to combine errors from multiple compilation passes (lexing and parsing, say) where /// the token type for each pass is different (`char` vs `MyToken`, say). - pub fn map_token U>(self, mut f: F) -> RichReason<'a, U, L> + pub fn map_token U>(self, mut f: F) -> RichReason<'a, U> where T: Clone, { @@ -406,9 +443,8 @@ impl<'a, T, L> RichReason<'a, T, L> { f: &mut fmt::Formatter<'_>, mut fmt_token: impl FnMut(&T, &mut fmt::Formatter<'_>) -> fmt::Result, mut fmt_span: impl FnMut(&S, &mut fmt::Formatter<'_>) -> fmt::Result, - mut fmt_label: impl FnMut(&L, &mut fmt::Formatter<'_>) -> fmt::Result, span: Option<&S>, - #[cfg(feature = "label")] context: &[(L, S)], + context: &[(RichPattern<'a, T>, S)], ) -> fmt::Result { match self { RichReason::ExpectedFound { expected, found } => { @@ -421,17 +457,14 @@ impl<'a, T, L> RichReason<'a, T, L> { write!(f, " expected ")?; match &expected[..] 
{ [] => write!(f, "something else")?, - [expected] => expected.write(f, &mut fmt_token, &mut fmt_label)?, + [expected] => expected.write(f, &mut fmt_token)?, _ => { for expected in &expected[..expected.len() - 1] { - expected.write(f, &mut fmt_token, &mut fmt_label)?; + expected.write(f, &mut fmt_token)?; write!(f, ", ")?; } write!(f, "or ")?; - expected - .last() - .unwrap() - .write(f, &mut fmt_token, &mut fmt_label)?; + expected.last().unwrap().write(f, &mut fmt_token)?; } } } @@ -443,10 +476,9 @@ impl<'a, T, L> RichReason<'a, T, L> { } } } - #[cfg(feature = "label")] for (l, s) in context { write!(f, " in ")?; - fmt_label(l, f)?; + l.write(f, &mut fmt_token)?; write!(f, " at ")?; fmt_span(s, f)?; } @@ -454,10 +486,9 @@ impl<'a, T, L> RichReason<'a, T, L> { } } -impl RichReason<'_, T, L> +impl RichReason<'_, T> where T: PartialEq, - L: PartialEq, { #[inline] fn flat_merge(self, other: Self) -> Self { @@ -493,21 +524,12 @@ where } } -impl fmt::Display for RichReason<'_, T, L> +impl fmt::Display for RichReason<'_, T> where T: fmt::Display, - L: fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.inner_fmt( - f, - T::fmt, - |_: &(), _| Ok(()), - L::fmt, - None, - #[cfg(feature = "label")] - &[], - ) + self.inner_fmt(f, T::fmt, |_: &(), _| Ok(()), None, &[]) } } @@ -516,42 +538,37 @@ where /// Please note that it uses a [`Vec`] to remember expected symbols. If you find this to be too slow, you can /// implement [`Error`] for your own error type or use [`Simple`] instead. 
#[derive(Clone, PartialEq, Eq, Hash)] -pub struct Rich<'a, T, S = SimpleSpan, L = &'static str> { +pub struct Rich<'a, T, S = SimpleSpan> { span: S, - reason: Box>, - #[cfg(feature = "label")] - context: Vec<(L, S)>, + reason: Box>, + context: Vec<(RichPattern<'a, T>, S)>, } -impl Rich<'_, T, S, L> { +impl Rich<'_, T, S> { fn inner_fmt( &self, f: &mut fmt::Formatter<'_>, fmt_token: impl FnMut(&T, &mut fmt::Formatter<'_>) -> fmt::Result, fmt_span: impl FnMut(&S, &mut fmt::Formatter<'_>) -> fmt::Result, - fmt_label: impl FnMut(&L, &mut fmt::Formatter<'_>) -> fmt::Result, with_spans: bool, ) -> fmt::Result { self.reason.inner_fmt( f, fmt_token, fmt_span, - fmt_label, if with_spans { Some(&self.span) } else { None }, - #[cfg(feature = "label")] &self.context, ) } } -impl<'a, T, S, L> Rich<'a, T, S, L> { +impl<'a, T, S> Rich<'a, T, S> { /// Create an error with a custom message and span #[inline] pub fn custom(span: S, msg: M) -> Self { Rich { span, reason: Box::new(RichReason::Custom(msg.to_string())), - #[cfg(feature = "label")] context: Vec::new(), } } @@ -562,12 +579,12 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { } /// Get the reason for this error. - pub fn reason(&self) -> &RichReason<'a, T, L> { + pub fn reason(&self) -> &RichReason<'a, T> { &self.reason } /// Take the reason from this error. - pub fn into_reason(self) -> RichReason<'a, T, L> { + pub fn into_reason(self) -> RichReason<'a, T> { *self.reason } @@ -580,24 +597,28 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { /// /// 'Context' here means parser patterns that the parser was in the process of parsing when the error occurred. To /// add labelled contexts, see [`Parser::labelled`]. - #[cfg(feature = "label")] - pub fn contexts(&self) -> impl Iterator { + pub fn contexts(&self) -> impl Iterator, &S)> { self.context.iter().map(|(l, s)| (l, s)) } /// Convert this error into an owned version of itself by cloning any borrowed internal tokens, if necessary. 
- pub fn into_owned<'b>(self) -> Rich<'b, T, S, L> + pub fn into_owned<'b>(self) -> Rich<'b, T, S> where T: Clone, { Rich { reason: Box::new(self.reason.into_owned()), + context: self + .context + .into_iter() + .map(|(p, s)| (p.into_owned(), s)) + .collect(), ..self } } /// Get an iterator over the expected items associated with this error - pub fn expected(&self) -> impl ExactSizeIterator> { + pub fn expected(&self) -> impl ExactSizeIterator> { match &*self.reason { RichReason::ExpectedFound { expected, .. } => expected.iter(), RichReason::Custom(_) => [].iter(), @@ -608,26 +629,44 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { /// /// This is useful when you wish to combine errors from multiple compilation passes (lexing and parsing, say) where /// the token type for each pass is different (`char` vs `MyToken`, say). - pub fn map_token U>(self, f: F) -> Rich<'a, U, S, L> + pub fn map_token U>(self, mut f: F) -> Rich<'a, U, S> where T: Clone, { Rich { span: self.span, - reason: Box::new(self.reason.map_token(f)), - #[cfg(feature = "label")] - context: self.context, + reason: Box::new(self.reason.map_token(&mut f)), + context: self + .context + .into_iter() + .map(|(p, s)| (p.map_token(&mut f), s)) + .collect(), + } + } +} + +impl<'a, I: Input<'a>> Error<'a, I> for Rich<'a, I::Token, I::Span> +where + I::Token: PartialEq, +{ + #[inline] + fn merge(self, other: Self) -> Self { + let new_reason = self.reason.flat_merge(*other.reason); + Self { + span: self.span, + reason: Box::new(new_reason), + context: self.context, // TODO: Merge contexts } } } -impl<'a, I: Input<'a>, L> Error<'a, I> for Rich<'a, I::Token, I::Span, L> +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Rich<'a, I::Token, I::Span> where I::Token: PartialEq, - L: PartialEq, + L: Into>, { #[inline] - fn expected_found>>>( + fn expected_found>( expected: E, found: Option>, span: I::Span, @@ -635,33 +674,15 @@ where Self { span, reason: Box::new(RichReason::ExpectedFound { - expected: expected - 
.into_iter() - .map(|tok| { - tok.map(RichPattern::Token) - .unwrap_or(RichPattern::EndOfInput) - }) - .collect(), + expected: expected.into_iter().map(|tok| tok.into()).collect(), found, }), - #[cfg(feature = "label")] context: Vec::new(), } } #[inline] - fn merge(self, other: Self) -> Self { - let new_reason = self.reason.flat_merge(*other.reason); - Self { - span: self.span, - reason: Box::new(new_reason), - #[cfg(feature = "label")] - context: self.context, // TOOD: Merge contexts - } - } - - #[inline] - fn merge_expected_found>>>( + fn merge_expected_found>( mut self, new_expected: E, new_found: Option>, @@ -670,9 +691,7 @@ where match &mut *self.reason { RichReason::ExpectedFound { expected, found } => { for new_expected in new_expected { - let new_expected = new_expected - .map(RichPattern::Token) - .unwrap_or(RichPattern::EndOfInput); + let new_expected = new_expected.into(); if !expected[..].contains(&new_expected) { expected.push(new_expected); } @@ -686,7 +705,7 @@ where } #[inline] - fn replace_expected_found>>>( + fn replace_expected_found>( mut self, new_expected: E, new_found: Option>, @@ -696,48 +715,31 @@ where match &mut *self.reason { RichReason::ExpectedFound { expected, found } => { expected.clear(); - expected.extend(new_expected.into_iter().map(|tok| { - tok.map(RichPattern::Token) - .unwrap_or(RichPattern::EndOfInput) - })); + expected.extend(new_expected.into_iter().map(|tok| tok.into())); *found = new_found; } _ => { self.reason = Box::new(RichReason::ExpectedFound { - expected: new_expected - .into_iter() - .map(|tok| { - tok.map(RichPattern::Token) - .unwrap_or(RichPattern::EndOfInput) - }) - .collect(), + expected: new_expected.into_iter().map(|tok| tok.into()).collect(), found: new_found, }); } } - #[cfg(feature = "label")] self.context.clear(); self } -} -#[cfg(feature = "label")] -impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Rich<'a, I::Token, I::Span, L> -where - I::Token: PartialEq, - L: PartialEq, -{ #[inline] fn 
label_with(&mut self, label: L) { // Opportunistically attempt to reuse allocations if we can match &mut *self.reason { RichReason::ExpectedFound { expected, found: _ } => { expected.clear(); - expected.push(RichPattern::Label(label)); + expected.push(label.into()); } _ => { self.reason = Box::new(RichReason::ExpectedFound { - expected: vec![RichPattern::Label(label)], + expected: vec![label.into()], found: self.reason.take_found(), }); } @@ -746,31 +748,30 @@ where #[inline] fn in_context(&mut self, label: L, span: I::Span) { + let label = label.into(); if self.context.iter().all(|(l, _)| l != &label) { self.context.push((label, span)); } } } -impl fmt::Debug for Rich<'_, T, S, L> +impl fmt::Debug for Rich<'_, T, S> where T: fmt::Debug, S: fmt::Debug, - L: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.inner_fmt(f, T::fmt, S::fmt, L::fmt, true) + self.inner_fmt(f, T::fmt, S::fmt, true) } } -impl fmt::Display for Rich<'_, T, S, L> +impl fmt::Display for Rich<'_, T, S> where T: fmt::Display, S: fmt::Display, - L: fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.inner_fmt(f, T::fmt, S::fmt, L::fmt, false) + self.inner_fmt(f, T::fmt, S::fmt, false) } } @@ -780,7 +781,11 @@ fn write_token( tok: Option<&T>, ) -> fmt::Result { match tok { - Some(tok) => fmt_token(tok, f), + Some(tok) => { + write!(f, "'")?; + fmt_token(tok, f)?; + write!(f, "'") + } None => write!(f, "end of input"), } } diff --git a/src/extension.rs b/src/extension.rs index 43956f46..dd4410d9 100644 --- a/src/extension.rs +++ b/src/extension.rs @@ -15,7 +15,13 @@ //! # Example //! //! ``` -//! use chumsky::{prelude::*, input::InputRef, extension::v1::{ExtParser, Ext}}; +//! use chumsky::{ +//! prelude::*, +//! error::LabelError, +//! input::InputRef, +//! extension::v1::{ExtParser, Ext}, +//! DefaultExpected, +//! }; //! //! // An example extension parser that expects a null byte. //! pub struct Null_; @@ -32,9 +38,9 @@ //! 
// The next token was a null byte, meaning that parsing was successful //! Some(b'\0') => Ok(()), //! // The next token was something that wasn't a null byte, generate an error instead -//! found => Err(E::Error::expected_found( +//! found => Err(LabelError::::expected_found( //! // Expected a null byte -//! core::iter::once(Some(b'\0'.into())), +//! [DefaultExpected::Token(b'\0'.into())], //! // Found whatever the token was instead //! found.copied().map(Into::into), //! // The span of the error is the span of the token that was found instead diff --git a/src/input.rs b/src/input.rs index c1794322..f202ebf2 100644 --- a/src/input.rs +++ b/src/input.rs @@ -10,6 +10,7 @@ use inspector::Inspector; pub use crate::stream::{BoxedExactSizeStream, BoxedStream, IterInput, Stream}; use super::*; +use alloc::string::ToString; #[cfg(feature = "std")] use std::io::{BufReader, Read, Seek}; @@ -182,7 +183,7 @@ pub trait ExactSizeInput<'src>: Input<'src> { /// Implemented by inputs that represent slice-like streams of input tokens. pub trait SliceInput<'src>: ExactSizeInput<'src> { /// The unsized slice type of this input. For [`&str`] it's `&str`, and for [`&[T]`] it will be `&[T]`. - type Slice; + type Slice: Copy; /// Get the full slice of the input /// @@ -213,6 +214,8 @@ pub trait StrInput<'src>: Sealed + ValueInput<'src, Cursor = usize> + SliceInput where Self::Token: Char, { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String; } /// Implemented by inputs that can have tokens borrowed from them. 
@@ -298,7 +301,12 @@ impl<'src> ValueInput<'src> for &'src str { } impl Sealed for &str {} -impl<'src> StrInput<'src> for &'src str {} +impl<'src> StrInput<'src> for &'src str { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + slice.to_string() + } +} impl<'src> SliceInput<'src> for &'src str { type Slice = &'src str; @@ -365,7 +373,16 @@ impl<'src, T> ExactSizeInput<'src> for &'src [T] { } impl Sealed for &[u8] {} -impl<'src> StrInput<'src> for &'src [u8] {} +impl<'src> StrInput<'src> for &'src [u8] { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + slice + .iter() + // .map(|e| core::ascii::Char::from_u8(e).unwrap_or(AsciiChar::Substitute).to_char()) + .map(|e| char::from(*e)) + .collect() + } +} impl<'src, T> SliceInput<'src> for &'src [T] { type Slice = &'src [T]; @@ -449,7 +466,12 @@ impl<'src, T: 'src, const N: usize> ExactSizeInput<'src> for &'src [T; N] { } impl Sealed for &[u8; N] {} -impl<'src, const N: usize> StrInput<'src> for &'src [u8; N] {} +impl<'src, const N: usize> StrInput<'src> for &'src [u8; N] { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + <&[u8]>::stringify(slice) + } +} impl<'src, T: 'src, const N: usize> SliceInput<'src> for &'src [T; N] { type Slice = &'src [T]; @@ -807,6 +829,10 @@ where S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + I::stringify(slice) + } } /// An input wrapper that returns a custom span, with the user-defined context @@ -952,6 +978,10 @@ where S::Context: Clone + 'src, S::Offset: From<::Offset>, { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + I::stringify(slice) + } } /// Input type which supports seekable readers. 
Uses a [`BufReader`] internally to buffer input and @@ -1511,7 +1541,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars I: ValueInput<'src>, { // SAFETY: cursor was generated by previous call to `Input::next` - unsafe { I::next(self.cache, &mut self.cursor.clone()).map(Into::into) } + unsafe { I::next(self.cache, &mut self.cursor.clone()) } } /// Peek the next token in the input. Returns `None` if the end of the input has been reached. @@ -1521,7 +1551,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars I: BorrowInput<'src>, { // SAFETY: cursor was generated by previous call to `Input::next` - unsafe { I::next_ref(self.cache, &mut self.cursor.clone()).map(Into::into) } + unsafe { I::next_ref(self.cache, &mut self.cursor.clone()) } } /// Skip the next token in the input. @@ -1634,12 +1664,15 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars } #[inline] - pub(crate) fn add_alt>>>( + pub(crate) fn add_alt( &mut self, expected: Exp, found: Option>, span: I::Span, - ) { + ) where + Exp: IntoIterator, + E::Error: LabelError<'src, I, L>, + { if core::mem::size_of::() == 0 { return; } @@ -1648,7 +1681,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars // Prioritize errors before choosing whether to generate the alt (avoids unnecessary error creation) self.errors.alt = Some(match self.errors.alt.take() { - Some(alt) => match { I::cursor_location(&alt.pos).cmp(&I::cursor_location(at)) } { + Some(alt) => match I::cursor_location(&alt.pos).cmp(&I::cursor_location(at)) { Ordering::Equal => { Located::at(alt.pos, alt.err.merge_expected_found(expected, found, span)) } @@ -1658,7 +1691,10 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars alt.err.replace_expected_found(expected, found, span), ), }, - None => Located::at(at.clone(), Error::expected_found(expected, found, span)), + None => Located::at( + 
at.clone(), + LabelError::expected_found(expected, found, span), + ), }); } diff --git a/src/label.rs b/src/label.rs index f2bf165e..f77932b7 100644 --- a/src/label.rs +++ b/src/label.rs @@ -3,19 +3,59 @@ use super::*; /// A trait implemented by [`Error`]s that can originate from labelled parsers. See [`Parser::labelled`]. -pub trait LabelError<'src, I: Input<'src>, L>: Error<'src, I> { +pub trait LabelError<'src, I: Input<'src>, L>: Sized { + /// Create a new error describing a conflict between expected inputs and that which was actually found. + /// + /// `found` having the value `None` indicates that the end of input was reached, but was not expected. + /// + /// Each expected input is a label of type `L`; with [`DefaultExpected`] labels, an expected end of input is written as `DefaultExpected::EndOfInput`. + fn expected_found>( + expected: E, + found: Option>, + span: I::Span, + ) -> Self; + + /// Fast path for `a.merge(LabelError::expected_found(...))` that may incur less overhead by, for example, reusing allocations. + #[inline(always)] + fn merge_expected_found>( + self, + expected: E, + found: Option>, + span: I::Span, + ) -> Self + where + Self: Error<'src, I>, + { + self.merge(LabelError::expected_found(expected, found, span)) + } + + /// Fast path for `a = LabelError::expected_found(...)` that may incur less overhead by, for example, reusing allocations. + #[inline(always)] + fn replace_expected_found>( + self, + expected: E, + found: Option>, + span: I::Span, + ) -> Self { + LabelError::expected_found(expected, found, span) + } + /// Annotate the expected patterns within this parser with the given label. /// /// In practice, this usually removes all other labels and expected tokens in favor of a single label that /// represents the overall pattern. - fn label_with(&mut self, label: L); + fn label_with(&mut self, label: L) { + #![allow(unused_variables)] + } /// Annotate this error, indicating that it occurred within the context denoted by the given label. 
/// /// A span that runs from the beginning of the context up until the error location is also provided. /// /// In practice, this usually means adding the context to a context 'stack', similar to a backtrace. - fn in_context(&mut self, label: L, span: I::Span); + fn in_context(&mut self, label: L, span: I::Span) { + #![allow(unused_variables)] + } } /// See [`Parser::labelled`]. diff --git a/src/lib.rs b/src/lib.rs index e2b8955c..d2a9af66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,6 @@ pub mod extra; pub mod guide; pub mod input; pub mod inspector; -#[cfg(feature = "label")] pub mod label; #[cfg(feature = "lexical-numbers")] pub mod number; @@ -136,8 +135,6 @@ use hashbrown::HashMap; #[cfg(feature = "serde")] use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; -#[cfg(feature = "label")] -use self::label::{LabelError, Labelled}; use self::{ combinator::*, container::*, @@ -147,6 +144,7 @@ use self::{ BorrowInput, Emitter, ExactSizeInput, InputRef, MapExtra, SliceInput, StrInput, ValueInput, }, inspector::Inspector, + label::{LabelError, Labelled}, prelude::*, primitive::Any, private::{Check, Emit, IPResult, Located, MaybeUninitExt, Mode, PResult, Sealed}, @@ -186,6 +184,36 @@ pub(crate) type DynParser<'src, 'b, I, O, E> = dyn Parser<'src, I, O, E> + 'b; #[cfg(feature = "pratt")] pub(crate) type DynOperator<'src, 'b, I, O, E> = dyn pratt::Operator<'src, I, O, E> + 'b; +/// Labels corresponding to a variety of patterns. +#[derive(Clone, Debug, PartialEq)] +#[non_exhaustive] +pub enum DefaultExpected<'a, T> { + /// A specific token was expected. + Token(MaybeRef<'a, T>), + /// Anything other than the end of input was expected. + Any, + /// Something other than the provided input was expected. + SomethingElse, + /// The end of input was expected. + EndOfInput, +} + +impl DefaultExpected<'_, T> { + /// Convert this [`DefaultExpected`] into an owned version of itself, cloning any inner references if required. 
+ #[inline] + pub fn into_owned(self) -> DefaultExpected<'static, T> + where + T: Clone, + { + match self { + Self::Token(tok) => DefaultExpected::Token(tok.into_owned()), + Self::Any => DefaultExpected::Any, + Self::SomethingElse => DefaultExpected::SomethingElse, + Self::EndOfInput => DefaultExpected::EndOfInput, + } + } +} + /// The result of performing a parse on an input with [`Parser`]. /// /// Unlike `Result`, this type is designed to express the fact that generating outputs and errors are not @@ -359,6 +387,7 @@ pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Defau let res = self.then_ignore(end()).go::(&mut inp); let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| { let fake_span = inp.span_since(&inp.cursor()); + // TODO: Why is this needed? E::Error::expected_found([], None, fake_span) }); let mut errs = own.into_errs(); @@ -407,6 +436,7 @@ pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Defau let res = self.then_ignore(end()).go::(&mut inp); let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| { let fake_span = inp.span_since(&inp.cursor()); + // TODO: Why is this needed? E::Error::expected_found([], None, fake_span) }); let mut errs = own.into_errs(); @@ -837,7 +867,6 @@ pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Defau /// within the parser. For example, labelling a parser for an expression would yield "expected expression" errors /// rather than "expected integer, string, binary op, etc." errors. 
// TODO: Example - #[cfg(feature = "label")] fn labelled(self, label: L) -> Labelled where Self: Sized, @@ -1852,10 +1881,9 @@ pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Defau /// To show the difference in behavior from [`Parser::try_map`]: /// /// ``` - /// # use chumsky::prelude::*; - /// # use chumsky::util::MaybeRef; - /// # use chumsky::error::Error; - /// // start with the same large_int validator + /// # use chumsky::{text::TextExpected, util::MaybeRef, error::LabelError, prelude::*}; + /// + /// // Start with the same large_int validator /// let large_int_val = text::int::<_, extra::Err>>(10) /// .from_str() /// .unwrapped() @@ -1897,7 +1925,7 @@ pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Defau /// multi_step_val.parse("100 2").into_result(), /// Err(vec![ /// Rich::::custom((0..3).into(), "100 must be 256 or higher"), - /// as Error<&str>>::expected_found([], Some(MaybeRef::Val('2')), (4..5).into()), + /// as LabelError<&str, _>>::expected_found([TextExpected::<&str>::IdentifierPart], Some(MaybeRef::Val('2')), (4..5).into()), /// ]) /// ); /// @@ -3397,25 +3425,27 @@ mod tests { #[derive(Copy, Clone, Debug, PartialEq, Eq)] struct MyErr(&'static str); - impl<'src, I> crate::Error<'src, I> for MyErr + impl<'src, I: Input<'src>> crate::Error<'src, I> for MyErr { + fn merge(self, other: Self) -> Self { + if other == MyErr("special") { + MyErr("special") + } else { + self + } + } + } + + impl<'src, I> crate::LabelError<'src, I, crate::DefaultExpected<'src, I::Token>> for MyErr where I: Input<'src>, { - fn expected_found>>>( + fn expected_found>>( _expected: E, _found: Option>, _span: I::Span, ) -> Self { MyErr("expected found") } - - fn merge(self, other: Self) -> Self { - if self == MyErr("special") || other == MyErr("special") { - MyErr("special") - } else { - self - } - } } #[test] @@ -3535,7 +3565,6 @@ mod tests { } } - #[cfg(feature = "label")] #[test] fn label() { use 
crate::label::LabelError; @@ -3544,20 +3573,20 @@ mod tests { just("hello").labelled("greeting").as_context().ignored() } - let mut err = as crate::Error<&str>>::expected_found( - Some(Some('h'.into())), + let mut err = as crate::LabelError<&str, char>>::expected_found( + ['h'], Some('b'.into()), (0..1).into(), ); - as LabelError<&str, _>>::label_with(&mut err, "greeting"); + as LabelError<&str, _>>::label_with(&mut err, "greeting"); assert_eq!(parser().parse("bye").into_errors(), vec![err]); - let mut err = as crate::Error<&str>>::expected_found( - Some(Some('l'.into())), + let mut err = as crate::LabelError<&str, char>>::expected_found( + ['l'], Some('p'.into()), (3..4).into(), ); - as LabelError<&str, _>>::in_context(&mut err, "greeting", (0..3).into()); + as LabelError<&str, _>>::in_context(&mut err, "greeting", (0..3).into()); assert_eq!(parser().parse("help").into_errors(), vec![err]); fn parser2<'src>() -> impl Parser<'src, &'src str, (), extra::Err>> { @@ -3567,19 +3596,16 @@ mod tests { .ignored() } - let mut err = as crate::Error<&str>>::expected_found( - Some(Some('h'.into())), - None, - (0..7).into(), - ); - as LabelError<&str, _>>::label_with(&mut err, "greeting"); + let mut err = + as crate::LabelError<&str, char>>::expected_found(['h'], None, (0..7).into()); + as LabelError<&str, _>>::label_with(&mut err, "greeting"); assert_eq!(parser2().parse("goodbye").into_errors(), vec![err]); } #[test] #[allow(dead_code)] fn invalid_escape() { - use crate::error::Error; + use crate::LabelError; fn string<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Err>> { let quote = just("\""); @@ -3596,27 +3622,29 @@ mod tests { assert_eq!( string().parse(r#""Hello\m""#).into_result(), - Err(vec![ as Error::<&str>>::expected_found( - Some(Some('n'.into())), - Some('m'.into()), - (7..8).into(), - )]), + Err(vec![ + as LabelError::<&str, char>>::expected_found( + ['n'], + Some('m'.into()), + (7..8).into(), + ) + ]), ); } #[test] #[allow(dead_code)] fn 
map_err_missed_info() { - use crate::error::Error; + use crate::LabelError; fn zero<'src>() -> impl Parser<'src, &'src str, (), extra::Err>> { just("-") .or_not() .then(just("0").map_err(move |e: Rich<_>| { - Error::<&str>::expected_found( - vec![Some('n'.into())], + LabelError::<&str, char>::expected_found( + ['n'], e.found().map(|i| From::from(*i)), - e.span().clone(), + *e.span(), ) })) .ignored() @@ -3624,30 +3652,36 @@ mod tests { assert_eq!( zero().parse("_0").into_result(), - Err(vec![ as Error::<&str>>::expected_found( - vec![Some('-'.into()), Some('n'.into())], - Some('_'.into()), - (0..1).into(), - )]), + Err(vec![ + as LabelError::<&str, char>>::expected_found( + ['-', 'n'], + Some('_'.into()), + (0..1).into(), + ) + ]), ); } #[test] fn map_err() { - use crate::{error::Error, util::Maybe::Val}; + use crate::LabelError; let parser = just::>('"').map_err(move |e: Rich| { println!("Found = {:?}", e.found()); println!("Expected = {:?}", e.expected().collect::>()); println!("Span = {:?}", e.span()); - Error::<&str>::expected_found([Some(Val('"'))], e.found().copied().map(Val), *e.span()) + LabelError::<&str, char>::expected_found( + ['"'], + e.found().copied().map(Into::into), + *e.span(), + ) }); assert_eq!( parser.parse(r#"H"#).into_result(), - Err(vec![Error::<&str>::expected_found( - [Some(Val('"'))], - Some(Val('H')), + Err(vec![LabelError::<&str, char>::expected_found( + ['"'], + Some('H'.into()), (0..1).into() )]) ); diff --git a/src/number.rs b/src/number.rs index 08c4c030..cfad0539 100644 --- a/src/number.rs +++ b/src/number.rs @@ -26,12 +26,16 @@ pub const fn number() -> Number { } } +/// A label denoting a parseable number. 
+pub struct ExpectedNumber; + impl<'src, const F: u128, I, O, E> Parser<'src, I, O, E> for Number where O: FromLexical, I: SliceInput<'src, Cursor = usize>, >::Slice: AsRef<[u8]>, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, ExpectedNumber>, { #[inline] fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { @@ -45,7 +49,7 @@ where Err(_err) => { // TODO: Improve error let span = inp.span_since(&before); - inp.add_alt(None, None, span); + inp.add_alt([ExpectedNumber], None, span); Err(()) } } diff --git a/src/pratt.rs b/src/pratt.rs index a838e642..96761d09 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -1111,7 +1111,11 @@ mod tests { c: C, span: S, ) -> Simple<'src, char> { - as Error<'_, &'_ str>>::expected_found(None, c.into(), span.into()) + as LabelError<&[char], _>>::expected_found::<[DefaultExpected; 0]>( + [], + c.into(), + span.into(), + ) } #[test] diff --git a/src/primitive.rs b/src/primitive.rs index a078f9d6..99ad6102 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -47,7 +47,7 @@ where Some(tok) => { let span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(Some(None), Some(tok.into()), span); + inp.add_alt([DefaultExpected::EndOfInput], Some(tok.into()), span); Err(()) } } @@ -192,7 +192,7 @@ where let span = inp.span_since(before.cursor()); inp.rewind(before); inp.add_alt( - Some(Some(T::to_maybe_ref(next))), + [DefaultExpected::Token(T::to_maybe_ref(next))], found.map(|f| f.into()), span, ); @@ -270,7 +270,9 @@ where let err_span = inp.span_since(before.cursor()); inp.rewind(before); inp.add_alt( - self.seq.seq_iter().map(|e| Some(T::to_maybe_ref(e))), + self.seq + .seq_iter() + .map(|e| DefaultExpected::Token(T::to_maybe_ref(e))), found.map(|f| f.into()), err_span, ); @@ -344,7 +346,11 @@ where found => { let err_span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(None, found.map(|f| f.into()), err_span); + inp.add_alt( + [DefaultExpected::SomethingElse], + found.map(|f| 
f.into()), + err_span, + ); Err(()) } } @@ -470,7 +476,7 @@ where }; let err_span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(None, found, err_span); + inp.add_alt([DefaultExpected::SomethingElse], found, err_span); Err(()) } @@ -528,7 +534,7 @@ where }; let err_span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(None, found, err_span); + inp.add_alt([DefaultExpected::SomethingElse], found, err_span); Err(()) } @@ -561,7 +567,7 @@ where found => { let err_span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(None, found.map(|f| f.into()), err_span); + inp.add_alt([DefaultExpected::Any], found.map(|f| f.into()), err_span); Err(()) } } @@ -617,7 +623,7 @@ where found => { let err_span = inp.span_since(before.cursor()); inp.rewind(before); - inp.add_alt(None, found.map(|f| f.into()), err_span); + inp.add_alt([DefaultExpected::Any], found.map(|f| f.into()), err_span); Err(()) } } @@ -944,7 +950,7 @@ where if self.parsers.is_empty() { let offs = inp.cursor(); let err_span = inp.span_since(&offs); - inp.add_alt(None, None, err_span); + inp.add_alt([], None, err_span); Err(()) } else { let before = inp.save(); diff --git a/src/regex.rs b/src/regex.rs index cd616b69..8d83d63f 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -57,7 +57,7 @@ where None => { // TODO: Improve error let span = inp.span_since(&before); - inp.add_alt(None, None, span); + inp.add_alt([DefaultExpected::SomethingElse], None, span); Err(()) } } diff --git a/src/stream.rs b/src/stream.rs index 3723e2e7..b90b2f21 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -18,7 +18,7 @@ impl Stream { /// # use chumsky::{prelude::*, input::Stream}; /// let stream = Stream::from_iter((0..10).map(|i| char::from_digit(i, 10).unwrap())); /// - /// let parser = text::digits::<_, extra::Err>>(10).collect::(); + /// let parser = any::<_, extra::Err>>().filter(|c: &char| c.is_ascii_digit()).repeated().collect::(); /// /// 
assert_eq!(parser.parse(stream).into_result().as_deref(), Ok("0123456789")); /// ``` diff --git a/src/text.rs b/src/text.rs index f1e79633..754de487 100644 --- a/src/text.rs +++ b/src/text.rs @@ -7,6 +7,7 @@ //! a type parameter, `C`, that can be either [`u8`] or [`char`] in order to handle either case. use crate::prelude::*; +use alloc::string::ToString; use super::*; @@ -208,6 +209,31 @@ where go_extra!(O); } +/// Labels denoting a variety of text-related patterns. +#[non_exhaustive] +pub enum TextExpected<'src, I: StrInput<'src>> +where + I::Token: Char, +{ + /// Whitespace (for example: spaces, tabs, or newlines). + Whitespace, + /// Inline whitespace (for example: spaces or tabs). + InlineWhitespace, + /// A newline character or sequence. + Newline, + /// A numeric digit within the given radix range. + /// + /// For example: + /// + /// - `Digit(0..10)` implies any base-10 digit + /// - `Digit(1..16)` implies any non-zero hexadecimal digit + Digit(Range), + /// Part of an identifier, either ASCII or unicode. + IdentifierPart, + /// A specific identifier. + Identifier(I::Slice), +} + /// A parser that accepts (and ignores) any number of whitespace characters. /// /// This parser is a `Parser::Repeated` and so methods such as `at_least()` can be called on it. @@ -230,9 +256,20 @@ where I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { - select! { c if (c as I::Token).is_whitespace() => () } - .ignored() + any() + .try_map(|c: I::Token, span| { + if c.is_whitespace() { + Ok(()) + } else { + Err(LabelError::expected_found( + [TextExpected::Whitespace], + Some(MaybeRef::Val(c)), + span, + )) + } + }) .repeated() } @@ -260,9 +297,20 @@ where I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { - select! 
{ c if (c as I::Token).is_inline_whitespace() => () } - .ignored() + any() + .try_map(|c: I::Token, span| { + if c.is_inline_whitespace() { + Ok(()) + } else { + Err(LabelError::expected_found( + [TextExpected::InlineWhitespace], + Some(MaybeRef::Val(c)), + span, + )) + } + }) .repeated() } @@ -299,14 +347,41 @@ where #[must_use] pub fn newline<'src, I, E>() -> impl Parser<'src, I, (), E> + Copy where - I: ValueInput<'src>, + I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, &'src str: OrderedSeq<'src, I::Token>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { - just("\r\n") - .ignored() - .or(any().filter(I::Token::is_newline).ignored()) + custom(|inp| { + let before = inp.cursor(); + + if inp + .peek() + .map_or(false, |c: I::Token| c.to_ascii() == Some(b'\r')) + { + inp.skip(); + if inp + .peek() + .map_or(false, |c: I::Token| c.to_ascii() == Some(b'\n')) + { + inp.skip(); + } + Ok(()) + } else { + let c = inp.next(); + if c.map_or(false, |c: I::Token| c.is_newline()) { + Ok(()) + } else { + let span = inp.span_since(&before); + Err(LabelError::expected_found( + [TextExpected::Newline], + c.map(MaybeRef::Val), + span, + )) + } + } + }) } /// A parser that accepts one or more ASCII digits. 
@@ -335,17 +410,21 @@ pub fn digits<'src, I, E>( radix: u32, ) -> Repeated>::Token, E> + Copy, I::Token, I, E> where - I: ValueInput<'src>, + I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { any() - // Use try_map over filter to get a better error on failure .try_map(move |c: I::Token, span| { if c.is_digit(radix) { Ok(c) } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) + Err(LabelError::expected_found( + [TextExpected::Digit(0..radix)], + Some(MaybeRef::Val(c)), + span, + )) } }) .repeated() @@ -388,18 +467,36 @@ where I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: + LabelError<'src, I, TextExpected<'src, I>> + LabelError<'src, I, MaybeRef<'src, I::Token>>, { any() - // Use try_map over filter to get a better error on failure .try_map(move |c: I::Token, span| { if c.is_digit(radix) && c != I::Token::digit_zero() { Ok(c) } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) + Err(LabelError::expected_found( + [TextExpected::Digit(1..radix)], + Some(MaybeRef::Val(c)), + span, + )) } }) - // This error never appears due to `repeated` so can use `filter` - .then(select! 
{ c if (c as I::Token).is_digit(radix) => () }.repeated()) + .then( + any() + .try_map(move |c: I::Token, span| { + if c.is_digit(radix) { + Ok(()) + } else { + Err(LabelError::expected_found( + [TextExpected::Digit(0..radix)], + Some(MaybeRef::Val(c)), + span, + )) + } + }) + .repeated(), + ) .ignored() .or(just(I::Token::digit_zero()).ignored()) .to_slice() @@ -422,18 +519,38 @@ pub mod ascii { I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { any() - // Use try_map over filter to get a better error on failure .try_map(|c: I::Token, span| { - if c.to_ascii().map(|i| i.is_ascii_alphabetic() || i == b'_').unwrap_or(false) { + if c.to_ascii() + .map(|i| i.is_ascii_alphabetic() || i == b'_') + .unwrap_or(false) + { Ok(c) } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) + Err(LabelError::expected_found( + [TextExpected::IdentifierPart], + Some(MaybeRef::Val(c)), + span, + )) } }) .then( - select! 
{ c if (c as I::Token).to_ascii().map(|i| i.is_ascii_alphabetic() || i == b'_').unwrap_or(false) => () } + any() + .try_map(|c: I::Token, span| { + if c.to_ascii() + .map_or(false, |i| i.is_ascii_alphabetic() || i == b'_') + { + Ok(()) + } else { + Err(LabelError::expected_found( + [TextExpected::IdentifierPart], + Some(MaybeRef::Val(c)), + span, + )) + } + }) .repeated(), ) .to_slice() @@ -468,6 +585,7 @@ pub mod ascii { I::Token: Char + fmt::Debug + 'src, S: Borrow + Clone + 'src, E: ParserExtra<'src, I> + 'src, + E::Error: LabelError<'src, I, TextExpected<'src, I>> + LabelError<'src, I, S>, { /* #[cfg(debug_assertions)] @@ -490,7 +608,11 @@ pub mod ascii { if &s == keyword.borrow() { Ok(()) } else { - Err(Error::expected_found(None, None, span)) + Err(LabelError::expected_found( + [TextExpected::Identifier(*keyword.borrow())], + None, + span, + )) } }) .to_slice() @@ -677,7 +799,12 @@ pub mod unicode { } impl Sealed for &'_ Graphemes {} - impl<'src> StrInput<'src> for &'src Graphemes {} + impl<'src> StrInput<'src> for &'src Graphemes { + #[doc(hidden)] + fn stringify(slice: Self::Slice) -> String { + slice.to_string() + } + } impl<'src> Input<'src> for &'src Graphemes { type Cursor = usize; @@ -819,17 +946,35 @@ pub mod unicode { I: StrInput<'src>, I::Token: Char + 'src, E: ParserExtra<'src, I>, + E::Error: LabelError<'src, I, TextExpected<'src, I>>, { any() - // Use try_map over filter to get a better error on failure .try_map(|c: I::Token, span| { if c.is_ident_start() { Ok(c) } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) + Err(LabelError::expected_found( + [TextExpected::IdentifierPart], + Some(MaybeRef::Val(c)), + span, + )) } }) - .then(select! 
{ c if (c as I::Token).is_ident_continue() => () }.repeated()) + .then( + any() + .try_map(|c: I::Token, span| { + if c.is_ident_continue() { + Ok(c) + } else { + Err(LabelError::expected_found( + [TextExpected::IdentifierPart], + Some(MaybeRef::Val(c)), + span, + )) + } + }) + .repeated(), + ) .to_slice() } @@ -860,8 +1005,9 @@ pub mod unicode { I: StrInput<'src>, I::Slice: PartialEq, I::Token: Char + fmt::Debug + 'src, - S: Borrow + Clone + 'src, + S: PartialEq + Clone + 'src, E: ParserExtra<'src, I> + 'src, + E::Error: LabelError<'src, I, TextExpected<'src, I>> + LabelError<'src, I, S>, { /* #[cfg(debug_assertions)] @@ -885,10 +1031,10 @@ pub mod unicode { */ ident() .try_map(move |s: I::Slice, span| { - if &s == keyword.borrow() { + if keyword.borrow() == &s { Ok(()) } else { - Err(Error::expected_found(None, None, span)) + Err(LabelError::expected_found([keyword.clone()], None, span)) } }) .to_slice()