Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC007] Migrate the parser to the new AST #2083

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
10 changes: 5 additions & 5 deletions core/src/bytecode/ast/compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
Term::Bool(b) => Node::Bool(*b),
Term::Num(n) => alloc.number(n.clone()),
Term::Str(s) => alloc.string(s),
Term::StrChunks(chunks) => alloc.str_chunks(
Term::StrChunks(chunks) => alloc.string_chunks(
chunks
.iter()
.map(|chunk| match chunk {
Expand All @@ -270,14 +270,14 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
),
Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)),
Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)),
Term::Let(bindings, body, attrs) => alloc.let_binding(
Term::Let(bindings, body, attrs) => alloc.let_block(
bindings
.iter()
.map(|(id, term)| (Pattern::any(*id), term.to_ast(alloc))),
body.to_ast(alloc),
attrs.rec,
),
Term::LetPattern(bindings, body, attrs) => alloc.let_binding(
Term::LetPattern(bindings, body, attrs) => alloc.let_block(
bindings
.iter()
.map(|(pat, term)| (pat.to_ast(alloc), term.to_ast(alloc))),
Expand Down Expand Up @@ -1050,7 +1050,7 @@ impl<'ast> FromAst<Node<'ast>> for term::Term {
Node::Bool(b) => Term::Bool(*b),
Node::Number(n) => Term::Num((**n).clone()),
Node::String(s) => Term::Str((*s).into()),
Node::StrChunks(chunks) => {
Node::StringChunks(chunks) => {
let chunks = chunks
.iter()
.map(|chunk| match chunk {
Expand Down Expand Up @@ -1099,7 +1099,7 @@ impl<'ast> FromAst<Node<'ast>> for term::Term {
Term::LetPattern(bindings, body, attrs)
}
}
Node::App { fun, args } => {
Node::App { head: fun, args } => {
// unwrap(): the position of Ast should always be set (we might move to `RawSpan`
// instead of `TermPos` soon)
let fun_span = fun.pos.unwrap();
Expand Down
198 changes: 166 additions & 32 deletions core/src/bytecode/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use record::Record;
use crate::{cache::InputFormat, error::ParseError, identifier::LocIdent, position::TermPos};

// For now, we reuse those types from the term module.
pub use crate::term::{Number, StrChunk};
pub use crate::term::{MergePriority, Number, StrChunk as StringChunk};

use bumpalo::Bump;

Expand All @@ -33,16 +33,6 @@ use pattern::*;
use primop::PrimOp;
use typ::*;

/// A Nickel AST. Contains a root node and a span.
///
//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
//through a reference?
#[derive(Clone, Debug, PartialEq)]
pub struct Ast<'ast> {
node: Node<'ast>,
pos: TermPos,
}

/// A node of the Nickel AST.
///
/// Nodes are built by the parser and then mostly traversed immutably. Such nodes are optimized for
Expand Down Expand Up @@ -76,24 +66,24 @@ pub enum Node<'ast> {
///
/// As opposed to [crate::term::Term::StrChunks], the chunks are stored in the original order:
/// `"hello%{var}"` will give `["hello", var]`.
StrChunks(&'ast [StrChunk<Ast<'ast>>]),
StringChunks(&'ast [StringChunk<Ast<'ast>>]),

/// A function.
Fun {
arg: &'ast Pattern<'ast>,
body: &'ast Ast<'ast>,
},

/// A let-binding.
/// A let block.
Let {
bindings: &'ast [(Pattern<'ast>, Ast<'ast>)],
bindings: &'ast [LetBinding<'ast>],
body: &'ast Ast<'ast>,
rec: bool,
},

/// An application to one or more arguments.
App {
fun: &'ast Ast<'ast>,
head: &'ast Ast<'ast>,
args: &'ast [Ast<'ast>],
},

Expand Down Expand Up @@ -157,6 +147,95 @@ pub enum Node<'ast> {
ParseError(&'ast ParseError),
}

/// An individual binding in a let block.
///
/// Groups the pattern introduced by the binding, the metadata attached to it and the value
/// that is bound.
#[derive(Debug, Clone, PartialEq)]
pub struct LetBinding<'ast> {
/// The pattern introduced by this binding.
pub pattern: Pattern<'ast>,
/// The metadata (documentation and annotation) attached to this binding.
pub metadata: LetMetadata<'ast>,
/// The value bound to the pattern.
pub value: Ast<'ast>,
}

/// The metadata that can be attached to a let. It's a subset of [record::FieldMetadata]: only
/// the documentation and the annotation are kept.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct LetMetadata<'ast> {
/// The optional documentation attached to the binding.
pub doc: Option<rc::Rc<str>>,
/// The annotation attached to the binding.
pub annotation: Annotation<'ast>,
}

impl<'ast> From<LetMetadata<'ast>> for record::FieldMetadata<'ast> {
fn from(let_metadata: LetMetadata<'ast>) -> Self {
record::FieldMetadata {
annotation: let_metadata.annotation,
doc: let_metadata.doc,
..Default::default()
}
}
}

impl<'ast> TryFrom<record::FieldMetadata<'ast>> for LetMetadata<'ast> {
    type Error = ();

    /// Narrows field metadata down to let metadata.
    ///
    /// The conversion succeeds only when the field-specific data is trivial, that is when `opt`
    /// and `not_exported` are both `false` and `priority` is [MergePriority::Neutral]. Any
    /// other combination gives `Err(())`.
    fn try_from(field_metadata: record::FieldMetadata<'ast>) -> Result<Self, Self::Error> {
        match field_metadata {
            record::FieldMetadata {
                doc,
                annotation,
                opt: false,
                not_exported: false,
                priority: MergePriority::Neutral,
            } => Ok(LetMetadata { doc, annotation }),
            _ => Err(()),
        }
    }
}

impl<'ast> Node<'ast> {
    /// Concatenates the chunks of a [Node::StringChunks] into a single owned string.
    ///
    /// This method returns `Some(..)` only when `self` is a [Node::StringChunks] and all of its
    /// chunks are [StringChunk::Literal]. Any other node, or any non-literal chunk, gives
    /// `None`.
    pub fn try_str_chunk_as_static_str(&self) -> Option<String> {
        let chunks = match self {
            Node::StringChunks(chunks) => chunks,
            _ => return None,
        };

        let mut literal = String::new();

        for chunk in chunks.iter() {
            match chunk {
                StringChunk::Literal(text) => literal.push_str(text),
                // A single non-literal chunk means there is no static string to extract.
                _ => return None,
            }
        }

        Some(literal)
    }

    /// Pairs this node with the position `pos`, producing an [Ast].
    pub fn spanned(self, pos: TermPos) -> Ast<'ast> {
        Ast { pos, node: self }
    }
}

/// A Nickel AST. Contains a root node and a span.
///
//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
//through a reference?
#[derive(Clone, Debug, PartialEq)]
pub struct Ast<'ast> {
/// The root node of this AST.
pub node: Node<'ast>,
/// The position attached to the root node.
pub pos: TermPos,
}

impl<'ast> Ast<'ast> {
    /// Returns this AST with its position replaced by `pos`. The node is kept unchanged.
    pub fn with_pos(self, pos: TermPos) -> Self {
        let Ast { node, .. } = self;
        Ast { node, pos }
    }
}

/// A branch of a match expression.
#[derive(Debug, PartialEq, Clone)]
pub struct MatchBranch<'ast> {
Expand Down Expand Up @@ -256,16 +335,24 @@ impl AstAlloc {
Node::Number(self.number_arena.alloc(number))
}

/// Moves `number` into the number arena and returns a reference to the stored value, without
/// wrapping it in a node.
pub fn number_move(&self, number: Number) -> &'_ Number {
    let stored: &Number = self.number_arena.alloc(number);
    stored
}

/// Copies `s` into the generic arena and wraps the stored string in a [Node::String].
pub fn string<'ast>(&'ast self, s: &str) -> Node<'ast> {
    let stored: &'ast str = self.generic_arena.alloc_str(s);
    Node::String(stored)
}

pub fn str_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast>
/// Copies `s` into the generic arena and returns a reference to the stored string, without
/// wrapping it in a node.
pub fn string_move<'ast>(&'ast self, s: &str) -> &'_ str {
    let stored: &'ast str = self.generic_arena.alloc_str(s);
    stored
}

pub fn string_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast>
where
I: IntoIterator<Item = StrChunk<Ast<'ast>>>,
I: IntoIterator<Item = StringChunk<Ast<'ast>>>,
I::IntoIter: ExactSizeIterator,
{
Node::StrChunks(self.generic_arena.alloc_slice_fill_iter(chunks))
Node::StringChunks(self.generic_arena.alloc_slice_fill_iter(chunks))
}

pub fn fun<'ast>(&'ast self, pat: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> {
Expand All @@ -274,9 +361,23 @@ impl AstAlloc {
Node::Fun { arg, body }
}

pub fn let_binding<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast>
/// Builds a function of several arguments as a chain of nested single-argument functions.
///
/// Arguments are processed from last to first, so that the first argument ends up as the
/// outermost function. Each intermediate function is given the position [TermPos::None]. If
/// `args` is empty, the node of `body` is returned as is.
pub fn nary_fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast>
where
    I: IntoIterator<Item = Pattern<'ast>>,
    I::IntoIter: DoubleEndedIterator,
{
    let mut wrapped = body;

    for arg in args.into_iter().rev() {
        wrapped = Ast {
            node: self.fun(arg, wrapped),
            pos: TermPos::None,
        };
    }

    wrapped.node
}

pub fn let_block<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast>
where
I: IntoIterator<Item = (Pattern<'ast>, Ast<'ast>)>,
I: IntoIterator<Item = LetBinding<'ast>>,
I::IntoIter: ExactSizeIterator,
{
let bindings = self.generic_arena.alloc_slice_fill_iter(bindings);
Expand All @@ -289,13 +390,13 @@ impl AstAlloc {
}
}

pub fn app<'ast, I>(&'ast self, fun: Ast<'ast>, args: I) -> Node<'ast>
pub fn app<'ast, I>(&'ast self, head: Ast<'ast>, args: I) -> Node<'ast>
where
I: IntoIterator<Item = Ast<'ast>>,
I::IntoIter: ExactSizeIterator,
{
Node::App {
fun: self.generic_arena.alloc(fun),
head: self.generic_arena.alloc(head),
args: self.generic_arena.alloc_slice_fill_iter(args),
}
}
Expand Down Expand Up @@ -403,14 +504,20 @@ impl AstAlloc {
}
}

/// Moves an already constructed [Type] into the generic arena, instead of taking each
/// constituent separately, and wraps the stored type in a [Node::Type].
pub fn typ<'ast>(&'ast self, typ: Type<'ast>) -> Node<'ast> {
    let stored: &'ast Type<'ast> = self.generic_arena.alloc(typ);
    Node::Type(stored)
}

pub fn typ_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> {
Node::Type(self.generic_arena.alloc(Type { typ, pos }))
pub fn type_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> {
Node::Type(self.type_move(Type { typ, pos }))
}

/// Builds a [Type] from the unrolled type `typ` and the position `pos`, and moves it into the
/// arena through [Self::type_move].
pub fn type_data<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> &'ast Type<'ast> {
    let built = Type { typ, pos };
    self.type_move(built)
}

/// Moves an already constructed [Type] into the generic arena and returns a reference to it.
pub fn type_move<'ast>(&'ast self, typ: Type<'ast>) -> &'ast Type<'ast> {
    let stored: &'ast Type<'ast> = self.generic_arena.alloc(typ);
    stored
}

pub fn types<'ast, I>(&'ast self, types: I) -> &'ast [Type<'ast>]
Expand All @@ -425,10 +532,25 @@ impl AstAlloc {
self.generic_arena.alloc(EnumRows(erows))
}

/// Moves already constructed [EnumRows] into the generic arena and returns a reference to
/// them.
pub fn enum_rows_move<'ast>(&'ast self, erows: EnumRows<'ast>) -> &'ast EnumRows<'ast> {
    let stored: &'ast EnumRows<'ast> = self.generic_arena.alloc(erows);
    stored
}

/// Wraps the unrolled rows `rrows` in [RecordRows] and moves the result into the generic arena.
pub fn record_rows<'ast>(&'ast self, rrows: RecordRowsUnr<'ast>) -> &'ast RecordRows<'ast> {
    let rows = RecordRows(rrows);
    self.generic_arena.alloc(rows)
}

/// Moves already constructed [RecordRows] into the generic arena and returns a reference to
/// them.
pub fn record_rows_move<'ast>(&'ast self, rrows: RecordRows<'ast>) -> &'ast RecordRows<'ast> {
    let stored: &'ast RecordRows<'ast> = self.generic_arena.alloc(rrows);
    stored
}

/// Allocates a [RecordRow] associating `id` with `typ`. The type is moved into the generic
/// arena first, then the row referencing it is allocated in the same arena.
pub fn record_row<'ast>(&'ast self, id: LocIdent, typ: Type<'ast>) -> &'ast RecordRow<'ast> {
    let typ = self.generic_arena.alloc(typ);
    self.generic_arena.alloc(RecordRow { id, typ })
}

pub fn parse_error(&self, error: ParseError) -> Node<'_> {
Node::ParseError(self.error_arena.alloc(error))
}
Expand All @@ -441,6 +563,14 @@ impl AstAlloc {
self.generic_arena.alloc(pattern)
}

/// Moves a sequence of patterns into the generic arena and returns them as a slice.
///
/// The iterator must report its exact length, as required by the `ExactSizeIterator` bound.
pub fn patterns<'ast, I>(&'ast self, patterns: I) -> &'ast [Pattern<'ast>]
where
    I: IntoIterator<Item = Pattern<'ast>>,
    I::IntoIter: ExactSizeIterator,
{
    let stored: &'ast [Pattern<'ast>] = self
        .generic_arena
        .alloc_slice_fill_iter(patterns.into_iter());
    stored
}

pub fn enum_pattern<'ast>(
&'ast self,
enum_pattern: EnumPattern<'ast>,
Expand All @@ -455,6 +585,14 @@ impl AstAlloc {
self.generic_arena.alloc(field_pat)
}

pub fn field_patterns<'ast>(&'ast self, field_pats: I) -> &'ast [FieldPattern<'ast>]
where
I: IntoIterator<Item = FieldPattern<'ast>>,
I::IntoIter: ExactSizeIterator,
{
self.generic_arena.alloc_slice_fill_iter(field_pats)
}

pub fn record_pattern<'ast, I>(
&'ast self,
patterns: I,
Expand All @@ -465,10 +603,8 @@ impl AstAlloc {
I: IntoIterator<Item = FieldPattern<'ast>>,
I::IntoIter: ExactSizeIterator,
{
let patterns = self.generic_arena.alloc_slice_fill_iter(patterns);

self.generic_arena.alloc(RecordPattern {
patterns,
patterns: self.field_patterns(patterns),
tail,
pos,
})
Expand All @@ -484,10 +620,8 @@ impl AstAlloc {
I: IntoIterator<Item = Pattern<'ast>>,
I::IntoIter: ExactSizeIterator,
{
let patterns = self.generic_arena.alloc_slice_fill_iter(patterns);

self.generic_arena.alloc(ArrayPattern {
patterns,
patterns: self.patterns(patterns),
tail,
pos,
})
Expand Down
8 changes: 8 additions & 0 deletions core/src/combine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,11 @@ pub trait Combine: Default {
/// Combine two elements.
fn combine(left: Self, right: Self) -> Self;
}

/// [combine::Combine] doens't work for new ast nodes, which requires an external allocator to
/// create new nodes. This trait is a version that takes this additional allocator. It's temporary:
/// I suspect we won't need the original general `Combine` trait once we move to the bytecode vm,
/// as [crate::combine::Combine] is used mostly on ast-like data.
pub trait CombineAlloc<'ast> {
fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self;
}
2 changes: 2 additions & 0 deletions core/src/eval/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,8 @@ fn merge_fields<'a, C: Cache, I: DoubleEndedIterator<Item = &'a LocIdent> + Clon
/// This function is parametrized temporarily to accommodate both the mainline Nickel AST
/// ([crate::term::Term]) where documentation is represented as a `String`, and the new bytecode
/// AST where documentation is represented as an `Rc<str>`.
//FIXME: remove the type parameter `D` once we've moved evaluation to the new bytecode VM.
//Currently we need to handle both the old representation `D=String` and the new one `D=Rc<str>`.
pub(crate) fn merge_doc<D>(doc1: Option<D>, doc2: Option<D>) -> Option<D> {
//FIXME: how to merge documentation? Just concatenate?
doc1.or(doc2)
Expand Down
Loading
Loading