From 895cc3a7b2fbc56caec33704daecd996d5823861 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Sat, 30 May 2020 09:11:56 +0100 Subject: [PATCH] Remove the pervasive assumption that the start state is 0. There's no good reason for us exposing an assumption about which state in the stategraph/statetable is the start state, and it may reduce our flexibility later. This commit abstracts this assumption away such that the start state is specified concretely just once (in pager::pager_stategraph), thus giving us a single place to consider if we ever do want to change this assumption. --- lrpar/src/lib/mf.rs | 12 ++++----- lrpar/src/lib/parser.rs | 10 +++---- lrtable/src/lib/pager.rs | 49 +++++++++++++++++++---------------- lrtable/src/lib/stategraph.rs | 15 +++++++++-- lrtable/src/lib/statetable.rs | 21 ++++++++++----- 5 files changed, 64 insertions(+), 43 deletions(-) diff --git a/lrpar/src/lib/mf.rs b/lrpar/src/lib/mf.rs index 19a3eb033..a947c6925 100644 --- a/lrpar/src/lib/mf.rs +++ b/lrpar/src/lib/mf.rs @@ -900,8 +900,8 @@ mod test { yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind}, Symbol, }; - use lrtable::{from_yacc, Minimiser, StIdx, StIdxStorageT}; - use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned, Zero}; + use lrtable::{from_yacc, Minimiser}; + use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned}; use crate::{ lex::Lexeme, @@ -947,7 +947,7 @@ A: '(' A ')' let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), grms).unwrap(); let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap(); let d = Dist::new(&grm, &sgraph, &stable, |_| 1); - let s0 = StIdx::from(StIdxStorageT::zero()); + let s0 = sgraph.start_state(); assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0); assert_eq!(d.dist(s0, grm.token_idx(")").unwrap()), 1); assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0); @@ -1014,7 +1014,7 @@ U: 'B'; // This only tests a subset of all the states and distances but, I believe, it tests all // more interesting edge cases that the example from the Kim/Yi paper. - let s0 = StIdx::from(StIdxStorageT::zero()); + let s0 = sgraph.start_state(); assert_eq!(d.dist(s0, grm.token_idx("A").unwrap()), 0); assert_eq!(d.dist(s0, grm.token_idx("B").unwrap()), 1); assert_eq!(d.dist(s0, grm.token_idx("C").unwrap()), 2); @@ -1072,7 +1072,7 @@ Factor: '(' Expr ')' // This only tests a subset of all the states and distances but, I believe, it tests all // more interesting edge cases that the example from the Kim/Yi paper. - let s0 = StIdx::from(StIdxStorageT::zero()); + let s0 = sgraph.start_state(); assert_eq!(d.dist(s0, grm.token_idx("+").unwrap()), 1); assert_eq!(d.dist(s0, grm.token_idx("*").unwrap()), 1); assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0); @@ -1134,7 +1134,7 @@ W: 'b' ; let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap(); let d = Dist::new(&grm, &sgraph, &stable, |_| 1); - let s0 = StIdx::from(StIdxStorageT::zero()); + let s0 = sgraph.start_state(); assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0); assert_eq!(d.dist(s0, grm.token_idx("b").unwrap()), 1); assert_eq!(d.dist(s0, grm.eof_token_idx()), 0); diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index f9e8b1139..25f0a102b 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -9,8 +9,8 @@ use std::{ use cactus::Cactus; use cfgrammar::{yacc::YaccGrammar, RIdx, TIdx}; -use lrtable::{Action, StIdx, StIdxStorageT, StateGraph, StateTable}; -use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero}; +use lrtable::{Action, StIdx, StateGraph, StateTable}; +use num_traits::{AsPrimitive, PrimInt, Unsigned}; use crate::{ cpctplus, @@ -125,7 +125,7 @@ where lexemes, actions: actions.as_slice(), }; - let mut pstack = vec![StIdx::from(StIdxStorageT::zero())]; + let mut pstack = vec![sgraph.start_state()]; let mut astack = Vec::new(); let mut errors = Vec::new(); let mut spans = Vec::new(); @@ -180,7 +180,7 @@ where lexemes, actions: actions.as_slice(), }; - let mut pstack = vec![StIdx::from(StIdxStorageT::zero())]; + let mut pstack = vec![sgraph.start_state()]; let mut astack = Vec::new(); let mut errors = Vec::new(); let mut spans = Vec::new(); @@ -231,7 +231,7 @@ where lexemes, actions, }; - let mut pstack = vec![StIdx::from(StIdxStorageT::zero())]; + let mut pstack = vec![sgraph.start_state()]; let mut astack = Vec::new(); let mut errors = Vec::new(); let mut spans = Vec::new(); diff --git a/lrtable/src/lib/pager.rs b/lrtable/src/lib/pager.rs index 49954ea16..568135e5b 100644 --- a/lrtable/src/lib/pager.rs +++ b/lrtable/src/lib/pager.rs @@ -4,7 +4,7 @@ use std::{ }; use cfgrammar::{yacc::YaccGrammar, SIdx, Symbol}; -use num_traits::{AsPrimitive, PrimInt, Unsigned}; +use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero}; use vob::Vob; use crate::{itemset::Itemset, stategraph::StateGraph, StIdx, StIdxStorageT}; @@ -130,6 +130,7 @@ where let mut core_states = Vec::new(); let mut edges: Vec, StIdx>> = Vec::new(); + let start_state = StIdx::from(StIdxStorageT::zero()); let mut state0 = Itemset::new(grm); let mut ctx = Vob::from_elem(usize::from(grm.tokens_len()), false); ctx.set(usize::from(grm.eof_token_idx()), true); @@ -286,15 +287,17 @@ where .drain(..) .zip(closed_states.drain(..).map(Option::unwrap)) .collect(), + start_state, edges, ); - StateGraph::new(gc_states, gc_edges) + StateGraph::new(gc_states, start_state, gc_edges) } /// Garbage collect `zip_states` (of `(core_states, closed_state)`) and `edges`. Returns a new pair /// with unused states and their corresponding edges removed. fn gc( mut states: Vec<(Itemset, Itemset)>, + start_state: StIdx, mut edges: Vec, StIdx>>, ) -> ( Vec<(Itemset, Itemset)>, @@ -303,7 +306,7 @@ fn gc( // First of all, do a simple pass over all states. All state indexes reachable from the // start state will be inserted into the 'seen' set. let mut todo = HashSet::new(); - todo.insert(StIdx(0)); + todo.insert(start_state); let mut seen = HashSet::new(); while !todo.is_empty() { // XXX This is the clumsy way we're forced to do what we'd prefer to be: @@ -448,12 +451,12 @@ mod test { assert_eq!(sg.all_states_len(), StIdx(10)); assert_eq!(sg.all_edges_len(), 10); - assert_eq!(sg.closed_state(StIdx(0)).items.len(), 3); - state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 0, SIdx(0), vec!["$", "b"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 1, SIdx(0), vec!["$", "b"]); + assert_eq!(sg.closed_state(sg.start_state()).items.len(), 3); + state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 0, SIdx(0), vec!["$", "b"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 1, SIdx(0), vec!["$", "b"]); - let s1 = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap(); + let s1 = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap(); assert_eq!(sg.closed_state(s1).items.len(), 2); state_exists(&grm, &sg.closed_state(s1), "^", 0, SIdx(1), vec!["$"]); state_exists(&grm, &sg.closed_state(s1), "S", 0, SIdx(1), vec!["$", "b"]); @@ -462,7 +465,7 @@ mod test { assert_eq!(sg.closed_state(s2).items.len(), 1); state_exists(&grm, &sg.closed_state(s2), "S", 0, SIdx(2), vec!["$", "b"]); - let s3 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap(); + let s3 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap(); assert_eq!(sg.closed_state(s3).items.len(), 4); state_exists(&grm, &sg.closed_state(s3), "S", 1, SIdx(1), vec!["$", "b", "c"]); state_exists(&grm, &sg.closed_state(s3), "A", 0, SIdx(0), vec!["a"]); @@ -529,16 +532,16 @@ mod test { assert_eq!(sg.all_edges_len(), 27); // State 0 - assert_eq!(sg.closed_state(StIdx(0)).items.len(), 7); - state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 0, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 1, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 2, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 3, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 4, SIdx(0), vec!["$"]); - state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 5, SIdx(0), vec!["$"]); - - let s1 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("a").unwrap())).unwrap(); + assert_eq!(sg.closed_state(sg.start_state()).items.len(), 7); + state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 0, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 1, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 2, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 3, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 4, SIdx(0), vec!["$"]); + state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 5, SIdx(0), vec!["$"]); + + let s1 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("a").unwrap())).unwrap(); assert_eq!(sg.closed_state(s1).items.len(), 7); state_exists(&grm, &sg.closed_state(s1), "X", 0, SIdx(1), vec!["a", "d", "e", "$"]); state_exists(&grm, &sg.closed_state(s1), "X", 1, SIdx(1), vec!["a", "d", "e", "$"]); @@ -548,7 +551,7 @@ mod test { state_exists(&grm, &sg.closed_state(s1), "Z", 0, SIdx(0), vec!["c"]); state_exists(&grm, &sg.closed_state(s1), "T", 0, SIdx(0), vec!["a", "d", "e", "$"]); - let s7 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap(); + let s7 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap(); assert_eq!(sg.closed_state(s7).items.len(), 7); state_exists(&grm, &sg.closed_state(s7), "X", 3, SIdx(1), vec!["a", "d", "e", "$"]); state_exists(&grm, &sg.closed_state(s7), "X", 4, SIdx(1), vec!["a", "d", "e", "$"]); @@ -609,7 +612,7 @@ mod test { // Ommitted successors from the graph in Fig.3 // X-successor of S0 - let s0x = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap(); + let s0x = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap(); state_exists(&grm, &sg.closed_state(s0x), "^", 0, SIdx(1), vec!["$"]); // Y-successor of S1 (and it's d-successor) @@ -678,7 +681,7 @@ mod test { let sg = pager_stategraph(&grm); // State 0 - assert_eq!(sg.core_state(StIdx(0)).items.len(), 1); - state_exists(&grm, &sg.core_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]); + assert_eq!(sg.core_state(sg.start_state()).items.len(), 1); + state_exists(&grm, &sg.core_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]); } } diff --git a/lrtable/src/lib/stategraph.rs b/lrtable/src/lib/stategraph.rs index c21393d22..e5547e673 100644 --- a/lrtable/src/lib/stategraph.rs +++ b/lrtable/src/lib/stategraph.rs @@ -13,6 +13,7 @@ use crate::{itemset::Itemset, StIdx, StIdxStorageT}; pub struct StateGraph { /// A vector of `(core_states, closed_states)` tuples. states: Vec<(Itemset, Itemset)>, + start_state: StIdx, /// For each state in `states`, edges is a hashmap from symbols to state offsets. edges: Vec, StIdx>>, } @@ -23,12 +24,22 @@ where { pub(crate) fn new( states: Vec<(Itemset, Itemset)>, + start_state: StIdx, edges: Vec, StIdx>>, ) -> Self { // states.len() needs to fit into StIdxStorageT; however we don't need to worry about // edges.len() (which merely needs to fit in a usize) assert!(StIdxStorageT::try_from(states.len()).is_ok()); - StateGraph { states, edges } + StateGraph { + states, + start_state, + edges, + } + } + + /// Return this state graph's start state. + pub fn start_state(&self) -> StIdx { + self.start_state } /// Return an iterator which produces (in order from `0..self.rules_len()`) all this @@ -113,7 +124,7 @@ where let mut o = String::new(); for (stidx, &(ref core_st, ref closed_st)) in self.iter_stidxs().zip(self.states.iter()) { - if StIdxStorageT::from(stidx) > 0 { + if stidx != self.start_state { o.push_str(&"\n"); } { diff --git a/lrtable/src/lib/statetable.rs b/lrtable/src/lib/statetable.rs index 0de4fe409..3faa5e66a 100644 --- a/lrtable/src/lib/statetable.rs +++ b/lrtable/src/lib/statetable.rs @@ -120,6 +120,7 @@ pub struct StateTable { actions: SparseVec, state_actions: Vob, gotos: SparseVec, + start_state: StIdx, core_reduces: Vob, state_shifts: Vob, reduce_states: Vob, @@ -350,6 +351,7 @@ where actions: actions_sv, state_actions, gotos: gotos_sv, + start_state: sg.start_state(), state_shifts, core_reduces, reduce_states, @@ -463,6 +465,11 @@ where } } + /// Return this state table's start state. + pub fn start_state(&self) -> StIdx { + self.start_state + } + /// Return a struct containing all conflicts or `None` if there aren't any. pub fn conflicts(&self) -> Option<&Conflicts> { self.conflicts.as_ref() @@ -602,7 +609,7 @@ mod test { let sg = pager_stategraph(&grm); assert_eq!(sg.all_states_len(), StIdx(9)); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap(); let s2 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Term").unwrap())).unwrap(); let s3 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Factor").unwrap())).unwrap(); @@ -685,7 +692,7 @@ mod test { assert_eq!(st.actions.len(), len); // We only extract the states necessary to test those rules affected by the reduce/reduce. - let s0 = StIdx(0); + let s0 = sg.start_state(); let s4 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap(); assert_eq!(st.action(s4, grm.token_idx("x").unwrap()), @@ -707,7 +714,7 @@ mod test { let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len()); assert_eq!(st.actions.len(), len); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap(); let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap(); let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap(); @@ -737,7 +744,7 @@ mod test { ").unwrap(); let sg = pager_stategraph(&grm); let st = StateTable::new(&grm, &sg).unwrap(); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap(); let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap(); @@ -764,7 +771,7 @@ mod test { let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len()); assert_eq!(st.actions.len(), len); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap(); let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap(); let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap(); @@ -805,7 +812,7 @@ mod test { let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len()); assert_eq!(st.actions.len(), len); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap(); let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap(); let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap(); @@ -863,7 +870,7 @@ mod test { let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len()); assert_eq!(st.actions.len(), len); - let s0 = StIdx(0); + let s0 = sg.start_state(); let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap(); let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap(); let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();