Skip to content

Commit

Permalink
[asl][menhir2bnfc] Minor improvements
Browse files Browse the repository at this point in the history
 * Made the output sotred using String.compare if no order is
   specified to keep things structured
 * Made minor code quality improvements
  • Loading branch information
IGGeorgiev committed Jan 29, 2025
1 parent 0a3603f commit 51aca48
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 68 deletions.
29 changes: 16 additions & 13 deletions asllib/menhir2bnfc/BNFC.ml
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,16 @@ let string_of_bnfc bnfc =
using the order of the names specified *)
let sort_bnfc bnfc order =
let sort_fn el1 el2 =
let get_idx (Decl { name }) =
let get_idx name =
let idx_opt = Utils.list_find_index (String.equal name) order in
Option.value ~default:Int.max_int idx_opt
in
let l = get_idx el1 in
let r = get_idx el2 in
Int.compare l r
let get_name (Decl { name }) = name in
let l_name = get_name el1 in
let r_name = get_name el2 in
let l = get_idx l_name in
let r = get_idx r_name in
match Int.compare l r with 0 -> String.compare l_name r_name | x -> x
in
(* Sanity check that all order names actually exsit *)
let () =
Expand All @@ -179,15 +182,15 @@ let sort_bnfc bnfc order =
(** Convert the bnfc ast into a simpler format which exludes AST information *)
let simplified_bnfc bnfc =
let snake_case_id name =
let cvt_char idx c =
let is_upper c = match c with 'A' .. 'Z' -> true | _ -> false in
if not @@ is_upper c then String.make 1 c
else
let lower = String.make 1 @@ Char.lowercase_ascii c in
if Int.equal idx 0 then lower else "_" ^ lower
in
List.init (String.length name) (String.get name)
|> List.mapi cvt_char |> String.concat ""
let is_upper = function 'A' .. 'Z' -> true | _ -> false in
String.fold_left
(fun acc c ->
let s = String.make 1 @@ Char.lowercase_ascii c in
match acc with
| "" -> s
| _ when is_upper c -> acc ^ "_" ^ s
| _ -> acc ^ s)
"" name
in
let print_terms (Decl { terms }) =
let print_term term =
Expand Down
96 changes: 56 additions & 40 deletions asllib/menhir2bnfc/CvtGrammar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -150,18 +150,21 @@ end = struct
(* 2.5. Determine if any of the detected nonterminals can be removed and do so if possible *)
(* This is sometimes possible. There seems to be an inefficiency in the LR(1) table which causes the following case to happen:
A := ... B [t1, t2]
A := ... A [t1, t2, t3, ...]
A := ... B [t1, t2]
A := ... A [t1, t2, t3, ...]
B := ... A [t1, t2, t3, ...]
B := ... A [t1, t2, t3, ...]
C := ... A [t1, t2, t3, ...]
...
C := ... A [t1, t2, t3, ...]
...
In this case we can infer that all highlighted cases share the same terminal reduction set as `A := ... B` because:
* `A := ... B` is the last reduction of `A` (as all others recurse as their last step), so its terminals are what reduces the wider `A`
* `B := ... A` will always reduce with the last reduction terminal set of `A` (or the subset overlapping with `A`)
* `C := ... A` will always reduce with the last reduction terminal set of `A` (or the subset overlapping with `A`)
In this case we can infer that all highlighted cases share the same terminal reduction set as `A := ... B` because:
* `A := ... B` is the last reduction of `A` (as all others recurse as their last step), so its terminals are what reduces the wider `A`
* `B := ... A` will always reduce with the last reduction terminal set of `A` (or the subset overlapping with `A`)
* `C := ... A` will always reduce with the last reduction terminal set of `A` (or the subset overlapping with `A`)
The following function therefore would remove the A nonterminal from consideration and update the terminal sets
of the productions in B and C which terminate in A to match the [t1; t12] case.
*)
let reduced_production_sets : TerminalSet.t ProductionMap.t NonterminalMap.t =
let get_last_rhs prod = get_last @@ Production.rhs prod in
Expand Down Expand Up @@ -209,21 +212,22 @@ end = struct
(* An example of what this nested list could look like (production type being expanded for clarity):
[
[
A := A "&&" B
B := B "&&" B
];
[
A := A "+" B
B := B "+" B
];
[
A := A "*" B
B := B "*" B
]
]
Notes:
* Since we could have more than one (related) nonterminal we expect
the precedence levels of the nonterminals to overlap.
Each nonterminal, however, could appear in a different part of the parse tree
as such we need to only consider terminals coming from binary ops at higher precedence levels
as such we need to only consider terminals coming from the target productions themselves.
(Not where the A and B nonterminals are used, but only their recursive calls)
*)
let precedence_list : Production.t list list =
(* First create a nested production list for each nonterminal.
Expand All @@ -243,7 +247,7 @@ end = struct
let current_level, rest =
List.partition (fun (_, c2) -> Int.equal c1 c2) prod_list
in
let level = List.split current_level |> fst in
let level = List.map fst current_level in
level :: loop rest
in
loop sorted)
Expand All @@ -257,36 +261,43 @@ end = struct
This is not the case as this approach is fairly naive (but is almost exact) a better one might be
to try to build an LR1 parser and see near which terminals we get shift/reduce conflicts. *)
let term_sets : TerminalSet.t list NonterminalMap.t =
let rec collect_terms terms set =
match terms with
| (N n, _, _) :: (T t, _, _) :: tl
when NonterminalMap.mem n nterm_prec_levels ->
collect_terms tl (TerminalSet.add t set)
| (N n, _, _) :: (N n2, _, _) :: _ as tl
when NonterminalMap.mem n nterm_prec_levels ->
let follow : TerminalSet.t =
Nonterminal.first n2 |> TerminalSet.of_list
in
collect_terms tl (TerminalSet.union set follow)
| _ :: tl -> collect_terms tl set
| [] -> set
in
NonterminalMap.map
(fun prec_list ->
List.fold_right
(fun level acc ->
List.fold_right
(fun prod set ->
let rhs = Production.rhs prod |> Array.to_list in
let rec collect_terms terms set =
match terms with
| (N n, _, _) :: (T t, _, _) :: tl
when NonterminalMap.mem n nterm_prec_levels ->
collect_terms tl (TerminalSet.add t set)
| (N n, _, _) :: (N n2, _, _) :: tl
when NonterminalMap.mem n nterm_prec_levels ->
let follow : TerminalSet.t =
Nonterminal.first n2 |> TerminalSet.of_list
in
collect_terms tl (TerminalSet.union set follow)
| _ :: tl -> collect_terms tl set
| [] -> set
in
collect_terms rhs set)
level TerminalSet.empty
:: acc)
prec_list [])
nterm_prec_levels
in
(* Lastly we merge all the nonterminal precedence levels by matching them up against each other's
terminal sets. If two sets overlap the they belong to the same precedence level. If a set is found to not
overlap then it either belongs at a higher or lower precedence than any other one in the other nonterminal. *)
terminal sets. If two sets overlap the they belong to the same precedence level.
A nonterminal might skip a precedence level if it omits another nonterminal's operands or it
could introduce its own operands at a new level.
As we merge the new level with the accumulation in the case of a non-overlap
we decide which one to prioritize based on whether the new level for consideration
appears later in the accumulation. If yes, then all other old levels must come
before it and have a relatively lower precedence. Otherwise if the new level does
not appear in the accumulation we insert it expecting a later ovelrap *)
NonterminalMap.fold
(fun nterm prec_levels acc ->
let other_ops_per_level = NonterminalMap.find nterm term_sets in
Expand All @@ -298,12 +309,17 @@ end = struct
match (old_levels, new_levels) with
| _, [] -> old_levels
| [], _ -> new_levels
| (l1, s1) :: tl1, (l2, s2) :: tl2 when is_inter s1 s2 ->
(l1 @ l2, TerminalSet.union s1 s2) :: merge_levels tl1 tl2
| (l1, s1) :: tl1, (l2, s2) :: tl2 -> (
match List.find_opt (fun (_, s2) -> is_inter s1 s2) tl2 with
| None -> (l1, s1) :: merge_levels tl1 new_levels
| Some _ -> (l2, s2) :: merge_levels old_levels tl2)
| (old_l, old_s) :: old_tl, (new_l, new_s) :: new_tl
when is_inter old_s new_s ->
(old_l @ new_l, TerminalSet.union old_s new_s)
:: merge_levels old_tl new_tl
| (old_l, old_s) :: old_tl, (new_l, new_s) :: new_tl ->
if
List.exists
(fun (_, next_old_s) -> is_inter next_old_s new_s)
old_tl
then (old_l, old_s) :: merge_levels old_tl new_levels
else (new_l, new_s) :: merge_levels old_levels new_tl
in
merge_levels acc new_levels)
nterm_prec_levels []
Expand Down Expand Up @@ -628,7 +644,7 @@ end = struct
NonterminalMap.fold
(fun nterm prod_map acc ->
let name = n_name nterm in
let prods = ProductionMap.bindings prod_map |> List.split |> fst in
let prods = List.map fst (ProductionMap.bindings prod_map) in
let suffixes = mk_short_suffixes prods in
let names = List.map (Printf.sprintf "%s_%s" name) suffixes in
let ast_name_map = List.combine prods names in
Expand All @@ -647,8 +663,8 @@ end = struct
(fun nterm _ -> not @@ NonterminalMap.mem nterm reduced_production_sets)
production_to_terminals
|> NonterminalMap.map (fun m ->
ProductionMap.bindings m |> List.split |> fst |> mk_productions)
|> NonterminalMap.bindings |> List.split |> snd |> List.concat
List.map fst (ProductionMap.bindings m) |> mk_productions)
|> NonterminalMap.bindings |> List.map snd |> List.concat
in
decls @ prec_decls
end
21 changes: 12 additions & 9 deletions asllib/menhir2bnfc/menhir2bnfc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ let parse_args () =
let translate_to_str args =
let open BNFC in
let bnfc =
let order_data =
match args.order_file with
| None -> []
| Some ord_file ->
let parse_order chan =
let data = really_input_string chan (in_channel_length chan) in
String.trim data |> String.split_on_char '\n'
|> List.map String.trim
in
Utils.with_open_in_bin ord_file parse_order
in
let module GrammarData =
CvtGrammar.Convert (MenhirSdk.Cmly_read.Read (struct
let filename = args.cmly_file
Expand All @@ -127,15 +138,7 @@ let translate_to_str args =
tokens;
}
in
match args.order_file with
| None -> initial
| Some ord_file ->
let parse_order chan =
let data = really_input_string chan (in_channel_length chan) in
String.trim data |> String.split_on_char '\n' |> List.map String.trim
in
let order = Utils.with_open_in_bin ord_file parse_order in
sort_bnfc initial order
sort_bnfc initial order_data
in
if args.no_ast then simplified_bnfc bnfc else string_of_bnfc bnfc

Expand Down
4 changes: 2 additions & 2 deletions asllib/menhir2bnfc/tests/grammars.t/calc.bnf.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
// entrypoints Main;

main ::= expr EOL

expr ::= expr PLUS expr1
| expr MINUS expr1
| expr1
Expand All @@ -13,3 +11,5 @@ expr1 ::= expr1 TIMES expr2
expr2 ::= INT
| LPAREN expr RPAREN
| MINUS expr2

main ::= expr EOL
4 changes: 2 additions & 2 deletions asllib/menhir2bnfc/tests/grammars.t/calc.cf.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
entrypoints Main;

Main__Rule. Main ::= Expr EOL;

Expr_Expr_Plus. Expr ::= Expr PLUS Expr1;
Expr_Expr_Minus. Expr ::= Expr MINUS Expr1;
_. Expr ::= Expr1;
Expand All @@ -13,3 +11,5 @@ _. Expr1 ::= Expr2;
Expr_Int. Expr2 ::= INT;
Expr_Lparen. Expr2 ::= LPAREN Expr RPAREN;
Expr_Minus. Expr2 ::= MINUS Expr2;

Main__Rule. Main ::= Expr EOL;
4 changes: 2 additions & 2 deletions asllib/menhir2bnfc/tests/grammars.t/calc_full.cf.expected
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ entrypoints Main;

token INT ["0123456789"]+;

Main__Rule. Main ::= Expr "\n";

Expr_Expr_Plus. Expr ::= Expr "+" Expr1;
Expr_Expr_Minus. Expr ::= Expr "-" Expr1;
_. Expr ::= Expr1;
Expand All @@ -15,3 +13,5 @@ _. Expr1 ::= Expr2;
Expr_Int. Expr2 ::= INT;
Expr_Lparen. Expr2 ::= "(" Expr ")";
Expr_Minus. Expr2 ::= "-" Expr2;

Main__Rule. Main ::= Expr "\n";

0 comments on commit 51aca48

Please sign in to comment.