From b166ae57d6f1a0ab7e68313596b63dcb7591b4df Mon Sep 17 00:00:00 2001
From: Jim Fehrle
Date: Sun, 1 Jul 2018 13:02:02 -0700
Subject: [PATCH] Make tokenize_string an optional parameter for diff methods
 in pp_diffs.

Remove forward reference to lexer.
---
 lib/pp_diff.ml                              | 25 ++++++++++++-------
 lib/pp_diff.mli                             |  9 +++----
 printing/proof_diffs.ml                     |  7 ++----
 printing/proof_diffs.mli                    |  3 +++
 .../unit-tests/printing/proof_diffs_test.ml |  4 ++-
 5 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/lib/pp_diff.ml b/lib/pp_diff.ml
index d10e7390a99d8..a1888974aa71f 100644
--- a/lib/pp_diff.ml
+++ b/lib/pp_diff.ml
@@ -126,13 +126,20 @@ let diff_strs old_strs new_strs =
   let diffs = List.rev (StringDiff.diff old_strs new_strs) in
   shorten_diff_span `Removed (shorten_diff_span `Added diffs);;
 
-(* to be set to Proof_diffs.tokenize_string to allow a forward reference to the lexer *)
-let tokenize_string = ref (fun (s : string) -> [s])
+(* Default string tokenizer. Makes each character a separate string.
+Whitespace is not ignored. Doesn't handle UTF-8 differences well. *)
+let def_tokenize_string s =
+  let limit = (String.length s) - 1 in
+  let strs : string list ref = ref [] in
+  for i = 0 to limit do
+    strs := (String.make 1 s.[i]) :: !strs
+  done;
+  List.rev !strs
 
 (* get the Myers diff of 2 strings *)
-let diff_str old_str new_str =
-  let old_toks = Array.of_list (!tokenize_string old_str)
-  and new_toks = Array.of_list (!tokenize_string new_str) in
+let diff_str ?(tokenize_string=def_tokenize_string) old_str new_str =
+  let old_toks = Array.of_list (tokenize_string old_str)
+  and new_toks = Array.of_list (tokenize_string new_str) in
   diff_strs old_toks new_toks;;
 
 let get_dinfo = function
@@ -277,18 +284,18 @@ let add_diff_tags which pp diffs =
     raise (Diff_Failure "left-over diff info at end of Pp.t, should be impossible");
   if has_added || has_removed then wrap_in_bg diff_tag rv else rv;;
 
-let diff_pp o_pp n_pp =
+let diff_pp ?(tokenize_string=def_tokenize_string) o_pp n_pp =
   let open Pp in
   let o_str = string_of_ppcmds o_pp in
   let n_str = string_of_ppcmds n_pp in
-  let diffs = diff_str o_str n_str in
+  let diffs = diff_str ~tokenize_string o_str n_str in
   (add_diff_tags `Removed o_pp diffs, add_diff_tags `Added n_pp diffs);;
 
-let diff_pp_combined ?(show_removed=false) o_pp n_pp =
+let diff_pp_combined ?(tokenize_string=def_tokenize_string) ?(show_removed=false) o_pp n_pp =
   let open Pp in
   let o_str = string_of_ppcmds o_pp in
   let n_str = string_of_ppcmds n_pp in
-  let diffs = diff_str o_str n_str in
+  let diffs = diff_str ~tokenize_string o_str n_str in
   let (_, has_removed) = has_changes diffs in
   let added = add_diff_tags `Added n_pp diffs in
   if show_removed && has_removed then
diff --git a/lib/pp_diff.mli b/lib/pp_diff.mli
index 94b00fed6a35b..03468271d2ebd 100644
--- a/lib/pp_diff.mli
+++ b/lib/pp_diff.mli
@@ -33,19 +33,16 @@ Limitations/Possible enhancements:
 
 (** Compute the diff between two Pp.t structures and return versions of each
 with diffs highlighted as (old, new) *)
-val diff_pp : Pp.t -> Pp.t -> Pp.t * Pp.t
+val diff_pp : ?tokenize_string:(string -> string list) -> Pp.t -> Pp.t -> Pp.t * Pp.t
 
 (** Compute the diff between two Pp.t structures and return a highlighted Pp.t.
 If [show_removed] is true, show separate lines for removals and additions,
 otherwise only show additions *)
-val diff_pp_combined : ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
+val diff_pp_combined : ?tokenize_string:(string -> string list) -> ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
 
 (** Raised if the diff fails *)
 exception Diff_Failure of string
 
-(* for dependency injection to allow calling the lexer *)
-val tokenize_string : (string -> string list) ref
-
 module StringDiff :
 sig
   type elem = String.t
@@ -69,7 +66,7 @@ If the strings are not lexable, this routine will raise Diff_Failure.
 Therefore you should catch any exceptions.
 The workaround for now is for the caller to tokenize the strings itself
 and then call diff_strs. *)
-val diff_str : string -> string -> StringDiff.elem Diff2.edit list
+val diff_str : ?tokenize_string:(string -> string list) -> string -> string -> StringDiff.elem Diff2.edit list
 
 (** Compute the differences between 2 lists of strings, treating the strings
 in the lists as indivisible units.
diff --git a/printing/proof_diffs.ml b/printing/proof_diffs.ml
index efffe58018e6c..106fe3db3abb7 100644
--- a/printing/proof_diffs.ml
+++ b/printing/proof_diffs.ml
@@ -89,7 +89,6 @@ let cprintf s = cfprintf !log_out_ch s
 
 module StringMap = Map.Make(String);;
 
-(* placed here so that pp_diff.ml can access the lexer through dependency injection *)
 let tokenize_string s =
   (* todo: cLexer changes buff as it proceeds. Seems like that should be saved, too.
   But I don't understand how it's used--it looks like things get appended to it but
@@ -112,8 +111,6 @@ let tokenize_string s =
   CLexer.set_lexer_state st;
   raise (Diff_Failure "Input string is not lexable");;
 
-let _ = Pp_diff.tokenize_string := tokenize_string
-
 
 type hyp_info = {
   idents: string list;
@@ -153,7 +150,7 @@ let diff_hyps o_line_idents o_map n_line_idents n_map =
 
     let (o_line, o_pp) = setup old_ids o_map in
     let (n_line, n_pp) = setup new_ids n_map in
-    let hyp_diffs = diff_str o_line n_line in
+    let hyp_diffs = diff_str ~tokenize_string o_line n_line in
     let (has_added, has_removed) = has_changes hyp_diffs in
     if show_removed () && has_removed then begin
      let o_entry = StringMap.find (List.hd old_ids) o_map in
@@ -303,7 +300,7 @@ let diff_goal_info o_info n_info =
   let (o_line_idents, o_hyp_map, o_concl_pp) = o_info in
   let (n_line_idents, n_hyp_map, n_concl_pp) = n_info in
   let show_removed = Some (show_removed ()) in
-  let concl_pp = diff_pp_combined ?show_removed o_concl_pp n_concl_pp in
+  let concl_pp = diff_pp_combined ~tokenize_string ?show_removed o_concl_pp n_concl_pp in
   let hyp_diffs_list = diff_hyps o_line_idents o_hyp_map n_line_idents n_hyp_map in
   (hyp_diffs_list, concl_pp)
 
diff --git a/printing/proof_diffs.mli b/printing/proof_diffs.mli
index 481b664d8aeee..0d3b5821e5113 100644
--- a/printing/proof_diffs.mli
+++ b/printing/proof_diffs.mli
@@ -43,6 +43,9 @@ the first argument set to None, which will skip the diff.
 *)
 val diff_goals : ?prev_gs:(goal sigma) -> goal sigma option -> Pp.t
 
+(** Convert a string to a list of token strings using the lexer *)
+val tokenize_string : string -> string list
+
 (* Exposed for unit test, don't use these otherwise *)
 (* output channel for the test log file *)
 val log_out_ch : out_channel ref
diff --git a/test-suite/unit-tests/printing/proof_diffs_test.ml b/test-suite/unit-tests/printing/proof_diffs_test.ml
index 9e6aaaf319a7a..dbdcbefe21cb9 100644
--- a/test-suite/unit-tests/printing/proof_diffs_test.ml
+++ b/test-suite/unit-tests/printing/proof_diffs_test.ml
@@ -3,7 +3,9 @@ open Utest
 open Pp_diff
 open Proof_diffs
 
-let tokenize_string = !Pp_diff.tokenize_string
+let tokenize_string = Proof_diffs.tokenize_string
+let diff_pp = diff_pp ~tokenize_string
+let diff_str = diff_str ~tokenize_string
 
 let tests = ref []
 let add_test name test = tests := (mk_test name (TestCase test)) :: !tests
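
Illustrative usage (not part of the patch): a minimal sketch of how callers interact with the reworked API. The Pp.t values below are placeholders. Callers that omit ~tokenize_string get the character-level default tokenizer, while the proof printer now passes the CLexer-based Proof_diffs.tokenize_string explicitly instead of assigning it to a ref at load time; the lexer-based tokenizer can still raise Diff_Failure on input it cannot lex.

(* Sketch only, not part of this patch.  The goal texts are placeholders. *)
let old_pp = Pp.str "x + 0 = x"
let new_pp = Pp.str "0 + x = x"

(* Omitting ~tokenize_string falls back to def_tokenize_string, which
   splits both strings into single-character tokens. *)
let (_o_tagged, _n_tagged) = Pp_diff.diff_pp old_pp new_pp

(* The printer supplies the lexer-based tokenizer explicitly; this replaces
   the old [Pp_diff.tokenize_string := ...] forward reference. *)
let _combined =
  Pp_diff.diff_pp_combined
    ~tokenize_string:Proof_diffs.tokenize_string
    ~show_removed:true
    old_pp new_pp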