diff --git a/CHANGES.md b/CHANGES.md index 73bf7fc..3a6e5f7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,6 +14,8 @@ Unreleased * Fix [Re.Pcre.split]. Regression introduced in 1.12 and a previous bug with [Re.Pcre.split] (#538). +* Avoid parsing unnecessary patterns supported `Re.Emacs` in `Re.Str` (#563) + 1.13.1 (30-Sep-2024) -------------------- diff --git a/lib/emacs.ml b/lib/emacs.ml index c0d4f91..c026836 100644 --- a/lib/emacs.ml +++ b/lib/emacs.ml @@ -31,7 +31,7 @@ let by_code f c c' = Char.chr (f c c') ;; -let parse s = +let parse ~emacs_only s = let buf = Parse_buffer.create s in let accept = Parse_buffer.accept buf in let eos () = Parse_buffer.eos buf in @@ -72,19 +72,19 @@ let parse s = let r = regexp () in if not (Parse_buffer.accept_s buf {|\)|}) then raise Parse_error; Re.group r) - else if accept '`' + else if emacs_only && accept '`' then Re.bos - else if accept '\'' + else if emacs_only && accept '\'' then Re.eos else if accept '=' then Re.start else if accept 'b' then Re.alt [ Re.bow; Re.eow ] - else if accept 'B' + else if emacs_only && accept 'B' then Re.not_boundary - else if accept '<' + else if emacs_only && accept '<' then Re.bow - else if accept '>' + else if emacs_only && accept '>' then Re.eow else if accept 'w' then Re.alt [ Re.alnum; Re.char '_' ] @@ -95,7 +95,7 @@ let parse s = match get () with | ('*' | '+' | '?' | '[' | ']' | '.' | '^' | '$' | '\\') as c -> Re.char c | '0' .. '9' -> raise Not_supported - | _ -> raise Parse_error) + | c -> if emacs_only then raise Parse_error else Re.char c) else ( if eos () then raise Parse_error; match get () with @@ -125,7 +125,12 @@ let parse s = ;; let re ?(case = true) s = - let r = parse s in + let r = parse s ~emacs_only:true in + if case then r else Re.no_case r +;; + +let re_no_emacs ~case s = + let r = parse s ~emacs_only:false in if case then r else Re.no_case r ;; diff --git a/lib/emacs.mli b/lib/emacs.mli index e196e4d..5092a20 100644 --- a/lib/emacs.mli +++ b/lib/emacs.mli @@ -37,3 +37,5 @@ val compile : Core.t -> Core.re (** Same as [Core.compile] *) val compile_pat : ?case:bool -> string -> Core.re + +val re_no_emacs : case:bool -> string -> Core.t diff --git a/lib/str.ml b/lib/str.ml index 63a3e7d..4c20886 100644 --- a/lib/str.ml +++ b/lib/str.ml @@ -30,7 +30,7 @@ type regexp = } let compile_regexp s c = - let re = Emacs.re ~case:(not c) s in + let re = Emacs.re_no_emacs ~case:(not c) s in { mtch = lazy (Compile.compile (Ast.seq [ Ast.start; re ])) ; srch = lazy (Compile.compile re) } diff --git a/lib_test/str/test_str.ml b/lib_test/str/test_str.ml index b3744d1..70b7b95 100644 --- a/lib_test/str/test_str.ml +++ b/lib_test/str/test_str.ml @@ -27,6 +27,15 @@ module Test_matches (R : Str_intf) = struct with | Not_found -> None ;; + + let eq_match' ?(pos = 0) ?(case = true) r s = + let pat = if case then R.regexp r else R.regexp_case_fold r in + try + ignore (R.string_match pat s pos); + Some (groups ()) + with + | Not_found -> None + ;; end module T_str = Test_matches (Str) @@ -42,6 +51,16 @@ let eq_match ?pos ?case r s = () ;; +let eq_match' ?pos ?case r s = + expect_equal_app + ~msg:(str_printer s) + ~printer:(opt_printer (list_printer ofs_printer)) + (fun () -> T_str.eq_match' ?pos ?case r s) + () + (fun () -> T_re.eq_match' ?pos ?case r s) + () +;; + let split_result_conv = List.map (function | Str.Delim x -> Re.Str.Delim x @@ -206,6 +225,14 @@ let _ = eq_match "[^0-9a-z]+" "A:Z+"; eq_match "[^0-9a-z]+" "0"; eq_match "[^0-9a-z]+" "a"); + (* Word modifiers *) + expect_pass "word boundaries" (fun () -> + eq_match' "\\bfoo" "foo"; + eq_match' "\\" "foo"; + eq_match' "z\\Bfoo" "zfoo"; + eq_match' "\\`foo" "foo"; + eq_match' "foo\\'" "foo"); (* Case modifiers *) expect_pass "no_case" (fun () -> eq_match ~case:false "abc" "abc";