diff --git a/lib/Conf.mli b/lib/Conf.mli index f44bd3428c..5c169c1597 100644 --- a/lib/Conf.mli +++ b/lib/Conf.mli @@ -83,6 +83,16 @@ type t = ; wrap_comments: bool (** Wrap comments at margin. *) ; wrap_fun_args: bool } +val ocamlformat_profile : t + +val conventional_profile : t + +val compact_profile : t + +val sparse_profile : t + +val janestreet_profile : t + type file = Stdin | File of string type kind = Kind : _ list Migrate_ast.Traverse.fragment -> kind diff --git a/test/comments/dune b/test/comments/dune new file mode 100644 index 0000000000..b8e0a0554a --- /dev/null +++ b/test/comments/dune @@ -0,0 +1,6 @@ +(env + (_ + (binaries + (test_comments/test_comments.exe as test_comments)))) + +(cram (deps %{bin:test_comments})) diff --git a/test/comments/test_comments.t/run.t b/test/comments/test_comments.t/run.t new file mode 100644 index 0000000000..55b79846b6 --- /dev/null +++ b/test/comments/test_comments.t/run.t @@ -0,0 +1,929 @@ +For every tokens in the test file, insert a comment at that position, format +then compute the diff at the token level. + +File "test.ml" should contain most syntaxes. + + $ test_comments test.ml + insertion offset = 0 + before: (* toto *) module M + after: (* toto *) module M + insertion offset = 9 + before: M (* toto *) : sig (** M *) type " a t class c : " a t -> l : " a t -> ?k: " a t -> object [@@@ attr ] end module type S = sig include module type of struct end end module M : functor ( X : module type of N with type t = t ) ( ) -> S with type t = t and module N = N end = struct + after: M : sig (** M *) type " a t class c : " a t -> l : " a t -> ?k: " a t -> object [@@@ attr ] end module type S = sig include module type of struct end end module M : functor ( X : module type of N with type t = t ) ( ) -> S with type t = t and module N = N end = (* toto *) struct + insertion offset = 14 + before: sig (* toto *) + after: sig (* toto *) + insertion offset = 26 + before: (** M *) (* toto *) type + after: (* toto *) (** M *) type + insertion offset = 29 + before: (* toto *) type " + after: (* toto *) type " + insertion offset = 35 + before: type " (* toto *) a + after: (* toto *) type " a + insertion offset = 37 + before: type " a (* toto *) t + after: (* toto *) type " a t + insertion offset = 39 + before: (* toto *) + after: (* toto *) + insertion offset = 42 + before: (* toto *) class c + after: (* toto *) class c + insertion offset = 51 + before: : (* toto *) + after: : (* toto *) + insertion offset = 57 + before: " (* toto *) a t + after: " a (* toto *) t + insertion offset = 65 + before: t (* toto *) -> l + after: t (* toto *) -> l + insertion offset = 68 + before: -> (* toto *) l : " a + after: -> l : (* toto *) " a + insertion offset = 69 + before: l (* toto *) : " a + after: l : (* toto *) " a + insertion offset = 71 + before: " (* toto *) a t + after: " a (* toto *) t + insertion offset = 79 + before: t (* toto *) -> ?k: + after: t (* toto *) -> ?k: + insertion offset = 82 + before: -> (* toto *) ?k: " a + after: -> ?k: (* toto *) " a + insertion offset = 86 + before: " (* toto *) a t + after: " a (* toto *) t + insertion offset = 94 + before: t (* toto *) -> object + after: t (* toto *) -> object + insertion offset = 97 + before: (* toto *) object + after: (* toto *) object + insertion offset = 103 + before: object (* toto *) + after: object (* toto *) + insertion offset = 130 + before: ] (* toto *) end + after: ] (* toto *) end + insertion offset = 133 + before: end (* toto *) + after: end (* toto *) + insertion offset = 134 + before: (* toto *) + after: (* toto *) + insertion offset = 137 + before: (* toto *) module type + after: (* toto *) module type + insertion offset = 144 + before: module (* toto *) type S = + after: module type (* toto *) S = + insertion offset = 156 + before: sig (* toto *) + after: sig (* toto *) + insertion offset = 161 + before: (* toto *) include module + after: (* toto *) include module + insertion offset = 176 + before: module (* toto *) type of struct end + after: module type of (* toto *) struct end + insertion offset = 181 + before: type (* toto *) of struct end + after: type of (* toto *) struct end + insertion offset = 191 + before: struct (* toto *) end + after: struct (* toto *) end + insertion offset = 197 + before: (* toto *) end + after: (* toto *) end + insertion offset = 200 + before: end (* toto *) + after: end (* toto *) + insertion offset = 201 + before: (* toto *) + after: (* toto *) + insertion offset = 204 + before: (* toto *) module M + after: (* toto *) module M + insertion offset = 215 + before: : (* toto *) functor ( X : module type of N with type t = t ) ( ) -> S with type + after: : functor ( X : module type of N with type t = t ) ( ) -> S (* toto *) with type + insertion offset = 223 + before: functor (* toto *) ( X : + after: functor ( (* toto *) X : + insertion offset = 228 + before: : (* toto *) module type of N with type + after: : module type of N (* toto *) with type + insertion offset = 235 + before: module (* toto *) type of N with + after: module type of (* toto *) N with + insertion offset = 240 + before: type (* toto *) of N with + after: type of (* toto *) N with + insertion offset = 262 + before: ) (* toto *) ( ) -> S with type + after: ) ( ) -> S (* toto *) with type + insertion offset = 263 + before: ( (* toto *) ) -> S with type + after: ( ) -> S (* toto *) with type + insertion offset = 265 + before: ) (* toto *) -> S with type + after: ) -> S (* toto *) with type + insertion offset = 267 + before: -> (* toto *) S with type + after: -> S (* toto *) with type + insertion offset = 272 + before: (* toto *) S with type + after: S (* toto *) with type + insertion offset = 294 + before: and (* toto *) module N = + after: and module (* toto *) N = + insertion offset = 306 + before: N (* toto *) + after: N (* toto *) + insertion offset = 307 + before: (* toto *) end = + after: (* toto *) end = + insertion offset = 319 + before: struct (* toto *) + after: struct (* toto *) + insertion offset = 322 + before: (* toto *) type t + after: (* toto *) type t + insertion offset = 327 + before: type (* toto *) t = + after: (* toto *) type t = + insertion offset = 329 + before: t (* toto *) = | A (** A *) | B : int * int -> t | C of { a : int (** a *) ; b : int (** b *) } constraint " a = [> ` A | b ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: t = | A (** A *) | B : int * int -> t | C of { a : int (** a *) ; b : int (** b *) } constraint " a = [> ` A | b ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a (* toto *) + insertion offset = 330 + before: = (* toto *) + after: = (* toto *) + insertion offset = 335 + before: (* toto *) | A + after: (* toto *) | A + insertion offset = 348 + before: (** A *) (* toto *) + after: (** A *) (* toto *) + insertion offset = 353 + before: (* toto *) | B + after: (* toto *) | B + insertion offset = 378 + before: (* toto *) | C + after: (* toto *) | C + insertion offset = 385 + before: of (* toto *) { a : + after: of { (* toto *) a : + insertion offset = 402 + before: int (** a *) (* toto *) ; b + after: int (* toto *) (** a *) ; b + insertion offset = 426 + before: } (* toto *) constraint + after: } (* toto *) constraint + insertion offset = 436 + before: } constraint (* toto *) + after: } (* toto *) constraint + insertion offset = 443 + before: } constraint (* toto *) " + after: } (* toto *) constraint " + insertion offset = 444 + before: " (* toto *) a = + after: " a (* toto *) = + insertion offset = 446 + before: a (* toto *) = + after: a (* toto *) = + insertion offset = 447 + before: = (* toto *) + after: = (* toto *) + insertion offset = 454 + before: (* toto *) [> ` + after: (* toto *) [> ` + insertion offset = 457 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 458 + before: ` (* toto *) A | b ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ` A (* toto *) | b ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 460 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 462 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 463 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 465 + before: ] (* toto *) * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] (* toto *) * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 467 + before: ] * (* toto *) [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * (* toto *) [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 470 + before: ] * [< (* toto *) ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< (* toto *) ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 471 + before: ] * [< ` (* toto *) A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 473 + before: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 475 + before: ] * [< ` A > (* toto *) ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 476 + before: ] * [< ` A > ` (* toto *) B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 478 + before: ] * [< ` A > ` B (* toto *) ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 479 + before: ] * [< ` A > ` B ` (* toto *) C ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 480 + before: ] * [< ` A > ` B ` C (* toto *) ] * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A (* toto *) > ` B ` C ] * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 482 + before: ] * [< ` A > ` B ` C ] (* toto *) * < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] (* toto *) * < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 484 + before: ] * [< ` A > ` B ` C ] * (* toto *) < m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * (* toto *) < m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 486 + before: ] * [< ` A > ` B ` C ] * < (* toto *) m : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < (* toto *) m : t ; .. > * ( module S ) * t # u as " a + insertion offset = 487 + before: ] * [< ` A > ` B ` C ] * < m (* toto *) : t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m (* toto *) : t ; .. > * ( module S ) * t # u as " a + insertion offset = 489 + before: ] * [< ` A > ` B ` C ] * < m : (* toto *) t ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : (* toto *) t ; .. > * ( module S ) * t # u as " a + insertion offset = 491 + before: ] * [< ` A > ` B ` C ] * < m : t (* toto *) ; .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t (* toto *) ; .. > * ( module S ) * t # u as " a + insertion offset = 493 + before: ] * [< ` A > ` B ` C ] * < m : t ; (* toto *) .. > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t (* toto *) ; .. > * ( module S ) * t # u as " a + insertion offset = 496 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. (* toto *) > * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t (* toto *) ; .. > * ( module S ) * t # u as " a + insertion offset = 498 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > (* toto *) * ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > (* toto *) * ( module S ) * t # u as " a + insertion offset = 500 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * (* toto *) ( module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * (* toto *) ( module S ) * t # u as " a + insertion offset = 501 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( (* toto *) module S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * (* toto *) ( module S ) * t # u as " a + insertion offset = 508 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module (* toto *) S ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * (* toto *) ( module S ) * t # u as " a + insertion offset = 509 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S (* toto *) ) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) (* toto *) * t # u as " a + insertion offset = 511 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) (* toto *) * t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) (* toto *) * t # u as " a + insertion offset = 513 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * (* toto *) t # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * (* toto *) t # u as " a + insertion offset = 515 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t (* toto *) # u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t (* toto *) # u as " a + insertion offset = 516 + before: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t # (* toto *) u as " a + after: ] * [< ` A > ` B ` C ] * < m : t ; .. > * ( module S ) * t (* toto *) # u as " a + insertion offset = 518 + before: u (* toto *) as " a + after: u (* toto *) as " a + insertion offset = 521 + before: u as (* toto *) " a + after: u (* toto *) as " a + insertion offset = 522 + before: u as " (* toto *) a + after: u (* toto *) as " a + insertion offset = 523 + before: a (* toto *) + after: a (* toto *) + insertion offset = 524 + insertion offset = 535 + before: (** f *) (* toto *) let + after: (* toto *) (** f *) let + insertion offset = 538 + before: (** f *) (* toto *) let f + after: (* toto *) (** f *) let f + insertion offset = 547 + test_comments: Cannot process "test.ml". + Please report this bug at https://github.com/ocaml-ppx/ocamlformat/issues. + BUG: comments dropped. + insertion offset = 548 + test_comments: Cannot process "test.ml". + Please report this bug at https://github.com/ocaml-ppx/ocamlformat/issues. + BUG: comments dropped. + insertion offset = 558 + before: f : " a . [% id ] t (* toto *) = + after: f (* toto *) : " a . [% id ] t = + insertion offset = 563 + before: ( (* toto *) fun X -> + after: ( fun (* toto *) X -> + insertion offset = 586 + before: (* toto *) + after: (* toto *) + insertion offset = 589 + before: (* toto *) module M + after: (* toto *) module M + insertion offset = 607 + before: ( (* toto *) ) = struct end + after: ( ) = (* toto *) struct end + insertion offset = 609 + before: ) (* toto *) = struct end + after: ) = (* toto *) struct end + insertion offset = 618 + before: struct (* toto *) end + after: struct (* toto *) end + insertion offset = 622 + before: (* toto *) end + after: (* toto *) end + insertion offset = 625 + before: end (* toto *) + after: end (* toto *) + insertion offset = 626 + before: (* toto *) + after: (* toto *) + insertion offset = 627 + before: (* toto *) let _ + after: (* toto *) let _ + insertion offset = 634 + before: = (* toto *) + after: = (* toto *) + insertion offset = 684 + before: (* Insert every expressions in sequence here *) (* toto *) + after: (* Insert every expressions in sequence here *) (* toto *) + insertion offset = 687 + before: (* toto *) ( module + after: (* toto *) ( module + insertion offset = 688 + before: ( (* toto *) module M . + after: ( module (* toto *) M . + insertion offset = 696 + before: M (* toto *) . N ( X + after: M . N (* toto *) ( X + insertion offset = 697 + before: . (* toto *) N ( X + after: . N (* toto *) ( X + insertion offset = 701 + before: X (* toto *) . Y ) : + after: X . Y (* toto *) ) : + insertion offset = 702 + before: . (* toto *) Y ) : + after: . Y (* toto *) ) : + insertion offset = 711 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 714 + before: (* toto *) let rec + after: (* toto *) let rec + insertion offset = 718 + before: let (* toto *) rec x = + after: let rec (* toto *) x = + insertion offset = 733 + before: ( (* toto *) lazy _ ) + after: ( lazy (* toto *) _ ) + insertion offset = 747 + before: in (* toto *) + after: in (* toto *) + insertion offset = 750 + before: (* toto *) f + after: (* toto *) f + insertion offset = 757 + before: ( (* toto *) function (* Insert every patterns here *) + after: ( function (* toto *) (* Insert every patterns here *) + insertion offset = 765 + before: function (* toto *) + after: function (* toto *) + insertion offset = 804 + before: (* Insert every patterns here *) (* toto *) + after: (* Insert every patterns here *) (* toto *) + insertion offset = 811 + before: (* toto *) | ( + after: (* toto *) | ( + insertion offset = 813 + before: | (* toto *) ( ( + after: (* toto *) | ( ( + insertion offset = 815 + before: ( ( (* toto *) ( x + after: ( (* toto *) ( ( x + insertion offset = 828 + before: _ | (* toto *) a , b | A , B x | ` A , ` B x | # + after: _ (* toto *) | a , b | A , B x | ` A , ` B x | # + insertion offset = 835 + before: _ | a , b | (* toto *) A , B x | ` A , ` B x | # + after: _ | a , b (* toto *) | A , B x | ` A , ` B x | # + insertion offset = 844 + before: _ | a , b | A , B x | (* toto *) ` A , ` B x | # + after: _ | a , b | A , B x (* toto *) | ` A , ` B x | # + insertion offset = 845 + before: ` (* toto *) A , ` + after: ` A (* toto *) , ` + insertion offset = 849 + before: ` (* toto *) B x | + after: ` B (* toto *) x | + insertion offset = 855 + before: _ | a , b | A , B x | ` A , ` B x | (* toto *) # t + after: _ | a , b | A , B x | ` A , ` B x (* toto *) | # t + insertion offset = 856 + before: _ | a , b | A , B x | ` A , ` B x | # (* toto *) t + after: _ | a , b | A , B x | ` A , ` B x (* toto *) | # t + insertion offset = 859 + before: t ) (* toto *) as x + after: t (* toto *) ) as x + insertion offset = 872 + before: (* toto *) | { + after: (* toto *) | { + insertion offset = 873 + before: | (* toto *) { a + after: (* toto *) | { a + insertion offset = 886 + before: _ ; (* toto *) _ } + after: _ (* toto *) ; _ } + insertion offset = 896 + before: (* toto *) | [| + after: (* toto *) | [| + insertion offset = 897 + before: | (* toto *) [| a + after: (* toto *) | [| a + insertion offset = 913 + before: (* toto *) | A + after: (* toto *) | A + insertion offset = 914 + before: | (* toto *) A | + after: (* toto *) | A | + insertion offset = 918 + before: A | (* toto *) B + after: A (* toto *) | B + insertion offset = 927 + before: (* toto *) | ( + after: (* toto *) | ( + insertion offset = 928 + before: | (* toto *) ( module + after: (* toto *) | ( module + insertion offset = 929 + before: ( (* toto *) module M ) + after: ( module (* toto *) M ) + insertion offset = 946 + before: (* toto *) | ( + after: (* toto *) | ( + insertion offset = 947 + before: | (* toto *) ( module + after: (* toto *) | ( module + insertion offset = 948 + before: ( (* toto *) module M : + after: ( module (* toto *) M : + insertion offset = 961 + before: M : S ) (* toto *) + after: M (* toto *) : S ) + insertion offset = 969 + before: (* toto *) | ( + after: (* toto *) | ( + insertion offset = 970 + before: | (* toto *) ( module + after: (* toto *) | ( module + insertion offset = 971 + before: ( (* toto *) module _ ) + after: ( module (* toto *) _ ) + insertion offset = 988 + before: (* toto *) | ( + after: (* toto *) | ( + insertion offset = 989 + before: | (* toto *) ( exception + after: (* toto *) | ( exception + insertion offset = 990 + before: ( (* toto *) exception E ) + after: ( exception (* toto *) E ) + insertion offset = 1010 + before: (* toto *) | [% + after: (* toto *) | [% + insertion offset = 1011 + before: | (* toto *) [% ppx + after: (* toto *) | [% ppx + insertion offset = 1025 + before: (* toto *) | M + after: (* toto *) | M + insertion offset = 1026 + before: | (* toto *) M . + after: (* toto *) | M . + insertion offset = 1028 + before: . (* toto *) ( A | + after: . ( (* toto *) A | + insertion offset = 1033 + before: A | (* toto *) B ) + after: A (* toto *) | B ) + insertion offset = 1043 + before: (* toto *) | { + after: (* toto *) | { + insertion offset = 1044 + before: | (* toto *) { x + after: (* toto *) | { x + insertion offset = 1048 + before: = (* toto *) ( module M : + after: = ( module (* toto *) M : + insertion offset = 1049 + before: ( (* toto *) module M : + after: ( module (* toto *) M : + insertion offset = 1062 + before: M : S ) (* toto *) } + after: M (* toto *) : S ) } + insertion offset = 1071 + before: (* toto *) | { + after: (* toto *) | { + insertion offset = 1072 + before: | (* toto *) { x + after: (* toto *) | { x + insertion offset = 1076 + before: { x = (* toto *) ( + after: { (* toto *) x = ( + insertion offset = 1093 + before: (* toto *) | { + after: (* toto *) | { + insertion offset = 1094 + before: | (* toto *) { x + after: (* toto *) | { x + insertion offset = 1098 + before: { x = (* toto *) ( + after: { (* toto *) x = ( + insertion offset = 1114 + before: (* toto *) | { + after: (* toto *) | { + insertion offset = 1115 + before: | (* toto *) { x + after: (* toto *) | { x + insertion offset = 1120 + before: ( (* toto *) ( x' : + after: ( ( (* toto *) x' : + insertion offset = 1140 + before: -> (* toto *) + after: -> (* toto *) + insertion offset = 1160 + before: ( (* toto *) fun ( type t ) + after: ( fun ( type (* toto *) t ) + insertion offset = 1164 + before: fun (* toto *) ( type t ) + after: fun ( type (* toto *) t ) + insertion offset = 1165 + before: ( (* toto *) type t ) + after: ( type (* toto *) t ) + insertion offset = 1175 + before: X (* toto *) ~ a ~b: + after: X ~ (* toto *) a ~b: + insertion offset = 1178 + before: a (* toto *) ~b: Y ? + after: a ~b: (* toto *) Y ? + insertion offset = 1183 + before: Y (* toto *) ? c ?c: + after: Y ? (* toto *) c ?c: + insertion offset = 1186 + before: c (* toto *) ?c: ( Z = + after: c ?c: ( (* toto *) Z = + insertion offset = 1189 + before: ?c: (* toto *) ( Z = + after: ?c: ( (* toto *) Z = + insertion offset = 1201 + before: a ; (* toto *) _ } + after: a (* toto *) ; _ } + insertion offset = 1218 + before: X ~ (* toto *) a ~b: + after: X (* toto *) ~ a ~b: + insertion offset = 1224 + before: a ~b: ( (* toto *) x y ) ? c + after: a ~b: ( (* toto *) x y ) ? c + insertion offset = 1226 + before: a ~b: ( x (* toto *) y ) ? c + after: a ~b: ( x (* toto *) y ) ? c + insertion offset = 1227 + before: a ~b: ( x y (* toto *) ) ? c + after: a ~b: ( x y (* toto *) ) ? c + insertion offset = 1230 + before: ) ? (* toto *) c ?c: + after: ) (* toto *) ? c ?c: + insertion offset = 1275 + before: ( (* toto *) module M : + after: ( module (* toto *) M : + insertion offset = 1295 + before: ( (* toto *) try a + after: (* toto *) ( try a + insertion offset = 1356 + before: |] (* toto *) ; + after: |] (* toto *) ; + insertion offset = 1357 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1365 + before: . (* toto *) { a , + after: . { (* toto *) a , + insertion offset = 1380 + before: .* (* toto *) ( 0 ) + after: .* ( (* toto *) 0 ) + insertion offset = 1384 + before: ( 0 ) (* toto *) else + after: ( (* toto *) 0 ) else + insertion offset = 1392 + before: .* (* toto *) ( a ; + after: .* ( (* toto *) a ; + insertion offset = 1399 + before: ( a ; b ) (* toto *) ; + after: ( (* toto *) a ; b ) ; + insertion offset = 1400 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1403 + before: (* toto *) for i + after: (* toto *) for i + insertion offset = 1424 + before: do (* toto *) + after: do (* toto *) + insertion offset = 1436 + before: . (* toto *) ( i ) + after: . ( (* toto *) i ) + insertion offset = 1442 + before: ) (* toto *) done ; + after: ) (* toto *) done ; + insertion offset = 1447 + before: done (* toto *) ; + after: done (* toto *) ; + insertion offset = 1448 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1460 + before: m ( (* toto *) new M + after: m (* toto *) ( new M + insertion offset = 1464 + before: m ( new (* toto *) M + after: m (* toto *) ( new M + insertion offset = 1465 + before: M (* toto *) . c ) {< x + after: M . c ) (* toto *) {< x + insertion offset = 1466 + before: . (* toto *) c ) {< x + after: . c ) (* toto *) {< x + insertion offset = 1467 + before: c (* toto *) ) {< x + after: c ) (* toto *) {< x + insertion offset = 1504 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1507 + before: (* toto *) let module + after: (* toto *) let module + insertion offset = 1511 + before: let (* toto *) module M = + after: let module (* toto *) M = + insertion offset = 1523 + before: ( (* toto *) val S . + after: ( val (* toto *) S . + insertion offset = 1529 + before: . (* toto *) ( M . + after: . ( (* toto *) M . + insertion offset = 1531 + before: M (* toto *) . N X . + after: M . N (* toto *) X . + insertion offset = 1532 + before: . (* toto *) N X . + after: . N (* toto *) X . + insertion offset = 1535 + before: X (* toto *) . Y ) . + after: X . Y (* toto *) ) . + insertion offset = 1536 + before: . (* toto *) Y ) . + after: . Y (* toto *) ) . + insertion offset = 1544 + before: in (* toto *) + after: in (* toto *) + insertion offset = 1547 + before: (* toto *) let exception + after: (* toto *) let exception + insertion offset = 1551 + before: let (* toto *) exception E of + after: let exception (* toto *) E of + insertion offset = 1570 + before: in (* toto *) + after: in (* toto *) + insertion offset = 1573 + before: (* toto *) let open + after: (* toto *) let open + insertion offset = 1581 + before: open (* toto *) ! M in + after: open ! (* toto *) M in + insertion offset = 1587 + before: in (* toto *) + after: in (* toto *) + insertion offset = 1590 + before: (* toto *) let* x + after: (* toto *) let* x + insertion offset = 1595 + test_comments: Cannot process "test.ml". + Please report this bug at https://github.com/ocaml-ppx/ocamlformat/issues. + BUG: comments dropped. + insertion offset = 1603 + test_comments: Cannot process "test.ml". + Please report this bug at https://github.com/ocaml-ppx/ocamlformat/issues. + BUG: comments dropped. + insertion offset = 1613 + before: = ( (* toto *) w [@ + after: = (* toto *) ( w [@ + insertion offset = 1626 + before: in (* toto *) + after: in (* toto *) + insertion offset = 1629 + before: (* toto *) lazy ( + after: (* toto *) lazy ( + insertion offset = 1637 + before: ( (* toto *) let* ) ( + after: ( let* (* toto *) ) ( + insertion offset = 1645 + before: ( (* toto *) function X -> + after: ( function (* toto *) X -> + insertion offset = 1659 + before: -> (* toto *) ( + ) ) + after: -> ( + (* toto *) ) ) + insertion offset = 1661 + before: ( (* toto *) + ) ) + after: ( + (* toto *) ) ) + insertion offset = 1664 + before: + ) (* toto *) ) ( + after: + (* toto *) ) ) ( + insertion offset = 1667 + before: ) ( (* toto *) ( * + after: ) (* toto *) ( ( * + insertion offset = 1669 + before: ( (* toto *) * ) [@ attr ] ) ) ; + after: ( * ) [@ attr ] ) (* toto *) ) ; + insertion offset = 1671 + before: * (* toto *) ) [@ attr ] ) ) ; + after: * ) [@ attr ] ) (* toto *) ) ; + insertion offset = 1684 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1689 + before: 1 (* toto *) :: ~- + after: (* toto *) 1 :: ~- + insertion offset = 1697 + before: ; (* toto *) + after: ; (* toto *) + insertion offset = 1706 + before: ] (* toto *) + after: ] (* toto *) + insertion offset = 1707 + before: (* toto *) + after: (* toto *) + insertion offset = 1708 + before: (* toto *) class virtual + after: (* toto *) class virtual + insertion offset = 1714 + before: class (* toto *) virtual c x + after: class virtual (* toto *) c x + insertion offset = 1726 + before: x (* toto *) ~ y ?z: + after: x ~ (* toto *) y ?z: + insertion offset = 1729 + before: y (* toto *) ?z: ( z' = + after: y ?z: ( (* toto *) z' = + insertion offset = 1732 + before: ?z: (* toto *) ( z' = + after: ?z: ( (* toto *) z' = + insertion offset = 1741 + before: ) (* toto *) = let + after: ) = (* toto *) let + insertion offset = 1742 + before: = (* toto *) + after: = (* toto *) + insertion offset = 1745 + before: (* toto *) let open + after: (* toto *) let open + insertion offset = 1749 + before: let (* toto *) open M in + after: let open (* toto *) M in + insertion offset = 1758 + before: in (* toto *) + after: in (* toto *) + insertion offset = 1761 + before: (* toto *) object ( + after: (* toto *) object ( + insertion offset = 1768 + before: object (* toto *) ( self ) + after: object ( (* toto *) self ) + insertion offset = 1773 + before: self (* toto *) ) inherit + after: self ) (* toto *) inherit + insertion offset = 1774 + before: ) (* toto *) + after: ) (* toto *) + insertion offset = 1779 + before: (* toto *) inherit M + after: (* toto *) inherit M + insertion offset = 1788 + before: M (* toto *) . c x + after: M . c (* toto *) x + insertion offset = 1789 + before: . (* toto *) c x + after: . c (* toto *) x + insertion offset = 1792 + before: x (* toto *) val + after: x (* toto *) val + insertion offset = 1793 + before: (* toto *) + after: (* toto *) + insertion offset = 1798 + before: (* toto *) val mutable + after: (* toto *) val mutable + insertion offset = 1802 + before: val (* toto *) mutable y = + after: val mutable (* toto *) y = + insertion offset = 1815 + before: 0 (* toto *) initializer + after: 0 (* toto *) initializer + insertion offset = 1816 + before: (* toto *) + after: (* toto *) + insertion offset = 1821 + before: (* toto *) initializer y + after: (* toto *) initializer y + insertion offset = 1839 + before: x (* toto *) method + after: x (* toto *) method + insertion offset = 1840 + before: (* toto *) + after: (* toto *) + insertion offset = 1845 + before: (* toto *) method m + after: (* toto *) method m + insertion offset = 1863 + before: y (* toto *) method + after: y (* toto *) method + insertion offset = 1864 + before: (* toto *) + after: (* toto *) + insertion offset = 1869 + before: (* toto *) method n + after: (* toto *) method n + insertion offset = 1891 + before: >} (* toto *) method + after: >} (* toto *) method + insertion offset = 1892 + before: (* toto *) + after: (* toto *) + insertion offset = 1897 + before: (* toto *) method virtual + after: (* toto *) method virtual + insertion offset = 1904 + before: method (* toto *) virtual o : + after: method virtual (* toto *) o : + insertion offset = 1917 + before: : # (* toto *) ct -> + after: : (* toto *) # ct -> + insertion offset = 1926 + before: int (* toto *) + after: int (* toto *) + insertion offset = 1929 + before: (* toto *) end + after: (* toto *) end + insertion offset = 1932 + before: end (* toto *) + after: end (* toto *) + insertion offset = 1933 + before: (* toto *) + after: (* toto *) + insertion offset = 1934 + before: (* toto *) type t + after: (* toto *) type t + insertion offset = 1939 + before: type (* toto *) t = + after: (* toto *) type t = + insertion offset = 1941 + before: t (* toto *) = .. + after: t = .. (* toto *) + insertion offset = 1943 + before: = (* toto *) .. + after: = .. (* toto *) + insertion offset = 1946 + before: (* toto *) + after: (* toto *) + insertion offset = 1947 + before: (* toto *) type t + after: (* toto *) type t + insertion offset = 1968 + before: (* toto *) + after: (* toto *) + insertion offset = 1969 + before: (* toto *) exception E + after: (* toto *) exception E + insertion offset = 1985 + before: t (* toto *) + after: t (* toto *) + insertion offset = 1986 + before: (* toto *) + after: (* toto *) diff --git a/test/comments/test_comments.t/test.ml b/test/comments/test_comments.t/test.ml new file mode 100644 index 0000000000..03efecb51b --- /dev/null +++ b/test/comments/test_comments.t/test.ml @@ -0,0 +1,95 @@ +module M : sig + (** M *) + + type 'a t + + class c : + 'a t + -> l:'a t + -> ?k:'a t + -> object + [@@@attr] + end + + module type S = sig + include module type of struct end + end + + module M : functor (X : module type of N with type t = t) () -> + S with type t = t and module N = N +end = struct + type t = + | A (** A *) + | B : int * int -> t + | C of {a: int (** a *); b: int (** b *)} + constraint + 'a = + [> `A | b] * [< `A > `B `C] * < m: t ; .. > * (module S) * t #u as 'a + + (** f *) + let f : 'a. [%id] t = f (fun X -> assert false) + + module M (X : S) () = struct end +end + +let _ = + (* Insert every expressions in sequence here *) + (module M.N (X.Y) : S) ; + let rec x = x and (lazy _) = y in + f + (function + (* Insert every patterns here *) + | (((x : t), _ | a, b | A, B x | `A, `B x | #t) as x) + |{a= _; b= _; _} + |[|a; b|] + |A | B + |(module M) + |(module M : S) + |(module _) + |(exception E) + |[%ppx] + |M.(A | B) + |{x= (module M : S)} + |{x= (x' : t)} + |{x= (P : t)} + |{x= ((x' : t)[@attr])} -> + . ) + (fun (type t) X ~a ~b:Y ?c ?c:(Z = W) {a; _} -> ()) + X ~a ~b:(x y) ?c ?c:(Some x) + {a; b= (u :> t); c: t; d= (module M : S)} + (try a.x <- b.x with Failure msg -> msg) + [|a; b + c + d|] ; + if x.{a, b} then y.*(0) else z.*(a; b) ; + for i = f x to f y do + r := x.(i) + done ; + (f x)#m (new M.c) {} ; + let module M = (val S.(M.N X.Y).x) in + let exception E of t in + let open! M in + let* x = ~-y and* z = (w [@attr]) in + lazy (( let* ) (function X -> ( + )) (( * ) [@attr])) ; + 1 :: ~-2 ; + [1; 2] + +class virtual c x ~y ?z:(z' = 0) = + let open M in + object (self) + inherit M.c x + + val mutable y = 0 + + initializer y <- x + + method m a = a - y + + method n = self#m {<>} + + method virtual o : #ct -> int + end + +type t = .. + +type t += X : t -> t + +exception E of t diff --git a/test/comments/test_comments/diff.ml b/test/comments/test_comments/diff.ml new file mode 100644 index 0000000000..506ba63c1f --- /dev/null +++ b/test/comments/test_comments/diff.ml @@ -0,0 +1,277 @@ +(** Thanks @craigfe, https://github.com/CraigFe/diff *) + +type index = int + +type 'a command = + | Insert of { expected : index; actual : index } + (** Insert the element [actual.(actual)] at [expected.(expected)]. *) + | Delete of { expected : index } + (** Delete the element at [expected.(expected)]. *) + | Substitute of { expected : index; actual : index } + (** Set [expected.(expected)) := actual.(actual)]. *) + +let pp_command ppf = function + | Insert { expected; actual } -> + Format.fprintf ppf "Insert { expected = %d; actual = %d }" expected actual + | Delete { expected } -> + Format.fprintf ppf "Delete { expected = %d }" expected + | Substitute { expected; actual } -> + Format.fprintf ppf "Substitute { expected = %d; actual = %d }" expected + actual + +let map_expected f = function + | Insert i -> Insert { i with expected = f i.expected } + | Delete d -> Delete { expected = f d.expected } + | Substitute s -> Substitute { s with expected = f s.expected } + +let map_actual f = function + | Insert i -> Insert { i with actual = f i.actual } + | Substitute s -> Substitute { s with actual = f s.actual } + | Delete _ as d -> d + +let insert expected actual = Insert { expected; actual } +let delete expected = Delete { expected } +let substitute expected actual = Substitute { expected; actual } + +type ('a, _) typ = + | Array : ('a, 'a array) typ + | List : ('a, 'a list) typ + | String : (char, string) typ + +module Subarray : sig + type 'a t + (** Read-only wrapper around an array or a string. Can be {!truncate}d in + [O(1)] time. *) + + val truncate : origin:int -> length:int -> 'a t -> 'a t + (** Return a new subarray with smaller bounds than the previous one. *) + + val empty : _ t + val get : 'a t -> int -> 'a + val length : 'a t -> int + val of_container : ('a, 'container) typ -> 'container -> 'a t +end = struct + type 'a t = { get : int -> 'a; origin : int; length : int } + + let truncate ~origin ~length + {get; origin = prev_origin; length = prev_length} = + if origin < prev_origin || length > prev_length then + failwith + (Format.sprintf + "Cannot expand array during truncation ({ origin = %d; length = %d \ + } -> { origin = %d; length = %d })" + prev_origin prev_length origin length ) ; + {get; origin; length} + + let index_oob = Format.ksprintf invalid_arg "index out of bounds: %d" + let empty = { get = index_oob; origin = 0; length = 0 } + + let get { get; origin; length } i = + if i >= length then index_oob i; + get (i + origin) + + let length { length; _ } = length + let of_array a = { get = Array.get a; origin = 0; length = Array.length a } + let of_list l = Array.of_list l |> of_array + let of_string s = { get = String.get s; origin = 0; length = String.length s } + + let of_container (type a container) : (a, container) typ -> container -> a t = + function + | Array -> of_array + | List -> of_list + | String -> of_string +end + +module Edit_script = struct + type 'a t = 'a command list + + let insert n v t = + let rec inner acc n t = + match (n, t) with + | 0, t -> List.rev_append acc (v :: t) + | _, [] -> List.rev (v :: acc) + | n, x :: xs -> inner (x :: acc) (n - 1) xs + in + inner [] n t + + let delete n t = + let rec inner acc n t = + match (n, t) with + | 0, _ :: xs -> List.rev_append acc xs + | n, x :: xs -> inner (x :: acc) (n - 1) xs + | _ -> assert false + in + inner [] n t + + let substitute n v t = + let rec inner acc n t = + match (n, t) with + | 0, _ :: xs -> List.rev acc @ (v :: xs) + | n, x :: xs -> inner (x :: acc) (n - 1) xs + | _ -> assert false + in + inner [] n t + + let apply (type a container) (typ : (a, container) typ) + ~actual:(t_actual : int -> a) (script : a t) (initial : container) : + a list = + let initial : a list = + match typ with + | List -> initial + | Array -> Array.to_list initial + | String -> List.init (String.length initial) (fun i -> initial.[i]) + in + List.fold_left + (fun (i, acc) -> function + | Insert { expected; actual } -> + (i + 1, insert (expected + i) (t_actual actual) acc) + | Delete { expected } -> (i - 1, delete (expected + i) acc) + | Substitute { expected; actual } -> + (i, substitute (expected + i) (t_actual actual) acc)) + (0, initial) script + |> snd +end + +let triple_minimum (a, b, c) = + min (min a b) c + +let triple_minimum_on f (a, b, c) = + let ab = if f a > f b then b else a in + if f ab > f c then c else ab + +(** Standard dynamic programming algorithm for Levenshtein edit scripts. This + works in two phases: + + 1. construct a {i cost matrix} of edit distances for each _prefix_ of the + two strings; + + 2. reconstruct an edit script from the cost matrix. + + The standard algorithm uses a cost matrix of size [n * m]. If we only care + about edit scripts up to some maximum length [b], the space and time + complexity can be reduced to [O(max (n, m) * b)] (assuming an [O(1)] + equality function). *) + +(** Phase 1: compute the cost matrix, bottom-up. *) +let construct_grid (type a) ~(equal : a -> a -> bool) (a : a Subarray.t) + (b : a Subarray.t) : int array array = + let grid_x_length = Subarray.length a + 1 + and grid_y_length = Subarray.length b + 1 in + let grid = Array.make_matrix grid_x_length grid_y_length 0 in + let get_grid (x, y) = grid.(x).(y) in + + for i = 0 to grid_x_length - 1 do + for j = 0 to grid_y_length - 1 do + let cost = + if min i j = 0 then max i j + else if equal (Subarray.get a (i - 1)) (Subarray.get b (j - 1)) then + get_grid (i - 1, j - 1) + else + triple_minimum + (get_grid (i - 1, j), get_grid (i, j - 1), get_grid (i - 1, j - 1)) + + 1 + in + grid.(i).(j) <- cost + done + done; + grid + +(** Phase 2: reconstruct the edit script from the cost matrix. *) +let reconstruct_edit_script a b grid = + let get_grid (x, y) = grid.(x).(y) in + + (* Reverse-engineer the direction and action towards a given cell *) + let backtrack (i, j) = + let p_sub = (i - 1, j - 1) and p_ins = (i, j - 1) and p_del = (i - 1, j) in + if Subarray.get a (i - 1) = Subarray.get b (j - 1) then (p_sub, []) + else + ( (`Substitute, get_grid p_sub + 1), + (`Insert, get_grid p_ins), + (`Delete, get_grid p_del) ) + |> triple_minimum_on snd + |> function + | `Substitute, _ -> (p_sub, [ substitute (fst p_sub) (snd p_sub) ]) + | `Insert, _ -> (p_ins, [ insert (fst p_ins) (snd p_ins) ]) + | `Delete, _ -> (p_del, [ delete (fst p_del) ]) + in + + let rec aux acc (x, y) = + match (x, y) with + | 0, 0 -> acc + | i, 0 -> List.init i delete @ acc + | 0, j -> List.init j (insert 0) @ acc + | pos -> + let next_coord, action = backtrack pos in + (aux [@tailcall]) (action @ acc) next_coord + in + let x_len, y_len = Array.length grid, Array.length grid.(0) in + aux [] (x_len - 1, y_len - 1) + +let ( >> ) f g x = g (f x) + +let levenshtein_dp ~equal (a_origin, b_origin) a b = + let grid = construct_grid ~equal a b in + reconstruct_edit_script a b grid + (* Map the command indices to the true coordinates *) + |> List.map (map_expected (( + ) a_origin) >> map_actual (( + ) b_origin)) + +(** Strip common prefixes and suffixes of the input sequences can be stripped + (in linear time) to avoid running the full dynamic programming algorithm on + them. *) +let strip_common_outer (type a) ~equal ((a : a Subarray.t), (b : a Subarray.t)) + = + let len_a = Subarray.length a and len_b = Subarray.length b in + + (* Shift the lower indices upwards until they point to non-equal values in the + arrays (or we scan an entire array). *) + let rec raise_lower_bound (i, j) = + match (i >= len_a, j >= len_b) with + | true, true -> `Equal + | false, false when equal (Subarray.get a i) (Subarray.get b j) -> + raise_lower_bound (i + 1, j + 1) + | a_oob, b_oob -> + let i = if a_oob then None else Some i in + let j = if b_oob then None else Some j in + `Non_equal (i, j) + in + match raise_lower_bound (0, 0) with + | `Equal -> `Equal (* The arrays are equal *) + (* One array is a prefix of the other *) + | `Non_equal (None, None) -> + `Non_equal ((0, 0), (Subarray.empty, Subarray.empty)) + | `Non_equal (None, Some j) -> + `Non_equal + ( (j, j), + (Subarray.empty, Subarray.truncate ~origin:j ~length:(len_b - j) b) ) + | `Non_equal (Some i, None) -> + `Non_equal + ( (i, i), + (Subarray.truncate ~origin:i ~length:(len_a - i) a, Subarray.empty) ) + | `Non_equal (Some i_origin, Some j_origin) -> ( + let rec lower_upper_bound (i, j) = + match (i < i_origin, j < j_origin) with + | true, true -> `Equal + | false, false when equal (Subarray.get a i) (Subarray.get b j) -> + lower_upper_bound (i - 1, j - 1) + | _ -> `Non_equal (i, j) + in + match lower_upper_bound (len_a - 1, len_b - 1) with + | `Equal -> + assert false (* We already decided that the arrays are non-equal *) + | `Non_equal (i_last, j_last) -> + `Non_equal + ( (i_origin, j_origin), + ( Subarray.truncate ~origin:i_origin + ~length:(i_last - i_origin + 1) + a, + Subarray.truncate ~origin:j_origin + ~length:(j_last - j_origin + 1) + b ) ) ) + +let levenshtein_script (type a container) (typ : (a, container) typ) + ~(equal : a -> a -> bool) (a : container) (b : container) : a Edit_script.t + = + let a, b = (Subarray.of_container typ a, Subarray.of_container typ b) in + match strip_common_outer ~equal (a, b) with + | `Equal -> [] + | `Non_equal (origin, (a, b)) -> levenshtein_dp ~equal origin a b diff --git a/test/comments/test_comments/diff.mli b/test/comments/test_comments/diff.mli new file mode 100644 index 0000000000..10455b2e01 --- /dev/null +++ b/test/comments/test_comments/diff.mli @@ -0,0 +1,36 @@ +type index := int + +type 'a command = + | Insert of { expected : index; actual : index } + (** Insert the element [actual.(actual)] at [expected.(expected)]. *) + | Delete of { expected : index } + (** Delete the element at [expected.(expected)]. *) + | Substitute of { expected : index; actual : index } + (** Set [expected.(expected)) := actual.(actual)]. *) + +val pp_command : Format.formatter -> 'a command -> unit + +type ('elt, 'container) typ = + | Array : ('a, 'a array) typ + | List : ('a, 'a list) typ + | String : (char, string) typ + +module Edit_script : sig + type 'a t = 'a command list + + val apply : + ('elt, 'container) typ -> + actual:(index -> 'elt) -> + 'elt t -> + 'container -> + 'elt list +end + +val levenshtein_script : + ('a, 'container) typ -> + equal:('a -> 'a -> bool) -> + 'container -> + 'container -> + 'a Edit_script.t +(** [O(n^2)]-space computation of Levenshtein edit scripts. Guarantees to be + [O(n)] time in the case that the containers are equal. *) diff --git a/test/comments/test_comments/dune b/test/comments/test_comments/dune new file mode 100644 index 0000000000..b4be3b4de0 --- /dev/null +++ b/test/comments/test_comments/dune @@ -0,0 +1,3 @@ +(executable + (name test_comments) + (libraries cmdliner ocamlformat_lib stdio)) diff --git a/test/comments/test_comments/test_comments.ml b/test/comments/test_comments/test_comments.ml new file mode 100644 index 0000000000..d5cab5230b --- /dev/null +++ b/test/comments/test_comments/test_comments.ml @@ -0,0 +1,239 @@ +open Ocamlformat_lib + +let rec lex_source acc lexbuf = + let tok = Lexer.token_with_comments lexbuf in + let tok = Token_latest.of_compiler_libs tok in + let acc = (tok, lexbuf.lex_start_p.pos_cnum) :: acc in + if tok = Migrate_ast.Parser.EOF then acc else lex_source acc lexbuf + +(** Return every tokens and the offset at which they start. *) +let lex_source source = + let lexbuf = Lexing.from_string source in + Lexer.init () ; + Lexer.skip_hash_bang lexbuf ; + List.rev (lex_source [] lexbuf) + +let insert_at_every_offsets insert toffs source ~f = + let ilen = String.length insert and slen = String.length source in + let buff = Bytes.create (ilen + slen) in + (* Keep room on the left for [insert]. *) + Bytes.blit_string source 0 buff ilen slen ; + let rec loop head_i = function + | hd :: tl -> + (* Move previous token from after [insert] to before (overriding it). *) + Bytes.blit buff (head_i + ilen) buff head_i (hd - head_i) ; + (* Blit again [insert]. *) + Bytes.blit_string insert 0 buff hd (String.length insert) ; + f hd (Bytes.unsafe_to_string buff) ; + loop hd tl + | [] -> () + in + loop 0 toffs + +let unlex_quoted_string ?op str del = + let op_sep = match (op, del) with Some _, Some _ -> " " | _ -> "" + and op = Option.value ~default:"" op + and del = Option.value ~default:"" del in + Format.sprintf "{%s%s%s|%s|%s}" op op_sep del str del + +let unlex_token = function + | Migrate_ast.Parser.AMPERAMPER -> "&&" + | AMPERSAND -> "&" + | AND -> "and" + | AS -> "as" + | ASSERT -> "assert" + | BACKQUOTE -> "`" + | BANG -> "!" + | BAR -> "|" + | BARBAR -> "||" + | BARRBRACKET -> "|]" + | BEGIN -> "begin" + | CHAR c -> Format.sprintf "'%c'" c + | CLASS -> "class" + | COLON -> ":" + | COLONCOLON -> "::" + | COLONEQUAL -> ":=" + | COLONGREATER -> ":>" + | COMMA -> "," + | COMMENT (cmt, _) -> Format.sprintf "(*%s*)" cmt + | CONSTRAINT -> "constraint" + | DOCSTRING d -> Format.sprintf "(**%s*)" (Docstrings.docstring_body d) + | DO -> "do" + | DONE -> "done" + | DOT -> "." + | DOTDOT -> ".." + | DOTOP op -> "." ^ op + | DOWNTO -> "downto" + | ELSE -> "else" + | END -> "end" + | EOF -> "" + | EOL -> "\n" + | EQUAL -> "=" + | EXCEPTION -> "exception" + | EXTERNAL -> "external" + | FALSE -> "false" + | FLOAT (r, None) | INT (r, None) -> r + | FLOAT (r, Some m) | INT (r, Some m) -> r ^ String.make 1 m + | FOR -> "for" + | FUNCTION -> "function" + | FUNCTOR -> "functor" + | FUN -> "fun" + | GREATER -> ">" + | GREATERRBRACE -> ">}" + | GREATERRBRACKET -> ">]" + | HASH -> "#" + | IF -> "if" + | INCLUDE -> "include" + | INHERIT -> "inherit" + | IN -> "in" + | INITIALIZER -> "initializer" + | LABEL n -> Format.sprintf "~%s:" n + | LAZY -> "lazy" + | LBRACE -> "{" + | LBRACELESS -> "{<" + | LBRACKET -> "[" + | LBRACKETAT -> "[@" + | LBRACKETATAT -> "[@@" + | LBRACKETATATAT -> "[@@@" + | LBRACKETBAR -> "[|" + | LBRACKETGREATER -> "[>" + | LBRACKETLESS -> "[<" + | LBRACKETPERCENT -> "[%" + | LBRACKETPERCENTPERCENT -> "[%%" + | LESS -> "<" + | LESSMINUS -> "<-" + | LET -> "let" + | LETOP op + |ANDOP op + |PREFIXOP op + |INFIXOP4 op + |INFIXOP3 op + |INFIXOP2 op + |INFIXOP1 op + |INFIXOP0 op + |HASHOP op -> + op + | LIDENT _1 | UIDENT _1 -> _1 + | LPAREN -> "(" + | MATCH -> "match" + | METHOD -> "method" + | MINUS -> "-" + | MINUSDOT -> "-." + | MINUSGREATER -> "->" + | MODULE -> "module" + | MUTABLE -> "mutable" + | NEW -> "new" + | NONREC -> "nonrec" + | OBJECT -> "object" + | OF -> "of" + | OPEN -> "open" + | OPTLABEL n -> Format.sprintf "?%s:" n + | OR -> "or" + | PERCENT -> "%" + | PLUS -> "+" + | PLUSDOT -> "+." + | PLUSEQ -> "+=" + | PRIVATE -> "private" + | QUESTION -> "?" + | QUOTE -> "\"" + | QUOTED_STRING_EXPR (id, _, str, _, del) -> + unlex_quoted_string ~op:("@" ^ id) str del + | QUOTED_STRING_ITEM (id, _, str, _, del) -> + unlex_quoted_string ~op:("%" ^ id) str del + | RBRACE -> "}" + | RBRACKET -> "]" + | REC -> "rec" + | RPAREN -> ")" + | SEMI -> ";" + | SEMISEMI -> ";;" + | SIG -> "sig" + | STAR -> "*" + | STRING (str, _, None) -> Format.sprintf "%S" str + | STRING (str, _, (Some _ as del)) -> unlex_quoted_string str del + | STRUCT -> "struct" + | THEN -> "then" + | TILDE -> "~" + | TO -> "to" + | TRUE -> "true" + | TRY -> "try" + | TYPE -> "type" + | UNDERSCORE -> "_" + | VAL -> "val" + | VIRTUAL -> "virtual" + | WHEN -> "when" + | WHILE -> "while" + | WITH -> "with" + +(** Can't use polymorphic equal because of location arguments in some tokens. *) +let token_equal a b = String.equal (unlex_token a) (unlex_token b) + +let print_hunk fmt (tokens, start, end_) = + (* Add a token of context before and after. *) + let start = max 0 (start - 1) + and end_ = min (Array.length tokens - 1) (end_ + 1) in + for i = start to end_ do + let tok_s = + match tokens.(i) with + (* Special case to be able to print on one line *) + | Migrate_ast.Parser.EOL -> "" + | tok -> unlex_token tok + in + Format.fprintf fmt " %s" tok_s + done + +let print_diff a b script = + let f (a_start, a_end, delta) d = + let i, delta = + match d with + | Diff.Insert {expected; _} -> (expected, delta + 1) + | Delete {expected} -> (expected, delta - 1) + | Substitute {expected; _} -> (expected, delta) + in + (min i a_start, max i a_end, delta) + in + let start, a_end, delta = List.fold_left f (max_int, min_int, 0) script in + Format.printf "before:%a@\n after:%a@\n" print_hunk (a, start, a_end) + print_hunk + (b, start, a_end + delta) + +let run file = + let conf = + { Conf.ocamlformat_profile with + break_cases= `Fit + ; margin= 77 + ; parse_docstrings= true + ; wrap_comments= true } + and opts = + Conf.{debug= false; margin_check= false; format_invalid_files= false} + in + let initial_source = Stdio.In_channel.read_all file in + let toffs = List.map snd (lex_source initial_source) in + let pr_ins = Format.printf "insertion offset = %d@\n%!" in + let insert = "(* toto *)" in + insert_at_every_offsets insert toffs initial_source + ~f:(fun insert_offset source -> + match + Translation_unit.parse_and_format Migrate_ast.Traverse.Structure + ~input_name:file ~source conf opts + with + | Ok formatted -> ( + let lex inp = Array.map fst (Array.of_list (lex_source inp)) in + let a = lex source and b = lex formatted in + match Diff.(levenshtein_script Array) ~equal:token_equal a b with + | [] -> () + | script -> pr_ins insert_offset ; print_diff a b script ) + | Error err -> + pr_ins insert_offset ; + Translation_unit.print_error ~debug:false ~quiet:true + ~input_name:file err ) + +open Cmdliner + +let cmd = + let a_file = Arg.(required & pos 0 (some file) None & info []) in + let doc = + "Repeatedly test ocamlformat with a comment before every tokens." + in + Term.(const run $ a_file, info ~doc "test_comments") + +let () = Term.exit (Term.eval cmd)