From a8db612b5e9b8a7baacaf532d7275bc445ecaba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Wed, 24 Apr 2024 12:13:03 +0200 Subject: [PATCH] index_format: using maps is more efficient when merging indexes --- src/analysis/occurrences.ml | 22 ++++++---- src/index-format/index_format.ml | 40 +++++++++---------- src/index-format/index_format.mli | 37 +++++++++++++++++ src/ocaml/utils/config.ml | 1 + src/ocaml/utils/config.mli | 2 + .../occurrences/project-wide/pwo-basic.t | 8 ++-- 6 files changed, 77 insertions(+), 33 deletions(-) create mode 100644 src/index-format/index_format.mli diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index a17d28481..782af6046 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -1,5 +1,5 @@ open Std -module LidSet = Index_format.LidSet +module Lid_set = Index_format.Lid_set let {Logger. log} = Logger.for_section "occurrences" @@ -30,6 +30,12 @@ let decl_of_path_or_lid env namespace path lid = let index_buffer_ ~scope ~current_buffer_path ~local_defs () = let {Logger. log} = Logger.for_section "index" in let defs = Hashtbl.create 64 in + let add tbl uid locs = + try + let locations = Hashtbl.find tbl uid in + Hashtbl.replace tbl uid (Lid_set.union locs locations) + with Not_found -> Hashtbl.add tbl uid locs + in let module Shape_reduce = Shape_reduce.Make (struct let fuel = 10 @@ -56,7 +62,7 @@ let index_buffer_ ~scope ~current_buffer_path ~local_defs () = | Some decl -> log ~title:"index_buffer" "Found declaration: %a" Logger.fmt (Fun.flip Location.print_loc decl.loc); - Index_format.(add defs decl.uid (LidSet.singleton lid)) + add defs decl.uid (Lid_set.singleton lid) end in if not_ghost lid then @@ -72,7 +78,7 @@ let index_buffer_ ~scope ~current_buffer_path ~local_defs () = (Longident.head lid.txt) Logger.fmt (Fun.flip Location.print_loc lid.loc) Logger.fmt (Fun.flip Shape.Uid.print uid); - Index_format.(add defs uid (LidSet.singleton lid)) + add defs uid (Lid_set.singleton lid) | Some uid, true -> log ~title:"index_buffer" "Shape is approximative, found uid: %a" Logger.fmt (Fun.flip Shape.Uid.print uid); @@ -238,8 +244,8 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = try let locs = List.filter_map config.merlin.index_files ~f:(fun file -> let external_index = Index_cache.read file in - Hashtbl.find_opt external_index.defs def_uid - |> Option.map ~f:(fun locs -> LidSet.filter (fun {loc; _} -> + Index_format.Uid_map.find_opt def_uid external_index.defs + |> Option.map ~f:(fun locs -> Lid_set.filter (fun {loc; _} -> (* We ignore external results that concern the current buffer *) let fname = loc.Location.loc_start.Lexing.pos_fname in if String.equal fname current_buffer_path then false @@ -264,11 +270,11 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = | Some buffer_locs -> buffer_locs :: external_locs | None -> external_locs in - List.fold_left ~init:LidSet.empty ~f:LidSet.union all_locs + List.fold_left ~init:Lid_set.empty ~f:Lid_set.union all_locs in let locs = - log ~title:"occurrences" "Found %i locs" (LidSet.cardinal locs); - LidSet.elements locs + log ~title:"occurrences" "Found %i locs" (Lid_set.cardinal locs); + Lid_set.elements locs |> List.filter_map ~f:(fun {Location.txt; loc} -> log ~title:"occurrences" "Found occ: %s %a" (Longident.head txt) Logger.fmt (Fun.flip Location.print_loc loc); diff --git a/src/index-format/index_format.ml b/src/index-format/index_format.ml index 98da57413..12606cbb2 100644 --- a/src/index-format/index_format.ml +++ b/src/index-format/index_format.ml @@ -15,30 +15,29 @@ module Lid : Set.OrderedType with type t = Longident.t Location.loc = struct | n -> n end -module LidSet = Set.Make (Lid) +module Lid_set = Set.Make (Lid) +module Uid_map = Shape.Uid.Map module Stats = Map.Make (String) -(** [add tbl uid locs] adds a binding of [uid] to the locations [locs]. If this key is +(** [add map uid locs] adds a binding of [uid] to the locations [locs]. If this key is already present the locations are merged. *) -let add tbl uid locs = - try - let locations = Hashtbl.find tbl uid in - Hashtbl.replace tbl uid (LidSet.union locs locations) - with Not_found -> Hashtbl.add tbl uid locs +let add map uid locs = Uid_map.update uid (function + | None -> Some locs + | Some locs' -> Some (Lid_set.union locs' locs)) + map + +type stat = { mtime : float; size : int; source_digest : string option } -type stat = { mtime : float; size : int; source_digest: string option } type index = { - defs : (Shape.Uid.t, LidSet.t) Hashtbl.t; - approximated : (Shape.Uid.t, LidSet.t) Hashtbl.t; - load_path : Load_path.paths; + defs : Lid_set.t Uid_map.t; + approximated : Lid_set.t Uid_map.t; cu_shape : (string, Shape.t) Hashtbl.t; stats : stat Stats.t; } -let pp_partials (fmt : Format.formatter) - (partials : (Shape.Uid.t, LidSet.t) Hashtbl.t) = +let pp_partials (fmt : Format.formatter) (partials : Lid_set.t Uid_map.t) = Format.fprintf fmt "{@["; - Hashtbl.iter + Uid_map.iter (fun uid locs -> Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" Shape.Uid.print uid @@ -48,13 +47,13 @@ let pp_partials (fmt : Format.formatter) Format.fprintf fmt "%S: %a" (try Longident.flatten txt |> String.concat "." with _ -> "") Location.print_loc loc)) - (LidSet.elements locs)) + (Lid_set.elements locs)) partials; Format.fprintf fmt "@]}" let pp (fmt : Format.formatter) pl = - Format.fprintf fmt "%i uids:@ {@[" (Hashtbl.length pl.defs); - Hashtbl.iter + Format.fprintf fmt "%i uids:@ {@[" (Uid_map.cardinal pl.defs); + Uid_map.iter (fun uid locs -> Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" Shape.Uid.print uid @@ -64,19 +63,18 @@ let pp (fmt : Format.formatter) pl = Format.fprintf fmt "%S: %a" (try Longident.flatten txt |> String.concat "." with _ -> "") Location.print_loc loc)) - (LidSet.elements locs)) + (Lid_set.elements locs)) pl.defs; Format.fprintf fmt "@]},@ "; Format.fprintf fmt "%i approx shapes:@ @[%a@],@ " - (Hashtbl.length pl.approximated) + (Uid_map.cardinal pl.approximated) pp_partials pl.approximated; Format.fprintf fmt "and shapes for CUS %s.@ " (String.concat ";@," (Hashtbl.to_seq_keys pl.cu_shape |> List.of_seq)) let ext = "ocaml-index" -(* [magic_number] Must be the same lenght as cmt's magic numbers *) -let magic_number = "Merl2023I001" +let magic_number = Config.index_magic_number let write ~file index = Misc.output_to_file_via_temporary ~mode:[ Open_binary ] file diff --git a/src/index-format/index_format.mli b/src/index-format/index_format.mli new file mode 100644 index 000000000..3b3866eb6 --- /dev/null +++ b/src/index-format/index_format.mli @@ -0,0 +1,37 @@ +exception Not_an_index of string + +val ext : string +val magic_number : string + +module Lid : Set.OrderedType with type t = Longident.t Location.loc +module Lid_set : Set.S with type elt = Lid.t +module Stats : Map.S with type key = String.t +module Uid_map = Shape.Uid.Map + +type stat = { + mtime : float; + size : int; + source_digest : string option +} + +type index = { + defs : Lid_set.t Uid_map.t; + approximated : Lid_set.t Uid_map.t; + cu_shape : (string, Shape.t) Hashtbl.t; + stats : stat Stats.t; +} + +val pp : Format.formatter -> index -> unit + +(** [add tbl uid locs] adds a binding of [uid] to the locations [locs]. If this + key is already present the locations are merged. *) +val add : Lid_set.t Uid_map.t -> Shape.Uid.t -> Lid_set.t -> Lid_set.t Uid_map.t + +type file_content = + | Cmt of Cmt_format.cmt_infos + | Index of index + | Unknown + +val write : file:string -> index -> unit +val read : file:string -> file_content +val read_exn : file:string -> index diff --git a/src/ocaml/utils/config.ml b/src/ocaml/utils/config.ml index 4c2a9566f..0a2c82eec 100644 --- a/src/ocaml/utils/config.ml +++ b/src/ocaml/utils/config.ml @@ -49,6 +49,7 @@ and ast_impl_magic_number = "Caml1999M034" and ast_intf_magic_number = "Caml1999N034" and cmxs_magic_number = "Caml1999D034" and cmt_magic_number = "Caml1999T034" +and index_magic_number = "Merl2023I001" let interface_suffix = ref ".mli" diff --git a/src/ocaml/utils/config.mli b/src/ocaml/utils/config.mli index 26323f87f..df34aee28 100644 --- a/src/ocaml/utils/config.mli +++ b/src/ocaml/utils/config.mli @@ -43,6 +43,8 @@ val cmxs_magic_number: string (* Magic number for dynamically-loadable plugins *) val cmt_magic_number: string (* Magic number for compiled interface files *) +val index_magic_number: string + (* Magic number for index files *) val max_tag: int (* Biggest tag that can be stored in the header of a regular block. *) diff --git a/tests/test-dirs/occurrences/project-wide/pwo-basic.t b/tests/test-dirs/occurrences/project-wide/pwo-basic.t index dcc5b295f..5fcd73f6e 100644 --- a/tests/test-dirs/occurrences/project-wide/pwo-basic.t +++ b/tests/test-dirs/occurrences/project-wide/pwo-basic.t @@ -12,13 +12,13 @@ $ ocaml-index aggregate main.cmt lib.cmt $ ocaml-index dump project.ocaml-index 2 uids: - {uid: Stdlib.312; locs: - "print_string": File "lib.ml", line 2, characters 9-21; - "print_string": File "main.ml", line 1, characters 9-21 - uid: Lib.0; locs: + {uid: Lib.0; locs: "foo": File "lib.ml", line 1, characters 4-7; "foo": File "lib.ml", line 2, characters 22-25; "Lib.foo": File "main.ml", line 1, characters 22-29 + uid: Stdlib.312; locs: + "print_string": File "lib.ml", line 2, characters 9-21; + "print_string": File "main.ml", line 1, characters 9-21 }, 0 approx shapes: {}, and shapes for CUS . $ $MERLIN single occurrences -scope project -identifier-at 1:28 \