From 93a528b117f292100a014173fff748e4678750ee Mon Sep 17 00:00:00 2001 From: Robin Bate Boerop Date: Thu, 7 May 2026 15:35:07 -0700 Subject: [PATCH] perf: cache the per-(dep_m, ml_kind, cm_kind, is_consumer) raw-refs builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In [module_compilation.ml]'s [lib_deps_for_module], each consumer module iterates over [m :: trans_deps] and calls [read_dep_m_raw] per dep. Sibling consumers in the same stanza share large parts of [trans_deps] but used to reconstruct fresh [Action_builder.t] trees per call — the inner [ocamldep] result is shared via [Ocamldep]'s path-keyed cache, but the wrapping [need_impl_deps_of] / [Module_name.Set.union] logic was rebuilt N×K times per stanza. Add a per-cctx [Raw_refs.t = (Key.t, _ Action_builder.t) Table.t] in [Compilation_context], keyed on (obj_name, ml_kind, cm_kind, is_consumer). [Table.find] short-circuits before allocating, mirroring the pattern used by [Ocamldep.read_immediate_deps_words]'s top-level cache. Two prior attempts at this memoisation failed: * Apr 21 (`e1b638664`, reverted): recursive memo across direct module deps; infinite loop on module-level cycles (`alias/check-alias/ocamldep-cycles.t`). * Apr 25 (`3a70bfaa0`, dropped): seen-set shape; OOM-killed CI because [Action_builder.memoize] dedupes evaluation by string key but does NOT dedupe construction. With N modules × M consumers, each call still allocated a fresh [Action_builder.t] tree before the memoize wrapper saw the key. This third attempt avoids both failure modes: the [Table.find] short-circuit prevents construction-time blowup, and the cache is intra-stanza only (the cross-library walk has its own [seen]-set termination), so module-level cycles are not visited by this loop. Addresses art-w's review concern at https://github.com/ocaml/dune/pull/14116/files#r3116025155 Signed-off-by: Robin Bate Boerop --- src/dune_rules/compilation_context.ml | 74 ++++++++++++++++++++++++++ src/dune_rules/compilation_context.mli | 15 ++++++ src/dune_rules/module_compilation.ml | 47 ++++++++++------ 3 files changed, 121 insertions(+), 15 deletions(-) diff --git a/src/dune_rules/compilation_context.ml b/src/dune_rules/compilation_context.ml index a4744c87a5e..4ce607f9a35 100644 --- a/src/dune_rules/compilation_context.ml +++ b/src/dune_rules/compilation_context.ml @@ -37,6 +37,66 @@ end kept_libs). Two consumer modules with the same kept-libs share one [Args.t]. Cache is per-cctx, so regenerating the cctx (e.g. on a [(libraries ...)] edit) discards it — the requires-split need not be in the key. *) +(* Cache the raw-refs Action_builder built for each [(dep_m, + ml_kind, cm_kind, is_consumer)] tuple within a single cctx. + Sibling consumers iterate over overlapping [trans_deps] sets; + without this cache each call reconstructs a fresh + [Action_builder.t] tree (the inner [ocamldep] result is shared, + but the wrapping per-module logic is rebuilt N times per + stanza). [Table.find] short-circuits before allocating. *) +module Raw_refs = struct + module Key = struct + type t = + { obj_name : Module_name.Unique.t + ; ml_kind : Ml_kind.t + ; cm_kind : Lib_mode.Cm_kind.t + ; is_consumer : bool + } + + let cm_kind_tag : Lib_mode.Cm_kind.t -> int = function + | Ocaml Cmi -> 0 + | Ocaml Cmo -> 1 + | Ocaml Cmx -> 2 + | Melange Cmi -> 3 + | Melange Cmj -> 4 + ;; + + let ml_kind_tag : Ml_kind.t -> int = function + | Intf -> 0 + | Impl -> 1 + ;; + + let equal a b = + Module_name.Unique.equal a.obj_name b.obj_name + && ml_kind_tag a.ml_kind = ml_kind_tag b.ml_kind + && cm_kind_tag a.cm_kind = cm_kind_tag b.cm_kind + && Bool.equal a.is_consumer b.is_consumer + ;; + + let hash { obj_name; ml_kind; cm_kind; is_consumer } = + Poly.hash + ( Module_name.Unique.to_string obj_name + , ml_kind_tag ml_kind + , cm_kind_tag cm_kind + , is_consumer ) + ;; + + let to_dyn { obj_name; ml_kind; cm_kind; is_consumer } = + let open Dyn in + record + [ "obj_name", Module_name.Unique.to_dyn obj_name + ; "ml_kind", string (Ml_kind.to_string ml_kind) + ; "cm_kind", Lib_mode.Cm_kind.to_dyn cm_kind + ; "is_consumer", bool is_consumer + ] + ;; + end + + type t = (Key.t, Module_name.Set.t Action_builder.t) Table.t + + let create () : t = Table.create (module Key) 64 +end + module Filtered_includes = struct module Key = struct type t = @@ -113,6 +173,7 @@ type t = ; ocaml : Ocaml_toolchain.t ; for_ : Compilation_mode.t ; filtered_includes : Filtered_includes.t + ; raw_refs : Raw_refs.t } let loc t = t.loc @@ -377,11 +438,24 @@ let create ; instances ; for_ ; filtered_includes = Filtered_includes.create () + ; raw_refs = Raw_refs.create () } ;; let for_ t = t.for_ +let cached_raw_refs t ~dep_m ~ml_kind ~cm_kind ~is_consumer compute = + let cache_key = + { Raw_refs.Key.obj_name = Module.obj_name dep_m; ml_kind; cm_kind; is_consumer } + in + match Table.find t.raw_refs cache_key with + | Some builder -> builder + | None -> + let builder = compute () in + Table.set t.raw_refs cache_key builder; + builder +;; + let filtered_include_flags t ~cm_kind ~kept_libs = let lib_mode = Lib_mode.of_cm_kind cm_kind in let cache_key = diff --git a/src/dune_rules/compilation_context.mli b/src/dune_rules/compilation_context.mli index fa1dc4d890d..c32149bbea1 100644 --- a/src/dune_rules/compilation_context.mli +++ b/src/dune_rules/compilation_context.mli @@ -64,6 +64,21 @@ val requires_compile : t -> Lib.t list Resolve.Memo.t val parameters : t -> Module_name.t list Resolve.Memo.t val includes : t -> Command.Args.without_targets Command.Args.t Lib_mode.Cm_kind.Map.t +(** Memoise the raw-refs [Action_builder.t] computed for each + [(dep_m, ml_kind, cm_kind, is_consumer)] tuple within this + cctx. [compute ()] is invoked only on cache miss; subsequent + callers with the same key get the cached builder back. The + cache short-circuits before allocating, so siblings sharing + [trans_deps] don't redo construction. *) +val cached_raw_refs + : t + -> dep_m:Module.t + -> ml_kind:Ml_kind.t + -> cm_kind:Lib_mode.Cm_kind.t + -> is_consumer:bool + -> (unit -> Module_name.Set.t Action_builder.t) + -> Module_name.Set.t Action_builder.t + (** Include flags ([-I]/[-H]) filtered to a [kept_libs] subset of the cctx's [requires_compile] / [requires_hidden] (direct + hidden split preserved). Cached per diff --git a/src/dune_rules/module_compilation.ml b/src/dune_rules/module_compilation.ml index 1fd079f7a01..0dac739a81a 100644 --- a/src/dune_rules/module_compilation.ml +++ b/src/dune_rules/module_compilation.ml @@ -122,21 +122,38 @@ let lib_deps_for_module ~cctx ~obj_dir ~for_ ~dep_graph ~opaque ~cm_kind ~ml_kin | Ocaml (Cmi | Cmo) | Melange _ -> false in let read_dep_m_raw dep_m ~is_consumer = - let* impl_deps = - if need_impl_deps_of dep_m ~is_consumer - then - Ocamldep.read_immediate_deps_raw_of - ~sandbox - ~sctx - ~obj_dir - ~ml_kind:Impl - dep_m - else Action_builder.return Module_name.Set.empty - in - let+ intf_deps = - Ocamldep.read_immediate_deps_raw_of ~sandbox ~sctx ~obj_dir ~ml_kind:Intf dep_m - in - Module_name.Set.union impl_deps intf_deps + (* For trans_deps ([is_consumer = false]) [need_impl_deps_of] + does not read [ml_kind], so the cached builder is the same + for [Impl] and [Intf] passes. Normalising keeps the cache + shareable across both passes. *) + let cache_ml_kind = if is_consumer then ml_kind else Ml_kind.Impl in + Compilation_context.cached_raw_refs + cctx + ~dep_m + ~ml_kind:cache_ml_kind + ~cm_kind + ~is_consumer + (fun () -> + let* impl_deps = + if need_impl_deps_of dep_m ~is_consumer + then + Ocamldep.read_immediate_deps_raw_of + ~sandbox + ~sctx + ~obj_dir + ~ml_kind:Impl + dep_m + else Action_builder.return Module_name.Set.empty + in + let+ intf_deps = + Ocamldep.read_immediate_deps_raw_of + ~sandbox + ~sctx + ~obj_dir + ~ml_kind:Intf + dep_m + in + Module_name.Set.union impl_deps intf_deps) in let* m_raw = read_dep_m_raw m ~is_consumer:true in let* trans_raw =