diff --git a/src/Lean/Meta/Closure.lean b/src/Lean/Meta/Closure.lean index d5c6d4bf86de..6ddd7dfc43ae 100644 --- a/src/Lean/Meta/Closure.lean +++ b/src/Lean/Meta/Closure.lean @@ -456,10 +456,15 @@ def mkAuxDefinitionFor (name : Name) (value : Expr) (zetaDelta : Bool := false) -/ def mkAuxTheorem (type : Expr) (value : Expr) (zetaDelta : Bool := false) (kind? : Option Name := none) (cache := true) : MetaM Expr := do let result ← Closure.mkValueTypeClosure type value zetaDelta - let name ← mkAuxLemma (kind? := kind?) (cache := cache) result.levelParams.toList result.type result.value + let (resType, resValue) ← profileitM Exception "share common exprs" (← getOptions) do + withTraceNode `Meta.Closure.maxSharing (fun _ => return m!"share common exprs") do + let es := ShareCommon.shareCommon' #[result.type, result.value] + return (es[0]!, es[1]!) + let name ← mkAuxLemma (kind? := kind?) (cache := cache) result.levelParams.toList resType resValue return mkAppN (mkConst name result.levelArgs.toList) result.exprArgs builtin_initialize registerTraceClass `Meta.Closure + registerTraceClass `Meta.Closure.maxSharing end Lean.Meta diff --git a/src/kernel/environment.cpp b/src/kernel/environment.cpp index 8c5d2b1a79b3..e322800895e7 100644 --- a/src/kernel/environment.cpp +++ b/src/kernel/environment.cpp @@ -9,7 +9,6 @@ Author: Leonardo de Moura #include #include "runtime/sstream.h" #include "runtime/thread.h" -#include "runtime/sharecommon.h" #include "util/map_foreach.h" #include "util/io.h" #include "kernel/environment.h" @@ -194,15 +193,12 @@ environment environment::add_theorem(declaration const & d, bool check) const { theorem_val const & v = d.to_theorem_val(); if (check) { type_checker checker(*this, diag.get()); - sharecommon_persistent_fn share; - expr val(share(v.get_value().raw())); - expr type(share(v.get_type().raw())); check_constant_val(*this, v.to_constant_val(), checker); - if (!checker.is_prop(type)) - throw theorem_type_is_not_prop(*this, v.get_name(), type); 
- check_no_metavar_no_fvar(*this, v.get_name(), val); - expr val_type = checker.check(val, v.get_lparams()); - if (!checker.is_def_eq(val_type, type)) + if (!checker.is_prop(v.get_type())) + throw theorem_type_is_not_prop(*this, v.get_name(), v.get_type()); + check_no_metavar_no_fvar(*this, v.get_name(), v.get_value()); + expr val_type = checker.check(v.get_value(), v.get_lparams()); + if (!checker.is_def_eq(val_type, v.get_type())) throw definition_type_mismatch_exception(*this, d, val_type); } return diag.update(add(constant_info(d))); diff --git a/src/kernel/expr.cpp b/src/kernel/expr.cpp index f87e72aaee16..495df62cd745 100644 --- a/src/kernel/expr.cpp +++ b/src/kernel/expr.cpp @@ -100,7 +100,7 @@ extern "C" uint8 lean_expr_has_level_param(obj_arg e); bool has_univ_param(expr const & e) { return lean_expr_has_level_param(e.to_obj_arg()); } extern "C" unsigned lean_expr_loose_bvar_range(object * e); -unsigned get_loose_bvar_range(expr const & e) { return lean_expr_loose_bvar_range(e.to_obj_arg()); } +unsigned loose_bvar_range_core(expr const & e) { return lean_expr_loose_bvar_range(e.to_obj_arg()); } extern "C" LEAN_EXPORT uint64_t lean_expr_mk_data(uint64_t hash, object * bvarRange, uint32_t approxDepth, uint8_t hasFVar, uint8_t hasExprMVar, uint8_t hasLevelMVar, uint8_t hasLevelParam) { if (approxDepth > 255) approxDepth = 255; @@ -418,12 +418,13 @@ expr lower_loose_bvars(expr const & e, unsigned s, unsigned d) { if (d == 0 || s >= get_loose_bvar_range(e)) return e; lean_assert(s >= d); - return replace(e, [=](expr const & e, unsigned offset) -> optional { + return replace(e, + [=](expr const & e, unsigned offset) { + unsigned s1 = s + offset; + return s1 < s /* overflow */ || s1 >= get_loose_bvar_range(e); + }, + [=](expr const & e, unsigned offset) -> optional { unsigned s1 = s + offset; - if (s1 < s) - return some_expr(e); // overflow, vidx can't be >= max unsigned - if (s1 >= get_loose_bvar_range(e)) - return some_expr(e); // expression e does not contain 
bound variables with idx >= s1 if (is_bvar(e) && bvar_idx(e) >= s1) { lean_assert(bvar_idx(e) >= offset + d); return some_expr(mk_bvar(bvar_idx(e) - nat(d))); @@ -448,12 +449,12 @@ extern "C" LEAN_EXPORT object * lean_expr_lower_loose_bvars(b_obj_arg e, b_obj_a expr lift_loose_bvars(expr const & e, unsigned s, unsigned d) { if (d == 0 || s >= get_loose_bvar_range(e)) return e; - return replace(e, [=](expr const & e, unsigned offset) -> optional { + return replace(e, + [=](expr const & e, unsigned offset) { unsigned s1 = s + offset; - if (s1 < s) - return some_expr(e); // overflow, vidx can't be >= max unsigned - if (s1 >= get_loose_bvar_range(e)) - return some_expr(e); // expression e does not contain bound variables with idx >= s1 + return s1 < s /* overflow */ || s1 >= get_loose_bvar_range(e); + }, + [=](expr const & e, unsigned offset) -> optional { if (is_var(e) && bvar_idx(e) >= s + offset) { return some_expr(mk_bvar(bvar_idx(e) + nat(d))); } else { diff --git a/src/kernel/expr.h b/src/kernel/expr.h index bcf83b97aa4b..69ef27317166 100644 --- a/src/kernel/expr.h +++ b/src/kernel/expr.h @@ -155,7 +155,13 @@ inline bool has_fvar(expr const & e) { return r; } bool has_univ_param(expr const & e); -unsigned get_loose_bvar_range(expr const & e); +/* This is the implementation in Lean */ +unsigned loose_bvar_range_core(expr const & e); +inline unsigned get_loose_bvar_range(expr const & e) { + unsigned r = static_cast(get_data(e) >> 44); + lean_assert(r == loose_bvar_range_core(e)); + return r; +} struct expr_hash { unsigned operator()(expr const & e) const { return hash(e); } }; struct expr_pair_hash { diff --git a/src/kernel/instantiate.cpp b/src/kernel/instantiate.cpp index b845eda9129c..f04e1d4a1724 100644 --- a/src/kernel/instantiate.cpp +++ b/src/kernel/instantiate.cpp @@ -15,13 +15,14 @@ namespace lean { expr instantiate(expr const & a, unsigned s, unsigned n, expr const * subst) { if (s >= get_loose_bvar_range(a) || n == 0) return a; - return replace(a, 
[=](expr const & m, unsigned offset) -> optional { + return replace(a, + [=](expr const & m, unsigned offset) { unsigned s1 = s + offset; - if (s1 < s) - return some_expr(m); // overflow, vidx can't be >= max unsigned - if (s1 >= get_loose_bvar_range(m)) - return some_expr(m); // expression m does not contain loose bound variables with idx >= s1 + return s1 < s /* overflow */ || s1 >= get_loose_bvar_range(m); + }, + [=](expr const & m, unsigned offset) -> optional { if (is_bvar(m)) { + unsigned s1 = s + offset; nat const & vidx = bvar_idx(m); if (vidx >= s1) { unsigned h = s1 + n; @@ -58,9 +59,11 @@ static object * lean_expr_instantiate_core(b_obj_arg a0, size_t n, object** subs lean_inc(a0); return a0; } - expr r = replace(a, [=](expr const & m, unsigned offset) -> optional { - if (offset >= get_loose_bvar_range(m)) - return some_expr(m); // expression m does not contain loose bound variables with idx >= offset + expr r = replace(a, + [=](expr const & m, unsigned offset) { + return offset >= get_loose_bvar_range(m); + }, + [=](expr const & m, unsigned offset) -> optional { if (is_bvar(m)) { nat const & vidx = bvar_idx(m); if (vidx >= offset) { @@ -99,9 +102,11 @@ extern "C" LEAN_EXPORT object * lean_expr_instantiate_range(b_obj_arg a, b_obj_a expr instantiate_rev(expr const & a, unsigned n, expr const * subst) { if (!has_loose_bvars(a)) return a; - return replace(a, [=](expr const & m, unsigned offset) -> optional { - if (offset >= get_loose_bvar_range(m)) - return some_expr(m); // expression m does not contain loose bound variables with idx >= offset + return replace(a, + [=](expr const & m, unsigned offset) { + return offset >= get_loose_bvar_range(m); + }, + [=](expr const & m, unsigned offset) -> optional { if (is_bvar(m)) { nat const & vidx = bvar_idx(m); if (vidx >= offset) { @@ -123,9 +128,11 @@ static object * lean_expr_instantiate_rev_core(object * a0, size_t n, object ** lean_inc(a0); return a0; } - expr r = replace(a, [=](expr const & m, unsigned 
offset) -> optional { - if (offset >= get_loose_bvar_range(m)) - return some_expr(m); // expression m does not contain loose bound variables with idx >= offset + expr r = replace(a, + [=](expr const & m, unsigned offset) { + return offset >= get_loose_bvar_range(m); + }, + [=](expr const & m, unsigned offset) -> optional { if (is_bvar(m)) { nat const & vidx = bvar_idx(m); if (vidx >= offset) { diff --git a/src/kernel/replace_fn.cpp b/src/kernel/replace_fn.cpp index 08e6b17d85cd..dcaa98be23c3 100644 --- a/src/kernel/replace_fn.cpp +++ b/src/kernel/replace_fn.cpp @@ -13,75 +13,22 @@ Author: Leonardo de Moura namespace lean { -class replace_rec_fn { - struct key_hasher { - std::size_t operator()(std::pair const & p) const { - return hash((size_t)p.first >> 3, p.second); - } - }; - lean::unordered_map, expr, key_hasher> m_cache; - std::function(expr const &, unsigned)> m_f; - bool m_use_cache; - - expr save_result(expr const & e, unsigned offset, expr r, bool shared) { - if (shared) - m_cache.insert(mk_pair(mk_pair(e.raw(), offset), r)); - return r; - } - - expr apply(expr const & e, unsigned offset) { - bool shared = false; - if (m_use_cache && !is_likely_unshared(e)) { - auto it = m_cache.find(mk_pair(e.raw(), offset)); - if (it != m_cache.end()) - return it->second; - shared = true; - } - if (optional r = m_f(e, offset)) { - return save_result(e, offset, std::move(*r), shared); - } else { - switch (e.kind()) { - case expr_kind::Const: case expr_kind::Sort: - case expr_kind::BVar: case expr_kind::Lit: - case expr_kind::MVar: case expr_kind::FVar: - return save_result(e, offset, e, shared); - case expr_kind::MData: { - expr new_e = apply(mdata_expr(e), offset); - return save_result(e, offset, update_mdata(e, new_e), shared); - } - case expr_kind::Proj: { - expr new_e = apply(proj_expr(e), offset); - return save_result(e, offset, update_proj(e, new_e), shared); - } - case expr_kind::App: { - expr new_f = apply(app_fn(e), offset); - expr new_a = apply(app_arg(e), 
offset); - return save_result(e, offset, update_app(e, new_f, new_a), shared); - } - case expr_kind::Pi: case expr_kind::Lambda: { - expr new_d = apply(binding_domain(e), offset); - expr new_b = apply(binding_body(e), offset+1); - return save_result(e, offset, update_binding(e, new_d, new_b), shared); - } - case expr_kind::Let: { - expr new_t = apply(let_type(e), offset); - expr new_v = apply(let_value(e), offset); - expr new_b = apply(let_body(e), offset+1); - return save_result(e, offset, update_let(e, new_t, new_v, new_b), shared); - } - } - lean_unreachable(); - } - } -public: - template - replace_rec_fn(F const & f, bool use_cache):m_f(f), m_use_cache(use_cache) {} - - expr operator()(expr const & e) { return apply(e, 0); } -}; - +// Out-of-line `std::function`-typed overloads, declared in replace_fn.h. Defer +// to the templated implementation in the header, instantiated once per +// `std::function` shape. Hot callers should bypass these and use the templated +// `replace` (or `replace`) directly to avoid the per-call indirect +// dispatch through `std::function`. expr replace(expr const & e, std::function(expr const &, unsigned)> const & f, bool use_cache) { - return replace_rec_fn(f, use_cache)(e); + return replace_rec_fn(expr const &, unsigned)>>(f, use_cache)(e); +} + +expr replace(expr const & e, + std::function const & skip, + std::function(expr const &, unsigned)> const & f, + bool use_cache) { + using F = std::function(expr const &, unsigned)>; + using S = std::function; + return replace_rec_fn(skip, f, use_cache)(e); } class replace_fn { diff --git a/src/kernel/replace_fn.h b/src/kernel/replace_fn.h index ed2d8f9d39a0..83f6d20e53b8 100644 --- a/src/kernel/replace_fn.h +++ b/src/kernel/replace_fn.h @@ -6,11 +6,180 @@ Author: Leonardo de Moura */ #pragma once #include +#include #include "runtime/interrupt.h" #include "kernel/expr.h" #include "kernel/expr_maps.h" +#include "util/alloc.h" namespace lean { + +// Small-buffer cache for `replace_rec_fn`. 
The histogram of cache sizes during +// a typical run is heavily skewed toward small caches: ~87% of instances hold +// ≤15 entries and ~99% hold ≤63, with a long thin tail up to a few thousand. +// A linear scan over a stack-resident array beats any hash map at that scale, +// so we keep the first `INLINE_CAP` entries inline and only fall back to a +// real hash map (lazily allocated) for the rare large traversal. +class replace_cache { + struct key_hasher { + std::size_t operator()(std::pair const & p) const { + return hash((size_t)p.first >> 3, p.second); + } + }; + using key_t = std::pair; + using map_t = lean::unordered_map; + static constexpr unsigned INLINE_CAP = 16; + struct entry { key_t k; expr v; }; + // Uninitialized storage; only entries [0, m_size) are constructed. This + // avoids paying for `INLINE_CAP` default-constructed `expr`s on every + // `replace_rec_fn` instance, which matters because the typical traversal + // creates a fresh cache holding only a handful of entries. 
+ alignas(entry) std::byte m_inline_storage[sizeof(entry) * INLINE_CAP]; + unsigned m_size = 0; + std::unique_ptr m_overflow; + + entry * inline_at(unsigned i) { + return std::launder(reinterpret_cast(m_inline_storage) + i); + } + entry const * inline_at(unsigned i) const { + return std::launder(reinterpret_cast(m_inline_storage) + i); + } +public: + replace_cache() = default; + replace_cache(replace_cache const &) = delete; + replace_cache & operator=(replace_cache const &) = delete; + ~replace_cache() { + for (unsigned i = 0; i < m_size; ++i) inline_at(i)->~entry(); + } + + expr const * find(key_t const & k) const { + for (unsigned i = 0; i < m_size; ++i) { + entry const * e = inline_at(i); + if (e->k == k) return &e->v; + } + if (m_overflow) { + auto it = m_overflow->find(k); + if (it != m_overflow->end()) return &it->second; + } + return nullptr; + } + void insert(key_t const & k, expr const & v) { + if (!m_overflow) { + if (m_size < INLINE_CAP) { + new (inline_at(m_size)) entry{k, v}; + ++m_size; + return; + } + // Promote: drain inline entries into the overflow map and reset + // `m_size` so future finds skip the (now-empty) inline scan and + // future inserts go straight to overflow. Without this drain, + // workloads with a long tail of large caches (e.g. the `big_do` + // elab benchmark, where ~3% of caches hold 1k-8k entries but + // contribute essentially all the lookup work) waste 16 stale + // comparisons on every post-promotion lookup. + m_overflow.reset(new map_t()); + m_overflow->reserve(INLINE_CAP * 4); + for (unsigned i = 0; i < m_size; ++i) { + entry * e = inline_at(i); + m_overflow->emplace(e->k, std::move(e->v)); + e->~entry(); + } + m_size = 0; + } + m_overflow->insert(mk_pair(k, v)); + } +}; + +/// Default skip predicate: never skips. Used by `replace_rec_fn` callers that +/// don't want a pre-cache bail-out. 
+struct replace_no_skip { + bool operator()(expr const &, unsigned) const { return false; } +}; + +/** + \brief Class-template implementation of `replace`. Stores the user functor by + value (not via `std::function`) so calls into it inline at every recursion + step. Hot callers should instantiate this directly via `replace(...)`; + the legacy `std::function`-typed `replace` overload below is a thin wrapper + for cold/external callers that need type erasure. + + The functor `F` must be callable as `F(expr const &, unsigned) -> optional`. + The optional `Skip` predicate is invoked *before* the cache lookup; if it + returns true the input is returned unchanged with no cache touch. Use this + for cheap "this entire subtree is unaffected" guards (e.g. a memoized + loose-bvar-range check) — it both avoids the cache lookup and avoids + polluting the cache with identity entries, without the pitfall of an + `r == e` check in `save_result` that would also drop the *recursion-identity* + case and blow up exponentially on workloads like `Init.WFExtrinsicFix`. 
+*/ +template +class replace_rec_fn { + replace_cache m_cache; + Skip m_skip; + F m_f; + bool m_use_cache; + + expr save_result(expr const & e, unsigned offset, expr r, bool shared) { + if (shared) + m_cache.insert(mk_pair(e.raw(), offset), r); + return r; + } + + expr apply(expr const & e, unsigned offset) { + if (m_skip(e, offset)) + return e; + bool shared = false; + if (m_use_cache && !is_likely_unshared(e)) { + if (expr const * cached = m_cache.find(mk_pair(e.raw(), offset))) + return *cached; + shared = true; + } + if (optional r = m_f(e, offset)) { + return save_result(e, offset, std::move(*r), shared); + } else { + switch (e.kind()) { + case expr_kind::Const: case expr_kind::Sort: + case expr_kind::BVar: case expr_kind::Lit: + case expr_kind::MVar: case expr_kind::FVar: + return save_result(e, offset, e, shared); + case expr_kind::MData: { + expr new_e = apply(mdata_expr(e), offset); + return save_result(e, offset, update_mdata(e, new_e), shared); + } + case expr_kind::Proj: { + expr new_e = apply(proj_expr(e), offset); + return save_result(e, offset, update_proj(e, new_e), shared); + } + case expr_kind::App: { + expr new_f = apply(app_fn(e), offset); + expr new_a = apply(app_arg(e), offset); + return save_result(e, offset, update_app(e, new_f, new_a), shared); + } + case expr_kind::Pi: case expr_kind::Lambda: { + expr new_d = apply(binding_domain(e), offset); + expr new_b = apply(binding_body(e), offset+1); + return save_result(e, offset, update_binding(e, new_d, new_b), shared); + } + case expr_kind::Let: { + expr new_t = apply(let_type(e), offset); + expr new_v = apply(let_value(e), offset); + expr new_b = apply(let_body(e), offset+1); + return save_result(e, offset, update_let(e, new_t, new_v, new_b), shared); + } + } + lean_unreachable(); + } + } +public: + template + replace_rec_fn(G && f, bool use_cache):m_f(std::forward(f)), m_use_cache(use_cache) {} + template + replace_rec_fn(H && skip, G && f, bool use_cache): + m_skip(std::forward(skip)), 
m_f(std::forward(f)), m_use_cache(use_cache) {} + + expr operator()(expr const & e) { return apply(e, 0); } +}; + /** \brief Apply f to the subexpressions of a given expression. @@ -18,9 +187,57 @@ namespace lean { In a call f(s, n), n is the scope level, i.e., the number of bindings operators that enclosing \c s. The replaces only visits children of \c e if f return none_expr. + + These templated overloads monomorphize on the functor type so the call into + `f` inlines at every recursion step. Prefer them over the `std::function` + overloads for hot callers. The first overload accepts functors taking + `(expr const &, unsigned)`; the second accepts functors taking just + `(expr const &)` and adapts them by ignoring the offset argument. +*/ +template, int> = 0> +inline expr replace(expr const & e, F && f, bool use_cache = true) { + return replace_rec_fn>(std::forward(f), use_cache)(e); +} + +template && + !std::is_invocable_v, int> = 0> +inline expr replace(expr const & e, F && f, bool use_cache = true) { + auto adapter = [f = std::forward(f)](expr const & e, unsigned) { return f(e); }; + return replace_rec_fn(std::move(adapter), use_cache)(e); +} + +/** + \brief As the templated `replace` above, but with a pre-cache `skip` + predicate. `skip(m, offset)` is invoked before any cache lookup; if it + returns true, `m` is returned unchanged with no cache touch. +*/ +template && + std::is_invocable_v, int> = 0> +inline expr replace(expr const & e, Skip && skip, F && f, bool use_cache = true) { + return replace_rec_fn, std::decay_t>( + std::forward(skip), std::forward(f), use_cache)(e); +} + +/** + \brief Type-erased overload of `replace` for cold callers and external use. + Internally instantiates `replace_rec_fn>`, so it pays the + `std::function` indirection on every recursive call. 
*/ expr replace(expr const & e, std::function(expr const &, unsigned)> const & f, bool use_cache = true); inline expr replace(expr const & e, std::function(expr const &)> const & f, bool use_cache = true) { return replace(e, [&](expr const & e, unsigned) { return f(e); }, use_cache); } + +/** + \brief As `replace`, but with a pre-cache skip predicate. `skip(m, offset)` + is called before any cache lookup; if it returns true, `m` is returned + unchanged with no cache touch. Use this for cheap "this entire subtree is + unaffected" guards (e.g. a memoized loose-bvar-range check), since it both + skips the cache lookup and avoids polluting the cache with identity entries. +*/ +expr replace(expr const & e, + std::function const & skip, + std::function(expr const &, unsigned)> const & f, + bool use_cache = true); } diff --git a/src/runtime/sharecommon.cpp b/src/runtime/sharecommon.cpp index 887054e97104..a48cbb41d239 100644 --- a/src/runtime/sharecommon.cpp +++ b/src/runtime/sharecommon.cpp @@ -5,6 +5,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Leonardo de Moura */ #include +#include #include "runtime/sharecommon.h" #include "runtime/hash.h" @@ -443,14 +444,4 @@ lean_object * sharecommon_quick_fn::visit(lean_object * a) { extern "C" LEAN_EXPORT obj_res lean_sharecommon_quick(obj_arg a) { return sharecommon_quick_fn()(a); } - -lean_object * sharecommon_persistent_fn::operator()(lean_object * e) { - lean_object * r = check_cache(e); - if (r != nullptr) - return r; - m_saved.push_back(object_ref(e, true)); - r = visit(e); - m_saved.push_back(object_ref(r, true)); - return r; -} }; diff --git a/src/runtime/sharecommon.h b/src/runtime/sharecommon.h index 643a13b37cc7..f022e25fe240 100644 --- a/src/runtime/sharecommon.h +++ b/src/runtime/sharecommon.h @@ -5,8 +5,7 @@ Released under Apache 2.0 license as described in the file LICENSE. 
Author: Leonardo de Moura */ #pragma once -#include <vector> -#include "runtime/object_ref.h" +#include "runtime/object.h" #include "util/alloc.h" namespace lean { @@ -55,16 +54,4 @@ class LEAN_EXPORT sharecommon_quick_fn { } }; -/* -Similar to `sharecommon_quick_fn`, but we save the entry points and result values to ensure -they are not deleted. -*/ -class LEAN_EXPORT sharecommon_persistent_fn : private sharecommon_quick_fn { - std::vector<object_ref> m_saved; -public: - sharecommon_persistent_fn(bool s = false):sharecommon_quick_fn(s) {} - void set_check_set(bool f) { m_check_set = f; } - lean_object * operator()(lean_object * e); -}; - };