From 11d339ba5896cb4c95ee987d46f54628f2c73483 Mon Sep 17 00:00:00 2001 From: Larry Ewing Date: Mon, 16 Mar 2026 16:45:40 -0500 Subject: [PATCH 1/3] Use sorted vectors instead of unordered_maps for DWARF address maps Replace std::unordered_map with SortedMap (sorted vector + binary search) for AddrExprMap and FuncAddrMap. These maps are built once during construction and only used for read-only lookups, making them ideal for this pattern. - std::unordered_map: ~64 bytes/entry (hash buckets, linked list nodes) - SortedMap (sorted vector): ~16 bytes/entry (contiguous, cache-friendly) SortedMap tracks a finalized flag to assert lookups aren't performed before sort(). Duplicate keys are de-duplicated after sorting (keeps first) to handle cases like FuncAddrMap where start == declarations. DelimiterLocations reservation now counts actual non-zero entries rather than just the number of delimiter location arrays. Output is byte-for-byte identical, confirming functional correctness. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/wasm/wasm-debug.cpp | 121 +++++++++++++++++++++++++++------------- 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index abe1cd05c1f..6b785874c6c 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -355,14 +355,53 @@ struct LineState { } }; +// A sorted vector of (key, value) pairs with binary search lookup. +// Uses ~16 bytes per entry vs ~64 for std::unordered_map. +template +struct SortedMap { + std::vector> data; + bool finalized = false; + + void reserve(size_t n) { data.reserve(n); } + + void add(K key, V value) { + assert(!finalized && "cannot add after sort()"); + data.push_back({key, value}); + } + + // Call after all add() calls to enable lookup. + // De-duplicates adjacent entries with the same key (keeps first). + void sort() { + std::sort(data.begin(), data.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); + auto newEnd = std::unique(data.begin(), data.end(), + [](const auto& a, const auto& b) { return a.first == b.first; }); + data.erase(newEnd, data.end()); + finalized = true; + } + + const V* find(K key) const { + assert(finalized && "must call sort() before lookups"); + auto it = std::lower_bound( + data.begin(), data.end(), key, + [](const auto& pair, K k) { return pair.first < k; }); + if (it != data.end() && it->first == key) { + return &it->second; + } + return nullptr; + } + + size_t count(K key) const { return find(key) ? 1 : 0; } +}; + // Represents a mapping of addresses to expressions. We track beginnings and // endings of expressions separately, since the end of one (which is one past // the end in DWARF notation) overlaps with the beginning of the next, and also // to let us use contextual information (we may know we are looking up the end // of an instruction). struct AddrExprMap { - std::unordered_map startMap; - std::unordered_map endMap; + SortedMap startMap; + SortedMap endMap; // Some instructions have delimiter binary locations, like the else and end in // and if. Track those separately, including their expression and their id @@ -372,10 +411,25 @@ struct AddrExprMap { Expression* expr; size_t id; }; - std::unordered_map delimiterMap; + SortedMap delimiterMap; // Construct the map from the binaryLocations loaded from the wasm. AddrExprMap(const Module& wasm) { + // Count entries for reservation. + size_t exprCount = 0, delimCount = 0; + for (auto& func : wasm.functions) { + exprCount += func->expressionLocations.size(); + // Each DelimiterLocations entry can contain multiple non-zero offsets. + for (auto& [expr, delim] : func->delimiterLocations) { + for (Index i = 0; i < delim.size(); i++) { + if (delim[i] != 0) delimCount++; + } + } + } + startMap.reserve(exprCount); + endMap.reserve(exprCount); + delimiterMap.reserve(delimCount); + for (auto& func : wasm.functions) { for (auto& [expr, span] : func->expressionLocations) { add(expr, span); @@ -384,46 +438,37 @@ struct AddrExprMap { add(expr, delim); } } + startMap.sort(); + endMap.sort(); + delimiterMap.sort(); } Expression* getStart(BinaryLocation addr) const { - auto iter = startMap.find(addr); - if (iter != startMap.end()) { - return iter->second; - } - return nullptr; + auto* result = startMap.find(addr); + return result ? *result : nullptr; } Expression* getEnd(BinaryLocation addr) const { - auto iter = endMap.find(addr); - if (iter != endMap.end()) { - return iter->second; - } - return nullptr; + auto* result = endMap.find(addr); + return result ? *result : nullptr; } DelimiterInfo getDelimiter(BinaryLocation addr) const { - auto iter = delimiterMap.find(addr); - if (iter != delimiterMap.end()) { - return iter->second; - } - return DelimiterInfo{nullptr, BinaryLocations::Invalid}; + auto* result = delimiterMap.find(addr); + return result ? *result : DelimiterInfo{nullptr, BinaryLocations::Invalid}; } private: void add(Expression* expr, const BinaryLocations::Span span) { - assert(startMap.count(span.start) == 0); - startMap[span.start] = expr; - assert(endMap.count(span.end) == 0); - endMap[span.end] = expr; + startMap.add(span.start, expr); + endMap.add(span.end, expr); } void add(Expression* expr, const BinaryLocations::DelimiterLocations& delimiter) { for (Index i = 0; i < delimiter.size(); i++) { if (delimiter[i] != 0) { - assert(delimiterMap.count(delimiter[i]) == 0); - delimiterMap[delimiter[i]] = DelimiterInfo{expr, i}; + delimiterMap.add(delimiter[i], DelimiterInfo{expr, i}); } } } @@ -435,32 +480,30 @@ struct AddrExprMap { // of one past the end, and one before it which is the "end" opcode that is // emitted. struct FuncAddrMap { - std::unordered_map startMap, endMap; + SortedMap startMap, endMap; // Construct the map from the binaryLocations loaded from the wasm. FuncAddrMap(const Module& wasm) { + startMap.reserve(wasm.functions.size() * 2); + endMap.reserve(wasm.functions.size() * 2); for (auto& func : wasm.functions) { - startMap[func->funcLocation.start] = func.get(); - startMap[func->funcLocation.declarations] = func.get(); - endMap[func->funcLocation.end - 1] = func.get(); - endMap[func->funcLocation.end] = func.get(); + startMap.add(func->funcLocation.start, func.get()); + startMap.add(func->funcLocation.declarations, func.get()); + endMap.add(func->funcLocation.end - 1, func.get()); + endMap.add(func->funcLocation.end, func.get()); } + startMap.sort(); + endMap.sort(); } Function* getStart(BinaryLocation addr) const { - auto iter = startMap.find(addr); - if (iter != startMap.end()) { - return iter->second; - } - return nullptr; + auto* result = startMap.find(addr); + return result ? *result : nullptr; } Function* getEnd(BinaryLocation addr) const { - auto iter = endMap.find(addr); - if (iter != endMap.end()) { - return iter->second; - } - return nullptr; + auto* result = endMap.find(addr); + return result ? *result : nullptr; } }; From 88ef09c238ae0278c125962aedf7b4ce69658d63 Mon Sep 17 00:00:00 2001 From: Larry Ewing Date: Mon, 16 Mar 2026 20:25:48 -0500 Subject: [PATCH 2/3] Address review: remove count(), document de-dup semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove SortedMap::count() — no callers remain after removing pre-sort assertions, and it could be misused during build phase. - Document that duplicate keys (e.g. FuncAddrMap start==declarations) always map to the same value, so de-dup order is irrelevant. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/wasm/wasm-debug.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index 6b785874c6c..2d20c3cabcf 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -370,7 +370,9 @@ struct SortedMap { } // Call after all add() calls to enable lookup. - // De-duplicates adjacent entries with the same key (keeps first). + // De-duplicates entries with the same key. When duplicates exist + // (e.g. FuncAddrMap where start == declarations), they map to the + // same value, so which one is kept doesn't matter. void sort() { std::sort(data.begin(), data.end(), [](const auto& a, const auto& b) { return a.first < b.first; }); @@ -390,8 +392,6 @@ struct SortedMap { } return nullptr; } - - size_t count(K key) const { return find(key) ? 1 : 0; } }; // Represents a mapping of addresses to expressions. We track beginnings and From 68b89a6e047932a398b14ac8a12f9e551789d05f Mon Sep 17 00:00:00 2001 From: Larry Ewing Date: Mon, 16 Mar 2026 20:49:21 -0500 Subject: [PATCH 3/3] Assert duplicate keys have same values in debug builds Add debug-time validation in SortedMap::sort() that duplicate keys map to the same value (assertUniqueValues=true by default). FuncAddrMap passes assertUniqueValues=false because contiguous functions legitimately share boundary addresses (func1.end == func2.start), matching the old unordered_map overwrite behavior. AddrExprMap uses the default (true) to catch debug info issues early. Also adds operator== to DelimiterInfo for the assertion. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/wasm/wasm-debug.cpp | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index 2d20c3cabcf..8fb713994c3 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -370,12 +370,22 @@ struct SortedMap { } // Call after all add() calls to enable lookup. - // De-duplicates entries with the same key. When duplicates exist - // (e.g. FuncAddrMap where start == declarations), they map to the - // same value, so which one is kept doesn't matter. - void sort() { + // De-duplicates entries with the same key. When assertUniqueValues + // is true (default), asserts in debug builds that duplicate keys + // map to the same value. + void sort(bool assertUniqueValues = true) { std::sort(data.begin(), data.end(), [](const auto& a, const auto& b) { return a.first < b.first; }); +#ifndef NDEBUG + if (assertUniqueValues) { + for (size_t i = 1; i < data.size(); i++) { + if (data[i].first == data[i - 1].first) { + assert(data[i].second == data[i - 1].second && + "duplicate keys with different values in SortedMap"); + } + } + } +#endif auto newEnd = std::unique(data.begin(), data.end(), [](const auto& a, const auto& b) { return a.first == b.first; }); data.erase(newEnd, data.end()); @@ -410,6 +420,9 @@ struct AddrExprMap { struct DelimiterInfo { Expression* expr; size_t id; + bool operator==(const DelimiterInfo& o) const { + return expr == o.expr && id == o.id; + } }; SortedMap delimiterMap; @@ -492,8 +505,11 @@ struct FuncAddrMap { endMap.add(func->funcLocation.end - 1, func.get()); endMap.add(func->funcLocation.end, func.get()); } - startMap.sort(); - endMap.sort(); + // FuncAddrMap allows duplicate keys with different values because + // contiguous functions share boundary addresses (e.g. func1.end == + // func2.start). Callers disambiguate using context. + startMap.sort(/*assertUniqueValues=*/false); + endMap.sort(/*assertUniqueValues=*/false); } Function* getStart(BinaryLocation addr) const {