Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions pkgs/metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,50 @@ These series help correlate **long `[clock]` `slot_interval` gaps** with wall ti
- **Labels**: None
- **Sample Collection Event**: On each invocation of `compactAttestations`

### Block proposal attestation build metrics (`build_block` / `getProposalAttestations`)

`lean_block_building_payload_aggregation_time_seconds` remains the cross-client wall-clock total for the whole call. The metrics below attribute time and counts inside block-proposal attestation selection.

These are **not** the same as `zeam_compact_attestations_*`: those measure only the `compactAttestations` FFI helper (wall time plus attestation row counts in/out per call). They do not cover greedy payload selection, STF simulation, builds completed, child payloads consumed, or final distinct `AttestationData` / aggregate counts. The `compact` phase here overlaps in time with `zeam_compact_attestations_time_seconds` when compaction runs, but the `lean_block_proposal_*` suite is the portable, spec-aligned surface.

#### `lean_block_proposal_attestation_build_phase_seconds` (HistogramVec)
- **Description**: Phase-level time inside proposal attestation selection.
- **Type**: Histogram
- **Unit**: Seconds
- **Buckets**: 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 4, 8
- **Labels**: `phase` — `select_payloads` (greedy child-payload pick), `compact` (recursive merge per AttestationData; zeam uses `compactAttestations`), `stf_simulate` (candidate block STF)
- **Sample Collection Event**: Once per outer-loop iteration, per phase

#### `lean_block_proposal_attestation_builds_total` (Counter)
- **Description**: Completed block-proposal attestation selection runs (one per proposal attempt).
- **Type**: Counter
- **Unit**: Count (u64)
- **Labels**: None
- **Sample Collection Event**: On successful return from `getProposalAttestationsUnlocked`

#### `lean_block_proposal_child_payloads_consumed_total` (Counter)
- **Description**: Child aggregated payloads cloned from `latest_known_aggregated_payloads` during greedy selection (before compaction).
- **Type**: Counter
- **Unit**: Count (u64)
- **Labels**: None
- **Sample Collection Event**: Summed over one proposal build

#### `lean_block_proposal_attestation_data_selected` (Histogram)
- **Description**: Distinct `AttestationData` entries selected for the proposal block body.
- **Type**: Histogram
- **Unit**: Count
- **Buckets**: 0, 1, 2, 4, 8, 16, 32
- **Labels**: None
- **Sample Collection Event**: On successful return from `getProposalAttestationsUnlocked`

#### `lean_block_proposal_aggregates_selected` (Histogram)
- **Description**: Aggregated signature proofs in the proposal result after compaction.
- **Type**: Histogram
- **Unit**: Count
- **Buckets**: 0, 1, 2, 4, 8, 16, 32, 64, 128
- **Labels**: None
- **Sample Collection Event**: On successful return from `getProposalAttestationsUnlocked`


## Usage

Expand Down
41 changes: 41 additions & 0 deletions pkgs/metrics/src/lib.zig
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ const Metrics = struct {
zeam_compact_attestations_time_seconds: CompactAttestationsTimeHistogram,
zeam_compact_attestations_input_total: CompactAttestationsInputCounter,
zeam_compact_attestations_output_total: CompactAttestationsOutputCounter,
// Block-proposal attestation selection (`getProposalAttestations`). Phase
// attribution mirrors `zeam_pq_sig_aggregated_signatures_building_phase_seconds`
// on the interval-2 aggregator path. `lean_block_building_payload_aggregation_time_seconds`
// remains the cross-client wall-clock total for the whole call.
lean_block_proposal_attestation_build_phase_seconds: BlockProposalAttestationBuildPhaseHistogram,
lean_block_proposal_attestation_builds_total: BlockProposalAttestationBuildsTotalCounter,
lean_block_proposal_child_payloads_consumed_total: BlockProposalChildPayloadsConsumedTotalCounter,
lean_block_proposal_attestation_data_selected: BlockProposalAttestationDataSelectedHistogram,
lean_block_proposal_aggregates_selected: BlockProposalAggregatesSelectedHistogram,
// Tick interval duration: actual elapsed time between clock ticks (nominal 0.8s)
lean_tick_interval_duration_seconds: TickIntervalDurationHistogram,
/// Wall time for one `xev.Loop.run(.until_done)` in `Clock.run` (issues #863, #867).
Expand Down Expand Up @@ -489,6 +498,11 @@ const Metrics = struct {
const AggregationIntervalTickHistogram = metrics_lib.Histogram(f32, &[_]f32{ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0, 10.0 });
const CompactAttestationsInputCounter = metrics_lib.Counter(u64);
const CompactAttestationsOutputCounter = metrics_lib.Counter(u64);
const BlockProposalAttestationBuildPhaseHistogram = metrics_lib.HistogramVec(f32, struct { phase: []const u8 }, &[_]f32{ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 4, 8 });
const BlockProposalAttestationBuildsTotalCounter = metrics_lib.Counter(u64);
const BlockProposalChildPayloadsConsumedTotalCounter = metrics_lib.Counter(u64);
const BlockProposalAttestationDataSelectedHistogram = metrics_lib.Histogram(f32, &[_]f32{ 0, 1, 2, 4, 8, 16, 32 });
const BlockProposalAggregatesSelectedHistogram = metrics_lib.Histogram(f32, &[_]f32{ 0, 1, 2, 4, 8, 16, 32, 64, 128 });
// BeamNode mutex contention histogram types. Buckets span 100us..2s to cover
// both fast acquisitions and long stalls observed when STF runs under the lock.
const NodeMutexLabel = struct { site: []const u8 };
Expand Down Expand Up @@ -682,6 +696,18 @@ fn observeAttestationProduction(ctx: ?*anyopaque, value: f32) void {
histogram.observe(value);
}

fn observeBlockProposalAttestationDataSelected(ctx: ?*anyopaque, value: f32) void {
const histogram_ptr = ctx orelse return;
const histogram: *Metrics.BlockProposalAttestationDataSelectedHistogram = @ptrCast(@alignCast(histogram_ptr));
histogram.observe(value);
}

fn observeBlockProposalAggregatesSelected(ctx: ?*anyopaque, value: f32) void {
const histogram_ptr = ctx orelse return;
const histogram: *Metrics.BlockProposalAggregatesSelectedHistogram = @ptrCast(@alignCast(histogram_ptr));
histogram.observe(value);
}

fn observeCompactAttestations(ctx: ?*anyopaque, value: f32) void {
const histogram_ptr = ctx orelse return;
const histogram: *Metrics.CompactAttestationsTimeHistogram = @ptrCast(@alignCast(histogram_ptr));
Expand Down Expand Up @@ -808,6 +834,14 @@ pub var zeam_compact_attestations_time_seconds: Histogram = .{
.context = null,
.observe = &observeCompactAttestations,
};
pub var lean_block_proposal_attestation_data_selected: Histogram = .{
.context = null,
.observe = &observeBlockProposalAttestationDataSelected,
};
pub var lean_block_proposal_aggregates_selected: Histogram = .{
.context = null,
.observe = &observeBlockProposalAggregatesSelected,
};
pub var lean_tick_interval_duration_seconds: Histogram = .{
.context = null,
.observe = &observeTickIntervalDuration,
Expand Down Expand Up @@ -925,6 +959,11 @@ pub fn init(allocator: std.mem.Allocator) !void {
.zeam_compact_attestations_time_seconds = Metrics.CompactAttestationsTimeHistogram.init("zeam_compact_attestations_time_seconds", .{ .help = "Time taken by compactAttestations to merge payloads sharing the same AttestationData" }, .{}),
.zeam_compact_attestations_input_total = Metrics.CompactAttestationsInputCounter.init("zeam_compact_attestations_input_total", .{ .help = "Total number of attestations input to compactAttestations" }, .{}),
.zeam_compact_attestations_output_total = Metrics.CompactAttestationsOutputCounter.init("zeam_compact_attestations_output_total", .{ .help = "Total number of attestations output from compactAttestations after compaction" }, .{}),
.lean_block_proposal_attestation_build_phase_seconds = try Metrics.BlockProposalAttestationBuildPhaseHistogram.init(allocator, io, "lean_block_proposal_attestation_build_phase_seconds", .{ .help = "Phase-level time in block-proposal attestation selection (build_block / getProposalAttestations): select_payloads, compact (recursive merge per AttestationData), stf_simulate." }, .{}),
.lean_block_proposal_attestation_builds_total = Metrics.BlockProposalAttestationBuildsTotalCounter.init("lean_block_proposal_attestation_builds_total", .{ .help = "Completed block-proposal attestation selection runs (one per proposal attempt)." }, .{}),
.lean_block_proposal_child_payloads_consumed_total = Metrics.BlockProposalChildPayloadsConsumedTotalCounter.init("lean_block_proposal_child_payloads_consumed_total", .{ .help = "Child aggregated payloads selected during greedy proof picking (before recursive compaction)." }, .{}),
.lean_block_proposal_attestation_data_selected = Metrics.BlockProposalAttestationDataSelectedHistogram.init("lean_block_proposal_attestation_data_selected", .{ .help = "Distinct AttestationData entries in the proposal block body." }, .{}),
.lean_block_proposal_aggregates_selected = Metrics.BlockProposalAggregatesSelectedHistogram.init("lean_block_proposal_aggregates_selected", .{ .help = "Aggregated signature proofs in the proposal result after compaction." }, .{}),
.lean_tick_interval_duration_seconds = Metrics.TickIntervalDurationHistogram.init("lean_tick_interval_duration_seconds", .{ .help = "Elapsed time between clock ticks in seconds (nominal 0.8s = 4s slot / 5 intervals)" }, .{}),
.zeam_xev_clock_until_done_drain_seconds = Metrics.XevClockUntilDoneDrainHistogram.init("zeam_xev_clock_until_done_drain_seconds", .{ .help = "Wall time in seconds for one xev run(.until_done) in the clock driver (issues #863, #867). Captures completion backlog before the next tickInterval()." }, .{}),
.zeam_xev_clock_until_done_slow_ge_500ms_total = Metrics.ZeamXevClockUntilDoneSlowGe500msCounter.init("zeam_xev_clock_until_done_slow_ge_500ms_total", .{ .help = "Clock-loop xev run(.until_done) drains with wall time >= 0.5s (#863)." }, .{}),
Expand Down Expand Up @@ -1021,6 +1060,8 @@ pub fn init(allocator: std.mem.Allocator) !void {
lean_gossip_aggregation_size_bytes.context = @ptrCast(&metrics.lean_gossip_aggregation_size_bytes);
lean_attestations_production_time_seconds.context = @ptrCast(&metrics.lean_attestations_production_time_seconds);
zeam_compact_attestations_time_seconds.context = @ptrCast(&metrics.zeam_compact_attestations_time_seconds);
lean_block_proposal_attestation_data_selected.context = @ptrCast(&metrics.lean_block_proposal_attestation_data_selected);
lean_block_proposal_aggregates_selected.context = @ptrCast(&metrics.lean_block_proposal_aggregates_selected);
lean_tick_interval_duration_seconds.context = @ptrCast(&metrics.lean_tick_interval_duration_seconds);
zeam_xev_clock_until_done_drain_seconds.context = @ptrCast(&metrics.zeam_xev_clock_until_done_drain_seconds);
zeam_fork_choice_tick_interval_duration_seconds.context = @ptrCast(&metrics.zeam_fork_choice_tick_interval_duration_seconds);
Expand Down
44 changes: 37 additions & 7 deletions pkgs/node/src/forkchoice.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,11 @@ pub const ForkChoice = struct {
var processed_att_data = std.AutoHashMap(types.AttestationData, void).init(self.allocator);
defer processed_att_data.deinit();

var child_payloads_consumed: usize = 0;

while (true) {
const select_start_ns = zeam_utils.monotonicTimestampNs();

// Find attestation_data entries whose source slot is justified on this chain,
// that reference blocks we know about, and whose target is not yet justified.
// Collect and sort by target slot for deterministic processing order.
Expand Down Expand Up @@ -1262,14 +1266,18 @@ pub const ForkChoice = struct {

try agg_attestations.append(.{ .aggregation_bits = att_bits, .data = att_data });
try attestation_signatures.append(cloned_proof);
child_payloads_consumed += 1;
}
}

observeProposalBuildPhase("select_payloads", select_start_ns);

if (!found_entries) break;

// Compact: merge proofs sharing the same AttestationData into one
// using recursive children aggregation, so each AttestationData
// appears at most once.
const compact_start_ns = zeam_utils.monotonicTimestampNs();
const compact_timer = zeam_metrics.zeam_compact_attestations_time_seconds.start();
const compacted = try types.compactAttestations(
self.allocator,
Expand All @@ -1283,6 +1291,9 @@ pub const ForkChoice = struct {
agg_attestations = compacted.attestations;
attestation_signatures = compacted.signatures;
zeam_metrics.metrics.zeam_compact_attestations_output_total.incrBy(@intCast(agg_attestations.constSlice().len));
observeProposalBuildPhase("compact", compact_start_ns);

const stf_start_ns = zeam_utils.monotonicTimestampNs();

// Build candidate block with all accumulated attestations and apply STF
// to check if justification changed.
Expand Down Expand Up @@ -1323,6 +1334,7 @@ pub const ForkChoice = struct {
candidate_state.latest_justified.slot != current_justified.slot;
const finalized_changed = candidate_state.latest_finalized.slot != current_finalized_slot;
if (justified_changed or finalized_changed) {
observeProposalBuildPhase("stf_simulate", stf_start_ns);
current_justified = candidate_state.latest_justified;
current_finalized_slot = candidate_state.latest_finalized.slot;
// Swap in the updated justified_slots (clone first so candidate_state
Expand All @@ -1334,10 +1346,17 @@ pub const ForkChoice = struct {
continue;
}

observeProposalBuildPhase("stf_simulate", stf_start_ns);

// Justification and finalization unchanged - block production done.
break;
}

zeam_metrics.metrics.lean_block_proposal_attestation_builds_total.incr();
zeam_metrics.metrics.lean_block_proposal_child_payloads_consumed_total.incrBy(@intCast(child_payloads_consumed));
zeam_metrics.lean_block_proposal_attestation_data_selected.record(@floatFromInt(processed_att_data.count()));
zeam_metrics.lean_block_proposal_aggregates_selected.record(@floatFromInt(attestation_signatures.len()));

self.logBlockProposalPayloadCoverage(slot, &agg_attestations);

agg_att_cleanup = false;
Expand Down Expand Up @@ -4053,19 +4072,30 @@ fn countSeen(seen: []const bool) usize {
}

fn observeAggregateBuildPhase(comptime phase: []const u8, start_ns: i128) void {
const end_ns = zeam_utils.monotonicTimestampNs();
const elapsed_ns: i128 = if (end_ns >= start_ns) end_ns - start_ns else 0;
// Histogram seconds are f32; precision below roughly 100 ns is intentionally
// irrelevant for these phase buckets. The existing total histogram still
// wraps only the recursive proof build inside compute, so phase sums are an
// attribution view rather than an exact replacement for that metric.
const elapsed_s: f32 = @as(f32, @floatFromInt(elapsed_ns)) / @as(f32, @floatFromInt(std.time.ns_per_s));
const elapsed_s = phaseElapsedSeconds(start_ns);
zeam_metrics.metrics.zeam_pq_sig_aggregated_signatures_building_phase_seconds.observe(
.{ .phase = phase },
elapsed_s,
) catch {};
}

fn observeProposalBuildPhase(comptime phase: []const u8, start_ns: i128) void {
const elapsed_s = phaseElapsedSeconds(start_ns);
zeam_metrics.metrics.lean_block_proposal_attestation_build_phase_seconds.observe(
.{ .phase = phase },
elapsed_s,
) catch {};
}

fn phaseElapsedSeconds(start_ns: i128) f32 {
const end_ns = zeam_utils.monotonicTimestampNs();
const elapsed_ns: i128 = if (end_ns >= start_ns) end_ns - start_ns else 0;
// Histogram seconds are f32; precision below roughly 100 ns is intentionally
// irrelevant for these phase buckets. Phase sums are an attribution view
// rather than an exact replacement for the path's total wall-clock histogram.
return @as(f32, @floatFromInt(elapsed_ns)) / @as(f32, @floatFromInt(std.time.ns_per_s));
}

fn recordAggregateCoverageMetrics(
section: []const u8,
seen: []const bool,
Expand Down
Loading