Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
55bb98a
Add `trimmedCount` to `TimerStatistics` and `BenchmarkResults` types
MajorLift Apr 8, 2026
80d89da
Export `trimOutliers` and expose IQR count in `calculateTimerStatistics`
MajorLift Apr 8, 2026
01bc5ca
Add `WARMUP_RUNS`, `MIN_SAMPLES_FOR_VERDICT`, and `POWER_USER_NUM_BRO…
MajorLift Apr 8, 2026
4008c78
Apply warm-up exclusion and IQR trimming in `runPageLoadBenchmark`, p…
MajorLift Apr 8, 2026
cd5b927
Increase default `browserLoads` to 15 for `POWER_USER_HOME` preset
MajorLift Apr 8, 2026
10dbb46
Emit `trimmedCount` as derived Sentry tag per metric
MajorLift Apr 8, 2026
56d4f41
Add unit tests for IQR outlier trimming edge cases
MajorLift Apr 20, 2026
775bb15
ci: rebalance startupPowerUserHome to 15 browserLoads × 7 pageLoads
MajorLift Apr 20, 2026
897ae1d
fix: resolve lint errors in statistics and outlier-trimming test
MajorLift Apr 21, 2026
7cd855d
fix: filter measuredWebVitalsRuns by iteration instead of slice
MajorLift Apr 22, 2026
f7a7d21
fix: address Bugbot review #4156995681 — guard empty measuredResults …
MajorLift Apr 22, 2026
df553e4
[skip-e2e] [force-builds]
MajorLift Apr 22, 2026
3909d41
Narrow `dataQuality` type to literal union
MajorLift Apr 23, 2026
e98d614
Remove unused code
MajorLift Apr 23, 2026
47db254
Tighten test assertion
MajorLift Apr 23, 2026
ece9e74
Drop `trimOutliers` wrapper and fix reference aliasing in `detectO…
MajorLift Apr 23, 2026
b0197ef
Add `outliers` to `BenchmarkResults` type; document IQR-only poli…
MajorLift Apr 23, 2026
697c24f
Replace inline CV ladder with `assessDataQuality`, emit `outliers…
MajorLift Apr 23, 2026
0bc831c
Spread input on `detectOutliersZScore` small-array early return
MajorLift Apr 23, 2026
1a5457b
Collect `outliers` from `TimerStatistics` in `convertTimerStatis…
MajorLift Apr 23, 2026
451bc12
Fix prettier line-length violation in `send-to-sentry.ts`
MajorLift Apr 23, 2026
0827815
Fix JSDoc lint errors on `runPageLoadBenchmark`
MajorLift Apr 23, 2026
a8c5df8
Populate `outliers` in `runPageLoadBenchmark` return for Sentry c…
MajorLift Apr 23, 2026
4c40363
Spread `outliers` copy in `runPageLoadBenchmark` to avoid shared …
MajorLift Apr 23, 2026
f1fc8fe
Move `browserLoads <= WARMUP_RUNS` guard before benchmark loop in …
MajorLift Apr 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:
- name: Run the benchmark
if: ${{ env.BENCHMARK_GATED == 'true' }}
run: >-
${{ matrix.pageType == 'startupPowerUserHome' && format('yarn test:e2e:benchmark --preset startupPowerUserHome --browserLoads 10 --pageLoads 10 --out test-artifacts/benchmarks/benchmark-{0}-{1}-startupPowerUserHome.json --retries 2', matrix.browser, matrix.buildType) || format('yarn test:e2e:benchmark --preset {2} --out test-artifacts/benchmarks/benchmark-{0}-{1}-{2}.json --retries 2', matrix.browser, matrix.buildType, matrix.pageType) }}
${{ matrix.pageType == 'startupPowerUserHome' && format('yarn test:e2e:benchmark --preset startupPowerUserHome --browserLoads 15 --pageLoads 7 --out test-artifacts/benchmarks/benchmark-{0}-{1}-startupPowerUserHome.json --retries 2', matrix.browser, matrix.buildType) || format('yarn test:e2e:benchmark --preset {2} --out test-artifacts/benchmarks/benchmark-{0}-{1}-{2}.json --retries 2', matrix.browser, matrix.buildType, matrix.pageType) }}
shell: bash

- name: Send benchmark results to Sentry (main/release only)
Expand Down
3 changes: 3 additions & 0 deletions shared/constants/benchmarks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ export type TimerStatistics = {
p99: number;
samples: number;
outliers: number;
trimmedCount?: number;
dataQuality: 'good' | 'poor' | 'unreliable';
};

Expand Down Expand Up @@ -108,6 +109,8 @@ export type BenchmarkResults = {
stdDev: StatisticalResult;
p75: StatisticalResult;
p95: StatisticalResult;
trimmedCount?: StatisticalResult;
outliers?: StatisticalResult;
webVitals?: WebVitalsSummary;
};

Expand Down
10 changes: 8 additions & 2 deletions test/e2e/benchmarks/flows/startup/power-user-home.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ import {
type BenchmarkResults,
type WebVitalsMetrics,
} from '../../../../../shared/constants/benchmarks';
import { WITH_STATE_POWER_USER } from '../../utils/constants';
import {
WITH_STATE_POWER_USER,
POWER_USER_NUM_BROWSER_LOADS,
} from '../../utils/constants';
import { runPageLoadBenchmark, collectWebVitals } from '../../utils';
import type {
Metrics,
Expand Down Expand Up @@ -87,5 +90,8 @@ async function measurePagePowerUser(
/**
 * Entry point for the power-user home startup benchmark. Delegates to
 * `runPageLoadBenchmark`, using `measurePagePowerUser` as the measurement
 * callback.
 *
 * NOTE(review): this hunk appears to show both sides of the change (the file
 * header reports 8 additions and 2 deletions). The first `return` looks like
 * the removed line; the call that follows it is the replacement, which
 * forwards every option unchanged except `browserLoads`, falling back to
 * `POWER_USER_NUM_BROWSER_LOADS` when `options.browserLoads` is null or
 * undefined. Confirm against the merged file.
 *
 * @param options - Page-load benchmark options forwarded to
 * `runPageLoadBenchmark`.
 * @returns The benchmark results resolved by `runPageLoadBenchmark`.
 */
export async function run(
options: PageLoadBenchmarkOptions,
): Promise<BenchmarkResults> {
return runPageLoadBenchmark(measurePagePowerUser, options);
return runPageLoadBenchmark(measurePagePowerUser, {
...options,
// `??` keeps an explicitly supplied `browserLoads` and only substitutes
// the preset default when the value is null or undefined.
browserLoads: options.browserLoads ?? POWER_USER_NUM_BROWSER_LOADS,
});
}
39 changes: 38 additions & 1 deletion test/e2e/benchmarks/send-to-sentry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import type {
} from '../../../shared/constants/benchmarks';
import { getGitBranch, getGitCommitHash } from './utils/git';
import type { UserActionResult } from './utils/types';
import { aggregateWebVitals } from './utils/statistics';
import { aggregateWebVitals, assessDataQuality } from './utils/statistics';

const packageJsonPath = path.resolve(__dirname, '../../../package.json');
const { version } = JSON.parse(readFileSync(packageJsonPath, 'utf-8')) as {
Expand Down Expand Up @@ -241,12 +241,49 @@ async function main() {
}
}

// Derived reliability metrics: CV, dataQuality, tailRatio, trimmedCount, outliers
// CV = (stdDev / mean) * 100 — coefficient of variation, directly
// comparable across steps with different magnitudes.
// dataQuality: 'good' (CV<30), 'poor' (30-50), 'unreliable' (>50).
// tailRatio = p95/p75 — tail heaviness; closer to 1.0 = tighter.
// trimmedCount / outliers = per-metric counts copied through from the
// benchmark results when those fields are present.
const derivedMetrics: Record<string, number | string> = {};
if (benchmark.mean && benchmark.stdDev) {
for (const [key, meanVal] of Object.entries(benchmark.mean)) {
const stdDevVal = benchmark.stdDev[key];
if (meanVal > 0 && stdDevVal !== undefined) {
const cv = (stdDevVal / meanVal) * 100;
derivedMetrics[`${type}.cv.${key}`] = cv;
derivedMetrics[`${type}.dataQuality.${key}`] =
assessDataQuality(cv);
}
}
}
if (benchmark.p95 && benchmark.p75) {
for (const [key, p95Val] of Object.entries(benchmark.p95)) {
const p75Val = benchmark.p75[key];
if (p75Val !== undefined && p75Val > 0) {
derivedMetrics[`${type}.tailRatio.${key}`] = p95Val / p75Val;
}
}
}
if (benchmark.trimmedCount) {
for (const [key, count] of Object.entries(benchmark.trimmedCount)) {
derivedMetrics[`${type}.trimmedCount.${key}`] = count;
}
}
if (benchmark.outliers) {
for (const [key, count] of Object.entries(benchmark.outliers)) {
derivedMetrics[`${type}.outliers.${key}`] = count;
}
}

// Timer data: structured logs (existing path, unchanged)
Sentry.logger.info(message, {
...baseCiAttributes,
'ci.persona': benchmark.persona || BENCHMARK_PERSONA.STANDARD,
'ci.testTitle': benchmark.testTitle,
...allMetrics,
...derivedMetrics,
});

// Web vitals: separate reporting path via spans
Expand Down
6 changes: 6 additions & 0 deletions test/e2e/benchmarks/utils/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ export const DEFAULT_NUM_BROWSER_LOADS = DEFAULT_BENCHMARK_BROWSER_LOADS;
/** Same as {@link DEFAULT_BENCHMARK_PAGE_LOADS} in `shared/constants/benchmarks`. */
export const DEFAULT_NUM_PAGE_LOADS = DEFAULT_BENCHMARK_PAGE_LOADS;

/**
 * Browser loads for the POWER_USER_HOME preset (higher than default to offset warm-up exclusion).
 *
 * The power-user home flow uses this value as the fallback when the caller
 * does not supply `browserLoads`.
 */
export const POWER_USER_NUM_BROWSER_LOADS = 15;

/**
 * Number of leading browser-load sessions to discard as warm-up before computing stats.
 *
 * NOTE(review): the benchmark runner is expected to guard against
 * `browserLoads <= WARMUP_RUNS` — confirm in `runPageLoadBenchmark`.
 */
export const WARMUP_RUNS = 1;

export const ALL_METRICS = {
uiStartup: 'UI Startup',
load: 'navigation[0].load',
Expand Down
131 changes: 131 additions & 0 deletions test/e2e/benchmarks/utils/outlier-trimming.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { detectOutliersIQR } from './statistics';

// Unit tests for the IQR-based outlier filter. Per the cases below, inputs
// with fewer than four samples come back untouched (as a fresh array), while
// larger inputs lose only the values flagged as outliers.
describe('detectOutliersIQR (IQR-based)', () => {
  // Ascending numeric comparator for order-insensitive comparisons.
  const ascending = (left: number, right: number): number => left - right;

  describe('small sample edge cases', () => {
    it('returns empty array unchanged', () => {
      const { filtered, outlierCount } = detectOutliersIQR([]);
      expect(filtered).toEqual([]);
      expect(outlierCount).toBe(0);
    });

    it('returns array of 1 unchanged', () => {
      const { filtered, outlierCount } = detectOutliersIQR([500]);
      expect(filtered).toEqual([500]);
      expect(outlierCount).toBe(0);
    });

    it('returns array of 2 unchanged', () => {
      const { filtered, outlierCount } = detectOutliersIQR([100, 200]);
      expect(filtered).toEqual([100, 200]);
      expect(outlierCount).toBe(0);
    });

    it('returns array of 3 unchanged (below IQR threshold)', () => {
      // Even an extreme value is kept when there are too few samples.
      const { filtered, outlierCount } = detectOutliersIQR([100, 200, 9000]);
      expect(filtered).toEqual([100, 200, 9000]);
      expect(outlierCount).toBe(0);
    });
  });

  describe('no outliers', () => {
    it('returns all values when distribution is tight', () => {
      const tight = [100, 101, 99, 102, 98, 100, 101, 99];
      const { filtered, outlierCount } = detectOutliersIQR(tight);
      expect(outlierCount).toBe(0);
      expect(filtered).toHaveLength(tight.length);
    });

    it('returns all values when all samples are identical', () => {
      const constant = [200, 200, 200, 200, 200, 200];
      const { filtered, outlierCount } = detectOutliersIQR(constant);
      expect(outlierCount).toBe(0);
      expect(filtered).toHaveLength(constant.length);
    });
  });

  describe('outlier removal', () => {
    it('removes a single high outlier', () => {
      const { filtered, outlierCount } = detectOutliersIQR([
        10, 11, 12, 10, 11, 12, 11, 1000,
      ]);
      expect(filtered).not.toContain(1000);
      expect(outlierCount).toBe(1);
    });

    it('removes a single low outlier', () => {
      const { filtered, outlierCount } = detectOutliersIQR([
        100, 102, 101, 103, 100, 101, 1,
      ]);
      expect(filtered).not.toContain(1);
      expect(outlierCount).toBe(1);
    });

    it('removes multiple outliers on both ends', () => {
      const { filtered, outlierCount } = detectOutliersIQR([
        1, 100, 101, 102, 100, 101, 103, 9999,
      ]);
      expect(filtered).not.toContain(1);
      expect(filtered).not.toContain(9999);
      expect(outlierCount).toBe(2);
    });

    it('preserves non-outlier values exactly', () => {
      const core = [100, 105, 95, 102, 98, 101];
      const { filtered } = detectOutliersIQR(core.concat(5000));
      core.forEach((value) => {
        expect(filtered).toContain(value);
      });
    });
  });

  describe('realistic benchmark scenario (n=15)', () => {
    it('removes exactly 2 outliers from a deterministic 15-sample benchmark run', () => {
      // Q1=318, Q3=328, IQR=10, upper fence=343 — 850 and 900 both exceed it
      const baseline = [
        320, 330, 315, 325, 318, 322, 328, 316, 319, 324, 321, 323, 317,
      ];
      // Append two cold-start spikes to the 13 well-behaved samples.
      const { filtered, outlierCount } = detectOutliersIQR(
        baseline.concat(900, 850),
      );
      expect(outlierCount).toBe(2);
      expect(filtered).toHaveLength(13);
    });

    it('does not over-trim a low-variance run', () => {
      const steady = [
        300, 302, 298, 301, 299, 303, 300, 301, 302, 298, 300, 301, 299, 302,
        300,
      ];
      const { filtered, outlierCount } = detectOutliersIQR(steady);
      expect(outlierCount).toBe(0);
      expect(filtered).toHaveLength(steady.length);
    });
  });

  describe('input ordering', () => {
    it('produces the same outlierCount regardless of input order', () => {
      const fromOrdered = detectOutliersIQR([10, 11, 12, 13, 14, 15, 1000]);
      const fromShuffled = detectOutliersIQR([1000, 13, 10, 15, 12, 11, 14]);
      expect(fromOrdered.outlierCount).toBe(fromShuffled.outlierCount);
      // Compare the surviving values as sets by sorting both results.
      expect(fromOrdered.filtered.sort(ascending)).toEqual(
        fromShuffled.filtered.sort(ascending),
      );
    });

    it('does not mutate the input array (n >= 4, filter path)', () => {
      const input = [10, 11, 1000, 12, 13];
      const snapshot = input.slice();
      detectOutliersIQR(input);
      expect(input).toEqual(snapshot);
    });

    it('does not mutate the input array (n < 4, early-return path)', () => {
      const input = [100, 200, 9000];
      const snapshot = input.slice();
      const { filtered } = detectOutliersIQR(input);
      expect(input).toEqual(snapshot);
      // returned array must not be the same reference
      filtered.push(999);
      expect(input).toEqual(snapshot);
    });
  });
});
Loading
Loading