Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .github/workflows/codspeed-matrix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Generate the CodSpeed benchmark matrix.
#
# Emits to stdout a compact JSON array of {"crate","bench"} objects — one per
# `[[bench]]` target in the selected workspace crates. Each object becomes one
# CodSpeed shard (`cargo codspeed run -p <crate> --bench <bench>`). Sharding
# one job per bench target keeps every shard under CodSpeed's hard per-upload
# limit of 1000 benchmarks, on the assumption that no single bench target
# defines >1000 benchmark cases. If a target ever crosses that line, split the
# bench source rather than reworking the sharding here.
#
# Targets are discovered structurally via `cargo metadata` (no Cargo.toml text
# parsing, no hardcoded crate list), so new crates and new `[[bench]]` targets
# are picked up automatically.
#
# A bench target is dropped from the matrix when its crate's Cargo.toml marks
# it skipped:
#
# [package.metadata.codspeed.benches]
# merge_kernels = { skip = true } # broken at runtime, fix and remove
#
# cargo surfaces that table at .packages[].metadata.codspeed.benches, so the
# skip list lives next to the bench in the crate that owns it.
#
# Usage:
# codspeed-matrix.sh # every workspace crate
# codspeed-matrix.sh arrow parquet # only the named crates (must be members)

set -euo pipefail

# Snapshot the workspace metadata once; both the membership check and the
# matrix query below read from this single JSON document.
metadata="$(cargo metadata --format-version 1 --no-deps)"

# Reject explicitly-requested crates that are not workspace members, so a typo
# in a `bench:<crate>` label fails loudly instead of silently benching nothing.
if [ "$#" -gt 0 ]; then
  members="$(jq -r '.packages[].name' <<<"$metadata")"
  for crate in "$@"; do
    # `--` ends grep's option parsing. Without it an argument such as
    # `-earrow` is read as `-e arrow`, matches the real `arrow` member and
    # passes this check, after which the jq query below selects nothing.
    if ! grep -qxF -- "$crate" <<<"$members"; then
      echo "::error::Unknown workspace crate '$crate'" >&2
      exit 1
    fi
  done
fi

# Encode the requested crate names as a JSON array of strings. With no
# arguments printf emits a single empty line, which the length filter drops,
# leaving `[]` (interpreted below as "every workspace crate").
selected="$(printf '%s\n' "$@" | jq -Rsc 'split("\n") | map(select(length > 0))')"

# Emit one {"crate","bench"} object per bench target:
#   - keep a package when no crates were requested or its name was requested;
#   - keep a target when its kind list contains "bench";
#   - drop a target whose entry in the package's codspeed.benches metadata
#     table has a truthy `skip`.
jq -c \
  --argjson selected "$selected" '
  [ .packages[]
    | .name as $crate
    | (.metadata.codspeed.benches // {}) as $cfg
    | select(($selected | length) == 0 or ($selected | index($crate)))
    | .targets[]
    | select(.kind | index("bench"))
    | select(($cfg[.name].skip // false) | not)
    | { crate: $crate, bench: .name }
  ]
' <<<"$metadata"
179 changes: 179 additions & 0 deletions .github/workflows/codspeed-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Opt-in CodSpeed benchmarking for pull requests, gated by labels and
# sharded one job per `[[bench]]` target in each selected crate.
#
# Label convention (managed manually on each PR):
#
# bench:all # every [[bench]] in the workspace
# bench:<crate> # every [[bench]] in that crate
# bench:<crate> bench:<crate> # union
#
# Where <crate> is a workspace member name, e.g. `bench:arrow`,
# `bench:parquet`, `bench:arrow-cast`. `bench:all` short-circuits and
# supersedes any per-crate labels.
#
# Topology mirrors codspeed.yml (setup + build run in parallel; bench
# is a matrix that downloads the build artifact and runs one bench
# target per shard). The `setup` job additionally filters the matrix
# by labels.
#
# Authorization: only users with triage access or higher to the repo can
# add labels, so the label is itself the authorization gate.
#
# Baseline: native `pull_request` event → CodSpeed compares against
# the base branch's latest CodSpeed report automatically.
#
# Fork PR caveat: workflows triggered by `pull_request` from fork PRs
# do not get an OIDC token. For benches on fork PRs, push the branch
# to this repo and label it there.

name: codspeed-pr

# Triggered by pull request activity; the per-job `if` conditions decide
# whether a `bench:*` label is attached and any work should happen.
on:
  pull_request:
    types:
      - labeled
      - synchronize
      - opened
      - reopened

# One live run per workflow and pull request: a newer event cancels the run
# that is still in progress.
concurrency:
  group: "${{ github.workflow }}-${{ github.event.pull_request.number }}"
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write
  pull-requests: write

env:
  # Cargo feature list handed to `cargo codspeed build` by the build job.
  CODSPEED_FEATURES: "arrow/test_utils,arrow/csv,arrow/json,arrow/chrono-tz,arrow/prettyprint,arrow-schema/ffi,parquet/arrow,parquet/async,parquet/test_common,parquet/experimental,parquet/object_store"

jobs:
setup:
  # Run only if at least one `bench:*` label is currently attached.
  # The toJSON serialization wraps each label name in double quotes,
  # so searching for `"bench:` matches only at the start of a label
  # name.
  if: contains(toJSON(github.event.pull_request.labels.*.name), '"bench:')
  name: Generate bench matrix
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.gen.outputs.matrix }}
    scope: ${{ steps.gen.outputs.scope }}
  steps:
    - uses: actions/checkout@v6

    - name: Resolve crates from labels and emit per-bench-target matrix
      id: gen
      env:
        LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
      # Discovery + the known-broken exclusion list live in the shared
      # codspeed-matrix.sh (also used by codspeed.yml). `bench:all` passes
      # no crate args (every crate); otherwise each `bench:<crate>` suffix
      # is forwarded as an arg and validated against the workspace members
      # by the script.
      run: |
        # One suffix per line: the text after `bench:` of every bench label.
        suffixes="$(jq -r '.[] | select(startswith("bench:")) | sub("^bench:"; "")' <<<"$LABELS")"

        if grep -qxF "all" <<<"$suffixes"; then
          scope="full workspace (bench:all)"
          matrix="$(bash .github/workflows/codspeed-matrix.sh)"
        else
          # Split on whitespace into an array. `read -a` performs word
          # splitting only, so a label such as `bench:*` is passed through
          # literally (and rejected by the script) instead of being
          # glob-expanded against the files in the checkout.
          read -r -a crates <<<"${suffixes//$'\n'/ }"

          # A bare `bench:` label yields no crate name. Calling the script
          # with zero arguments would bench the whole workspace, so fail
          # loudly rather than treating it as `bench:all`.
          if [ "${#crates[@]}" -eq 0 ]; then
            echo "::error::No crate named by the bench:* labels" >&2
            exit 1
          fi

          # Space-joined list of the requested crates, without a trailing space.
          scope="${crates[*]}"
          matrix="$(bash .github/workflows/codspeed-matrix.sh "${crates[@]}")"
        fi

        echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
        echo "scope=$scope" >> "$GITHUB_OUTPUT"
        echo "::notice::Scope: $scope ($(jq length <<<"$matrix") bench shards, known-broken targets excluded)"

build:
  name: Build workspace benchmarks
  # Same label gate as the setup job: with no `bench:*` label attached
  # there is nothing to benchmark, so nothing is built either.
  if: contains(toJSON(github.event.pull_request.labels.*.name), '"bench:')
  runs-on: ubuntu-latest
  timeout-minutes: 60
  steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - name: Install protoc
      run: |
        sudo apt-get update \
          && sudo apt-get install -y protobuf-compiler

    - name: Setup Rust toolchain, cache and cargo-codspeed
      uses: moonrepo/setup-rust@v1
      with:
        bins: cargo-codspeed
        cache-target: release
        channel: stable

    - name: Build benchmarks
      # The feature list comes from the workflow-level CODSPEED_FEATURES
      # environment variable.
      run: cargo codspeed build --workspace --features "$CODSPEED_FEATURES"

    - name: Pack bench binaries into a tarball
      # Shipping a tarball keeps the Unix executable bits intact.
      # actions/upload-artifact does not preserve them, so binaries
      # uploaded directly would reach the shards as 644 and fail with
      # EACCES under `cargo codspeed run`.
      run: tar -cf codspeed-binaries.tar -C target codspeed

    - name: Upload built bench binaries
      uses: actions/upload-artifact@v4
      with:
        name: codspeed-binaries
        path: codspeed-binaries.tar
        if-no-files-found: error
        retention-days: 1

bench:
  needs: [setup, build]
  # Skip the whole job when setup produced an empty matrix (for example
  # a selected crate with no `[[bench]]` targets, or with all of them
  # marked skipped). A job-level `if` is evaluated before the matrix is
  # expanded; without it an empty `config` list fails the workflow
  # instead of skipping the shards.
  if: ${{ needs.setup.outputs.matrix != '[]' }}
  name: ${{ matrix.config.crate }} / ${{ matrix.config.bench }}
  runs-on: ubuntu-latest
  timeout-minutes: 30
  strategy:
    # Let every shard finish even if one bench target fails.
    fail-fast: false
    matrix:
      # One entry per {"crate","bench"} object emitted by the setup job.
      config: ${{ fromJson(needs.setup.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - name: Install cargo-codspeed
      uses: moonrepo/setup-rust@v1
      with:
        channel: stable
        bins: cargo-codspeed

    - name: Download built bench binaries
      uses: actions/download-artifact@v4
      with:
        name: codspeed-binaries
        path: .

    - name: Unpack bench binaries (preserves executable bits)
      # Restores the `codspeed` directory packed by the build job under
      # `target/`.
      run: |
        mkdir -p target
        tar -xf codspeed-binaries.tar -C target

    - name: Run single bench target
      uses: CodSpeedHQ/action@v4
      with:
        mode: simulation
        run: cargo codspeed run -p ${{ matrix.config.crate }} --bench ${{ matrix.config.bench }}
Loading
Loading