-
Notifications
You must be signed in to change notification settings - Fork 151
300 lines (269 loc) · 12.3 KB
/
run-bench.yml
File metadata and controls
300 lines (269 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
name: Run Bench Main
# Triggers: manual dispatch (with optional dataset regex, branch list, and inline
# YAML config) or pull requests against main that touch Java sources or POMs.
on:
  workflow_dispatch:
    inputs:
      benchmark_config:
        description: 'Benchmark dataset regex (leave empty for all)'
        required: false
        default: ''
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'
      custom_config:
        description: 'Custom YAML configuration content (will override autoDefault.yml)'
        required: false
        type: string
        default: ''
  pull_request:
    types: [opened, synchronize, ready_for_review]
    branches:
      - main
    paths:
      - '**/src/main/java/**'
      - 'pom.xml'
      - '**/pom.xml'
jobs:
  # Job to generate the matrix configuration.
  # Builds a JSON matrix of {jdk, isa, branch}; the branch axis depends on the
  # triggering event (PR: main + head ref; dispatch: user-supplied list; else main).
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        run: |
          # Print event information for debugging
          echo "Event name: ${{ github.event_name }}"
          echo "Branches input: '${{ github.event.inputs.branches }}'"
          # Default branches based on event type
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
            BRANCHES='["main", "${{ github.head_ref }}"]'
          elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
            # Parse space-separated branches input into JSON array
            echo "Workflow dispatch with branches input detected"
            BRANCHES_INPUT="${{ github.event.inputs.branches }}"
            BRANCHES="["
            for branch in $BRANCHES_INPUT; do
              if [[ "$BRANCHES" != "[" ]]; then
                BRANCHES="$BRANCHES, "
              fi
              BRANCHES="$BRANCHES\"$branch\""
              echo "Adding branch to matrix: $branch"
            done
            BRANCHES="$BRANCHES]"
          else
            echo "Default event type. Using main branch only"
            BRANCHES='["main"]'
          fi
          echo "Generated branches matrix: $BRANCHES"
          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> $GITHUB_OUTPUT
test-avx512:
needs: generate-matrix
concurrency:
group: ${{ matrix.isa }}-${{ matrix.jdk }}-${{ matrix.branch }}
cancel-in-progress: false
strategy:
matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
runs-on: ${{ matrix.isa }}
steps:
- name: verify-avx512
run: |
# avx2 is included just for illustration
required="avx2 avx512f avx512cd avx512bw avx512dq avx512v"
printf "required ISA feature flags: %s\n" "${required}"
flags="$(lscpu|grep '^Flags'|cut -d: -f2)"
output=""
for flag in ${required} ; do
if [[ " $flags " == *"${flag}"* ]]
then output="${output} $flag(OK)"
else output="${output} $flag(FAIL)"
fi ; done
printf "%s\n" ${output}
if [[ " $output " == *"FAIL"* ]] ; then exit 2 ; fi
- name: Set up GCC
run: |
sudo apt install -y gcc
- uses: actions/checkout@v4
- name: Set up JDK ${{ matrix.jdk }}
uses: actions/setup-java@v3
with:
java-version: ${{ matrix.jdk }}
distribution: temurin
cache: maven
- name: Get version from pom.xml
id: get-version
run: |
VERSION=$(grep -o '<version>[^<]*</version>' pom.xml | head -1 | sed 's/<version>\(.*\)<\/version>/\1/')
if [[ "$VERSION" == *'${revision}'* ]]; then
REVISION=$(grep -o '<revision>[^<]*</revision>' pom.xml | head -1 | sed 's/<revision>\(.*\)<\/revision>/\1/')
if [ -n "$REVISION" ]; then
VERSION=${VERSION//\$\{revision\}/$REVISION}
fi
fi
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Current branch has version $VERSION"
# Print debug information about the current job
- name: Print job information
run: |
echo "Running benchmark for:"
echo " - Branch: ${{ matrix.branch }}"
echo " - JDK: ${{ matrix.jdk }}"
echo " - ISA: ${{ matrix.isa }}"
# Checkout the branch specified in the matrix
- name: Checkout branch
uses: actions/checkout@v4
with:
ref: ${{ matrix.branch }}
fetch-depth: 0
# ==========================================
# Decode and write the protected dataset catalog
#
# TO UPDATE THIS SECRET:
# 1. On your local machine, run:
# base64 -i jvector-examples/yaml-configs/dataset-catalogs/protected-catalog.yaml
# 2. Go to GitHub Repo -> Settings -> Secrets and variables -> Actions
# 3. Update the PROTECTED_CATALOG_YAML secret with the new Base64 string.
# ==========================================
- name: Inject Protected Catalog
run: |
mkdir -p jvector-examples/yaml-configs/dataset-catalogs
echo "${{ secrets.PROTECTED_CATALOG_YAML }}" | base64 -d > jvector-examples/yaml-configs/dataset-catalogs/protected-catalog.yaml
# Create a directory to store benchmark results
- name: Create results directory
run: mkdir -p benchmark_results
# Build the branch
- name: Build branch
run: mvn -B -Punix-amd64-profile package --file pom.xml
# Run the benchmark if jvector-examples exists
- name: Run benchmark
id: run-benchmark
run: |
# Check if jvector-examples directory and AutoBenchYAML class exist
if [ ! -d "jvector-examples" ]; then
echo "Warning: jvector-examples directory not found in branch ${{ matrix.branch }}. Skipping benchmark."
exit 0
fi
# Check if the jar with dependencies was built
JAR_COUNT=$(ls jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar 2>/dev/null | wc -l)
if [ "$JAR_COUNT" -eq 0 ]; then
echo "Warning: No jar with dependencies found in branch ${{ matrix.branch }}. Skipping benchmark."
exit 0
fi
# Determine available memory and set heap size to half of it
TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
# Ensure we have a valid number, default to 16GB total (8GB heap) if detection fails
if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
echo "Warning: Could not detect memory size, defaulting to 16GB total memory (8GB heap)"
TOTAL_MEM_GB=16
fi
HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
# Ensure minimum heap size of 1GB
if [[ "$HALF_MEM_GB" -lt 1 ]]; then
HALF_MEM_GB=1
fi
echo "Total memory: ${TOTAL_MEM_GB}GB, using ${HALF_MEM_GB}GB for Java heap"
# Run the benchmark
echo "Running benchmark for branch ${{ matrix.branch }}"
# Determine optional benchmark config argument from workflow input
BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
if [[ -z "$BENCH_ARG" ]]; then
echo "No benchmark_config provided; running with default dataset selection."
BENCH_SUFFIX=""
else
echo "Using benchmark_config: '$BENCH_ARG'"
BENCH_SUFFIX=" $BENCH_ARG"
fi
# Handle custom configuration if provided
CUSTOM_CONFIG="${{ github.event.inputs.custom_config }}"
CONFIG_ARG=""
if [[ -n "$CUSTOM_CONFIG" ]]; then
echo "Custom configuration provided, creating temporary config file..."
CUSTOM_CONFIG_FILE="custom-benchmark-config.yml"
echo "$CUSTOM_CONFIG" > "$CUSTOM_CONFIG_FILE"
CONFIG_ARG="--config $CUSTOM_CONFIG_FILE"
echo "Using custom config: $CUSTOM_CONFIG_FILE"
else
echo "No custom configuration provided, using default autoDefault.yml"
fi
# Sanitize branch name for filenames: replace any non-alphanumeric, dash or underscore with underscore
SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-1M
else
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG}${BENCH_SUFFIX:+ }${BENCH_ARG}
fi
# Move the results to the benchmark_results directory
mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true
mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true
echo "Completed benchmarks for branch: ${{ matrix.branch }}"
- name: Upload Individual Benchmark Results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
path: |
benchmark_results/*.csv
benchmark_results/*.json
if-no-files-found: warn
# Job to combine results and create visualizations
combine-results:
needs: test-avx512
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download all benchmark results
uses: actions/download-artifact@v4
with:
pattern: benchmark-results-*
path: all-benchmark-results
merge-multiple: true
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install Python Dependencies
run: |
python -m pip install --upgrade pip
pip install matplotlib numpy psutil
- name: Generate visualization using visualize_benchmarks.py
run: |
# Discover all downloaded CSV benchmark result files
shopt -s globstar nullglob
echo "Listing downloaded artifact directory structure:"
ls -R all-benchmark-results || true
files=(all-benchmark-results/**/*.csv)
if [ ${#files[@]} -eq 0 ]; then
echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
files=(**/*.csv)
fi
echo "Found ${#files[@]} CSV files"
for f in "${files[@]}"; do echo " - $f"; done
# Check if any files were found
if [ ${#files[@]} -eq 0 ]; then
echo "No benchmark result files found. Skipping visualization generation."
echo "This can happen when benchmarks are skipped due to missing dependencies or other issues."
# Create empty output directory to satisfy artifact upload
mkdir -p benchmark_reports
echo "No benchmark results were available for visualization." > benchmark_reports/no_results.txt
exit 0
fi
# Ensure output directory matches the script's default/output expectation
OUTPUT_DIR="benchmark_reports"
# Run the visualization script with all files, default threshold (5.0)
python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}"
- name: Upload visualization artifacts
uses: actions/upload-artifact@v4
with:
name: benchmark-comparison-results
path: |
benchmark_reports/**
retention-days: 90