# Build Release (CUDA) #86
# (Page header text captured together with the workflow; kept as a comment so
# the file remains parseable as YAML.)

# Manually triggered workflow that builds a CUDA-enabled wheel from the
# vendor/llama-cpp-python submodule inside an nvidia/cuda "devel" container,
# uploads the wheel as a release asset and attests its build provenance.
name: Build Release (CUDA)

on: workflow_dispatch

permissions:
  contents: write

jobs:
  build_wheels:
    name: Build Wheel CUDA ${{ matrix.cuda_config.ver }} Py ${{ matrix.pyver }}
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:${{ matrix.cuda_config.ver }}-devel-ubuntu24.04
    # Job-level permissions: contents for the release upload, id-token and
    # attestations for the provenance attestation step.
    permissions:
      id-token: write
      contents: write
      attestations: write
    strategy:
      fail-fast: false
      matrix:
        # One entry per CUDA toolkit: full version (container tag), short tag
        # (appended to the wheel version) and target CUDA architecture.
        # Values are quoted so they are always treated as strings.
        cuda_config:
          - ver: "13.2.0"
            short: cu132
            arch: "75"
        pyver: ["3.14"]
    defaults:
      run:
        shell: bash
    steps:
      - name: Install system dependencies
        env:
          # Prevent debconf prompts from stalling apt in the container.
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt-get update
          apt-get install -y software-properties-common git curl build-essential cmake libssl-dev

      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
        with:
          submodules: recursive

      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          # Make the uv binary available to the following steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Install Python
        run: |
          uv venv --python ${{ matrix.pyver }}
          # Put the virtual environment's interpreter first for later steps.
          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

      - name: Environment Diagnostics
        run: |
          echo "========== SYSTEM DIAGNOSTICS =========="
          echo "GLIBC: $(ldd --version | head -n1)"
          echo "Python: $(python --version)"
          echo "uv: $(uv --version)"
          echo "CMake: $(cmake --version | head -n1)"
          echo "nvcc: $(nvcc --version 2>/dev/null | grep release || echo 'not found')"
          echo "Driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null || echo 'not available')"
          echo "========================================"

      - name: Build Wheel
        working-directory: vendor/llama-cpp-python
        env:
          VERBOSE: "1"
          # Folded into a single space-separated line of CMake definitions.
          # The last flag links executables against the libcuda stub in
          # /usr/local/cuda/lib64/stubs (presumably because the build
          # container has no driver-provided libcuda; confirm).
          CMAKE_ARGS: >-
            -DGGML_CUDA=on
            -DLLAVA_BUILD=off
            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda_config.arch }}
            -DGGML_CUDA_FORCE_MMQ=OFF
            -DGGML_AVX2=on
            -DGGML_FMA=on
            -DGGML_F16C=on
            -DLLAMA_BUILD_EXAMPLES=OFF
            -DLLAMA_BUILD_TESTS=OFF
            -DLLAMA_BUILD_SERVER=OFF
            -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda"
        run: |
          # Rename the release version with +cu{cuda_ver} build tag
          sed -i 's/__version__ = "\([^"]*\)"/__version__ = "\1+${{ matrix.cuda_config.short }}"/' llama_cpp/__init__.py
          # Build wheel using uv
          uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel

      - name: List built wheels
        run: ls -lh vendor/llama-cpp-python/dist/*.whl

      - name: Upload Release Assets
        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
        with:
          files: vendor/llama-cpp-python/dist/*.whl
          # NOTE(review): with workflow_dispatch this is the ref the run was
          # dispatched from; confirm the workflow is always run from a tag.
          tag_name: ${{ github.ref_name }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Attest Build Provenance
        uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
        with:
          subject-path: 'vendor/llama-cpp-python/dist/*.whl'