From 5fd709ae0a13dc8a30c8ab81ad9584a1759b7e28 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 17:06:15 +0200 Subject: [PATCH 01/13] feat(rocm): add ROCm 7.x and gfx1151 (AMD Strix Halo / RDNA 3.5) build support ROCm 7.x overhauled the apt package structure and install paths, making gfx1151 (AMD Ryzen AI MAX+ / Radeon 8060S APUs) available for the first time. This commit adds full build-time and runtime support. ## Dockerfile Add `ARG ROCM_VERSION=6` to switch between ROCm 6.x (default, existing behaviour) and ROCm 7.x. When `ROCM_VERSION=7`: - Add AMD's new apt repo (repo.amd.com/rocm/packages/ubuntu2404) and install `amdrocm-llvm` + `amdrocm-hip-dev` (the ROCm 7.x replacements for `hipblas-dev`/`rocblas-dev` which no longer exist in ROCm 7.x). - Create /opt/rocm/{llvm,bin,hip,lib,include} compat symlinks so that cmake find_package(hip) and existing linker flags continue to work unchanged. ROCm 7.x installs to /opt/rocm/core-7.XX/ managed by update-alternatives. - Add /opt/rocm/llvm/lib to ldconfig so that libamd_comgr.so can dlopen LLVM shared libs (libLLVM.so, libclang-cpp.so) at HIP backend load time. - Fix libomp.so symlink to use an explicit path instead of a glob, which silently fails in ROCm 7.x because the path layout changed. ## backend/cpp/llama-cpp/Makefile - Add `gfx1151` to the default AMDGPU_TARGETS list. - Add `-DHIP_PLATFORM=amd` (required for cmake find_package(hip) in ROCm 7.x). - Add `-DROCM_PATH`/`-DCMAKE_PREFIX_PATH` to help cmake locate the HIP SDK. - Add `-DGGML_HIP_ROCWMMA_FATTN=ON` to enable rocWMMA-accelerated Flash Attention on RDNA 3+ (gfx1100 and newer, including gfx1151). ## backend/cpp/llama-cpp/prepare.sh + CMakeLists.txt Newer llama.cpp versions introduce chat-auto-parser.h in common/ and grpc-server.cpp includes it. 
The cmake include-path for the common/ library target proved unreliable across build variants, so: - prepare.sh now copies all common/ headers (and jinja/ / minja/ subdirs) into the grpc-server staging directory alongside grpc-server.cpp. - CMakeLists.txt adds an explicit target_include_directories for common/ as a belt-and-suspenders fallback. ## CI (.github/workflows) - Add `rocm-version` input to the reusable image_build.yml workflow and pass it as a Docker build-arg. - Add a `gpu-hipblas-rocm7` matrix entry in image.yml that builds from plain ubuntu:24.04 with ROCM_VERSION=7 and tags the result with `-gpu-hipblas-rocm7`. The ROCm 6.x entry is unchanged (tag-suffix `-gpu-hipblas`, default). Tested on: Geekom A9 Mega (AMD Ryzen AI MAX+ 395, Radeon 8060S / gfx1151, ROCm 7.11.0, Ubuntu 24.04, kernel 6.14.0-1018-oem). Required container env for gfx1151: HSA_OVERRIDE_GFX_VERSION=11.5.1 (tells HSA runtime to use gfx1151 code) ROCBLAS_USE_HIPBLASLT=1 (prefer hipBLASLt over rocBLAS GEMM) --- .github/workflows/image.yml | 15 ++++++++ .github/workflows/image_build.yml | 7 ++++ Dockerfile | 54 ++++++++++++++++++++++++---- backend/cpp/llama-cpp/CMakeLists.txt | 6 ++++ backend/cpp/llama-cpp/Makefile | 6 ++-- backend/cpp/llama-cpp/prepare.sh | 14 ++++++++ 6 files changed, 94 insertions(+), 8 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8b672e8976d3..1df1c4f57fc7 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -29,6 +29,7 @@ makeflags: ${{ matrix.makeflags }} ubuntu-version: ${{ matrix.ubuntu-version }} ubuntu-codename: ${{ matrix.ubuntu-codename }} + rocm-version: ${{ matrix.rocm-version || '6' }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -47,6 +48,20 @@ makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2404' ubuntu-codename: 'noble' + rocm-version: '6' + # ROCm 7.x build for AMD Strix Halo / RDNA 3.5 (gfx1151) and other 
ROCm 7+ devices. + # Uses plain Ubuntu 24.04 and installs ROCm 7 from AMD's new apt repo (repo.amd.com). + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-hipblas-rocm7' + base-image: "ubuntu:24.04" + grpc-base-image: "ubuntu:24.04" + runs-on: 'ubuntu-latest' + makeflags: "--jobs=3 --output-sync=target" + ubuntu-version: '2404' + ubuntu-codename: 'noble' + rocm-version: '7' core-image-build: if: github.repository == 'mudler/LocalAI' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 9483239d2971..d45f8469c914 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -61,6 +61,11 @@ on: required: false default: 'noble' type: string + rocm-version: + description: 'ROCm major version (6 or 7). Controls which apt packages are installed for hipblas builds.' + required: false + default: '6' + type: string secrets: dockerUsername: required: true @@ -217,6 +222,7 @@ jobs: SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} + ROCM_VERSION=${{ inputs.rocm-version }} context: . file: ./Dockerfile cache-from: type=gha @@ -246,6 +252,7 @@ jobs: SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} + ROCM_VERSION=${{ inputs.rocm-version }} context: . file: ./Dockerfile cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index 1567ef6f7ec2..f284e9770157 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,12 @@ ARG CUDA_MINOR_VERSION=0 ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT +# ROCM_VERSION: major version of ROCm to install. 
+# - "6" (default/blank): use packages already present in the rocm/dev-ubuntu-* base image +# (hipblas-dev, rocblas-dev — the legacy names used up to ROCm 6.x) +# - "7": install from AMD's new repo.amd.com/rocm/packages/ubuntu2404 repo and use +# the new amdrocm-* package names introduced in ROCm 7.x +ARG ROCM_VERSION=6 ENV BUILD_TYPE=${BUILD_TYPE} ARG UBUNTU_VERSION=2404 @@ -146,6 +152,32 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. + mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-hip-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing + # /opt/rocm/core-7 -> /opt/rocm/core-7.XX. Create /opt/rocm/* compat symlinks + # so that cmake find_package(hip) and existing linker flags work unchanged. + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + echo "amd" > /run/localai/capability && \ + ldconfig ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. 
+ # ROCm lib packages don't trigger ldconfig - run it manually. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ @@ -153,14 +185,24 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ echo "amd" > /run/localai/capability && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ - ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \ - ; fi + if [ "${ROCM_VERSION}" = "7" ]; then \ + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ + else \ + ln -sf $(find /opt/rocm-*/lib/llvm/lib -name libomp.so 2>/dev/null | head -1) /usr/lib/libomp.so ; \ + fi \ +; fi + +# ROCm 7.x: libamd_comgr.so depends on LLVM shared libs (libLLVM.so, libclang-cpp.so) +# that live in /opt/rocm/llvm/lib which is not in the default ldconfig search path. +# Without this, HIP backends will fail to load libamd_comgr via dlopen at runtime. 
+RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${ROCM_VERSION}" = "7" ]; then \ + echo /opt/rocm/llvm/lib > /etc/ld.so.conf.d/rocm-llvm.conf && ldconfig ; \ +fi RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel" diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt index 598461975532..698fbc143903 100644 --- a/backend/cpp/llama-cpp/CMakeLists.txt +++ b/backend/cpp/llama-cpp/CMakeLists.txt @@ -61,6 +61,12 @@ add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h) target_include_directories(${TARGET} PRIVATE ../llava) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) +# grpc-server lives at llama.cpp/tools/grpc-server/; common/ is two levels up +# at the llama.cpp root. Add it so that chat-auto-parser.h and its transitive +# includes (jinja/, minja/) are found without relying on cmake's include +# propagation from the common library target, which proved unreliable. +get_filename_component(LLAMA_COMMON_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../common" ABSOLUTE) +target_include_directories(${TARGET} PRIVATE "${LLAMA_COMMON_DIR}") target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto absl::flags_parse diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 82b49de1564a..b1ef1861d1e8 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -33,8 +33,10 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 - CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) + AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201 + # -DHIP_PLATFORM=amd is required for ROCm 7.x cmake 
find_package(hip) to locate the HIP SDK. + # -DGGML_HIP_ROCWMMA_FATTN=ON enables rocWMMA-accelerated Flash Attention on RDNA 3+ (gfx1100+). + CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) -DGGML_HIP_ROCWMMA_FATTN=ON else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=1 else ifeq ($(OS),Darwin) diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh index f9b7e3dd2651..0a7a5375613b 100644 --- a/backend/cpp/llama-cpp/prepare.sh +++ b/backend/cpp/llama-cpp/prepare.sh @@ -21,6 +21,20 @@ cp -r grpc-server.cpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/ +# Copy common/ headers into the grpc-server staging directory. +# Newer llama.cpp versions (post chat-auto-parser introduction) require +# headers from common/ (e.g. chat-auto-parser.h and its transitive deps +# jinja/, minja/) to be available alongside grpc-server.cpp. Relying solely +# on cmake include-path propagation proved fragile across build variants, so +# we stage them explicitly here. +cp -f llama.cpp/common/*.h llama.cpp/tools/grpc-server/ 2>/dev/null || true +for _subdir in jinja minja; do + if [ -d "llama.cpp/common/$_subdir" ]; then + cp -rf "llama.cpp/common/$_subdir" llama.cpp/tools/grpc-server/ + fi +done +unset _subdir + set +e if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then echo "grpc-server already added" From 022affc8a375a7d929a74265f5d321ba30e8cfac Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 17:38:54 +0200 Subject: [PATCH 02/13] fix(rocm7): correct package name, add rocWMMA headers, fix LD_LIBRARY_PATH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs in the initial ROCm 7.x support commit that would cause build or runtime failures: 1. 
Wrong package: amdrocm-hip-dev → amdrocm-core-sdk-${ROCM_ARCH} amdrocm-hip-dev is a generic HIP dev package that does NOT include the GPU-family BLAS kernel objects. In ROCm 7.x each GPU family has a dedicated metapackage: amdrocm-core-sdk-gfx1151 for Strix Halo, amdrocm-core-sdk-gfx1100 for RDNA 3, etc. Add ARG ROCM_ARCH=gfx1151 so callers can override for their hardware. 2. Missing rocWMMA headers The Makefile enables -DGGML_HIP_ROCWMMA_FATTN=ON which requires rocWMMA headers at compile time. rocwmma-dev is not available in the ROCm 7.x apt repo (repo.amd.com/rocm/packages/ubuntu2404). Clone the headers from github.com/ROCm/rocWMMA into /opt/rocwmma-headers/ and write a hardcoded rocwmma-version.hpp (the cmake configure_file step is not run here). Set ENV CPATH=/opt/rocwmma-headers so the compiler finds the headers in the builder-backends stage and during REBUILD=true at runtime. 3. LD_LIBRARY_PATH missing from final stage libamd_comgr.so.3 (needed by HIP at load time) depends on LLVM shared libs in /opt/rocm/llvm/lib. ldconfig covers the standard linker path but not dlopen() calls in backend subprocesses. Add ENV LD_LIBRARY_PATH=/opt/rocm/llvm/lib to the final stage which is inherited by all subprocess execvp chains. This path is a no-op on non-ROCm builds (directory does not exist there). --- Dockerfile | 58 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index f284e9770157..4a051c57180f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,11 +24,16 @@ ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT # ROCM_VERSION: major version of ROCm to install. 
-# - "6" (default/blank): use packages already present in the rocm/dev-ubuntu-* base image +# - "6" (default): use packages already present in the rocm/dev-ubuntu-* base image # (hipblas-dev, rocblas-dev — the legacy names used up to ROCm 6.x) # - "7": install from AMD's new repo.amd.com/rocm/packages/ubuntu2404 repo and use # the new amdrocm-* package names introduced in ROCm 7.x ARG ROCM_VERSION=6 +# ROCM_ARCH: GPU architecture target for ROCm 7.x architecture-specific packages. +# In ROCm 7.x each GPU family has a dedicated metapackage (amdrocm-core-sdk-gfxNNNN) +# that includes the BLAS kernel objects for that architecture. +# Examples: gfx1151 (Strix Halo / Radeon 8060S), gfx1100 (RX 7900 XTX), gfx942 (MI300X) +ARG ROCM_ARCH=gfx1151 ENV BUILD_TYPE=${BUILD_TYPE} ARG UBUNTU_VERSION=2404 @@ -162,19 +167,38 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ apt-get update && \ - apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-hip-dev && \ + apt-get install -y --no-install-recommends git && \ + # amdrocm-llvm: ROCm LLVM/Clang toolchain (compiler for HIP kernels) + # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. + # In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU + # family gets its own amdrocm-core-sdk-gfxNNNN package instead. + apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing - # /opt/rocm/core-7 -> /opt/rocm/core-7.XX. 
Create /opt/rocm/* compat symlinks - # so that cmake find_package(hip) and existing linker flags work unchanged. + # /opt/rocm/core-7 -> /opt/rocm/core-7.XX (e.g. core-7.12). + # Create /opt/rocm/* compat symlinks so cmake find_package(hip) and existing + # linker flags work unchanged. Using core-7 (not core-7.XX) means these + # symlinks survive minor version bumps (7.11 → 7.12 etc.) automatically. ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ echo "amd" > /run/localai/capability && \ - ldconfig ; \ + ldconfig && \ + # rocWMMA (rocwmma-dev) is not available in the ROCm 7.x apt repo. + # Install headers from source so GGML_HIP_ROCWMMA_FATTN can be compiled. + # Headers go to /opt/rocwmma-headers; CPATH is set below so the compiler + # finds them without any extra cmake path gymnastics. + git clone --depth 1 https://github.com/ROCm/rocWMMA /tmp/rocwmma && \ + mkdir -p /opt/rocwmma-headers/rocwmma && \ + cp -r /tmp/rocwmma/library/include/rocwmma/. /opt/rocwmma-headers/rocwmma/ && \ + rm -rf /tmp/rocwmma && \ + # rocwmma-version.hpp is generated by cmake configure_file in the source tree. + # Write it directly to avoid pulling in cmake at this stage. + printf '#ifndef ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_VERSION_MAJOR 2\n#define ROCWMMA_VERSION_MINOR 2\n#define ROCWMMA_VERSION_PATCH 0\n#endif\n' \ + > /opt/rocwmma-headers/rocwmma/rocwmma-version.hpp ; \ else \ # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. # ROCm lib packages don't trigger ldconfig - run it manually. 
@@ -189,19 +213,12 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then fi \ ; fi -RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ - if [ "${ROCM_VERSION}" = "7" ]; then \ - ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ - else \ - ln -sf $(find /opt/rocm-*/lib/llvm/lib -name libomp.so 2>/dev/null | head -1) /usr/lib/libomp.so ; \ - fi \ -; fi +# Set CPATH so the compiler finds rocWMMA headers during backend compilation. +# No-op on non-hipblas builds (path absent); ${CPATH:+:...} avoids an empty entry (= cwd). +ENV CPATH=/opt/rocwmma-headers${CPATH:+:${CPATH}} -# ROCm 7.x: libamd_comgr.so depends on LLVM shared libs (libLLVM.so, libclang-cpp.so) -# that live in /opt/rocm/llvm/lib which is not in the default ldconfig search path. -# Without this, HIP backends will fail to load libamd_comgr via dlopen at runtime. -RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${ROCM_VERSION}" = "7" ]; then \ - echo /opt/rocm/llvm/lib > /etc/ld.so.conf.d/rocm-llvm.conf && ldconfig ; \ +RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ fi RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel" @@ -414,6 +431,13 @@ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all +# ROCm 7.x: libamd_comgr.so.3 depends on LLVM shared libs (libLLVM.so, libclang-cpp.so) +# that live in /opt/rocm/llvm/lib, which is not in the default ldconfig search path. +# Export it via LD_LIBRARY_PATH so backend subprocesses that dlopen() libamd_comgr +# can resolve them. ${VAR:+:...} avoids a trailing ':' (an empty entry means the +# current directory). Harmless on non-ROCm builds (the directory does not exist). +ENV LD_LIBRARY_PATH=/opt/rocm/llvm/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + WORKDIR / COPY ./entrypoint.sh . 
From bec5ca24d28b609b9ecec165b8cfee316c6586e9 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 18:13:35 +0200 Subject: [PATCH 03/13] feat(rocm): extend ROCm 7.x / gfx1151 support to all backends Apply the same ROCM_VERSION/ROCM_ARCH ARG pattern introduced for the main Dockerfile to the three backend Dockerfiles and both backend workflow files. Changes per file: backend/Dockerfile.llama-cpp, backend/Dockerfile.python, backend/Dockerfile.golang: - Add ARG ROCM_VERSION=6 and ARG ROCM_ARCH=gfx1151 - Gate existing hipblas-dev/rocblas-dev install behind ROCM_VERSION != 7 - Add ROCm 7.x branch: AMD apt repo setup, amdrocm-llvm + amdrocm-core-sdk-${ROCM_ARCH}, /opt/rocm compat symlinks - llama-cpp/python: also install rocWMMA headers for GGML_HIP_ROCWMMA_FATTN + add ENV CPATH=/opt/rocwmma-headers - python: fix libomp.so symlink (glob /opt/rocm-** broken on ROCm 6.x base images; unified path /opt/rocm/llvm/lib/libomp.so works for both ROCm 6.x and 7.x via /opt/rocm symlink) .github/workflows/backend_build.yml: - Add rocm-version (default '6') and rocm-arch (default 'gfx1151') workflow inputs - Pass ROCM_VERSION / ROCM_ARCH in build-args for both push and PR jobs .github/workflows/backend.yml: - Pass rocm-version and rocm-arch from matrix to backend_build.yml - Add 18 new ROCm 7.x matrix entries (tag suffix -gpu-rocm7-hipblas-*) for all existing hipblas backends; base-image ubuntu:24.04, skip-drivers false so the Dockerfile installs ROCm from AMD apt repo Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/backend.yml | 273 ++++++++++++++++++++++++++++ .github/workflows/backend_build.yml | 14 ++ backend/Dockerfile.golang | 33 +++- backend/Dockerfile.llama-cpp | 60 +++++- backend/Dockerfile.python | 37 +++- 5 files changed, 403 insertions(+), 14 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 0ec9bcf589f7..672beb6f2d8d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -30,6 +30,8 
@@ jobs: skip-drivers: ${{ matrix.skip-drivers }} context: ${{ matrix.context }} ubuntu-version: ${{ matrix.ubuntu-version }} + rocm-version: ${{ matrix.rocm-version || '6' }} + rocm-arch: ${{ matrix.rocm-arch || 'gfx1151' }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -1409,6 +1411,277 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + # ROCm 7.x hipblas builds (ubuntu:24.04 base + AMD apt repo; supports gfx1151/RDNA3.5+) + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-rerankers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rerankers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-llama-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vllm' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "vllm" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vllm-omni' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: 
"vllm-omni" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-transformers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "transformers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-diffusers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-ace-step' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "ace-step" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-kokoro' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "kokoro" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vibevoice' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + 
backend: "vibevoice" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-qwen-asr' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen-asr" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-nemo' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-qwen-tts' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-fish-speech' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "fish-speech" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-voxcpm' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + 
backend: "voxcpm" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-pocket-tts' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "pocket-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-faster-whisper' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-whisperx' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-coqui' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "coqui" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + rocm-arch: 'gfx1151' # sycl builds - build-type: 'intel' cuda-major-version: "" diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml index 0022238c61ab..5c4ec8dc5fb9 100644 --- a/.github/workflows/backend_build.yml +++ 
b/.github/workflows/backend_build.yml @@ -58,6 +58,16 @@ on: required: false default: '2204' type: string + rocm-version: + description: 'ROCm major version (6 = ROCm 6.x with hipblas-dev/rocblas-dev, 7 = ROCm 7.x with amdrocm-* packages)' + required: false + default: '6' + type: string + rocm-arch: + description: 'ROCm GPU architecture for ROCm 7.x arch-specific packages (e.g. gfx1151)' + required: false + default: 'gfx1151' + type: string secrets: dockerUsername: required: false @@ -214,6 +224,8 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + ROCM_VERSION=${{ inputs.rocm-version }} + ROCM_ARCH=${{ inputs.rocm-arch }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha @@ -235,6 +247,8 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + ROCM_VERSION=${{ inputs.rocm-version }} + ROCM_ARCH=${{ inputs.rocm-arch }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 3bf15c508ea7..00a2f61e252c 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -14,6 +14,11 @@ ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 ARG UBUNTU_VERSION=2404 +# ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from +# rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). +ARG ROCM_VERSION=6 +# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). 
+ARG ROCM_ARCH=gfx1151 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -144,16 +149,36 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. + mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. 
We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 3930d04d4aba..ea89328a8f6d 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -71,6 +71,11 @@ ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 ARG UBUNTU_VERSION=2404 +# ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from +# rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). +ARG ROCM_VERSION=6 +# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). +ARG ROCM_ARCH=gfx1151 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -201,16 +206,63 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. + mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg git && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + # amdrocm-llvm: ROCm LLVM/Clang toolchain (compiler for HIP kernels) + # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. 
+ # In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU + # family gets its own amdrocm-core-sdk-gfxNNNN package instead. + apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing + # /opt/rocm/core-7 -> /opt/rocm/core-7.XX (e.g. core-7.12). + # Create /opt/rocm/* compat symlinks so cmake find_package(hip) and existing + # linker flags work unchanged. + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig && \ + # rocWMMA (rocwmma-dev) is not available in the ROCm 7.x apt repo. + # Install headers from source so GGML_HIP_ROCWMMA_FATTN can be compiled. + git clone --depth 1 https://github.com/ROCm/rocWMMA /tmp/rocwmma && \ + mkdir -p /opt/rocwmma-headers/rocwmma && \ + cp -r /tmp/rocwmma/library/include/rocwmma/. /opt/rocwmma-headers/rocwmma/ && \ + rm -rf /tmp/rocwmma && \ + # rocwmma-version.hpp is generated by cmake configure_file in the source tree. + # Write it directly to avoid pulling in cmake at this stage. + printf '#ifndef ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_VERSION_MAJOR 2\n#define ROCWMMA_VERSION_MINOR 2\n#define ROCWMMA_VERSION_PATCH 0\n#endif\n' \ + > /opt/rocwmma-headers/rocwmma/rocwmma-version.hpp ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. + # ROCm lib packages don't trigger ldconfig - run it manually. 
apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi + +# Set CPATH so the compiler finds rocWMMA headers during compilation. +# Harmless on non-hipblas builds (the path does not exist there). No trailing ':' is emitted when CPATH is unset: an empty CPATH element makes GCC/Clang search the current directory. +ENV CPATH=/opt/rocwmma-headers${CPATH:+:${CPATH}} + +RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ +fi RUN echo "TARGETARCH: $TARGETARCH" diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 5d2e6171eb62..5f019dfbb9b7 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -13,6 +13,11 @@ ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT ARG UBUNTU_VERSION=2404 +# ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from +# rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). +ARG ROCM_VERSION=6 +# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). +ARG ROCM_ARCH=gfx1151 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -158,20 +163,40 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. 
+ mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg git && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. 
We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ - ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \ - ; fi + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ +fi # Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh From d6e164ee84f5d95b20c14e47665ae900fb37ede3 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 18:20:00 +0200 Subject: [PATCH 04/13] scripts: add all-backend support to sync-upstream.sh and build-rocm.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sync-upstream.sh: - Add BACKENDS array (18 entries: 17 Python + llama-cpp) - Step 5: build all backend images after main image (backend/Dockerfile.python with BACKEND=, backend/Dockerfile.llama-cpp) - Step 7: push backend images as latest-gfx1151- + versioned tag - Failed backends are collected and reported; push continues for successful ones - New flag --no-backends to skip backend builds (e.g. 
quick main-only rebuild) - Pass ROCM_VERSION=7 and ROCM_ARCH to all docker build calls scripts/build-rocm.sh: - Full rewrite: same BACKENDS array, same build+push logic - No git sync — pure rebuild; use sync-upstream.sh for merge+build+push - Flags: --no-push, --no-backends - Replaces stub that called non-existent make build-rocm-7xx target Co-Authored-By: Claude Sonnet 4.6 --- scripts/build-rocm.sh | 175 +++++++++++++++++++++++++++ sync-upstream.sh | 269 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 444 insertions(+) create mode 100755 scripts/build-rocm.sh create mode 100755 sync-upstream.sh diff --git a/scripts/build-rocm.sh b/scripts/build-rocm.sh new file mode 100755 index 000000000000..1eae4eb0c790 --- /dev/null +++ b/scripts/build-rocm.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# ============================================================================= +# LocalAI ROCm 7.x Rebuild Script — gfx1151 Fork +# ============================================================================= +# Builds (and optionally pushes) the main LocalAI image plus all ROCm 7.x +# backend images. No git sync — use sync-upstream.sh for merge + build + push. 
+# +# Usage: +# bash scripts/build-rocm.sh # build all + push +# bash scripts/build-rocm.sh --no-push # build all, no registry push +# bash scripts/build-rocm.sh --no-backends # main image only +# ROCM_VERSION=7.13 bash scripts/build-rocm.sh +# ROCM_ARCH=gfx1150 bash scripts/build-rocm.sh +# ============================================================================= +set -euo pipefail + +REGISTRY="${REGISTRY:-192.168.178.127:5000}" +ROCM_VERSION="${ROCM_VERSION:-7.12}" +ROCM_ARCH="${ROCM_ARCH:-gfx1151}" +NO_PUSH=false +NO_BACKENDS=false + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BOLD='\033[1m'; NC='\033[0m' + +for arg in "$@"; do + case $arg in + --no-push) NO_PUSH=true ;; + --no-backends) NO_BACKENDS=true ;; + esac +done + +# --------------------------------------------------------------------------- +# Backend list — keep in sync with sync-upstream.sh +# Format: "BACKEND_NAME|DOCKERFILE_TYPE" +# --------------------------------------------------------------------------- +BACKENDS=( + "rerankers|python" + "llama-cpp|llama-cpp" + "vllm|python" + "vllm-omni|python" + "transformers|python" + "diffusers|python" + "ace-step|python" + "kokoro|python" + "vibevoice|python" + "qwen-asr|python" + "nemo|python" + "qwen-tts|python" + "fish-speech|python" + "voxcpm|python" + "pocket-tts|python" + "faster-whisper|python" + "whisperx|python" + "coqui|python" +) + +OUR_SUFFIX="gfx1151-rocm${ROCM_VERSION}" +BUILD_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "local") +UPSTREAM_VERSION=$(git describe --tags --abbrev=0 upstream/master 2>/dev/null \ + || git describe --tags --abbrev=0 2>/dev/null \ + || echo "dev") +IMAGE_TAG="${UPSTREAM_VERSION}-${OUR_SUFFIX}-${BUILD_SHA}" +LOCAL_IMAGE="localai:${OUR_SUFFIX}" + +echo -e "${BOLD}LocalAI ROCm Rebuild${NC}" +echo -e " ROCm: ${YELLOW}$ROCM_VERSION / $ROCM_ARCH${NC}" +echo -e " Tag: ${YELLOW}$IMAGE_TAG${NC}" +echo -e " Push: $( [ "$NO_PUSH" = "true" ] && echo "${YELLOW}disabled${NC}" || echo "${GREEN}enabled → 
$REGISTRY${NC}" )" +echo -e " Backends: $( [ "$NO_BACKENDS" = "true" ] && echo "${YELLOW}skipped${NC}" || echo "${GREEN}${#BACKENDS[@]} images${NC}" )" + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== Build main image ===${NC}" + +docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + --build-arg GPU_TARGETS="${ROCM_ARCH}" \ + -t "$LOCAL_IMAGE" \ + . 2>&1 | tee /tmp/localai-build-main.log + +echo -e " ${GREEN}✓ Main image built: $LOCAL_IMAGE${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== Build backend images (${#BACKENDS[@]} total) ===${NC}" + + FAILED_BACKENDS=() + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + dftype="${entry##*|}" + + case "$dftype" in + llama-cpp) dockerfile="backend/Dockerfile.llama-cpp"; backend_arg="" ;; + *) dockerfile="backend/Dockerfile.python"; backend_arg="--build-arg BACKEND=${backend}" ;; + esac + + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + echo -e "\n [${backend}] Building..." + + # shellcheck disable=SC2086 + if docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + $backend_arg \ + -f "$dockerfile" \ + -t "$local_tag" \ + . 
2>&1 | tee "/tmp/localai-build-${backend}.log"; then + echo -e " ${GREEN}✓ ${backend} OK${NC}" + else + echo -e " ${RED}✗ ${backend} FAILED (log: /tmp/localai-build-${backend}.log)${NC}" + FAILED_BACKENDS+=("$backend") + fi + done + + if [ "${#FAILED_BACKENDS[@]}" -gt 0 ]; then + echo -e "\n${RED}${BOLD}Failed backends:${NC} ${FAILED_BACKENDS[*]}" + echo -e "${YELLOW}Continuing push for successfully built images.${NC}" + else + echo -e "\n ${GREEN}✓ All backends built${NC}" + fi +fi + +# --------------------------------------------------------------------------- +if [ "$NO_PUSH" = "true" ]; then + echo -e "\n${YELLOW}--no-push set — done (no registry push).${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== Push main image ===${NC}" + +REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" +REGISTRY_LATEST="${REGISTRY}/localai:latest-gfx1151" + +docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" +docker tag "$LOCAL_IMAGE" "$REGISTRY_LATEST" + +docker push "$REGISTRY_IMAGE"; echo -e " ${GREEN}✓ $REGISTRY_IMAGE${NC}" # ';' not '&&': 'set -e' ignores a failure on the left of '&&', so a failed push must stand alone to abort +docker push "$REGISTRY_LATEST"; echo -e " ${GREEN}✓ $REGISTRY_LATEST${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== Push backend images ===${NC}" + + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + + if ! 
docker image inspect "$local_tag" &>/dev/null; then + echo -e " ${YELLOW}⚠ Skipping $backend (not built)${NC}" + continue + fi + + reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" + reg_latest="${REGISTRY}/localai-backends:latest-gfx1151-${backend}" + + docker tag "$local_tag" "$reg_versioned" + docker tag "$local_tag" "$reg_latest" + docker push "$reg_versioned"; echo -e " ${GREEN}✓ $reg_versioned${NC}" # ';' not '&&': 'set -e' ignores a failure on the left of '&&', so a failed push must stand alone to abort + docker push "$reg_latest"; echo -e " ${GREEN}✓ $reg_latest${NC}" + done +fi + +# --------------------------------------------------------------------------- +echo -e "\n${GREEN}${BOLD}=== Done ===${NC}" +echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" +echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" +if [ "$NO_BACKENDS" = "false" ]; then + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-gfx1151-${NC}" +fi +echo "" +echo -e " Deploy:" +echo -e " ${YELLOW}docker compose -f docker-compose-gfx1151.yaml up -d localai --force-recreate${NC}" diff --git a/sync-upstream.sh b/sync-upstream.sh new file mode 100755 index 000000000000..762530de9b90 --- /dev/null +++ b/sync-upstream.sh @@ -0,0 +1,269 @@ +#!/bin/bash +# ============================================================================= +# LocalAI Upstream Sync — gfx1151 / ROCm 7.x Fork +# ============================================================================= +# Usage: +# bash sync-upstream.sh # fetch + merge + build all + push +# bash sync-upstream.sh --dry-run # fetch + merge only (no build) +# bash sync-upstream.sh --no-push # build but no registry push +# bash sync-upstream.sh --no-backends # main image only, skip backend images +# ROCM_VERSION=7.13 bash sync-upstream.sh +# +# Image tags (main image): +# ${REGISTRY}/localai:${UPSTREAM_VERSION}-gfx1151-rocm${ROCM_VERSION}-${BUILD_SHA} (build-specific) +# ${REGISTRY}/localai:${UPSTREAM_VERSION}-gfx1151-rocm${ROCM_VERSION} (rolling per version) +# ${REGISTRY}/localai:latest-gfx1151 (rolling latest) +# +# Image tags (each backend): +# ${REGISTRY}/localai-backends:${UPSTREAM_VERSION}-gfx1151-rocm${ROCM_VERSION}-${BUILD_SHA}-${backend} +# ${REGISTRY}/localai-backends:latest-gfx1151-${backend} +# +# ROCm version 
changes only when a new ROCm release ships — override via env var. +# On conflict: script stops, resolve manually, git commit, then re-run. +# ============================================================================= +set -euo pipefail + +REGISTRY="${REGISTRY:-192.168.178.127:5000}" +ROCM_VERSION="${ROCM_VERSION:-7.12}" +ROCM_ARCH="${ROCM_ARCH:-gfx1151}" +UPSTREAM_REMOTE="${UPSTREAM_REMOTE:-upstream}" +UPSTREAM_BRANCH="${UPSTREAM_BRANCH:-master}" +DRY_RUN=false +NO_PUSH=false +NO_BACKENDS=false + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BOLD='\033[1m'; NC='\033[0m' + +for arg in "$@"; do + case $arg in + --dry-run) DRY_RUN=true ;; + --no-push) NO_PUSH=true ;; + --no-backends) NO_BACKENDS=true ;; + esac +done + +# --------------------------------------------------------------------------- +# Backend list — all ROCm 7.x backend images to build. +# Format: "BACKEND_NAME|DOCKERFILE_TYPE" +# python → backend/Dockerfile.python (passes --build-arg BACKEND=) +# llama-cpp → backend/Dockerfile.llama-cpp +# --------------------------------------------------------------------------- +BACKENDS=( + "rerankers|python" + "llama-cpp|llama-cpp" + "vllm|python" + "vllm-omni|python" + "transformers|python" + "diffusers|python" + "ace-step|python" + "kokoro|python" + "vibevoice|python" + "qwen-asr|python" + "nemo|python" + "qwen-tts|python" + "fish-speech|python" + "voxcpm|python" + "pocket-tts|python" + "faster-whisper|python" + "whisperx|python" + "coqui|python" +) + +OUR_SUFFIX="gfx1151-rocm${ROCM_VERSION}" + +# --------------------------------------------------------------------------- +echo -e "${BOLD}=== 1. 
Upstream fetch ===${NC}" +git fetch "$UPSTREAM_REMOTE" + +UPSTREAM_VERSION=$(git describe --tags --abbrev=0 "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" 2>/dev/null || echo "dev") +echo -e " Upstream: ${YELLOW}$UPSTREAM_REMOTE/$UPSTREAM_BRANCH${NC} @ ${GREEN}$UPSTREAM_VERSION${NC}" +echo -e " ROCm: ${YELLOW}$ROCM_VERSION${NC} / arch ${YELLOW}$ROCM_ARCH${NC}" + +BEHIND=$(git rev-list HEAD.."$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" --count) +if [ "$BEHIND" = "0" ]; then + echo -e " ${GREEN}✓ Already up to date — no merge needed${NC}" + if [ "$DRY_RUN" = "true" ]; then exit 0; fi +else + echo -e " ${YELLOW}⚠ $BEHIND new upstream commits${NC}" + + # ------------------------------------------------------------------------- + echo -e "\n${BOLD}=== 2. Merge upstream/$UPSTREAM_BRANCH ===${NC}" + if ! git merge "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" --no-edit \ + -m "chore: merge upstream $UPSTREAM_VERSION into gfx1151 fork"; then + echo -e "\n${RED}✗ Merge conflicts! Manual resolution required:${NC}" + echo "" + git diff --name-only --diff-filter=U + echo "" + echo -e " Steps:" + echo -e " 1. Fix conflicts in the files listed above" + echo -e " 2. ${YELLOW}git add ${NC} for each resolved file" + echo -e " 3. ${YELLOW}git commit${NC} to complete the merge" + echo -e " 4. Re-run this script" + echo "" + echo -e " Or abort: ${YELLOW}git merge --abort${NC}" + exit 1 + fi + echo -e " ${GREEN}✓ Merge successful${NC}" +fi + +if [ "$DRY_RUN" = "true" ]; then + echo -e "\n${YELLOW}Dry-run — no build.${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 3. Pre-build checks ===${NC}" +FAIL=0 + +if grep -q "^ENV GGML_CUDA_ENABLE_UNIFIED_MEMORY" Dockerfile 2>/dev/null; then + echo -e " ${RED}✗ GGML_CUDA_ENABLE_UNIFIED_MEMORY in Dockerfile! 
Remove it.${NC}" + FAIL=1 +else + echo -e " ${GREEN}✓ GGML_CUDA_ENABLE_UNIFIED_MEMORY not in Dockerfile${NC}" +fi + +if grep -qE "core-7\.[0-9]" backend/Dockerfile.llama-cpp backend/Dockerfile.python 2>/dev/null; then + echo -e " ${RED}✗ Hardcoded core-7.XX in backend Dockerfiles — use core-7 (update-alternatives).${NC}" + grep -n "core-7\.[0-9]" backend/Dockerfile.llama-cpp backend/Dockerfile.python 2>/dev/null || true + FAIL=1 +else + echo -e " ${GREEN}✓ No hardcoded core-7.XX in backend Dockerfiles${NC}" +fi + +[ "$FAIL" = "1" ] && { echo -e "\n${RED}Checks failed. Build aborted.${NC}"; exit 1; } + +# --------------------------------------------------------------------------- +BUILD_SHA=$(git rev-parse --short HEAD) +VERSION_TAG="${UPSTREAM_VERSION}-${OUR_SUFFIX}" +IMAGE_TAG="${VERSION_TAG}-${BUILD_SHA}" +LOCAL_IMAGE="localai:${OUR_SUFFIX}" + +echo -e "\n${BOLD}=== 4. Build main image ===${NC}" +echo -e " Build tag: ${YELLOW}$IMAGE_TAG${NC}" +echo -e " Version tag: ${YELLOW}$VERSION_TAG${NC}" +echo -e " Latest tag: ${YELLOW}latest-gfx1151${NC}" + +docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + --build-arg GPU_TARGETS="${ROCM_ARCH}" \ + -t "$LOCAL_IMAGE" \ + . 2>&1 | tee /tmp/localai-build-main.log + +echo -e " ${GREEN}✓ Main image built${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== 5. Build backend images (${#BACKENDS[@]} total) ===${NC}" + + FAILED_BACKENDS=() + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + dftype="${entry##*|}" + + case "$dftype" in + llama-cpp) dockerfile="backend/Dockerfile.llama-cpp"; backend_arg="" ;; + *) dockerfile="backend/Dockerfile.python"; backend_arg="--build-arg BACKEND=${backend}" ;; + esac + + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + echo -e "\n [${backend}] Building..." 
+ + # shellcheck disable=SC2086 + if docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + $backend_arg \ + -f "$dockerfile" \ + -t "$local_tag" \ + . 2>&1 | tee "/tmp/localai-build-${backend}.log"; then + echo -e " ${GREEN}✓ ${backend} OK${NC}" + else + echo -e " ${RED}✗ ${backend} FAILED (log: /tmp/localai-build-${backend}.log)${NC}" + FAILED_BACKENDS+=("$backend") + fi + done + + if [ "${#FAILED_BACKENDS[@]}" -gt 0 ]; then + echo -e "\n${RED}${BOLD}Failed backends:${NC} ${FAILED_BACKENDS[*]}" + echo -e "${YELLOW}Continuing push for successfully built images.${NC}" + else + echo -e "\n ${GREEN}✓ All backends built successfully${NC}" + fi +fi + +# --------------------------------------------------------------------------- +if [ "$NO_PUSH" = "true" ]; then + echo -e "\n${YELLOW}--no-push set — skipping registry push.${NC}" + echo -e " Local main image: ${GREEN}$LOCAL_IMAGE${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 6. Push main image ===${NC}" + +REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" +REGISTRY_VERSION="${REGISTRY}/localai:${VERSION_TAG}" +REGISTRY_LATEST="${REGISTRY}/localai:latest-gfx1151" + +docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" +docker tag "$LOCAL_IMAGE" "$REGISTRY_VERSION" +docker tag "$LOCAL_IMAGE" "$REGISTRY_LATEST" + +docker push "$REGISTRY_IMAGE"; echo -e " ${GREEN}✓ $REGISTRY_IMAGE${NC}" # ';' not '&&': 'set -e' ignores a failure on the left of '&&', so a failed push must stand alone to abort +docker push "$REGISTRY_VERSION"; echo -e " ${GREEN}✓ $REGISTRY_VERSION${NC}" +docker push "$REGISTRY_LATEST"; echo -e " ${GREEN}✓ $REGISTRY_LATEST${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== 7. Push backend images ===${NC}" + + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + + # Skip backends that failed to build + if ! 
docker image inspect "$local_tag" &>/dev/null; then + echo -e " ${YELLOW}⚠ Skipping $backend (not built)${NC}" + continue + fi + + reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" + reg_latest="${REGISTRY}/localai-backends:latest-gfx1151-${backend}" + + docker tag "$local_tag" "$reg_versioned" + docker tag "$local_tag" "$reg_latest" + docker push "$reg_versioned"; echo -e " ${GREEN}✓ $reg_versioned${NC}" # ';' not '&&': 'set -e' ignores a failure on the left of '&&', so a failed push must stand alone to abort + docker push "$reg_latest"; echo -e " ${GREEN}✓ $reg_latest${NC}" + done +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 8. Git tag ===${NC}" +GIT_TAG="${IMAGE_TAG}" +if git show-ref --verify --quiet "refs/tags/${GIT_TAG}"; then # exact ref lookup: no regex on dots in the tag, no pipe to break under pipefail + echo -e " ${YELLOW}Tag $GIT_TAG already exists — skipped${NC}" +else + git tag "$GIT_TAG" + git push origin "$GIT_TAG" 2>/dev/null || echo -e " ${YELLOW}(Tag push failed — set locally)${NC}" + echo -e " ${GREEN}✓ Tag: $GIT_TAG${NC}" +fi + +# --------------------------------------------------------------------------- +echo -e "\n${GREEN}${BOLD}=== Done ===${NC}" +echo -e " Upstream: ${GREEN}$UPSTREAM_VERSION${NC}" +echo -e " ROCm: ${GREEN}$ROCM_VERSION / $ROCM_ARCH${NC}" +echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" +echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" +if [ "$NO_BACKENDS" = "false" ]; then + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-gfx1151-${NC}" +fi +echo "" +echo -e " Deploy:" +echo -e " ${YELLOW}docker compose -f docker-compose-gfx1151.yaml up -d localai --force-recreate${NC}" +echo "" +echo -e " Next run:" +echo -e " ${YELLOW}bash sync-upstream.sh${NC}" +echo -e " ${YELLOW}ROCM_VERSION=7.13 bash sync-upstream.sh${NC} (when new ROCm version ships)" From 85511487f28ad3d4bd657d96334cd39c98d178ac Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 18:38:56 +0200 Subject: [PATCH 05/13] feat(rocm): support all ROCm 7.12 GPU architectures (not just gfx1151) Addresses reviewer feedback: the ROCm 7.x build should 
target all GPU architectures listed in https://rocm.docs.amd.com/en/7.12.0-preview/, not just gfx1151 (Strix Halo). backend/cpp/llama-cpp/Makefile: - Expand AMDGPU_TARGETS default to the full ROCm 7.12 supported list: gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1012 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 - Remove gfx1010 (dropped from ROCm support in 6.x, absent in 7.x) - Add new arches: gfx950 (MI350), gfx1012 (Navi14), gfx1031 (Navi22), gfx1103 (Phoenix APU), gfx1150 (Strix Point), gfx1152 (Krackan) Dockerfile, backend/Dockerfile.{llama-cpp,python,golang}: - Change ARG ROCM_ARCH default from single 'gfx1151' to the full comma-separated arch list (matches the AMDGPU_TARGETS expansion above) - Replace single 'apt-get install amdrocm-core-sdk-${ROCM_ARCH}' with a shell loop that installs amdrocm-core-sdk-gfxNNNN for each arch in the list; packages missing from the ROCm 7.x apt repo (e.g. old GCN arches) are skipped with a warning rather than failing the build - Single-arch builds remain possible: pass ROCM_ARCH=gfx1151 to get a smaller image optimised for one GPU family .github/workflows/backend_build.yml: - Change rocm-arch input default to the full arch list .github/workflows/backend.yml: - Update rocm-arch fallback in the with: block to the full arch list - Remove explicit rocm-arch: 'gfx1151' from all 18 ROCm 7.x matrix entries (they now use the workflow default = all arches) Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/backend.yml | 20 +------------------- .github/workflows/backend_build.yml | 4 ++-- Dockerfile | 21 +++++++++++++++------ backend/Dockerfile.golang | 12 +++++++++--- backend/Dockerfile.llama-cpp | 12 +++++++++--- backend/Dockerfile.python | 12 +++++++++--- backend/cpp/llama-cpp/Makefile | 4 +++- 7 files changed, 48 insertions(+), 37 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 672beb6f2d8d..0806535ae2e8 100644 --- 
a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -31,7 +31,7 @@ jobs: context: ${{ matrix.context }} ubuntu-version: ${{ matrix.ubuntu-version }} rocm-version: ${{ matrix.rocm-version || '6' }} - rocm-arch: ${{ matrix.rocm-arch || 'gfx1151' }} + rocm-arch: ${{ matrix.rocm-arch || 'gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201' }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -1426,7 +1426,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1441,7 +1440,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1456,7 +1454,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1471,7 +1468,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1486,7 +1482,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1501,7 +1496,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1516,7 +1510,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1531,7 +1524,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1546,7 +1538,6 @@ 
jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1561,7 +1552,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1576,7 +1566,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1591,7 +1580,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1606,7 +1594,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1621,7 +1608,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1636,7 +1622,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1651,7 +1636,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1666,7 +1650,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1681,7 +1664,6 @@ jobs: context: "./" ubuntu-version: '2404' rocm-version: '7' - rocm-arch: 'gfx1151' # sycl builds - build-type: 'intel' cuda-major-version: "" diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml index 5c4ec8dc5fb9..2f6f0b19e32a 100644 --- a/.github/workflows/backend_build.yml +++ b/.github/workflows/backend_build.yml @@ -64,9 +64,9 @@ on: default: '6' type: string rocm-arch: - 
description: 'ROCm GPU architecture for ROCm 7.x arch-specific packages (e.g. gfx1151)' + description: 'Comma-separated GPU architectures for ROCm 7.x. Default covers all GPUs supported by ROCm 7.12. Override to a single arch (e.g. gfx1151) for smaller images.' required: false - default: 'gfx1151' + default: 'gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201' type: string secrets: dockerUsername: diff --git a/Dockerfile b/Dockerfile index 4a051c57180f..730c6301f21b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,11 +29,12 @@ ARG TARGETVARIANT # - "7": install from AMD's new repo.amd.com/rocm/packages/ubuntu2404 repo and use # the new amdrocm-* package names introduced in ROCm 7.x ARG ROCM_VERSION=6 -# ROCM_ARCH: GPU architecture target for ROCm 7.x architecture-specific packages. -# In ROCm 7.x each GPU family has a dedicated metapackage (amdrocm-core-sdk-gfxNNNN) -# that includes the BLAS kernel objects for that architecture. -# Examples: gfx1151 (Strix Halo / Radeon 8060S), gfx1100 (RX 7900 XTX), gfx942 (MI300X) -ARG ROCM_ARCH=gfx1151 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Controls both the apt packages installed (amdrocm-core-sdk-gfxNNNN per arch) +# and the AMDGPU_TARGETS passed to cmake/hipcc. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. +ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 ENV BUILD_TYPE=${BUILD_TYPE} ARG UBUNTU_VERSION=2404 @@ -172,7 +173,15 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. 
# In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU # family gets its own amdrocm-core-sdk-gfxNNNN package instead. - apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + # Install arch-specific SDK packages for each GPU target in ROCM_ARCH. + # amdrocm-core-sdk-gfxNNNN provides pre-compiled BLAS/CK/DNN kernels for + # that GPU family. Packages for old GCN arches (gfx803/900/906) may not + # exist in the ROCm 7.x repo; the || echo fallback skips unavailable packages. + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available in ROCm 7.x repo, skipping" >&2 ; \ + done && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 00a2f61e252c..eec99e20c725 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -17,8 +17,10 @@ ARG UBUNTU_VERSION=2404 # ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from # rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). ARG ROCM_VERSION=6 -# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). -ARG ROCM_ARCH=gfx1151 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. 
+ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -159,7 +161,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ apt-get update && \ - apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index ea89328a8f6d..7bb22fd6d601 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -74,8 +74,10 @@ ARG UBUNTU_VERSION=2404 # ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from # rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). ARG ROCM_VERSION=6 -# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). -ARG ROCM_ARCH=gfx1151 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. 
+ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -220,7 +222,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. # In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU # family gets its own amdrocm-core-sdk-gfxNNNN package instead. - apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 5f019dfbb9b7..808218eff814 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -16,8 +16,10 @@ ARG UBUNTU_VERSION=2404 # ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from # rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). ARG ROCM_VERSION=6 -# ROCM_ARCH: GPU-family for ROCm 7.x arch-specific metapackage (amdrocm-core-sdk-gfxNNNN). -ARG ROCM_ARCH=gfx1151 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. 
+ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -173,7 +175,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ apt-get update && \ - apt-get install -y --no-install-recommends amdrocm-llvm amdrocm-core-sdk-${ROCM_ARCH} && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index b1ef1861d1e8..17121508fc1a 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -33,7 +33,9 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201 + # Full list of GPU architectures supported by ROCm 7.12 (https://rocm.docs.amd.com/en/7.12.0-preview/). + # gfx1010 was removed — it was dropped in ROCm 6.x and is not present in 7.x. 
+ AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 # -DHIP_PLATFORM=amd is required for ROCm 7.x cmake find_package(hip) to locate the HIP SDK. # -DGGML_HIP_ROCWMMA_FATTN=ON enables rocWMMA-accelerated Flash Attention on RDNA 3+ (gfx1100+). CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) -DGGML_HIP_ROCWMMA_FATTN=ON From 5f528de49f16196ea58351410694f6590dac4908 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 19:29:03 +0200 Subject: [PATCH 06/13] fix(llama-cpp): always remove stale variant build dirs before make MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The llama-cpp-fallback-build/ and llama-cpp-grpc-build/ directories each contain their own llama.cpp/ subdirectory copied from the host. When CUDA_DOCKER_ARCH is unset (the normal case for ROCm/hipblas builds), the rm -rf was skipped, causing the Makefile to reuse a stale llama.cpp checkout that lacked common/chat-auto-parser.h — resulting in a build failure. Move the unconditional cleanup before the CUDA_DOCKER_ARCH block so all build paths benefit from a clean slate. Also update scripts/build-rocm.sh default ROCM_ARCH to the full ROCm 7.12 architecture list instead of the single gfx1151 value, consistent with the other Dockerfile defaults. Co-Authored-By: Claude Sonnet 4.6 --- backend/Dockerfile.llama-cpp | 5 ++++- scripts/build-rocm.sh | 10 +++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 7bb22fd6d601..dc834432342c 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -309,11 +309,14 @@ COPY . 
/LocalAI RUN <<'EOT' bash set -euxo pipefail +# Always remove stale variant build dirs so the Makefile re-clones llama.cpp +# at the correct commit instead of reusing a stale host-side checkout. +rm -rf /LocalAI/backend/cpp/llama-cpp-*-build + if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}" export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}" echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}" - rm -rf /LocalAI/backend/cpp/llama-cpp-*-build fi if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then diff --git a/scripts/build-rocm.sh b/scripts/build-rocm.sh index 1eae4eb0c790..c6cb4b057eb5 100755 --- a/scripts/build-rocm.sh +++ b/scripts/build-rocm.sh @@ -1,22 +1,26 @@ #!/bin/bash # ============================================================================= -# LocalAI ROCm 7.x Rebuild Script — gfx1151 Fork +# LocalAI ROCm 7.x Rebuild Script # ============================================================================= # Builds (and optionally pushes) the main LocalAI image plus all ROCm 7.x # backend images. No git sync — use sync-upstream.sh for merge + build + push. # +# By default builds for ALL GPU architectures supported by ROCm 7.12. 
+# Override ROCM_ARCH to a subset for faster/smaller local builds, e.g.: +# ROCM_ARCH=gfx1151 bash scripts/build-rocm.sh +# # Usage: # bash scripts/build-rocm.sh # build all + push # bash scripts/build-rocm.sh --no-push # build all, no registry push # bash scripts/build-rocm.sh --no-backends # main image only # ROCM_VERSION=7.13 bash scripts/build-rocm.sh -# ROCM_ARCH=gfx1150 bash scripts/build-rocm.sh +# ROCM_ARCH=gfx1150,gfx1151 bash scripts/build-rocm.sh # ============================================================================= set -euo pipefail REGISTRY="${REGISTRY:-192.168.178.127:5000}" ROCM_VERSION="${ROCM_VERSION:-7.12}" -ROCM_ARCH="${ROCM_ARCH:-gfx1151}" +ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" NO_PUSH=false NO_BACKENDS=false From 76c1f9832d5b0fa2f5a89553bd187d088f0a9f07 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 19:37:49 +0200 Subject: [PATCH 07/13] fix(llama-cpp): remove GGML_HIP_ROCWMMA_FATTN for multi-arch fat binary builds rocWMMA does not support all GPU architectures in the AMDGPU_TARGETS list (e.g. gfx1103/Phoenix APU is unsupported), causing a build failure in fattn-wmma-f16.cu with "Unsupported architecture" when building a fat binary that covers all ROCm 7.12 supported GPUs. The WMMA FlashAttention implementation is itself marked deprecated in llama.cpp source. Keep GGML_HIP_ROCWMMA_FATTN at its cmake default (OFF) so the build works for all architectures. 
Co-Authored-By: Claude Sonnet 4.6 --- backend/cpp/llama-cpp/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 17121508fc1a..5c6efabeda34 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -37,8 +37,10 @@ else ifeq ($(BUILD_TYPE),hipblas) # gfx1010 was removed — it was dropped in ROCm 6.x and is not present in 7.x. AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 # -DHIP_PLATFORM=amd is required for ROCm 7.x cmake find_package(hip) to locate the HIP SDK. - # -DGGML_HIP_ROCWMMA_FATTN=ON enables rocWMMA-accelerated Flash Attention on RDNA 3+ (gfx1100+). - CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) -DGGML_HIP_ROCWMMA_FATTN=ON + # GGML_HIP_ROCWMMA_FATTN is left at its cmake default (OFF) because rocWMMA does not + # support all architectures in the AMDGPU_TARGETS list (e.g. gfx1103 is unsupported), + # which would cause a build failure in fattn-wmma-f16.cu for multi-arch fat binaries. + CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=1 else ifeq ($(OS),Darwin) From b2a10f167b5609916f9606422fa5170321cee022 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 19:49:17 +0200 Subject: [PATCH 08/13] fix(llama-cpp): restrict AMDGPU_TARGETS to rocWMMA-compatible arches GGML_HIP_ROCWMMA_FATTN=ON requires rocWMMA support at compile time. 
rocWMMA (https://github.com/ROCm/rocWMMA) only supports a subset of the GPU architectures listed in ROCm 7.12, specifically: gfx908, gfx90a, gfx942, gfx950 (CDNA) gfx1100, gfx1101, gfx1102 (RDNA 3) gfx1150, gfx1151 (RDNA 3.5) gfx1200, gfx1201 (RDNA 4) Architectures without rocWMMA support (gfx803, gfx900, gfx906, gfx1012, gfx1030-gfx1032, gfx1103, gfx1152) trigger static_assert(0, "Unsupported architecture") in the rocWMMA headers when included in a multi-arch fat binary build, causing a build failure. Set the default AMDGPU_TARGETS to the rocWMMA-compatible subset so GGML_HIP_ROCWMMA_FATTN=ON can be kept for its ~50% FlashAttention speedup. Users who need support for older RDNA2/GCN hardware can override: make AMDGPU_TARGETS=gfx803,...,gfx1201 (and remove -DGGML_HIP_ROCWMMA_FATTN=ON) Co-Authored-By: Claude Sonnet 4.6 --- backend/cpp/llama-cpp/Makefile | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 5c6efabeda34..f4a5d974c661 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -33,14 +33,18 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - # Full list of GPU architectures supported by ROCm 7.12 (https://rocm.docs.amd.com/en/7.12.0-preview/). - # gfx1010 was removed — it was dropped in ROCm 6.x and is not present in 7.x. - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 + # AMDGPU_TARGETS: GPU architectures that support rocWMMA (https://github.com/ROCm/rocWMMA). + # rocWMMA is required for GGML_HIP_ROCWMMA_FATTN below (~50% FlashAttention speedup). 
+ # Architectures NOT in rocWMMA (gfx803, gfx900, gfx906, gfx1012, gfx1030-gfx1032, + # gfx1103, gfx1152) are excluded because -DGGML_HIP_ROCWMMA_FATTN=ON would trigger + # a static_assert("Unsupported architecture") in the rocWMMA headers for those targets. + # To build for all ROCm 7.12 GPUs at the cost of the rocWMMA optimisation, override with: + # make ... AMDGPU_TARGETS=gfx803,...,gfx1201 and remove -DGGML_HIP_ROCWMMA_FATTN=ON + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1100,gfx1101,gfx1102,gfx1150,gfx1151,gfx1200,gfx1201 # -DHIP_PLATFORM=amd is required for ROCm 7.x cmake find_package(hip) to locate the HIP SDK. - # GGML_HIP_ROCWMMA_FATTN is left at its cmake default (OFF) because rocWMMA does not - # support all architectures in the AMDGPU_TARGETS list (e.g. gfx1103 is unsupported), - # which would cause a build failure in fattn-wmma-f16.cu for multi-arch fat binaries. - CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) + # -DGGML_HIP_ROCWMMA_FATTN=ON enables rocWMMA-accelerated FlashAttention (~50% speedup + # on supported RDNA 3+ and CDNA 2+ architectures listed in AMDGPU_TARGETS above). + CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) -DGGML_HIP_ROCWMMA_FATTN=ON else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=1 else ifeq ($(OS),Darwin) From f277282c5532512a0db917ea0ff341d5792c6338 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 20:21:47 +0200 Subject: [PATCH 09/13] refactor(scripts): rename image tags from gfx1151 to rocm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ROCm 7.x build is no longer GPU-arch-specific — it supports all rocWMMA-compatible architectures from gfx908 to gfx1201. The old gfx1151-rocm tag naming was misleading. New scheme: rocm (e.g. 
rocm7.12), matching the paradigm shift where ROCm 7.x uses entirely new amdrocm-* packages vs ROCm 6.x. Also: - Update ROCM_ARCH defaults to full ROCm 7.12 arch list in sync-upstream.sh - Use generic deploy command (docker compose pull/up) in final summary Co-Authored-By: Claude Sonnet 4.6 --- scripts/build-rocm.sh | 17 ++++++++++++----- sync-upstream.sh | 33 +++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/scripts/build-rocm.sh b/scripts/build-rocm.sh index c6cb4b057eb5..3cc27a39395a 100755 --- a/scripts/build-rocm.sh +++ b/scripts/build-rocm.sh @@ -5,6 +5,10 @@ # Builds (and optionally pushes) the main LocalAI image plus all ROCm 7.x # backend images. No git sync — use sync-upstream.sh for merge + build + push. # +# Tags use the scheme rocm<ROCM_VERSION> (e.g. rocm7.12), not a GPU-specific +# name, because ROCm 7.x is a distinct build/install paradigm vs. ROCm 6.x and +# the resulting images support all rocWMMA-capable architectures. +# # By default builds for ALL GPU architectures supported by ROCm 7.12. # Override ROCM_ARCH to a subset for faster/smaller local builds, e.g.: # ROCM_ARCH=gfx1151 bash scripts/build-rocm.sh @@ -58,7 +62,9 @@ BACKENDS=( "coqui|python" ) -OUR_SUFFIX="gfx1151-rocm${ROCM_VERSION}" +# Tag suffix: rocm<ROCM_VERSION> reflects the ROCm 7.x build paradigm +# (new amdrocm-* packages, different install path) not a specific GPU arch. +OUR_SUFFIX="rocm${ROCM_VERSION}" BUILD_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "local") UPSTREAM_VERSION=$(git describe --tags --abbrev=0 upstream/master 2>/dev/null \ || git describe --tags --abbrev=0 2>/dev/null \ @@ -135,8 +141,9 @@ fi # --------------------------------------------------------------------------- echo -e "\n${BOLD}=== Push main image ===${NC}" +ROCM_MAJOR="${ROCM_VERSION%%.*}" # e.g. 
"7" from "7.12" REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" -REGISTRY_LATEST="${REGISTRY}/localai:latest-gfx1151" +REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" docker tag "$LOCAL_IMAGE" "$REGISTRY_LATEST" @@ -158,7 +165,7 @@ if [ "$NO_BACKENDS" = "false" ]; then fi reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" - reg_latest="${REGISTRY}/localai-backends:latest-gfx1151-${backend}" + reg_latest="${REGISTRY}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" docker tag "$local_tag" "$reg_versioned" docker tag "$local_tag" "$reg_latest" @@ -172,8 +179,8 @@ echo -e "\n${GREEN}${BOLD}=== Done ===${NC}" echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" if [ "$NO_BACKENDS" = "false" ]; then - echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-gfx1151-${NC}" + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-rocm${ROCM_MAJOR}-${NC}" fi echo "" echo -e " Deploy:" -echo -e " ${YELLOW}docker compose -f docker-compose-gfx1151.yaml up -d localai --force-recreate${NC}" +echo -e " ${YELLOW}docker compose pull localai && docker compose up -d localai --force-recreate${NC}" diff --git a/sync-upstream.sh b/sync-upstream.sh index 762530de9b90..4de3809cc49e 100755 --- a/sync-upstream.sh +++ b/sync-upstream.sh @@ -1,6 +1,6 @@ #!/bin/bash # ============================================================================= -# LocalAI Upstream Sync — gfx1151 / ROCm 7.x Fork +# LocalAI Upstream Sync — ROCm 7.x Fork # ============================================================================= # Usage: # bash sync-upstream.sh # fetch + merge + build all + push @@ -9,14 +9,18 @@ # bash sync-upstream.sh --no-backends # main image only, skip backend images # ROCM_VERSION=7.13 bash sync-upstream.sh # +# Image tags use rocm (e.g. 
rocm7.12), not a GPU-specific name, +# because ROCm 7.x is a distinct build/install paradigm from ROCm 6.x and the +# resulting images support all rocWMMA-capable architectures. +# # Image tags (main image): -# /localai:-gfx1151-rocm- (build-specific) -# /localai:-gfx1151-rocm (rolling per version) -# /localai:latest-gfx1151 (rolling latest) +# /localai:-rocm- (build-specific) +# /localai:-rocm (rolling per version) +# /localai:latest-rocm (rolling latest, e.g. latest-rocm7) # # Image tags (each backend): -# /localai-backends:-gfx1151-rocm-- -# /localai-backends:latest-gfx1151- +# /localai-backends:-rocm-- +# /localai-backends:latest-rocm- # # ROCm version changes only when a new ROCm release ships — override via env var. # On conflict: script stops, resolve manually, git commit, then re-run. @@ -25,7 +29,7 @@ set -euo pipefail REGISTRY="${REGISTRY:-192.168.178.127:5000}" ROCM_VERSION="${ROCM_VERSION:-7.12}" -ROCM_ARCH="${ROCM_ARCH:-gfx1151}" +ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" UPSTREAM_REMOTE="${UPSTREAM_REMOTE:-upstream}" UPSTREAM_BRANCH="${UPSTREAM_BRANCH:-master}" DRY_RUN=false @@ -69,7 +73,7 @@ BACKENDS=( "coqui|python" ) -OUR_SUFFIX="gfx1151-rocm${ROCM_VERSION}" +OUR_SUFFIX="rocm${ROCM_VERSION}" # --------------------------------------------------------------------------- echo -e "${BOLD}=== 1. Upstream fetch ===${NC}" @@ -89,7 +93,7 @@ else # ------------------------------------------------------------------------- echo -e "\n${BOLD}=== 2. Merge upstream/$UPSTREAM_BRANCH ===${NC}" if ! git merge "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" --no-edit \ - -m "chore: merge upstream $UPSTREAM_VERSION into gfx1151 fork"; then + -m "chore: merge upstream $UPSTREAM_VERSION into rocm7 fork"; then echo -e "\n${RED}✗ Merge conflicts! 
Manual resolution required:${NC}" echo "" git diff --name-only --diff-filter=U @@ -141,7 +145,7 @@ LOCAL_IMAGE="localai:${OUR_SUFFIX}" echo -e "\n${BOLD}=== 4. Build main image ===${NC}" echo -e " Build tag: ${YELLOW}$IMAGE_TAG${NC}" echo -e " Version tag: ${YELLOW}$VERSION_TAG${NC}" -echo -e " Latest tag: ${YELLOW}latest-gfx1151${NC}" +echo -e " Latest tag: ${YELLOW}latest-rocm${ROCM_VERSION%%.*}${NC}" docker build \ --build-arg BUILD_TYPE=hipblas \ @@ -204,9 +208,10 @@ fi # --------------------------------------------------------------------------- echo -e "\n${BOLD}=== 6. Push main image ===${NC}" +ROCM_MAJOR="${ROCM_VERSION%%.*}" # e.g. "7" from "7.12" REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" REGISTRY_VERSION="${REGISTRY}/localai:${VERSION_TAG}" -REGISTRY_LATEST="${REGISTRY}/localai:latest-gfx1151" +REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" docker tag "$LOCAL_IMAGE" "$REGISTRY_VERSION" @@ -231,7 +236,7 @@ if [ "$NO_BACKENDS" = "false" ]; then fi reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" - reg_latest="${REGISTRY}/localai-backends:latest-gfx1151-${backend}" + reg_latest="${REGISTRY}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" docker tag "$local_tag" "$reg_versioned" docker tag "$local_tag" "$reg_latest" @@ -258,11 +263,11 @@ echo -e " ROCm: ${GREEN}$ROCM_VERSION / $ROCM_ARCH${NC}" echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" if [ "$NO_BACKENDS" = "false" ]; then - echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-gfx1151-${NC}" + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-rocm${ROCM_MAJOR}-${NC}" fi echo "" echo -e " Deploy:" -echo -e " ${YELLOW}docker compose -f docker-compose-gfx1151.yaml up -d localai --force-recreate${NC}" +echo -e " ${YELLOW}docker compose pull localai && docker compose up -d localai --force-recreate${NC}" echo "" echo -e " Next run:" 
echo -e " ${YELLOW}bash sync-upstream.sh${NC}" From 1e676489f6e891d701c83e11aacf2f4ddcba9881 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 20:21:53 +0200 Subject: [PATCH 10/13] fix(Dockerfile.llama-cpp): resolve CPATH lint warning in ENV instruction Use ARG CPATH="" + ENV CPATH=... to satisfy Docker BuildKit's undefined variable lint check, which warned about \${CPATH:-} having no prior definition in the Dockerfile scope. Co-Authored-By: Claude Sonnet 4.6 --- backend/Dockerfile.llama-cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index dc834432342c..dfb938d58c4f 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -264,7 +264,8 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then # Set CPATH so the compiler finds rocWMMA headers during compilation. # This ENV is a no-op on non-hipblas builds (the path does not exist there). -ENV CPATH=/opt/rocwmma-headers:${CPATH:-} +ARG CPATH="" +ENV CPATH=/opt/rocwmma-headers:${CPATH} RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ From ad3a7149ab7e9f9ccdf30f738bc0193ae0591ec0 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sat, 4 Apr 2026 20:29:18 +0200 Subject: [PATCH 11/13] feat(scripts): push to both local registries on local builds Add REGISTRY2 (pointblank.ddns.net:5556) alongside the primary local registry. All docker push operations in sync-upstream.sh and scripts/build-rocm.sh now push to both REGISTRY and REGISTRY2 so images are reachable from inside and outside the LAN. Both registries can be overridden via REGISTRY / REGISTRY2 env vars. NOTE: These scripts are local fork utilities and are NOT part of the upstream PR (they contain private network addresses). 
Co-Authored-By: Claude Sonnet 4.6 --- scripts/build-rocm.sh | 29 ++++++++++++++++------------- sync-upstream.sh | 28 +++++++++++++++------------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/scripts/build-rocm.sh b/scripts/build-rocm.sh index 3cc27a39395a..4fd56e3561f1 100755 --- a/scripts/build-rocm.sh +++ b/scripts/build-rocm.sh @@ -23,6 +23,7 @@ set -euo pipefail REGISTRY="${REGISTRY:-192.168.178.127:5000}" +REGISTRY2="${REGISTRY2:-pointblank.ddns.net:5556}" ROCM_VERSION="${ROCM_VERSION:-7.12}" ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" NO_PUSH=false @@ -142,14 +143,19 @@ fi echo -e "\n${BOLD}=== Push main image ===${NC}" ROCM_MAJOR="${ROCM_VERSION%%.*}" # e.g. "7" from "7.12" -REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" -REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" -docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" -docker tag "$LOCAL_IMAGE" "$REGISTRY_LATEST" +push_image() { + local local_tag="$1" remote_tag="$2" + docker tag "$local_tag" "$remote_tag" + docker push "$remote_tag" && echo -e " ${GREEN}✓ $remote_tag${NC}" +} -docker push "$REGISTRY_IMAGE" && echo -e " ${GREEN}✓ $REGISTRY_IMAGE${NC}" -docker push "$REGISTRY_LATEST" && echo -e " ${GREEN}✓ $REGISTRY_LATEST${NC}" +for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$LOCAL_IMAGE" "${REG}/localai:${IMAGE_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:latest-rocm${ROCM_MAJOR}" +done +REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" +REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" # --------------------------------------------------------------------------- if [ "$NO_BACKENDS" = "false" ]; then @@ -164,13 +170,10 @@ if [ "$NO_BACKENDS" = "false" ]; then continue fi - reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" - reg_latest="${REGISTRY}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" 
- - docker tag "$local_tag" "$reg_versioned" - docker tag "$local_tag" "$reg_latest" - docker push "$reg_versioned" && echo -e " ${GREEN}✓ $reg_versioned${NC}" - docker push "$reg_latest" && echo -e " ${GREEN}✓ $reg_latest${NC}" + for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$local_tag" "${REG}/localai-backends:${IMAGE_TAG}-${backend}" + push_image "$local_tag" "${REG}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" + done done fi diff --git a/sync-upstream.sh b/sync-upstream.sh index 4de3809cc49e..01e396809000 100755 --- a/sync-upstream.sh +++ b/sync-upstream.sh @@ -28,6 +28,7 @@ set -euo pipefail REGISTRY="${REGISTRY:-192.168.178.127:5000}" +REGISTRY2="${REGISTRY2:-pointblank.ddns.net:5556}" ROCM_VERSION="${ROCM_VERSION:-7.12}" ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" UPSTREAM_REMOTE="${UPSTREAM_REMOTE:-upstream}" @@ -213,13 +214,17 @@ REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" REGISTRY_VERSION="${REGISTRY}/localai:${VERSION_TAG}" REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" -docker tag "$LOCAL_IMAGE" "$REGISTRY_IMAGE" -docker tag "$LOCAL_IMAGE" "$REGISTRY_VERSION" -docker tag "$LOCAL_IMAGE" "$REGISTRY_LATEST" +push_image() { + local local_tag="$1" remote_tag="$2" + docker tag "$local_tag" "$remote_tag" + docker push "$remote_tag" && echo -e " ${GREEN}✓ $remote_tag${NC}" +} -docker push "$REGISTRY_IMAGE" && echo -e " ${GREEN}✓ $REGISTRY_IMAGE${NC}" -docker push "$REGISTRY_VERSION" && echo -e " ${GREEN}✓ $REGISTRY_VERSION${NC}" -docker push "$REGISTRY_LATEST" && echo -e " ${GREEN}✓ $REGISTRY_LATEST${NC}" +for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$LOCAL_IMAGE" "${REG}/localai:${IMAGE_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:${VERSION_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:latest-rocm${ROCM_MAJOR}" +done # 
--------------------------------------------------------------------------- if [ "$NO_BACKENDS" = "false" ]; then @@ -235,13 +240,10 @@ if [ "$NO_BACKENDS" = "false" ]; then continue fi - reg_versioned="${REGISTRY}/localai-backends:${IMAGE_TAG}-${backend}" - reg_latest="${REGISTRY}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" - - docker tag "$local_tag" "$reg_versioned" - docker tag "$local_tag" "$reg_latest" - docker push "$reg_versioned" && echo -e " ${GREEN}✓ $reg_versioned${NC}" - docker push "$reg_latest" && echo -e " ${GREEN}✓ $reg_latest${NC}" + for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$local_tag" "${REG}/localai-backends:${IMAGE_TAG}-${backend}" + push_image "$local_tag" "${REG}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" + done done fi From c1c8fa594023aedb8388e615855f12667586a37d Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sun, 5 Apr 2026 00:33:03 +0200 Subject: [PATCH 12/13] fix(rocm7): bundle rocm_sysdeps and hipblaslt libs in backend package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ROCm 7.x moved bundled libc replacements (elf, drm, zstd, lzma, bz2, etc.) under core-X.Y/lib/rocm_sysdeps/lib/ which is not in any standard library search path. When the backend binary is executed via the custom ld.so in the package's lib/ directory, RPATH is not honoured for these paths — only LD_LIBRARY_PATH and the bundled lib/ dir are searched. Fixes the 'librocm_sysdeps_elf.so.1: cannot open shared object file' error that prevented the llama-cpp gRPC service from starting on ROCm 7.x. Also adds hipblaslt, amd_comgr_loader, rocroller, rocprofiler-register to the bundled library list (all present in ROCm 7.x, absent from 6.x list). 
Co-Authored-By: Claude Sonnet 4.6 --- scripts/build/package-gpu-libs.sh | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/build/package-gpu-libs.sh b/scripts/build/package-gpu-libs.sh index 8fc2a59c8599..e560d8d80fcf 100755 --- a/scripts/build/package-gpu-libs.sh +++ b/scripts/build/package-gpu-libs.sh @@ -166,17 +166,24 @@ package_rocm_libs() { "/opt/rocm/hip/lib" ) - # Find the actual ROCm versioned directory - for rocm_dir in /opt/rocm-*; do + # Find the actual ROCm versioned directory (supports both rocm-X.Y and core-X.Y layouts) + for rocm_dir in /opt/rocm-* /opt/rocm/core-*; do if [ -d "$rocm_dir/lib" ]; then rocm_lib_paths+=("$rocm_dir/lib") fi + # ROCm 7.x bundles sysdeps (elf, drm, zstd, etc.) under core-X.Y/lib/rocm_sysdeps/lib/ + # These are NOT in the standard lib path and are NOT reached via RPATH when a custom + # ld.so is used to execute the backend binary, so they must be bundled explicitly. + if [ -d "$rocm_dir/lib/rocm_sysdeps/lib" ]; then + rocm_lib_paths+=("$rocm_dir/lib/rocm_sysdeps/lib") + fi done # Core ROCm/HIP runtime libraries local rocm_libs=( "libamdhip64.so*" "libhipblas.so*" + "libhipblaslt.so*" "librocblas.so*" "librocrand.so*" "librocsparse.so*" @@ -186,8 +193,13 @@ package_rocm_libs() { "libroctx64.so*" "libhsa-runtime64.so*" "libamd_comgr.so*" + "libamd_comgr_loader.so*" "libhip_hcc.so*" "libhiprtc.so*" + "librocroller.so*" + "librocprofiler-register.so*" + # ROCm 7.x sysdeps — bundled libc replacements (elf, drm, zstd, lzma, bz2, etc.) + "librocm_sysdeps_*.so*" ) for lib_path in "${rocm_lib_paths[@]}"; do @@ -201,18 +213,23 @@ package_rocm_libs() { # Copy rocblas library data (tuning files, etc.) 
local old_nullglob=$(shopt -p nullglob) shopt -s nullglob - local rocm_dirs=(/opt/rocm /opt/rocm-*) + # ROCm 7.x installs to core-X.Y subdirectory; include both old and new layout + local rocm_dirs=(/opt/rocm /opt/rocm-* /opt/rocm/core-*) eval "$old_nullglob" for rocm_base in "${rocm_dirs[@]}"; do if [ -d "$rocm_base/lib/rocblas" ]; then mkdir -p "$TARGET_LIB_DIR/rocblas" cp -arfL "$rocm_base/lib/rocblas/"* "$TARGET_LIB_DIR/rocblas/" 2>/dev/null || true fi + if [ -d "$rocm_base/lib/hipblaslt" ]; then + mkdir -p "$TARGET_LIB_DIR/hipblaslt" + cp -arfL "$rocm_base/lib/hipblaslt/"* "$TARGET_LIB_DIR/hipblaslt/" 2>/dev/null || true + fi done # Copy libomp from LLVM (required for ROCm) shopt -s nullglob - local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*) + local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so* /opt/rocm/core-*/lib/llvm/lib/libomp.so*) eval "$old_nullglob" for omp_path in "${omp_libs[@]}"; do if [ -e "$omp_path" ]; then From 91c18c6f245b7a642f8c63c28ba51774e2d05522 Mon Sep 17 00:00:00 2001 From: stefanwalcz Date: Sun, 5 Apr 2026 00:46:55 +0200 Subject: [PATCH 13/13] fix(Dockerfile.llama-cpp): cmake before ROCm, force-overwrite DNN conflicts - Install cmake before ROCm 7.x packages: ROCm sysdeps break libarchive13t64 which cmake depends on, so cmake must be installed while apt env is clean. - Add --force-overwrite to amdrocm-core-sdk installs: multiple GPU family packages (gfx908/gfx90a/etc.) all install the same MIOpen CTestTestfile.cmake, causing dpkg conflicts when building for more than one GPU family. 
Co-Authored-By: Claude Sonnet 4.6 --- backend/Dockerfile.llama-cpp | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index dfb938d58c4f..25dc1fbf99a7 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -207,6 +207,20 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ rm -rf /var/lib/apt/lists/* \ ; fi +# Install CMake before ROCm packages. +# ROCm 7.x sysdeps packages break libarchive13t64 which cmake depends on, so cmake +# must be installed first while the apt environment is still clean. +RUN <&2 ; \ done && \ apt-get clean && \ @@ -289,21 +305,10 @@ RUN <