diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 0ec9bcf589f7..0806535ae2e8 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -30,6 +30,8 @@ jobs: skip-drivers: ${{ matrix.skip-drivers }} context: ${{ matrix.context }} ubuntu-version: ${{ matrix.ubuntu-version }} + rocm-version: ${{ matrix.rocm-version || '6' }} + rocm-arch: ${{ matrix.rocm-arch || 'gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201' }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -1409,6 +1411,259 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + # ROCm 7.x hipblas builds (ubuntu:24.04 base + AMD apt repo; supports gfx1151/RDNA3.5+) + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-rerankers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rerankers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-llama-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vllm' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "vllm" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + 
rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vllm-omni' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "vllm-omni" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-transformers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "transformers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-diffusers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-ace-step' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "ace-step" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-kokoro' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "kokoro" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 
'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-vibevoice' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "vibevoice" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-qwen-asr' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen-asr" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-nemo' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "nemo" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-qwen-tts' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-fish-speech' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "fish-speech" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-voxcpm' + runs-on: 'arc-runner-set' + 
base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "voxcpm" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-pocket-tts' + runs-on: 'arc-runner-set' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "pocket-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-faster-whisper' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-whisperx' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-rocm7-hipblas-coqui' + runs-on: 'bigger-runner' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "coqui" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + rocm-version: '7' # sycl builds - build-type: 'intel' cuda-major-version: "" diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml index 0022238c61ab..2f6f0b19e32a 100644 --- a/.github/workflows/backend_build.yml +++ b/.github/workflows/backend_build.yml @@ -58,6 +58,16 @@ on: required: false 
default: '2204' type: string + rocm-version: + description: 'ROCm major version (6 = ROCm 6.x with hipblas-dev/rocblas-dev, 7 = ROCm 7.x with amdrocm-* packages)' + required: false + default: '6' + type: string + rocm-arch: + description: 'Comma-separated GPU architectures for ROCm 7.x. Default covers all GPUs supported by ROCm 7.12. Override to a single arch (e.g. gfx1151) for smaller images.' + required: false + default: 'gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201' + type: string secrets: dockerUsername: required: false @@ -214,6 +224,8 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + ROCM_VERSION=${{ inputs.rocm-version }} + ROCM_ARCH=${{ inputs.rocm-arch }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha @@ -235,6 +247,8 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + ROCM_VERSION=${{ inputs.rocm-version }} + ROCM_ARCH=${{ inputs.rocm-arch }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8b672e8976d3..1df1c4f57fc7 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -29,6 +29,7 @@ makeflags: ${{ matrix.makeflags }} ubuntu-version: ${{ matrix.ubuntu-version }} ubuntu-codename: ${{ matrix.ubuntu-codename }} + rocm-version: ${{ matrix.rocm-version || '6' }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -47,6 +48,20 @@ makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2404' ubuntu-codename: 'noble' + rocm-version: '6' + # ROCm 7.x build for AMD Strix Halo / RDNA 3.5 (gfx1151) and other ROCm 7+ devices. 
+ # Uses plain Ubuntu 24.04 and installs ROCm 7 from AMD's new apt repo (repo.amd.com). + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-hipblas-rocm7' + base-image: "ubuntu:24.04" + grpc-base-image: "ubuntu:24.04" + runs-on: 'ubuntu-latest' + makeflags: "--jobs=3 --output-sync=target" + ubuntu-version: '2404' + ubuntu-codename: 'noble' + rocm-version: '7' core-image-build: if: github.repository == 'mudler/LocalAI' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 9483239d2971..d45f8469c914 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -61,6 +61,11 @@ on: required: false default: 'noble' type: string + rocm-version: + description: 'ROCm major version (6 or 7). Controls which apt packages are installed for hipblas builds.' + required: false + default: '6' + type: string secrets: dockerUsername: required: true @@ -217,6 +222,7 @@ jobs: SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} + ROCM_VERSION=${{ inputs.rocm-version }} context: . file: ./Dockerfile cache-from: type=gha @@ -246,6 +252,7 @@ jobs: SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} + ROCM_VERSION=${{ inputs.rocm-version }} context: . file: ./Dockerfile cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index 1567ef6f7ec2..730c6301f21b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,18 @@ ARG CUDA_MINOR_VERSION=0 ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT +# ROCM_VERSION: major version of ROCm to install. 
+# - "6" (default): use packages already present in the rocm/dev-ubuntu-* base image +# (hipblas-dev, rocblas-dev — the legacy names used up to ROCm 6.x) +# - "7": install from AMD's new repo.amd.com/rocm/packages/ubuntu2404 repo and use +# the new amdrocm-* package names introduced in ROCm 7.x +ARG ROCM_VERSION=6 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Controls both the apt packages installed (amdrocm-core-sdk-gfxNNNN per arch) +# and the AMDGPU_TARGETS passed to cmake/hipcc. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. +ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 ENV BUILD_TYPE=${BUILD_TYPE} ARG UBUNTU_VERSION=2404 @@ -146,6 +158,59 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. + mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends git && \ + # amdrocm-llvm: ROCm LLVM/Clang toolchain (compiler for HIP kernels) + # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. 
+ # In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU + # family gets its own amdrocm-core-sdk-gfxNNNN package instead. + apt-get install -y --no-install-recommends amdrocm-llvm && \ + # Install arch-specific SDK packages for each GPU target in ROCM_ARCH. + # amdrocm-core-sdk-gfxNNNN provides pre-compiled BLAS/CK/DNN kernels for + # that GPU family. Packages for old GCN arches (gfx803/900/906) may not + # exist in the ROCm 7.x repo; the || true skips unavailable packages. + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available in ROCm 7.x repo, skipping" >&2 ; \ + done && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing + # /opt/rocm/core-7 -> /opt/rocm/core-7.XX (e.g. core-7.12). + # Create /opt/rocm/* compat symlinks so cmake find_package(hip) and existing + # linker flags work unchanged. Using core-7 (not core-7.XX) means these + # symlinks survive minor version bumps (7.11 → 7.12 etc.) automatically. + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + echo "amd" > /run/localai/capability && \ + ldconfig && \ + # rocWMMA (rocwmma-dev) is not available in the ROCm 7.x apt repo. + # Install headers from source so GGML_HIP_ROCWMMA_FATTN can be compiled. + # Headers go to /opt/rocwmma-headers; CPATH is set below so the compiler + # finds them without any extra cmake path gymnastics. + git clone --depth 1 https://github.com/ROCm/rocWMMA /tmp/rocwmma && \ + mkdir -p /opt/rocwmma-headers/rocwmma && \ + cp -r /tmp/rocwmma/library/include/rocwmma/. 
/opt/rocwmma-headers/rocwmma/ && \ + rm -rf /tmp/rocwmma && \ + # rocwmma-version.hpp is generated by cmake configure_file in the source tree. + # Write it directly to avoid pulling in cmake at this stage. + printf '#ifndef ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_VERSION_MAJOR 2\n#define ROCWMMA_VERSION_MINOR 2\n#define ROCWMMA_VERSION_PATCH 0\n#endif\n' \ + > /opt/rocwmma-headers/rocwmma/rocwmma-version.hpp ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. + # ROCm lib packages don't trigger ldconfig - run it manually. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ @@ -153,14 +218,17 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ echo "amd" > /run/localai/capability && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi + +# Set CPATH so the compiler finds rocWMMA headers during backend compilation. +# This ENV is a no-op on non-hipblas builds (the path does not exist there). +ENV CPATH=/opt/rocwmma-headers:${CPATH:-} RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ - ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \ - ; fi + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ +fi RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel" @@ -372,6 +440,13 @@ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all +# ROCm 7.x: libamd_comgr.so.3 depends on LLVM shared libs (libLLVM.so, libclang-cpp.so) +# that live in /opt/rocm/llvm/lib. 
ldconfig alone is insufficient because backend +# subprocesses load libamd_comgr via dlopen() which respects LD_LIBRARY_PATH but not +# ldconfig when the caller bypasses the standard linker. This path is a no-op on +# non-ROCm builds (the directory simply does not exist). +ENV LD_LIBRARY_PATH=/opt/rocm/llvm/lib:${LD_LIBRARY_PATH:-} + WORKDIR / COPY ./entrypoint.sh . diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 3bf15c508ea7..eec99e20c725 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -14,6 +14,13 @@ ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 ARG UBUNTU_VERSION=2404 +# ROCM_VERSION: major ROCm version. '6' = ROCm 6.x (hipblas-dev/rocblas-dev from +# rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). +ARG ROCM_VERSION=6 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. +ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -144,16 +151,40 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ ; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ + if [ "${ROCM_VERSION}" = "7" ]; then \ + # ROCm 7.x ships under a new apt repo with renamed packages. + # repo.amd.com/rocm/packages/ubuntu2404 uses amdrocm-* names; the old + # hipblas-dev / rocblas-dev packages no longer exist. 
+ mkdir -p /etc/apt/keyrings && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO- https://repo.amd.com/rocm/packages/gpg/rocm.gpg | gpg --dearmor > /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 3930d04d4aba..25dc1fbf99a7 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -71,6 +71,13 @@ ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 ARG UBUNTU_VERSION=2404 +# ROCM_VERSION: major ROCm version. 
'6' = ROCm 6.x (hipblas-dev/rocblas-dev from +# rocm/dev-ubuntu base image); '7' = ROCm 7.x (amdrocm-* packages from AMD apt repo). +ARG ROCM_VERSION=6 +# ROCM_ARCH: comma-separated GPU architecture targets for ROCm 7.x. +# Default: all GPU architectures supported by ROCm 7.12. +# Override to a single arch for smaller images, e.g. ROCM_ARCH=gfx1151. +ARG ROCM_ARCH=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -200,17 +207,85 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ rm -rf /var/lib/apt/lists/* \ ; fi +# Install CMake before ROCm packages. +# ROCm 7.x sysdeps packages break libarchive13t64 which cmake depends on, so cmake +# must be installed first while the apt environment is still clean. +RUN < /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + # amdrocm-llvm: ROCm LLVM/Clang toolchain (compiler for HIP kernels) + # amdrocm-core-sdk-${ROCM_ARCH}: GPU-family metapackage with BLAS kernel objects. + # In ROCm 7.x the old hipblas-dev/rocblas-dev packages no longer exist; each GPU + # family gets its own amdrocm-core-sdk-gfxNNNN package instead. + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends \ + -o Dpkg::Options::="--force-overwrite" \ + "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + # ROCm 7.x installs to /opt/rocm/core-7.XX/ with update-alternatives managing + # /opt/rocm/core-7 -> /opt/rocm/core-7.XX (e.g. core-7.12). 
+ # Create /opt/rocm/* compat symlinks so cmake find_package(hip) and existing + # linker flags work unchanged. + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig && \ + # rocWMMA (rocwmma-dev) is not available in the ROCm 7.x apt repo. + # Install headers from source so GGML_HIP_ROCWMMA_FATTN can be compiled. + git clone --depth 1 https://github.com/ROCm/rocWMMA /tmp/rocwmma && \ + mkdir -p /opt/rocwmma-headers/rocwmma && \ + cp -r /tmp/rocwmma/library/include/rocwmma/. /opt/rocwmma-headers/rocwmma/ && \ + rm -rf /tmp/rocwmma && \ + # rocwmma-version.hpp is generated by cmake configure_file in the source tree. + # Write it directly to avoid pulling in cmake at this stage. + printf '#ifndef ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_API_VERSION_HPP\n#define ROCWMMA_VERSION_MAJOR 2\n#define ROCWMMA_VERSION_MINOR 2\n#define ROCWMMA_VERSION_PATCH 0\n#endif\n' \ + > /opt/rocwmma-headers/rocwmma/rocwmma-version.hpp ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. + # ROCm lib packages don't trigger ldconfig - run it manually. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi + +# Set CPATH so the compiler finds rocWMMA headers during compilation. +# This ENV is a no-op on non-hipblas builds (the path does not exist there). 
+ARG CPATH="" +ENV CPATH=/opt/rocwmma-headers:${CPATH} + +RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ +fi RUN echo "TARGETARCH: $TARGETARCH" @@ -230,32 +305,24 @@ RUN < /etc/apt/keyrings/amdrocm.gpg && \ + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/amdrocm.gpg] https://repo.amd.com/rocm/packages/ubuntu2404 stable main' > /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends amdrocm-llvm && \ + for _arch in $(echo "${ROCM_ARCH}" | tr ',' ' '); do \ + apt-get install -y --no-install-recommends "amdrocm-core-sdk-${_arch}" || \ + echo "Note: amdrocm-core-sdk-${_arch} not available, skipping" >&2 ; \ + done && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + ln -sf /opt/rocm/core-7/lib/llvm /opt/rocm/llvm && \ + ln -sf /opt/rocm/core-7/bin /opt/rocm/bin && \ + ln -sf /opt/rocm/core-7 /opt/rocm/hip && \ + ln -sf /opt/rocm/core-7/lib /opt/rocm/lib && \ + ln -sf /opt/rocm/core-7/include /opt/rocm/include && \ + ldconfig ; \ + else \ + # ROCm 6.x: packages come pre-installed in the rocm/dev-ubuntu-* base image. apt-get update && \ apt-get install -y --no-install-recommends \ hipblas-dev \ rocblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able - # to locate the libraries. 
We run ldconfig ourselves to work around this packaging deficiency - ldconfig \ - ; fi + ldconfig ; \ + fi \ +; fi RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \ - ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \ - ; fi + ln -sf /opt/rocm/llvm/lib/libomp.so /usr/lib/libomp.so ; \ +fi # Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt index 598461975532..698fbc143903 100644 --- a/backend/cpp/llama-cpp/CMakeLists.txt +++ b/backend/cpp/llama-cpp/CMakeLists.txt @@ -61,6 +61,12 @@ add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h) target_include_directories(${TARGET} PRIVATE ../llava) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) +# grpc-server lives at llama.cpp/tools/grpc-server/; common/ is two levels up +# at the llama.cpp root. Add it so that chat-auto-parser.h and its transitive +# includes (jinja/, minja/) are found without relying on cmake's include +# propagation from the common library target, which proved unreliable. 
+get_filename_component(LLAMA_COMMON_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../common" ABSOLUTE) +target_include_directories(${TARGET} PRIVATE "${LLAMA_COMMON_DIR}") target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto absl::flags_parse diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 82b49de1564a..f4a5d974c661 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -33,8 +33,18 @@ else ifeq ($(BUILD_TYPE),hipblas) ROCM_PATH ?= /opt/rocm export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 - CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) + # AMDGPU_TARGETS: GPU architectures that support rocWMMA (https://github.com/ROCm/rocWMMA). + # rocWMMA is required for GGML_HIP_ROCWMMA_FATTN below (~50% FlashAttention speedup). + # Architectures NOT in rocWMMA (gfx803, gfx900, gfx906, gfx1012, gfx1030-gfx1032, + # gfx1103, gfx1152) are excluded because -DGGML_HIP_ROCWMMA_FATTN=ON would trigger + # a static_assert("Unsupported architecture") in the rocWMMA headers for those targets. + # To build for all ROCm 7.12 GPUs at the cost of the rocWMMA optimisation, override with: + # make ... AMDGPU_TARGETS=gfx803,...,gfx1201 and remove -DGGML_HIP_ROCWMMA_FATTN=ON + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1100,gfx1101,gfx1102,gfx1150,gfx1151,gfx1200,gfx1201 + # -DHIP_PLATFORM=amd is required for ROCm 7.x cmake find_package(hip) to locate the HIP SDK. + # -DGGML_HIP_ROCWMMA_FATTN=ON enables rocWMMA-accelerated FlashAttention (~50% speedup + # on supported RDNA 3+ and CDNA 2+ architectures listed in AMDGPU_TARGETS above). 
+ CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) -DHIP_PLATFORM=amd -DROCM_PATH=$(ROCM_PATH) -DCMAKE_PREFIX_PATH=$(ROCM_PATH) -DGGML_HIP_ROCWMMA_FATTN=ON else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DGGML_VULKAN=1 else ifeq ($(OS),Darwin) diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh index f9b7e3dd2651..0a7a5375613b 100644 --- a/backend/cpp/llama-cpp/prepare.sh +++ b/backend/cpp/llama-cpp/prepare.sh @@ -21,6 +21,20 @@ cp -r grpc-server.cpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/ cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/ +# Copy common/ headers into the grpc-server staging directory. +# Newer llama.cpp versions (post chat-auto-parser introduction) require +# headers from common/ (e.g. chat-auto-parser.h and its transitive deps +# jinja/, minja/) to be available alongside grpc-server.cpp. Relying solely +# on cmake include-path propagation proved fragile across build variants, so +# we stage them explicitly here. +cp -f llama.cpp/common/*.h llama.cpp/tools/grpc-server/ 2>/dev/null || true +for _subdir in jinja minja; do + if [ -d "llama.cpp/common/$_subdir" ]; then + cp -rf "llama.cpp/common/$_subdir" llama.cpp/tools/grpc-server/ + fi +done +unset _subdir + set +e if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then echo "grpc-server already added" diff --git a/scripts/build-rocm.sh b/scripts/build-rocm.sh new file mode 100755 index 000000000000..4fd56e3561f1 --- /dev/null +++ b/scripts/build-rocm.sh @@ -0,0 +1,189 @@ +#!/bin/bash +# ============================================================================= +# LocalAI ROCm 7.x Rebuild Script +# ============================================================================= +# Builds (and optionally pushes) the main LocalAI image plus all ROCm 7.x +# backend images. No git sync — use sync-upstream.sh for merge + build + push. +# +# Tags use the scheme rocm (e.g. 
rocm7.12), not a GPU-specific +# name, because ROCm 7.x is a distinct build/install paradigm vs. ROCm 6.x and +# the resulting images support all rocWMMA-capable architectures. +# +# By default builds for ALL GPU architectures supported by ROCm 7.12. +# Override ROCM_ARCH to a subset for faster/smaller local builds, e.g.: +# ROCM_ARCH=gfx1151 bash scripts/build-rocm.sh +# +# Usage: +# bash scripts/build-rocm.sh # build all + push +# bash scripts/build-rocm.sh --no-push # build all, no registry push +# bash scripts/build-rocm.sh --no-backends # main image only +# ROCM_VERSION=7.13 bash scripts/build-rocm.sh +# ROCM_ARCH=gfx1150,gfx1151 bash scripts/build-rocm.sh +# ============================================================================= +set -euo pipefail + +REGISTRY="${REGISTRY:-192.168.178.127:5000}" +REGISTRY2="${REGISTRY2:-pointblank.ddns.net:5556}" +ROCM_VERSION="${ROCM_VERSION:-7.12}" +ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" +NO_PUSH=false +NO_BACKENDS=false + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BOLD='\033[1m'; NC='\033[0m' + +for arg in "$@"; do + case $arg in + --no-push) NO_PUSH=true ;; + --no-backends) NO_BACKENDS=true ;; + esac +done + +# --------------------------------------------------------------------------- +# Backend list — keep in sync with sync-upstream.sh +# Format: "BACKEND_NAME|DOCKERFILE_TYPE" +# --------------------------------------------------------------------------- +BACKENDS=( + "rerankers|python" + "llama-cpp|llama-cpp" + "vllm|python" + "vllm-omni|python" + "transformers|python" + "diffusers|python" + "ace-step|python" + "kokoro|python" + "vibevoice|python" + "qwen-asr|python" + "nemo|python" + "qwen-tts|python" + "fish-speech|python" + "voxcpm|python" + "pocket-tts|python" + "faster-whisper|python" + "whisperx|python" + "coqui|python" +) + +# Tag suffix: rocm reflects 
the ROCm 7.x build paradigm +# (new amdrocm-* packages, different install path) not a specific GPU arch. +OUR_SUFFIX="rocm${ROCM_VERSION}" +BUILD_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "local") +UPSTREAM_VERSION=$(git describe --tags --abbrev=0 upstream/master 2>/dev/null \ + || git describe --tags --abbrev=0 2>/dev/null \ + || echo "dev") +IMAGE_TAG="${UPSTREAM_VERSION}-${OUR_SUFFIX}-${BUILD_SHA}" +LOCAL_IMAGE="localai:${OUR_SUFFIX}" + +echo -e "${BOLD}LocalAI ROCm Rebuild${NC}" +echo -e " ROCm: ${YELLOW}$ROCM_VERSION / $ROCM_ARCH${NC}" +echo -e " Tag: ${YELLOW}$IMAGE_TAG${NC}" +echo -e " Push: $( [ "$NO_PUSH" = "true" ] && echo "${YELLOW}disabled${NC}" || echo "${GREEN}enabled → $REGISTRY${NC}" )" +echo -e " Backends: $( [ "$NO_BACKENDS" = "true" ] && echo "${YELLOW}skipped${NC}" || echo "${GREEN}${#BACKENDS[@]} images${NC}" )" + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== Build main image ===${NC}" + +docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + --build-arg GPU_TARGETS="${ROCM_ARCH}" \ + -t "$LOCAL_IMAGE" \ + . 2>&1 | tee /tmp/localai-build-main.log + +echo -e " ${GREEN}✓ Main image built: $LOCAL_IMAGE${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== Build backend images (${#BACKENDS[@]} total) ===${NC}" + + FAILED_BACKENDS=() + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + dftype="${entry##*|}" + + case "$dftype" in + llama-cpp) dockerfile="backend/Dockerfile.llama-cpp"; backend_arg="" ;; + *) dockerfile="backend/Dockerfile.python"; backend_arg="--build-arg BACKEND=${backend}" ;; + esac + + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + echo -e "\n [${backend}] Building..." 
+ + # shellcheck disable=SC2086 + if docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + $backend_arg \ + -f "$dockerfile" \ + -t "$local_tag" \ + . 2>&1 | tee "/tmp/localai-build-${backend}.log"; then + echo -e " ${GREEN}✓ ${backend} OK${NC}" + else + echo -e " ${RED}✗ ${backend} FAILED (log: /tmp/localai-build-${backend}.log)${NC}" + FAILED_BACKENDS+=("$backend") + fi + done + + if [ "${#FAILED_BACKENDS[@]}" -gt 0 ]; then + echo -e "\n${RED}${BOLD}Failed backends:${NC} ${FAILED_BACKENDS[*]}" + echo -e "${YELLOW}Continuing push for successfully built images.${NC}" + else + echo -e "\n ${GREEN}✓ All backends built${NC}" + fi +fi + +# --------------------------------------------------------------------------- +if [ "$NO_PUSH" = "true" ]; then + echo -e "\n${YELLOW}--no-push set — done (no registry push).${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== Push main image ===${NC}" + +ROCM_MAJOR="${ROCM_VERSION%%.*}" # e.g. "7" from "7.12" + +push_image() { + local local_tag="$1" remote_tag="$2" + docker tag "$local_tag" "$remote_tag" + docker push "$remote_tag" && echo -e " ${GREEN}✓ $remote_tag${NC}" +} + +for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$LOCAL_IMAGE" "${REG}/localai:${IMAGE_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:latest-rocm${ROCM_MAJOR}" +done +REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" +REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== Push backend images ===${NC}" + + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + + if ! 
docker image inspect "$local_tag" &>/dev/null; then + echo -e " ${YELLOW}⚠ Skipping $backend (not built)${NC}" + continue + fi + + for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$local_tag" "${REG}/localai-backends:${IMAGE_TAG}-${backend}" + push_image "$local_tag" "${REG}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" + done + done +fi + +# --------------------------------------------------------------------------- +echo -e "\n${GREEN}${BOLD}=== Done ===${NC}" +echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" +echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" +if [ "$NO_BACKENDS" = "false" ]; then + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-rocm${ROCM_MAJOR}-${NC}" +fi +echo "" +echo -e " Deploy:" +echo -e " ${YELLOW}docker compose pull localai && docker compose up -d localai --force-recreate${NC}" diff --git a/scripts/build/package-gpu-libs.sh b/scripts/build/package-gpu-libs.sh index 8fc2a59c8599..e560d8d80fcf 100755 --- a/scripts/build/package-gpu-libs.sh +++ b/scripts/build/package-gpu-libs.sh @@ -166,17 +166,24 @@ package_rocm_libs() { "/opt/rocm/hip/lib" ) - # Find the actual ROCm versioned directory - for rocm_dir in /opt/rocm-*; do + # Find the actual ROCm versioned directory (supports both rocm-X.Y and core-X.Y layouts) + for rocm_dir in /opt/rocm-* /opt/rocm/core-*; do if [ -d "$rocm_dir/lib" ]; then rocm_lib_paths+=("$rocm_dir/lib") fi + # ROCm 7.x bundles sysdeps (elf, drm, zstd, etc.) under core-X.Y/lib/rocm_sysdeps/lib/ + # These are NOT in the standard lib path and are NOT reached via RPATH when a custom + # ld.so is used to execute the backend binary, so they must be bundled explicitly. 
+ if [ -d "$rocm_dir/lib/rocm_sysdeps/lib" ]; then + rocm_lib_paths+=("$rocm_dir/lib/rocm_sysdeps/lib") + fi done # Core ROCm/HIP runtime libraries local rocm_libs=( "libamdhip64.so*" "libhipblas.so*" + "libhipblaslt.so*" "librocblas.so*" "librocrand.so*" "librocsparse.so*" @@ -186,8 +193,13 @@ package_rocm_libs() { "libroctx64.so*" "libhsa-runtime64.so*" "libamd_comgr.so*" + "libamd_comgr_loader.so*" "libhip_hcc.so*" "libhiprtc.so*" + "librocroller.so*" + "librocprofiler-register.so*" + # ROCm 7.x sysdeps — bundled libc replacements (elf, drm, zstd, lzma, bz2, etc.) + "librocm_sysdeps_*.so*" ) for lib_path in "${rocm_lib_paths[@]}"; do @@ -201,18 +213,23 @@ package_rocm_libs() { # Copy rocblas library data (tuning files, etc.) local old_nullglob=$(shopt -p nullglob) shopt -s nullglob - local rocm_dirs=(/opt/rocm /opt/rocm-*) + # ROCm 7.x installs to core-X.Y subdirectory; include both old and new layout + local rocm_dirs=(/opt/rocm /opt/rocm-* /opt/rocm/core-*) eval "$old_nullglob" for rocm_base in "${rocm_dirs[@]}"; do if [ -d "$rocm_base/lib/rocblas" ]; then mkdir -p "$TARGET_LIB_DIR/rocblas" cp -arfL "$rocm_base/lib/rocblas/"* "$TARGET_LIB_DIR/rocblas/" 2>/dev/null || true fi + if [ -d "$rocm_base/lib/hipblaslt" ]; then + mkdir -p "$TARGET_LIB_DIR/hipblaslt" + cp -arfL "$rocm_base/lib/hipblaslt/"* "$TARGET_LIB_DIR/hipblaslt/" 2>/dev/null || true + fi done # Copy libomp from LLVM (required for ROCm) shopt -s nullglob - local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*) + local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so* /opt/rocm/core-*/lib/llvm/lib/libomp.so*) eval "$old_nullglob" for omp_path in "${omp_libs[@]}"; do if [ -e "$omp_path" ]; then diff --git a/sync-upstream.sh b/sync-upstream.sh new file mode 100755 index 000000000000..01e396809000 --- /dev/null +++ b/sync-upstream.sh @@ -0,0 +1,276 @@ +#!/bin/bash +# ============================================================================= +# LocalAI Upstream Sync — ROCm 7.x Fork +# 
============================================================================= +# Usage: +# bash sync-upstream.sh # fetch + merge + build all + push +# bash sync-upstream.sh --dry-run # fetch + merge only (no build) +# bash sync-upstream.sh --no-push # build but no registry push +# bash sync-upstream.sh --no-backends # main image only, skip backend images +# ROCM_VERSION=7.13 bash sync-upstream.sh +# +# Image tags use rocm<version> (e.g. rocm7.12), not a GPU-specific name, +# because ROCm 7.x is a distinct build/install paradigm from ROCm 6.x and the +# resulting images support all rocWMMA-capable architectures. +# +# Image tags (main image): +# <registry>/localai:<upstream-version>-rocm<rocm-version>-<sha> (build-specific) +# <registry>/localai:<upstream-version>-rocm<rocm-version> (rolling per version) +# <registry>/localai:latest-rocm<major> (rolling latest, e.g. latest-rocm7) +# +# Image tags (each backend): +# <registry>/localai-backends:<upstream-version>-rocm<rocm-version>-<sha>-<backend> +# <registry>/localai-backends:latest-rocm<major>-<backend> +# +# ROCm version changes only when a new ROCm release ships — override via env var. +# On conflict: script stops, resolve manually, git commit, then re-run. +# ============================================================================= +set -euo pipefail + +REGISTRY="${REGISTRY:-192.168.178.127:5000}" +REGISTRY2="${REGISTRY2:-pointblank.ddns.net:5556}" +ROCM_VERSION="${ROCM_VERSION:-7.12}" +ROCM_ARCH="${ROCM_ARCH:-gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1012,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1150,gfx1151,gfx1152,gfx1200,gfx1201}" +UPSTREAM_REMOTE="${UPSTREAM_REMOTE:-upstream}" +UPSTREAM_BRANCH="${UPSTREAM_BRANCH:-master}" +DRY_RUN=false +NO_PUSH=false +NO_BACKENDS=false + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BOLD='\033[1m'; NC='\033[0m' + +for arg in "$@"; do + case $arg in + --dry-run) DRY_RUN=true ;; + --no-push) NO_PUSH=true ;; + --no-backends) NO_BACKENDS=true ;; + esac +done + +# --------------------------------------------------------------------------- +# Backend list — all ROCm 7.x backend images to build.
+# Format: "BACKEND_NAME|DOCKERFILE_TYPE" +# python → backend/Dockerfile.python (passes --build-arg BACKEND=<name>) +# llama-cpp → backend/Dockerfile.llama-cpp +# --------------------------------------------------------------------------- +BACKENDS=( + "rerankers|python" + "llama-cpp|llama-cpp" + "vllm|python" + "vllm-omni|python" + "transformers|python" + "diffusers|python" + "ace-step|python" + "kokoro|python" + "vibevoice|python" + "qwen-asr|python" + "nemo|python" + "qwen-tts|python" + "fish-speech|python" + "voxcpm|python" + "pocket-tts|python" + "faster-whisper|python" + "whisperx|python" + "coqui|python" +) + +OUR_SUFFIX="rocm${ROCM_VERSION}" + +# --------------------------------------------------------------------------- +echo -e "${BOLD}=== 1. Upstream fetch ===${NC}" +git fetch "$UPSTREAM_REMOTE" + +UPSTREAM_VERSION=$(git describe --tags --abbrev=0 "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" 2>/dev/null || echo "dev") +echo -e " Upstream: ${YELLOW}$UPSTREAM_REMOTE/$UPSTREAM_BRANCH${NC} @ ${GREEN}$UPSTREAM_VERSION${NC}" +echo -e " ROCm: ${YELLOW}$ROCM_VERSION${NC} / arch ${YELLOW}$ROCM_ARCH${NC}" + +BEHIND=$(git rev-list HEAD.."$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" --count) +if [ "$BEHIND" = "0" ]; then + echo -e " ${GREEN}✓ Already up to date — no merge needed${NC}" + if [ "$DRY_RUN" = "true" ]; then exit 0; fi +else + echo -e " ${YELLOW}⚠ $BEHIND new upstream commits${NC}" + + # ------------------------------------------------------------------------- + echo -e "\n${BOLD}=== 2. Merge upstream/$UPSTREAM_BRANCH ===${NC}" + if ! git merge "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH" --no-edit \ + -m "chore: merge upstream $UPSTREAM_VERSION into rocm7 fork"; then + echo -e "\n${RED}✗ Merge conflicts! Manual resolution required:${NC}" + echo "" + git diff --name-only --diff-filter=U + echo "" + echo -e " Steps:" + echo -e " 1. Fix conflicts in the files listed above" + echo -e " 2. ${YELLOW}git add <file>${NC} for each resolved file" + echo -e " 3. 
${YELLOW}git commit${NC} to complete the merge" + echo -e " 4. Re-run this script" + echo "" + echo -e " Or abort: ${YELLOW}git merge --abort${NC}" + exit 1 + fi + echo -e " ${GREEN}✓ Merge successful${NC}" +fi + +if [ "$DRY_RUN" = "true" ]; then + echo -e "\n${YELLOW}Dry-run — no build.${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 3. Pre-build checks ===${NC}" +FAIL=0 + +if grep -q "^ENV GGML_CUDA_ENABLE_UNIFIED_MEMORY" Dockerfile 2>/dev/null; then + echo -e " ${RED}✗ GGML_CUDA_ENABLE_UNIFIED_MEMORY in Dockerfile! Remove it.${NC}" + FAIL=1 +else + echo -e " ${GREEN}✓ GGML_CUDA_ENABLE_UNIFIED_MEMORY not in Dockerfile${NC}" +fi + +if grep -qE "core-7\.[0-9]" backend/Dockerfile.llama-cpp backend/Dockerfile.python 2>/dev/null; then + echo -e " ${RED}✗ Hardcoded core-7.XX in backend Dockerfiles — use core-7 (update-alternatives).${NC}" + grep -n "core-7\.[0-9]" backend/Dockerfile.llama-cpp backend/Dockerfile.python 2>/dev/null || true + FAIL=1 +else + echo -e " ${GREEN}✓ No hardcoded core-7.XX in backend Dockerfiles${NC}" +fi + +[ "$FAIL" = "1" ] && { echo -e "\n${RED}Checks failed. Build aborted.${NC}"; exit 1; } + +# --------------------------------------------------------------------------- +BUILD_SHA=$(git rev-parse --short HEAD) +VERSION_TAG="${UPSTREAM_VERSION}-${OUR_SUFFIX}" +IMAGE_TAG="${VERSION_TAG}-${BUILD_SHA}" +LOCAL_IMAGE="localai:${OUR_SUFFIX}" + +echo -e "\n${BOLD}=== 4. Build main image ===${NC}" +echo -e " Build tag: ${YELLOW}$IMAGE_TAG${NC}" +echo -e " Version tag: ${YELLOW}$VERSION_TAG${NC}" +echo -e " Latest tag: ${YELLOW}latest-rocm${ROCM_VERSION%%.*}${NC}" + +docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + --build-arg GPU_TARGETS="${ROCM_ARCH}" \ + -t "$LOCAL_IMAGE" \ + . 
2>&1 | tee /tmp/localai-build-main.log + +echo -e " ${GREEN}✓ Main image built${NC}" + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== 5. Build backend images (${#BACKENDS[@]} total) ===${NC}" + + FAILED_BACKENDS=() + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + dftype="${entry##*|}" + + case "$dftype" in + llama-cpp) dockerfile="backend/Dockerfile.llama-cpp"; backend_arg="" ;; + *) dockerfile="backend/Dockerfile.python"; backend_arg="--build-arg BACKEND=${backend}" ;; + esac + + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + echo -e "\n [${backend}] Building..." + + # shellcheck disable=SC2086 + if docker build \ + --build-arg BUILD_TYPE=hipblas \ + --build-arg ROCM_VERSION=7 \ + --build-arg ROCM_ARCH="${ROCM_ARCH}" \ + $backend_arg \ + -f "$dockerfile" \ + -t "$local_tag" \ + . 2>&1 | tee "/tmp/localai-build-${backend}.log"; then + echo -e " ${GREEN}✓ ${backend} OK${NC}" + else + echo -e " ${RED}✗ ${backend} FAILED (log: /tmp/localai-build-${backend}.log)${NC}" + FAILED_BACKENDS+=("$backend") + fi + done + + if [ "${#FAILED_BACKENDS[@]}" -gt 0 ]; then + echo -e "\n${RED}${BOLD}Failed backends:${NC} ${FAILED_BACKENDS[*]}" + echo -e "${YELLOW}Continuing push for successfully built images.${NC}" + else + echo -e "\n ${GREEN}✓ All backends built successfully${NC}" + fi +fi + +# --------------------------------------------------------------------------- +if [ "$NO_PUSH" = "true" ]; then + echo -e "\n${YELLOW}--no-push set — skipping registry push.${NC}" + echo -e " Local main image: ${GREEN}$LOCAL_IMAGE${NC}" + exit 0 +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 6. Push main image ===${NC}" + +ROCM_MAJOR="${ROCM_VERSION%%.*}" # e.g. 
"7" from "7.12" +REGISTRY_IMAGE="${REGISTRY}/localai:${IMAGE_TAG}" +REGISTRY_VERSION="${REGISTRY}/localai:${VERSION_TAG}" +REGISTRY_LATEST="${REGISTRY}/localai:latest-rocm${ROCM_MAJOR}" + +push_image() { + local local_tag="$1" remote_tag="$2" + docker tag "$local_tag" "$remote_tag" + docker push "$remote_tag" && echo -e " ${GREEN}✓ $remote_tag${NC}" +} + +for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$LOCAL_IMAGE" "${REG}/localai:${IMAGE_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:${VERSION_TAG}" + push_image "$LOCAL_IMAGE" "${REG}/localai:latest-rocm${ROCM_MAJOR}" +done + +# --------------------------------------------------------------------------- +if [ "$NO_BACKENDS" = "false" ]; then + echo -e "\n${BOLD}=== 7. Push backend images ===${NC}" + + for entry in "${BACKENDS[@]}"; do + backend="${entry%%|*}" + local_tag="localai-backends:${OUR_SUFFIX}-${backend}" + + # Skip backends that failed to build + if ! docker image inspect "$local_tag" &>/dev/null; then + echo -e " ${YELLOW}⚠ Skipping $backend (not built)${NC}" + continue + fi + + for REG in "$REGISTRY" "$REGISTRY2"; do + push_image "$local_tag" "${REG}/localai-backends:${IMAGE_TAG}-${backend}" + push_image "$local_tag" "${REG}/localai-backends:latest-rocm${ROCM_MAJOR}-${backend}" + done + done +fi + +# --------------------------------------------------------------------------- +echo -e "\n${BOLD}=== 8. 
Git tag ===${NC}" +GIT_TAG="${IMAGE_TAG}" +if git tag -l | grep -q "^${GIT_TAG}$"; then + echo -e " ${YELLOW}Tag $GIT_TAG already exists — skipped${NC}" +else + git tag "$GIT_TAG" + git push origin "$GIT_TAG" 2>/dev/null || echo -e " ${YELLOW}(Tag push failed — set locally)${NC}" + echo -e " ${GREEN}✓ Tag: $GIT_TAG${NC}" +fi + +# --------------------------------------------------------------------------- +echo -e "\n${GREEN}${BOLD}=== Done ===${NC}" +echo -e " Upstream: ${GREEN}$UPSTREAM_VERSION${NC}" +echo -e " ROCm: ${GREEN}$ROCM_VERSION / $ROCM_ARCH${NC}" +echo -e " Main image: ${GREEN}$REGISTRY_IMAGE${NC}" +echo -e " Latest: ${GREEN}$REGISTRY_LATEST${NC}" +if [ "$NO_BACKENDS" = "false" ]; then + echo -e " Backends: ${GREEN}${#BACKENDS[@]} images tagged as latest-rocm${ROCM_MAJOR}-${NC}" +fi +echo "" +echo -e " Deploy:" +echo -e " ${YELLOW}docker compose pull localai && docker compose up -d localai --force-recreate${NC}" +echo "" +echo -e " Next run:" +echo -e " ${YELLOW}bash sync-upstream.sh${NC}" +echo -e " ${YELLOW}ROCM_VERSION=7.13 bash sync-upstream.sh${NC} (when new ROCm version ships)"