diff --git a/.github/workflows/publish_build_manylinux_cuda_image.yml b/.github/workflows/publish_build_manylinux_cuda_image.yml new file mode 100644 index 00000000000..6283ef78130 --- /dev/null +++ b/.github/workflows/publish_build_manylinux_cuda_image.yml @@ -0,0 +1,31 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +name: Publish CUDA build images + +on: + workflow_dispatch: + push: + branches: + - "main" + - "stage/docker/**" + paths: + - dockerfiles/build_manylinux_cuda_*.Dockerfile + - .github/workflows/publish_build_manylinux_cuda_image.yml + +permissions: + contents: read + packages: write + +jobs: + publish_cuda_12_9: + uses: ./.github/workflows/publish_dockerfile.yml + with: + DOCKER_FILE_NAME: build_manylinux_cuda_12_9_x86_64 + DOCKER_IMAGE_NAME: therock_build_manylinux_cuda_12_9_x86_64 + + publish_cuda_13_2: + uses: ./.github/workflows/publish_dockerfile.yml + with: + DOCKER_FILE_NAME: build_manylinux_cuda_13.2_x86_64 + DOCKER_IMAGE_NAME: therock_build_manylinux_cuda_13_2_x86_64 diff --git a/dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile b/dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile new file mode 100644 index 00000000000..3c4047878c3 --- /dev/null +++ b/dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile @@ -0,0 +1,141 @@ +# This dockerfile builds automatically upon push to the main branch. It can be built +# interactively for testing via: +# docker buildx build --file dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile dockerfiles/ +# This will print a SHA image id, which you can run with (or equiv): +# sudo docker run --rm -it --entrypoint /bin/bash <> +# +# To build and push to a test branch, create a pull request on a branch named: +# stage/docker/* +# We build our portable linux releases on the manylinux (RHEL-based) +# images, with custom additional packages installed. We switch to +# new upstream versions as needed. 
+FROM quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017
+
+######## Python and CMake setup #######
+# These images come with multiple python versions. We pin one for
+# default use.
+# Prepend therock-tools to PATH
+ENV PATH="/usr/local/therock-tools/bin:/opt/python/cp312-cp312/bin:${PATH}"
+
+######## Pip Packages ########
+RUN pip install --upgrade pip setuptools==69.1.1 wheel==0.46.2 && \
+    pip install CppHeaderParser==2.7.4 meson==1.7.0 tomli==2.2.1 PyYAML==6.0.2
+
+######## Repo ########
+RUN curl --silent --show-error --fail --location https://storage.googleapis.com/git-repo-downloads/repo > /usr/local/bin/repo && chmod a+x /usr/local/bin/repo
+
+######## CCache ########
+WORKDIR /install-ccache
+COPY install_ccache.sh ./
+RUN ./install_ccache.sh "4.11.2" && rm -rf /install-ccache
+
+######## SCCache ########
+WORKDIR /install-sccache
+COPY install_sccache.sh ./
+RUN ./install_sccache.sh "0.14.0" && rm -rf /install-sccache
+
+######## CMake ########
+WORKDIR /install-cmake
+ENV CMAKE_VERSION="3.27.9"
+COPY install_cmake.sh ./
+RUN ./install_cmake.sh "${CMAKE_VERSION}" && rm -rf /install-cmake
+
+######## Ninja ########
+WORKDIR /install-ninja
+ENV NINJA_VERSION="1.12.1"
+COPY install_ninja.sh ./
+RUN ./install_ninja.sh "${NINJA_VERSION}" && rm -rf /install-ninja
+
+######## AWS CLI ######
+WORKDIR /install-awscli
+COPY install_awscli.sh ./
+RUN ./install_awscli.sh && rm -rf /install-awscli
+
+######## Installing Google test #######
+WORKDIR /install-googletest
+ENV GOOGLE_TEST_VERSION="1.16.0"
+COPY install_googletest.sh ./
+RUN ./install_googletest.sh "${GOOGLE_TEST_VERSION}" && rm -rf /install-googletest
+
+######## Yum Packages #######
+# We are pinning to gcc-toolset-13 until it is safe to upgrade. The latest
+# manylinux containers use gcc-toolset-14 or later, which is not yet compatible
+# with the LLVM that ROCm builds. This can be upgraded when clang-21 is used.
+#
+# We allow development tools in this list but not development packages (so that
+# things can't accidentally build with system dependencies).
+#
+# Development tool dependencies:
+# texinfo, flex: rocprofiler-systems
+RUN yum install -y epel-release && \
+    yum remove -y gcc-toolset* && \
+    yum install -y \
+    gcc-toolset-13-binutils \
+    gcc-toolset-13-gcc \
+    gcc-toolset-13-gcc-c++ \
+    gcc-toolset-13-gcc-gfortran \
+    gcc-toolset-13-libatomic-devel \
+    gcc-toolset-13-libstdc++-devel \
+    patchelf \
+    vim-common \
+    git-lfs \
+    && yum install -y \
+    texinfo \
+    flex \
+    && yum clean all && \
+    rm -rf /var/cache/yum
+
+
+######## DVC via pip ######
+# dvc's rpm package includes .so dependencies built against glib 2.29
+# settling for pip install for now, but it installs modules not needed for dvc pull
+# more dvc features may be used in upcoming sequenced builds
+# Also pinning pathspec because a new version of it breaks the private _DIR_MARK
+# API that dvc uses. When upgrading past ~3.64.0, then pin can likely be removed.
+#
+# Note: dvc[s3] version locking currently limits boto3>=1.41.0,<1.42.0
+# in requirements.txt
+RUN pip install 'pathspec<0.13.0' 'dvc[s3]==3.62.0' && \
+    which dvc && dvc --version || true
+
+######## Enable GCC Toolset and verify ########
+# This is a subset of what is typically sourced in the gcc-toolset enable
+# script.
+# The base manylinux container has references to its gcc-toolset in its PATHs,
+# clean up LIBRARY_PATH and LD_LIBRARY_PATH since we yum remove that version.
+# -- Predefine variables to avoid Dockerfile linting warnings --
+# Docker requires environment variables to be defined before reuse.
+ENV LIBRARY_PATH="" +ENV LD_LIBRARY_PATH="" +ENV DEVTOOLSET_ROOTPATH="/opt/rh/gcc-toolset-13/root" +ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:${PATH}" +ENV LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LD_LIBRARY_PATH}" + +######## Enable GCC Toolset and verify ######## +RUN which gcc && gcc --version && \ + which g++ && g++ --version && \ + which clang++ || true + +######## Shared Python Interpreters ######## +# Build Python with --enable-shared for embedding (e.g., rocgdb). +# The manylinux /opt/python builds are statically linked and can't be embedded. +WORKDIR /install-shared-pythons +COPY install_shared_pythons.sh ./ +RUN ./install_shared_pythons.sh /tmp/python-build && rm -rf /install-shared-pythons /tmp/python-build + +######## GIT CONFIGURATION ######## +# Git started enforcing strict user checking, which thwarts version +# configuration scripts in a docker image where the tree was checked +# out by the host and mapped in. Disable the check. +# See: https://github.com/openxla/iree/issues/12046 +# We use the wildcard option to disable the checks. This was added +# in git 2.35.3 +RUN git config --global --add safe.directory '*' + +######## CUDA Toolkit 12.9 ######## +WORKDIR /install-cuda +COPY install_cuda.sh ./ +RUN ./install_cuda.sh "12.9" && rm -rf /install-cuda +ENV PATH="/usr/local/cuda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" diff --git a/dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile b/dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile new file mode 100644 index 00000000000..083d703a5d9 --- /dev/null +++ b/dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile @@ -0,0 +1,141 @@ +# This dockerfile builds automatically upon push to the main branch. 
It can be built
+# interactively for testing via:
+# docker buildx build --file dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile dockerfiles/
+# This will print a SHA image id, which you can run with (or equiv):
+# sudo docker run --rm -it --entrypoint /bin/bash <>
+#
+# To build and push to a test branch, create a pull request on a branch named:
+# stage/docker/*
+# We build our portable linux releases on the manylinux (RHEL-based)
+# images, with custom additional packages installed. We switch to
+# new upstream versions as needed.
+FROM quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017
+
+######## Python and CMake setup #######
+# These images come with multiple python versions. We pin one for
+# default use.
+# Prepend therock-tools to PATH
+ENV PATH="/usr/local/therock-tools/bin:/opt/python/cp312-cp312/bin:${PATH}"
+
+######## Pip Packages ########
+RUN pip install --upgrade pip setuptools==69.1.1 wheel==0.46.2 && \
+    pip install CppHeaderParser==2.7.4 meson==1.7.0 tomli==2.2.1 PyYAML==6.0.2
+
+######## Repo ########
+RUN curl --silent --show-error --fail --location https://storage.googleapis.com/git-repo-downloads/repo > /usr/local/bin/repo && chmod a+x /usr/local/bin/repo
+
+######## CCache ########
+WORKDIR /install-ccache
+COPY install_ccache.sh ./
+RUN ./install_ccache.sh "4.11.2" && rm -rf /install-ccache
+
+######## SCCache ########
+WORKDIR /install-sccache
+COPY install_sccache.sh ./
+RUN ./install_sccache.sh "0.14.0" && rm -rf /install-sccache
+
+######## CMake ########
+WORKDIR /install-cmake
+ENV CMAKE_VERSION="3.27.9"
+COPY install_cmake.sh ./
+RUN ./install_cmake.sh "${CMAKE_VERSION}" && rm -rf /install-cmake
+
+######## Ninja ########
+WORKDIR /install-ninja
+ENV NINJA_VERSION="1.12.1"
+COPY install_ninja.sh ./
+RUN ./install_ninja.sh "${NINJA_VERSION}" && rm -rf /install-ninja
+
+######## AWS CLI ######
+WORKDIR /install-awscli
+COPY install_awscli.sh ./
+RUN ./install_awscli.sh && rm -rf /install-awscli
+
+########
Installing Google test #######
+WORKDIR /install-googletest
+ENV GOOGLE_TEST_VERSION="1.16.0"
+COPY install_googletest.sh ./
+RUN ./install_googletest.sh "${GOOGLE_TEST_VERSION}" && rm -rf /install-googletest
+
+######## Yum Packages #######
+# We are pinning to gcc-toolset-13 until it is safe to upgrade. The latest
+# manylinux containers use gcc-toolset-14 or later, which is not yet compatible
+# with the LLVM that ROCm builds. This can be upgraded when clang-21 is used.
+#
+# We allow development tools in this list but not development packages (so that
+# things can't accidentally build with system dependencies).
+#
+# Development tool dependencies:
+# texinfo, flex: rocprofiler-systems
+RUN yum install -y epel-release && \
+    yum remove -y gcc-toolset* && \
+    yum install -y \
+    gcc-toolset-13-binutils \
+    gcc-toolset-13-gcc \
+    gcc-toolset-13-gcc-c++ \
+    gcc-toolset-13-gcc-gfortran \
+    gcc-toolset-13-libatomic-devel \
+    gcc-toolset-13-libstdc++-devel \
+    patchelf \
+    vim-common \
+    git-lfs \
+    && yum install -y \
+    texinfo \
+    flex \
+    && yum clean all && \
+    rm -rf /var/cache/yum
+
+
+######## DVC via pip ######
+# dvc's rpm package includes .so dependencies built against glib 2.29
+# settling for pip install for now, but it installs modules not needed for dvc pull
+# more dvc features may be used in upcoming sequenced builds
+# Also pinning pathspec because a new version of it breaks the private _DIR_MARK
+# API that dvc uses. When upgrading past ~3.64.0, then pin can likely be removed.
+#
+# Note: dvc[s3] version locking currently limits boto3>=1.41.0,<1.42.0
+# in requirements.txt
+RUN pip install 'pathspec<0.13.0' 'dvc[s3]==3.62.0' && \
+    which dvc && dvc --version || true
+
+######## Enable GCC Toolset and verify ########
+# This is a subset of what is typically sourced in the gcc-toolset enable
+# script.
+# The base manylinux container has references to its gcc-toolset in its PATHs, +# clean up LIBRARY_PATH and LD_LIBRARY_PATH since we yum remove that version. +# -- Predefine variables to avoid Dockerfile linting warnings -- +# Docker requires environment variables to be defined before reuse. +ENV LIBRARY_PATH="" +ENV LD_LIBRARY_PATH="" +ENV DEVTOOLSET_ROOTPATH="/opt/rh/gcc-toolset-13/root" +ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:${PATH}" +ENV LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LD_LIBRARY_PATH}" + +######## Enable GCC Toolset and verify ######## +RUN which gcc && gcc --version && \ + which g++ && g++ --version && \ + which clang++ || true + +######## Shared Python Interpreters ######## +# Build Python with --enable-shared for embedding (e.g., rocgdb). +# The manylinux /opt/python builds are statically linked and can't be embedded. +WORKDIR /install-shared-pythons +COPY install_shared_pythons.sh ./ +RUN ./install_shared_pythons.sh /tmp/python-build && rm -rf /install-shared-pythons /tmp/python-build + +######## GIT CONFIGURATION ######## +# Git started enforcing strict user checking, which thwarts version +# configuration scripts in a docker image where the tree was checked +# out by the host and mapped in. Disable the check. +# See: https://github.com/openxla/iree/issues/12046 +# We use the wildcard option to disable the checks. 
This was added +# in git 2.35.3 +RUN git config --global --add safe.directory '*' + +######## CUDA Toolkit 13.2 ######## +WORKDIR /install-cuda +COPY install_cuda.sh ./ +RUN ./install_cuda.sh "13.2" && rm -rf /install-cuda +ENV PATH="/usr/local/cuda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" diff --git a/dockerfiles/install_cuda.sh b/dockerfiles/install_cuda.sh new file mode 100755 index 00000000000..10c4cb23b90 --- /dev/null +++ b/dockerfiles/install_cuda.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright 2026 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Installs the NVIDIA CUDA Toolkit from the official NVIDIA RHEL8 repository. +# Downloads and checksum-verifies the NVIDIA GPG key and .repo file before use, +# then installs a pinned, versioned toolkit package via dnf. +# +# Usage: install_cuda.sh +# Example: install_cuda.sh 12.9 +# install_cuda.sh 13.2 + +set -euo pipefail + +CUDA_VERSION="${1:-}" + +if [[ -z "${CUDA_VERSION}" ]]; then + echo "ERROR: CUDA version argument required." >&2 + echo "Usage: $0 (e.g. $0 12.9 or $0 13.2)" >&2 + exit 1 +fi + +CUDA_REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64" + +GPG_KEY_URL="${CUDA_REPO_BASE}/D42D0685.pub" +GPG_KEY_SHA256="27e46a2d43e125859fb8a62c3b75bf798aeb95fa6f7d9bf790c1167ed9a0b39c" + +REPO_FILE_URL="${CUDA_REPO_BASE}/cuda-rhel8.repo" +REPO_FILE_SHA256="8d5bbb8dc62e0f0701a27355659248c3a11477e80a1b3c93a63ff116d705c06f" + +declare -A CUDA_PACKAGE_SPECS=( + ["12.9"]="cuda-toolkit-12-9-12.9.0-1" + ["13.2"]="cuda-toolkit-13-2-13.2.0-1" +) + +if [[ -z "${CUDA_PACKAGE_SPECS[${CUDA_VERSION}]+x}" ]]; then + echo "ERROR: Unknown CUDA version '${CUDA_VERSION}'." 
>&2 + echo "Supported versions: ${!CUDA_PACKAGE_SPECS[*]}" >&2 + exit 1 +fi + +ARCH="$(uname -m)" +PACKAGE_SPEC="${CUDA_PACKAGE_SPECS[${CUDA_VERSION}]}.${ARCH}" + +echo "Downloading NVIDIA GPG key" +curl --silent --fail --show-error --location \ + "${GPG_KEY_URL}" \ + --output nvidia.pub + +echo "Verifying GPG key checksum" +echo "${GPG_KEY_SHA256} nvidia.pub" | sha256sum --check --strict + +rpm --import nvidia.pub + +echo "Downloading CUDA repo file" +curl --silent --fail --show-error --location \ + "${REPO_FILE_URL}" \ + --output cuda-rhel8.repo + +echo "Verifying repo file checksum" +echo "${REPO_FILE_SHA256} cuda-rhel8.repo" | sha256sum --check --strict +cp cuda-rhel8.repo /etc/yum.repos.d/cuda-rhel8.repo + +dnf config-manager --set-enabled powertools 2>/dev/null || + dnf config-manager --set-enabled crb 2>/dev/null || + true + +echo "Installing ${PACKAGE_SPEC}" +dnf install -y "${PACKAGE_SPEC}" +dnf clean all +rm -rf /var/cache/dnf + +echo "Verifying CUDA installation" +/usr/local/cuda/bin/nvcc --version + +echo "=== CUDA Toolkit ${CUDA_VERSION} installed successfully ==="