Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/publish_build_manylinux_cuda_image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

# Builds and publishes the CUDA-enabled manylinux build images whenever the
# corresponding Dockerfiles (or this workflow) change on main or on a
# stage/docker/* staging branch. Each job delegates to the shared reusable
# publish_dockerfile.yml workflow.
name: Publish CUDA build images

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
      - "stage/docker/**"
    paths:
      - dockerfiles/build_manylinux_cuda_*.Dockerfile
      - .github/workflows/publish_build_manylinux_cuda_image.yml

permissions:
  contents: read
  # Needed to push images to the GitHub container registry.
  packages: write

jobs:
  publish_cuda_12_9:
    uses: ./.github/workflows/publish_dockerfile.yml
    with:
      DOCKER_FILE_NAME: build_manylinux_cuda_12_9_x86_64
      DOCKER_IMAGE_NAME: therock_build_manylinux_cuda_12_9_x86_64

  publish_cuda_13_2:
    uses: ./.github/workflows/publish_dockerfile.yml
    with:
      # NOTE(review): this value must match the file on disk, which is named
      # with a dot ("build_manylinux_cuda_13.2_x86_64.Dockerfile") while the
      # 12.9 file uses underscores. Consider renaming the Dockerfile to
      # build_manylinux_cuda_13_2_x86_64.Dockerfile for consistency, then
      # updating this value to match.
      DOCKER_FILE_NAME: build_manylinux_cuda_13.2_x86_64
      DOCKER_IMAGE_NAME: therock_build_manylinux_cuda_13_2_x86_64
141 changes: 141 additions & 0 deletions dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be no need to build those from scratch and it could be additional layers on top of our existing manylinux image.

Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# This dockerfile builds automatically upon push to the main branch. It can be built
# interactively for testing via:
#   docker buildx build --file dockerfiles/build_manylinux_cuda_12_9_x86_64.Dockerfile dockerfiles/
# This will print a SHA image id, which you can run with (or equiv):
#   sudo docker run --rm -it --entrypoint /bin/bash <<IMAGE>>
#
# To build and push to a test branch, create a pull request on a branch named:
#   stage/docker/*
#
# We build our portable linux releases on the manylinux (RHEL-based)
# images, with custom additional packages installed. We switch to
# new upstream versions as needed.
FROM quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017

######## Python and tool PATH setup ########
# These images come with multiple python versions. We pin one (cp312) for
# default use and prepend therock-tools to PATH.
ENV PATH="/usr/local/therock-tools/bin:/opt/python/cp312-cp312/bin:${PATH}"

######## Pip Packages ########
# --no-cache-dir keeps the pip download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools==69.1.1 wheel==0.46.2 && \
    pip install --no-cache-dir CppHeaderParser==2.7.4 meson==1.7.0 tomli==2.2.1 PyYAML==6.0.2

######## Repo ########
# --fail ensures an HTTP error page is not silently installed as the repo tool.
RUN curl --silent --fail --show-error --location \
        https://storage.googleapis.com/git-repo-downloads/repo \
        --output /usr/local/bin/repo && \
    chmod a+x /usr/local/bin/repo

######## CCache ########
WORKDIR /install-ccache
COPY install_ccache.sh ./
RUN ./install_ccache.sh "4.11.2" && rm -rf /install-ccache

######## SCCache ########
WORKDIR /install-sccache
COPY install_sccache.sh ./
RUN ./install_sccache.sh "0.14.0" && rm -rf /install-sccache

######## CMake ########
WORKDIR /install-cmake
ENV CMAKE_VERSION="3.27.9"
COPY install_cmake.sh ./
RUN ./install_cmake.sh "${CMAKE_VERSION}" && rm -rf /install-cmake

######## Ninja ########
WORKDIR /install-ninja
# Use a dedicated variable: previously this reused CMAKE_VERSION, which
# clobbered the CMake pin above with the Ninja version.
ENV NINJA_VERSION="1.12.1"
COPY install_ninja.sh ./
RUN ./install_ninja.sh "${NINJA_VERSION}" && rm -rf /install-ninja

######## AWS CLI ########
WORKDIR /install-awscli
COPY install_awscli.sh ./
RUN ./install_awscli.sh && rm -rf /install-awscli

######## Google Test ########
WORKDIR /install-googletest
ENV GOOGLE_TEST_VERSION="1.16.0"
COPY install_googletest.sh ./
RUN ./install_googletest.sh "${GOOGLE_TEST_VERSION}" && rm -rf /install-googletest

######## Yum Packages ########
# We are pinning to gcc-toolset-13 until it is safe to upgrade. The latest
# manylinux containers use gcc-toolset-14 or later, which is not yet compatible
# with the LLVM that ROCm builds. This can be upgraded when clang-21 is used.
#
# We allow development tools in this list but not development packages (so that
# things can't accidentally build with system dependencies).
#
# Development tool dependencies:
#   texinfo, flex: rocprofiler-systems
RUN yum install -y epel-release && \
    yum remove -y gcc-toolset* && \
    yum install -y \
        gcc-toolset-13-binutils \
        gcc-toolset-13-gcc \
        gcc-toolset-13-gcc-c++ \
        gcc-toolset-13-gcc-gfortran \
        gcc-toolset-13-libatomic-devel \
        gcc-toolset-13-libstdc++-devel \
        git-lfs \
        patchelf \
        vim-common \
    && yum install -y \
        flex \
        texinfo \
    && yum clean all && \
    rm -rf /var/cache/yum

######## DVC via pip ########
# dvc's rpm package includes .so dependencies built against glib 2.29;
# settling for pip install for now, but it installs modules not needed for
# dvc pull. More dvc features may be used in upcoming sequenced builds.
# Also pinning pathspec because a new version of it breaks the private
# _DIR_MARK API that dvc uses. When upgrading past ~3.64.0, the pin can
# likely be removed.
#
# Note: dvc[s3] version locking currently limits boto3>=1.41.0,<1.42.0
# in requirements.txt
RUN pip install --no-cache-dir 'pathspec<0.13.0' 'dvc[s3]==3.62.0' && \
    which dvc && dvc --version || true

######## GCC Toolset environment ########
# This is a subset of what is typically sourced in the gcc-toolset enable
# script.
# The base manylinux container has references to its gcc-toolset in its PATHs;
# clean up LIBRARY_PATH and LD_LIBRARY_PATH since we yum remove that version.
# -- Predefine variables to avoid Dockerfile linting warnings --
# Docker requires environment variables to be defined before reuse.
ENV LIBRARY_PATH=""
ENV LD_LIBRARY_PATH=""
ENV DEVTOOLSET_ROOTPATH="/opt/rh/gcc-toolset-13/root"
ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:${PATH}"
ENV LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LD_LIBRARY_PATH}"

######## Verify GCC Toolset ########
# clang++ is allowed to be absent at this point; gcc/g++ must resolve.
RUN which gcc && gcc --version && \
    which g++ && g++ --version && \
    which clang++ || true

######## Shared Python Interpreters ########
# Build Python with --enable-shared for embedding (e.g., rocgdb).
# The manylinux /opt/python builds are statically linked and can't be embedded.
WORKDIR /install-shared-pythons
COPY install_shared_pythons.sh ./
RUN ./install_shared_pythons.sh /tmp/python-build && rm -rf /install-shared-pythons /tmp/python-build

######## Git Configuration ########
# Git started enforcing strict user checking, which thwarts version
# configuration scripts in a docker image where the tree was checked
# out by the host and mapped in. Disable the check.
# See: https://github.com/openxla/iree/issues/12046
# We use the wildcard option to disable the checks. This was added
# in git 2.35.3.
RUN git config --global --add safe.directory '*'

######## CUDA Toolkit 12.9 ########
WORKDIR /install-cuda
COPY install_cuda.sh ./
RUN ./install_cuda.sh "12.9" && rm -rf /install-cuda
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
141 changes: 141 additions & 0 deletions dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here. This fully duplicates the file above; the only change is the CUDA version. This adds an unnecessary maintenance burden.

Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# This dockerfile builds automatically upon push to the main branch. It can be built
# interactively for testing via:
#   docker buildx build --file dockerfiles/build_manylinux_cuda_13.2_x86_64.Dockerfile dockerfiles/
# This will print a SHA image id, which you can run with (or equiv):
#   sudo docker run --rm -it --entrypoint /bin/bash <<IMAGE>>
#
# To build and push to a test branch, create a pull request on a branch named:
#   stage/docker/*
#
# We build our portable linux releases on the manylinux (RHEL-based)
# images, with custom additional packages installed. We switch to
# new upstream versions as needed.
FROM quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017

######## Python and tool PATH setup ########
# These images come with multiple python versions. We pin one (cp312) for
# default use and prepend therock-tools to PATH.
ENV PATH="/usr/local/therock-tools/bin:/opt/python/cp312-cp312/bin:${PATH}"

######## Pip Packages ########
# --no-cache-dir keeps the pip download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools==69.1.1 wheel==0.46.2 && \
    pip install --no-cache-dir CppHeaderParser==2.7.4 meson==1.7.0 tomli==2.2.1 PyYAML==6.0.2

######## Repo ########
# --fail ensures an HTTP error page is not silently installed as the repo tool.
RUN curl --silent --fail --show-error --location \
        https://storage.googleapis.com/git-repo-downloads/repo \
        --output /usr/local/bin/repo && \
    chmod a+x /usr/local/bin/repo

######## CCache ########
WORKDIR /install-ccache
COPY install_ccache.sh ./
RUN ./install_ccache.sh "4.11.2" && rm -rf /install-ccache

######## SCCache ########
WORKDIR /install-sccache
COPY install_sccache.sh ./
RUN ./install_sccache.sh "0.14.0" && rm -rf /install-sccache

######## CMake ########
WORKDIR /install-cmake
ENV CMAKE_VERSION="3.27.9"
COPY install_cmake.sh ./
RUN ./install_cmake.sh "${CMAKE_VERSION}" && rm -rf /install-cmake

######## Ninja ########
WORKDIR /install-ninja
# Use a dedicated variable: previously this reused CMAKE_VERSION, which
# clobbered the CMake pin above with the Ninja version.
ENV NINJA_VERSION="1.12.1"
COPY install_ninja.sh ./
RUN ./install_ninja.sh "${NINJA_VERSION}" && rm -rf /install-ninja

######## AWS CLI ########
WORKDIR /install-awscli
COPY install_awscli.sh ./
RUN ./install_awscli.sh && rm -rf /install-awscli

######## Google Test ########
WORKDIR /install-googletest
ENV GOOGLE_TEST_VERSION="1.16.0"
COPY install_googletest.sh ./
RUN ./install_googletest.sh "${GOOGLE_TEST_VERSION}" && rm -rf /install-googletest

######## Yum Packages ########
# We are pinning to gcc-toolset-13 until it is safe to upgrade. The latest
# manylinux containers use gcc-toolset-14 or later, which is not yet compatible
# with the LLVM that ROCm builds. This can be upgraded when clang-21 is used.
#
# We allow development tools in this list but not development packages (so that
# things can't accidentally build with system dependencies).
#
# Development tool dependencies:
#   texinfo, flex: rocprofiler-systems
RUN yum install -y epel-release && \
    yum remove -y gcc-toolset* && \
    yum install -y \
        gcc-toolset-13-binutils \
        gcc-toolset-13-gcc \
        gcc-toolset-13-gcc-c++ \
        gcc-toolset-13-gcc-gfortran \
        gcc-toolset-13-libatomic-devel \
        gcc-toolset-13-libstdc++-devel \
        git-lfs \
        patchelf \
        vim-common \
    && yum install -y \
        flex \
        texinfo \
    && yum clean all && \
    rm -rf /var/cache/yum

######## DVC via pip ########
# dvc's rpm package includes .so dependencies built against glib 2.29;
# settling for pip install for now, but it installs modules not needed for
# dvc pull. More dvc features may be used in upcoming sequenced builds.
# Also pinning pathspec because a new version of it breaks the private
# _DIR_MARK API that dvc uses. When upgrading past ~3.64.0, the pin can
# likely be removed.
#
# Note: dvc[s3] version locking currently limits boto3>=1.41.0,<1.42.0
# in requirements.txt
RUN pip install --no-cache-dir 'pathspec<0.13.0' 'dvc[s3]==3.62.0' && \
    which dvc && dvc --version || true

######## GCC Toolset environment ########
# This is a subset of what is typically sourced in the gcc-toolset enable
# script.
# The base manylinux container has references to its gcc-toolset in its PATHs;
# clean up LIBRARY_PATH and LD_LIBRARY_PATH since we yum remove that version.
# -- Predefine variables to avoid Dockerfile linting warnings --
# Docker requires environment variables to be defined before reuse.
ENV LIBRARY_PATH=""
ENV LD_LIBRARY_PATH=""
ENV DEVTOOLSET_ROOTPATH="/opt/rh/gcc-toolset-13/root"
ENV PATH="/opt/rh/gcc-toolset-13/root/usr/bin:${PATH}"
ENV LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/opt/rh/gcc-toolset-13/root/usr/lib64:/opt/rh/gcc-toolset-13/root/usr/lib:${LD_LIBRARY_PATH}"

######## Verify GCC Toolset ########
# clang++ is allowed to be absent at this point; gcc/g++ must resolve.
RUN which gcc && gcc --version && \
    which g++ && g++ --version && \
    which clang++ || true

######## Shared Python Interpreters ########
# Build Python with --enable-shared for embedding (e.g., rocgdb).
# The manylinux /opt/python builds are statically linked and can't be embedded.
WORKDIR /install-shared-pythons
COPY install_shared_pythons.sh ./
RUN ./install_shared_pythons.sh /tmp/python-build && rm -rf /install-shared-pythons /tmp/python-build

######## Git Configuration ########
# Git started enforcing strict user checking, which thwarts version
# configuration scripts in a docker image where the tree was checked
# out by the host and mapped in. Disable the check.
# See: https://github.com/openxla/iree/issues/12046
# We use the wildcard option to disable the checks. This was added
# in git 2.35.3.
RUN git config --global --add safe.directory '*'

######## CUDA Toolkit 13.2 ########
WORKDIR /install-cuda
COPY install_cuda.sh ./
RUN ./install_cuda.sh "13.2" && rm -rf /install-cuda
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
79 changes: 79 additions & 0 deletions dockerfiles/install_cuda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/bash
# Copyright 2026 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Installs the NVIDIA CUDA Toolkit from the official NVIDIA RHEL8 repository.
# Downloads and checksum-verifies the NVIDIA GPG key and .repo file before use,
# then installs a pinned, versioned toolkit package via dnf.
#
# Usage: install_cuda.sh <cuda_version>
# Example: install_cuda.sh 12.9
#          install_cuda.sh 13.2

set -euo pipefail

CUDA_VERSION="${1:-}"

if [[ -z "${CUDA_VERSION}" ]]; then
  echo "ERROR: CUDA version argument required." >&2
  echo "Usage: $0 <version> (e.g. $0 12.9 or $0 13.2)" >&2
  exit 1
fi

# The repo URL below is x86_64-specific, but the package spec is suffixed with
# the host arch; fail fast on a mismatch rather than requesting a package that
# cannot exist in this repository.
ARCH="$(uname -m)"
if [[ "${ARCH}" != "x86_64" ]]; then
  echo "ERROR: this script only supports x86_64 (got '${ARCH}')." >&2
  exit 1
fi

CUDA_REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64"

GPG_KEY_URL="${CUDA_REPO_BASE}/D42D0685.pub"
GPG_KEY_SHA256="27e46a2d43e125859fb8a62c3b75bf798aeb95fa6f7d9bf790c1167ed9a0b39c"

REPO_FILE_URL="${CUDA_REPO_BASE}/cuda-rhel8.repo"
REPO_FILE_SHA256="8d5bbb8dc62e0f0701a27355659248c3a11477e80a1b3c93a63ff116d705c06f"

# Supported CUDA versions mapped to fully pinned dnf package specs
# (name-version-release, arch appended below).
declare -A CUDA_PACKAGE_SPECS=(
  ["12.9"]="cuda-toolkit-12-9-12.9.0-1"
  ["13.2"]="cuda-toolkit-13-2-13.2.0-1"
)

if [[ -z "${CUDA_PACKAGE_SPECS[${CUDA_VERSION}]+x}" ]]; then
  echo "ERROR: Unknown CUDA version '${CUDA_VERSION}'." >&2
  echo "Supported versions: ${!CUDA_PACKAGE_SPECS[*]}" >&2
  exit 1
fi

PACKAGE_SPEC="${CUDA_PACKAGE_SPECS[${CUDA_VERSION}]}.${ARCH}"

echo "Downloading NVIDIA GPG key"
curl --silent --fail --show-error --location \
  "${GPG_KEY_URL}" \
  --output nvidia.pub

echo "Verifying GPG key checksum"
# NOTE: sha256sum --check requires TWO spaces between digest and filename;
# a single space is rejected as "no properly formatted checksum lines".
echo "${GPG_KEY_SHA256}  nvidia.pub" | sha256sum --check --strict

rpm --import nvidia.pub

echo "Downloading CUDA repo file"
curl --silent --fail --show-error --location \
  "${REPO_FILE_URL}" \
  --output cuda-rhel8.repo

echo "Verifying repo file checksum"
echo "${REPO_FILE_SHA256}  cuda-rhel8.repo" | sha256sum --check --strict
cp cuda-rhel8.repo /etc/yum.repos.d/cuda-rhel8.repo

# Best-effort: enable the CRB repo (named "powertools" on some EL8 variants,
# "crb" on others); some CUDA dependencies live there.
dnf config-manager --set-enabled powertools 2>/dev/null ||
  dnf config-manager --set-enabled crb 2>/dev/null ||
  true

echo "Installing ${PACKAGE_SPEC}"
dnf install -y "${PACKAGE_SPEC}"
dnf clean all
rm -rf /var/cache/dnf

echo "Verifying CUDA installation"
/usr/local/cuda/bin/nvcc --version

echo "=== CUDA Toolkit ${CUDA_VERSION} installed successfully ==="
Loading