Skip to content
18 changes: 18 additions & 0 deletions .buildkite/release-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -711,3 +711,21 @@ steps:
env:
DOCKER_BUILDKIT: "1"
S3_BUCKET: "vllm-wheels"

# Pipeline step: publish the ROCm nightly image to Docker Hub and prune old
# nightly tags. It waits for the build-rocm-release-image step and is only
# scheduled for builds whose NIGHTLY environment variable equals "1".
- label: "Publish nightly ROCm image to DockerHub"
depends_on:
- build-rocm-release-image
if: build.env("NIGHTLY") == "1"
agents:
queue: small_cpu_queue_postmerge
commands:
# Push the image for this build to Docker Hub (the script is invoked without arguments)
- "bash .buildkite/scripts/push-nightly-builds-rocm.sh"
# Clean up old nightly builds (keep only last 14).
# Arguments: tag prefix "nightly-", then the Docker Hub repository to prune.
- "bash .buildkite/scripts/cleanup-nightly-builds.sh nightly- vllm/vllm-openai-rocm"
plugins:
# Logs in as vllmbot, reading the password from the DOCKERHUB_TOKEN environment variable
- docker-login#v3.0.0:
username: vllmbot
password-env: DOCKERHUB_TOKEN
env:
DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot"
Comment on lines +766 to +777
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

To improve maintainability, it's better to define the ROCm nightly repository name as an environment variable and pass it to the scripts. This avoids hardcoding the same string in multiple places and makes it easier to update in the future. This change is required to work with the suggested changes in push-nightly-builds-rocm.sh.

    # ROCM_NIGHTLY_REPO is declared in this step's `env` block, so it only
    # exists once the job is running. Buildkite interpolates a bare `$VAR`
    # when the pipeline is uploaded, where this variable is not defined; the
    # scripts would then receive an empty argument and
    # cleanup-nightly-builds.sh would fall back to its default repository
    # (vllm/vllm-openai). `$$` escapes the dollar sign so the job's shell
    # performs the expansion at run time instead.
    commands:
      - "bash .buildkite/scripts/push-nightly-builds-rocm.sh \"$$ROCM_NIGHTLY_REPO\""
      # Clean up old nightly builds (keep only last 14)
      - "bash .buildkite/scripts/cleanup-nightly-builds.sh nightly- \"$$ROCM_NIGHTLY_REPO\""
    plugins:
      - docker-login#v3.0.0:
          username: vllmbot
          password-env: DOCKERHUB_TOKEN
    env:
      DOCKER_BUILDKIT: "1"
      DOCKERHUB_USERNAME: "vllmbot"
      # Single source of truth for the Docker Hub repository used by both commands above
      ROCM_NIGHTLY_REPO: "vllm/vllm-openai-rocm"

1 change: 0 additions & 1 deletion .buildkite/scripts/annotate-rocm-release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ ROCM_VERSION=$(grep -E '^ARG BASE_IMAGE=' docker/Dockerfile.rocm_base | sed -E '
PYTHON_VERSION=$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo "3.12")
PYTORCH_ROCM_ARCH=$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")

# TODO: Enable the nightly build for ROCm
# Get release version, default to 1.0.0.dev for nightly/per-commit builds
RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null || echo "")
if [ -z "${RELEASE_VERSION}" ]; then
Expand Down
17 changes: 10 additions & 7 deletions .buildkite/scripts/cleanup-nightly-builds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@ set -ex

# Clean up old nightly builds from DockerHub, keeping only the last 14 builds
# This script uses DockerHub API to list and delete old tags with specified prefix
# Usage: cleanup-nightly-builds.sh [TAG_PREFIX]
# Example: cleanup-nightly-builds.sh "nightly-" or cleanup-nightly-builds.sh "cu130-nightly-"
# Usage: cleanup-nightly-builds.sh [TAG_PREFIX] [REPO]
# Example: cleanup-nightly-builds.sh "nightly-"
# Example: cleanup-nightly-builds.sh "cu130-nightly-"
# Example: cleanup-nightly-builds.sh "nightly-" "vllm/vllm-openai-rocm"

# Get tag prefix from argument, default to "nightly-" if not provided
# Get tag prefix and repo from arguments
TAG_PREFIX="${1:-nightly-}"
REPO="${2:-vllm/vllm-openai}"

echo "Cleaning up tags with prefix: $TAG_PREFIX"
echo "Cleaning up tags with prefix: $TAG_PREFIX in repository: $REPO"

# DockerHub API endpoint for vllm/vllm-openai repository
REPO_API_URL="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags"
# DockerHub API endpoint for the repository
REPO_API_URL="https://hub.docker.com/v2/repositories/${REPO}/tags"

# Get DockerHub credentials from environment
if [ -z "$DOCKERHUB_TOKEN" ]; then
Expand Down Expand Up @@ -70,7 +73,7 @@ delete_tag() {
local tag_name="$1"
echo "Deleting tag: $tag_name"

local delete_url="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags/$tag_name"
local delete_url="https://hub.docker.com/v2/repositories/${REPO}/tags/$tag_name"
set +x
local response=$(curl -s -X DELETE -H "Authorization: Bearer $BEARER_TOKEN" "$delete_url")
set -x
Expand Down
44 changes: 44 additions & 0 deletions .buildkite/scripts/push-nightly-builds-rocm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Push the ROCm nightly image from ECR to Docker Hub as <repo>:nightly
# and <repo>:nightly-<commit>.
# Run when NIGHTLY=1 after build-rocm-release-image has pushed to ECR.
#
# Usage: push-nightly-builds-rocm.sh [REPO]
#   REPO  Docker Hub repository to push to (default: vllm/vllm-openai-rocm)
#
# Environment:
#   BUILDKITE_COMMIT  required; commit SHA whose ROCm image is already in ECR
#   DRY_RUN           set to 1 to pull and tag locally without pushing (default: 0)
#
# Local testing (no push to Docker Hub):
#   BUILDKITE_COMMIT=<commit-with-rocm-image-in-ecr> DRY_RUN=1 bash .buildkite/scripts/push-nightly-builds-rocm.sh
# Requires: AWS CLI configured (for ECR public login), Docker. For full run: Docker Hub login.

# pipefail makes the `aws ... | docker login` pipeline fail when the aws side
# fails, instead of relying on docker login rejecting an empty password.
set -euxo pipefail

# Use BUILDKITE_COMMIT from env (required; set to a commit that has ROCm image in ECR for local test)
BUILDKITE_COMMIT="${BUILDKITE_COMMIT:?Set BUILDKITE_COMMIT to the commit SHA that has the ROCm image in ECR (e.g. from a previous release pipeline run)}"
DRY_RUN="${DRY_RUN:-0}"
# Optional first argument; an empty or missing value falls back to the ROCm nightly repo
REPO="${1:-vllm/vllm-openai-rocm}"

ORIG_TAG="${BUILDKITE_COMMIT}-rocm"
TAG_NAME="nightly"
TAG_NAME_COMMIT="nightly-${BUILDKITE_COMMIT}"

# Source image in ECR, defined once so pull and both tag commands cannot drift apart
readonly ECR_REGISTRY="public.ecr.aws/q9t5s3a7"
readonly ECR_IMAGE="${ECR_REGISTRY}/vllm-release-repo:${ORIG_TAG}"

echo "Pushing ROCm image from ECR tag $ORIG_TAG to Docker Hub as $TAG_NAME and $TAG_NAME_COMMIT"
if [[ "$DRY_RUN" == "1" ]]; then
  echo "[DRY_RUN] Skipping push to Docker Hub"
fi

# Login to ECR and pull the image built by build-rocm-release-image
aws ecr-public get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$ECR_REGISTRY"
docker pull "$ECR_IMAGE"

# Tag for Docker Hub (nightly and nightly-<commit>)
docker tag "$ECR_IMAGE" "${REPO}:${TAG_NAME}"
docker tag "$ECR_IMAGE" "${REPO}:${TAG_NAME_COMMIT}"

# In dry-run mode the pull and local tags above still happen; only the push is skipped
if [[ "$DRY_RUN" == "1" ]]; then
  echo "[DRY_RUN] Would push ${REPO}:$TAG_NAME and ${REPO}:$TAG_NAME_COMMIT"
  echo "[DRY_RUN] Local tags created. Exiting without push."
  exit 0
fi

# Push to Docker Hub (docker-login plugin runs before this step in CI)
docker push "${REPO}:${TAG_NAME}"
docker push "${REPO}:${TAG_NAME_COMMIT}"

echo "Pushed ${REPO}:$TAG_NAME and ${REPO}:$TAG_NAME_COMMIT"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

For better maintainability and to avoid hardcoding values, the Docker repository name should be passed as an argument to this script instead of being hardcoded. This makes the script more reusable and configurable. This change is similar to how cleanup-nightly-builds.sh was modified in this PR.

This suggestion refactors the script to accept the repository name as the first argument and updates the documentation comments accordingly. You will also need to update the call to this script in .buildkite/release-pipeline.yaml to pass the repository name.

# Push ROCm nightly image from ECR to a specified Docker Hub repository.
# e.g. vllm/vllm-openai-rocm:nightly and vllm/vllm-openai-rocm:nightly-<commit>.
# Run when NIGHTLY=1 after build-rocm-release-image has pushed to ECR.
#
# Usage: push-nightly-builds-rocm.sh <repo-name>
#   <repo-name>  Docker Hub repository to push to (required)
#
# Environment:
#   BUILDKITE_COMMIT  required; commit SHA whose ROCm image is already in ECR
#   DRY_RUN           set to 1 to pull and tag locally without pushing (default: 0)
#
# Local testing (no push to Docker Hub):
#   BUILDKITE_COMMIT=<commit-with-rocm-image-in-ecr> DRY_RUN=1 bash .buildkite/scripts/push-nightly-builds-rocm.sh <repo-name>
# Requires: AWS CLI configured (for ECR public login), Docker. For full run: Docker Hub login.

# pipefail makes the `aws ... | docker login` pipeline fail when the aws side
# fails, instead of relying on docker login rejecting an empty password.
set -euxo pipefail

# Use BUILDKITE_COMMIT from env (required; set to a commit that has ROCm image in ECR for local test)
BUILDKITE_COMMIT="${BUILDKITE_COMMIT:?Set BUILDKITE_COMMIT to the commit SHA that has the ROCm image in ECR (e.g. from a previous release pipeline run)}"
DRY_RUN="${DRY_RUN:-0}"
# Abort with a clear message when the repository is missing or empty
REPO="${1:?Please provide the Docker repository name as the first argument}"

ORIG_TAG="${BUILDKITE_COMMIT}-rocm"
TAG_NAME="nightly"
TAG_NAME_COMMIT="nightly-${BUILDKITE_COMMIT}"

# Source image in ECR, defined once so pull and both tag commands cannot drift apart
readonly ECR_REGISTRY="public.ecr.aws/q9t5s3a7"
readonly ECR_IMAGE="${ECR_REGISTRY}/vllm-release-repo:${ORIG_TAG}"

echo "Pushing ROCm image from ECR tag $ORIG_TAG to Docker Hub as ${REPO}:${TAG_NAME} and ${REPO}:${TAG_NAME_COMMIT}"
if [[ "$DRY_RUN" == "1" ]]; then
  echo "[DRY_RUN] Skipping push to Docker Hub"
fi

# Login to ECR and pull the image built by build-rocm-release-image
aws ecr-public get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$ECR_REGISTRY"
docker pull "$ECR_IMAGE"

# Tag for Docker Hub (nightly and nightly-<commit>)
docker tag "$ECR_IMAGE" "${REPO}:${TAG_NAME}"
docker tag "$ECR_IMAGE" "${REPO}:${TAG_NAME_COMMIT}"

# In dry-run mode the pull and local tags above still happen; only the push is skipped
if [[ "$DRY_RUN" == "1" ]]; then
  echo "[DRY_RUN] Would push ${REPO}:${TAG_NAME} and ${REPO}:${TAG_NAME_COMMIT}"
  echo "[DRY_RUN] Local tags created. Exiting without push."
  exit 0
fi

# Push to Docker Hub (docker-login plugin runs before this step in CI)
docker push "${REPO}:${TAG_NAME}"
docker push "${REPO}:${TAG_NAME_COMMIT}"

echo "Pushed ${REPO}:${TAG_NAME} and ${REPO}:${TAG_NAME_COMMIT}"

Loading