Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sdk_v2/cpp/src/catalog/static_catalog_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ class StaticCatalogClient : public ICatalogClient {
for (const auto& [device, eps] : devices_to_eps) {
for (const auto& ep : eps) {
allowed.emplace(to_lower(device), to_lower(ep));

// CudaPluginExecutionProvider is the ORT registration name for the
// downloadable CUDA plugin EP, but catalog models are tagged with
// CudaExecutionProvider. Add the canonical name as an alias so
// plugin-EP machines can see and load CUDA catalog models.
Comment thread
prathikr marked this conversation as resolved.
if (to_lower(ep) == "cudapluginexecutionprovider") {
allowed.emplace(to_lower(device), "cudaexecutionprovider");
}
}
}

Expand Down
221 changes: 159 additions & 62 deletions sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
// Licensed under the MIT License.
#include "ep_detection/cuda_ep_bootstrapper.h"

#include "http/http_client.h"
#include "http/http_download.h"
#include "logger.h"
#include "util/file_lock.h"
#include "http/http_download.h"
#include "util/sha256.h"
#include "util/zip_extract.h"

#include <fmt/format.h>
#include <nlohmann/json.hpp>

#include <algorithm>
#include <atomic>
#include <cctype>
#include <cstdio>
#include <filesystem>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
Expand All @@ -25,60 +32,116 @@ namespace {

constexpr const char* kPackageFileName = "cuda-ep.zip";
constexpr const char* kLockFileName = "cuda-ep.lock";
constexpr const char* kStagingDirName = "cuda-ep-staging";
constexpr const char* kUserAgent = "FoundryLocal";
constexpr int kMaxInstallAttempts = 5;

// CUDA EP package is built against the ONNX Runtime version we link against, so
// WinML and non-WinML builds need separate downloads. Hashes mirror the C# core
// (see neutron.main/src/Service/Providers/Detector/CudaEpBootstrapper.cs).
// WinML build -> ORT 1.23.2 (cuda-ep-20260501-182408.zip)
// Non-WinML -> ORT 1.25.1 (cuda-ep-20260501-062935.zip)
#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-182408.zip";
#else
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-062935.zip";
#endif

struct ExpectedBinary {
const char* filename;
const char* sha256;
// Manifest URL on the CDN — published by the CUDA EP upload pipeline.
constexpr const char* kManifestUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda_ep_prod.json";

// -----------------------------------------------------------------------
// Platform detection
//
// Returns the manifest platform key and ORT registration library filename
// for the current build target, or std::nullopt if unsupported.
//
// To add a platform:
// 1. Uncomment its #elif block below.
// 2. Uncomment its entry in $binaryNames / $expectedPlatforms in
// cuda-ep-upload.yml and update $platformPattern there too.
// -----------------------------------------------------------------------
/// Per-platform constants for the CUDA EP package of the current build target.
struct PlatformInfo {
  const char* key; // Key looked up in the manifest's "packages" object, e.g. "win-x64".
  const char* ep_lib; // Library filename, joined to the install directory and passed to the ORT registration callback.
};

#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "4CEF18654878CEFCFCF8488E9C3A705EB5327AA9B5556155C319C9CBB2D98FCF"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
std::optional<PlatformInfo> GetPlatformInfo() {
#if defined(_WIN32) && !defined(_M_ARM64)
return PlatformInfo{"win-x64", "onnxruntime_providers_cuda_plugin.dll"};

// Uncomment when win-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(_WIN32) && defined(_M_ARM64)
// return PlatformInfo{"win-arm64", "onnxruntime_providers_cuda_plugin.dll"};

// Uncomment when linux-x64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__x86_64__)
// return PlatformInfo{"linux-x64", "libonnxruntime_providers_cuda_plugin.so"};

// Uncomment when linux-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__aarch64__)
// return PlatformInfo{"linux-arm64", "libonnxruntime_providers_cuda_plugin.so"};

#else
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "DD540FCFECFBC68B4675C9ADF09C2858CF6B054563859D79598AA2524406A76F"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
return std::nullopt; // Platform not yet supported — graceful no-op.
#endif
}

constexpr const char* kRegistrationName = "Foundry.CUDA";
constexpr const char* kCudaProviderDll = "onnxruntime_providers_cuda.dll";

/// Values extracted from the CUDA EP manifest for a single platform entry.
struct ManifestInfo {
  std::string version; // Top-level "version" field of the manifest.
  std::string download_url; // "url" field of the selected platform's package entry.
  std::unordered_map<std::string, std::string> sha256; // filename -> expected hash
};

/// Fetch and parse the CUDA EP manifest from the CDN.
///
/// Downloads the document at kManifestUrl, parses it as JSON, and returns the
/// top-level "version" together with the "url" and "sha256" map of the entry
/// stored under packages[platform_key].
///
/// @param platform_key Key of the package entry to select, e.g. "win-x64".
/// @param logger       Receives a debug line naming the manifest URL; also
///                     handed to the HTTP helper.
/// @return The version, download URL and filename -> hash map for the platform.
/// @throws std::runtime_error if the manifest has no entry for platform_key, or
///         if that entry's "sha256" is not a non-empty JSON object.
///         Exceptions raised by nlohmann::json for malformed JSON, missing
///         keys, or wrongly typed values propagate unchanged.
ManifestInfo FetchManifest(const char* platform_key, fl::ILogger& logger) {
  logger.Log(fl::LogLevel::Debug,
             fmt::format("CUDA EP: fetching manifest from {}", kManifestUrl));

  const auto body = fl::http::HttpGetWithRetry(kManifestUrl, kUserAgent, logger);
  const auto manifest = nlohmann::json::parse(body);

  ManifestInfo info;
  info.version = manifest.at("version").get<std::string>();

  const auto& packages = manifest.at("packages");
  if (!packages.contains(platform_key)) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has no entry for platform '{}'", platform_key));
  }

  const auto& pkg = packages.at(platform_key);
  info.download_url = pkg.at("url").get<std::string>();

  // The hash map is what package verification iterates over. An empty map
  // would give that loop nothing to check, and a non-object value would make
  // items() produce index / empty-string keys that are not filenames. Reject
  // both here so a malformed manifest fails loudly instead of being trusted.
  const auto& hashes = pkg.at("sha256");
  if (!hashes.is_object() || hashes.empty()) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has no sha256 hashes for platform '{}'", platform_key));
  }

  for (const auto& [filename, hash] : hashes.items()) {
    info.sha256[filename] = hash.get<std::string>();
  }

  return info;
}

/// Verify all expected binaries exist and have correct SHA256 hashes.
bool VerifyPackage(const std::filesystem::path& dir, fl::ILogger& logger) {
for (const auto& expected : kExpectedBinaries) {
auto file_path = dir / expected.filename;
/// Logs the name of the first missing or mismatched file to aid diagnosis.
bool VerifyPackage(const std::filesystem::path& dir,
const std::unordered_map<std::string, std::string>& expected_hashes,
fl::ILogger& logger) {
// Quick sentinel check before the expensive SHA256 work.
if (!std::filesystem::exists(dir)) {
logger.Log(fl::LogLevel::Debug,
fmt::format("CUDA EP: package directory does not exist: {}", dir.string()));
return false;
}

for (const auto& [filename, expected_hash] : expected_hashes) {
Comment thread
prathikr marked this conversation as resolved.
Outdated
auto file_path = dir / filename;

if (!std::filesystem::exists(file_path)) {
logger.Log(fl::LogLevel::Debug,
fmt::format("CUDA EP: package file missing: {}", file_path.string()));
return false;
}

auto hash = fl::Sha256File(file_path);

// Case-insensitive comparison
std::string expected_hash(expected.sha256);
if (!std::equal(hash.begin(), hash.end(), expected_hash.begin(), expected_hash.end(),
[](char a, char b) { return std::toupper(a) == std::toupper(b); })) {
Comment thread
prathikr marked this conversation as resolved.
Outdated
logger.Log(fl::LogLevel::Warning,
fmt::format("CUDA EP: hash mismatch for {}: got {}, expected {}",
expected.filename, hash, expected.sha256));
filename, hash, expected_hash));
return false;
}
}
Expand Down Expand Up @@ -118,74 +181,101 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,

attempts_++;

// Bail out early if this platform is not yet in the manifest.
auto platform_info = GetPlatformInfo();
if (!platform_info) {
logger.Log(LogLevel::Information, "CUDA EP: current platform is not yet supported");
return false;
}

auto ep_dir = std::filesystem::path(ep_dir_);
auto lock_path = ep_dir.parent_path() / kLockFileName;
auto zip_path = ep_dir.parent_path() / kPackageFileName;
auto parent_dir = ep_dir.parent_path();

try {
// Cross-process lock to prevent concurrent installs
FileLock lock(lock_path);
// Fetch the manifest before acquiring the lock to avoid holding it during network I/O.
auto manifest = FetchManifest(platform_info->key, logger);
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: manifest fetched (version={}, platform={})",
manifest.version, platform_info->key));

// Cross-process lock to prevent concurrent installs.
std::filesystem::create_directories(parent_dir);
FileLock lock(parent_dir / kLockFileName);

// Check if package already exists and is valid
if (VerifyPackage(ep_dir, logger)) {
// Re-check after acquiring the lock — another process may have already updated.
if (!force && VerifyPackage(ep_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Information, "CUDA EP: package already valid, skipping download");
} else {
// Clean up any partial install
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
// Download to a staging directory so a failure never corrupts the existing install.
auto staging_dir = parent_dir / kStagingDirName;
if (std::filesystem::exists(staging_dir)) {
std::filesystem::remove_all(staging_dir);
}
std::filesystem::create_directories(staging_dir);

std::filesystem::create_directories(ep_dir);
auto zip_path = staging_dir / kPackageFileName;

// Download
logger.Log(LogLevel::Information, "CUDA EP: downloading from CDN...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: downloading for {}...", platform_info->key));
logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: download URL is {}", manifest.download_url));

// Bridge callback-based cancellation to the atomic flag HttpDownloadFile expects
std::atomic<bool> cancel_flag{false};

auto download_progress = [&](float pct) {
if (progress_cb) {
// 0-80% for download phase
// 0-80% for the download phase.
if (!progress_cb(name_, pct * 0.8f)) {
cancel_flag.store(true);
}
}
};

if (!HttpDownloadFile(kDownloadUrl, zip_path, kUserAgent,
if (!HttpDownloadFile(manifest.download_url, zip_path, kUserAgent,
&cancel_flag, download_progress, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: download failed (see prior log for details)");
std::filesystem::remove_all(staging_dir);
return false;
}

// Extract
logger.Log(LogLevel::Information, "CUDA EP: extracting...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: extracting package to {}", staging_dir.string()));

if (!ExtractZip(zip_path, ep_dir, logger)) {
if (!ExtractZip(zip_path, staging_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: extraction failed");
std::filesystem::remove_all(staging_dir);
return false;
}

// Clean up zip
std::filesystem::remove(zip_path);

// Verify
if (!VerifyPackage(ep_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after download");
if (!VerifyPackage(staging_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after extraction");
std::filesystem::remove_all(staging_dir);
return false;
}

logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: staging verification succeeded, promoting to {}",
ep_dir.string()));

// Promote staging: delete the old install, then rename staging to the target.
// (Two separate filesystem steps, so not atomic as a pair.)
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
}
std::filesystem::rename(staging_dir, ep_dir);
logger.Log(LogLevel::Information, "CUDA EP: successfully installed.");
}

if (progress_cb) {
progress_cb(name_, 90.0f);
}

// Register with ORT
// Register with ORT.
#ifdef _WIN32
// Permanently prepend the EP directory to PATH. The zip bundles all
// required CUDA/cuDNN DLLs, so no system CUDA install is needed.
// PATH must stay modified for the process lifetime because:
// - onnxruntime_providers_cuda.dll delay-loads some dependencies
// - onnxruntime_providers_cuda_plugin.dll delay-loads CUDA dependencies
// - onnxruntime-genai-cuda.dll is loaded later at model-load time
// - ORT creates CUDA sessions after registration
{
Expand All @@ -202,9 +292,17 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
}
#endif

auto cuda_dll_path = ep_dir / kCudaProviderDll;
auto cuda_lib_path = ep_dir / platform_info->ep_lib;

// NOTE: RegisterExecutionProviderLibrary loads the CUDA plugin DLL, which
// initializes the CUDA runtime and cuDNN. This can take 30–60 seconds on
// first use — especially on machines with large cuDNN caches or slow VRAM
// init. This is normal; it is NOT a hang in the bootstrapper itself.
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: registering provider library {} (CUDA init may take ~30s)...",
cuda_lib_path.string()));

if (!register_ep_(kRegistrationName, cuda_dll_path)) {
if (!register_ep_(kRegistrationName, cuda_lib_path)) {
logger.Log(LogLevel::Warning, "CUDA EP: ORT registration failed");
return false;
}
Expand All @@ -215,10 +313,9 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
progress_cb(name_, 100.0f);
}

// Bootstrapper-side log — captures the install dir, which the central
// register_ep callback (logs library + version) doesn't have.
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: ready (install_path={})", ep_dir.string()));
fmt::format("CUDA EP: ready (install_path={}, version={})",
ep_dir.string(), manifest.version));
return true;
} catch (const std::exception& e) {
logger.Log(LogLevel::Warning, fmt::format("CUDA EP: error: {}", e.what()));
Expand Down
2 changes: 1 addition & 1 deletion sdk_v2/cpp/src/ep_detection/ep_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ EpDownloadResult EpDetector::DownloadAndRegisterEps(const std::vector<std::strin

logger_.Log(LogLevel::Information, "Downloading and registering EP: " + bs->Name());

if (bs->DownloadAndRegister(/*force=*/true, wrapped_cb, logger_)) {
if (bs->DownloadAndRegister(/*force=*/false, wrapped_cb, logger_)) {
result.registered_eps.push_back(bs->Name());
Comment thread
prathikr marked this conversation as resolved.

// Update cached registration state in place under the cache lock so
Expand Down
16 changes: 14 additions & 2 deletions sdk_v2/cpp/src/inferencing/model_load_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ constexpr ModelIdEpRequirement kModelIdEpRequirements[] = {
{"vitis-npu", "VitisAIExecutionProvider"},
};

/// Decides whether an EP that is registered on this machine fulfils the EP a
/// catalog model asks for.
///
/// Two cases count as a match (both comparisons are case-sensitive):
///   - the names are identical, or
///   - the model requires "CUDAExecutionProvider" and the registered EP is
///     "CudaPluginExecutionProvider" — catalog models are tagged with the
///     canonical name, not the plugin name.
///
/// @param registered_ep Name of an EP that is available/registered.
/// @param required_ep   EP name the model requires.
/// @return true when registered_ep satisfies required_ep, false otherwise.
bool EpSatisfiesRequirement(std::string_view registered_ep, std::string_view required_ep) {
  const bool same_name = (registered_ep == required_ep);

  // The alias only applies in one direction: plugin name standing in for the canonical one.
  const bool plugin_stands_in_for_cuda =
      (required_ep == "CUDAExecutionProvider") &&
      (registered_ep == "CudaPluginExecutionProvider");

  return same_name || plugin_stands_in_for_cuda;
}

/// Returns the required EP registration name for a model_id, or empty if none required.
std::string_view RequiredEpForModelId(std::string_view model_id) {
for (const auto& req : kModelIdEpRequirements) {
Expand Down Expand Up @@ -65,8 +75,10 @@ ModelLoadManager::~ModelLoadManager() {
bool ModelLoadManager::HasEP(const std::string& ep_name) const {
const auto& device_map = ep_detector_.GetAvailableDevicesToEPs();
for (const auto& [device, eps] : device_map) {
if (std::find(eps.begin(), eps.end(), ep_name) != eps.end()) {
return true;
for (const auto& registered : eps) {
if (EpSatisfiesRequirement(registered, ep_name)) {
return true;
}
}
}

Expand Down