Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
# Python 3.12 included in this PyTorch image
ARG PYTHON_VERSION=3.11
ARG PYTORCH_BASE=pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
FROM ${PYTORCH_BASE}

WORKDIR /app

Expand Down
3 changes: 2 additions & 1 deletion Dockerfile-cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM python:3.12-slim
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim

WORKDIR /app

Expand Down
5 changes: 3 additions & 2 deletions Dockerfile-lb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
# Python 3.12 included in this PyTorch image
ARG PYTHON_VERSION=3.11
ARG PYTORCH_BASE=pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
FROM ${PYTORCH_BASE}

WORKDIR /app

Expand Down
3 changes: 2 additions & 1 deletion Dockerfile-lb-cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM python:3.12-slim
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim

WORKDIR /app

Expand Down
179 changes: 179 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ endif
WIP_TAG ?= wip
MULTI_PLATFORM := linux/amd64,linux/arm64

# Python version matrix for multi-version builds
# NOTE: keep these lists in sync with the pytorch_base() case
# statements in the versioned build targets below — a version listed
# here with no case branch produces an empty PYTORCH_BASE build-arg.
GPU_PYTHON_VERSIONS := 3.11 3.12
CPU_PYTHON_VERSIONS := 3.10 3.11 3.12
DEFAULT_PYTHON_VERSION := 3.11

# PyTorch base image mapping per Python version (GPU only)
# GNU make allows dots in variable names: PYTORCH_BASE_3.11 is one
# plain variable whose name contains "3.11".
PYTORCH_BASE_3.11 := pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
PYTORCH_BASE_3.12 := pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime

.PHONY: setup help

# Check if 'uv' is installed
Expand Down Expand Up @@ -130,6 +139,176 @@ build-wip-lb-cpu: setup # Build and push LB CPU image (multi-platform)
-t $(IMAGE)-lb-cpu:$(WIP_TAG) \
. --push

# Versioned Build Targets (multi-Python-version matrix)
# GPU images: Python 3.11, 3.12 (with PyTorch base)
# CPU images: Python 3.10, 3.11, 3.12 (python:X.Y-slim)
# Tag format: py${VERSION}-${TAG} (e.g., runpod/flash:py3.11-local)

# Build a GPU image for every Python version in GPU_PYTHON_VERSIONS.
# Each version maps to a pinned PyTorch base image via pytorch_base().
# FIX: the case statement now has a default branch — an unmapped
# version aborts the build instead of silently passing an empty
# PYTORCH_BASE build-arg (which would only fail later inside Docker).
build-gpu-versioned: setup # Build GPU images for all GPU Python versions
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Building GPU image for Python $$pyver (base: $$base)..."; \
		docker buildx build \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-t $(IMAGE):py$$pyver-$(TAG) \
			. --load; \
	done

# Build a CPU (python:X.Y-slim based) image for each version in
# CPU_PYTHON_VERSIONS, tagged $(IMAGE)-cpu:py<version>-$(TAG) and
# loaded into the local Docker daemon.
build-cpu-versioned: setup # Build CPU images for all CPU Python versions
	@for py in $(CPU_PYTHON_VERSIONS); do \
		echo "Building CPU image for Python $$py..."; \
		docker buildx build \
			-f Dockerfile-cpu \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$py \
			-t $(IMAGE)-cpu:py$$py-$(TAG) \
			. --load; \
	done

# Build a GPU load-balancer (LB) image for every Python version in
# GPU_PYTHON_VERSIONS, using the same PyTorch base mapping as
# build-gpu-versioned.
# FIX: the case statement now has a default branch — an unmapped
# version aborts the build instead of silently passing an empty
# PYTORCH_BASE build-arg.
build-lb-versioned: setup # Build GPU-LB images for all GPU Python versions
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Building GPU-LB image for Python $$pyver (base: $$base)..."; \
		docker buildx build \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-f Dockerfile-lb \
			-t $(IMAGE)-lb:py$$pyver-$(TAG) \
			. --load; \
	done

# Build a CPU load-balancer (LB) image for each version in
# CPU_PYTHON_VERSIONS, tagged $(IMAGE)-lb-cpu:py<version>-$(TAG) and
# loaded into the local Docker daemon.
build-lb-cpu-versioned: setup # Build CPU-LB images for all CPU Python versions
	@for py in $(CPU_PYTHON_VERSIONS); do \
		echo "Building CPU-LB image for Python $$py..."; \
		docker buildx build \
			-f Dockerfile-lb-cpu \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$py \
			-t $(IMAGE)-lb-cpu:py$$py-$(TAG) \
			. --load; \
	done

# Aggregate target for all 10 versioned images.
# Expressed as plain prerequisites rather than recursive $(MAKE)
# invocations so the dependency graph stays visible to make and
# `make -j` can parallelize the four independent sub-builds while
# sharing one jobserver. Declared .PHONY: it names no real file.
.PHONY: build-all-versioned
build-all-versioned: build-gpu-versioned build-cpu-versioned build-lb-versioned build-lb-cpu-versioned # Build all 10 versioned images (GPU+CPU, QB+LB)
	@echo "All 10 versioned images built."

# Versioned WIP Push Targets (multi-platform, requires Docker Hub push)
# Also tags DEFAULT_PYTHON_VERSION images as latest (unversioned tag)

# Build and push all versioned images (multi-platform; `--push`
# uploads to the registry, so Docker Hub credentials are required).
# The DEFAULT_PYTHON_VERSION build of each variant is additionally
# tagged with the plain :$(WIP_TAG) tag.
# NOTE: pytorch_base() is deliberately defined twice below — each
# @-prefixed recipe line runs in its own shell, so a shell function
# does not survive from one recipe line to the next.
# FIX: both case statements now have a default branch — an unmapped
# Python version aborts the build instead of silently passing an
# empty PYTORCH_BASE build-arg.
build-wip-versioned: setup # Build and push all versioned images (multi-platform)
	@echo "Building and pushing all versioned images with tag prefix py*-$(WIP_TAG)..."
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Pushing GPU QB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE):py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE):$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			$$tag_args \
			. --push; \
	done
	@for pyver in $(CPU_PYTHON_VERSIONS); do \
		echo "Pushing CPU QB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-cpu:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-cpu:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			-f Dockerfile-cpu \
			$$tag_args \
			. --push; \
	done
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Pushing GPU LB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-lb:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-lb:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-f Dockerfile-lb \
			$$tag_args \
			. --push; \
	done
	@for pyver in $(CPU_PYTHON_VERSIONS); do \
		echo "Pushing CPU LB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-lb-cpu:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-lb-cpu:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			-f Dockerfile-lb-cpu \
			$$tag_args \
			. --push; \
	done
	@echo "All versioned images pushed. Default ($(DEFAULT_PYTHON_VERSION)) also tagged as :$(WIP_TAG)."

# Versioned Smoke Tests

# Run `python --version` inside all 10 versioned images; exit non-zero
# if any container fails. Failures are accumulated so every image is
# checked before the target fails.
# FIX: `echo -n` replaced with printf — POSIX echo defines no -n
# option, so under /bin/sh (e.g. dash) `echo -n` prints a literal "-n"
# instead of suppressing the newline.
smoketest-versioned: build-all-versioned # Verify Python version in each versioned image
	@echo "Running Python version checks across all versioned images..."
	@fail=0; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		printf '%s' "GPU QB py$$pyver: "; \
		docker run --rm $(IMAGE):py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(CPU_PYTHON_VERSIONS); do \
		printf '%s' "CPU QB py$$pyver: "; \
		docker run --rm $(IMAGE)-cpu:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		printf '%s' "GPU LB py$$pyver: "; \
		docker run --rm $(IMAGE)-lb:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(CPU_PYTHON_VERSIONS); do \
		printf '%s' "CPU LB py$$pyver: "; \
		docker run --rm $(IMAGE)-lb-cpu:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	if [ $$fail -ne 0 ]; then echo "FAIL: Some images failed version check"; exit 1; fi; \
	echo "All 10 images passed Python version check."

# Test commands
test: # Run all tests in parallel
uv run pytest tests/ -v -n auto --dist loadscope
Expand Down
109 changes: 49 additions & 60 deletions src/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,27 @@
logger.info(format_version_banner())


def _is_deployed_mode() -> bool:
"""True when running as a Flash-deployed endpoint (not Live Serverless)."""
return bool(os.getenv("FLASH_RESOURCE_NAME"))


def _load_generated_handler() -> Optional[Any]:
"""Load Flash-generated handler if available (deployed QB mode).
"""Load Flash-generated handler for deployed QB mode.

Checks for a handler_<resource_name>.py file generated by the flash
build pipeline. These handlers accept plain JSON input without
FunctionRequest/cloudpickle serialization.

In deployed mode (FLASH_RESOURCE_NAME set), failures are fatal.
FunctionRequest fallback is only valid for Live Serverless workers.

Returns:
Handler function if generated handler found, None otherwise.
Handler function if generated handler found, None if not in
deployed mode.

Raises:
RuntimeError: If in deployed mode and the handler cannot be loaded.
"""
resource_name = os.getenv("FLASH_RESOURCE_NAME")
if not resource_name:
Expand All @@ -38,95 +50,72 @@ def _load_generated_handler() -> Optional[Any]:
handler_file = Path(f"/app/handler_{resource_name}.py")

if not handler_file.resolve().is_relative_to(Path("/app").resolve()):
logger.warning(
"FLASH_RESOURCE_NAME '%s' resolves outside /app. "
"Falling back to FunctionRequest handler.",
resource_name,
raise RuntimeError(
f"FLASH_RESOURCE_NAME '{resource_name}' resolves outside /app. "
f"This is a security violation. Check the endpoint environment variables."
)
return None

if not handler_file.exists():
logger.warning(
"Generated handler file %s not found for resource '%s'. "
"The build artifact may be incomplete. "
"Falling back to FunctionRequest handler.",
handler_file,
resource_name,
raise RuntimeError(
f"Generated handler {handler_file} not found for resource '{resource_name}'. "
f"The build artifact is incomplete. Redeploy with 'flash deploy'."
)
return None

spec = importlib.util.spec_from_file_location(f"handler_{resource_name}", handler_file)
if not spec or not spec.loader:
logger.warning("Failed to create module spec for %s", handler_file)
return None
raise RuntimeError(
f"Failed to create module spec for {handler_file}. "
f"The file may be corrupted. Redeploy with 'flash deploy'."
)

mod = importlib.util.module_from_spec(spec)
try:
spec.loader.exec_module(mod)
except ImportError as e:
logger.warning(
"Generated handler %s failed to import (missing dependency: %s). "
"Redeploy to include latest runpod_flash. "
"Falling back to FunctionRequest handler.",
handler_file,
e,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} failed to import: {e}. "
f"This usually means a dependency was built for the wrong Python version. "
f"Redeploy with 'flash deploy'."
) from e
except SyntaxError as e:
logger.error(
"Generated handler %s has a syntax error: %s. "
"This indicates a bug in the flash build pipeline. "
"Falling back to FunctionRequest handler.",
handler_file,
e,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} has a syntax error: {e}. "
f"This indicates a bug in the flash build pipeline."
) from e
except Exception as e:
logger.error(
"Generated handler %s failed to load unexpectedly: %s (%s). "
"Falling back to FunctionRequest handler.",
handler_file,
e,
type(e).__name__,
exc_info=True,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} failed to load: {e} ({type(e).__name__}). "
f"Redeploy with 'flash deploy'."
) from e

generated = getattr(mod, "handler", None)
if generated is None:
logger.warning(
"Generated handler %s loaded but has no 'handler' attribute. "
"Ensure the flash build pipeline generates a 'handler' function. "
"Falling back to FunctionRequest handler.",
handler_file,
raise RuntimeError(
f"Generated handler {handler_file} has no 'handler' function. "
f"This indicates a bug in the flash build pipeline."
)
return None

if not callable(generated):
logger.warning(
"Generated handler %s has a 'handler' attribute but it is not callable (%s). "
"Falling back to FunctionRequest handler.",
handler_file,
type(generated).__name__,
raise RuntimeError(
f"Generated handler {handler_file} has a 'handler' attribute "
f"but it is not callable ({type(generated).__name__}). "
f"This indicates a bug in the flash build pipeline."
)
return None

logger.info("Loaded generated handler from %s", handler_file)
return generated


# Try generated handler first (plain JSON mode for deployed QB endpoints)
_generated = _load_generated_handler()

if _generated:
handler = _generated
# Deployed mode: generated handler is mandatory, failures are fatal.
# Live Serverless mode: FunctionRequest handler is the only path.
if _is_deployed_mode():
handler = _load_generated_handler()
else:
# Fallback: original FunctionRequest handler (backward compatible)
from runpod_flash.protos.remote_execution import FunctionRequest, FunctionResponse
from remote_executor import RemoteExecutor

async def handler(event: Dict[str, Any]) -> Dict[str, Any]:
"""RunPod serverless function handler with dependency installation."""
"""RunPod serverless handler for Live Serverless (FunctionRequest protocol)."""
output: FunctionResponse

try:
Expand Down
Loading
Loading