Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
# Python 3.12 included in this PyTorch image
ARG PYTHON_VERSION=3.11
ARG PYTORCH_BASE=pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
FROM ${PYTORCH_BASE}

WORKDIR /app

Expand Down
3 changes: 2 additions & 1 deletion Dockerfile-cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM python:3.12-slim
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim

WORKDIR /app

Expand Down
5 changes: 3 additions & 2 deletions Dockerfile-lb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
# Python 3.12 included in this PyTorch image
ARG PYTHON_VERSION=3.11
ARG PYTORCH_BASE=pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
FROM ${PYTORCH_BASE}

WORKDIR /app

Expand Down
3 changes: 2 additions & 1 deletion Dockerfile-lb-cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM python:3.12-slim
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim

WORKDIR /app

Expand Down
179 changes: 179 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ endif
WIP_TAG ?= wip
MULTI_PLATFORM := linux/amd64,linux/arm64

# Python version matrix for multi-version builds
# NOTE: keep these lists in sync with the pytorch_base() case
# statements in the versioned build targets below — a version listed
# here with no case branch produces an empty PYTORCH_BASE build-arg.
GPU_PYTHON_VERSIONS := 3.11 3.12
CPU_PYTHON_VERSIONS := 3.10 3.11 3.12
DEFAULT_PYTHON_VERSION := 3.11

# PyTorch base image mapping per Python version (GPU only)
# GNU make allows dots in variable names: PYTORCH_BASE_3.11 is one
# plain variable whose name contains "3.11".
PYTORCH_BASE_3.11 := pytorch/pytorch:2.9.1-cuda12.8-cudnn9-runtime
PYTORCH_BASE_3.12 := pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime

.PHONY: setup help

# Check if 'uv' is installed
Expand Down Expand Up @@ -130,6 +139,176 @@ build-wip-lb-cpu: setup # Build and push LB CPU image (multi-platform)
-t $(IMAGE)-lb-cpu:$(WIP_TAG) \
. --push

# Versioned Build Targets (multi-Python-version matrix)
# GPU images: Python 3.11, 3.12 (with PyTorch base)
# CPU images: Python 3.10, 3.11, 3.12 (python:X.Y-slim)
# Tag format: py${VERSION}-${TAG} (e.g., runpod/flash:py3.11-local)

# Build a GPU image for every Python version in GPU_PYTHON_VERSIONS.
# Each version maps to a pinned PyTorch base image via pytorch_base().
# FIX: the case statement now has a default branch — an unmapped
# version aborts the build instead of silently passing an empty
# PYTORCH_BASE build-arg (which would only fail later inside Docker).
build-gpu-versioned: setup # Build GPU images for all GPU Python versions
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Building GPU image for Python $$pyver (base: $$base)..."; \
		docker buildx build \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-t $(IMAGE):py$$pyver-$(TAG) \
			. --load; \
	done

# Build a CPU (python:X.Y-slim based) image for each version in
# CPU_PYTHON_VERSIONS, tagged $(IMAGE)-cpu:py<version>-$(TAG) and
# loaded into the local Docker daemon.
build-cpu-versioned: setup # Build CPU images for all CPU Python versions
	@for py in $(CPU_PYTHON_VERSIONS); do \
		echo "Building CPU image for Python $$py..."; \
		docker buildx build \
			-f Dockerfile-cpu \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$py \
			-t $(IMAGE)-cpu:py$$py-$(TAG) \
			. --load; \
	done

# Build a GPU load-balancer (LB) image for every Python version in
# GPU_PYTHON_VERSIONS, using the same PyTorch base mapping as
# build-gpu-versioned.
# FIX: the case statement now has a default branch — an unmapped
# version aborts the build instead of silently passing an empty
# PYTORCH_BASE build-arg.
build-lb-versioned: setup # Build GPU-LB images for all GPU Python versions
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Building GPU-LB image for Python $$pyver (base: $$base)..."; \
		docker buildx build \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-f Dockerfile-lb \
			-t $(IMAGE)-lb:py$$pyver-$(TAG) \
			. --load; \
	done

# Build a CPU load-balancer (LB) image for each version in
# CPU_PYTHON_VERSIONS, tagged $(IMAGE)-lb-cpu:py<version>-$(TAG) and
# loaded into the local Docker daemon.
build-lb-cpu-versioned: setup # Build CPU-LB images for all CPU Python versions
	@for py in $(CPU_PYTHON_VERSIONS); do \
		echo "Building CPU-LB image for Python $$py..."; \
		docker buildx build \
			-f Dockerfile-lb-cpu \
			--platform $(PLATFORM) \
			--build-arg PYTHON_VERSION=$$py \
			-t $(IMAGE)-lb-cpu:py$$py-$(TAG) \
			. --load; \
	done

# Aggregate target for all 10 versioned images.
# Expressed as plain prerequisites rather than recursive $(MAKE)
# invocations so the dependency graph stays visible to make and
# `make -j` can parallelize the four independent sub-builds while
# sharing one jobserver. Declared .PHONY: it names no real file.
.PHONY: build-all-versioned
build-all-versioned: build-gpu-versioned build-cpu-versioned build-lb-versioned build-lb-cpu-versioned # Build all 10 versioned images (GPU+CPU, QB+LB)
	@echo "All 10 versioned images built."

# Versioned WIP Push Targets (multi-platform, requires Docker Hub push)
# Also tags DEFAULT_PYTHON_VERSION images as latest (unversioned tag)

# Build and push all versioned images (multi-platform; `--push`
# uploads to the registry, so Docker Hub credentials are required).
# The DEFAULT_PYTHON_VERSION build of each variant is additionally
# tagged with the plain :$(WIP_TAG) tag.
# NOTE: pytorch_base() is deliberately defined twice below — each
# @-prefixed recipe line runs in its own shell, so a shell function
# does not survive from one recipe line to the next.
# FIX: both case statements now have a default branch — an unmapped
# Python version aborts the build instead of silently passing an
# empty PYTORCH_BASE build-arg.
build-wip-versioned: setup # Build and push all versioned images (multi-platform)
	@echo "Building and pushing all versioned images with tag prefix py*-$(WIP_TAG)..."
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Pushing GPU QB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE):py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE):$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			$$tag_args \
			. --push; \
	done
	@for pyver in $(CPU_PYTHON_VERSIONS); do \
		echo "Pushing CPU QB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-cpu:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-cpu:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			-f Dockerfile-cpu \
			$$tag_args \
			. --push; \
	done
	@pytorch_base() { \
		case "$$1" in \
			3.11) echo "$(PYTORCH_BASE_3.11)";; \
			3.12) echo "$(PYTORCH_BASE_3.12)";; \
			*) echo "ERROR: no PYTORCH_BASE mapping for Python $$1" >&2; return 1;; \
		esac; \
	}; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		base=$$(pytorch_base $$pyver) || exit 1; \
		echo "Pushing GPU LB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-lb:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-lb:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			--build-arg PYTORCH_BASE=$$base \
			-f Dockerfile-lb \
			$$tag_args \
			. --push; \
	done
	@for pyver in $(CPU_PYTHON_VERSIONS); do \
		echo "Pushing CPU LB image for Python $$pyver..."; \
		tag_args="-t $(IMAGE)-lb-cpu:py$$pyver-$(WIP_TAG)"; \
		if [ "$$pyver" = "$(DEFAULT_PYTHON_VERSION)" ]; then \
			tag_args="$$tag_args -t $(IMAGE)-lb-cpu:$(WIP_TAG)"; \
		fi; \
		docker buildx build \
			--platform $(MULTI_PLATFORM) \
			--build-arg PYTHON_VERSION=$$pyver \
			-f Dockerfile-lb-cpu \
			$$tag_args \
			. --push; \
	done
	@echo "All versioned images pushed. Default ($(DEFAULT_PYTHON_VERSION)) also tagged as :$(WIP_TAG)."

# Versioned Smoke Tests

# Run `python --version` inside all 10 versioned images; exit non-zero
# if any container fails. Failures are accumulated so every image is
# checked before the target fails.
# FIX: `echo -n` replaced with printf — POSIX echo defines no -n
# option, so under /bin/sh (e.g. dash) `echo -n` prints a literal "-n"
# instead of suppressing the newline.
smoketest-versioned: build-all-versioned # Verify Python version in each versioned image
	@echo "Running Python version checks across all versioned images..."
	@fail=0; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		printf '%s' "GPU QB py$$pyver: "; \
		docker run --rm $(IMAGE):py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(CPU_PYTHON_VERSIONS); do \
		printf '%s' "CPU QB py$$pyver: "; \
		docker run --rm $(IMAGE)-cpu:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(GPU_PYTHON_VERSIONS); do \
		printf '%s' "GPU LB py$$pyver: "; \
		docker run --rm $(IMAGE)-lb:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	for pyver in $(CPU_PYTHON_VERSIONS); do \
		printf '%s' "CPU LB py$$pyver: "; \
		docker run --rm $(IMAGE)-lb-cpu:py$$pyver-$(TAG) python --version || fail=1; \
	done; \
	if [ $$fail -ne 0 ]; then echo "FAIL: Some images failed version check"; exit 1; fi; \
	echo "All 10 images passed Python version check."

# Test commands
test: # Run all tests in parallel
uv run pytest tests/ -v -n auto --dist loadscope
Expand Down
109 changes: 49 additions & 60 deletions src/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,27 @@
logger.info(format_version_banner())


def _is_deployed_mode() -> bool:
"""True when running as a Flash-deployed endpoint (not Live Serverless)."""
return bool(os.getenv("FLASH_RESOURCE_NAME"))


def _load_generated_handler() -> Optional[Any]:
"""Load Flash-generated handler if available (deployed QB mode).
"""Load Flash-generated handler for deployed QB mode.

Checks for a handler_<resource_name>.py file generated by the flash
build pipeline. These handlers accept plain JSON input without
FunctionRequest/cloudpickle serialization.

In deployed mode (FLASH_RESOURCE_NAME set), failures are fatal.
FunctionRequest fallback is only valid for Live Serverless workers.

Returns:
Handler function if generated handler found, None otherwise.
Handler function if generated handler found, None if not in
deployed mode.

Raises:
RuntimeError: If in deployed mode and the handler cannot be loaded.
"""
resource_name = os.getenv("FLASH_RESOURCE_NAME")
if not resource_name:
Expand All @@ -38,95 +50,72 @@ def _load_generated_handler() -> Optional[Any]:
handler_file = Path(f"/app/handler_{resource_name}.py")

if not handler_file.resolve().is_relative_to(Path("/app").resolve()):
logger.warning(
"FLASH_RESOURCE_NAME '%s' resolves outside /app. "
"Falling back to FunctionRequest handler.",
resource_name,
raise RuntimeError(
f"FLASH_RESOURCE_NAME '{resource_name}' resolves outside /app. "
f"This is a security violation. Check the endpoint environment variables."
)
return None

if not handler_file.exists():
logger.warning(
"Generated handler file %s not found for resource '%s'. "
"The build artifact may be incomplete. "
"Falling back to FunctionRequest handler.",
handler_file,
resource_name,
raise RuntimeError(
f"Generated handler {handler_file} not found for resource '{resource_name}'. "
f"The build artifact is incomplete. Redeploy with 'flash deploy'."
)
return None

spec = importlib.util.spec_from_file_location(f"handler_{resource_name}", handler_file)
if not spec or not spec.loader:
logger.warning("Failed to create module spec for %s", handler_file)
return None
raise RuntimeError(
f"Failed to create module spec for {handler_file}. "
f"The file may be corrupted. Redeploy with 'flash deploy'."
)

mod = importlib.util.module_from_spec(spec)
try:
spec.loader.exec_module(mod)
except ImportError as e:
logger.warning(
"Generated handler %s failed to import (missing dependency: %s). "
"Redeploy to include latest runpod_flash. "
"Falling back to FunctionRequest handler.",
handler_file,
e,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} failed to import: {e}. "
f"This usually means a dependency was built for the wrong Python version. "
f"Redeploy with 'flash deploy'."
) from e
except SyntaxError as e:
logger.error(
"Generated handler %s has a syntax error: %s. "
"This indicates a bug in the flash build pipeline. "
"Falling back to FunctionRequest handler.",
handler_file,
e,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} has a syntax error: {e}. "
f"This indicates a bug in the flash build pipeline."
) from e
except Exception as e:
logger.error(
"Generated handler %s failed to load unexpectedly: %s (%s). "
"Falling back to FunctionRequest handler.",
handler_file,
e,
type(e).__name__,
exc_info=True,
)
return None
raise RuntimeError(
f"Generated handler {handler_file} failed to load: {e} ({type(e).__name__}). "
f"Redeploy with 'flash deploy'."
) from e

generated = getattr(mod, "handler", None)
if generated is None:
logger.warning(
"Generated handler %s loaded but has no 'handler' attribute. "
"Ensure the flash build pipeline generates a 'handler' function. "
"Falling back to FunctionRequest handler.",
handler_file,
raise RuntimeError(
f"Generated handler {handler_file} has no 'handler' function. "
f"This indicates a bug in the flash build pipeline."
)
return None

if not callable(generated):
logger.warning(
"Generated handler %s has a 'handler' attribute but it is not callable (%s). "
"Falling back to FunctionRequest handler.",
handler_file,
type(generated).__name__,
raise RuntimeError(
f"Generated handler {handler_file} has a 'handler' attribute "
f"but it is not callable ({type(generated).__name__}). "
f"This indicates a bug in the flash build pipeline."
)
return None

logger.info("Loaded generated handler from %s", handler_file)
return generated


# Try generated handler first (plain JSON mode for deployed QB endpoints)
_generated = _load_generated_handler()

if _generated:
handler = _generated
# Deployed mode: generated handler is mandatory, failures are fatal.
# Live Serverless mode: FunctionRequest handler is the only path.
if _is_deployed_mode():
handler = _load_generated_handler()
else:
# Fallback: original FunctionRequest handler (backward compatible)
from runpod_flash.protos.remote_execution import FunctionRequest, FunctionResponse
from remote_executor import RemoteExecutor

async def handler(event: Dict[str, Any]) -> Dict[str, Any]:
"""RunPod serverless function handler with dependency installation."""
"""RunPod serverless handler for Live Serverless (FunctionRequest protocol)."""
output: FunctionResponse

try:
Expand Down
Loading
Loading