Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
requires-python = ">=3.10,<3.15"
requires-python = ">=3.10,<3.13"

dependencies = [
"cloudpickle>=3.1.1",
Expand Down
21 changes: 20 additions & 1 deletion src/runpod_flash/cli/commands/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
except ImportError:
import tomli as tomllib # Python 3.9-3.10

from runpod_flash.core.resources.constants import MAX_TARBALL_SIZE_MB
from runpod_flash.core.resources.constants import (
MAX_TARBALL_SIZE_MB,
SUPPORTED_PYTHON_VERSIONS,
validate_python_version,
)

from ..utils.ignore import get_file_tree, load_ignore_patterns
from .build_utils.handler_generator import HandlerGenerator
Expand Down Expand Up @@ -795,6 +799,21 @@ def install_dependencies(
# Get current Python version for compatibility
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"

try:
validate_python_version(python_version)
except ValueError:
console.print(
f"\n[red]Python {python_version} is not supported for Flash deployment.[/red]"
)
console.print(
f"[yellow]Supported versions: {', '.join(SUPPORTED_PYTHON_VERSIONS)}[/yellow]"
)
console.print(
"[yellow]Set python_version explicitly in your resource config "
"or switch to a supported Python version.[/yellow]"
)
return False

# Determine if using uv pip or standard pip (different flag formats)
is_uv_pip = pip_cmd[0] == UV_COMMAND

Expand Down
1 change: 1 addition & 0 deletions src/runpod_flash/cli/commands/build_utils/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ def build(self) -> Dict[str, Any]:

manifest = {
"version": "1.0",
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}",
"generated_at": datetime.now(timezone.utc)
.isoformat()
.replace("+00:00", "Z"),
Expand Down
106 changes: 102 additions & 4 deletions src/runpod_flash/core/resources/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,113 @@ def _endpoint_domain_from_base_url(base_url: str) -> str:
ENDPOINT_DOMAIN = _endpoint_domain_from_base_url(runpod.endpoint_url_base)


# Python version support
SUPPORTED_PYTHON_VERSIONS: tuple[str, ...] = ("3.10", "3.11", "3.12")
GPU_PYTHON_VERSIONS: tuple[str, ...] = ("3.11", "3.12")
CPU_PYTHON_VERSIONS: tuple[str, ...] = ("3.10", "3.11", "3.12")
DEFAULT_PYTHON_VERSION: str = "3.11"

# Image type to repository mapping
_IMAGE_REPOS: dict[str, str] = {
"gpu": "runpod/flash",
"cpu": "runpod/flash-cpu",
"lb": "runpod/flash-lb",
"lb-cpu": "runpod/flash-lb-cpu",
}

# Image types that require GPU-compatible Python versions
_GPU_IMAGE_TYPES: frozenset[str] = frozenset({"gpu", "lb"})

# Image type to environment variable override mapping
_IMAGE_ENV_VARS: dict[str, str] = {
"gpu": "FLASH_GPU_IMAGE",
"cpu": "FLASH_CPU_IMAGE",
"lb": "FLASH_LB_IMAGE",
"lb-cpu": "FLASH_CPU_LB_IMAGE",
}


def validate_python_version(version: str) -> str:
"""Validate that a Python version string is supported.

Args:
version: Python version string (e.g. "3.11").

Returns:
The validated version string.

Raises:
ValueError: If version is not in SUPPORTED_PYTHON_VERSIONS.
"""
if version not in SUPPORTED_PYTHON_VERSIONS:
supported = ", ".join(SUPPORTED_PYTHON_VERSIONS)
raise ValueError(
f"Python {version} is not supported. Supported versions: {supported}"
)
return version


def get_image_name(
    image_type: str,
    python_version: str,
    *,
    tag: str | None = None,
) -> str:
    """Resolve a versioned Docker image name for the given type and Python version.

    Args:
        image_type: One of 'gpu', 'cpu', 'lb', 'lb-cpu'.
        python_version: Python version string (e.g. "3.11", "3.12").
        tag: Image tag suffix. Defaults to FLASH_IMAGE_TAG env var or "latest".

    Returns:
        Fully qualified image name, e.g. "runpod/flash:py3.12-latest".

    Raises:
        ValueError: If image_type is unknown, python_version is unsupported,
            or a GPU image type is requested with a CPU-only Python version.
    """
    # Guard clauses: reject bad type / version combinations up front.
    if image_type not in _IMAGE_REPOS:
        valid_types = ", ".join(sorted(_IMAGE_REPOS))
        raise ValueError(
            f"Unknown image type '{image_type}'. "
            f"Valid types: {valid_types}"
        )

    validate_python_version(python_version)

    if image_type in _GPU_IMAGE_TYPES and python_version not in GPU_PYTHON_VERSIONS:
        raise ValueError(
            f"GPU endpoints require Python {', '.join(GPU_PYTHON_VERSIONS)}. "
            f"Got Python {python_version}."
        )

    # An explicit per-type env override wins over the computed name.
    override = os.environ.get(_IMAGE_ENV_VARS[image_type])
    if override:
        return override

    resolved_tag = tag if tag else os.environ.get("FLASH_IMAGE_TAG", "latest")
    return f"{_IMAGE_REPOS[image_type]}:py{python_version}-{resolved_tag}"


# Docker image configuration
# FLASH_IMAGE_TAG is the single env knob controlling the default tag suffix;
# it is read once at import time, so later env changes do not affect it.
FLASH_IMAGE_TAG = os.environ.get("FLASH_IMAGE_TAG", "latest")
_RESOLVED_TAG = FLASH_IMAGE_TAG

FLASH_GPU_IMAGE = os.environ.get("FLASH_GPU_IMAGE", f"runpod/flash:{_RESOLVED_TAG}")
FLASH_CPU_IMAGE = os.environ.get("FLASH_CPU_IMAGE", f"runpod/flash-cpu:{_RESOLVED_TAG}")
FLASH_LB_IMAGE = os.environ.get("FLASH_LB_IMAGE", f"runpod/flash-lb:{_RESOLVED_TAG}")
FLASH_GPU_IMAGE = os.environ.get(
"FLASH_GPU_IMAGE", f"runpod/flash:py{DEFAULT_PYTHON_VERSION}-{_RESOLVED_TAG}"
)
FLASH_CPU_IMAGE = os.environ.get(
"FLASH_CPU_IMAGE", f"runpod/flash-cpu:py{DEFAULT_PYTHON_VERSION}-{_RESOLVED_TAG}"
)
FLASH_LB_IMAGE = os.environ.get(
"FLASH_LB_IMAGE", f"runpod/flash-lb:py{DEFAULT_PYTHON_VERSION}-{_RESOLVED_TAG}"
)
FLASH_CPU_LB_IMAGE = os.environ.get(
"FLASH_CPU_LB_IMAGE", f"runpod/flash-lb-cpu:{_RESOLVED_TAG}"
"FLASH_CPU_LB_IMAGE",
f"runpod/flash-lb-cpu:py{DEFAULT_PYTHON_VERSION}-{_RESOLVED_TAG}",
)

# Worker configuration defaults
Expand Down
116 changes: 23 additions & 93 deletions src/runpod_flash/core/resources/live_serverless.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
# Ship serverless code as you write it. No builds, no deploys — just run.
from typing import ClassVar

from pydantic import model_validator

from .constants import (
FLASH_CPU_IMAGE,
FLASH_CPU_LB_IMAGE,
FLASH_GPU_IMAGE,
FLASH_LB_IMAGE,
)
from .constants import DEFAULT_PYTHON_VERSION, get_image_name
from .load_balancer_sls_resource import (
CpuLoadBalancerSlsResource,
LoadBalancerSlsResource,
Expand All @@ -18,142 +15,75 @@
class LiveServerlessMixin:
"""Common mixin for live serverless endpoints that locks the image."""

_image_type: ClassVar[str] = (
"" # Override in subclasses: 'gpu', 'cpu', 'lb', 'lb-cpu'
)

@property
def _live_image(self) -> str:
"""Override in subclasses to specify the locked image."""
raise NotImplementedError("Subclasses must define _live_image")
python_version = getattr(self, "python_version", None) or DEFAULT_PYTHON_VERSION
return get_image_name(self._image_type, python_version)

@property
def imageName(self):
# Lock imageName to specific image
return self._live_image

@imageName.setter
def imageName(self, value):
# Prevent manual setting of imageName
pass


class LiveServerless(LiveServerlessMixin, ServerlessEndpoint):
"""GPU-only live serverless endpoint."""

@property
def _live_image(self) -> str:
return FLASH_GPU_IMAGE
_image_type: ClassVar[str] = "gpu"

@model_validator(mode="before")
@classmethod
def set_live_serverless_template(cls, data: dict):
"""Set default GPU image for Live Serverless."""
data["imageName"] = FLASH_GPU_IMAGE
python_version = data.get("python_version") or DEFAULT_PYTHON_VERSION
data["imageName"] = get_image_name("gpu", python_version)
return data


class CpuLiveServerless(LiveServerlessMixin, CpuServerlessEndpoint):
"""CPU-only live serverless endpoint with automatic disk sizing."""

@property
def _live_image(self) -> str:
return FLASH_CPU_IMAGE
_image_type: ClassVar[str] = "cpu"

@model_validator(mode="before")
@classmethod
def set_live_serverless_template(cls, data: dict):
"""Set default CPU image for Live Serverless."""
data["imageName"] = FLASH_CPU_IMAGE
python_version = data.get("python_version") or DEFAULT_PYTHON_VERSION
data["imageName"] = get_image_name("cpu", python_version)
return data


class LiveLoadBalancer(LiveServerlessMixin, LoadBalancerSlsResource):
"""Live load-balanced endpoint for local development and testing.

Similar to LiveServerless but for HTTP-based load-balanced endpoints.
Enables local testing of @remote decorated functions with LB endpoints
before deploying to production.

Features:
- Locks to Flash LB image (flash-lb)
- Direct HTTP execution (not queue-based)
- Local development with flash run
- Same @remote decorator pattern as LoadBalancerSlsResource

Usage:
from runpod_flash import LiveLoadBalancer, remote

api = LiveLoadBalancer(name="api-service")

@remote(api, method="POST", path="/api/process")
async def process_data(x: int, y: int):
return {"result": x + y}

# Test locally
result = await process_data(5, 3)
"""Live load-balanced endpoint."""

Local Development Flow:
1. Create LiveLoadBalancer with routing
2. Decorate functions with @remote(lb_resource, method=..., path=...)
3. Run with `flash run` to start local endpoint
4. Call functions directly in tests or scripts
5. Deploy to production with `flash build` and `flash deploy`

Note:
The endpoint_url is configured by the Flash runtime when the
endpoint is deployed locally. For true local testing without
deployment, use the functions directly or mock the HTTP layer.
"""

@property
def _live_image(self) -> str:
return FLASH_LB_IMAGE
_image_type: ClassVar[str] = "lb"

@model_validator(mode="before")
@classmethod
def set_live_lb_template(cls, data: dict):
"""Set default image for Live Load-Balanced endpoint."""
data["imageName"] = FLASH_LB_IMAGE
python_version = data.get("python_version") or DEFAULT_PYTHON_VERSION
data["imageName"] = get_image_name("lb", python_version)
return data


class CpuLiveLoadBalancer(LiveServerlessMixin, CpuLoadBalancerSlsResource):
"""CPU-only live load-balanced endpoint for local development and testing.

Similar to LiveLoadBalancer but configured for CPU instances with
automatic disk sizing and validation.

Features:
- Locks to CPU Flash LB image (flash-lb-cpu)
- CPU instance support with automatic disk sizing
- Direct HTTP execution (not queue-based)
- Local development with flash run
- Same @remote decorator pattern as CpuLoadBalancerSlsResource

Usage:
from runpod_flash import CpuLiveLoadBalancer, remote
"""CPU-only live load-balanced endpoint."""

api = CpuLiveLoadBalancer(name="api-service")

@remote(api, method="POST", path="/api/process")
async def process_data(x: int, y: int):
return {"result": x + y}

# Test locally
result = await process_data(5, 3)

Local Development Flow:
1. Create CpuLiveLoadBalancer with routing
2. Decorate functions with @remote(lb_resource, method=..., path=...)
3. Run with `flash run` to start local endpoint
4. Call functions directly in tests or scripts
5. Deploy to production with `flash build` and `flash deploy`
"""

@property
def _live_image(self) -> str:
return FLASH_CPU_LB_IMAGE
_image_type: ClassVar[str] = "lb-cpu"

@model_validator(mode="before")
@classmethod
def set_live_cpu_lb_template(cls, data: dict):
"""Set default CPU image for Live Load-Balanced endpoint."""
data["imageName"] = FLASH_CPU_LB_IMAGE
python_version = data.get("python_version") or DEFAULT_PYTHON_VERSION
data["imageName"] = get_image_name("lb-cpu", python_version)
return data
Loading
Loading