Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/Deployment_Architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ When `flash deploy` provisions endpoints:
3. The State Manager stores `{environment_id, resource_name} -> endpoint_id`
4. At runtime, the `ServiceRegistry` uses the manifest + State Manager to route calls

### Manifest credential handling

- Runtime endpoint metadata (including API-returned `aiKey`) may be stored in the State Manager manifest for deployment reconciliation.
- Local `.flash/flash_manifest.json` is sanitized before it is written to disk and does not include `aiKey`.
- `RUNPOD_API_KEY` is sourced from environment variables or credential storage and injected into the endpoint's environment when needed; it is not persisted in the local manifest.

See [Cross-Endpoint Routing](Cross_Endpoint_Routing.md) for the full runtime flow.

## Related Documentation
Expand Down
14 changes: 11 additions & 3 deletions src/runpod_flash/cli/docs/flash-deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,17 @@ The deploy command combines building and deploying your Flash application in a s
- Registers endpoints in environment tracking

4. **Post-Deployment**:
- Displays deployment URLs and available routes
- Shows authentication and testing guidance
- Cleans up temporary build directory
- Displays deployment URLs and available routes
- Shows authentication and testing guidance
- Cleans up temporary build directory

## Manifest and Credential Handling

During deploy, Flash updates manifest metadata with runtime endpoint details (for example `endpoint_id`, endpoint URLs, and `aiKey` when returned by the API).

- The manifest stored in State Manager keeps runtime metadata used for reconciliation.
- The local `.flash/flash_manifest.json` is sanitized before writing to disk and does not persist `aiKey`.
- `RUNPOD_API_KEY` continues to be resolved from stored credentials or environment variables at runtime and is not stored in the local manifest.

## Build Options

Expand Down
6 changes: 6 additions & 0 deletions src/runpod_flash/cli/docs/flash-logging.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ Logs are written in the same format as console output, so you can grep through t
- **Graceful degradation**: Continues with stdout-only if file logging fails
- **Zero configuration**: Works out of the box with sensible defaults

### QB request log polling during `Endpoint.run(...)`

- For queue-based (QB) endpoints, Flash polls endpoint status/metrics while waiting and streams worker log lines to stdout when available.
- Polling is used for async `run(...)` flows (not `runsync(...)`), and is skipped for non-QB endpoint types.
- If endpoint `aiKey` is unavailable, Flash falls back to your configured `RUNPOD_API_KEY`; without a key, log streaming is skipped.

## Log Location

By default, logs are written to:
Expand Down
75 changes: 69 additions & 6 deletions src/runpod_flash/cli/utils/deployment.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,54 @@
"""Deployment environment management utilities."""

import asyncio
import copy
import json
import logging
from typing import Dict, Any
from datetime import datetime
from pathlib import Path

from runpod_flash.config import get_paths
from runpod_flash.core.resources.serverless import ServerlessResource
from runpod_flash.core.resources.app import FlashApp
from runpod_flash.core.resources.resource_manager import ResourceManager
from runpod_flash.runtime.resource_provisioner import create_resource_from_manifest

log = logging.getLogger(__name__)

# Resource-config keys that _resource_config_for_compare removes before two
# configs are compared: everything in ServerlessResource.RUNTIME_FIELDS plus
# the endpoint identifier keys "id" and "endpoint_id".
RUNTIME_RESOURCE_FIELDS = set(ServerlessResource.RUNTIME_FIELDS) | {
"id",
"endpoint_id",
}


def _normalized_resource_attr(resource: Any, *names: str) -> str | None:
for name in names:
value = getattr(resource, name, None)
if isinstance(value, str) and value.strip():
return value
return None


def _manifest_without_ai_keys(manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Build a deep copy of ``manifest`` with every resource's ``aiKey`` removed.

    The input is never mutated. Only dict-valued entries under the
    ``"resources"`` mapping are touched; if ``"resources"`` is absent or is
    not a dict, the copy is returned unchanged.

    Args:
        manifest: Manifest dictionary to sanitize.

    Returns:
        A new manifest dictionary without ``aiKey`` in any resource config.
    """
    scrubbed = copy.deepcopy(manifest)
    resource_map = scrubbed.get("resources")

    if isinstance(resource_map, dict):
        for entry in resource_map.values():
            # Non-dict entries are left exactly as they were copied.
            if isinstance(entry, dict) and "aiKey" in entry:
                del entry["aiKey"]

    return scrubbed


def _resource_config_for_compare(config: Dict[str, Any]) -> Dict[str, Any]:
    """Return a deep copy of ``config`` without the runtime-metadata keys.

    Every key listed in ``RUNTIME_RESOURCE_FIELDS`` is removed from the copy;
    the input dictionary itself is left untouched.

    Args:
        config: A single resource's configuration dictionary.

    Returns:
        A new dictionary holding only the keys not in
        ``RUNTIME_RESOURCE_FIELDS``.
    """
    comparable = copy.deepcopy(config)
    for runtime_key in RUNTIME_RESOURCE_FIELDS:
        if runtime_key in comparable:
            del comparable[runtime_key]
    return comparable


async def upload_build(app_name: str, build_path: str | Path):
app = await FlashApp.from_name(app_name)
Expand Down Expand Up @@ -147,6 +182,14 @@ async def provision_resources_for_build(

resources_endpoints[resource_name] = endpoint_url

endpoint_id = _normalized_resource_attr(deployed_resource, "endpoint_id", "id")
if endpoint_id:
manifest["resources"][resource_name]["endpoint_id"] = endpoint_id

ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
if ai_key:
manifest["resources"][resource_name]["aiKey"] = ai_key

# Track load balancer URL for prominent logging
if manifest["resources"][resource_name].get("is_load_balanced"):
lb_endpoint_url = endpoint_url
Expand Down Expand Up @@ -258,9 +301,15 @@ async def reconcile_and_provision_resources(
local_config = local_manifest["resources"][resource_name]
state_config = state_manifest.get("resources", {}).get(resource_name, {})

# Simple hash comparison for config changes
local_json = json.dumps(local_config, sort_keys=True)
state_json = json.dumps(state_config, sort_keys=True)
# Compare only user-managed config fields (exclude runtime metadata)
local_json = json.dumps(
_resource_config_for_compare(local_config),
sort_keys=True,
)
state_json = json.dumps(
_resource_config_for_compare(state_config),
sort_keys=True,
)

# Check if endpoint exists in state manifest
has_endpoint = resource_name in state_manifest.get("resources_endpoints", {})
Expand All @@ -282,6 +331,10 @@ async def reconcile_and_provision_resources(
local_manifest["resources"][resource_name]["endpoint_id"] = (
state_config["endpoint_id"]
)
if "aiKey" in state_config:
local_manifest["resources"][resource_name]["aiKey"] = state_config[
"aiKey"
]
if resource_name in state_manifest.get("resources_endpoints", {}):
local_manifest.setdefault("resources_endpoints", {})[resource_name] = (
state_manifest["resources_endpoints"][resource_name]
Expand Down Expand Up @@ -315,13 +368,21 @@ async def reconcile_and_provision_resources(
deployed_resource = provisioning_results[i]

# Extract endpoint info
endpoint_id = getattr(deployed_resource, "endpoint_id", None)
endpoint_id = _normalized_resource_attr(
deployed_resource, "endpoint_id", "id"
)
endpoint_url = getattr(deployed_resource, "endpoint_url", None)

if isinstance(endpoint_url, str):
endpoint_url = endpoint_url.strip() or None
else:
endpoint_url = None
ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
if endpoint_id:
local_manifest["resources"][resource_name]["endpoint_id"] = endpoint_id
if endpoint_url:
local_manifest["resources_endpoints"][resource_name] = endpoint_url
if ai_key:
local_manifest["resources"][resource_name]["aiKey"] = ai_key

log.debug(
f"{'Provisioned' if action_type == 'provision' else 'Updated'}: "
Expand All @@ -348,9 +409,11 @@ async def reconcile_and_provision_resources(
f"Successfully provisioned: {provisioned}"
)

local_manifest_for_disk = _manifest_without_ai_keys(local_manifest)

# Write updated manifest back to local file
manifest_path = Path.cwd() / ".flash" / "flash_manifest.json"
manifest_path.write_text(json.dumps(local_manifest, indent=2))
manifest_path.write_text(json.dumps(local_manifest_for_disk, indent=2))

log.debug(f"Local manifest updated at {manifest_path.relative_to(Path.cwd())}")

Expand Down
74 changes: 74 additions & 0 deletions src/runpod_flash/core/api/runpod.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,80 @@ async def get_gpu_types(
result = await self._execute_graphql(query, variables)
return result.get("gpuTypes", [])

async def get_gpu_lowest_price_stock_status(
    self,
    gpu_id: str,
    gpu_count: int,
    data_center_id: Optional[str] = None,
) -> Optional[str]:
    """Fetch the ``stockStatus`` reported under ``lowestPrice`` for one GPU type.

    Sends the ``ServerlessGpuTypes`` GraphQL query through
    ``self._execute_graphql`` and reads the first entry of ``gpuTypes`` in
    the response.

    Args:
        gpu_id: GPU type id, sent as the only element of ``gpuTypesInput.ids``.
        gpu_count: Sent as ``lowestPriceInput.gpuCount``.
        data_center_id: Sent as ``lowestPriceInput.dataCenterId``; may be
            ``None``.

    Returns:
        The stripped ``stockStatus`` string, or ``None`` when the response has
        no GPU types, the first entry or its ``lowestPrice`` is not a dict, or
        the status is missing, not a string, or blank.
    """
    lowest_price_input = {
        "dataCenterId": data_center_id,
        "gpuCount": gpu_count,
        "secureCloud": True,
        "includeAiApi": True,
        "allowedCudaVersions": [],
        "compliance": [],
    }
    variables = {
        "gpuTypesInput": {"ids": [gpu_id]},
        "lowestPriceInput": lowest_price_input,
    }

    # Query text is kept verbatim, including its line layout.
    query = """
query ServerlessGpuTypes($lowestPriceInput: GpuLowestPriceInput, $gpuTypesInput: GpuTypeFilter) {
gpuTypes(input: $gpuTypesInput) {
lowestPrice(input: $lowestPriceInput) {
stockStatus
}
}
}
"""

    response = await self._execute_graphql(query, variables)

    entries = response.get("gpuTypes") or []
    head = entries[0] if entries else {}
    if not isinstance(head, dict):
        return None

    price_info = head.get("lowestPrice")
    if not isinstance(price_info, dict):
        return None

    raw_status = price_info.get("stockStatus")
    if not isinstance(raw_status, str):
        return None

    cleaned = raw_status.strip()
    return cleaned or None

async def get_cpu_specific_stock_status(
    self,
    cpu_flavor_id: str,
    instance_id: str,
    data_center_id: str,
) -> Optional[str]:
    """Fetch the ``stockStatus`` reported under ``specifics`` for one CPU flavor.

    Sends the ``SecureCpuTypes`` GraphQL query through
    ``self._execute_graphql`` and reads the first entry of ``cpuFlavors`` in
    the response.

    Args:
        cpu_flavor_id: Sent as ``cpuFlavorInput.id``.
        instance_id: Sent as ``specificsInput.instanceId``.
        data_center_id: Sent as ``specificsInput.dataCenterId``.

    Returns:
        The stripped ``stockStatus`` string, or ``None`` when the response has
        no CPU flavors, the first entry or its ``specifics`` is not a dict, or
        the status is missing, not a string, or blank.
    """
    variables = {
        "cpuFlavorInput": {"id": cpu_flavor_id},
        "specificsInput": {
            "dataCenterId": data_center_id,
            "instanceId": instance_id,
        },
    }

    # Query text is kept verbatim, including its line layout.
    query = """
query SecureCpuTypes($cpuFlavorInput: CpuFlavorInput, $specificsInput: SpecificsInput) {
cpuFlavors(input: $cpuFlavorInput) {
specifics(input: $specificsInput) {
stockStatus
}
}
}
"""

    response = await self._execute_graphql(query, variables)

    flavors = response.get("cpuFlavors") or []
    head = flavors[0] if flavors else {}
    if not isinstance(head, dict):
        return None

    details = head.get("specifics")
    if not isinstance(details, dict):
        return None

    raw_status = details.get("stockStatus")
    if not isinstance(raw_status, str):
        return None

    cleaned = raw_status.strip()
    return cleaned or None

async def get_endpoint(self, endpoint_id: str) -> Dict[str, Any]:
"""Get endpoint details."""
# Note: The schema doesn't show a specific endpoint query
Expand Down
Loading
Loading