Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/Deployment_Architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ When `flash deploy` provisions endpoints:
3. The State Manager stores `{environment_id, resource_name} -> endpoint_id`
4. At runtime, the `ServiceRegistry` uses the manifest + State Manager to route calls

### Manifest credential handling

- Runtime endpoint metadata (including API-returned `aiKey`) may be stored in the State Manager manifest for deployment reconciliation.
- Local `.flash/flash_manifest.json` is sanitized before it is written to disk and does not include `aiKey`.
- `RUNPOD_API_KEY` is read from the environment or credential storage and injected into the endpoint's environment when needed; it is not persisted in the local manifest.

See [Cross-Endpoint Routing](Cross_Endpoint_Routing.md) for the full runtime flow.

## Related Documentation
Expand Down
14 changes: 11 additions & 3 deletions src/runpod_flash/cli/docs/flash-deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,17 @@ The deploy command combines building and deploying your Flash application in a s
- Registers endpoints in environment tracking

4. **Post-Deployment**:
- Displays deployment URLs and available routes
- Shows authentication and testing guidance
- Cleans up temporary build directory
- Displays deployment URLs and available routes
- Shows authentication and testing guidance
- Cleans up temporary build directory

## Manifest and Credential Handling

During deploy, Flash updates manifest metadata with runtime endpoint details (for example `endpoint_id`, endpoint URLs, and `aiKey` when returned by the API).

- The manifest stored in State Manager keeps runtime metadata used for reconciliation.
- The local `.flash/flash_manifest.json` is sanitized before writing to disk and does not persist `aiKey`.
- `RUNPOD_API_KEY` continues to be resolved from stored credentials or the environment at runtime and is not stored in the local manifest.

## Build Options

Expand Down
6 changes: 6 additions & 0 deletions src/runpod_flash/cli/docs/flash-logging.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ Logs are written in the same format as console output, so you can grep through t
- **Graceful degradation**: Continues with stdout-only if file logging fails
- **Zero configuration**: Works out of the box with sensible defaults

### QB request log polling during `Endpoint.run(...)`

- For queue-based (QB) endpoints, Flash polls endpoint status/metrics while waiting and streams worker log lines to stdout when available.
- Polling is used for async `run(...)` flows (not `runsync(...)`), and is skipped for non-QB endpoint types.
- If the endpoint's `aiKey` is unavailable, Flash falls back to your configured `RUNPOD_API_KEY`; without a key, log streaming is skipped.

## Log Location

By default, logs are written to:
Expand Down
50 changes: 47 additions & 3 deletions src/runpod_flash/cli/utils/deployment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Deployment environment management utilities."""

import asyncio
import copy
import json
import logging
from typing import Dict, Any
Expand All @@ -15,6 +16,27 @@
log = logging.getLogger(__name__)


def _normalized_resource_attr(resource: Any, *names: str) -> str | None:
for name in names:
value = getattr(resource, name, None)
if isinstance(value, str) and value.strip():
return value
return None


def _manifest_without_ai_keys(manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Return a deep copy of ``manifest`` with every resource ``aiKey`` removed.

    The input is never mutated. Only dict-valued entries under a dict-valued
    ``"resources"`` key are touched; anything else is copied through as-is.
    """
    scrubbed = copy.deepcopy(manifest)
    resource_map = scrubbed.get("resources")

    if isinstance(resource_map, dict):
        for entry in resource_map.values():
            if isinstance(entry, dict) and "aiKey" in entry:
                del entry["aiKey"]

    return scrubbed


async def upload_build(app_name: str, build_path: str | Path):
app = await FlashApp.from_name(app_name)
await app.upload_build(build_path)
Expand Down Expand Up @@ -147,6 +169,14 @@ async def provision_resources_for_build(

resources_endpoints[resource_name] = endpoint_url

endpoint_id = _normalized_resource_attr(deployed_resource, "endpoint_id", "id")
if endpoint_id:
manifest["resources"][resource_name]["endpoint_id"] = endpoint_id

ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
if ai_key:
manifest["resources"][resource_name]["aiKey"] = ai_key

# Track load balancer URL for prominent logging
if manifest["resources"][resource_name].get("is_load_balanced"):
lb_endpoint_url = endpoint_url
Expand Down Expand Up @@ -282,6 +312,10 @@ async def reconcile_and_provision_resources(
local_manifest["resources"][resource_name]["endpoint_id"] = (
state_config["endpoint_id"]
)
if "aiKey" in state_config:
local_manifest["resources"][resource_name]["aiKey"] = state_config[
"aiKey"
]
if resource_name in state_manifest.get("resources_endpoints", {}):
local_manifest.setdefault("resources_endpoints", {})[resource_name] = (
state_manifest["resources_endpoints"][resource_name]
Expand Down Expand Up @@ -315,13 +349,21 @@ async def reconcile_and_provision_resources(
deployed_resource = provisioning_results[i]

# Extract endpoint info
endpoint_id = getattr(deployed_resource, "endpoint_id", None)
endpoint_id = _normalized_resource_attr(
deployed_resource, "endpoint_id", "id"
)
endpoint_url = getattr(deployed_resource, "endpoint_url", None)

if isinstance(endpoint_url, str):
endpoint_url = endpoint_url.strip() or None
else:
endpoint_url = None
ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
if endpoint_id:
local_manifest["resources"][resource_name]["endpoint_id"] = endpoint_id
if endpoint_url:
local_manifest["resources_endpoints"][resource_name] = endpoint_url
if ai_key:
local_manifest["resources"][resource_name]["aiKey"] = ai_key

log.debug(
f"{'Provisioned' if action_type == 'provision' else 'Updated'}: "
Expand All @@ -348,9 +390,11 @@ async def reconcile_and_provision_resources(
f"Successfully provisioned: {provisioned}"
)

local_manifest_for_disk = _manifest_without_ai_keys(local_manifest)

# Write updated manifest back to local file
manifest_path = Path.cwd() / ".flash" / "flash_manifest.json"
manifest_path.write_text(json.dumps(local_manifest, indent=2))
manifest_path.write_text(json.dumps(local_manifest_for_disk, indent=2))

log.debug(f"Local manifest updated at {manifest_path.relative_to(Path.cwd())}")

Expand Down
74 changes: 74 additions & 0 deletions src/runpod_flash/core/api/runpod.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,80 @@ async def get_gpu_types(
result = await self._execute_graphql(query, variables)
return result.get("gpuTypes", [])

async def get_gpu_lowest_price_stock_status(
    self,
    gpu_id: str,
    gpu_count: int,
    data_center_id: Optional[str] = None,
) -> Optional[str]:
    """Fetch the stock status attached to a GPU type's lowest price.

    Runs the ``ServerlessGpuTypes`` GraphQL query for ``gpu_id`` and reads
    ``lowestPrice.stockStatus`` from the first GPU type in the response.

    Args:
        gpu_id: GPU type id placed in the ``gpuTypesInput`` filter.
        gpu_count: GPU count sent in ``lowestPriceInput``.
        data_center_id: Data center id sent in ``lowestPriceInput``; sent
            as ``None`` when omitted.

    Returns:
        The status string with surrounding whitespace stripped, or ``None``
        when the response has no non-blank string status.
    """
    query = """
    query ServerlessGpuTypes($lowestPriceInput: GpuLowestPriceInput, $gpuTypesInput: GpuTypeFilter) {
        gpuTypes(input: $gpuTypesInput) {
            lowestPrice(input: $lowestPriceInput) {
                stockStatus
            }
        }
    }
    """

    gpu_filter = {"ids": [gpu_id]}
    price_filter = {
        "dataCenterId": data_center_id,
        "gpuCount": gpu_count,
        "secureCloud": True,
        "includeAiApi": True,
        "allowedCudaVersions": [],
        "compliance": [],
    }
    variables = {
        "gpuTypesInput": gpu_filter,
        "lowestPriceInput": price_filter,
    }

    response = await self._execute_graphql(query, variables)

    # Walk the response one level at a time; any missing or malformed level
    # means there is no status to report.
    entries = response.get("gpuTypes") or []
    if not entries:
        return None
    head = entries[0]
    if not isinstance(head, dict):
        return None
    price_info = head.get("lowestPrice")
    if not isinstance(price_info, dict):
        return None
    raw_status = price_info.get("stockStatus")
    if not isinstance(raw_status, str):
        return None
    return raw_status.strip() or None

async def get_cpu_specific_stock_status(
    self,
    cpu_flavor_id: str,
    instance_id: str,
    data_center_id: str,
) -> Optional[str]:
    """Fetch the stock status for a specific CPU instance in a data center.

    Runs the ``SecureCpuTypes`` GraphQL query for ``cpu_flavor_id`` and
    reads ``specifics.stockStatus`` from the first CPU flavor in the
    response.

    Args:
        cpu_flavor_id: Flavor id sent in ``cpuFlavorInput``.
        instance_id: Instance id sent in ``specificsInput``.
        data_center_id: Data center id sent in ``specificsInput``.

    Returns:
        The status string with surrounding whitespace stripped, or ``None``
        when the response has no non-blank string status.
    """
    query = """
    query SecureCpuTypes($cpuFlavorInput: CpuFlavorInput, $specificsInput: SpecificsInput) {
        cpuFlavors(input: $cpuFlavorInput) {
            specifics(input: $specificsInput) {
                stockStatus
            }
        }
    }
    """

    flavor_filter = {"id": cpu_flavor_id}
    specifics_filter = {
        "dataCenterId": data_center_id,
        "instanceId": instance_id,
    }
    variables = {
        "cpuFlavorInput": flavor_filter,
        "specificsInput": specifics_filter,
    }

    response = await self._execute_graphql(query, variables)

    # Walk the response one level at a time; any missing or malformed level
    # means there is no status to report.
    flavors = response.get("cpuFlavors") or []
    if not flavors:
        return None
    head = flavors[0]
    if not isinstance(head, dict):
        return None
    details = head.get("specifics")
    if not isinstance(details, dict):
        return None
    raw_status = details.get("stockStatus")
    if not isinstance(raw_status, str):
        return None
    return raw_status.strip() or None

async def get_endpoint(self, endpoint_id: str) -> Dict[str, Any]:
"""Get endpoint details."""
# Note: The schema doesn't show a specific endpoint query
Expand Down
Loading