runpod · jhcipar · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Apr 2, 2026
diff --git a/docs/Deployment_Architecture.md b/docs/Deployment_Architecture.md
@@ -191,6 +191,12 @@ When `flash deploy` provisions endpoints:
 3. The State Manager stores `{environment_id, resource_name} -> endpoint_id`
 4. At runtime, the `ServiceRegistry` uses the manifest + State Manager to route calls
 
+### Manifest credential handling
+
+- Runtime endpoint metadata (including API-returned `aiKey`) may be stored in the State Manager manifest for deployment reconciliation.
+- Local `.flash/flash_manifest.json` is sanitized before it is written to disk and does not include `aiKey`.
+- `RUNPOD_API_KEY` is sourced from environment variables or credential storage and injected into the endpoint's environment when needed; it is not persisted in the local manifest.
+
 See [Cross-Endpoint Routing](Cross_Endpoint_Routing.md) for the full runtime flow.
 
 ## Related Documentation

diff --git a/src/runpod_flash/cli/docs/flash-deploy.md b/src/runpod_flash/cli/docs/flash-deploy.md
@@ -138,9 +138,17 @@ The deploy command combines building and deploying your Flash application in a s
    - Registers endpoints in environment tracking
 
 4. **Post-Deployment**:
-   - Displays deployment URLs and available routes
-   - Shows authentication and testing guidance
-   - Cleans up temporary build directory
+    - Displays deployment URLs and available routes
+    - Shows authentication and testing guidance
+    - Cleans up temporary build directory
+
+## Manifest and Credential Handling
+
+During deploy, Flash updates manifest metadata with runtime endpoint details (for example `endpoint_id`, endpoint URLs, and `aiKey` when returned by the API).
+
+- The manifest stored in State Manager keeps runtime metadata used for reconciliation.
+- The local `.flash/flash_manifest.json` is sanitized before writing to disk and does not persist `aiKey`.
+- `RUNPOD_API_KEY` continues to be resolved from stored credentials or environment variables at runtime and is not stored in the local manifest.
 
 ## Build Options
 

diff --git a/src/runpod_flash/cli/docs/flash-logging.md b/src/runpod_flash/cli/docs/flash-logging.md
@@ -28,6 +28,12 @@ Logs are written in the same format as console output, so you can grep through t
 - **Graceful degradation**: Continues with stdout-only if file logging fails
 - **Zero configuration**: Works out of the box with sensible defaults
 
+### QB request log polling during `Endpoint.run(...)`
+
+- For queue-based (QB) endpoints, Flash polls endpoint status/metrics while waiting and streams worker log lines to stdout when available.
+- Polling is used for async `run(...)` flows (not `runsync(...)`), and is skipped for non-QB endpoint types.
+- If the endpoint's `aiKey` is unavailable, Flash falls back to your configured `RUNPOD_API_KEY`; without a key, log streaming is skipped.
+
 ## Log Location
 
 By default, logs are written to:

diff --git a/src/runpod_flash/cli/utils/deployment.py b/src/runpod_flash/cli/utils/deployment.py
@@ -1,19 +1,54 @@
 """Deployment environment management utilities."""
 
 import asyncio
+import copy
 import json
 import logging
 from typing import Dict, Any
 from datetime import datetime
 from pathlib import Path
 
 from runpod_flash.config import get_paths
+from runpod_flash.core.resources.serverless import ServerlessResource
 from runpod_flash.core.resources.app import FlashApp
 from runpod_flash.core.resources.resource_manager import ResourceManager
 from runpod_flash.runtime.resource_provisioner import create_resource_from_manifest
 
 log = logging.getLogger(__name__)
 
+RUNTIME_RESOURCE_FIELDS = set(ServerlessResource.RUNTIME_FIELDS) | {  # keys dropped by _resource_config_for_compare before configs are compared
+    "id",
+    "endpoint_id",
+}
+
+
+def _normalized_resource_attr(resource: Any, *names: str) -> str | None:  # first non-blank string attribute among names, else None
+    for name in names:  # candidates are tried in the order given
+        value = getattr(resource, name, None)  # a missing attribute reads as None
+        if isinstance(value, str) and value.strip():  # skip non-strings and empty/whitespace-only strings
+            return value  # returned unchanged (not stripped)
+    return None
+
+
+def _manifest_without_ai_keys(manifest: Dict[str, Any]) -> Dict[str, Any]:  # deep copy of manifest with each resource's "aiKey" removed
+    sanitized_manifest = copy.deepcopy(manifest)  # copy first so the caller's manifest is never mutated
+    resources = sanitized_manifest.get("resources")
+    if not isinstance(resources, dict):  # "resources" missing or not a dict: nothing to strip
+        return sanitized_manifest
+
+    for config in resources.values():
+        if isinstance(config, dict):  # non-dict resource entries are left as they are
+            config.pop("aiKey", None)  # only the top-level "aiKey" key of each resource config is dropped
+
+    return sanitized_manifest
+
+
+def _resource_config_for_compare(config: Dict[str, Any]) -> Dict[str, Any]:  # deep copy of config without any RUNTIME_RESOURCE_FIELDS keys
+    compare_config = copy.deepcopy(config)  # work on a copy; the input dict is not modified
+    for field in RUNTIME_RESOURCE_FIELDS:
+        compare_config.pop(field, None)  # keys that are absent are silently ignored
+    return compare_config
+
 
 async def upload_build(app_name: str, build_path: str | Path):
     app = await FlashApp.from_name(app_name)
@@ -147,6 +182,14 @@ async def provision_resources_for_build(
 
         resources_endpoints[resource_name] = endpoint_url
 
+        endpoint_id = _normalized_resource_attr(deployed_resource, "endpoint_id", "id")
+        if endpoint_id:
+            manifest["resources"][resource_name]["endpoint_id"] = endpoint_id
+
+        ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
+        if ai_key:
+            manifest["resources"][resource_name]["aiKey"] = ai_key
+
         # Track load balancer URL for prominent logging
         if manifest["resources"][resource_name].get("is_load_balanced"):
             lb_endpoint_url = endpoint_url
@@ -258,9 +301,15 @@ async def reconcile_and_provision_resources(
         local_config = local_manifest["resources"][resource_name]
         state_config = state_manifest.get("resources", {}).get(resource_name, {})
 
-        # Simple hash comparison for config changes
-        local_json = json.dumps(local_config, sort_keys=True)
-        state_json = json.dumps(state_config, sort_keys=True)
+        # Compare only user-managed config fields (exclude runtime metadata)
+        local_json = json.dumps(
+            _resource_config_for_compare(local_config),
+            sort_keys=True,
+        )
+        state_json = json.dumps(
+            _resource_config_for_compare(state_config),
+            sort_keys=True,
+        )
 
         # Check if endpoint exists in state manifest
         has_endpoint = resource_name in state_manifest.get("resources_endpoints", {})
@@ -282,6 +331,10 @@ async def reconcile_and_provision_resources(
                 local_manifest["resources"][resource_name]["endpoint_id"] = (
                     state_config["endpoint_id"]
                 )
+            if "aiKey" in state_config:
+                local_manifest["resources"][resource_name]["aiKey"] = state_config[
+                    "aiKey"
+                ]
             if resource_name in state_manifest.get("resources_endpoints", {}):
                 local_manifest.setdefault("resources_endpoints", {})[resource_name] = (
                     state_manifest["resources_endpoints"][resource_name]
@@ -315,13 +368,21 @@ async def reconcile_and_provision_resources(
             deployed_resource = provisioning_results[i]
 
             # Extract endpoint info
-            endpoint_id = getattr(deployed_resource, "endpoint_id", None)
+            endpoint_id = _normalized_resource_attr(
+                deployed_resource, "endpoint_id", "id"
+            )
             endpoint_url = getattr(deployed_resource, "endpoint_url", None)
-
+            if isinstance(endpoint_url, str):
+                endpoint_url = endpoint_url.strip() or None
+            else:
+                endpoint_url = None
+            ai_key = _normalized_resource_attr(deployed_resource, "aiKey", "ai_key")
             if endpoint_id:
                 local_manifest["resources"][resource_name]["endpoint_id"] = endpoint_id
             if endpoint_url:
                 local_manifest["resources_endpoints"][resource_name] = endpoint_url
+            if ai_key:
+                local_manifest["resources"][resource_name]["aiKey"] = ai_key
 
             log.debug(
                 f"{'Provisioned' if action_type == 'provision' else 'Updated'}: "
@@ -348,9 +409,11 @@ async def reconcile_and_provision_resources(
                 f"Successfully provisioned: {provisioned}"
             )
 
+    local_manifest_for_disk = _manifest_without_ai_keys(local_manifest)
+
     # Write updated manifest back to local file
     manifest_path = Path.cwd() / ".flash" / "flash_manifest.json"
-    manifest_path.write_text(json.dumps(local_manifest, indent=2))
+    manifest_path.write_text(json.dumps(local_manifest_for_disk, indent=2))
 
     log.debug(f"Local manifest updated at {manifest_path.relative_to(Path.cwd())}")
 

diff --git a/src/runpod_flash/core/api/runpod.py b/src/runpod_flash/core/api/runpod.py
@@ -396,6 +396,80 @@ async def get_gpu_types(
         result = await self._execute_graphql(query, variables)
         return result.get("gpuTypes", [])
 
+    async def get_gpu_lowest_price_stock_status(  # query gpuTypes[0].lowestPrice.stockStatus for one GPU type
+        self,
+        gpu_id: str,  # sent as the single entry of gpuTypesInput.ids
+        gpu_count: int,  # sent as lowestPriceInput.gpuCount
+        data_center_id: Optional[str] = None,  # sent as lowestPriceInput.dataCenterId; None is passed through as-is
+    ) -> Optional[str]:  # stripped stockStatus string, or None when it is missing, non-string, or blank
+        query = """
+        query ServerlessGpuTypes($lowestPriceInput: GpuLowestPriceInput, $gpuTypesInput: GpuTypeFilter) {
+          gpuTypes(input: $gpuTypesInput) {
+            lowestPrice(input: $lowestPriceInput) {
+              stockStatus
+            }
+          }
+        }
+        """
+
+        variables = {
+            "gpuTypesInput": {"ids": [gpu_id]},
+            "lowestPriceInput": {
+                "dataCenterId": data_center_id,
+                "gpuCount": gpu_count,
+                "secureCloud": True,  # this and the remaining entries are fixed literals, not caller-configurable
+                "includeAiApi": True,
+                "allowedCudaVersions": [],
+                "compliance": [],
+            },
+        }
+
+        result = await self._execute_graphql(query, variables)
+        gpu_types = result.get("gpuTypes") or []  # missing or null gpuTypes is treated as an empty list
+        first = gpu_types[0] if gpu_types else {}  # only the first returned entry is inspected
+        lowest = first.get("lowestPrice") if isinstance(first, dict) else {}
+        if not isinstance(lowest, dict):  # lowestPrice absent (None) or not an object
+            return None
+        status = lowest.get("stockStatus")
+        if isinstance(status, str) and status.strip():  # accept only non-blank strings
+            return status.strip()
+        return None
+
+    async def get_cpu_specific_stock_status(  # query cpuFlavors[0].specifics.stockStatus for one CPU flavor/instance
+        self,
+        cpu_flavor_id: str,  # sent as cpuFlavorInput.id
+        instance_id: str,  # sent as specificsInput.instanceId
+        data_center_id: str,  # sent as specificsInput.dataCenterId
+    ) -> Optional[str]:  # stripped stockStatus string, or None when it is missing, non-string, or blank
+        query = """
+        query SecureCpuTypes($cpuFlavorInput: CpuFlavorInput, $specificsInput: SpecificsInput) {
+          cpuFlavors(input: $cpuFlavorInput) {
+            specifics(input: $specificsInput) {
+              stockStatus
+            }
+          }
+        }
+        """
+
+        variables = {
+            "cpuFlavorInput": {"id": cpu_flavor_id},
+            "specificsInput": {
+                "dataCenterId": data_center_id,
+                "instanceId": instance_id,
+            },
+        }
+
+        result = await self._execute_graphql(query, variables)
+        cpu_flavors = result.get("cpuFlavors") or []  # missing or null cpuFlavors is treated as an empty list
+        first = cpu_flavors[0] if cpu_flavors else {}  # only the first returned entry is inspected
+        specifics = first.get("specifics") if isinstance(first, dict) else {}
+        if not isinstance(specifics, dict):  # specifics absent (None) or not an object
+            return None
+        status = specifics.get("stockStatus")
+        if isinstance(status, str) and status.strip():  # accept only non-blank strings
+            return status.strip()
+        return None
+
     async def get_endpoint(self, endpoint_id: str) -> Dict[str, Any]:
         """Get endpoint details."""
         # Note: The schema doesn't show a specific endpoint query