31 changes: 23 additions & 8 deletions runpod/serverless/modules/rp_fastapi.py
@@ -271,14 +271,29 @@ def start_uvicorn(self, api_host="localhost", api_port=8000, api_concurrency=1):
        """
        Starts the Uvicorn server.
        """
-       uvicorn.run(
-           self.rp_app,
-           host=api_host,
-           port=int(api_port),
-           workers=int(api_concurrency),
-           log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
-           access_log=False,
-       )
+       if api_concurrency > 1:
+           # For multiple workers, we need to use the module:app format
+           import uvicorn.workers
+           uvicorn.run(
+               "runpod.serverless.modules.rp_fastapi:app",
+               host=api_host,
+               port=int(api_port),
+               workers=int(api_concurrency),
+               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
+               access_log=False,
+               factory=True
+           )
+       else:
+           # For single worker, we can use the app instance directly
+           import uvicorn.workers
+           uvicorn.run(
+               self.rp_app,
+               host=api_host,
+               port=int(api_port),
+               workers=1,
+               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
+               access_log=False
+           )

Copilot AI, Apr 9, 2026 (on lines +275 to +290):

import uvicorn.workers is executed in both branches but the module isn't used. Importing uvicorn.workers typically requires the optional gunicorn dependency; since this repo doesn't declare gunicorn, this can raise ModuleNotFoundError and break even the single-worker path. Remove this import (or guard it behind an explicit optional dependency check if you truly need Gunicorn).
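The "guard it behind an explicit optional dependency check" alternative mentioned in the comment could be sketched as below; the `HAS_UVICORN_WORKERS` flag is an illustrative assumption, not existing runpod code.

```python
# Probe for the optional Gunicorn-backed worker classes without letting a
# missing dependency crash the single-worker path. Importing uvicorn.workers
# pulls in gunicorn internally, so it can fail with ModuleNotFoundError when
# gunicorn (or uvicorn itself) is not installed.
try:
    import uvicorn.workers  # noqa: F401
    HAS_UVICORN_WORKERS = True
except ModuleNotFoundError:
    HAS_UVICORN_WORKERS = False
```

Code that genuinely needs the Gunicorn worker classes can then check the flag and raise a descriptive error instead of failing at import time.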
Copilot AI, Apr 9, 2026 (on lines 270 to +296):

start_uvicorn now has branching behavior for api_concurrency > 1, but there are no unit tests asserting the parameters passed to uvicorn.run in either branch (especially the multi-worker import-string path). Adding targeted tests that patch runpod.serverless.modules.rp_fastapi.uvicorn.run and exercise both concurrency modes will help prevent regressions like missing symbols / wrong factory settings.
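Tests along the lines the comment suggests might look like the sketch below. `ServerStub` is a hypothetical stand-in that mirrors the PR's branching logic so the example is self-contained; a real test would import the actual class and use `mock.patch("runpod.serverless.modules.rp_fastapi.uvicorn.run")` instead of injecting a mock.

```python
import os
from unittest import mock


class ServerStub:
    """Hypothetical stand-in mirroring the PR's start_uvicorn branching."""

    def __init__(self, run):
        self.rp_app = object()  # placeholder for the real ASGI app instance
        self._run = run  # injected replacement for uvicorn.run

    def start_uvicorn(self, api_host="localhost", api_port=8000, api_concurrency=1):
        if int(api_concurrency) > 1:
            self._run(
                "runpod.serverless.modules.rp_fastapi:app",
                host=api_host,
                port=int(api_port),
                workers=int(api_concurrency),
                log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
                access_log=False,
                factory=True,
            )
        else:
            self._run(
                self.rp_app,
                host=api_host,
                port=int(api_port),
                workers=1,
                log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
                access_log=False,
            )


def test_single_worker_uses_app_instance():
    run = mock.Mock()
    server = ServerStub(run)
    server.start_uvicorn(api_concurrency=1)
    args, kwargs = run.call_args
    assert args[0] is server.rp_app
    assert kwargs["workers"] == 1
    assert "factory" not in kwargs


def test_multi_worker_uses_import_string():
    run = mock.Mock()
    server = ServerStub(run)
    server.start_uvicorn(api_concurrency=4)
    args, kwargs = run.call_args
    assert args[0] == "runpod.serverless.modules.rp_fastapi:app"
    assert kwargs["workers"] == 4
    assert kwargs["factory"] is True
```

Asserting on `run.call_args` pins down both the positional app argument and the keyword settings, so a regression in either branch (wrong import string, dropped `factory=True`, wrong worker count) fails loudly.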

Copilot AI, Apr 9, 2026 (on lines +274 to 297):

In the multi-worker branch, the import string points to runpod.serverless.modules.rp_fastapi:app, but this module does not define a module-level app symbol. Additionally, factory=True tells Uvicorn to treat that import as an app factory callable; even if app existed as a FastAPI instance, this would be invoked like a zero-arg function and fail. Define a proper module-level factory (and reference it), or expose a module-level ASGI app and remove factory=True so workers>1 can start reliably.

Suggested change:

-       if api_concurrency > 1:
-           # For multiple workers, we need to use the module:app format
-           import uvicorn.workers
-           uvicorn.run(
-               "runpod.serverless.modules.rp_fastapi:app",
-               host=api_host,
-               port=int(api_port),
-               workers=int(api_concurrency),
-               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
-               access_log=False,
-               factory=True
-           )
-       else:
-           # For single worker, we can use the app instance directly
-           import uvicorn.workers
-           uvicorn.run(
-               self.rp_app,
-               host=api_host,
-               port=int(api_port),
-               workers=1,
-               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
-               access_log=False
-           )
+       import uvicorn.workers
+       # `self.rp_app` is the ASGI app instance created for this WorkerAPI.
+       # Running Uvicorn with an import string and `factory=True` requires a
+       # module-level callable that is not defined in this module, so always
+       # launch the existing app instance directly.
+       uvicorn.run(
+           self.rp_app,
+           host=api_host,
+           port=int(api_port),
+           workers=1,
+           log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
+           access_log=False
+       )
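For reference, a module-level factory that would satisfy Uvicorn's `factory=True` contract could look like the sketch below. The `create_app` name and the minimal ASGI body are illustrative assumptions; the real factory would build and return the FastAPI app that WorkerAPI currently constructs.

```python
# Uvicorn's factory mode imports a string like "module:create_app" and calls
# it with zero arguments in each worker process, so everything the app needs
# must be reconstructable from module/process state (env vars, config files).


def create_app():
    """Hypothetical zero-argument app factory returning an ASGI callable."""

    async def app(scope, receive, send):
        # Minimal ASGI app standing in for the real FastAPI instance.
        assert scope["type"] == "http"
        await send({
            "type": "http.response.start",
            "status": 200,
            "headers": [(b"content-type", b"text/plain")],
        })
        await send({"type": "http.response.body", "body": b"ok"})

    return app


# Launch sketch (not executed here):
# uvicorn.run("runpod.serverless.modules.rp_fastapi:create_app",
#             workers=int(api_concurrency), factory=True)
```

Because each worker process calls the factory independently, per-instance state held on `self` in WorkerAPI would need to move into the factory (or into shared storage) for this path to behave like the single-worker one.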
Copilot AI, Apr 9, 2026 (on lines +274 to 297):

Enabling workers > 1 means requests will be served by multiple processes. JobsProgress persists to disk, but methods used by the API (job_list.get(...) in _sim_stream/_sim_status and others) don't reload state before reading, so a worker may not see jobs added by a different worker and return "Job ID not found". If multi-worker mode is supported, consider reloading state on reads (e.g., in JobsProgress.get() / __iter__) or otherwise ensuring cross-process consistency for job tracking.

Suggested change:

-       if api_concurrency > 1:
-           # For multiple workers, we need to use the module:app format
-           import uvicorn.workers
-           uvicorn.run(
-               "runpod.serverless.modules.rp_fastapi:app",
-               host=api_host,
-               port=int(api_port),
-               workers=int(api_concurrency),
-               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
-               access_log=False,
-               factory=True
-           )
-       else:
-           # For single worker, we can use the app instance directly
-           import uvicorn.workers
-           uvicorn.run(
-               self.rp_app,
-               host=api_host,
-               port=int(api_port),
-               workers=1,
-               log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
-               access_log=False
-           )
+       if int(api_concurrency) > 1:
+           raise ValueError(
+               "api_concurrency > 1 is not supported because job tracking state is "
+               "not synchronized across multiple Uvicorn worker processes."
+           )
+       # For a single worker, we can use the app instance directly.
+       import uvicorn.workers
+       uvicorn.run(
+           self.rp_app,
+           host=api_host,
+           port=int(api_port),
+           workers=1,
+           log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"),
+           access_log=False
+       )
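A minimal illustration of the reload-on-read idea the comment proposes is sketched below. This is not runpod's actual JobsProgress API; the class and method names are assumptions chosen to show the pattern.

```python
import json
import os
import tempfile


class DiskBackedJobs:
    """Illustrative job store that re-reads its file on every read, so
    multiple worker processes observe each other's writes."""

    def __init__(self, path):
        self.path = path

    def _load(self):
        if not os.path.exists(self.path):
            return {}
        with open(self.path) as f:
            return json.load(f)

    def add(self, job_id, status="IN_PROGRESS"):
        # Read-modify-write; a production version would also need file
        # locking (e.g. fcntl.flock) to avoid lost updates between processes.
        jobs = self._load()
        jobs[job_id] = status
        with open(self.path, "w") as f:
            json.dump(jobs, f)

    def get(self, job_id):
        # Reload from disk on every read instead of trusting in-memory state.
        return self._load().get(job_id)


# Two store objects simulate two Uvicorn worker processes sharing one file.
path = os.path.join(tempfile.mkdtemp(), "jobs.json")
worker_a = DiskBackedJobs(path)
worker_b = DiskBackedJobs(path)
worker_a.add("job-123")
print(worker_b.get("job-123"))  # the other "worker" sees the job
```

Reloading on every read trades a small I/O cost for consistency; the alternative the suggested change takes is simply to refuse `api_concurrency > 1` until cross-process job tracking is solved.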
# ----------------------------- Realtime Endpoint ---------------------------- #
async def _realtime(self, job: Job):