Skip to content
1 change: 1 addition & 0 deletions exegol/console/cli/actions/ExegolParameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self) -> None:
"Get a [blue]tmux[/blue] shell": "exegol start --shell [blue]tmux[/blue]",
"Share a specific [blue]hardware device[/blue] [bright_black](e.g. Proxmark)[/bright_black]": "exegol start -d [bright_magenta]/dev/ttyACM0[/bright_magenta]",
"Share every [blue]USB device[/blue] connected to the host": "exegol start -d [magenta]/dev/bus/usb/[/magenta]",
"Enable [blue]NVIDIA GPU[/blue] passthrough": "exegol start [blue]gpu[/blue] [bright_blue]free[/bright_blue] [magenta]--nvidia-gpu[/magenta]",
}

def __call__(self, *args, **kwargs):
Expand Down
8 changes: 7 additions & 1 deletion exegol/console/cli/actions/GenericParameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,12 @@ def __init__(self, groupArgs: List[GroupArg]):
dest="devices",
default=[],
action="append",
help="Add host [default not bold]device(s)[/default not bold] at the container creation (example: -d /dev/ttyACM0 -d /dev/bus/usb/)")
help="Add host [default not bold]device(s)[/default not bold] at the container creation (example: -d /dev/ttyACM0 -d /dev/bus/usb/ -d nvidia.com/gpu=all)")
self.gpu = Option("--nvidia-gpu",
dest="gpu",
action="store_true",
default=False,
help="Enable NVIDIA GPU passthrough using Docker CDI on Linux hosts (equivalent to: -d nvidia.com/gpu=all)")
Comment thread
Macbucheron1 marked this conversation as resolved.
Outdated

self.hosts_file = Option("--hosts-file",
dest="hosts_file",
Expand All @@ -263,6 +268,7 @@ def __init__(self, groupArgs: List[GroupArg]):
{"arg": self.hostname, "required": False},
{"arg": self.privileged, "required": False},
{"arg": self.devices, "required": False},
{"arg": self.gpu, "required": False},
{"arg": self.X11, "required": False},
{"arg": self.my_resources, "required": False},
{"arg": self.exegol_resources, "required": False},
Expand Down
47 changes: 45 additions & 2 deletions exegol/model/ContainerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def __init__(self, container: Optional[Container] = None, container_name: Option
self.__wrapper_start_enabled: bool = False
self.__mounts: List[Mount] = []
self.__devices: List[str] = []
self.__device_requests: List[Dict[str, Union[str, int, List[str]]]] = []
self.__capabilities: List[str] = []
self.__sysctls: Dict[str, str] = {}
self.__envs: Dict[str, str] = {}
Expand Down Expand Up @@ -190,6 +191,17 @@ def __parseContainerConfig(self, container: Container) -> None:
self.__devices.append(
f"{device.get('PathOnHost', '?')}:{device.get('PathInContainer', '?')}:{device.get('CgroupPermissions', '?')}")
logger.debug(f"└── Load devices : {self.__devices}")
device_requests = host_config.get("DeviceRequests", [])
if device_requests is not None:
for request in device_requests:
if request is None:
continue
driver = request.get("Driver")
device_ids = request.get("DeviceIDs")
if driver == "cdi" and isinstance(device_ids, list):
Comment thread
Macbucheron1 marked this conversation as resolved.
for device_id in device_ids:
if isinstance(device_id, str):
self.__addCdiDevice(device_id)
extra_hosts = host_config.get("ExtraHosts", [])
for entry in extra_hosts:
hostname, ip = entry.rsplit(":", 1)
Expand Down Expand Up @@ -344,6 +356,11 @@ async def configFromUser(self) -> "ContainerConfig":
if ParametersManager().volumes is not None:
for volume in ParametersManager().volumes:
await self.addRawVolume(volume)
if ParametersManager().gpu:
if EnvInfo.isMacHost() or EnvInfo.isWindowsHost():
logger.critical("The --nvidia-gpu option is currently supported only on Linux hosts.")
Comment thread
Macbucheron1 marked this conversation as resolved.
Outdated
if "nvidia.com/gpu=all" not in ParametersManager().devices:
self.addUserDevice("nvidia.com/gpu=all")
if ParametersManager().devices is not None:
for device in ParametersManager().devices:
self.addUserDevice(device)
Expand Down Expand Up @@ -1330,6 +1347,10 @@ def getDevices(self) -> List[str]:
"""Devices config getter"""
return self.__devices

def getDeviceRequests(self) -> List[Dict[str, Union[str, int, List[str]]]]:
    """Return the device requests stored on this configuration (used for CDI selectors).

    The list handed back is the stored one itself, not a copy.
    """
    stored_requests = self.__device_requests
    return stored_requests

def addEnv(self, key: str, value: str) -> None:
"""Add or update an environment variable to the container configuration"""
self.__envs[key] = value
Expand Down Expand Up @@ -1566,8 +1587,20 @@ def addUserDevice(self, user_device_config: str) -> None:
logger.warning("Orbstack does not support (yet) USB device passthrough.")
logger.verbose("Official doc: https://docs.orbstack.dev/machines/#usb-devices")
logger.critical("Device configuration cannot be applied, aborting operation.")
if self.__isCdiDevice(user_device_config):
self.__addCdiDevice(user_device_config)
return
self.__addDevice(user_device_config)

def __addCdiDevice(self, device_selector: str) -> None:
    """Add a CDI selector as a Docker device request.

    A selector that is already present in a stored "cdi" request is not added a second time,
    so repeating the same selector does not produce duplicate device requests.

    :param device_selector: CDI selector to register (e.g. nvidia.com/gpu=all)
    """
    for request in self.__device_requests:
        if request.get("Driver") != "cdi":
            continue
        known_ids = request.get("DeviceIDs")
        # Only a list is inspected: on a plain string, `in` would be a substring test.
        if isinstance(known_ids, list) and device_selector in known_ids:
            return
    self.__device_requests.append({"Driver": "cdi", "Count": 0, "DeviceIDs": [device_selector]})
Comment thread
Macbucheron1 marked this conversation as resolved.
Outdated

@staticmethod
def __isCdiDevice(device: str) -> bool:
    """Tell whether the user input has the shape of a CDI selector.

    The accepted shape is `<part>/<part>=<part>` with no ':' anywhere in the string
    (for example nvidia.com/gpu=all).
    """
    cdi_shape = r"^[^/:]+/[^:=]+=[^:]+$"
    found = re.match(cdi_shape, device)
    return found is not None

async def addRawPort(self, user_test_port: str) -> None:
"""Add port config or range of ports from user input.
Format must be [<host_ipv4>:]<host_port>[-<end_host_port>][:<container_port>[-<end_container_port>]][:<protocol>]
Expand Down Expand Up @@ -1711,11 +1744,21 @@ def getTextMounts(self, verbose: bool = False) -> str:
def getTextDevices(self, verbose: bool = False) -> str:
"""Text formatter for Devices configuration. The verbose mode show full device configuration."""
result = ''
for device in self.__devices:
text_devices = list(self.__devices)
for request in self.__device_requests:
driver = request.get("Driver")
device_ids = request.get("DeviceIDs")
if driver == "cdi" and isinstance(device_ids, list):
text_devices.extend([device for device in device_ids if isinstance(device, str)])
for device in text_devices:
if verbose:
result += f"{device}{os.linesep}"
else:
src, dest = device.split(':')[:2]
split_device = device.split(':')
if len(split_device) < 2:
result += f"{device}{os.linesep}"
continue
src, dest = split_device[:2]
if src == dest:
result += f"{src}{os.linesep}"
else:
Expand Down
13 changes: 12 additions & 1 deletion exegol/model/ExegolContainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,18 @@ async def __start_container(self) -> None:
self.__container.start()
except APIError as e:
logger.debug(e)
logger.critical(f"Docker raised a critical error when starting the container [green]{self.name}[/green], error message is: {e.explanation}")
explanation = e.explanation
if explanation is None:
explanation = ""
Comment thread
Macbucheron1 marked this conversation as resolved.
Outdated
elif isinstance(explanation, bytes):
explanation = explanation.decode("utf-8", errors="ignore")
message = str(explanation)
Comment thread
Macbucheron1 marked this conversation as resolved.
Outdated
lower_message = message.lower()
message = message.replace('[', '\\[')
logger.error(f"Docker raised a critical error when starting the container [green]{self.name}[/green], error message is: {message}")
if "cdi device injection failed" in lower_message and "nvidia.com/gpu=all" in lower_message:
logger.warning("Hint: verify NVIDIA CDI is configured (e.g. nvidia-container-toolkit installed and Docker CDI enabled).")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we check with `docker info` (or through the Docker daemon SDK) whether the NVIDIA toolkit is enabled?

PS: can we link to the NVIDIA documentation on how to install the NVIDIA toolkit, for users who are not familiar with it?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can only check whether Docker currently sees NVIDIA CDI devices or not. That does not strictly tell us whether the NVIDIA toolkit is enabled, since the CDI spec may simply not be generated or discovered yet. Docker exposes CDI support and discovered devices in docker info, so this is more a runtime visibility check than a toolkit check.

If we want to handle NVIDIA separately, we could also check for the presence of nvidia-ctk, since that is the tool NVIDIA provides to configure the toolkit and generate CDI specs.

$ docker info
Client:
 Version:    29.2.1
 Context:    default
...
 CDI spec directories:
  /etc/cdi
  /var/run/cdi
 Discovered Devices:
  cdi: nvidia.com/gpu=0
  cdi: nvidia.com/gpu=all
 ...

And using the SDK:

$ python3
Python 3.13.12 (main, Feb  3 2026, 17:53:27) [GCC 15.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import docker
>>> info = docker.from_env().info()
>>> print(info.get("DiscoveredDevices",[]))
[{'Source': 'cdi', 'ID': 'nvidia.com/gpu=0'}, {'Source': 'cdi', 'ID': 'nvidia.com/gpu=all'}]

Added the link to the NVIDIA documentation in 24cc238. Also removed the mention of enabling Docker CDI, since it is enabled by default since v27.

logger.critical("Error while starting exegol container. Exiting.")
if not self.config.legacy_entrypoint: # TODO improve startup compatibility check
try:
# Try to find log / startup messages. Will time out after 2 seconds if the image don't support status update through container logs.
Expand Down
1 change: 1 addition & 0 deletions exegol/utils/DockerUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def createContainer(self, model: ExegolContainerTemplate, temporary: bool = Fals
"hostname": model.config.hostname,
"extra_hosts": model.config.getExtraHost(),
"devices": model.config.getDevices(),
"device_requests": model.config.getDeviceRequests(),
"environment": model.config.getEnvs(),
"labels": model.config.getLabels(),
"ports": model.config.getPorts(),
Expand Down