Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ jobs:
(github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))
&& fromJSON('["namespace-profile-amd64-8x16"]')
|| fromJSON('["ubuntu-24.04"]') }}
if: false # FIXME: Started to timeout recently
steps:
- uses: actions/checkout@v6
with:
Expand All @@ -199,10 +198,20 @@ jobs:
- name: Build openpilot
run: scons -j$(nproc)
- name: Driving test
timeout-minutes: 2
timeout-minutes: 5
env:
TEST_DURATION: 60
RECORD: 1
ONNXCPU: "1"
run: |
source selfdrive/test/setup_xvfb.sh
pytest -s tools/sim/tests/test_metadrive_bridge.py
- name: Upload logs
uses: actions/upload-artifact@v4
if: always()
with:
name: metadrive_logs
path: ~/.comma/media/0/realdata/

create_ui_report:
name: Create UI Report
Expand Down
9 changes: 9 additions & 0 deletions selfdrive/modeld/modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,15 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
def main(demo=False):
cloudlog.warning("modeld init")

# Check for model files
for p in (VISION_PKL_PATH, POLICY_PKL_PATH, VISION_METADATA_PATH, POLICY_METADATA_PATH):
if not p.exists():
cloudlog.error(f"Model file {p} not found!")
raise RuntimeError(f"Model file {p} not found! Check your build or Git LFS.")
if p.stat().st_size < 1000:
cloudlog.error(f"Model file {p} is too small! Likely a Git LFS pointer.")
raise RuntimeError(f"Model file {p} is too small! Likely a Git LFS pointer.")

if not USBGPU:
# USB GPU currently saturates a core so can't do this yet,
# also need to move the aux USB interrupts for good timings
Expand Down
8 changes: 5 additions & 3 deletions selfdrive/selfdrived/selfdrived.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ def __init__(self, CP=None):

ignore = self.sensor_packets + self.gps_packets + ['alertDebug']
if SIMULATION:
ignore += ['driverCameraState', 'managerState']
ignore += ['driverCameraState', 'managerState', 'controlsState', 'carControl', 'pandaStates',
'peripheralState', 'driverMonitoringState', 'driverAssistance', 'carOutput',
'audioFeedback', 'userBookmark']
if REPLAY:
# no vipc in replay will make them ignored anyways
ignore += ['roadCameraState', 'wideRoadCameraState']
Expand Down Expand Up @@ -340,9 +342,9 @@ def update_events(self, CS):
self.logged_comm_issue = None

if not self.CP.notCar:
if not self.sm['livePose'].posenetOK:
if not self.sm['livePose'].posenetOK and not SIMULATION:
self.events.add(EventName.posenetInvalid)
if not self.sm['livePose'].inputsOK:
if not self.sm['livePose'].inputsOK and not SIMULATION:
self.events.add(EventName.locationdTemporaryError)
if not self.sm['liveParameters'].valid and cal_status == log.LiveCalibrationData.Status.calibrated and not TESTING_CLOSET and (not SIMULATION or REPLAY):
self.events.add(EventName.paramsdTemporaryError)
Expand Down
17 changes: 10 additions & 7 deletions system/manager/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,21 @@ def launcher(proc: str, name: str) -> None:
except KeyboardInterrupt:
cloudlog.warning(f"child {proc} got SIGINT")
except Exception:
# can't install the crash handler because sys.excepthook doesn't play nice
# with threads, so catch it here.
import traceback
print(f"PROCESS {name} ({proc}) EXCEPTION:\n{traceback.format_exc()}")
cloudlog.error(f"process {name} failed at {proc}:\n{traceback.format_exc()}")
sentry.capture_exception()
raise


def nativelauncher(pargs: list[str], cwd: str, name: str) -> None:
  """Replace the current process with the native program ``pargs[0]``.

  Args:
    pargs: argument vector for the new program; ``pargs[0]`` is the executable
      handed to ``os.execvp``.
    cwd: directory to change into before exec'ing.
    name: value stored in the ``MANAGER_DAEMON`` environment variable.

  Raises:
    Exception: whatever ``os.chdir`` or ``os.execvp`` raised, re-raised after
      the traceback has been printed.
  """
  os.environ['MANAGER_DAEMON'] = name

  # exec the process. A successful execvp never returns, so the handler below
  # only runs when the chdir or the exec itself failed. Both calls must live
  # inside the try, otherwise the failure is never reported.
  try:
    os.chdir(cwd)
    os.execvp(pargs[0], pargs)
  except Exception:
    import traceback
    print(f"NATIVE PROCESS {name} EXCEPTION:\n{traceback.format_exc()}")
    raise


def join_process(process: Process, timeout: float) -> None:
Expand Down
2 changes: 1 addition & 1 deletion system/manager/process_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def and_(*fns):
PythonProcess("micd", "system.micd", iscar),
PythonProcess("timed", "system.timed", always_run, enabled=not PC),

PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad),
PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad, restart_if_crash=True),
PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)),

PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC),
Expand Down
9 changes: 7 additions & 2 deletions tools/sim/launch_openpilot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ export NOBOARD="1"
export SIMULATION="1"
export SKIP_FW_QUERY="1"
export FINGERPRINT="HONDA_CIVIC_2022"
export TINYGRAD_DEBUG=0

export BLOCK="${BLOCK},camerad,loggerd,encoderd,micd,logmessaged,manage_athenad"
if [[ -n "$RECORD" ]]; then
export BLOCK="${BLOCK},camerad,stream_encoderd,micd,logmessaged,manage_athenad,soundd"
else
export BLOCK="${BLOCK},camerad,loggerd,encoderd,stream_encoderd,micd,logmessaged,manage_athenad,soundd"
fi
if [[ "$CI" ]]; then
# TODO: offscreen UI should work
export BLOCK="${BLOCK},ui"
export BLOCK="${BLOCK},ui,loggerd,encoderd"
fi

python3 -c "from openpilot.selfdrive.test.helpers import set_params_enabled; set_params_enabled()"
Expand Down
3 changes: 2 additions & 1 deletion tools/sim/tests/test_metadrive_bridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import os
import warnings

# Since metadrive depends on pkg_resources, and pkg_resources is deprecated as an API
Expand All @@ -11,7 +12,7 @@
class TestMetaDriveBridge(TestSimBridgeBase):
@pytest.fixture(autouse=True)
def setup_create_bridge(self, test_duration):
  """Store the drive duration used by ``create_bridge`` on the test instance.

  The value comes from the ``TEST_DURATION`` environment variable and falls
  back to 30 when the variable is unset. A non-integer value raises
  ``ValueError`` from ``int``.
  """
  # NOTE(review): `test_duration` is only requested, its value is not used
  # here; presumably it is a pytest fixture defined elsewhere, so confirm.
  self.test_duration = int(os.environ.get('TEST_DURATION', 30))

def create_bridge(self):
  """Build the MetaDriveBridge under test, passing this test's duration as its third argument."""
  duration = self.test_duration
  return MetaDriveBridge(False, False, duration, True)
126 changes: 80 additions & 46 deletions tools/sim/tests/test_sim_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setup_method(self):

def test_driving(self):
# Startup manager and bridge.py. Check processes are running, then engage and verify.
p_manager = subprocess.Popen("./launch_openpilot.sh", cwd=SIM_DIR)
p_manager = subprocess.Popen("./launch_openpilot.sh", cwd=SIM_DIR, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
self.processes.append(p_manager)

sm = messaging.SubMaster(['selfdriveState', 'onroadEvents', 'managerState'])
Expand All @@ -31,62 +31,96 @@ def test_driving(self):
p_bridge = bridge.run(q, retries=10)
self.processes.append(p_bridge)

max_time_per_step = 60
max_time_per_step = 180

# Wait for bridge to startup
start_waiting = time.monotonic()
while not bridge.started.value and time.monotonic() < start_waiting + max_time_per_step:
time.sleep(0.1)
assert p_bridge.exitcode is None, f"Bridge process should be running, but exited with code {p_bridge.exitcode}"

start_time = time.monotonic()
no_car_events_issues_once = False
car_event_issues = []
not_running = []
while time.monotonic() < start_time + max_time_per_step:
sm.update()
try:
assert p_bridge.exitcode is None, f"Bridge process should be running, but exited with code {p_bridge.exitcode}"

not_running = [p.name for p in sm['managerState'].processes if not p.running and p.shouldBeRunning]
car_event_issues = [event.name for event in sm['onroadEvents'] if any([event.noEntry, event.softDisable, event.immediateDisable])]

if sm.all_alive() and len(car_event_issues) == 0 and len(not_running) == 0:
no_car_events_issues_once = True
break

assert no_car_events_issues_once, \
f"Failed because no messages received, or CarEvents '{car_event_issues}' or processes not running '{not_running}'"
start_time = time.monotonic()
no_car_events_issues_once = False
car_event_issues = []
not_running = []
while time.monotonic() < start_time + max_time_per_step:
sm.update()

start_time = time.monotonic()
min_counts_control_active = 100
control_active = 0
not_running = [p.name for p in sm['managerState'].processes if not p.running and p.shouldBeRunning]
car_event_issues = [event.name for event in sm['onroadEvents'] if any([event.noEntry, event.softDisable, event.immediateDisable])]

while time.monotonic() < start_time + max_time_per_step:
sm.update()

if sm.all_alive() and sm['selfdriveState'].active:
control_active += 1

if control_active == min_counts_control_active:
if sm.all_alive() and len(car_event_issues) == 0 and len(not_running) == 0:
no_car_events_issues_once = True
break

assert min_counts_control_active == control_active, f"Simulator did not engage a minimal of {min_counts_control_active} steps was {control_active}"

failure_states = []
while bridge.started.value:
continue

while not q.empty():
state = q.get()
if state.type == QueueMessageType.TERMINATION_INFO:
done_info = state.info
failure_states = [done_state for done_state in done_info if done_state != "timeout" and done_info[done_state]]
break
assert len(failure_states) == 0, f"Simulator fails to finish a loop. Failure states: {failure_states}"
else:
if sm.frame % 100 == 0:
print(f"Waiting for healthy state... not_running: {not_running}, car_event_issues: {car_event_issues}")
if not sm.all_alive():
print(f" NOT ALIVE: {[s for s, a in sm.alive.items() if not a]}")
if not sm.all_freq_ok():
print(f" FREQ NOT OK: {[s for s, f in sm.freq_ok.items() if not f]}")
if not sm.all_valid():
print(f" NOT VALID: {[s for s, v in sm.valid.items() if not v]}")

assert no_car_events_issues_once, \
f"Failed because no messages received, or CarEvents '{car_event_issues}' or processes not running '{not_running}'"

start_time = time.monotonic()
min_counts_control_active = 100
control_active = 0

while time.monotonic() < start_time + max_time_per_step:
sm.update()

if sm.all_alive() and sm['selfdriveState'].active:
control_active += 1

if control_active == min_counts_control_active:
break

engageable = sm['selfdriveState'].engageable
alive = sm.all_alive()
events = [event.name for event in sm['onroadEvents']]
not_running = [p.name for p in sm['managerState'].processes if not p.running and p.shouldBeRunning]
err_msg = f"Sim not engaged. active: {control_active}, engageable: {engageable}, alive: {alive}, events: {events}, not_running: {not_running}. "
if not engageable:
err_msg += "Check if modeld or locationd crashed or are not publishing. "
assert min_counts_control_active == control_active, err_msg

failure_states = []
while bridge.started.value:
time.sleep(0.1)

while not q.empty():
state = q.get()
if state.type == QueueMessageType.TERMINATION_INFO:
done_info = state.info
failure_states = [done_state for done_state in done_info if done_state != "timeout" and done_info[done_state]]
break
assert len(failure_states) == 0, f"Simulator fails to finish a loop. Failure states: {failure_states}"
except Exception:
if p_manager.poll() is None:
p_manager.terminate()
stdout, _ = p_manager.communicate(timeout=10)
print("\n\n" + "="*20 + " MANAGER LOGS " + "="*20)
print(stdout)
print("="*54 + "\n\n")
raise

def teardown_method(self):
  """Stop every process the test registered in ``self.processes``, newest first.

  Each process gets a polite ``terminate()`` and up to 15 seconds to exit. Only
  if it is still alive after that is it killed, and a killed process is then
  waited on so it does not linger as a zombie.
  """
  print("Test shutting down. CommIssues are acceptable")
  for p in reversed(self.processes):
    if isinstance(p, subprocess.Popen):
      if p.poll() is not None:
        # already exited (and reaped by poll); nothing left to stop
        continue
      p.terminate()
      try:
        p.wait(15)
      except subprocess.TimeoutExpired:
        p.kill()
        p.wait()  # reap after SIGKILL
    else:
      # anything else exposes terminate/join/exitcode/kill
      # (presumably a multiprocessing.Process, so confirm against bridge.run)
      p.terminate()
      p.join(15)
      if p.exitcode is None:
        p.kill()
        p.join()  # reap after SIGKILL
Loading