Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
56a0571
compile_modeld.py
Armandpl Apr 2, 2026
ed6089f
update estimates
Armandpl Apr 2, 2026
5834b42
missing image=2?
Armandpl Apr 2, 2026
d005cd1
Revert "missing image=2?"
Armandpl Apr 2, 2026
919b263
Revert "update estimates"
Armandpl Apr 2, 2026
ff2da85
Revert "compile_modeld.py"
Armandpl Apr 2, 2026
6f3dfef
load warp in ModelState init
Armandpl Apr 4, 2026
0f3a276
dead code
Armandpl Apr 4, 2026
a5a7a22
prep
Armandpl Apr 4, 2026
f43c3ea
compile modeld
Armandpl Apr 4, 2026
8d93837
update SConscript
Armandpl Apr 4, 2026
ec8a43d
tmp save plot locally
Armandpl Apr 5, 2026
2f65af2
Revert "tmp save plot locally"
Armandpl Apr 5, 2026
53e84ff
openpilot hacks?
Armandpl Apr 5, 2026
ab0ad4b
no float16
Armandpl Apr 5, 2026
f133a16
tmp more chunks
Armandpl Apr 5, 2026
fe21f55
Revert "tmp more chunks"
Armandpl Apr 5, 2026
ff25362
Revert "no float16"
Armandpl Apr 5, 2026
db26a67
realize boundaries
Armandpl Apr 5, 2026
e490560
Revert "realize boundaries"
Armandpl Apr 6, 2026
b64e152
prune=False?
Armandpl Apr 6, 2026
9625b94
Reapply "tmp more chunks"
Armandpl Apr 6, 2026
9285441
tg bug?
Armandpl Apr 6, 2026
27e0350
load first?
Armandpl Apr 6, 2026
406b350
Revert "load first?"
Armandpl Apr 6, 2026
dc1191b
revert
Armandpl Apr 6, 2026
0cfb91a
Reapply "tmp save plot locally"
Armandpl Apr 6, 2026
04205f2
0 tol pc
Armandpl Apr 6, 2026
2c00587
warp -> modeld
Armandpl Apr 6, 2026
934d89c
rename
Armandpl Apr 6, 2026
ad5422a
bypass chunking?
Armandpl Apr 6, 2026
cc97fc6
dont chunk
Armandpl Apr 6, 2026
5ffae8f
Revert "dont chunk"
Armandpl Apr 6, 2026
42bd9b6
dont chunk
Armandpl Apr 6, 2026
b3c2f2e
debug
Armandpl Apr 7, 2026
735cecb
Revert "debug"
Armandpl Apr 7, 2026
497b614
Revert "dont chunk"
Armandpl Apr 7, 2026
f2082a2
Revert "bypass chunking?"
Armandpl Apr 7, 2026
245feb9
corrupt model outputs
Armandpl Apr 7, 2026
188ecba
Revert "corrupt model outputs"
Armandpl Apr 7, 2026
b58d6a8
image=0 for warp, match master
Armandpl Apr 7, 2026
5d6e6e8
dedupe enqueue
Armandpl Apr 7, 2026
8bcccbb
pass traffic convention
Armandpl Apr 7, 2026
45e8119
tg buffer for desire
Armandpl Apr 7, 2026
a4dc55d
dedupe buffer creation
Armandpl Apr 7, 2026
a374700
compile_modeld: nuke stale cached pkl before compiling
haraschax Apr 7, 2026
a855173
test vs compile
Armandpl Apr 7, 2026
25493eb
all outputs need to be different on different inputs
Armandpl Apr 7, 2026
ae20f3b
randomize numpy inputs
Armandpl Apr 7, 2026
fe56443
randomize on every step
Armandpl Apr 7, 2026
a648c15
SConscript: nuke stale pkl+chunks before compile_modeld
haraschax Apr 8, 2026
027f1b2
compile_modeld: restore Context(IMAGE=0) for warp
haraschax Apr 8, 2026
968c987
modeld: create SubMaster before model loading
haraschax Apr 8, 2026
051e6de
Revert "modeld: create SubMaster before model loading"
haraschax Apr 8, 2026
8703885
stale metadata?
Armandpl Apr 8, 2026
49e754c
claude debug
Armandpl Apr 8, 2026
8e95790
Revert "claude debug"
Armandpl Apr 8, 2026
fc431c7
Revert "stale metadata?"
Armandpl Apr 8, 2026
c534e43
modeld: realize jit outputs before parsing
haraschax Apr 9, 2026
557a75d
Update modeld.py
haraschax Apr 9, 2026
a6df299
Merge branch 'master' into modeld-single-jit2
haraschax Apr 9, 2026
129f77b
modeld: fix NameError by removing redundant MODELS_DIR definition
haraschax Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,27 @@ for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy',
image_flag = {
'larch64': 'IMAGE=2',
}.get(arch, 'IMAGE=0')
script_files = [File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
compile_warp_cmd = f'{tg_flags} {mac_brew_string} python3 {Dir("#selfdrive/modeld").abspath}/compile_warp.py '
script_files = [File(Dir("#selfdrive/modeld").File("compile_modeld.py").abspath)]
# nuke stale cached pkl+chunks before compiling (UNSAFE CI checkout keeps gitignored files)
compile_modeld_cmd = f'rm -f {Dir("#selfdrive/modeld").abspath}/models/driving_*_tinygrad.pkl* && {tg_flags} {mac_brew_string} {image_flag} python3 {Dir("#selfdrive/modeld").abspath}/compile_modeld.py '
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
warp_targets = []
driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_on_policy', 'driving_off_policy']]
driving_metadata_deps = [File(f"models/{m}_metadata.pkl").abspath for m in ['driving_vision', 'driving_on_policy']]
modeld_targets = []
policy_pkls = []
for cam in [_ar_ox_fisheye, _os_fisheye]:
w, h = cam.width, cam.height
warp_targets += [File(f"models/warp_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
lenv.Command(warp_targets, tinygrad_files + script_files + [compiled_flags_node], compile_warp_cmd)
policy_pkls.append(File(f"models/driving_{w}x{h}_tinygrad.pkl").abspath)
modeld_targets += [File(f"models/driving_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
compile_node = lenv.Command(modeld_targets, tinygrad_files + script_files + driving_onnx_deps + driving_metadata_deps + [chunker_file], compile_modeld_cmd)

# chunk the combined policy pkls (they contain model weights)
for policy_pkl in policy_pkls:
onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps)
chunk_targets = get_chunk_paths(policy_pkl, 2.0 * onnx_sizes_sum + 10 * 1024 * 1024)
def do_chunk(target, source, env, pkl=policy_pkl, chunks=chunk_targets):
chunk_file(pkl, chunks)
lenv.Command(chunk_targets, compile_node, do_chunk)

def tg_compile(flags, model_name):
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
Expand All @@ -81,6 +94,5 @@ def tg_compile(flags, model_name):
do_chunk,
)

# Compile small models
for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy', 'dmonitoring_model']:
tg_compile(tg_flags, model_name)
# Compile dmonitoring model (driving models are in the combined policy JIT)
tg_compile(tg_flags, 'dmonitoring_model')
275 changes: 275 additions & 0 deletions selfdrive/modeld/compile_modeld.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
#!/usr/bin/env python3
import time
import pickle
import numpy as np
from pathlib import Path
from tinygrad.tensor import Tensor
from tinygrad.helpers import Context
from tinygrad.device import Device
from tinygrad.engine.jit import TinyJit

from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye

# Directory holding the ONNX models, metadata pickles, and compiled JIT pickles.
MODELS_DIR = Path(__file__).parent / 'models'

# Camera resolutions to compile for: one set of JIT pickles per sensor.
CAMERA_CONFIGS = [
  (_ar_ox_fisheye.width, _ar_ox_fisheye.height),  # tici: 1928x1208
  (_os_fisheye.width, _os_fisheye.height),  # mici: 1344x760
]

# Maps full-res pixel coords to half-res chroma-plane coords (NV12 UV is subsampled 2x).
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
# NOTE(review): unused in this file — presumably imported elsewhere; confirm before removing.
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)

def policy_pkl_path(w, h):
  """Return the path of the compiled combined driving-policy JIT pickle for a w x h camera."""
  return MODELS_DIR.joinpath(f'driving_{w}x{h}_tinygrad.pkl')


def dm_warp_pkl_path(w, h):
  """Return the path of the compiled driver-monitoring warp JIT pickle for a w x h camera."""
  return MODELS_DIR.joinpath(f'dm_warp_{w}x{h}_tinygrad.pkl')


def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
  """Nearest-neighbor perspective warp of a flat single-plane image.

  src_flat: 1-D tensor of source pixels whose row stride is (w_src + stride_pad).
  M_inv: 3x3 inverse homography mapping destination pixel coords to source coords.
  dst_shape: (w_dst, h_dst) of the output; src_shape: (h_src, w_src) of the input.
  Returns a flat tensor of h_dst * w_dst warped pixels (row-major).
  """
  w_dst, h_dst = dst_shape
  h_src, w_src = src_shape

  # dense grid of destination pixel coordinates, flattened row-major
  x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
  y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)

  # inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
  src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
  src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
  src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]

  # homogeneous divide
  src_x = src_x / src_w
  src_y = src_y / src_w

  # round to the nearest source pixel, clamped to the image bounds
  x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
  y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
  # stride_pad accounts for row padding in the flat source buffer
  idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped

  return src_flat[idx]


def frames_to_tensor(frames, model_w, model_h):
  """Repack a (H*3//2, W) YUV420 image into the model's (6, H//2, W//2) channel layout.

  Channels: four phase-shifted 2x2 subsamplings of the Y plane, then the U and V
  planes (already half resolution). model_w/model_h are unused here (shapes come
  from `frames`); they are kept so existing call sites keep working.
  """
  H = (frames.shape[0] * 2) // 3  # height of the full-res Y plane
  W = frames.shape[1]
  in_img1 = Tensor.cat(frames[0:H:2, 0::2],
                       frames[1:H:2, 0::2],
                       frames[0:H:2, 1::2],
                       frames[1:H:2, 1::2],
                       frames[H:H+H//4].reshape((H//2, W//2)),
                       frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
  return in_img1


def make_frame_prepare(cam_w, cam_h, model_w, model_h):
  """Build a closure warping one flat NV12 camera frame into the model's 6-channel input.

  cam_w/cam_h: raw camera resolution (rows may be padded to `stride` bytes).
  model_w/model_h: resolution the network expects.
  The returned function takes (input_frame, M_inv): the flat NV12 buffer and the
  3x3 inverse warp matrix expressed in full-resolution (Y-plane) pixel coords.
  """
  stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
  uv_offset = stride * y_height  # interleaved UV plane starts right after the padded Y plane
  stride_pad = stride - cam_w    # padding bytes at the end of each Y row

  def frame_prepare_tinygrad(input_frame, M_inv):
    # UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
    M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
    # deinterleave NV12 UV plane (UVUV... -> separate U, V)
    uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
    with Context(SPLIT_REDUCEOP=0):
      y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
                                    M_inv, (model_w, model_h),
                                    (cam_h, cam_w), stride_pad).realize()
      u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
                                    M_inv_uv, (model_w//2, model_h//2),
                                    (cam_h//2, cam_w//2), 0).realize()
      v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
                                    M_inv_uv, (model_w//2, model_h//2),
                                    (cam_h//2, cam_w//2), 0).realize()
    # stack the warped planes back into a single YUV420 image, then repack for the model
    yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
    tensor = frames_to_tensor(yuv, model_w, model_h)
    return tensor
  return frame_prepare_tinygrad


def make_buffers(vision_input_shapes, policy_input_shapes, frame_skip):
  """Allocate the persistent rolling buffers and host-side input arrays for the JIT.

  Buffers are sized so that sampling every `frame_skip`-th entry yields exactly the
  temporal context the models expect. Returns (bufs, npy): `bufs` holds the tinygrad
  tensors passed into the jit (the 'NPY'-device ones are backed by the arrays in
  `npy`, which the caller mutates in place between runs).
  """
  img = vision_input_shapes['img']  # (1, 12, 128, 256)
  n_frames = img[1] // 6  # each input frame contributes 6 channels
  img_buf_shape = (frame_skip * (n_frames - 1) + 1, 6, img[2], img[3])

  fb = policy_input_shapes['features_buffer']  # (1, 25, 512)
  dp = policy_input_shapes['desire_pulse']  # (1, 25, 8)
  tc = policy_input_shapes['traffic_convention']  # (1, 2)

  npy = {
    'desire': np.zeros(dp[2], dtype=np.float32),
    'traffic_convention': np.zeros(tc, dtype=np.float32),
    'tfm': np.zeros((3, 3), dtype=np.float32),
    'big_tfm': np.zeros((3, 3), dtype=np.float32),
  }
  bufs = {
    'img_buf': Tensor.zeros(img_buf_shape, dtype='uint8').contiguous().realize(),
    'big_img_buf': Tensor.zeros(img_buf_shape, dtype='uint8').contiguous().realize(),
    'feat_q': Tensor.zeros(frame_skip * (fb[1] - 1) + 1, fb[0], fb[2]).contiguous().realize(),
    'desire_q': Tensor.zeros(frame_skip * dp[1], dp[0], dp[2]).contiguous().realize(),
    **{k: Tensor(v, device='NPY').realize() for k, v in npy.items()},
  }
  return bufs, npy


def shift_and_sample(buf, new_val, sample_fn):
  """Push `new_val` into rolling buffer `buf` in place (dropping the oldest entry
  along dim 0), then return `sample_fn(buf)`."""
  buf.assign(buf[1:].cat(new_val, dim=0).contiguous())
  return sample_fn(buf)


def make_warp_dm(cam_w, cam_h, dm_w, dm_h):
  """Build a closure warping the Y plane of a flat NV12 frame to the DM model input size."""
  stride, y_height, _, _ = get_nv12_info(cam_w, cam_h)
  stride_pad = stride - cam_w  # padding bytes at the end of each Y row

  def warp_dm(input_frame, M_inv):
    # M_inv is fed from the 'NPY' device at compile time; move it to the compute device
    M_inv = M_inv.to(Device.DEFAULT)
    result = warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv, (dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
    return result
  return warp_dm


def make_run_policy(vision_runner, on_policy_runner, off_policy_runner, cam_w, cam_h,
                    vision_features_slice, frame_skip):
  """Build the single function that gets jitted: warp both camera frames, run the
  vision model, update the feature/desire queues, and run both policy heads.

  vision_features_slice: slice of the vision output holding the hidden state that
  feeds the policy's features_buffer.
  frame_skip: queues are filled every step but sampled every frame_skip-th entry.
  Returns run_policy(...) -> (vision_out, on_policy_out, off_policy_out).
  """
  model_w, model_h = MEDMODEL_INPUT_SIZE
  frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)

  def sample_skip(buf):
    # every frame_skip-th entry, flattened to a single batch dim for the model
    return buf[::frame_skip].contiguous().flatten(0, 1).unsqueeze(0)

  def sample_desire(buf):
    # max over each frame_skip window — presumably so short desire pulses survive subsampling
    return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0)

  def run_policy(img_buf, big_img_buf, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
    # IMAGE=0 restricts the warp kernels to plain buffers (no image textures)
    with Context(IMAGE=0):
      img = shift_and_sample(img_buf, frame_prepare(frame, tfm.to(Device.DEFAULT)).unsqueeze(0), sample_skip)
      big_img = shift_and_sample(big_img_buf, frame_prepare(big_frame, big_tfm.to(Device.DEFAULT)).unsqueeze(0), sample_skip)

    vision_out = next(iter(vision_runner({'img': img, 'big_img': big_img}).values())).cast('float32')

    # append the new hidden state / desire pulse, then sample the temporal context
    new_feat = vision_out[:, vision_features_slice].reshape(1, -1).unsqueeze(0)
    feat_buf = shift_and_sample(feat_q, new_feat, sample_skip)
    desire_buf = shift_and_sample(desire_q, desire.to(Device.DEFAULT).reshape(1, 1, -1), sample_desire)

    inputs = {'features_buffer': feat_buf, 'desire_pulse': desire_buf, 'traffic_convention': traffic_convention.to(Device.DEFAULT)}
    on_policy_out = next(iter(on_policy_runner(inputs).values())).cast('float32')
    off_policy_out = next(iter(off_policy_runner(inputs).values())).cast('float32')

    return vision_out, on_policy_out, off_policy_out
  return run_policy


def compile_modeld(cam_w, cam_h, n_runs=3):
  """Compile and pickle the combined driving JIT (warp + vision + both policy heads).

  Runs the jitted function `n_runs` times on freshly randomized inputs so TinyJit
  can capture and prune the kernel graph, then pickles it to policy_pkl_path().

  Args:
    cam_w, cam_h: camera resolution to compile for.
    n_runs: number of warm-up runs (must be >= 2; the test snapshot is taken on
      run index 1, i.e. the second run).

  Returns:
    (test_inputs, test_val): copies of the inputs and outputs of the second run,
    for test_vs_compile() to validate the reloaded pickle against.
  """
  from tinygrad.nn.onnx import OnnxRunner
  from openpilot.selfdrive.modeld.constants import ModelConstants

  if n_runs < 2:
    raise ValueError("n_runs must be >= 2 so the second-run snapshot exists")

  _, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
  print(f"Compiling combined policy JIT for {cam_w}x{cam_h}...")

  vision_runner = OnnxRunner(MODELS_DIR / 'driving_vision.onnx')
  on_policy_runner = OnnxRunner(MODELS_DIR / 'driving_on_policy.onnx')
  off_policy_runner = OnnxRunner(MODELS_DIR / 'driving_off_policy.onnx')

  with open(MODELS_DIR / 'driving_vision_metadata.pkl', 'rb') as f:
    vision_metadata = pickle.load(f)
  vision_features_slice = vision_metadata['output_slices']['hidden_state']
  vision_input_shapes = vision_metadata['input_shapes']
  with open(MODELS_DIR / 'driving_on_policy_metadata.pkl', 'rb') as f:
    policy_input_shapes = pickle.load(f)['input_shapes']

  frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ

  _run = make_run_policy(vision_runner, on_policy_runner, off_policy_runner,
                         cam_w, cam_h, vision_features_slice, frame_skip)
  run_policy_jit = TinyJit(_run, prune=True)
  bufs, npy = make_buffers(vision_input_shapes, policy_input_shapes, frame_skip)

  test_inputs, test_val = None, None
  for i in range(n_runs):
    # new random frames and host inputs every run so pruning can't fold them as constants
    frame = Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize()
    big_frame = Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize()
    for v in npy.values():
      v[:] = np.random.randn(*v.shape).astype(v.dtype)
    Device.default.synchronize()

    st = time.perf_counter()
    with Context(OPENPILOT_HACKS=1):
      inputs = {**bufs, 'frame': frame, 'big_frame': big_frame}
      outs = run_policy_jit(**inputs)
    mt = time.perf_counter()
    Device.default.synchronize()
    et = time.perf_counter()
    # n_runs in the progress string (was hard-coded to 10 while the loop ran 3 times)
    print(f" [{i+1}/{n_runs}] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

    if i == 1:
      # snapshot after capture (run 0) but before the rolling buffers drift further
      test_val = [np.copy(v.numpy()) for v in outs]
      test_inputs = {k: Tensor(v.numpy().copy(), device=v.device) for k, v in inputs.items()}

  pkl_path = policy_pkl_path(cam_w, cam_h)
  with open(pkl_path, "wb") as f:
    pickle.dump(run_policy_jit, f)
  print(f" Saved to {pkl_path}")
  return test_inputs, test_val


def test_vs_compile(run, inputs: dict[str, Tensor], test_val: list[np.ndarray]):
  """Re-run a (pickle-loaded) jitted model and sanity-check its outputs.

  The first of 20 runs must reproduce `test_val` exactly (checked before the jit's
  rolling buffers are mutated further); afterwards every output must change when
  all inputs are doubled, proving the jit actually consumes its inputs.
  """
  val = None
  for step in range(20):
    t0 = time.perf_counter()
    outs = run(**inputs)
    t1 = time.perf_counter()
    val = [o.numpy() for o in outs]
    t2 = time.perf_counter()
    print(f"enqueue {(t1-t0)*1e3:6.2f} ms -- total run {(t2-t0)*1e3:6.2f} ms")

    # check output matches before buffers get mutated by the jit
    if step == 0 and test_val is not None:
      np.testing.assert_equal(test_val, val)

  # test that changing the inputs changes the model outputs
  doubled = {name: Tensor(t.numpy()*2, device=t.device) for name, t in inputs.items()}
  changed_val = [o.numpy() for o in run(**doubled)]
  for before, after in zip(val, changed_val):
    assert not np.array_equal(before, after), f"output with shape {before.shape} didn't change when inputs were doubled"
  print('test_vs_compile OK')


def compile_dm_warp(cam_w, cam_h):
  """Compile and pickle the driver-monitoring warp JIT for a cam_w x cam_h camera."""
  dm_w, dm_h = DM_INPUT_SIZE
  _, _, _, yuv_size = get_nv12_info(cam_w, cam_h)

  print(f"Compiling DM warp for {cam_w}x{cam_h}...")

  warp_dm = make_warp_dm(cam_w, cam_h, dm_w, dm_h)
  warp_dm_jit = TinyJit(warp_dm, prune=True)

  # run repeatedly on random frames/warp matrices so TinyJit captures and prunes the graph
  for i in range(10):
    inputs = [Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
              Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
    Device.default.synchronize()
    st = time.perf_counter()
    warp_dm_jit(*inputs)
    mt = time.perf_counter()
    Device.default.synchronize()
    et = time.perf_counter()
    print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

  pkl_path = dm_warp_pkl_path(cam_w, cam_h)
  with open(pkl_path, "wb") as f:
    pickle.dump(warp_dm_jit, f)
  print(f" Saved to {pkl_path}")


def run_and_save_pickle():
  """Compile, pickle, reload, and validate the driving JIT and DM warp for every camera config."""
  for cam_w, cam_h in CAMERA_CONFIGS:
    inputs, outputs = compile_modeld(cam_w, cam_h)
    # reload from disk so we validate exactly what was pickled; `with` closes the
    # file handle the original `pickle.load(open(...))` leaked
    with open(policy_pkl_path(cam_w, cam_h), "rb") as f:
      pickle_loaded = pickle.load(f)
    test_vs_compile(pickle_loaded, inputs, outputs)

    compile_dm_warp(cam_w, cam_h)


# Script entry point: compile and validate the JITs for every supported camera.
if __name__ == "__main__":
  run_and_save_pickle()
Loading
Loading