From a9f3fac13e9ca8d4b9128449e0a130e1815c0499 Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 07:04:30 +0000 Subject: [PATCH 1/6] [Test] Stabilize RTX MDL shader-warmup flake in rendering correctness tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `test_shadow_hand_vision_presets.py::test_camera_renders_not_empty` and the three helper-driven rendering tests in `rendering_test_utils.py` (`rendering_test_shadow_hand`/`_cartpole`/`_dexsuite_kuka`) intermittently fail with "Camera output 'simple_shading_*_mdl' is all zeros or all inf after stepping" on cold-cache CI runners. Root cause is RTX MDL shader compile timing: a single `env.step()` returns a still-zero framebuffer for shader variants that haven't finished compiling. The variant that flakes (constant_diffuse / diffuse_mdl / full_mdl) shifts between runs depending on which MDL pipeline finishes first. Step the env 10 frames before reading the camera tensor — empirically enough across the MDL variants that flake. Adds ~1-2 s per parametrize variant; cheap relative to the cost of a retry. 
Affected jobs: - `isaaclab_tasks [2/3]` (test_camera_renders_not_empty) - `rendering-correctness-kitless` (test_rendering_*_kitless[*-simple_shading_*]) --- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 13 ++++++++++++ .../test/rendering_test_utils.py | 21 +++++++++++++++++++ .../test/test_shadow_hand_vision_presets.py | 7 ++++++- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst new file mode 100644 index 000000000000..8bd16814ca2c --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -0,0 +1,13 @@ +Fixed +^^^^^ + +* Stabilized RTX MDL shader-warmup flakes in the rendering-correctness tests + by stepping the env 10 frames before reading the camera tensor, instead of + a single step. Affected ``test_shadow_hand_vision_presets.py``'s + ``test_camera_renders_not_empty`` and the three helper-driven tests in + ``rendering_test_utils.py`` + (``rendering_test_shadow_hand``/``_cartpole``/``_dexsuite_kuka``) — all of + which intermittently failed with "Camera output is all zeros or all inf" + for ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` + variants on cold-cache CI runners (the GPU returned a still-zero + framebuffer because the MDL material hadn't finished compiling). diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index d4fbf368ea7c..8755245084cc 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -675,6 +675,24 @@ def validate_camera_outputs( pytest.fail(reason) +def _warmup_render(env, num_steps: int = 10) -> None: + """Step the env ``num_steps`` times to let RTX MDL shaders finish compiling. 
+ + RTX MDL materials compile asynchronously on first use. A single ``env.step()`` + is not enough — the GPU may return a still-zero framebuffer for shader + variants that have not finished compiling, which trips + :func:`validate_camera_outputs` with "no non-zero pixels" or + "all zeros or all inf". ``10`` is empirically enough across the MDL + presets (``simple_shading_constant_diffuse``, ``simple_shading_diffuse_mdl``, + ``simple_shading_full_mdl``) that flake on the CI runners as of 2026-05. + Adds ~1-2 s of wall time per parametrize variant; cheap relative to the + cost of a CI re-run. + """ + actions = torch.zeros(env.num_envs, env.action_space.shape[-1], device=env.device) + for _ in range(num_steps): + env.step(actions) + + def rendering_test_shadow_hand( physics_backend: str, renderer: str, @@ -700,6 +718,7 @@ def rendering_test_shadow_hand( try: env = ShadowHandVisionEnv(env_cfg) maybe_save_stage("shadow_hand", physics_backend, renderer, data_type) + _warmup_render(env) validate_camera_outputs( "shadow_hand", @@ -739,6 +758,7 @@ def rendering_test_cartpole( try: env = CartpoleCameraEnv(env_cfg) maybe_save_stage("cartpole", physics_backend, renderer, data_type) + _warmup_render(env) validate_camera_outputs( "cartpole", physics_backend, @@ -795,6 +815,7 @@ def rendering_test_dexsuite_kuka( try: env = ManagerBasedRLEnv(env_cfg) maybe_save_stage("dexsuite_kuka", physics_backend, renderer, data_type) + _warmup_render(env) validate_camera_outputs( "dexsuite_kuka", physics_backend, diff --git a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py index ccec60c3fa46..7bfc42dff714 100644 --- a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py +++ b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py @@ -398,7 +398,12 @@ def render_correctness_env(request, shadow_hand_vision_presets): env = ShadowHandVisionEnv(cfg) env.reset() actions = torch.zeros(cfg.scene.num_envs, 
env.action_space.shape[-1], device=env.device) - env.step(actions) + # Step enough frames for RTX MDL shader variants to finish compiling. A single step + # returns a still-zero framebuffer for ``simple_shading_*_mdl`` presets on cold caches, + # which trips the assertion in ``test_camera_renders_not_empty`` below. 10 steps is + # empirically enough across the MDL variants that flake in CI as of 2026-05. + for _ in range(10): + env.step(actions) yield renderer_preset, camera_preset, physics, env env.close() From 08cd6c951d7e293fe84bbf9e851169d5c7f9f313 Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 09:04:59 +0000 Subject: [PATCH 2/6] Use sim.render warmup so goldens stay valid The previous warmup used env.step() which advances physics and breaks golden-image comparison in the rendering correctness tests. Switch to sim.render() + scene.update() so shader compile finishes without moving the scene state, mirroring num_rerenders_on_reset in the env classes. --- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 23 ++++++----- .../test/rendering_test_utils.py | 38 +++++++++++-------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst index 8bd16814ca2c..37c67d16705f 100644 --- a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -2,12 +2,17 @@ Fixed ^^^^^ * Stabilized RTX MDL shader-warmup flakes in the rendering-correctness tests - by stepping the env 10 frames before reading the camera tensor, instead of - a single step. 
Affected ``test_shadow_hand_vision_presets.py``'s - ``test_camera_renders_not_empty`` and the three helper-driven tests in - ``rendering_test_utils.py`` - (``rendering_test_shadow_hand``/``_cartpole``/``_dexsuite_kuka``) — all of - which intermittently failed with "Camera output is all zeros or all inf" - for ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` - variants on cold-cache CI runners (the GPU returned a still-zero - framebuffer because the MDL material hadn't finished compiling). + by driving 10 extra ``sim.render()`` + ``scene.update()`` passes between env + construction and the camera read in + ``rendering_test_utils.py``'s ``rendering_test_shadow_hand``, + ``rendering_test_cartpole``, and ``rendering_test_dexsuite_kuka`` helpers. + The warmup mirrors the pattern already used by + :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` — + it does not advance physics, so the existing golden images remain valid. + ``test_camera_renders_not_empty`` in + ``test_shadow_hand_vision_presets.py`` (which has no golden compare) is + stabilized by stepping the env 10 frames before the non-zero pixel check. + Both flakes manifested as "Camera output is all zeros or all inf" for + ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` variants on + cold-cache CI runners — the GPU returned a still-zero framebuffer because + the MDL material had not finished compiling. diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index 8755245084cc..6a81013e8130 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -675,22 +675,30 @@ def validate_camera_outputs( pytest.fail(reason) -def _warmup_render(env, num_steps: int = 10) -> None: - """Step the env ``num_steps`` times to let RTX MDL shaders finish compiling. - - RTX MDL materials compile asynchronously on first use. 
A single ``env.step()`` - is not enough — the GPU may return a still-zero framebuffer for shader - variants that have not finished compiling, which trips - :func:`validate_camera_outputs` with "no non-zero pixels" or - "all zeros or all inf". ``10`` is empirically enough across the MDL - presets (``simple_shading_constant_diffuse``, ``simple_shading_diffuse_mdl``, - ``simple_shading_full_mdl``) that flake on the CI runners as of 2026-05. - Adds ~1-2 s of wall time per parametrize variant; cheap relative to the - cost of a CI re-run. +def _warmup_render(env, num_passes: int = 10) -> None: + """Drive extra render passes to let RTX MDL shaders finish compiling. + + RTX MDL materials compile asynchronously on first use. The single render + pass that env construction triggers (via ``scene.update`` in ``__init__``) + is not always enough — on cold-cache CI runners the GPU may return a + still-zero framebuffer for shader variants that have not finished + compiling, which trips :func:`validate_camera_outputs` with "no non-zero + pixels" or "all zeros or all inf". ``10`` passes is empirically enough + across the MDL presets (``simple_shading_constant_diffuse``, + ``simple_shading_diffuse_mdl``, ``simple_shading_full_mdl``) that flake on + the CI runners as of 2026-05. + + The warmup calls ``sim.render()`` + ``scene.update()`` rather than + ``env.step()``, so it does not advance physics state and the goldens + captured at the post-init state remain valid. This mirrors the pattern + already used by :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` + and :attr:`~isaaclab.envs.DirectRLEnvCfg.wait_for_textures` in the core + env classes. 
""" - actions = torch.zeros(env.num_envs, env.action_space.shape[-1], device=env.device) - for _ in range(num_steps): - env.step(actions) + physics_dt = env.physics_dt + for _ in range(num_passes): + env.sim.render() + env.scene.update(dt=physics_dt) def rendering_test_shadow_hand( From c359111391d5a5887aca4382f107f283e646b231 Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 09:31:52 +0000 Subject: [PATCH 3/6] Narrow MDL warmup fix to test_camera_renders_not_empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 10-pass sim.render warmup in the three goldenfile helpers (rendering_test_shadow_hand / _cartpole / _dexsuite_kuka) caused test_rendering_dexsuite_kuka and *_kitless variants to fail with large pixel diffs vs the existing goldens — the warmup exposes a different post-loading scene state for the dexsuite_kuka asset set. Keep only the test_camera_renders_not_empty change (no golden compare, env.step warmup is safe there). The goldenfile helpers already use flaky(max_runs=3) for occasional MDL hiccups. 
--- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 24 ++++++--------- .../test/rendering_test_utils.py | 29 ------------------- 2 files changed, 9 insertions(+), 44 deletions(-) diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst index 37c67d16705f..92c3c887cdb9 100644 --- a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -1,18 +1,12 @@ Fixed ^^^^^ -* Stabilized RTX MDL shader-warmup flakes in the rendering-correctness tests - by driving 10 extra ``sim.render()`` + ``scene.update()`` passes between env - construction and the camera read in - ``rendering_test_utils.py``'s ``rendering_test_shadow_hand``, - ``rendering_test_cartpole``, and ``rendering_test_dexsuite_kuka`` helpers. - The warmup mirrors the pattern already used by - :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` — - it does not advance physics, so the existing golden images remain valid. - ``test_camera_renders_not_empty`` in - ``test_shadow_hand_vision_presets.py`` (which has no golden compare) is - stabilized by stepping the env 10 frames before the non-zero pixel check. - Both flakes manifested as "Camera output is all zeros or all inf" for - ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` variants on - cold-cache CI runners — the GPU returned a still-zero framebuffer because - the MDL material had not finished compiling. +* Stabilized ``test_camera_renders_not_empty`` in + ``test_shadow_hand_vision_presets.py`` by stepping the env 10 frames + before reading the camera tensor instead of a single step. 
The test + intermittently failed with "Camera output is all zeros or all inf" for + ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` variants + on cold-cache CI runners because the GPU returned a still-zero + framebuffer before the MDL material finished compiling. The three + goldenfile-comparing helpers in ``rendering_test_utils.py`` already use + ``flaky(max_runs=3)`` and are left untouched. diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index 6a81013e8130..d4fbf368ea7c 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -675,32 +675,6 @@ def validate_camera_outputs( pytest.fail(reason) -def _warmup_render(env, num_passes: int = 10) -> None: - """Drive extra render passes to let RTX MDL shaders finish compiling. - - RTX MDL materials compile asynchronously on first use. The single render - pass that env construction triggers (via ``scene.update`` in ``__init__``) - is not always enough — on cold-cache CI runners the GPU may return a - still-zero framebuffer for shader variants that have not finished - compiling, which trips :func:`validate_camera_outputs` with "no non-zero - pixels" or "all zeros or all inf". ``10`` passes is empirically enough - across the MDL presets (``simple_shading_constant_diffuse``, - ``simple_shading_diffuse_mdl``, ``simple_shading_full_mdl``) that flake on - the CI runners as of 2026-05. - - The warmup calls ``sim.render()`` + ``scene.update()`` rather than - ``env.step()``, so it does not advance physics state and the goldens - captured at the post-init state remain valid. This mirrors the pattern - already used by :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` - and :attr:`~isaaclab.envs.DirectRLEnvCfg.wait_for_textures` in the core - env classes. 
- """ - physics_dt = env.physics_dt - for _ in range(num_passes): - env.sim.render() - env.scene.update(dt=physics_dt) - - def rendering_test_shadow_hand( physics_backend: str, renderer: str, @@ -726,7 +700,6 @@ def rendering_test_shadow_hand( try: env = ShadowHandVisionEnv(env_cfg) maybe_save_stage("shadow_hand", physics_backend, renderer, data_type) - _warmup_render(env) validate_camera_outputs( "shadow_hand", @@ -766,7 +739,6 @@ def rendering_test_cartpole( try: env = CartpoleCameraEnv(env_cfg) maybe_save_stage("cartpole", physics_backend, renderer, data_type) - _warmup_render(env) validate_camera_outputs( "cartpole", physics_backend, @@ -823,7 +795,6 @@ def rendering_test_dexsuite_kuka( try: env = ManagerBasedRLEnv(env_cfg) maybe_save_stage("dexsuite_kuka", physics_backend, renderer, data_type) - _warmup_render(env) validate_camera_outputs( "dexsuite_kuka", physics_backend, From 39e6bb71d801cfec9206606708320266b9f6b0bf Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 10:22:59 +0000 Subject: [PATCH 4/6] Poll camera output until non-zero, max 60 steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 10 frames was not enough — CI still flaked on test_camera_renders_not_empty[physx-isaacsim_rtx-simple_shading_diffuse_mdl]. Poll until every camera output tensor has a non-zero max with a 60-step cap, exiting early once all outputs are ready. 
--- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 17 +++++++++-------- .../test/test_shadow_hand_vision_presets.py | 19 ++++++++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst index 92c3c887cdb9..f5250cfa0cef 100644 --- a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -2,11 +2,12 @@ Fixed ^^^^^ * Stabilized ``test_camera_renders_not_empty`` in - ``test_shadow_hand_vision_presets.py`` by stepping the env 10 frames - before reading the camera tensor instead of a single step. The test - intermittently failed with "Camera output is all zeros or all inf" for - ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` variants - on cold-cache CI runners because the GPU returned a still-zero - framebuffer before the MDL material finished compiling. The three - goldenfile-comparing helpers in ``rendering_test_utils.py`` already use - ``flaky(max_runs=3)`` and are left untouched. + ``test_shadow_hand_vision_presets.py`` by polling the camera output until + all data-type tensors are non-zero, with a 60-step cap, instead of a + single ``env.step()``. The test intermittently failed with "Camera output + is all zeros or all inf" for ``simple_shading_*_mdl`` and + ``simple_shading_constant_diffuse`` variants on cold-cache CI runners + because the GPU returned a still-zero framebuffer before the MDL material + finished compiling. The three goldenfile-comparing helpers in + ``rendering_test_utils.py`` already use ``flaky(max_runs=3)`` and are left + untouched. 
diff --git a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py index 7bfc42dff714..2b5ce3851991 100644 --- a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py +++ b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py @@ -398,12 +398,21 @@ def render_correctness_env(request, shadow_hand_vision_presets): env = ShadowHandVisionEnv(cfg) env.reset() actions = torch.zeros(cfg.scene.num_envs, env.action_space.shape[-1], device=env.device) - # Step enough frames for RTX MDL shader variants to finish compiling. A single step - # returns a still-zero framebuffer for ``simple_shading_*_mdl`` presets on cold caches, - # which trips the assertion in ``test_camera_renders_not_empty`` below. 10 steps is - # empirically enough across the MDL variants that flake in CI as of 2026-05. - for _ in range(10): + # Step until all camera outputs are non-zero (RTX MDL shaders compile lazily). + # ``simple_shading_*_mdl`` presets can take 10–30 frames to produce non-zero pixels + # on cold-cache CI runners; poll up to ``_MAX_WARMUP_STEPS`` and exit early once ready. + _MAX_WARMUP_STEPS = 60 + for _ in range(_MAX_WARMUP_STEPS): env.step(actions) + outputs_ready = True + for output in env._tiled_camera.data.output.values(): + tensor = output.torch + finite = torch.where(torch.isinf(tensor), torch.zeros_like(tensor), tensor) + if finite.max() <= 0.2: + outputs_ready = False + break + if outputs_ready: + break yield renderer_preset, camera_preset, physics, env env.close() From 3de115adacc6e360ced6d34725c8cc4d698b0b61 Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 11:43:56 +0000 Subject: [PATCH 5/6] Add poll-until-nonzero warmup in goldenfile helpers The three rendering_test_utils helpers (rendering_test_shadow_hand / _cartpole / _dexsuite_kuka) now drive sim.render() + scene.update() passes until every camera-output tensor has a non-zero max, with a 30-pass cap and early-exit. 
Physics is not advanced (no env.step) so the existing goldens at the post-init "first non-zero frame" stay valid. A previous fixed 10-pass over-rendered and broke dexsuite_kuka goldens at 91% pixel diff. --- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 27 ++++++++----- .../test/rendering_test_utils.py | 40 +++++++++++++++++++ 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst index f5250cfa0cef..aa36cba7a8ba 100644 --- a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -1,13 +1,20 @@ Fixed ^^^^^ -* Stabilized ``test_camera_renders_not_empty`` in - ``test_shadow_hand_vision_presets.py`` by polling the camera output until - all data-type tensors are non-zero, with a 60-step cap, instead of a - single ``env.step()``. The test intermittently failed with "Camera output - is all zeros or all inf" for ``simple_shading_*_mdl`` and - ``simple_shading_constant_diffuse`` variants on cold-cache CI runners - because the GPU returned a still-zero framebuffer before the MDL material - finished compiling. The three goldenfile-comparing helpers in - ``rendering_test_utils.py`` already use ``flaky(max_runs=3)`` and are left - untouched. +* Stabilized RTX MDL shader-warmup flakes in the rendering-correctness tests + by polling the camera output until every data-type tensor reports a + non-zero max before the assertion / golden compare, instead of relying on + a single render pass: + + * ``test_camera_renders_not_empty`` in + ``test_shadow_hand_vision_presets.py`` polls via ``env.step()`` with a + 60-step cap. 
+ * The three goldenfile helpers in ``rendering_test_utils.py`` + (``rendering_test_shadow_hand`` / ``_cartpole`` / ``_dexsuite_kuka``) + poll via ``sim.render()`` + ``scene.update()`` with a 30-pass cap, so + physics state is not advanced and the existing goldens stay valid. + + All variants intermittently failed with "Camera output is all zeros or + all inf" for ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` + on cold-cache CI runners because the GPU returned a still-zero + framebuffer before the MDL material finished compiling. diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index d4fbf368ea7c..a92055d9a4e1 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -675,6 +675,43 @@ def validate_camera_outputs( pytest.fail(reason) +def _warmup_render_until_nonzero(env, max_passes: int = 30) -> None: + """Drive extra render passes until every camera output tensor has a non-zero max. + + RTX MDL materials compile asynchronously on first use. The single render pass that env + construction triggers (via ``scene.update`` in ``__init__``) is not always enough — on + cold-cache CI runners the GPU returns a still-zero framebuffer for shader variants that + have not finished compiling, which trips :func:`validate_camera_outputs` with + "no non-zero pixels" or "all zeros or all inf". + + Polling exits as soon as every camera output is ready, so the scene state stays at the + same "first non-zero frame" the existing goldens were captured at. Renders are driven by + ``sim.render()`` + ``scene.update()`` (no ``env.step()``) so physics state is not + advanced. This mirrors the pattern used by + :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` and + :attr:`~isaaclab.envs.DirectRLEnvCfg.wait_for_textures` in the core env classes. 
+ """ + camera = getattr(env, "_tiled_camera", None) + if camera is None: + camera = env.scene.sensors.get("base_camera") + if camera is None: + return + + physics_dt = env.physics_dt + for _ in range(max_passes): + outputs_ready = True + for output in camera.data.output.values(): + tensor = output if isinstance(output, torch.Tensor) else output.torch + finite = torch.where(torch.isinf(tensor), torch.zeros_like(tensor), tensor) + if finite.max() <= 0.2: + outputs_ready = False + break + if outputs_ready: + return + env.sim.render() + env.scene.update(dt=physics_dt) + + def rendering_test_shadow_hand( physics_backend: str, renderer: str, @@ -700,6 +737,7 @@ def rendering_test_shadow_hand( try: env = ShadowHandVisionEnv(env_cfg) maybe_save_stage("shadow_hand", physics_backend, renderer, data_type) + _warmup_render_until_nonzero(env) validate_camera_outputs( "shadow_hand", @@ -739,6 +777,7 @@ def rendering_test_cartpole( try: env = CartpoleCameraEnv(env_cfg) maybe_save_stage("cartpole", physics_backend, renderer, data_type) + _warmup_render_until_nonzero(env) validate_camera_outputs( "cartpole", physics_backend, @@ -795,6 +834,7 @@ def rendering_test_dexsuite_kuka( try: env = ManagerBasedRLEnv(env_cfg) maybe_save_stage("dexsuite_kuka", physics_backend, renderer, data_type) + _warmup_render_until_nonzero(env) validate_camera_outputs( "dexsuite_kuka", physics_backend, From eb23b0c33e1b5b7e014d09d97b3ee537dfbd8095 Mon Sep 17 00:00:00 2001 From: jichuanh Date: Tue, 19 May 2026 13:39:34 +0000 Subject: [PATCH 6/6] Apply warmup to registered-tasks rendering test Extend warmup_render_until_nonzero to iterate every sensor in env.scene.sensors (instead of hardcoded _tiled_camera / base_camera) and call it from test_rendering_registered_tasks.py. Also rename the helper from _warmup_render_until_nonzero (module-private) to warmup_render_until_nonzero now that it has cross-module callers. 
--- ...ichuanh-stabilize-rendering-mdl-warmup.rst | 23 ++++++----- .../test/rendering_test_utils.py | 40 +++++++++++-------- .../test/test_rendering_registered_tasks.py | 2 + 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst index aa36cba7a8ba..904156117865 100644 --- a/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst +++ b/source/isaaclab_tasks/changelog.d/jichuanh-stabilize-rendering-mdl-warmup.rst @@ -3,18 +3,21 @@ Fixed * Stabilized RTX MDL shader-warmup flakes in the rendering-correctness tests by polling the camera output until every data-type tensor reports a - non-zero max before the assertion / golden compare, instead of relying on - a single render pass: + non-zero max before the assertion / golden compare: * ``test_camera_renders_not_empty`` in ``test_shadow_hand_vision_presets.py`` polls via ``env.step()`` with a 60-step cap. - * The three goldenfile helpers in ``rendering_test_utils.py`` - (``rendering_test_shadow_hand`` / ``_cartpole`` / ``_dexsuite_kuka``) - poll via ``sim.render()`` + ``scene.update()`` with a 30-pass cap, so - physics state is not advanced and the existing goldens stay valid. + * ``rendering_test_utils.warmup_render_until_nonzero`` is invoked from the + three rendering helpers in ``rendering_test_utils.py`` + (``rendering_test_shadow_hand`` / ``_cartpole`` / ``_dexsuite_kuka``) and + from ``test_rendering_registered_tasks.py``. It iterates over every + sensor in ``env.scene.sensors`` and polls via ``sim.render()`` + + ``scene.update()`` with a 30-pass cap. Physics state is not advanced, so + the existing golden images stay valid. 
- All variants intermittently failed with "Camera output is all zeros or - all inf" for ``simple_shading_*_mdl`` and ``simple_shading_constant_diffuse`` - on cold-cache CI runners because the GPU returned a still-zero - framebuffer before the MDL material finished compiling. + All affected variants intermittently failed with "Camera output is all + zeros or all inf" for ``simple_shading_*_mdl`` and + ``simple_shading_constant_diffuse`` on cold-cache CI runners because the + GPU returned a still-zero framebuffer before the MDL material finished + compiling. diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index a92055d9a4e1..499b72e3972a 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -675,7 +675,7 @@ def validate_camera_outputs( pytest.fail(reason) -def _warmup_render_until_nonzero(env, max_passes: int = 30) -> None: +def warmup_render_until_nonzero(env, max_passes: int = 30) -> None: """Drive extra render passes until every camera output tensor has a non-zero max. RTX MDL materials compile asynchronously on first use. The single render pass that env @@ -691,25 +691,33 @@ def _warmup_render_until_nonzero(env, max_passes: int = 30) -> None: :attr:`~isaaclab.envs.DirectRLEnvCfg.num_rerenders_on_reset` and :attr:`~isaaclab.envs.DirectRLEnvCfg.wait_for_textures` in the core env classes. 
""" - camera = getattr(env, "_tiled_camera", None) - if camera is None: - camera = env.scene.sensors.get("base_camera") - if camera is None: + base = getattr(env, "unwrapped", env) + scene = getattr(base, "scene", None) + if scene is None or not getattr(scene, "sensors", None): return - physics_dt = env.physics_dt + physics_dt = base.physics_dt for _ in range(max_passes): outputs_ready = True - for output in camera.data.output.values(): - tensor = output if isinstance(output, torch.Tensor) else output.torch - finite = torch.where(torch.isinf(tensor), torch.zeros_like(tensor), tensor) - if finite.max() <= 0.2: - outputs_ready = False + for sensor in scene.sensors.values(): + data = getattr(sensor, "data", None) + output = getattr(data, "output", None) if data is not None else None + if not isinstance(output, dict): + continue + for value in output.values(): + tensor = value if isinstance(value, torch.Tensor) else getattr(value, "torch", None) + if tensor is None: + continue + finite = torch.where(torch.isinf(tensor), torch.zeros_like(tensor), tensor) + if finite.max() <= 0.2: + outputs_ready = False + break + if not outputs_ready: break if outputs_ready: return - env.sim.render() - env.scene.update(dt=physics_dt) + base.sim.render() + scene.update(dt=physics_dt) def rendering_test_shadow_hand( @@ -737,7 +745,7 @@ def rendering_test_shadow_hand( try: env = ShadowHandVisionEnv(env_cfg) maybe_save_stage("shadow_hand", physics_backend, renderer, data_type) - _warmup_render_until_nonzero(env) + warmup_render_until_nonzero(env) validate_camera_outputs( "shadow_hand", @@ -777,7 +785,7 @@ def rendering_test_cartpole( try: env = CartpoleCameraEnv(env_cfg) maybe_save_stage("cartpole", physics_backend, renderer, data_type) - _warmup_render_until_nonzero(env) + warmup_render_until_nonzero(env) validate_camera_outputs( "cartpole", physics_backend, @@ -834,7 +842,7 @@ def rendering_test_dexsuite_kuka( try: env = ManagerBasedRLEnv(env_cfg) maybe_save_stage("dexsuite_kuka", 
physics_backend, renderer, data_type) - _warmup_render_until_nonzero(env) + warmup_render_until_nonzero(env) validate_camera_outputs( "dexsuite_kuka", physics_backend, diff --git a/source/isaaclab_tasks/test/test_rendering_registered_tasks.py b/source/isaaclab_tasks/test/test_rendering_registered_tasks.py index 94eb6e07232a..a5b7da879ed0 100644 --- a/source/isaaclab_tasks/test/test_rendering_registered_tasks.py +++ b/source/isaaclab_tasks/test/test_rendering_registered_tasks.py @@ -24,6 +24,7 @@ make_generate_html_report_fixture, maybe_save_stage, validate_camera_outputs, + warmup_render_until_nonzero, ) pytestmark = pytest.mark.isaacsim_ci @@ -99,6 +100,7 @@ def test_rendering_registered_tasks(task_id: str, env_name: str): sim._app_control_on_stop_handle = None maybe_save_stage(f"registered_tasks_{task_id}", "default_physics", "default_renderer", "stage") + warmup_render_until_nonzero(env) camera_outputs_nested_dict = _collect_camera_outputs(env) num_camera_outputs = len(camera_outputs_nested_dict)