diff --git a/mani_skill/envs/sapien_env.py b/mani_skill/envs/sapien_env.py
index e70b888045..3df0ed0d46 100644
--- a/mani_skill/envs/sapien_env.py
+++ b/mani_skill/envs/sapien_env.py
@@ -39,6 +39,9 @@
 from mani_skill.utils.structs import Actor, Articulation
 from mani_skill.utils.structs.pose import Pose
 from mani_skill.utils.structs.types import Array, SimConfig
+from mani_skill.utils.visualization.viewer_camera_control import (
+    ViewerCameraControlPlugin,
+)
 from mani_skill.utils.visualization.misc import tile_images
 
 
@@ -115,6 +118,9 @@ class BaseEnv(gym.Env):
         enhanced_determinism (bool): By default this is False and env resets will reset the episode RNG only when a seed / seed list is given.
             If True, the environment will reset the episode RNG upon each reset regardless of whether a seed is provided.
             Generally enhanced_determinisim is not needed and users are recommended to pass seeds into the env reset function instead.
+
+        enable_camera_pose_editing (bool): If True, the human viewer exposes an in-window camera editor that lets you click camera frustums
+            and drag a gizmo to update their poses live.
     """
 
     # fmt: off
@@ -208,8 +214,10 @@ def __init__(
         render_backend: str = "gpu",
         parallel_in_single_scene: bool = False,
         enhanced_determinism: bool = False,
+        enable_camera_pose_editing: bool = False,
     ):
         self._enhanced_determinism = enhanced_determinism
+        self._enable_camera_pose_editing = enable_camera_pose_editing
 
         self.num_envs = num_envs
         self.reconfiguration_freq = reconfiguration_freq if reconfiguration_freq is not None else 0
@@ -312,6 +320,7 @@ def __init__(
         # Render mode
         self.render_mode = render_mode
         self._viewer = None
+        self._viewer_camera_control_plugin = None
 
         # Lighting
         self.enable_shadow = enable_shadow
@@ -1250,6 +1259,7 @@ def _close_viewer(self):
             return
         self._viewer.close()
         self._viewer = None
+        self._viewer_camera_control_plugin = None
 
     @cached_property
     def segmentation_id_map(self):
@@ -1331,6 +1341,22 @@ def set_state(self, state: Array, env_idx: torch.Tensor = None):
     def viewer(self):
         return self._viewer
 
+    def _ensure_viewer_camera_control_plugin(self):
+        if self._viewer is None:
+            return
+        existing_plugin = sapien_utils.get_obj_by_type(
+            self._viewer.plugins, ViewerCameraControlPlugin, is_unique=False
+        )
+        if isinstance(existing_plugin, list):
+            self._viewer_camera_control_plugin = existing_plugin[0]
+        elif existing_plugin is not None:
+            self._viewer_camera_control_plugin = existing_plugin
+        if self._viewer_camera_control_plugin is None:
+            self._viewer_camera_control_plugin = ViewerCameraControlPlugin(self)
+            self._viewer.plugins.append(self._viewer_camera_control_plugin)
+            self._viewer_camera_control_plugin.init(self._viewer)
+        self._viewer_camera_control_plugin.notify_scene_change()
+
     def _setup_viewer(self):
         """Setup the interactive viewer.
 
@@ -1347,6 +1373,8 @@ def _setup_viewer(self):
         )
         control_window.show_joint_axes = False
         control_window.show_camera_linesets = False
+        if self._enable_camera_pose_editing:
+            self._ensure_viewer_camera_control_plugin()
         if "render_camera" in self._human_render_cameras:
             self._viewer.set_camera_pose(
                 self._human_render_cameras["render_camera"].camera.global_pose[0].sp
diff --git a/mani_skill/examples/demo_random_action.py b/mani_skill/examples/demo_random_action.py
index 90d06b0be9..7bc9c8397d 100644
--- a/mani_skill/examples/demo_random_action.py
+++ b/mani_skill/examples/demo_random_action.py
@@ -49,6 +49,9 @@ class Args:
     pause: Annotated[bool, tyro.conf.arg(aliases=["-p"])] = False
     """If using human render mode, auto pauses the simulation upon loading"""
 
+    edit_camera_poses: bool = False
+    """Enable in-viewer camera pose editing when using the human render mode"""
+
     quiet: bool = False
     """Disable verbose output."""
 
@@ -83,6 +86,7 @@ def main(args: Args):
         render_backend=args.render_backend,
         enable_shadow=True,
         parallel_in_single_scene=parallel_in_single_scene,
+        enable_camera_pose_editing=args.edit_camera_poses,
     )
     if args.robot_uids is not None:
         env_kwargs["robot_uids"] = tuple(args.robot_uids.split(","))
@@ -107,12 +111,19 @@ def main(args: Args):
     obs, _ = env.reset(seed=args.seed, options=dict(reconfigure=True))
     if args.seed is not None and env.action_space is not None:
             env.action_space.seed(args.seed[0])
+    pause_after_viewer_boot = False
     if args.render_mode == "human":
         viewer = env.render()
-        if isinstance(viewer, sapien.utils.Viewer):
-            viewer.paused = args.pause
-        env.render()
+        pause_after_viewer_boot = args.pause and isinstance(viewer, sapien.utils.Viewer)
+        if args.edit_camera_poses:
+            print("Camera pose editing enabled: click a camera frustum in the viewer and drag the gizmo in the Camera Editor window.")
     while True:
+        if args.render_mode == "human":
+            viewer = env.render()
+            if isinstance(viewer, sapien.utils.Viewer) and pause_after_viewer_boot:
+                viewer.paused = True
+                pause_after_viewer_boot = False
+                continue
         action = env.action_space.sample() if env.action_space is not None else None
         obs, reward, terminated, truncated, info = env.step(action)
         if verbose:
@@ -120,8 +131,6 @@ def main(args: Args):
             print("terminated", terminated)
             print("truncated", truncated)
             print("info", info)
-        if args.render_mode == "human":
-            env.render()
         if args.render_mode is None or args.render_mode != "human":
             if (terminated | truncated).any():
                 break
diff --git a/mani_skill/examples/teleoperation/teleop.py b/mani_skill/examples/teleoperation/teleop.py
new file mode 100644
index 0000000000..4fc19aa060
--- /dev/null
+++ b/mani_skill/examples/teleoperation/teleop.py
@@ -0,0 +1,566 @@
+"""
+Generic cross-platform teleoperation script.
+
+Works with any robot that defines `ee_link_name` and `arm_joint_names`.
+Uses Pinocchio-based IK (no mplib dependency), so runs on macOS + Linux.
+
+Usage:
+    python -m mani_skill.examples.teleoperation.teleop -e PickCube-v1 -r panda
+    python -m mani_skill.examples.teleoperation.teleop -e PickCube-v1 -r xarm6_robotiq
+    python -m mani_skill.examples.teleoperation.teleop -e PickCube-v1 -r so100
+"""
+
+from dataclasses import dataclass, field
+from typing import Annotated, Optional
+
+import platform
+import gymnasium as gym
+import h5py
+import json
+import numpy as np
+import sapien.core as sapien
+import sapien.internal_renderer as R
+import sapien.utils.viewer
+from sapien.utils.viewer.plugin import Plugin
+import torch
+import tyro
+
+_IS_MACOS = platform.system() == "Darwin"
+_DEFAULT_SHADER = "default" if _IS_MACOS else "rt-fast"
+
+from mani_skill import format_path
+from mani_skill.agents.controllers.base_controller import CombinedController
+from mani_skill.agents.controllers.utils.kinematics import Kinematics
+from mani_skill.envs.sapien_env import BaseEnv
+from mani_skill.utils import sapien_utils
+from mani_skill.utils.structs.pose import Pose as MSPose
+from mani_skill.utils.wrappers.record import RecordEpisode
+import mani_skill.trajectory.utils as trajectory_utils
+
+
+class TeleopPlugin(Plugin):
+    """Custom viewer plugin that locks selection to the gizmo link and provides a gripper slider."""
+
+    def __init__(self):
+        self._locked_entity = None
+        self._ui_window = None
+        self.gripper_openness = 1.0  # 1.0 = fully open, 0.0 = fully closed
+        self._has_gripper = False
+
+    def set_locked_entity(self, entity):
+        self._locked_entity = entity
+
+    def set_has_gripper(self, has_gripper):
+        self._has_gripper = has_gripper
+        self._ui_window = None  # force rebuild
+
+    def notify_selected_entity_change(self):
+        if self._locked_entity is not None and self.viewer.selected_entity != self._locked_entity:
+            self.viewer.select_entity(self._locked_entity)
+
+    def _build_ui(self):
+        children = [R.UIDisplayText().Text("Teleop Controls")]
+        if self._has_gripper:
+            children.append(
+                R.UISliderFloat()
+                .Label("Gripper Opening")
+                .Min(0.0)
+                .Max(1.0)
+                .Bind(self, "gripper_openness")
+            )
+        children.append(R.UIDisplayText().Text("Keys: h=help n=move g=gripper r=reset c=next q=quit"))
+        self._ui_window = R.UIWindow().Label("Teleop").Pos(10, 10).Size(300, 120)
+        for child in children:
+            self._ui_window.append(child)
+
+    def get_ui_windows(self):
+        if self._ui_window is None:
+            self._build_ui()
+        return [self._ui_window]
+
+
+@dataclass
+class Args:
+    env_id: Annotated[str, tyro.conf.arg(aliases=["-e"])] = "PickCube-v1"
+    robot_uid: Annotated[str, tyro.conf.arg(aliases=["-r"])] = "panda"
+    """The robot to use. Any robot with ee_link_name and arm_joint_names is supported."""
+    ee_link: Optional[str] = None
+    """Override the end-effector link name. If not set, uses agent.ee_link_name."""
+    obs_mode: str = "none"
+    record_dir: str = "demos"
+    """Directory to record demonstration data and optionally videos."""
+    save_video: bool = False
+    """Whether to save videos of the demonstrations after collecting them."""
+    viewer_shader: str = _DEFAULT_SHADER
+    """Shader for the viewer. 'default' is fast, 'rt' and 'rt-fast' are ray tracing. Defaults to 'default' on macOS."""
+    video_saving_shader: str = _DEFAULT_SHADER
+    """Shader for saved videos. 'default' is fast, 'rt' and 'rt-fast' are ray tracing. Defaults to 'default' on macOS."""
+    step_size: float = 0.01
+    """Translation step size for keyboard nudge controls."""
+
+
+def parse_args() -> Args:
+    return tyro.cli(Args)
+
+
+def _get_robot_info(env, ee_link_override=None):
+    """Extract robot introspection data from the environment agent."""
+    agent = env.unwrapped.agent
+
+    # EE link name
+    ee_link_name = ee_link_override
+    if ee_link_name is None:
+        if hasattr(agent, "ee_link_name") and agent.ee_link_name:
+            ee_link_name = agent.ee_link_name
+        else:
+            link_names = [l.name for l in agent.robot.links]
+            print("ERROR: Robot does not define ee_link_name. Use --ee-link to specify one.")
+            print(f"Available links: {link_names}")
+            raise SystemExit(1)
+
+    # Arm joint names
+    if not hasattr(agent, "arm_joint_names") or not agent.arm_joint_names:
+        print("ERROR: Robot does not define arm_joint_names. This script requires it.")
+        raise SystemExit(1)
+    arm_joint_names = agent.arm_joint_names
+
+    # Arm joint indices (indices into the active joints array)
+    arm_joint_indices = []
+    for name in arm_joint_names:
+        joint = agent.robot.active_joints_map[name]
+        arm_joint_indices.append(joint.active_index[0].item())
+
+    # Gripper detection
+    has_gripper = (
+        hasattr(agent, "gripper_joint_names")
+        and agent.gripper_joint_names
+        and len(agent.gripper_joint_names) > 0
+    )
+    gripper_joint_names = agent.gripper_joint_names if has_gripper else []
+
+    # Gripper open/close values from joint limits
+    gripper_open_values = []
+    gripper_close_values = []
+    if has_gripper:
+        qlimits = agent.robot.get_qlimits()[0]  # (num_joints, 2), take first env
+        for name in gripper_joint_names:
+            joint = agent.robot.active_joints_map[name]
+            idx = joint.active_index[0].item()
+            lower = qlimits[idx, 0].item()
+            upper = qlimits[idx, 1].item()
+            gripper_open_values.append(upper)
+            gripper_close_values.append(lower)
+
+    # Action layout: CombinedController vs flat
+    controller = agent.controller
+    is_combined = isinstance(controller, CombinedController)
+    action_mapping = controller.action_mapping if is_combined else None
+
+    # URDF path
+    urdf_path = format_path(str(agent.urdf_path))
+
+    return dict(
+        ee_link_name=ee_link_name,
+        arm_joint_names=arm_joint_names,
+        arm_joint_indices=arm_joint_indices,
+        has_gripper=has_gripper,
+        gripper_joint_names=gripper_joint_names,
+        gripper_open_values=gripper_open_values,
+        gripper_close_values=gripper_close_values,
+        is_combined=is_combined,
+        action_mapping=action_mapping,
+        urdf_path=urdf_path,
+    )
+
+
+def _find_gizmo_link(agent, ee_link_name):
+    """Find a visible link for the gizmo, walking up kinematic chain if needed.
+
+    Returns (gizmo_link, ee_in_gizmo_offset) where ee_in_gizmo_offset is the
+    static transform from the gizmo link frame to the EE link frame.
+    """
+    ee_link = sapien_utils.get_obj_by_name(agent.robot.links, ee_link_name)
+
+    def _has_visual(link):
+        """Check if a link has visual geometry."""
+        try:
+            entity = link._objs[0].entity
+            for comp in entity.components:
+                # Check for render body component (visual geometry)
+                if hasattr(comp, "render_shapes") or "RenderBody" in type(comp).__name__:
+                    return True
+        except (IndexError, AttributeError):
+            pass
+        return False
+
+    # Walk up the kinematic chain to find a link with visuals
+    gizmo_link = ee_link
+    while gizmo_link is not None and not _has_visual(gizmo_link):
+        parent = gizmo_link.joint.parent_link
+        if parent is None:
+            break
+        gizmo_link = parent
+
+    # If we couldn't find any visual link, just use the EE link
+    if gizmo_link is None:
+        gizmo_link = ee_link
+
+    # Compute static offset: ee pose expressed in gizmo link frame
+    # Convert ManiSkill batched Pose to a sapien.Pose for use with the viewer gizmo
+    ms_offset = gizmo_link.pose.inv() * ee_link.pose
+    p = ms_offset.p[0].cpu().numpy()
+    q = ms_offset.q[0].cpu().numpy()
+    ee_in_gizmo = sapien.Pose(p=p, q=q)
+
+    return gizmo_link, ee_in_gizmo
+
+
+def _build_action(env, robot_info, arm_qpos, gripper_values):
+    """Construct a full action vector for env.step().
+
+    Args:
+        env: the gym environment
+        robot_info: dict from _get_robot_info
+        arm_qpos: numpy array of target arm joint positions
+        gripper_values: list of gripper joint target values (or empty if no gripper)
+    """
+    action_dim = env.action_space.shape[0]
+    action = np.zeros(action_dim, dtype=np.float32)
+
+    if robot_info["is_combined"]:
+        mapping = robot_info["action_mapping"]
+        # Fill arm action
+        if "arm" in mapping:
+            start, end = mapping["arm"]
+            action[start:end] = arm_qpos
+        # Fill gripper action(s)
+        if robot_info["has_gripper"]:
+            gripper_idx = 0
+            for uid, (start, end) in mapping.items():
+                if "gripper" in uid.lower() and "passive" not in uid.lower():
+                    size = end - start
+                    action[start:end] = gripper_values[gripper_idx:gripper_idx + size]
+                    gripper_idx += size
+    else:
+        # Flat controller: arm joints first, then gripper
+        n_arm = len(robot_info["arm_joint_names"])
+        action[:n_arm] = arm_qpos
+        if robot_info["has_gripper"] and len(gripper_values) > 0:
+            action[n_arm:n_arm + len(gripper_values)] = gripper_values
+
+    return action
+
+
+def _compute_gripper_values(robot_info, openness):
+    """Compute gripper joint values from a 0-1 openness fraction."""
+    open_vals = robot_info["gripper_open_values"]
+    close_vals = robot_info["gripper_close_values"]
+    return [c + openness * (o - c) for o, c in zip(open_vals, close_vals)]
+
+
+def _snap_gizmo_to_current(agent, gizmo_link, transform_window):
+    """Snap the gizmo to the current gizmo link pose (e.g. after reset)."""
+    p = gizmo_link.pose.p[0].cpu().numpy()
+    q = gizmo_link.pose.q[0].cpu().numpy()
+    current_pose = sapien.Pose(p=p, q=q)
+    transform_window.gizmo_matrix = current_pose.to_transformation_matrix()
+    transform_window.update_ghost_objects()
+
+
+def solve(env: BaseEnv, robot_info: dict, step_size: float, teleop_plugin: TeleopPlugin):
+    """Interactive teleoperation loop.
+
+    Returns "quit" or "continue" to signal the outer loop.
+    """
+    base_env = env.unwrapped
+    agent = base_env.agent
+    ee_link_name = robot_info["ee_link_name"]
+
+    # Setup IK solver
+    kinematics = Kinematics(
+        urdf_path=robot_info["urdf_path"],
+        end_link_name=ee_link_name,
+        articulation=agent.robot,
+        active_joint_indices=torch.tensor(robot_info["arm_joint_indices"]),
+    )
+
+    viewer = base_env.render_human()
+
+    # Find gizmo link and compute offset
+    gizmo_link, ee_in_gizmo = _find_gizmo_link(agent, ee_link_name)
+    gizmo_entity = gizmo_link._objs[0].entity
+
+    # Lock selection to the gizmo entity — prevents accidentally clicking joints
+    teleop_plugin.set_locked_entity(gizmo_entity)
+    teleop_plugin.set_has_gripper(robot_info["has_gripper"])
+    teleop_plugin.gripper_openness = 1.0  # start fully open
+    viewer.select_entity(gizmo_entity)
+
+    # Find transform window plugin
+    transform_window = None
+    for plugin in viewer.plugins:
+        if isinstance(plugin, sapien.utils.viewer.viewer.TransformWindow):
+            transform_window = plugin
+            break
+
+    # Store initial qpos for reset
+    initial_qpos = agent.robot.get_qpos()[0].cpu().numpy().copy()
+
+    # Track previous gripper openness to detect slider changes
+    prev_gripper_openness = teleop_plugin.gripper_openness
+
+    while True:
+        transform_window.enabled = True
+        base_env.render_human()
+        execute_current_pose = False
+
+        # --- Gripper slider: detect changes and actuate ---
+        if robot_info["has_gripper"]:
+            cur_openness = teleop_plugin.gripper_openness
+            if abs(cur_openness - prev_gripper_openness) > 1e-4:
+                gripper_vals = _compute_gripper_values(robot_info, cur_openness)
+                qpos = agent.robot.get_qpos()[0].cpu().numpy()
+                arm_qpos = np.array([qpos[i] for i in robot_info["arm_joint_indices"]])
+                for _ in range(4):
+                    action = _build_action(env, robot_info, arm_qpos, gripper_vals)
+                    env.step(action)
+                    base_env.render_human()
+                prev_gripper_openness = cur_openness
+
+        if viewer.window.key_press("h"):
+            print("""Available commands:
+            h: print this help menu
+            u: move EE gizmo up
+            j: move EE gizmo down
+            arrow keys: move EE gizmo in X/Y directions
+            n: execute IK motion to move robot to gizmo target pose
+            g: toggle gripper open/close (if robot has one)
+            r: reset robot to initial pose
+            c: save episode and start a new one
+            q: save and quit
+            Gripper slider in the Teleop panel for fine control.""")
+        elif viewer.window.key_press("q"):
+            return "quit"
+        elif viewer.window.key_press("c"):
+            return "continue"
+        elif viewer.window.key_press("n"):
+            execute_current_pose = True
+        elif viewer.window.key_press("r"):
+            # Reset robot to initial pose
+            arm_qpos = np.array([initial_qpos[i] for i in robot_info["arm_joint_indices"]])
+            if robot_info["has_gripper"]:
+                gripper_vals = _compute_gripper_values(robot_info, 1.0)
+                teleop_plugin.gripper_openness = 1.0
+                prev_gripper_openness = 1.0
+            else:
+                gripper_vals = []
+            for _ in range(20):
+                action = _build_action(env, robot_info, arm_qpos, gripper_vals)
+                env.step(action)
+                base_env.render_human()
+            # Snap gizmo to new EE position
+            _snap_gizmo_to_current(agent, gizmo_link, transform_window)
+            print("Reset to initial pose")
+        elif viewer.window.key_press("g") and robot_info["has_gripper"]:
+            # Toggle gripper via key (snap to fully open or fully closed)
+            if teleop_plugin.gripper_openness > 0.5:
+                teleop_plugin.gripper_openness = 0.0
+            else:
+                teleop_plugin.gripper_openness = 1.0
+            gripper_vals = _compute_gripper_values(robot_info, teleop_plugin.gripper_openness)
+            prev_gripper_openness = teleop_plugin.gripper_openness
+
+            qpos = agent.robot.get_qpos()[0].cpu().numpy()
+            arm_qpos = np.array([qpos[i] for i in robot_info["arm_joint_indices"]])
+            for _ in range(6):
+                action = _build_action(env, robot_info, arm_qpos, gripper_vals)
+                env.step(action)
+                base_env.render_human()
+            print(f"Gripper {'opened' if teleop_plugin.gripper_openness > 0.5 else 'closed'}")
+        elif viewer.window.key_press("u"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[0, 0, -step_size])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+        elif viewer.window.key_press("j"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[0, 0, +step_size])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+        elif viewer.window.key_press("down"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[+step_size, 0, 0])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+        elif viewer.window.key_press("up"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[-step_size, 0, 0])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+        elif viewer.window.key_press("right"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[0, -step_size, 0])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+        elif viewer.window.key_press("left"):
+            transform_window.gizmo_matrix = (
+                transform_window._gizmo_pose * sapien.Pose(p=[0, +step_size, 0])
+            ).to_transformation_matrix()
+            transform_window.update_ghost_objects()
+
+        if execute_current_pose:
+            # Compute target EE pose from gizmo pose + offset (world frame)
+            gizmo_pose = transform_window._gizmo_pose
+            target_ee_pose_world = gizmo_pose * ee_in_gizmo
+
+            # Transform from world frame to robot base frame for Pinocchio IK
+            robot_pose = agent.robot.pose
+            robot_pose_sp = sapien.Pose(
+                p=robot_pose.p[0].cpu().numpy(),
+                q=robot_pose.q[0].cpu().numpy(),
+            )
+            target_ee_pose_base = robot_pose_sp.inv() * target_ee_pose_world
+
+            # Run IK
+            q0 = agent.robot.get_qpos()
+            target_pose_ms = MSPose.create_from_pq(
+                p=torch.tensor([[target_ee_pose_base.p[0], target_ee_pose_base.p[1], target_ee_pose_base.p[2]]], dtype=torch.float32),
+                q=torch.tensor([[target_ee_pose_base.q[0], target_ee_pose_base.q[1], target_ee_pose_base.q[2], target_ee_pose_base.q[3]]], dtype=torch.float32),
+            )
+            ik_result = kinematics.compute_ik(
+                pose=target_pose_ms,
+                q0=q0,
+            )
+
+            if ik_result is None:
+                print("IK failed — try a closer target")
+            else:
+                target_arm_qpos = ik_result[0].cpu().numpy()
+
+                # Get current arm qpos
+                current_qpos = q0[0].cpu().numpy()
+                current_arm_qpos = np.array([current_qpos[i] for i in robot_info["arm_joint_indices"]])
+
+                # Current gripper from slider
+                if robot_info["has_gripper"]:
+                    gripper_vals = _compute_gripper_values(robot_info, teleop_plugin.gripper_openness)
+                else:
+                    gripper_vals = []
+
+                # Interpolate from current to target
+                max_delta = np.max(np.abs(target_arm_qpos - current_arm_qpos))
+                n_steps = max(1, min(100, int(max_delta / 0.02)))
+
+                for step_i in range(1, n_steps + 1):
+                    t = step_i / n_steps
+                    interp_arm = current_arm_qpos + t * (target_arm_qpos - current_arm_qpos)
+                    action = _build_action(env, robot_info, interp_arm, gripper_vals)
+                    env.step(action)
+                    base_env.render_human()
+
+                print(f"Moved to target in {n_steps} steps")
+
+            execute_current_pose = False
+
+
+def main(args: Args):
+    output_dir = f"{args.record_dir}/{args.env_id}/teleop/"
+    env = gym.make(
+        args.env_id,
+        obs_mode=args.obs_mode,
+        control_mode="pd_joint_pos",
+        render_mode="rgb_array",
+        reward_mode="none",
+        robot_uids=args.robot_uid,
+        enable_shadow=True,
+        viewer_camera_configs=dict(shader_pack=args.viewer_shader),
+    )
+    env = RecordEpisode(
+        env,
+        output_dir=output_dir,
+        trajectory_name="trajectory",
+        save_video=False,
+        info_on_video=False,
+        source_type="teleoperation",
+        source_desc="teleoperation via the click+drag system",
+    )
+
+    seed = 0
+    num_trajs = 0
+    env.reset(seed=seed)
+
+    # Extract robot info once after first reset
+    robot_info = _get_robot_info(env, ee_link_override=args.ee_link)
+    print(f"Robot: {args.robot_uid}")
+    print(f"EE link: {robot_info['ee_link_name']}")
+    print(f"Arm joints: {robot_info['arm_joint_names']}")
+    print(f"Has gripper: {robot_info['has_gripper']}")
+    if robot_info["is_combined"]:
+        print(f"Action mapping: {robot_info['action_mapping']}")
+    print("Press 'h' in the viewer for controls help.\n")
+
+    # Create teleop plugin (will be registered with viewer on first render)
+    teleop_plugin = TeleopPlugin()
+
+    # Do a first render to get the viewer, then register the plugin
+    base_env = env.unwrapped
+    viewer = base_env.render_human()
+    teleop_plugin.init(viewer)
+    viewer.plugins.append(teleop_plugin)
+
+    while True:
+        print(f"Collecting trajectory {num_trajs + 1}, seed={seed}")
+        code = solve(env, robot_info, step_size=args.step_size, teleop_plugin=teleop_plugin)
+        if code == "quit":
+            num_trajs += 1
+            break
+        elif code == "continue":
+            seed += 1
+            num_trajs += 1
+            env.reset(seed=seed)
+            continue
+
+    h5_file_path = env._h5_file.filename
+    json_file_path = env._json_path
+    env.close()
+    del env
+    print(f"Trajectories saved to {h5_file_path}")
+
+    if args.save_video:
+        print(f"Saving videos to {output_dir}")
+        trajectory_data = h5py.File(h5_file_path)
+        with open(json_file_path, "r") as f:
+            json_data = json.load(f)
+        env = gym.make(
+            args.env_id,
+            obs_mode=args.obs_mode,
+            control_mode="pd_joint_pos",
+            render_mode="rgb_array",
+            reward_mode="none",
+            robot_uids=args.robot_uid,
+            human_render_camera_configs=dict(shader_pack=args.video_saving_shader),
+        )
+        env = RecordEpisode(
+            env,
+            output_dir=output_dir,
+            trajectory_name="trajectory",
+            save_video=True,
+            info_on_video=False,
+            save_trajectory=False,
+            video_fps=30,
+        )
+        for episode in json_data["episodes"]:
+            traj_id = f"traj_{episode['episode_id']}"
+            data = trajectory_data[traj_id]
+            env.reset(**episode["reset_kwargs"])
+            env_states_list = trajectory_utils.dict_to_list_of_dicts(data["env_states"])
+            env.base_env.set_state_dict(env_states_list[0])
+            for action in np.array(data["actions"]):
+                env.step(action)
+        trajectory_data.close()
+        env.close()
+        del env
+
+
+if __name__ == "__main__":
+    main(parse_args())
diff --git a/mani_skill/utils/visualization/viewer_camera_control.py b/mani_skill/utils/visualization/viewer_camera_control.py
new file mode 100644
index 0000000000..34e839f3f0
--- /dev/null
+++ b/mani_skill/utils/visualization/viewer_camera_control.py
@@ -0,0 +1,669 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+import sapien
+from sapien import internal_renderer as R
+from sapien.utils.viewer.plugin import Plugin, copy_to_clipboard
+from transforms3d.quaternions import mat2quat
+
+from mani_skill.sensors.camera import Camera
+from mani_skill.utils import sapien_utils
+from mani_skill.utils.structs.pose import Pose
+
+if TYPE_CHECKING:
+    from mani_skill.envs.sapien_env import BaseEnv
+
+
+_VIEWER_GL_POSE = sapien.Pose([0, 0, 0], [-0.5, -0.5, 0.5, 0.5])
+_CAMERA_LINESET_VERTICES = [
+    0,
+    0,
+    0,
+    1,
+    1,
+    -1,
+    0,
+    0,
+    0,
+    -1,
+    1,
+    -1,
+    0,
+    0,
+    0,
+    1,
+    -1,
+    -1,
+    0,
+    0,
+    0,
+    -1,
+    -1,
+    -1,
+    1,
+    1,
+    -1,
+    1,
+    -1,
+    -1,
+    1,
+    -1,
+    -1,
+    -1,
+    -1,
+    -1,
+    -1,
+    -1,
+    -1,
+    -1,
+    1,
+    -1,
+    -1,
+    1,
+    -1,
+    1,
+    1,
+    -1,
+    1,
+    1.2,
+    -1,
+    0,
+    2,
+    -1,
+    0,
+    2,
+    -1,
+    -1,
+    1.2,
+    -1,
+    -1,
+    1.2,
+    -1,
+    1,
+    1.2,
+    -1,
+]
+_INACTIVE_CAMERA_LINESET_COLORS = [0.9254901961, 0.5764705882, 0.1882352941, 1.0] * 22
+_ACTIVE_CAMERA_LINESET_COLORS = [0.1882352941, 0.7921568627, 0.3882352941, 1.0] * 22
+
+
+class ViewerCameraControlPlugin(Plugin):
+    """Edit ManiSkill cameras directly from the SAPIEN viewer."""
+
+    def __init__(
+        self,
+        env: "BaseEnv",
+        enabled: bool = True,
+        selection_radius_px: float = 18.0,
+    ):
+        self.env = env
+        self.enabled = enabled
+        self.selection_radius_px = selection_radius_px
+        self._active_camera_name: Optional[str] = None
+        self._line_sets = []
+        self._line_set_names: list[str] = []
+        self._line_set_active_name: Optional[str] = None
+        self._line_set_model = None
+        self._active_line_set_model = None
+        self._ui_window = None
+        self._gizmo = None
+        self._target_gizmo = None
+        self._camera_selector = None
+        self._camera_targets: dict[str, np.ndarray] = {}
+        self._camera_ups: dict[str, np.ndarray] = {}
+        self._initial_local_poses: dict[str, sapien.Pose] = {}
+        self._initial_targets: dict[str, np.ndarray] = {}
+        self._initial_ups: dict[str, np.ndarray] = {}
+        self.default_target_distance = 0.25
+
+    def init(self, viewer):
+        super().init(viewer)
+        if hasattr(self.viewer, "register_click_handler"):
+            self.viewer.register_click_handler(self._handle_click)
+        self._create_visual_models()
+
+    def notify_scene_change(self):
+        self._clear_camera_linesets()
+        self._ui_window = None
+        self._camera_targets = {}
+        self._camera_ups = {}
+        self._initial_local_poses = {}
+        self._initial_targets = {}
+        self._initial_ups = {}
+        if self._active_camera_name not in self.camera_names:
+            self._active_camera_name = (
+                self.camera_names[0] if self.camera_names else None
+            )
+        self._capture_initial_camera_state()
+
+    def clear_scene(self):
+        self._clear_camera_linesets()
+
+    def close(self):
+        self._clear_camera_linesets()
+        self._line_set_model = None
+        self._active_line_set_model = None
+        self._ui_window = None
+        self._gizmo = None
+        self._target_gizmo = None
+        self._camera_selector = None
+        self._camera_targets = {}
+        self._camera_ups = {}
+        self._initial_local_poses = {}
+        self._initial_targets = {}
+        self._initial_ups = {}
+
+    @property
+    def editable_cameras(self) -> dict[str, Camera]:
+        cameras = {}
+        for uid, sensor in self.env._sensors.items():
+            if isinstance(sensor, Camera):
+                cameras[f"sensor:{uid}"] = sensor
+        return cameras
+
+    @property
+    def camera_names(self) -> list[str]:
+        return list(self.editable_cameras.keys())
+
+    @property
+    def camera_items(self) -> list[str]:
+        return ["None"] + self.camera_names
+
+    @property
+    def active_camera_name(self) -> Optional[str]:
+        if self._active_camera_name not in self.editable_cameras:
+            self._active_camera_name = (
+                self.camera_names[0] if self.camera_names else None
+            )
+        return self._active_camera_name
+
+    @active_camera_name.setter
+    def active_camera_name(self, value: Optional[str]):
+        self._active_camera_name = value if value in self.editable_cameras else None
+        if self._active_camera_name is not None:
+            self._ensure_look_at_state(self._active_camera_name)
+        self.viewer.notify_render_update()
+
+    @property
+    def active_camera_index(self) -> int:
+        active_camera_name = self.active_camera_name
+        if active_camera_name is None:
+            return 0
+        return self.camera_names.index(active_camera_name) + 1
+
+    @active_camera_index.setter
+    def active_camera_index(self, index: int):
+        if index <= 0 or index > len(self.camera_names):
+            self.active_camera_name = None
+        else:
+            self.active_camera_name = self.camera_names[index - 1]
+
+    @property
+    def active_camera(self) -> Optional[Camera]:
+        active_camera_name = self.active_camera_name
+        if active_camera_name is None:
+            return None
+        return self.editable_cameras[active_camera_name]
+
+    @property
+    def gizmo_matrix(self):
+        if self.active_camera_name is None:
+            return np.eye(4)
+        self._ensure_look_at_state(self.active_camera_name)
+        matrix = np.eye(4, dtype=np.float32)
+        matrix[:3, 3] = self.active_eye_position
+        return matrix
+
+    @gizmo_matrix.setter
+    def gizmo_matrix(self, matrix):
+        if self.active_camera_name is None:
+            return
+        self.active_eye_position = np.asarray(matrix[:3, 3], dtype=np.float32)
+
+    @property
+    def target_gizmo_matrix(self):
+        matrix = np.eye(4, dtype=np.float32)
+        matrix[:3, 3] = self.active_target_position
+        return matrix
+
+    @target_gizmo_matrix.setter
+    def target_gizmo_matrix(self, matrix):
+        self.active_target_position = np.asarray(matrix[:3, 3], dtype=np.float32)
+
+    @property
+    def active_eye_position(self):
+        active_camera = self.active_camera
+        if active_camera is None:
+            return np.zeros(3, dtype=np.float32)
+        pose = self._get_camera_global_pose(active_camera)
+        return np.asarray(pose.p, dtype=np.float32)
+
+    @active_eye_position.setter
+    def active_eye_position(self, value):
+        if self.active_camera_name is None:
+            return
+        self._apply_look_at(self.active_camera_name, eye=np.asarray(value, dtype=np.float32))
+
+    @property
+    def active_target_position(self):
+        if self.active_camera_name is None:
+            return np.zeros(3, dtype=np.float32)
+        self._ensure_look_at_state(self.active_camera_name)
+        return self._camera_targets[self.active_camera_name].copy()
+
+    @active_target_position.setter
+    def active_target_position(self, value):
+        if self.active_camera_name is None:
+            return
+        self._apply_look_at(
+            self.active_camera_name, target=np.asarray(value, dtype=np.float32)
+        )
+
+    @property
+    def active_up_vector(self):
+        if self.active_camera_name is None:
+            return np.array([0, 0, 1], dtype=np.float32)
+        self._ensure_look_at_state(self.active_camera_name)
+        return self._camera_ups[self.active_camera_name].copy()
+
+    @active_up_vector.setter
+    def active_up_vector(self, value):
+        if self.active_camera_name is None:
+            return
+        self._apply_look_at(
+            self.active_camera_name, up=np.asarray(value, dtype=np.float32)
+        )
+
+    def get_ui_windows(self):
+        self._build_ui()
+        if self._ui_window is None:
+            return []
+        return [self._ui_window]
+
+    def after_render(self):
+        if self.viewer.scene is None:
+            return
+        self._update_camera_linesets()
+
+    def look_through_active_camera(self, _=None):
+        active_camera = self.active_camera
+        if active_camera is None:
+            return
+        self.viewer.set_camera_pose(self._get_camera_global_pose(active_camera))
+
+    def set_active_camera_from_view(self, _=None):
+        active_camera = self.active_camera
+        if active_camera is None:
+            return
+        self._set_camera_global_pose(
+            active_camera, self.viewer.window.get_camera_pose()
+        )
+        if self.active_camera_name is not None:
+            self._sync_look_at_state_from_camera(
+                self.active_camera_name, preserve_distance=True
+            )
+        self.viewer.notify_render_update()
+
+    def copy_active_camera_config(self, _=None):
+        active_camera = self.active_camera
+        active_camera_name = self.active_camera_name
+        if active_camera is None or active_camera_name is None:
+            return
+
+        self._ensure_look_at_state(active_camera_name)
+        eye = self._format_vector(self.active_eye_position)
+        target = self._format_vector(self.active_target_position)
+        up = self._format_vector(self.active_up_vector)
+        camera_uid = active_camera_name.split(":", 1)[1]
+
+        config_lines = [
+            f'{camera_uid}_pose = sapien_utils.look_at(eye={eye}, target={target}, up={up})',
+        ]
+        if active_camera.config.fov is not None:
+            config_lines.append(
+                'CameraConfig('
+                f'"{camera_uid}", {camera_uid}_pose, {active_camera.config.width}, '
+                f'{active_camera.config.height}, {active_camera.config.fov!r}, '
+                f'{active_camera.config.near!r}, {active_camera.config.far!r}'
+                ')'
+            )
+        else:
+            intrinsic = np.asarray(active_camera.config.intrinsic).tolist()
+            config_lines.append(
+                'CameraConfig('
+                f'"{camera_uid}", pose={camera_uid}_pose, width={active_camera.config.width}, '
+                f'height={active_camera.config.height}, intrinsic={intrinsic}, '
+                f'near={active_camera.config.near!r}, far={active_camera.config.far!r}'
+                ')'
+            )
+        copy_to_clipboard("\n".join(config_lines))
+
+    def reset_camera_configs(self, _=None):
+        for camera_name, local_pose in self._initial_local_poses.items():
+            camera = self.editable_cameras.get(camera_name)
+            if camera is None:
+                continue
+            camera.camera.set_local_pose(local_pose)
+            camera.config.pose = Pose.create(local_pose)
+
+        self._camera_targets = {
+            camera_name: value.copy()
+            for camera_name, value in self._initial_targets.items()
+        }
+        self._camera_ups = {
+            camera_name: value.copy() for camera_name, value in self._initial_ups.items()
+        }
+        self._active_camera_name = None
+        self.viewer.select_entity(None)
+        self._ui_window = None
+        self._gizmo = None
+        self._target_gizmo = None
+        self.viewer.notify_render_update()
+
+    def _build_ui(self):
+        if self.viewer.scene is None or not self.camera_names:
+            self._ui_window = None
+            return
+
+        if self._ui_window is None:
+            self._gizmo = R.UIGizmo().Bind(self, "gizmo_matrix")
+            self._target_gizmo = R.UIGizmo().Bind(self, "target_gizmo_matrix")
+            self._camera_selector = (
+                R.UIOptions()
+                .Label("Camera")
+                .Style("select")
+                .BindItems(self, "camera_items")
+                .BindIndex(self, "active_camera_index")
+            )
+
+            self._ui_window = (
+                R.UIWindow()
+                .Label("Camera Editor")
+                .Pos(10, 420)
+                .Size(420, 430)
+                .append(
+                    R.UICheckbox().Label("Enabled").Bind(self, "enabled"),
+                    self._camera_selector,
+                    R.UIDisplayText().Text(
+                        "Click a camera frustum, then drag the pose gizmo to move it."
+                    ),
+                    R.UISameLine().append(
+                        R.UIButton()
+                        .Label("Look Through")
+                        .Callback(self.look_through_active_camera),
+                        R.UIButton()
+                        .Label("Set From View")
+                        .Callback(self.set_active_camera_from_view),
+                        R.UIButton()
+                        .Label("Reset")
+                        .Callback(self.reset_camera_configs),
+                        R.UIButton()
+                        .Label("Copy Config")
+                        .Callback(self.copy_active_camera_config),
+                    ),
+                    R.UIConditional()
+                    .Bind(lambda: self.enabled and self.active_camera is not None)
+                    .append(
+                        R.UISection()
+                        .Label("Position")
+                        .Expanded(True)
+                        .append(
+                            R.UIDisplayText().Text(
+                                "Move the camera position here. Orientation stays driven by Look At."
+                            ),
+                            self._gizmo,
+                        ),
+                        R.UISection()
+                        .Label("Look At")
+                        .Expanded(True)
+                        .append(
+                            R.UIDisplayText().Text(
+                                "Edit eye/target/up directly, or drag the target gizmo."
+                            ),
+                            R.UIInputFloat3()
+                            .Label("Eye")
+                            .Bind(self, "active_eye_position"),
+                            R.UIInputFloat3()
+                            .Label("Target")
+                            .Bind(self, "active_target_position"),
+                            R.UIInputFloat3()
+                            .Label("Up")
+                            .Bind(self, "active_up_vector"),
+                            self._target_gizmo,
+                        ),
+                    ),
+                )
+            )
+
+        projection = self.viewer.window.get_camera_projection_matrix()
+        view = (
+            (self.viewer.window.get_camera_pose() * _VIEWER_GL_POSE)
+            .inv()
+            .to_transformation_matrix()
+        )
+        self._gizmo.CameraMatrices(view, projection)
+        self._gizmo.Matrix(self.gizmo_matrix)
+        self._target_gizmo.CameraMatrices(view, projection)
+        self._target_gizmo.Matrix(self.target_gizmo_matrix)
+
+    def _create_visual_models(self):
+        self._line_set_model = self.viewer.renderer_context.create_line_set(
+            _CAMERA_LINESET_VERTICES,
+            _INACTIVE_CAMERA_LINESET_COLORS,
+        )
+        self._active_line_set_model = self.viewer.renderer_context.create_line_set(
+            _CAMERA_LINESET_VERTICES,
+            _ACTIVE_CAMERA_LINESET_COLORS,
+        )
+
+    def _clear_camera_linesets(self):
+        if self.viewer.render_scene is None:
+            self._line_sets = []
+            self._line_set_names = []
+            self._line_set_active_name = None
+            return
+        for node in self._line_sets:
+            self.viewer.render_scene.remove_node(node)
+        self._line_sets = []
+        self._line_set_names = []
+        self._line_set_active_name = None
+
+    def _update_camera_linesets(self):
+        if not self.enabled or self.viewer.render_scene is None:
+            self._clear_camera_linesets()
+            return
+
+        camera_names = self.camera_names
+        if (
+            len(self._line_sets) != len(camera_names)
+            or self._line_set_names != camera_names
+            or self._line_set_active_name != self.active_camera_name
+        ):
+            self._clear_camera_linesets()
+            for camera_name in camera_names:
+                line_set_model = (
+                    self._active_line_set_model
+                    if camera_name == self.active_camera_name
+                    else self._line_set_model
+                )
+                self._line_sets.append(
+                    self.viewer.render_scene.add_line_set(line_set_model)
+                )
+            self._line_set_names = list(camera_names)
+            self._line_set_active_name = self.active_camera_name
+
+        for line_set, camera_name in zip(self._line_sets, camera_names):
+            camera = self.editable_cameras[camera_name]
+            model_matrix = self._get_camera_model_matrix(camera)
+            line_set.set_position(model_matrix[:3, 3])
+            line_set.set_rotation(mat2quat(model_matrix[:3, :3]))
+            line_set.set_scale(
+                np.array(
+                    [
+                        np.tan(camera.camera.fovx / 2),
+                        np.tan(camera.camera.fovy / 2),
+                        1.0,
+                    ]
+                )
+                * 0.3
+            )
+
+    def _handle_click(self, _viewer, x: int, y: int) -> bool:
+        if not self.enabled:
+            return False
+
+        click_target = np.array([x, y], dtype=np.float32)
+        best_match = None
+        for camera_name, camera in self.editable_cameras.items():
+            projected = self._project_camera_origin(camera)
+            if projected is None:
+                continue
+            distance = np.linalg.norm(projected[:2] - click_target)
+            if distance > self.selection_radius_px:
+                continue
+            candidate = (distance, projected[2], camera_name)
+            if best_match is None or candidate < best_match:
+                best_match = candidate
+
+        if best_match is None:
+            return False
+
+        self.active_camera_name = best_match[2]
+        self.viewer.select_entity(None)
+        return True
+
+    def _project_camera_origin(self, camera: Camera):
+        segmentation_width, segmentation_height = self.viewer.window.get_picture_size(
+            "Segmentation"
+        )
+        pose = self._get_camera_global_pose(camera)
+        point = np.array([pose.p[0], pose.p[1], pose.p[2], 1.0], dtype=np.float32)
+        view = (
+            (self.viewer.window.get_camera_pose() * _VIEWER_GL_POSE)
+            .inv()
+            .to_transformation_matrix()
+        )
+        clip = self.viewer.window.get_camera_projection_matrix() @ (view @ point)
+        if clip[3] <= 1e-6:
+            return None
+        ndc = clip[:3] / clip[3]
+        if ndc[2] < -1 or ndc[2] > 1:
+            return None
+        px = (ndc[0] * 0.5 + 0.5) * segmentation_width
+        py = (1 - (ndc[1] * 0.5 + 0.5)) * segmentation_height
+        return np.array([px, py, ndc[2]], dtype=np.float32)
+
+    def _get_camera_global_pose(self, camera: Camera) -> sapien.Pose:
+        pose = camera.camera.get_global_pose()
+        if len(pose) > 1:
+            pose = pose[0]
+        return pose.sp
+
+    def _get_camera_model_matrix(self, camera: Camera) -> np.ndarray:
+        matrix = camera.camera.get_model_matrix()
+        if hasattr(matrix, "detach"):
+            matrix = matrix.detach().cpu().numpy()
+        else:
+            matrix = np.asarray(matrix)
+        if matrix.ndim == 3:
+            matrix = matrix[0]
+        return matrix
+
+    def _set_camera_global_pose(self, camera: Camera, pose: sapien.Pose):
+        local_pose = pose
+        if camera.camera.mount is not None:
+            mount_pose = camera.camera.mount.pose
+            if len(mount_pose) > 1:
+                mount_pose = mount_pose[0]
+            local_pose = mount_pose.sp.inv() * pose
+        camera.camera.set_local_pose(local_pose)
+        camera.config.pose = Pose.create(local_pose)
+
+    def _capture_initial_camera_state(self):
+        for camera_name, camera in self.editable_cameras.items():
+            local_pose = camera.camera.get_local_pose()
+            if len(local_pose) > 1:
+                local_pose = local_pose[0]
+            self._initial_local_poses[camera_name] = local_pose.sp
+            pose = self._get_camera_global_pose(camera)
+            transform = pose.to_transformation_matrix()
+            eye = np.asarray(pose.p, dtype=np.float32)
+            forward = np.asarray(transform[:3, 0], dtype=np.float32)
+            up = self._normalize_vector(
+                np.asarray(transform[:3, 2], dtype=np.float32),
+                fallback=np.array([0, 0, 1], dtype=np.float32),
+            )
+            self._initial_targets[camera_name] = (
+                eye + forward * self.default_target_distance
+            )
+            self._initial_ups[camera_name] = up
+
+    def _ensure_look_at_state(self, camera_name: str):
+        if camera_name in self._camera_targets and camera_name in self._camera_ups:
+            return
+        self._sync_look_at_state_from_camera(camera_name, preserve_distance=False)
+
+    def _sync_look_at_state_from_camera(
+        self, camera_name: str, preserve_distance: bool = True
+    ):
+        camera = self.editable_cameras[camera_name]
+        pose = self._get_camera_global_pose(camera)
+        transform = pose.to_transformation_matrix()
+        eye = np.asarray(pose.p, dtype=np.float32)
+        forward = np.asarray(transform[:3, 0], dtype=np.float32)
+        up = np.asarray(transform[:3, 2], dtype=np.float32)
+
+        target_distance = self.default_target_distance
+        if preserve_distance and camera_name in self._camera_targets:
+            target_distance = np.linalg.norm(self._camera_targets[camera_name] - eye)
+            if target_distance < 1e-4:
+                target_distance = self.default_target_distance
+
+        self._camera_targets[camera_name] = eye + forward * target_distance
+        self._camera_ups[camera_name] = self._normalize_vector(
+            up, fallback=np.array([0, 0, 1], dtype=np.float32)
+        )
+
+    def _apply_look_at(
+        self,
+        camera_name: str,
+        eye: Optional[np.ndarray] = None,
+        target: Optional[np.ndarray] = None,
+        up: Optional[np.ndarray] = None,
+    ):
+        self._ensure_look_at_state(camera_name)
+        eye = (
+            np.asarray(eye, dtype=np.float32)
+            if eye is not None
+            else self.active_eye_position
+        )
+        target = (
+            np.asarray(target, dtype=np.float32)
+            if target is not None
+            else self._camera_targets[camera_name]
+        )
+        up = self._normalize_vector(
+            up if up is not None else self._camera_ups[camera_name],
+            fallback=self._camera_ups[camera_name],
+        )
+        if np.linalg.norm(target - eye) < 1e-4:
+            return
+
+        self._camera_targets[camera_name] = target
+        self._camera_ups[camera_name] = up
+        pose = sapien_utils.look_at(eye=eye, target=target, up=up).sp
+        self._set_camera_global_pose(self.editable_cameras[camera_name], pose)
+        self.viewer.notify_render_update()
+
+    def _normalize_vector(self, vector, fallback: np.ndarray) -> np.ndarray:
+        vector = np.asarray(vector, dtype=np.float32)
+        norm = np.linalg.norm(vector)
+        if norm < 1e-6:
+            return np.asarray(fallback, dtype=np.float32)
+        return vector / norm
+
+    def _format_vector(self, vector: np.ndarray) -> str:
+        values = np.asarray(vector, dtype=np.float32).tolist()
+        return "[" + ", ".join(f"{value:.6g}" for value in values) + "]"