From 99bce6cf0d194b38bd8a25248d36cbd76bdb26b9 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 18 Dec 2024 19:43:58 -0500 Subject: [PATCH 01/40] Add control context for record phase --- .../common/robot_devices/control_context.py | 240 ++++++++++++++++++ lerobot/common/robot_devices/control_utils.py | 82 +++--- lerobot/scripts/control_robot.py | 47 +++- pyproject.toml | 1 + 4 files changed, 327 insertions(+), 43 deletions(-) create mode 100644 lerobot/common/robot_devices/control_context.py diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py new file mode 100644 index 000000000..c86b08856 --- /dev/null +++ b/lerobot/common/robot_devices/control_context.py @@ -0,0 +1,240 @@ +import pygame +import numpy as np +from typing import Dict, Optional +from dataclasses import dataclass + + +# Create an enum for ControlPhase +class ControlPhase: + TELEOPERATE = "Teleoperate" + WARMUP = "Warmup" + RECORD = "Record" + RESET = "Reset" + + +@dataclass +class ControlContextConfig: + display_cameras: bool = False + play_sounds: bool = False + assign_rewards: bool = False + debug_mode: bool = False + control_phase: ControlPhase = ControlPhase.TELEOPERATE + num_episodes: int = 0 + + +class ControlContext: + def __init__(self, config: Optional[ControlContextConfig] = None): + self.config = config or ControlContextConfig() + pygame.init() + if not self.config.display_cameras: + pygame.display.set_mode((1, 1), pygame.HIDDEN) + + self.screen = None + self.image_positions = {} + self.padding = 20 + self.title_height = 30 + self.controls_width = 300 # Width of the controls panel + self.events = { + "exit_early": False, + "rerecord_episode": False, + "stop_recording": False, + "next_reward": 0, + } + + if config.assign_rewards: + self.events["next_reward"] = 0 + + self.pressed_keys = [] + self.font = pygame.font.Font(None, 36) + self.small_font = pygame.font.Font(None, 24) # Smaller font for controls list + self.current_episode_index = 0 + + # Color theme + self.text_bg_color = (0, 0, 0) # Black + self.text_color = (0, 255, 0) # Green + + # Define the control instructions + self.controls = [ + ("Right Arrow", "Exit current loop"), + ("Left Arrow", "Rerecord last episode"), + ("Escape", "Stop recording"), + ("Space", "Toggle reward (if enabled)"), + ] + + def calculate_window_size(self, images: Dict[str, np.ndarray]): + """Calculate required window size based on images""" + max_width = 0 + max_height = 0 + n_images = len(images) + + # Calculate grid dimensions + grid_cols = min(2, n_images) + grid_rows = (n_images + 1) // 2 + + for image in images.values(): + max_width = max(max_width, image.shape[1]) + max_height = max(max_height, image.shape[0]) + + # Adjust total width and height calculations to remove extra padding + total_width = max_width * grid_cols + self.controls_width + total_height = max_height * grid_rows + self.title_height + + return total_width, total_height, grid_cols + + def render_controls_panel(self, window_width: int, window_height: int): + """Render the controls panel on the right side""" + # Draw controls background + controls_rect = pygame.Rect( + window_width - self.controls_width, + self.title_height, + self.controls_width, + window_height - self.title_height, + ) + pygame.draw.rect(self.screen, self.text_bg_color, controls_rect) + pygame.draw.line( + self.screen, + self.text_color, + (controls_rect.left, self.title_height), + (controls_rect.left, window_height), + 2, + ) + + # Draw "Controls" header + header = self.font.render("Controls", True, self.text_color) + header_rect = header.get_rect( + centerx=window_width - self.controls_width / 2, top=self.title_height + 10 + ) + self.screen.blit(header, header_rect) + + # Draw control instructions + y_pos = header_rect.bottom + 20 + for key, action in self.controls: + # Draw key + key_surface = self.small_font.render(key, True, self.text_color) + key_rect = key_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) + self.screen.blit(key_surface, key_rect) + + # Draw action + action_surface = self.small_font.render(action, True, self.text_color) + action_rect = action_surface.get_rect(left=key_rect.right + 10, top=y_pos) + self.screen.blit(action_surface, action_rect) + + y_pos += 30 + + # Add status information below controls + y_pos += 20 # Add some spacing + + # Control phase + phase_text = f"Control Phase: {self.config.control_phase}" + phase_surface = self.small_font.render(phase_text, True, self.text_color) + phase_rect = phase_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) + self.screen.blit(phase_surface, phase_rect) + + # Pressed keys + y_pos += 30 + keys_text = f"Pressed: {', '.join(self.pressed_keys)}" if self.pressed_keys else "Pressed: None" + keys_surface = self.small_font.render(keys_text, True, self.text_color) + keys_rect = keys_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) + self.screen.blit(keys_surface, keys_rect) + + def handle_events(self): + """Handle pygame events and update internal state""" + for event in pygame.event.get(): + if self.config.debug_mode: + print(event) + if event.type == pygame.QUIT: + self.events["stop_recording"] = True + self.events["exit_early"] = True + elif event.type == pygame.KEYDOWN: + key_name = pygame.key.name(event.key) + self.pressed_keys.append(key_name) + + if event.key == pygame.K_RIGHT: + print("Right arrow key pressed. Exiting loop...") + self.events["exit_early"] = True + elif event.key == pygame.K_LEFT: + print("Left arrow key pressed. Exiting loop and rerecord the last episode...") + self.events["rerecord_episode"] = True + self.events["exit_early"] = True + elif event.key == pygame.K_ESCAPE: + print("Escape key pressed. Stopping data recording...") + self.events["stop_recording"] = True + self.events["exit_early"] = True + elif self.config.assign_rewards and event.key == pygame.K_SPACE: + self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 + print(f"Space key pressed. New reward: {self.events['next_reward']}") + + elif event.type == pygame.KEYUP: + key_name = pygame.key.name(event.key) + if key_name in self.pressed_keys: + self.pressed_keys.remove(key_name) + + return self.events + + def render_camera_frames(self, observation: Dict[str, np.ndarray]): + """Update display with new images from observation dict""" + image_keys = [key for key in observation if "image" in key] + images = {k: observation[k].numpy() for k in image_keys} + if not images: + return + + # Initialize or resize window if needed + window_width, window_height, grid_cols = self.calculate_window_size(images) + if self.screen is None or self.screen.get_size() != (window_width, window_height): + self.screen = pygame.display.set_mode((window_width, window_height)) + + # @TODO - label this window with the camera name + pygame.display.set_caption("Camera 0") + + self.screen.fill(self.text_bg_color) + + # Update image positions and draw images + for idx, (key, image) in enumerate(images.items()): + # Calculate grid position + col = idx % grid_cols + row = idx // grid_cols + + # Calculate pixel position - adjust for controls panel + x = col * (image.shape[1] + self.padding) + y = row * (image.shape[0] + self.title_height + self.padding) + + # Convert numpy array to pygame surface + image_surface = pygame.surfarray.make_surface(np.transpose(image, (1, 0, 2))) + self.screen.blit(image_surface, (x, y + self.title_height)) + + pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) + + # Prepare top bar text + top_text_str = "" + if self.config.control_phase == ControlPhase.RECORD: + top_text_str = f"Episode: {self.current_episode_index}/{self.config.num_episodes}" + if self.config.assign_rewards: + next_reward = self.events["next_reward"] + top_text_str += f" | Reward: {next_reward}" + + top_text = self.font.render(top_text_str, True, self.text_color) + text_rect = top_text.get_rect(center=(window_width // 2, self.title_height // 2)) + self.screen.blit(top_text, text_rect) + + # Draw controls panel + self.render_controls_panel(window_width, window_height) + + pygame.display.flip() + + def update_with_observations(self, observation: Dict[str, np.ndarray]): + if self.config.display_cameras: + self.render_camera_frames(observation) + self.handle_events() + return self + + def update_current_episode(self, episode_index): + self.current_episode_index = episode_index + return self + + def close(self): + """Clean up pygame resources""" + pygame.quit() + + def get_events(self): + """Return current events state""" + return self.events.copy() diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 8cc0f3260..3926f0ae9 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -183,44 +183,44 @@ def init_policy(pretrained_policy_name_or_path, policy_overrides): def warmup_record( robot, - events, enable_teleoperation, warmup_time_s, display_cameras, fps, + control_context ): control_loop( robot=robot, control_time_s=warmup_time_s, display_cameras=display_cameras, - events=events, fps=fps, teleoperate=enable_teleoperation, + control_context=control_context, ) def record_episode( robot, dataset, - events, episode_time_s, display_cameras, policy, device, use_amp, fps, + control_context ): control_loop( robot=robot, control_time_s=episode_time_s, display_cameras=display_cameras, dataset=dataset, - events=events, policy=policy, device=device, use_amp=use_amp, fps=fps, teleoperate=policy is None, + control_context=control_context, ) @@ -229,14 +229,16 @@ def control_loop( robot, control_time_s=None, teleoperate=False, - display_cameras=False, + display_cameras=False, # TODO - remove this dataset: LeRobotDataset | None = None, - events=None, policy=None, device=None, use_amp=None, fps=None, + control_context=None, ): + events = control_context.get_events() if control_context is not None else None + # TODO(rcadene): Add option to record logs if not robot.is_connected: robot.connect() @@ -255,42 +257,46 @@ def control_loop( timestamp = 0 start_episode_t = time.perf_counter() - while timestamp < control_time_s: - start_loop_t = time.perf_counter() - - if teleoperate: - observation, action = robot.teleop_step(record_data=True) - else: - observation = robot.capture_observation() - - if policy is not None: - pred_action = predict_action(observation, policy, device, use_amp) - # Action can eventually be clipped using `max_relative_target`, - # so action actually sent is saved in the dataset. - action = robot.send_action(pred_action) - action = {"action": action} - - if dataset is not None: - frame = {**observation, **action} - dataset.add_frame(frame) - - if display_cameras and not is_headless(): - image_keys = [key for key in observation if "image" in key] - for key in image_keys: - cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR)) - cv2.waitKey(1) + try: + while timestamp < control_time_s: + start_loop_t = time.perf_counter() + + if teleoperate: + observation, action = robot.teleop_step(record_data=True) + else: + observation = robot.capture_observation() + + if policy is not None: + pred_action = predict_action(observation, policy, device, use_amp) + # Action can eventually be clipped using `max_relative_target`, + # so action actually sent is saved in the dataset. + action = robot.send_action(pred_action) + action = {"action": action} + + if dataset is not None: + frame = {**observation, **action} + dataset.add_frame(frame) + + control_context.update_with_observations(observation) + + if fps is not None: + dt_s = time.perf_counter() - start_loop_t + busy_wait(1 / fps - dt_s) - if fps is not None: dt_s = time.perf_counter() - start_loop_t - busy_wait(1 / fps - dt_s) + log_control_info(robot, dt_s, fps=fps) - dt_s = time.perf_counter() - start_loop_t - log_control_info(robot, dt_s, fps=fps) + timestamp = time.perf_counter() - start_episode_t + if events["exit_early"]: + events["exit_early"] = False + break - timestamp = time.perf_counter() - start_episode_t - if events["exit_early"]: - events["exit_early"] = False - break + except Exception as e: + print(f"Error in control loop: {e}") + finally: + # Clean up display window + if control_context is not None: + control_context.close() def reset_environment(robot, events, reset_time_s): diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 12eaf146f..e12bd01ab 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -114,11 +114,18 @@ stop_recording, warmup_record, ) +from lerobot.common.robot_devices.control_context import ( + ControlContext, + ControlContextConfig, + ControlPhase, +) + from lerobot.common.robot_devices.robots.factory import make_robot from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.robot_devices.utils import busy_wait, safe_disconnect from lerobot.common.utils.utils import init_hydra_config, init_logging, log_say, none_or_int + ######################################################################################## # Control modes ######################################################################################## @@ -174,12 +181,19 @@ def calibrate(robot: Robot, arms: list[str] | None): def teleoperate( robot: Robot, fps: int | None = None, teleop_time_s: float | None = None, display_cameras: bool = False ): + control_context = ControlContext( + config=ControlContextConfig( + display_cameras=display_cameras, + control_phase=ControlPhase.TELEOPERATE, + ) + ) control_loop( robot, control_time_s=teleop_time_s, fps=fps, teleoperate=True, display_cameras=display_cameras, + control_context=control_context, ) @@ -259,19 +273,38 @@ def record( if not robot.is_connected: robot.connect() - listener, events = init_keyboard_listener() - # Execute a few seconds without recording to: # 1. teleoperate the robot to move it in starting position if no policy provided, # 2. give times to the robot devices to connect and start synchronizing, # 3. place the cameras windows on screen enable_teleoperation = policy is None log_say("Warmup record", play_sounds) - warmup_record(robot, events, enable_teleoperation, warmup_time_s, display_cameras, fps) + + control_context = ControlContext( + config=ControlContextConfig( + control_phase=ControlPhase.WARMUP, + display_cameras=display_cameras, + play_sounds=play_sounds, + assign_rewards=False, + ) + ) + + warmup_record(robot, enable_teleoperation, warmup_time_s, display_cameras, fps, control_context) if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() + # We need to reinitialize the control context because control loop tears it down + control_context = ControlContext( + config=ControlContextConfig( + control_phase=ControlPhase.RECORD, + display_cameras=display_cameras, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + ) + ) + recorded_episodes = 0 while True: if recorded_episodes >= num_episodes: @@ -286,15 +319,18 @@ def record( record_episode( dataset=dataset, robot=robot, - events=events, episode_time_s=episode_time_s, display_cameras=display_cameras, policy=policy, device=device, use_amp=use_amp, fps=fps, + control_context=control_context, ) + # Events will be updated by control loop + events = control_context.get_events() + # Execute a few seconds without recording to give time to manually reset the environment # Current code logic doesn't allow to teleoperate during this time. # TODO(rcadene): add an option to enable teleoperation during reset @@ -314,12 +350,13 @@ def record( dataset.save_episode(task) recorded_episodes += 1 + control_context.update_current_episode(recorded_episodes) if events["stop_recording"]: break log_say("Stop recording", play_sounds, blocking=True) - stop_recording(robot, listener, display_cameras) + stop_recording(robot, None, display_cameras) if run_compute_stats: logging.info("Computing dataset statistics") diff --git a/pyproject.toml b/pyproject.toml index 59c2de8bc..6663edc0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ pyrender = {git = "https://github.com/mmatl/pyrender.git", markers = "sys_platfo hello-robot-stretch-body = {version = ">=0.7.27", markers = "sys_platform == 'linux'", optional = true} pyserial = {version = ">=3.5", optional = true} jsonlines = ">=4.0.0" +pygame = ">=2.6.1" [tool.poetry.extras] From 565479bfefa7ae73713855e4bdbe31c8e2b8ae6a Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 18 Dec 2024 20:20:56 -0500 Subject: [PATCH 02/40] Replace stop recording with control context cleanup --- lerobot/common/robot_devices/control_context.py | 8 ++++---- lerobot/common/robot_devices/control_utils.py | 16 ---------------- lerobot/scripts/control_robot.py | 3 +-- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index c86b08856..8b4f5f2d9 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -231,10 +231,10 @@ def update_current_episode(self, episode_index): self.current_episode_index = episode_index return self - def close(self): - """Clean up pygame resources""" - pygame.quit() - def get_events(self): """Return current events state""" return self.events.copy() + + def cleanup(self, robot): + robot.disconnect() + pygame.quit() diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 3926f0ae9..ec03dfe6f 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -293,10 +293,6 @@ def control_loop( except Exception as e: print(f"Error in control loop: {e}") - finally: - # Clean up display window - if control_context is not None: - control_context.close() def reset_environment(robot, events, reset_time_s): @@ -318,18 +314,6 @@ def reset_environment(robot, events, reset_time_s): events["exit_early"] = False break - -def stop_recording(robot, listener, display_cameras): - robot.disconnect() - - if not is_headless(): - if listener is not None: - listener.stop() - - if display_cameras: - cv2.destroyAllWindows() - - def sanity_check_dataset_name(repo_id, policy): _, dataset_name = repo_id.split("/") # either repo_id doesnt start with "eval_" and there is no policy diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index e12bd01ab..a237c0385 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -111,7 +111,6 @@ reset_environment, sanity_check_dataset_name, sanity_check_dataset_robot_compatibility, - stop_recording, warmup_record, ) from lerobot.common.robot_devices.control_context import ( @@ -356,7 +355,7 @@ def record( break log_say("Stop recording", play_sounds, blocking=True) - stop_recording(robot, None, display_cameras) + control_context.cleanup(robot) if run_compute_stats: logging.info("Computing dataset statistics") From 7235e0e040d3aea243316dfeccbf00a43496d1e1 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 18 Dec 2024 20:29:59 -0500 Subject: [PATCH 03/40] Added todos --- lerobot/common/robot_devices/control_context.py | 4 ++++ lerobot/common/robot_devices/control_utils.py | 2 +- lerobot/scripts/control_robot.py | 3 ++- lerobot/scripts/control_sim_robot.py | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 8b4f5f2d9..736b48469 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -20,6 +20,7 @@ class ControlContextConfig: debug_mode: bool = False control_phase: ControlPhase = ControlPhase.TELEOPERATE num_episodes: int = 0 + # @TODO(jackvial): Add robot on this class so we can call robot.disconnect() in cleanup class ControlContext: @@ -124,6 +125,7 @@ def render_controls_panel(self, window_width: int, window_height: int): # Add status information below controls y_pos += 20 # Add some spacing + # TODO(jackvial): Move control phase to the top bar # Control phase phase_text = f"Control Phase: {self.config.control_phase}" phase_surface = self.small_font.render(phase_text, True, self.text_color) @@ -219,6 +221,8 @@ def render_camera_frames(self, observation: Dict[str, np.ndarray]): # Draw controls panel self.render_controls_panel(window_width, window_height) + # TODO(jackvial): Would be nice to show count down timer for warmup phase and reset phase + pygame.display.flip() def update_with_observations(self, observation: Dict[str, np.ndarray]): diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index ec03dfe6f..631e5815a 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -25,7 +25,7 @@ from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, set_global_seed from lerobot.scripts.eval import get_pretrained_policy_path - +# @TODO(jackvial): Move this to ControlContext and make configurable def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None): log_items = [] if episode_index is not None: diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index a237c0385..983cf6d76 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -293,7 +293,8 @@ def record( if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - # We need to reinitialize the control context because control loop tears it down + # @TODO(jackvial): Maybe add an update_config method to ControlContext so we don't + # initialize a new class control_context = ControlContext( config=ControlContextConfig( control_phase=ControlPhase.RECORD, diff --git a/lerobot/scripts/control_sim_robot.py b/lerobot/scripts/control_sim_robot.py index 4fffa8c75..ae839d3e1 100644 --- a/lerobot/scripts/control_sim_robot.py +++ b/lerobot/scripts/control_sim_robot.py @@ -299,6 +299,7 @@ def record( dataset.add_frame(frame) + # @TODO(jackvial): Update to use ControlContext if display_cameras and not is_headless(): for key in image_keys: cv2.imshow(key, cv2.cvtColor(observation[key], cv2.COLOR_RGB2BGR)) From c1c5c73aa6792bfe99fc1d19197808784a186144 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 18 Dec 2024 20:30:40 -0500 Subject: [PATCH 04/40] format todos. very important --- lerobot/common/robot_devices/control_context.py | 2 +- lerobot/common/robot_devices/control_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 736b48469..34a4939d9 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -20,7 +20,7 @@ class ControlContextConfig: debug_mode: bool = False control_phase: ControlPhase = ControlPhase.TELEOPERATE num_episodes: int = 0 - # @TODO(jackvial): Add robot on this class so we can call robot.disconnect() in cleanup + # TODO(jackvial): Add robot on this class so we can call robot.disconnect() in cleanup class ControlContext: diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 631e5815a..288303385 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -25,7 +25,7 @@ from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, set_global_seed from lerobot.scripts.eval import get_pretrained_policy_path -# @TODO(jackvial): Move this to ControlContext and make configurable +# TODO(jackvial): Move this to ControlContext and make configurable def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None): log_items = [] if episode_index is not None: From edcbf678f3a20682c52ace1b3f7093da22b5a988 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Thu, 19 Dec 2024 20:11:07 -0500 Subject: [PATCH 05/40] Setup test bed for iterating on UI --- .../common/robot_devices/control_context.py | 84 +++++++++++++++---- 1 file changed, 67 insertions(+), 17 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 34a4939d9..1f2399469 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -82,8 +82,19 @@ def calculate_window_size(self, images: Dict[str, np.ndarray]): return total_width, total_height, grid_cols - def render_controls_panel(self, window_width: int, window_height: int): - """Render the controls panel on the right side""" + def draw_top_bar(self, window_width: int): + top_text_str = "" + if self.config.control_phase == ControlPhase.RECORD: + top_text_str = f"Episode: {self.current_episode_index}/{self.config.num_episodes}" + if self.config.assign_rewards: + next_reward = self.events["next_reward"] + top_text_str += f" | Reward: {next_reward}" + + top_text = self.font.render(top_text_str, True, self.text_color) + text_rect = top_text.get_rect(center=(window_width // 2, self.title_height // 2)) + self.screen.blit(top_text, text_rect) + + def draw_right_side_bar(self, window_width: int, window_height: int): # Draw controls background controls_rect = pygame.Rect( window_width - self.controls_width, @@ -206,20 +217,8 @@ def render_camera_frames(self, observation: Dict[str, np.ndarray]): pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) - # Prepare top bar text - top_text_str = "" - if self.config.control_phase == ControlPhase.RECORD: - top_text_str = f"Episode: {self.current_episode_index}/{self.config.num_episodes}" - if self.config.assign_rewards: - next_reward = self.events["next_reward"] - top_text_str += f" | Reward: {next_reward}" - - top_text = self.font.render(top_text_str, True, self.text_color) - text_rect = top_text.get_rect(center=(window_width // 2, self.title_height // 2)) - self.screen.blit(top_text, text_rect) - - # Draw controls panel - self.render_controls_panel(window_width, window_height) + self.draw_top_bar(window_width) + self.draw_right_side_bar(window_width, window_height) # TODO(jackvial): Would be nice to show count down timer for warmup phase and reset phase @@ -238,7 +237,58 @@ def update_current_episode(self, episode_index): def get_events(self): """Return current events state""" return self.events.copy() - + def cleanup(self, robot): robot.disconnect() pygame.quit() + + +if __name__ == "__main__": + import torch + import cv2 + import numpy as np + + config = ControlContextConfig( + display_cameras=True, + assign_rewards=True, + debug_mode=True, + control_phase=ControlPhase.RECORD, + num_episodes=5, + ) + context = ControlContext(config) + + # Initialize webcam + cap = cv2.VideoCapture(0) + + # Check if the webcam is opened correctly + if not cap.isOpened(): + raise IOError("Cannot open webcam") + + while True: + # Read frame from webcam + ret, frame = cap.read() + if not ret: + print("Failed to grab frame") + break + + # Convert BGR to RGB (OpenCV uses BGR by default) + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + # Convert to torch tensor and normalize to float + image = torch.tensor(frame_rgb).float() + + # Create observation dictionary with state and image + observation = { + "state": torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531]), + "image": image, + } + + context.update_with_observations(observation) + events = context.get_events() + + if events["exit_early"]: + break + + # Release the webcam and cleanup + cap.release() + cv2.destroyAllWindows() From 86e826396f198727eddad47e83d0dff37c1fd092 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Thu, 19 Dec 2024 20:31:36 -0500 Subject: [PATCH 06/40] Move mode to top bar --- .../common/robot_devices/control_context.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 1f2399469..0fdf0386f 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -46,8 +46,8 @@ def __init__(self, config: Optional[ControlContextConfig] = None): self.events["next_reward"] = 0 self.pressed_keys = [] - self.font = pygame.font.Font(None, 36) - self.small_font = pygame.font.Font(None, 24) # Smaller font for controls list + self.font = pygame.font.SysFont('courier', 24) # Courier is a monospace font + self.small_font = pygame.font.SysFont('courier', 18) # Smaller font for controls list self.current_episode_index = 0 # Color theme @@ -56,10 +56,10 @@ def __init__(self, config: Optional[ControlContextConfig] = None): # Define the control instructions self.controls = [ - ("Right Arrow", "Exit current loop"), - ("Left Arrow", "Rerecord last episode"), - ("Escape", "Stop recording"), - ("Space", "Toggle reward (if enabled)"), + ("Right Arrow", "Exit Early"), + ("Left Arrow", "Rerecord"), + ("Escape", "Stop"), + ("Space", "Toggle Reward"), ] def calculate_window_size(self, images: Dict[str, np.ndarray]): @@ -83,9 +83,9 @@ def calculate_window_size(self, images: Dict[str, np.ndarray]): return total_width, total_height, grid_cols def draw_top_bar(self, window_width: int): - top_text_str = "" + top_text_str = f"Mode: {self.config.control_phase}" if self.config.control_phase == ControlPhase.RECORD: - top_text_str = f"Episode: {self.current_episode_index}/{self.config.num_episodes}" + top_text_str += f" | Episode: {self.current_episode_index}/{self.config.num_episodes}" if self.config.assign_rewards: next_reward = self.events["next_reward"] top_text_str += f" | Reward: {next_reward}" @@ -136,16 +136,9 @@ def draw_right_side_bar(self, window_width: int, window_height: int): # Add status information below controls y_pos += 20 # Add some spacing - # TODO(jackvial): Move control phase to the top bar - # Control phase - phase_text = f"Control Phase: {self.config.control_phase}" - phase_surface = self.small_font.render(phase_text, True, self.text_color) - phase_rect = phase_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) - self.screen.blit(phase_surface, phase_rect) - # Pressed keys y_pos += 30 - keys_text = f"Pressed: {', '.join(self.pressed_keys)}" if self.pressed_keys else "Pressed: None" + keys_text = f"Keys Pressed: {', '.join(self.pressed_keys)}" if self.pressed_keys else "Keys Pressed: None" keys_surface = self.small_font.render(keys_text, True, self.text_color) keys_rect = keys_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) self.screen.blit(keys_surface, keys_rect) From 5f6c62fdabead4f1ff93f05793b885184c3b3a2d Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sat, 21 Dec 2024 14:43:12 -0500 Subject: [PATCH 07/40] Remove side bar --- .../common/robot_devices/control_context.py | 57 ++----------------- 1 file changed, 4 insertions(+), 53 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 0fdf0386f..5e7ccc8ec 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -34,7 +34,6 @@ def __init__(self, config: Optional[ControlContextConfig] = None): self.image_positions = {} self.padding = 20 self.title_height = 30 - self.controls_width = 300 # Width of the controls panel self.events = { "exit_early": False, "rerecord_episode": False, @@ -77,7 +76,7 @@ def calculate_window_size(self, images: Dict[str, np.ndarray]): max_height = max(max_height, image.shape[0]) # Adjust total width and height calculations to remove extra padding - total_width = max_width * grid_cols + self.controls_width + total_width = max_width * grid_cols total_height = max_height * grid_rows + self.title_height return total_width, total_height, grid_cols @@ -94,55 +93,6 @@ def draw_top_bar(self, window_width: int): text_rect = top_text.get_rect(center=(window_width // 2, self.title_height // 2)) self.screen.blit(top_text, text_rect) - def draw_right_side_bar(self, window_width: int, window_height: int): - # Draw controls background - controls_rect = pygame.Rect( - window_width - self.controls_width, - self.title_height, - self.controls_width, - window_height - self.title_height, - ) - pygame.draw.rect(self.screen, self.text_bg_color, controls_rect) - pygame.draw.line( - self.screen, - self.text_color, - (controls_rect.left, self.title_height), - (controls_rect.left, window_height), - 2, - ) - - # Draw "Controls" header - header = self.font.render("Controls", True, self.text_color) - header_rect = header.get_rect( - centerx=window_width - self.controls_width / 2, top=self.title_height + 10 - ) - self.screen.blit(header, header_rect) - - # Draw control instructions - y_pos = header_rect.bottom + 20 - for key, action in self.controls: - # Draw key - key_surface = self.small_font.render(key, True, self.text_color) - key_rect = key_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) - self.screen.blit(key_surface, key_rect) - - # Draw action - action_surface = self.small_font.render(action, True, self.text_color) - action_rect = action_surface.get_rect(left=key_rect.right + 10, top=y_pos) - self.screen.blit(action_surface, action_rect) - - y_pos += 30 - - # Add status information below controls - y_pos += 20 # Add some spacing - - # Pressed keys - y_pos += 30 - keys_text = f"Keys Pressed: {', '.join(self.pressed_keys)}" if self.pressed_keys else "Keys Pressed: None" - keys_surface = self.small_font.render(keys_text, True, self.text_color) - keys_rect = keys_surface.get_rect(left=window_width - self.controls_width + 20, top=y_pos) - self.screen.blit(keys_surface, keys_rect) - def handle_events(self): """Handle pygame events and update internal state""" for event in pygame.event.get(): @@ -211,7 +161,6 @@ def render_camera_frames(self, observation: Dict[str, np.ndarray]): pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) self.draw_top_bar(window_width) - self.draw_right_side_bar(window_width, window_height) # TODO(jackvial): Would be nice to show count down timer for warmup phase and reset phase @@ -246,10 +195,12 @@ def cleanup(self, robot): assign_rewards=True, debug_mode=True, control_phase=ControlPhase.RECORD, - num_episodes=5, + num_episodes=200, ) context = ControlContext(config) + context.update_current_episode(199) + # Initialize webcam cap = cv2.VideoCapture(0) From 98231ea5614c6efd5d663ec19b1313782ed868c0 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sat, 21 Dec 2024 15:05:29 -0500 Subject: [PATCH 08/40] Remove side bar --- lerobot/common/robot_devices/control_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 5e7ccc8ec..32c4a2d78 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -221,6 +221,7 @@ def cleanup(self, robot): # Convert to torch tensor and normalize to float image = torch.tensor(frame_rgb).float() + # TODO(jackvial): Setup to support multiple cameras # Create observation dictionary with state and image observation = { "state": torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531]), From 997ad3e1b28fa23550703ebcf78f4294f0fb0f66 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sun, 22 Dec 2024 14:44:03 -0500 Subject: [PATCH 09/40] support rendering multiple cameras --- .../common/robot_devices/control_context.py | 70 ++++++++++++------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 32c4a2d78..0e7f1bfe0 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -188,8 +188,17 @@ def cleanup(self, robot): if __name__ == "__main__": import torch import cv2 + import time import numpy as np + def read_image_from_camera(cap): + ret, frame = cap.read() + if not ret: + print("Failed to grab frame") + return None + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + return torch.tensor(frame_rgb).float() + config = ControlContextConfig( display_cameras=True, assign_rewards=True, @@ -198,42 +207,49 @@ def cleanup(self, robot): num_episodes=200, ) context = ControlContext(config) - context.update_current_episode(199) - # Initialize webcam - cap = cv2.VideoCapture(0) + # Initialize cameras with proper naming convention + cameras = { + "main": cv2.VideoCapture(0), + "top": cv2.VideoCapture(4) + } - # Check if the webcam is opened correctly - if not cap.isOpened(): - raise IOError("Cannot open webcam") + # Check if cameras are opened correctly + for name, cap in cameras.items(): + if not cap.isOpened(): + raise Exception(f"Error: Could not open {name} camera") + # Main loop while True: - # Read frame from webcam - ret, frame = cap.read() - if not ret: - print("Failed to grab frame") - break - - # Convert BGR to RGB (OpenCV uses BGR by default) - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - - # Convert to torch tensor and normalize to float - image = torch.tensor(frame_rgb).float() - - # TODO(jackvial): Setup to support multiple cameras - # Create observation dictionary with state and image - observation = { - "state": torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531]), - "image": image, + # Capture images from cameras + images = {} + camera_logs = {} + for name, cap in cameras.items(): + before_camread_t = time.perf_counter() + images[name] = read_image_from_camera(cap) + camera_logs[f"read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t + + # Create state tensor (simulating follower positions) + state = torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531]) + + # Construct observation dictionary with proper naming + obs_dict = { + "observation.state": state } + + # Add camera images to observation dictionary + for name in cameras: + obs_dict[f"observation.images.{name}"] = images[name] - context.update_with_observations(observation) + # Update context with observations + context.update_with_observations(obs_dict) events = context.get_events() if events["exit_early"]: break - # Release the webcam and cleanup - cap.release() - cv2.destroyAllWindows() + # Cleanup + for cap in cameras.values(): + cap.release() + cv2.destroyAllWindows() \ No newline at end of file From f7cdd1d81b036e47a7b6acd20fb91362d2882f86 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sun, 22 Dec 2024 14:55:33 -0500 Subject: [PATCH 10/40] Test testing with multiple cameras --- lerobot/common/robot_devices/control_context.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 0e7f1bfe0..2b05bcaf9 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -212,7 +212,8 @@ def read_image_from_camera(cap): # Initialize cameras with proper naming convention cameras = { "main": cv2.VideoCapture(0), - "top": cv2.VideoCapture(4) + "top": cv2.VideoCapture(4), + "web": cv2.VideoCapture(8) } # Check if cameras are opened correctly From 5b2145179dd0dc28191148bd6f900ee350652832 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sun, 22 Dec 2024 15:10:07 -0500 Subject: [PATCH 11/40] display camera labels --- .../common/robot_devices/control_context.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 2b05bcaf9..312c4ec26 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -127,7 +127,7 @@ def handle_events(self): return self.events - def render_camera_frames(self, observation: Dict[str, np.ndarray]): + def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): """Update display with new images from observation dict""" image_keys = [key for key in observation if "image" in key] images = {k: observation[k].numpy() for k in image_keys} @@ -138,9 +138,7 @@ def render_camera_frames(self, observation: Dict[str, np.ndarray]): window_width, window_height, grid_cols = self.calculate_window_size(images) if self.screen is None or self.screen.get_size() != (window_width, window_height): self.screen = pygame.display.set_mode((window_width, window_height)) - - # @TODO - label this window with the camera name - pygame.display.set_caption("Camera 0") + pygame.display.set_caption("LeRobot") self.screen.fill(self.text_bg_color) @@ -158,17 +156,19 @@ def render_camera_frames(self, observation: Dict[str, np.ndarray]): image_surface = pygame.surfarray.make_surface(np.transpose(image, (1, 0, 2))) self.screen.blit(image_surface, (x, y + self.title_height)) - pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) + camera_label_text = key.split(".")[-1] + camera_label = self.font.render(camera_label_text, True, self.text_color) + self.screen.blit(camera_label, (x + 5, y + self.title_height + 5)) - self.draw_top_bar(window_width) - # TODO(jackvial): Would be nice to show count down timer for warmup phase and reset phase + pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) + self.draw_top_bar(window_width) pygame.display.flip() def update_with_observations(self, observation: Dict[str, np.ndarray]): if self.config.display_cameras: - self.render_camera_frames(observation) + self.render_scene_from_observations(observation) self.handle_events() return self @@ -209,21 +209,17 @@ def read_image_from_camera(cap): context = ControlContext(config) context.update_current_episode(199) - # Initialize cameras with proper naming convention cameras = { "main": cv2.VideoCapture(0), "top": cv2.VideoCapture(4), "web": cv2.VideoCapture(8) } - # Check if cameras are opened correctly for name, cap in cameras.items(): if not cap.isOpened(): raise Exception(f"Error: Could not open {name} camera") - # Main loop while True: - # Capture images from cameras images = {} camera_logs = {} for name, cap in cameras.items(): @@ -234,12 +230,10 @@ def read_image_from_camera(cap): # Create state tensor (simulating follower positions) state = torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531]) - # Construct observation dictionary with proper naming obs_dict = { "observation.state": state } - # Add camera images to observation dictionary for name in cameras: obs_dict[f"observation.images.{name}"] = images[name] @@ -250,7 +244,6 @@ def read_image_from_camera(cap): if events["exit_early"]: break - # Cleanup for cap in cameras.values(): cap.release() cv2.destroyAllWindows() \ No newline at end of file From 84e2cd7774c22423f8918981d4aae6363d3ab206 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sun, 22 Dec 2024 20:04:19 -0500 Subject: [PATCH 12/40] Added ability to stream video to browser --- .../common/robot_devices/control_context.py | 61 +++++++- lerobot/common/robot_devices/control_utils.py | 2 +- video_server.py | 148 ++++++++++++++++++ 3 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 video_server.py diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 312c4ec26..509c4f893 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -2,6 +2,9 @@ import numpy as np from typing import Dict, Optional from dataclasses import dataclass +import multiprocessing as mp +from multiprocessing import shared_memory +import json # Create an enum for ControlPhase @@ -20,8 +23,7 @@ class ControlContextConfig: debug_mode: bool = False control_phase: ControlPhase = ControlPhase.TELEOPERATE num_episodes: int = 0 - # TODO(jackvial): Add robot on this class so we can call robot.disconnect() in cleanup - + use_shared_memory: bool = True class ControlContext: def __init__(self, config: Optional[ControlContextConfig] = None): @@ -44,6 +46,11 @@ def __init__(self, config: Optional[ControlContextConfig] = None): if config.assign_rewards: self.events["next_reward"] = 0 + # Shared memory setup + self.shared_mem = {} + if self.config.use_shared_memory: + self.setup_shared_memory() + self.pressed_keys = [] self.font = pygame.font.SysFont('courier', 24) # Courier is a monospace font self.small_font = pygame.font.SysFont('courier', 18) # Smaller font for controls list @@ -126,6 +133,52 @@ def handle_events(self): self.pressed_keys.remove(key_name) return self.events + + def setup_shared_memory(self): + # Create shared memory blocks for each camera + # We'll store the shape information in a separate shared memory block + self.shared_mem['metadata'] = shared_memory.SharedMemory( + name='camera_metadata', + create=True, + size=1024 # Enough space for JSON metadata + ) + + def cleanup_shared_memory(self): + if hasattr(self, 'shared_mem'): + for name, shm in self.shared_mem.items(): + shm.close() + shm.unlink() + + def update_shared_memory(self, images: Dict[str, np.ndarray]): + if not self.config.use_shared_memory: + return + + metadata = {} + for name, image in images.items(): + if name not in self.shared_mem: + # Create new shared memory block for this camera + shm_name = f'camera_{name}' + shm = shared_memory.SharedMemory( + name=shm_name, + create=True, + size=image.nbytes + ) + self.shared_mem[name] = shm + + # Copy image data to shared memory + np.ndarray(image.shape, dtype=image.dtype, + buffer=self.shared_mem[name].buf)[:] = image + + # Update metadata + metadata[name] = { + 'shape': image.shape, + 'dtype': str(image.dtype), + 'shm_name': f'camera_{name}' + } + + # Store metadata in shared memory + metadata_bytes = json.dumps(metadata).encode() + self.shared_mem['metadata'].buf[:len(metadata_bytes)] = metadata_bytes def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): """Update display with new images from observation dict""" @@ -133,6 +186,10 @@ def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): images = {k: observation[k].numpy() for k in image_keys} if not images: return + + # Update shared memory if enabled + if self.config.use_shared_memory: + self.update_shared_memory(images) # Initialize or resize window if needed window_width, window_height, grid_cols = self.calculate_window_size(images) diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 288303385..cc6cdc222 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -284,7 +284,7 @@ def control_loop( busy_wait(1 / fps - dt_s) dt_s = time.perf_counter() - start_loop_t - log_control_info(robot, dt_s, fps=fps) + # log_control_info(robot, dt_s, fps=fps) timestamp = time.perf_counter() - start_episode_t if events["exit_early"]: diff --git a/video_server.py b/video_server.py new file mode 100644 index 000000000..b6abc082f --- /dev/null +++ b/video_server.py @@ -0,0 +1,148 @@ +from fastapi import FastAPI, WebSocket +from fastapi.responses import HTMLResponse +import uvicorn +from multiprocessing import shared_memory +import numpy as np +import json +import asyncio +import cv2 +import base64 +from starlette.websockets import WebSocketDisconnect + +app = FastAPI() + +html = """ + + + + Camera Streams + + + +
+
+ + + +""" + +@app.get("/") +async def get(): + return HTMLResponse(html) + +async def read_shared_memory(): + try: + # Access metadata shared memory + shm_metadata = shared_memory.SharedMemory(name='camera_metadata') + metadata_str = shm_metadata.buf.tobytes().decode().split('\x00')[0] + metadata = json.loads(metadata_str) + + frames = {} + for name, info in metadata.items(): + try: + # Access camera frame shared memory + shm = shared_memory.SharedMemory(name=info['shm_name']) + + # Reconstruct numpy array from shared memory + shape = tuple(info['shape']) + dtype = np.dtype(info['dtype']) + frame = np.ndarray(shape, dtype=dtype, buffer=shm.buf) + + # Convert to JPEG + success, buffer = cv2.imencode('.jpg', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)) + if success: + frames[name] = base64.b64encode(buffer).decode('utf-8') + + shm.close() # Close the shared memory access + except FileNotFoundError: + print(f"Shared memory for camera {name} not found. Skipping.") + continue + + shm_metadata.close() # Close metadata shared memory access + return frames + + except (FileNotFoundError, ValueError, json.JSONDecodeError) as e: + print(f"Error reading shared memory: {e}") + return {} + +@app.websocket("/ws") +async def websocket_endpoint(websocket: WebSocket): + await websocket.accept() + try: + while True: + frames = await read_shared_memory() + if frames: + await websocket.send_json(frames) + await asyncio.sleep(0.033) # ~30 FPS + except WebSocketDisconnect: + print("WebSocket disconnected") + except Exception as e: + print(f"WebSocket error: {e}") + finally: + if websocket.client_state == "CONNECTED": + await websocket.close() + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--host', default='0.0.0.0', help='Host IP address') + parser.add_argument('--port', type=int, default=8000, help='Port number') + args = parser.parse_args() + + print(f"Starting server at {args.host}:{args.port}") + uvicorn.run(app, host=args.host, port=args.port) From b0bc2949affa6e20567281a0fc084006b8452aab Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Sun, 22 Dec 2024 20:29:52 -0500 Subject: [PATCH 13/40] Use zero mq for inter process communication --- .../common/robot_devices/control_context.py | 164 ++++++++---------- video_server.py | 108 ++++++------ 2 files changed, 128 insertions(+), 144 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 509c4f893..f01239085 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -2,28 +2,25 @@ import numpy as np from typing import Dict, Optional from dataclasses import dataclass -import multiprocessing as mp -from multiprocessing import shared_memory import json +import cv2 +import base64 +import zmq - -# Create an enum for ControlPhase class ControlPhase: TELEOPERATE = "Teleoperate" WARMUP = "Warmup" RECORD = "Record" RESET = "Reset" - @dataclass class ControlContextConfig: display_cameras: bool = False play_sounds: bool = False assign_rewards: bool = False debug_mode: bool = False - control_phase: ControlPhase = ControlPhase.TELEOPERATE + control_phase: str = ControlPhase.TELEOPERATE num_episodes: int = 0 - use_shared_memory: bool = True class ControlContext: def __init__(self, config: Optional[ControlContextConfig] = None): @@ -43,22 +40,17 @@ def __init__(self, config: Optional[ControlContextConfig] = None): "next_reward": 0, } - if config.assign_rewards: + if self.config.assign_rewards: self.events["next_reward"] = 0 - # Shared memory setup - self.shared_mem = {} - if self.config.use_shared_memory: - self.setup_shared_memory() - self.pressed_keys = [] - self.font = pygame.font.SysFont('courier', 24) # Courier is a monospace font - self.small_font = pygame.font.SysFont('courier', 18) # Smaller font for controls list + self.font = pygame.font.SysFont('courier', 24) + self.small_font = pygame.font.SysFont('courier', 18) self.current_episode_index = 0 # Color theme - self.text_bg_color = (0, 0, 0) # Black - self.text_color = (0, 255, 0) # Green + self.text_bg_color = (0, 0, 0) + self.text_color = (0, 255, 0) # Define the control instructions self.controls = [ @@ -68,6 +60,15 @@ def __init__(self, config: Optional[ControlContextConfig] = None): ("Space", "Toggle Reward"), ] + # ------------------------------- + # ZeroMQ Setup (Publisher) + # ------------------------------- + self.zmq_context = zmq.Context() + self.publisher_socket = self.zmq_context.socket(zmq.PUB) + # Bind to a TCP port. Adjust IP/port as needed. + self.publisher_socket.bind("tcp://127.0.0.1:5555") + # ------------------------------- + def calculate_window_size(self, images: Dict[str, np.ndarray]): """Calculate required window size based on images""" max_width = 0 @@ -82,7 +83,6 @@ def calculate_window_size(self, images: Dict[str, np.ndarray]): max_width = max(max_width, image.shape[1]) max_height = max(max_height, image.shape[0]) - # Adjust total width and height calculations to remove extra padding total_width = max_width * grid_cols total_height = max_height * grid_rows + self.title_height @@ -133,99 +133,77 @@ def handle_events(self): self.pressed_keys.remove(key_name) return self.events - - def setup_shared_memory(self): - # Create shared memory blocks for each camera - # We'll store the shape information in a separate shared memory block - self.shared_mem['metadata'] = shared_memory.SharedMemory( - name='camera_metadata', - create=True, - size=1024 # Enough space for JSON metadata - ) - - def cleanup_shared_memory(self): - if hasattr(self, 'shared_mem'): - for name, shm in self.shared_mem.items(): - shm.close() - shm.unlink() - - def update_shared_memory(self, images: Dict[str, np.ndarray]): - if not self.config.use_shared_memory: - return - metadata = {} + def publish_frames(self, images: Dict[str, np.ndarray]): + """ + Encode each image as JPEG -> base64 -> JSON, then send via ZeroMQ PUB socket. + """ + frame_data = {} for name, image in images.items(): - if name not in self.shared_mem: - # Create new shared memory block for this camera - shm_name = f'camera_{name}' - shm = shared_memory.SharedMemory( - name=shm_name, - create=True, - size=image.nbytes - ) - self.shared_mem[name] = shm - - # Copy image data to shared memory - np.ndarray(image.shape, dtype=image.dtype, - buffer=self.shared_mem[name].buf)[:] = image - - # Update metadata - metadata[name] = { - 'shape': image.shape, - 'dtype': str(image.dtype), - 'shm_name': f'camera_{name}' + # Convert from RGB to BGR for JPEG encoding if needed + bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + success, buffer = cv2.imencode(".jpg", bgr_image) + if success: + # Convert to base64 + b64_jpeg = base64.b64encode(buffer).decode("utf-8") + frame_data[name] = b64_jpeg + + if frame_data: + message = { + "type": "frame_update", + "frames": frame_data } - - # Store metadata in shared memory - metadata_bytes = json.dumps(metadata).encode() - self.shared_mem['metadata'].buf[:len(metadata_bytes)] = metadata_bytes + # Send JSON over ZeroMQ + self.publisher_socket.send_json(message) def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): - """Update display with new images from observation dict""" + """Render in a Pygame window AND publish frames via ZeroMQ.""" image_keys = [key for key in observation if "image" in key] images = {k: observation[k].numpy() for k in image_keys} if not images: return - - # Update shared memory if enabled - if self.config.use_shared_memory: - self.update_shared_memory(images) - - # Initialize or resize window if needed - window_width, window_height, grid_cols = self.calculate_window_size(images) - if self.screen is None or self.screen.get_size() != (window_width, window_height): - self.screen = pygame.display.set_mode((window_width, window_height)) - pygame.display.set_caption("LeRobot") - self.screen.fill(self.text_bg_color) + # ------------------------------- + # Publish frames via ZeroMQ + # ------------------------------- + self.publish_frames(images) - # Update image positions and draw images - for idx, (key, image) in enumerate(images.items()): - # Calculate grid position - col = idx % grid_cols - row = idx // grid_cols - - # Calculate pixel position - adjust for controls panel - x = col * (image.shape[1] + self.padding) - y = row * (image.shape[0] + self.title_height + self.padding) + if self.config.display_cameras: + window_width, window_height, grid_cols = self.calculate_window_size(images) + if self.screen is None or self.screen.get_size() != (window_width, window_height): + self.screen = pygame.display.set_mode((window_width, window_height)) + pygame.display.set_caption("LeRobot") - # Convert numpy array to pygame surface - image_surface = pygame.surfarray.make_surface(np.transpose(image, (1, 0, 2))) - self.screen.blit(image_surface, (x, y + self.title_height)) + self.screen.fill(self.text_bg_color) - camera_label_text = key.split(".")[-1] - camera_label = self.font.render(camera_label_text, True, self.text_color) - self.screen.blit(camera_label, (x + 5, y + self.title_height + 5)) + # Update image positions and draw images + for idx, (key, image) in enumerate(images.items()): + col = idx % grid_cols + row = idx // grid_cols + x = col * (image.shape[1] + self.padding) + y = row * (image.shape[0] + self.title_height + self.padding) + image_surface = pygame.surfarray.make_surface(np.transpose(image, (1, 0, 2))) + self.screen.blit(image_surface, (x, y + self.title_height)) - pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) + camera_label_text = key.split(".")[-1] + camera_label = self.font.render(camera_label_text, True, self.text_color) + self.screen.blit(camera_label, (x + 5, y + self.title_height + 5)) - self.draw_top_bar(window_width) - pygame.display.flip() + pygame.draw.rect(self.screen, self.text_bg_color, (0, 0, window_width, self.title_height)) + self.draw_top_bar(window_width) + pygame.display.flip() def update_with_observations(self, observation: Dict[str, np.ndarray]): if self.config.display_cameras: self.render_scene_from_observations(observation) + else: + # Even if not displaying, still publish frames via ZeroMQ + image_keys = [key for key in observation if "image" in key] + images = {k: observation[k].numpy() for k in image_keys} + if images: + self.publish_frames(images) + self.handle_events() return self @@ -234,12 +212,14 @@ def update_current_episode(self, episode_index): return self def get_events(self): - """Return current events state""" return self.events.copy() def cleanup(self, robot): robot.disconnect() pygame.quit() + # Clean up ZMQ socket + self.publisher_socket.close() + self.zmq_context.term() if __name__ == "__main__": diff --git a/video_server.py b/video_server.py index b6abc082f..940ab9201 100644 --- a/video_server.py +++ b/video_server.py @@ -1,13 +1,11 @@ from fastapi import FastAPI, WebSocket from fastapi.responses import HTMLResponse import uvicorn -from multiprocessing import shared_memory -import numpy as np -import json import asyncio -import cv2 import base64 from starlette.websockets import WebSocketDisconnect +import zmq +import zmq.asyncio app = FastAPI() @@ -32,46 +30,45 @@ -
-
+
+ + + + +
+
+
+ Mode: + - +
+
+ Episode: + - +
+
+ Reward: + - +
+
+
+
+ + +
+ +
+ +
+ + +
+

Events

+
+
+
+ + +
+ +
+

Robot State

+
+
+ + +
+

Configuration

+
+
+ + +
+

Controls

+
+
+ + Exit Early +
+
+ + Rerecord +
+
+ Esc + Stop +
+
+ Space + Toggle Reward +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/video_server.py b/video_server.py deleted file mode 100644 index 940ab9201..000000000 --- a/video_server.py +++ /dev/null @@ -1,152 +0,0 @@ -from fastapi import FastAPI, WebSocket -from fastapi.responses import HTMLResponse -import uvicorn -import asyncio -import base64 -from starlette.websockets import WebSocketDisconnect -import zmq -import zmq.asyncio - -app = FastAPI() - -html = """ - - - - Camera Streams - - - -
- - - -""" - -@app.get("/") -async def get(): - return HTMLResponse(html) - -# Global dictionary to hold the latest frames from ZeroMQ -latest_frames = {} - -# Set up a ZMQ context and SUB socket in asyncio mode -zmq_context = zmq.asyncio.Context() -subscriber_socket = zmq_context.socket(zmq.SUB) - -# Connect to the producer's PUB socket -# Make sure this matches the IP/port from control_context.py -subscriber_socket.connect("tcp://127.0.0.1:5555") -subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") # subscribe to all messages - - -async def zmq_consumer(): - """ - Continuously receive messages from ZeroMQ and update the global `latest_frames`. - """ - while True: - try: - message = await subscriber_socket.recv_json() - # message should look like: {"type": "frame_update", "frames": {cameraName: base64_jpeg, ...}} - if message.get("type") == "frame_update": - frames = message.get("frames", {}) - # Update the global dictionary - for camera_name, b64_jpeg in frames.items(): - latest_frames[camera_name] = b64_jpeg - except Exception as e: - print(f"ZMQ consumer error: {e}") - await asyncio.sleep(1) - # Small pause to avoid busy-loop - await asyncio.sleep(0.001) - - -@app.on_event("startup") -async def startup_event(): - """ - When FastAPI starts, launch the background ZMQ consumer task. - """ - asyncio.create_task(zmq_consumer()) - - -@app.websocket("/ws") -async def websocket_endpoint(websocket: WebSocket): - await websocket.accept() - try: - while True: - # Send the latest frames to the websocket client - if latest_frames: - await websocket.send_json(latest_frames) - await asyncio.sleep(0.033) # ~30 FPS - except WebSocketDisconnect: - print("WebSocket disconnected") - except Exception as e: - print(f"WebSocket error: {e}") - finally: - if websocket.client_state == "CONNECTED": - await websocket.close() - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument('--host', default='0.0.0.0', help='Host IP address') - parser.add_argument('--port', type=int, default=8000, help='Port number') - args = parser.parse_args() - - print(f"Starting server at {args.host}:{args.port}") - uvicorn.run(app, host=args.host, port=args.port) From db37696ed78fc653e290c00edf8bf1edd6cc3465 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Mon, 23 Dec 2024 14:11:42 -0500 Subject: [PATCH 15/40] Add ability to handle keyboard events from browser --- .../common/robot_devices/control_context.py | 51 +++++++++++++++++-- lerobot/scripts/stream_video_over_http.py | 20 ++++++++ lerobot/templates/stream_video_template.html | 4 ++ 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 9f8f5ccf8..3faaaeda3 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -64,14 +64,16 @@ def __init__(self, config: Optional[ControlContextConfig] = None): ("Space", "Toggle Reward"), ] - # ------------------------------- - # ZeroMQ Setup (Publisher) - # ------------------------------- self.zmq_context = zmq.Context() + self.publisher_socket = self.zmq_context.socket(zmq.PUB) - # Bind to a TCP port. Adjust IP/port as needed. self.publisher_socket.bind("tcp://127.0.0.1:5555") - # ------------------------------- + + self.command_sub_socket = self.zmq_context.socket(zmq.SUB) + self.command_sub_socket.connect("tcp://127.0.0.1:5556") + + # Subscribe to all messages + self.command_sub_socket.setsockopt_string(zmq.SUBSCRIBE, "") def calculate_window_size(self, images: Dict[str, np.ndarray]): """Calculate required window size based on images""" @@ -236,7 +238,46 @@ def update_with_observations(self, observation: Dict[str, np.ndarray]): self.publish_observations(observation) self.handle_events() + self.check_for_keyboard_events_from_browser() return self + + def check_for_keyboard_events_from_browser(self): + try: + # If there's data, receive it + msg = self.command_sub_socket.recv_json() + print("msg: ", msg) + + if msg.get("type") == "command" and msg.get("command") == "keydown": + self.handle_browser_keyboard_event(msg.get("key_pressed")) + + except zmq.Again: + pass + except Exception as e: + print(f"Error while polling for commands: {e}") + + def handle_browser_keyboard_event(self, key_pressed: str): + """ + Translate a key pressed in the web UI to the same event logic used in Pygame. + """ + + print("key_pressed: ", key_pressed) + + if key_pressed == "ArrowRight": + print("Received 'ArrowRight' from browser -> Exit Early") + self.events["exit_early"] = True + elif key_pressed == "ArrowLeft": + print("Received 'ArrowLeft' from browser -> Rerecord Episode") + self.events["rerecord_episode"] = True + self.events["exit_early"] = True + elif key_pressed == "Escape": + print("Received 'Escape' from browser -> Stop") + self.events["stop_recording"] = True + self.events["exit_early"] = True + elif key_pressed == "Space": + # Toggle "next_reward" + self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 + print(f"Space toggled reward to {self.events['next_reward']}") + def update_current_episode(self, episode_index): self.current_episode_index = episode_index diff --git a/lerobot/scripts/stream_video_over_http.py b/lerobot/scripts/stream_video_over_http.py index c83f9e93d..274bb7121 100644 --- a/lerobot/scripts/stream_video_over_http.py +++ b/lerobot/scripts/stream_video_over_http.py @@ -25,6 +25,9 @@ subscriber_socket.connect("tcp://127.0.0.1:5555") subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") +command_publisher = zmq_context.socket(zmq.PUB) +command_publisher.bind("tcp://127.0.0.1:5556") + def zmq_consumer(): """Thread function to consume ZMQ messages and emit to connected clients.""" while True: @@ -64,6 +67,23 @@ def zmq_consumer(): time.sleep(0.001) # Small sleep to prevent busy-waiting + +@socketio.on("keydown_event") +def handle_keydown_event(data): + """ + When the browser sends a keydown_event, we publish it over ZeroMQ. + """ + key_pressed = data.get("key") + print(f"Received key event from browser: {key_pressed}") + + # Publish over ZeroMQ + message = { + "type": "command", + "command": "keydown", + "key_pressed": key_pressed + } + command_publisher.send_json(message) + @app.route("/") def index(): """Render the main page.""" diff --git a/lerobot/templates/stream_video_template.html b/lerobot/templates/stream_video_template.html index 26d8d538a..281660b0c 100644 --- a/lerobot/templates/stream_video_template.html +++ b/lerobot/templates/stream_video_template.html @@ -233,15 +233,19 @@

Controls

switch(event.key) { case 'ArrowRight': console.log('Right arrow pressed - Exit Early'); + socket.emit('keydown_event', { key: 'ArrowRight' }); break; case 'ArrowLeft': console.log('Left arrow pressed - Rerecord'); + socket.emit('keydown_event', { key: 'ArrowLeft' }); break; case 'Escape': console.log('Escape pressed - Stop'); + socket.emit('keydown_event', { key: 'Escape' }); break; case ' ': // Space key console.log('Space pressed - Toggle Reward'); + socket.emit('keydown_event', { key: 'Space' }); break; } }); From f969ebbf9929300793bfd9e65169360623b92680 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Mon, 23 Dec 2024 14:29:06 -0500 Subject: [PATCH 16/40] Handle events from browser --- .../common/robot_devices/control_context.py | 74 +++++++++---------- lerobot/scripts/control_robot.py | 1 - lerobot/scripts/stream_video_over_http.py | 9 ++- 3 files changed, 39 insertions(+), 45 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 3faaaeda3..33e40a8c4 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -139,6 +139,38 @@ def handle_events(self): self.pressed_keys.remove(key_name) return self.events + + def handle_browser_events(self, key_pressed: str): + """ + Translate a key pressed in the web UI to the same event logic used in Pygame. + """ + + try: + msg = self.command_sub_socket.recv_json() + + if msg.get("type") == "command" and msg.get("command") == "keydown": + key_pressed = msg.get("key_pressed") + + if key_pressed == "ArrowRight": + print("Received 'ArrowRight' from browser -> Exit Early") + self.events["exit_early"] = True + elif key_pressed == "ArrowLeft": + print("Received 'ArrowLeft' from browser -> Rerecord Episode") + self.events["rerecord_episode"] = True + self.events["exit_early"] = True + elif key_pressed == "Escape": + print("Received 'Escape' from browser -> Stop") + self.events["stop_recording"] = True + self.events["exit_early"] = True + elif key_pressed == "Space": + # Toggle "next_reward" + self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 + print(f"Space toggled reward to {self.events['next_reward']}") + + except zmq.Again: + pass + except Exception as e: + print(f"Error while polling for commands: {e}") def publish_observations(self, observation: Dict[str, np.ndarray]): """ @@ -238,46 +270,8 @@ def update_with_observations(self, observation: Dict[str, np.ndarray]): self.publish_observations(observation) self.handle_events() - self.check_for_keyboard_events_from_browser() + self.handle_browser_events() return self - - def check_for_keyboard_events_from_browser(self): - try: - # If there's data, receive it - msg = self.command_sub_socket.recv_json() - print("msg: ", msg) - - if msg.get("type") == "command" and msg.get("command") == "keydown": - self.handle_browser_keyboard_event(msg.get("key_pressed")) - - except zmq.Again: - pass - except Exception as e: - print(f"Error while polling for commands: {e}") - - def handle_browser_keyboard_event(self, key_pressed: str): - """ - Translate a key pressed in the web UI to the same event logic used in Pygame. - """ - - print("key_pressed: ", key_pressed) - - if key_pressed == "ArrowRight": - print("Received 'ArrowRight' from browser -> Exit Early") - self.events["exit_early"] = True - elif key_pressed == "ArrowLeft": - print("Received 'ArrowLeft' from browser -> Rerecord Episode") - self.events["rerecord_episode"] = True - self.events["exit_early"] = True - elif key_pressed == "Escape": - print("Received 'Escape' from browser -> Stop") - self.events["stop_recording"] = True - self.events["exit_early"] = True - elif key_pressed == "Space": - # Toggle "next_reward" - self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 - print(f"Space toggled reward to {self.events['next_reward']}") - def update_current_episode(self, episode_index): self.current_episode_index = episode_index @@ -309,7 +303,7 @@ def read_image_from_camera(cap): return torch.tensor(frame_rgb).float() config = ControlContextConfig( - display_cameras=True, + display_cameras=False, assign_rewards=True, debug_mode=True, control_phase=ControlPhase.RECORD, diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 983cf6d76..a85a4f9c4 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -104,7 +104,6 @@ from lerobot.common.robot_devices.control_utils import ( control_loop, has_method, - init_keyboard_listener, init_policy, log_control_info, record_episode, diff --git a/lerobot/scripts/stream_video_over_http.py b/lerobot/scripts/stream_video_over_http.py index 274bb7121..b09c26b5c 100644 --- a/lerobot/scripts/stream_video_over_http.py +++ b/lerobot/scripts/stream_video_over_http.py @@ -19,17 +19,19 @@ # Global dictionary to hold the latest observation data from ZeroMQ latest_observation = {} -# Set up ZMQ context and SUB socket zmq_context = zmq.Context() + +# For recieving observation (camera frames, state, events) from ControlContext +# so we can send them to the browser subscriber_socket = zmq_context.socket(zmq.SUB) subscriber_socket.connect("tcp://127.0.0.1:5555") subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") +# For sending keydown events from the browser to ControlContext command_publisher = zmq_context.socket(zmq.PUB) command_publisher.bind("tcp://127.0.0.1:5556") def zmq_consumer(): - """Thread function to consume ZMQ messages and emit to connected clients.""" while True: try: message = subscriber_socket.recv_json() @@ -58,7 +60,7 @@ def zmq_consumer(): # Update latest observation latest_observation.update(processed_data) - # Emit the full observation to all connected clients + # Emit the observation data to the browser socketio.emit("observation_update", processed_data) except Exception as e: @@ -74,7 +76,6 @@ def handle_keydown_event(data): When the browser sends a keydown_event, we publish it over ZeroMQ. """ key_pressed = data.get("key") - print(f"Received key event from browser: {key_pressed}") # Publish over ZeroMQ message = { From 040eff5d0073ed7ba72d9efc630c12b0b217dae7 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Mon, 23 Dec 2024 14:52:13 -0500 Subject: [PATCH 17/40] Fix browser event handler --- .../common/robot_devices/control_context.py | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 33e40a8c4..cc64a3883 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -140,34 +140,39 @@ def handle_events(self): return self.events - def handle_browser_events(self, key_pressed: str): + def handle_browser_events(self): """ Translate a key pressed in the web UI to the same event logic used in Pygame. """ - try: - msg = self.command_sub_socket.recv_json() - - if msg.get("type") == "command" and msg.get("command") == "keydown": - key_pressed = msg.get("key_pressed") - - if key_pressed == "ArrowRight": - print("Received 'ArrowRight' from browser -> Exit Early") - self.events["exit_early"] = True - elif key_pressed == "ArrowLeft": - print("Received 'ArrowLeft' from browser -> Rerecord Episode") - self.events["rerecord_episode"] = True - self.events["exit_early"] = True - elif key_pressed == "Escape": - print("Received 'Escape' from browser -> Stop") - self.events["stop_recording"] = True - self.events["exit_early"] = True - elif key_pressed == "Space": - # Toggle "next_reward" - self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 - print(f"Space toggled reward to {self.events['next_reward']}") + # Set a non-blocking polls + if self.command_sub_socket.poll(timeout=0): # Check if there's a message + msg = self.command_sub_socket.recv_json() + + if msg.get("type") == "command" and msg.get("command") == "keydown": + key_pressed = msg.get("key_pressed") + + if key_pressed == "ArrowRight": + print("Received 'ArrowRight' from browser -> Exit Early") + self.events["exit_early"] = True + elif key_pressed == "ArrowLeft": + print("Received 'ArrowLeft' from browser -> Rerecord Episode") + self.events["rerecord_episode"] = True + self.events["exit_early"] = True + elif key_pressed == "Escape": + print("Received 'Escape' from browser -> Stop") + self.events["stop_recording"] = True + self.events["exit_early"] = True + elif key_pressed == "Space": + # Toggle "next_reward" + self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0 + print(f"Space toggled reward to {self.events['next_reward']}") + else: + # No message available, continue + pass except zmq.Again: + # No message received within timeout pass except Exception as e: print(f"Error while polling for commands: {e}") From 6bf4add960024a68662c2831606c3ce4733b6d60 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Mon, 23 Dec 2024 15:15:14 -0500 Subject: [PATCH 18/40] Stream logs to browser --- .../common/robot_devices/control_context.py | 41 +++++++++++++++---- lerobot/common/robot_devices/control_utils.py | 11 +---- lerobot/scripts/control_robot.py | 2 + lerobot/scripts/stream_video_over_http.py | 3 +- lerobot/templates/stream_video_template.html | 24 +++++++++++ 5 files changed, 62 insertions(+), 19 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index cc64a3883..016b855e6 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -7,6 +7,11 @@ import zmq import torch import time +from lerobot.common.robot_devices.robots.utils import Robot +from lerobot.common.robot_devices.utils import busy_wait +from lerobot.common.robot_devices.control_utils import ( + log_control_info +) class ControlPhase: @@ -24,11 +29,16 @@ class ControlContextConfig: debug_mode: bool = False control_phase: str = ControlPhase.TELEOPERATE num_episodes: int = 0 + robot: Robot class ControlContext: - def __init__(self, config: Optional[ControlContextConfig] = None): - self.config = config or ControlContextConfig() + def __init__(self, config: ControlContextConfig): + self.config = config + + if not self.config.robot: + raise ValueError("Robot object must be provided in ControlContextConfig") + pygame.init() if not self.config.display_cameras: pygame.display.set_mode((1, 1), pygame.HIDDEN) @@ -177,7 +187,7 @@ def handle_browser_events(self): except Exception as e: print(f"Error while polling for commands: {e}") - def publish_observations(self, observation: Dict[str, np.ndarray]): + def publish_observations(self, observation: Dict[str, np.ndarray], log_items: list): """ Encode and publish the full observation object via ZeroMQ PUB socket. Includes observation data, events, and config information. @@ -231,7 +241,8 @@ def publish_observations(self, observation: Dict[str, np.ndarray]): "timestamp": time.time(), "data": processed_data, "events": events_data, - "config": config_data + "config": config_data, + "log_items": log_items } # Send JSON over ZeroMQ @@ -270,9 +281,10 @@ def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): self.draw_top_bar(window_width) pygame.display.flip() - def update_with_observations(self, observation: Dict[str, np.ndarray]): + def update_with_observations(self, observation: Dict[str, np.ndarray], start_loop_t: int): + log_items = self.log_control_info(start_loop_t) self.render_scene_from_observations(observation) - self.publish_observations(observation) + self.publish_observations(observation, log_items) self.handle_events() self.handle_browser_events() @@ -284,11 +296,22 @@ def update_current_episode(self, episode_index): def get_events(self): return self.events.copy() + + def log_control_info(self, start_loop_t, fps=None): + log_items = [] + if fps is not None: + dt_s = time.perf_counter() - start_loop_t + busy_wait(1 / fps - dt_s) - def cleanup(self, robot): - robot.disconnect() + dt_s = time.perf_counter() - start_loop_t + log_items = log_control_info(self.config.robot, dt_s, fps=fps) + + return log_items + + def cleanup(self): + self.config.robot.disconnect() pygame.quit() - # Clean up ZMQ socket + self.publisher_socket.close() self.zmq_context.term() diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index cc6cdc222..4a7e3248a 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -25,7 +25,6 @@ from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, set_global_seed from lerobot.scripts.eval import get_pretrained_policy_path -# TODO(jackvial): Move this to ControlContext and make configurable def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None): log_items = [] if episode_index is not None: @@ -68,6 +67,7 @@ def log_dt(shortname, dt_val_s): info_str = " ".join(log_items) logging.info(info_str) + return log_items @cache @@ -277,14 +277,7 @@ def control_loop( frame = {**observation, **action} dataset.add_frame(frame) - control_context.update_with_observations(observation) - - if fps is not None: - dt_s = time.perf_counter() - start_loop_t - busy_wait(1 / fps - dt_s) - - dt_s = time.perf_counter() - start_loop_t - # log_control_info(robot, dt_s, fps=fps) + control_context.update_with_observations(observation, start_loop_t) timestamp = time.perf_counter() - start_episode_t if events["exit_early"]: diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index a85a4f9c4..719eb300a 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -183,6 +183,7 @@ def teleoperate( config=ControlContextConfig( display_cameras=display_cameras, control_phase=ControlPhase.TELEOPERATE, + robot=robot, ) ) control_loop( @@ -280,6 +281,7 @@ def record( control_context = ControlContext( config=ControlContextConfig( + robot=robot, control_phase=ControlPhase.WARMUP, display_cameras=display_cameras, play_sounds=play_sounds, diff --git a/lerobot/scripts/stream_video_over_http.py b/lerobot/scripts/stream_video_over_http.py index b09c26b5c..f411a2e6e 100644 --- a/lerobot/scripts/stream_video_over_http.py +++ b/lerobot/scripts/stream_video_over_http.py @@ -41,7 +41,8 @@ def zmq_consumer(): "images": {}, "state": {}, "events": message.get("events", {}), - "config": message.get("config", {}) + "config": message.get("config", {}), + "log_items": message.get("log_items", []) } # Process observation data diff --git a/lerobot/templates/stream_video_template.html b/lerobot/templates/stream_video_template.html index 281660b0c..971bfaa77 100644 --- a/lerobot/templates/stream_video_template.html +++ b/lerobot/templates/stream_video_template.html @@ -75,6 +75,12 @@

Controls

+ + +
+

Logs

+
+
@@ -143,6 +149,19 @@

Controls

} } + // Update log display + function updateLogs(logItems) { + const logContainer = document.getElementById('log-items'); + logContainer.innerHTML = ''; + + logItems.forEach(item => { + const logElement = document.createElement('div'); + logElement.className = 'font-mono text-sm'; + logElement.textContent = item; + logContainer.appendChild(logElement); + }); + } + // Handle incoming observation updates socket.on('observation_update', function(data) { console.log('Received observation update:', data); @@ -202,6 +221,11 @@

Controls

if (data.events) { updateEvents(data.events); } + + // Update logs + if (data.log_items) { + updateLogs(data.log_items); + } }); // Connection handling From 926223f88b97021117bd044a98b5dbd0a47949b3 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Mon, 23 Dec 2024 15:28:22 -0500 Subject: [PATCH 19/40] remove debug flag. always print logs and send them to the browser --- lerobot/common/robot_devices/control_context.py | 12 +++++------- lerobot/scripts/control_robot.py | 4 ++++ lerobot/templates/stream_video_template.html | 1 - 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 016b855e6..5b018f1e1 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -26,10 +26,10 @@ class ControlContextConfig: display_cameras: bool = False play_sounds: bool = False assign_rewards: bool = False - debug_mode: bool = False control_phase: str = ControlPhase.TELEOPERATE num_episodes: int = 0 - robot: Robot + robot: Robot = None + fps: Optional[int] = None class ControlContext: @@ -119,8 +119,6 @@ def draw_top_bar(self, window_width: int): def handle_events(self): """Handle pygame events and update internal state""" for event in pygame.event.get(): - if self.config.debug_mode: - print(event) if event.type == pygame.QUIT: self.events["stop_recording"] = True self.events["exit_early"] = True @@ -230,7 +228,6 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "display_cameras": self.config.display_cameras, "play_sounds": self.config.play_sounds, "assign_rewards": self.config.assign_rewards, - "debug_mode": self.config.debug_mode, "control_phase": self.config.control_phase, "num_episodes": self.config.num_episodes, "current_episode": self.current_episode_index @@ -297,8 +294,9 @@ def update_current_episode(self, episode_index): def get_events(self): return self.events.copy() - def log_control_info(self, start_loop_t, fps=None): + def log_control_info(self, start_loop_t): log_items = [] + fps = self.config.fps if fps is not None: dt_s = time.perf_counter() - start_loop_t busy_wait(1 / fps - dt_s) @@ -333,9 +331,9 @@ def read_image_from_camera(cap): config = ControlContextConfig( display_cameras=False, assign_rewards=True, - debug_mode=True, control_phase=ControlPhase.RECORD, num_episodes=200, + fps=30 ) context = ControlContext(config) context.update_current_episode(199) diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 719eb300a..a2dd9597e 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -184,6 +184,7 @@ def teleoperate( display_cameras=display_cameras, control_phase=ControlPhase.TELEOPERATE, robot=robot, + fps=fps, ) ) control_loop( @@ -286,6 +287,7 @@ def record( display_cameras=display_cameras, play_sounds=play_sounds, assign_rewards=False, + fps=fps, ) ) @@ -298,11 +300,13 @@ def record( # initialize a new class control_context = ControlContext( config=ControlContextConfig( + robot=robot, control_phase=ControlPhase.RECORD, display_cameras=display_cameras, play_sounds=play_sounds, assign_rewards=False, num_episodes=num_episodes, + fps=fps, ) ) diff --git a/lerobot/templates/stream_video_template.html b/lerobot/templates/stream_video_template.html index 971bfaa77..d5bc354b0 100644 --- a/lerobot/templates/stream_video_template.html +++ b/lerobot/templates/stream_video_template.html @@ -164,7 +164,6 @@

Logs

// Handle incoming observation updates socket.on('observation_update', function(data) { - console.log('Received observation update:', data); // Update timestamp if (data.timestamp) { From e6fda954065226649c487bcd986a02e63ce5f601 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Tue, 24 Dec 2024 14:55:21 -0500 Subject: [PATCH 20/40] Removed display_cameras from control loop args. Rename browser ui related code --- .../common/robot_devices/control_context.py | 33 ++++++++++--------- lerobot/common/robot_devices/control_utils.py | 5 --- ...ideo_over_http.py => browser_ui_server.py} | 2 +- lerobot/scripts/control_robot.py | 11 ++----- ...am_video_template.html => browser_ui.html} | 0 5 files changed, 21 insertions(+), 30 deletions(-) rename lerobot/scripts/{stream_video_over_http.py => browser_ui_server.py} (98%) rename lerobot/templates/{stream_video_template.html => browser_ui.html} (100%) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 5b018f1e1..60abe0d79 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -9,9 +9,7 @@ import time from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.robot_devices.utils import busy_wait -from lerobot.common.robot_devices.control_utils import ( - log_control_info -) +from lerobot.common.robot_devices.control_utils import log_control_info class ControlPhase: @@ -85,6 +83,9 @@ def __init__(self, config: ControlContextConfig): # Subscribe to all messages self.command_sub_socket.setsockopt_string(zmq.SUBSCRIBE, "") + def update_config(self, config: ControlContextConfig): + self.config = config + def calculate_window_size(self, images: Dict[str, np.ndarray]): """Calculate required window size based on images""" max_width = 0 @@ -147,7 +148,7 @@ def handle_events(self): self.pressed_keys.remove(key_name) return self.events - + def handle_browser_events(self): """ Translate a key pressed in the web UI to the same event logic used in Pygame. @@ -189,13 +190,13 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li """ Encode and publish the full observation object via ZeroMQ PUB socket. Includes observation data, events, and config information. - + Args: observation (Dict[str, np.ndarray]): Dictionary containing observation data, including images and state information """ processed_data = {} - + # Process observation data for key, value in observation.items(): if "image" in key: @@ -211,17 +212,17 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "type": "image", "encoding": "jpeg_base64", "data": b64_jpeg, - "shape": image.shape + "shape": image.shape, } else: tensor_data = value.detach().cpu().numpy() if torch.is_tensor(value) else value - + processed_data[key] = { "type": "tensor", "data": tensor_data.tolist(), - "shape": tensor_data.shape + "shape": tensor_data.shape, } - + # Add events and config information events_data = self.get_events() config_data = { @@ -230,18 +231,18 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "assign_rewards": self.config.assign_rewards, "control_phase": self.config.control_phase, "num_episodes": self.config.num_episodes, - "current_episode": self.current_episode_index + "current_episode": self.current_episode_index, } - + message = { "type": "observation_update", "timestamp": time.time(), "data": processed_data, "events": events_data, "config": config_data, - "log_items": log_items + "log_items": log_items, } - + # Send JSON over ZeroMQ self.publisher_socket.send_json(message) @@ -293,7 +294,7 @@ def update_current_episode(self, episode_index): def get_events(self): return self.events.copy() - + def log_control_info(self, start_loop_t): log_items = [] fps = self.config.fps @@ -333,7 +334,7 @@ def read_image_from_camera(cap): assign_rewards=True, control_phase=ControlPhase.RECORD, num_episodes=200, - fps=30 + fps=30, ) context = ControlContext(config) context.update_current_episode(199) diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 4a7e3248a..fec237f68 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -185,14 +185,12 @@ def warmup_record( robot, enable_teleoperation, warmup_time_s, - display_cameras, fps, control_context ): control_loop( robot=robot, control_time_s=warmup_time_s, - display_cameras=display_cameras, fps=fps, teleoperate=enable_teleoperation, control_context=control_context, @@ -203,7 +201,6 @@ def record_episode( robot, dataset, episode_time_s, - display_cameras, policy, device, use_amp, @@ -213,7 +210,6 @@ def record_episode( control_loop( robot=robot, control_time_s=episode_time_s, - display_cameras=display_cameras, dataset=dataset, policy=policy, device=device, @@ -229,7 +225,6 @@ def control_loop( robot, control_time_s=None, teleoperate=False, - display_cameras=False, # TODO - remove this dataset: LeRobotDataset | None = None, policy=None, device=None, diff --git a/lerobot/scripts/stream_video_over_http.py b/lerobot/scripts/browser_ui_server.py similarity index 98% rename from lerobot/scripts/stream_video_over_http.py rename to lerobot/scripts/browser_ui_server.py index f411a2e6e..ee3a6f581 100644 --- a/lerobot/scripts/stream_video_over_http.py +++ b/lerobot/scripts/browser_ui_server.py @@ -89,7 +89,7 @@ def handle_keydown_event(data): @app.route("/") def index(): """Render the main page.""" - return render_template("stream_video_template.html") + return render_template("browser_ui.html") @socketio.on("connect") def handle_connect(): diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index a2dd9597e..fc6fc9ef8 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -192,7 +192,6 @@ def teleoperate( control_time_s=teleop_time_s, fps=fps, teleoperate=True, - display_cameras=display_cameras, control_context=control_context, ) @@ -291,18 +290,15 @@ def record( ) ) - warmup_record(robot, enable_teleoperation, warmup_time_s, display_cameras, fps, control_context) + warmup_record(robot, enable_teleoperation, warmup_time_s, fps, control_context) if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - # @TODO(jackvial): Maybe add an update_config method to ControlContext so we don't - # initialize a new class - control_context = ControlContext( - config=ControlContextConfig( + control_context.update_config( + ControlContextConfig( robot=robot, control_phase=ControlPhase.RECORD, - display_cameras=display_cameras, play_sounds=play_sounds, assign_rewards=False, num_episodes=num_episodes, @@ -325,7 +321,6 @@ def record( dataset=dataset, robot=robot, episode_time_s=episode_time_s, - display_cameras=display_cameras, policy=policy, device=device, use_amp=use_amp, diff --git a/lerobot/templates/stream_video_template.html b/lerobot/templates/browser_ui.html similarity index 100% rename from lerobot/templates/stream_video_template.html rename to lerobot/templates/browser_ui.html From 3c54bf4460e416aff3aefe2ce4d945762ddd5e15 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Tue, 24 Dec 2024 14:57:57 -0500 Subject: [PATCH 21/40] Add new dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6663edc0f..6ffdac265 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,8 @@ hello-robot-stretch-body = {version = ">=0.7.27", markers = "sys_platform == 'li pyserial = {version = ">=3.5", optional = true} jsonlines = ">=4.0.0" pygame = ">=2.6.1" +flask-socketio = ">=5.5.0" +pyzmq = ">=26.2.0" [tool.poetry.extras] From 9108649dd4b9ddda4ce7dcdf55a62636ca7bc588 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Tue, 24 Dec 2024 16:29:31 -0500 Subject: [PATCH 22/40] Fix updating control context config --- .../common/robot_devices/control_context.py | 103 +++++++++++------- lerobot/scripts/browser_ui_server.py | 38 +++++-- lerobot/scripts/control_robot.py | 5 +- lerobot/templates/browser_ui.html | 51 +++++++-- 4 files changed, 138 insertions(+), 59 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 60abe0d79..d9409fcad 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -33,37 +33,40 @@ class ControlContextConfig: class ControlContext: def __init__(self, config: ControlContextConfig): self.config = config + self._initialize_display() + self._initialize_communication() + self._initialize_state() - if not self.config.robot: - raise ValueError("Robot object must be provided in ControlContextConfig") - + def _initialize_display(self): pygame.init() if not self.config.display_cameras: pygame.display.set_mode((1, 1), pygame.HIDDEN) - + self.screen = None self.image_positions = {} self.padding = 20 self.title_height = 30 + self.font = pygame.font.SysFont("courier", 24) + self.small_font = pygame.font.SysFont("courier", 18) + + # Color theme + self.text_bg_color = (0, 0, 0) + self.text_color = (0, 255, 0) + + def _initialize_state(self): self.events = { "exit_early": False, "rerecord_episode": False, "stop_recording": False, "next_reward": 0, } - + if self.config.assign_rewards: self.events["next_reward"] = 0 - + self.pressed_keys = [] - self.font = pygame.font.SysFont("courier", 24) - self.small_font = pygame.font.SysFont("courier", 18) self.current_episode_index = 0 - - # Color theme - self.text_bg_color = (0, 0, 0) - self.text_color = (0, 255, 0) - + # Define the control instructions self.controls = [ ("Right Arrow", "Exit Early"), @@ -72,19 +75,51 @@ def __init__(self, config: ControlContextConfig): ("Space", "Toggle Reward"), ] + def _initialize_communication(self): self.zmq_context = zmq.Context() - self.publisher_socket = self.zmq_context.socket(zmq.PUB) self.publisher_socket.bind("tcp://127.0.0.1:5555") - + self.command_sub_socket = self.zmq_context.socket(zmq.SUB) self.command_sub_socket.connect("tcp://127.0.0.1:5556") - - # Subscribe to all messages self.command_sub_socket.setsockopt_string(zmq.SUBSCRIBE, "") def update_config(self, config: ControlContextConfig): + """Update configuration and reinitialize UI components as needed""" + old_display_setting = self.config.display_cameras self.config = config + + # If display setting changed, reinitialize display + if old_display_setting != self.config.display_cameras: + pygame.quit() + self._initialize_display() + + # Force screen recreation on next render + self.screen = None + + # Update ZMQ message with new config + self._publish_config_update() + + return self + + def _publish_config_update(self): + """Publish configuration update to ZMQ subscribers""" + config_data = { + "display_cameras": self.config.display_cameras, + "play_sounds": self.config.play_sounds, + "assign_rewards": self.config.assign_rewards, + "control_phase": self.config.control_phase, + "num_episodes": self.config.num_episodes, + "current_episode": self.current_episode_index, + } + + message = { + "type": "config_update", + "timestamp": time.time(), + "config": config_data, + } + + self.publisher_socket.send_json(message) def calculate_window_size(self, images: Dict[str, np.ndarray]): """Calculate required window size based on images""" @@ -187,26 +222,16 @@ def handle_browser_events(self): print(f"Error while polling for commands: {e}") def publish_observations(self, observation: Dict[str, np.ndarray], log_items: list): - """ - Encode and publish the full observation object via ZeroMQ PUB socket. - Includes observation data, events, and config information. - - Args: - observation (Dict[str, np.ndarray]): Dictionary containing observation data, - including images and state information - """ + """Encode and publish observation data with current configuration""" processed_data = {} - + # Process observation data for key, value in observation.items(): if "image" in key: - # Handle image data image = value.numpy() if torch.is_tensor(value) else value - # Convert from RGB to BGR for JPEG encoding bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) success, buffer = cv2.imencode(".jpg", bgr_image) if success: - # Convert to base64 b64_jpeg = base64.b64encode(buffer).decode("utf-8") processed_data[key] = { "type": "image", @@ -216,15 +241,13 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li } else: tensor_data = value.detach().cpu().numpy() if torch.is_tensor(value) else value - processed_data[key] = { "type": "tensor", "data": tensor_data.tolist(), "shape": tensor_data.shape, } - # Add events and config information - events_data = self.get_events() + # Include current configuration in observation update config_data = { "display_cameras": self.config.display_cameras, "play_sounds": self.config.play_sounds, @@ -238,12 +261,11 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "type": "observation_update", "timestamp": time.time(), "data": processed_data, - "events": events_data, + "events": self.get_events(), "config": config_data, "log_items": log_items, } - # Send JSON over ZeroMQ self.publisher_socket.send_json(message) def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): @@ -307,19 +329,22 @@ def log_control_info(self, start_loop_t): return log_items - def cleanup(self): - self.config.robot.disconnect() + def cleanup(self, robot=None): + """Clean up resources and connections""" + if robot: + robot.disconnect() + pygame.quit() - self.publisher_socket.close() + self.command_sub_socket.close() self.zmq_context.term() if __name__ == "__main__": - import torch import cv2 - import time import numpy as np + import time + import torch def read_image_from_camera(cap): ret, frame = cap.read() diff --git a/lerobot/scripts/browser_ui_server.py b/lerobot/scripts/browser_ui_server.py index ee3a6f581..b1f3a143e 100644 --- a/lerobot/scripts/browser_ui_server.py +++ b/lerobot/scripts/browser_ui_server.py @@ -16,18 +16,20 @@ app = Flask(__name__, template_folder=str(template_dir)) socketio = SocketIO(app, cors_allowed_origins="*") -# Global dictionary to hold the latest observation data from ZeroMQ -latest_observation = {} +# Global dictionary to hold the latest data from ZeroMQ +latest_data = { + "observation": {}, + "config": {} +} zmq_context = zmq.Context() -# For recieving observation (camera frames, state, events) from ControlContext -# so we can send them to the browser +# For receiving updates from ControlContext subscriber_socket = zmq_context.socket(zmq.SUB) subscriber_socket.connect("tcp://127.0.0.1:5555") subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "") -# For sending keydown events from the browser to ControlContext +# For sending keydown events to ControlContext command_publisher = zmq_context.socket(zmq.PUB) command_publisher.bind("tcp://127.0.0.1:5556") @@ -35,6 +37,7 @@ def zmq_consumer(): while True: try: message = subscriber_socket.recv_json() + if message.get("type") == "observation_update": processed_data = { "timestamp": message.get("timestamp"), @@ -58,12 +61,24 @@ def zmq_consumer(): "shape": value["shape"] } - # Update latest observation - latest_observation.update(processed_data) + # Update latest observation and config + latest_data["observation"].update(processed_data) + latest_data["config"].update(processed_data.get("config", {})) # Emit the observation data to the browser socketio.emit("observation_update", processed_data) + elif message.get("type") == "config_update": + # Handle dedicated config updates + config_data = message.get("config", {}) + latest_data["config"].update(config_data) + + # Emit configuration update to browser + socketio.emit("config_update", { + "timestamp": message.get("timestamp"), + "config": config_data + }) + except Exception as e: print(f"ZMQ consumer error: {e}") time.sleep(1) @@ -96,8 +111,13 @@ def handle_connect(): """Handle client connection.""" print("Client connected") # Send current state if available - if latest_observation: - socketio.emit("observation_update", latest_observation) + if latest_data["observation"]: + socketio.emit("observation_update", latest_data["observation"]) + if latest_data["config"]: + socketio.emit("config_update", { + "timestamp": time.time(), + "config": latest_data["config"] + }) @socketio.on("disconnect") def handle_disconnect(): diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index fc6fc9ef8..6c9a62930 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -295,18 +295,19 @@ def record( if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - control_context.update_config( + control_context = control_context.update_config( ControlContextConfig( robot=robot, control_phase=ControlPhase.RECORD, play_sounds=play_sounds, assign_rewards=False, num_episodes=num_episodes, + display_cameras=display_cameras, fps=fps, ) ) - recorded_episodes = 0 + recorded_episodes = 1 while True: if recorded_episodes >= num_episodes: break diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index d5bc354b0..2c42c2206 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -88,6 +88,7 @@

Logs

// Connect to Socket.IO server const socket = io(); let cameras = {}; + let lastConfig = {}; // Format timestamp function formatTimestamp(timestamp) { @@ -111,11 +112,14 @@

Logs

} // Update config display - function updateConfig(configData) { + function updateConfig(configData, updateTimestamp = true) { + // Merge with existing config + lastConfig = { ...lastConfig, ...configData }; + const configContainer = document.getElementById('config-values'); configContainer.innerHTML = ''; - for (const [key, value] of Object.entries(configData)) { + for (const [key, value] of Object.entries(lastConfig)) { const configElement = document.createElement('div'); configElement.className = 'font-mono text-sm'; configElement.textContent = `${key}: ${value}`; @@ -123,9 +127,19 @@

Logs

} // Update header information - document.getElementById('control-phase').textContent = configData.control_phase; - document.getElementById('episode-counter').textContent = - `${configData.current_episode}/${configData.num_episodes}`; + if (lastConfig.control_phase) { + document.getElementById('control-phase').textContent = lastConfig.control_phase; + } + if (lastConfig.current_episode !== undefined && lastConfig.num_episodes !== undefined) { + document.getElementById('episode-counter').textContent = + `${lastConfig.current_episode}/${lastConfig.num_episodes}`; + } + + // Update camera display based on display_cameras setting + const camerasContainer = document.getElementById('cameras'); + if (lastConfig.display_cameras !== undefined) { + camerasContainer.style.display = lastConfig.display_cameras ? 'grid' : 'none'; + } } // Update events display @@ -164,15 +178,14 @@

Logs

// Handle incoming observation updates socket.on('observation_update', function(data) { - // Update timestamp if (data.timestamp) { document.getElementById('timestamp').textContent = `Last Update: ${formatTimestamp(data.timestamp)}`; } - // Update images - if (data.images) { + // Update images if display_cameras is true + if (data.images && lastConfig.display_cameras) { for (const [name, imageData] of Object.entries(data.images)) { if (!cameras[name]) { // Create new camera display @@ -211,7 +224,7 @@

Logs

updateState(data.state); } - // Update config values + // Update config values if present in observation if (data.config) { updateConfig(data.config); } @@ -227,6 +240,17 @@

Logs

} }); + // Handle dedicated config updates + socket.on('config_update', function(data) { + if (data.config) { + updateConfig(data.config); + } + if (data.timestamp) { + document.getElementById('timestamp').textContent = + `Config Updated: ${formatTimestamp(data.timestamp)}`; + } + }); + // Connection handling socket.on('connect', () => { console.log('Connected to server'); @@ -245,6 +269,14 @@

Logs

document.getElementById('state-values').innerHTML = ''; document.getElementById('config-values').innerHTML = ''; document.getElementById('events').innerHTML = ''; + document.getElementById('log-items').innerHTML = ''; + + // Reset stored config + lastConfig = {}; + + // Clear camera displays + document.getElementById('cameras').innerHTML = ''; + cameras = {}; }); socket.on('connect_error', (error) => { @@ -269,6 +301,7 @@

Logs

case ' ': // Space key console.log('Space pressed - Toggle Reward'); socket.emit('keydown_event', { key: 'Space' }); + event.preventDefault(); // Prevent page scroll break; } }); From ccf53e4fe11cfd8f331b74c5610d6b259b08e0a8 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Tue, 24 Dec 2024 16:40:03 -0500 Subject: [PATCH 23/40] Show reset and saving episode --- .../common/robot_devices/control_context.py | 1 + lerobot/scripts/control_robot.py | 46 ++++++++++++++----- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index d9409fcad..f86ed3a8d 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -17,6 +17,7 @@ class ControlPhase: WARMUP = "Warmup" RECORD = "Record" RESET = "Reset" + SAVING = "Saving" @dataclass diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 6c9a62930..cffe705b8 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -295,18 +295,6 @@ def record( if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - control_context = control_context.update_config( - ControlContextConfig( - robot=robot, - control_phase=ControlPhase.RECORD, - play_sounds=play_sounds, - assign_rewards=False, - num_episodes=num_episodes, - display_cameras=display_cameras, - fps=fps, - ) - ) - recorded_episodes = 1 while True: if recorded_episodes >= num_episodes: @@ -317,6 +305,18 @@ def record( # if multi_task: # task = input("Enter your task description: ") + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.RECORD, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) + log_say(f"Recording episode {dataset.num_episodes}", play_sounds) record_episode( dataset=dataset, @@ -339,6 +339,17 @@ def record( if not events["stop_recording"] and ( (dataset.num_episodes < num_episodes - 1) or events["rerecord_episode"] ): + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.RESET, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) log_say("Reset the environment", play_sounds) reset_environment(robot, events, reset_time_s) @@ -349,6 +360,17 @@ def record( dataset.clear_episode_buffer() continue + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.SAVING, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) dataset.save_episode(task) recorded_episodes += 1 control_context.update_current_episode(recorded_episodes) From 496adf0e5d6a699362a535b33c2ce0b7b69c1ff7 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Tue, 24 Dec 2024 19:18:53 -0500 Subject: [PATCH 24/40] Update config for other modes --- .../common/robot_devices/control_context.py | 7 ++-- lerobot/common/robot_devices/control_utils.py | 15 ++++++-- lerobot/scripts/browser_ui_server.py | 4 ++- lerobot/scripts/control_robot.py | 36 ++++++++++++++++++- lerobot/templates/browser_ui.html | 26 ++++++++++++++ 5 files changed, 80 insertions(+), 8 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index f86ed3a8d..efaf93960 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -222,7 +222,7 @@ def handle_browser_events(self): except Exception as e: print(f"Error while polling for commands: {e}") - def publish_observations(self, observation: Dict[str, np.ndarray], log_items: list): + def publish_observations(self, observation: Dict[str, np.ndarray], log_items: list, countdown_time: int): """Encode and publish observation data with current configuration""" processed_data = {} @@ -265,6 +265,7 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "events": self.get_events(), "config": config_data, "log_items": log_items, + "countdown_time": countdown_time, } self.publisher_socket.send_json(message) @@ -302,10 +303,10 @@ def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): self.draw_top_bar(window_width) pygame.display.flip() - def update_with_observations(self, observation: Dict[str, np.ndarray], start_loop_t: int): + def update_with_observations(self, observation: Dict[str, np.ndarray], start_loop_t: int, countdown_time: int): log_items = self.log_control_info(start_loop_t) self.render_scene_from_observations(observation) - self.publish_observations(observation, log_items) + self.publish_observations(observation, log_items, countdown_time) self.handle_events() self.handle_browser_events() diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index fec237f68..f05a04396 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -252,6 +252,7 @@ def control_loop( timestamp = 0 start_episode_t = time.perf_counter() + total_time = 0 try: while timestamp < control_time_s: start_loop_t = time.perf_counter() @@ -272,9 +273,12 @@ def control_loop( frame = {**observation, **action} dataset.add_frame(frame) - control_context.update_with_observations(observation, start_loop_t) - timestamp = time.perf_counter() - start_episode_t + total_time += timestamp + countdown_time = max(0, control_time_s - timestamp) + + control_context.update_with_observations(observation, start_loop_t, countdown_time) + if events["exit_early"]: events["exit_early"] = False break @@ -283,12 +287,14 @@ def control_loop( print(f"Error in control loop: {e}") -def reset_environment(robot, events, reset_time_s): +def reset_environment(robot, control_context, reset_time_s): # TODO(rcadene): refactor warmup_record and reset_environment # TODO(alibets): allow for teleop during reset if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() + events = control_context.get_events() + timestamp = 0 start_vencod_t = time.perf_counter() @@ -297,7 +303,10 @@ def reset_environment(robot, events, reset_time_s): while timestamp < reset_time_s: time.sleep(1) timestamp = time.perf_counter() - start_vencod_t + countdown_time = max(0, reset_time_s - timestamp) + control_context.update_with_observations(None, 0, countdown_time) pbar.update(1) + if events["exit_early"]: events["exit_early"] = False break diff --git a/lerobot/scripts/browser_ui_server.py b/lerobot/scripts/browser_ui_server.py index b1f3a143e..cca3cd18c 100644 --- a/lerobot/scripts/browser_ui_server.py +++ b/lerobot/scripts/browser_ui_server.py @@ -45,7 +45,8 @@ def zmq_consumer(): "state": {}, "events": message.get("events", {}), "config": message.get("config", {}), - "log_items": message.get("log_items", []) + "log_items": message.get("log_items", []), + "countdown_time": message.get("countdown_time"), } # Process observation data @@ -71,6 +72,7 @@ def zmq_consumer(): elif message.get("type") == "config_update": # Handle dedicated config updates config_data = message.get("config", {}) + config_data["countdown_time"] = message.get("countdown_time") latest_data["config"].update(config_data) # Emit configuration update to browser diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index cffe705b8..7f1c60686 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -351,7 +351,7 @@ def record( ) ) log_say("Reset the environment", play_sounds) - reset_environment(robot, events, reset_time_s) + reset_environment(robot, control_context=control_context, reset_time_s=reset_time_s) if events["rerecord_episode"]: log_say("Re-record episode", play_sounds) @@ -381,15 +381,49 @@ def record( log_say("Stop recording", play_sounds, blocking=True) control_context.cleanup(robot) + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.PROCESSING_DATASET, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) + if run_compute_stats: logging.info("Computing dataset statistics") dataset.consolidate(run_compute_stats) if push_to_hub: + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.UPLOADING_DATASET_TO_HUB, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) dataset.push_to_hub(tags=tags) log_say("Exiting", play_sounds) + control_context = control_context.update_config( + ControlContextConfig( + robot=robot, + control_phase=ControlPhase.RECORDING_COMPLETE, + play_sounds=play_sounds, + assign_rewards=False, + num_episodes=num_episodes, + display_cameras=display_cameras, + fps=fps, + ) + ) return dataset diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index 2c42c2206..f667df8a3 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -21,6 +21,10 @@ Reward: - +
+ Time Remaining: + 00:00 +
@@ -176,6 +180,18 @@

Logs

}); } + // Update timers display + function updateTimers(countdownTime) { + document.getElementById('time-remaining').textContent = formatTime(countdownTime); + } + + // Format time in seconds to MM:SS + function formatTime(seconds) { + const minutes = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); // Changed to Math.floor to remove decimals + return `${String(minutes).padStart(2, '0')}:${String(secs).padStart(2, '0')}`; + } + // Handle incoming observation updates socket.on('observation_update', function(data) { // Update timestamp @@ -238,6 +254,11 @@

Logs

if (data.log_items) { updateLogs(data.log_items); } + + // Update timers + if (data.countdown_time !== undefined) { + updateTimers(data.countdown_time); + } }); // Handle dedicated config updates @@ -249,6 +270,11 @@

Logs

document.getElementById('timestamp').textContent = `Config Updated: ${formatTimestamp(data.timestamp)}`; } + + // Update timers + if (data.countdown_time !== undefined) { + updateTimers(data.countdown_time); + } }); // Connection handling From 990d5e709685615bc1ed61a0a4d6133bc926726d Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 25 Dec 2024 13:40:07 -0500 Subject: [PATCH 25/40] Added speech synthesis --- .../common/robot_devices/control_context.py | 29 ++++++++++++++++++- lerobot/scripts/browser_ui_server.py | 11 +++++++ lerobot/scripts/control_robot.py | 12 ++++---- lerobot/templates/browser_ui.html | 8 +++++ 4 files changed, 53 insertions(+), 7 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index efaf93960..528b2a5ed 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -10,6 +10,7 @@ from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.robot_devices.utils import busy_wait from lerobot.common.robot_devices.control_utils import log_control_info +from lerobot.common.utils.utils import log_say class ControlPhase: @@ -18,12 +19,15 @@ class ControlPhase: RECORD = "Record" RESET = "Reset" SAVING = "Saving" + PROCESSING_DATASET = "Processing Dataset" + UPLOADING_DATASET_TO_HUB = "Uploading Dataset to Hub" + RECORDING_COMPLETE = "Recording Complete" @dataclass class ControlContextConfig: display_cameras: bool = False - play_sounds: bool = False + play_sounds: bool = True assign_rewards: bool = False control_phase: str = ControlPhase.TELEOPERATE num_episodes: int = 0 @@ -34,6 +38,8 @@ class ControlContextConfig: class ControlContext: def __init__(self, config: ControlContextConfig): self.config = config + self.modes_with_no_observation = [ControlPhase.RESET, ControlPhase.SAVING, ControlPhase.PROCESSING_DATASET, ControlPhase.UPLOADING_DATASET_TO_HUB, ControlPhase.RECORDING_COMPLETE] + self.last_observation = None self._initialize_display() self._initialize_communication() self._initialize_state() @@ -304,6 +310,12 @@ def render_scene_from_observations(self, observation: Dict[str, np.ndarray]): pygame.display.flip() def update_with_observations(self, observation: Dict[str, np.ndarray], start_loop_t: int, countdown_time: int): + if observation is not None: + self.last_observation = observation + + if self.config.control_phase in self.modes_with_no_observation: + observation = self.last_observation + log_items = self.log_control_info(start_loop_t) self.render_scene_from_observations(observation) self.publish_observations(observation, log_items, countdown_time) @@ -330,6 +342,21 @@ def log_control_info(self, start_loop_t): log_items = log_control_info(self.config.robot, dt_s, fps=fps) return log_items + + def publish_log_say(self, message): + message = { + "type": "log_say", + "timestamp": time.time(), + "message": message, + } + + self.publisher_socket.send_json(message) + + def log_say(self, message, blocking=False): + if self.config.play_sounds: + self.publish_log_say(message) + log_say(message, blocking) + def cleanup(self, robot=None): """Clean up resources and connections""" diff --git a/lerobot/scripts/browser_ui_server.py b/lerobot/scripts/browser_ui_server.py index cca3cd18c..83f3469ac 100644 --- a/lerobot/scripts/browser_ui_server.py +++ b/lerobot/scripts/browser_ui_server.py @@ -37,6 +37,9 @@ def zmq_consumer(): while True: try: message = subscriber_socket.recv_json() + + message_type = message.get("type") + print(f"Received message: {message_type}") if message.get("type") == "observation_update": processed_data = { @@ -80,6 +83,14 @@ def zmq_consumer(): "timestamp": message.get("timestamp"), "config": config_data }) + elif message.get("type") == "log_say": + data = message.get("message") + timestamp = message.get("timestamp") + socketio.emit("log_say", { + "timestamp": timestamp, + "message": data + }) + except Exception as e: print(f"ZMQ consumer error: {e}") diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 7f1c60686..7082bdb7f 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -277,7 +277,6 @@ def record( # 2. give times to the robot devices to connect and start synchronizing, # 3. place the cameras windows on screen enable_teleoperation = policy is None - log_say("Warmup record", play_sounds) control_context = ControlContext( config=ControlContextConfig( @@ -289,6 +288,7 @@ def record( fps=fps, ) ) + control_context.log_say("Warmup record") warmup_record(robot, enable_teleoperation, warmup_time_s, fps, control_context) @@ -317,7 +317,7 @@ def record( ) ) - log_say(f"Recording episode {dataset.num_episodes}", play_sounds) + control_context.log_say(f"Recording episode {dataset.num_episodes}") record_episode( dataset=dataset, robot=robot, @@ -350,11 +350,11 @@ def record( fps=fps, ) ) - log_say("Reset the environment", play_sounds) + control_context.log_say("Reset the environment") reset_environment(robot, control_context=control_context, reset_time_s=reset_time_s) if events["rerecord_episode"]: - log_say("Re-record episode", play_sounds) + control_context.log_say("Re-record episode") events["rerecord_episode"] = False events["exit_early"] = False dataset.clear_episode_buffer() @@ -378,7 +378,7 @@ def record( if events["stop_recording"]: break - log_say("Stop recording", play_sounds, blocking=True) + control_context.log_say("Stop recording", blocking=True) control_context.cleanup(robot) control_context = control_context.update_config( @@ -412,7 +412,7 @@ def record( ) dataset.push_to_hub(tags=tags) - log_say("Exiting", play_sounds) + control_context.log_say("Exiting") control_context = control_context.update_config( ControlContextConfig( robot=robot, diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index f667df8a3..c0b0bbeb4 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -277,6 +277,14 @@

Logs

} }); + // Handle log_say events + socket.on('log_say', function(data) { + const message = data.message; + console.log('Saying:', message); + const utterance = new SpeechSynthesisUtterance(message); + speechSynthesis.speak(utterance); + }); + // Connection handling socket.on('connect', () => { console.log('Connected to server'); From f106cf6d6885cdc2dfbcabc385858d2bdb75c485 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 25 Dec 2024 14:01:36 -0500 Subject: [PATCH 26/40] Add button to enable voice --- lerobot/templates/browser_ui.html | 46 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index c0b0bbeb4..8ddda25e8 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -25,6 +25,9 @@ Time Remaining: 00:00 +
@@ -192,6 +195,20 @@

Logs

return `${String(minutes).padStart(2, '0')}:${String(secs).padStart(2, '0')}`; } + function loadVoices() { + return new Promise((resolve) => { + const voices = speechSynthesis.getVoices(); + if (voices.length > 0) { + resolve(voices); + } else { + speechSynthesis.onvoiceschanged = () => { + resolve(speechSynthesis.getVoices()); + }; + } + }); + } + + // Handle incoming observation updates socket.on('observation_update', function(data) { // Update timestamp @@ -280,9 +297,10 @@

Logs

// Handle log_say events socket.on('log_say', function(data) { const message = data.message; - console.log('Saying:', message); - const utterance = new SpeechSynthesisUtterance(message); - speechSynthesis.speak(utterance); + loadVoices().then(voices => { + const utterance = new SpeechSynthesisUtterance(message); + speechSynthesis.speak(utterance); + }); }); // Connection handling @@ -339,6 +357,28 @@

Logs

break; } }); + + document.getElementById('enable-voice-button').addEventListener('click', () => { + console.log('Enable Voice button clicked.'); + // Force Chrome to allow voice by calling TTS upon user gesture + loadVoices().then((voices) => { + console.log('Voices loaded:', voices.map(v => v.name)); + + // Optionally pick a specific voice + let chosenVoice = voices.find(v => v.name === 'Aaron'); + if (!chosenVoice) { + console.log('Aaron voice not found, using default voice.'); + } + + const utterance = new SpeechSynthesisUtterance("Voice is now enabled!"); + if (chosenVoice) { + utterance.voice = chosenVoice; + } + + // Speak a short test phrase + speechSynthesis.speak(utterance); + }); + }); \ No newline at end of file From 26a92fa2b518add81a710b2d67152e5c1e924ad1 Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 25 Dec 2024 14:05:25 -0500 Subject: [PATCH 27/40] Refactor to alpine js --- lerobot/templates/browser_ui.html | 611 ++++++++++++++---------------- 1 file changed, 276 insertions(+), 335 deletions(-) diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index 8ddda25e8..8ce618086 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -1,384 +1,325 @@ - + - Robot Observation Stream - - + + Robot Observation Stream (Alpine.js Refactor) + + + + + + + - - -
-
-
- Mode: - - -
-
- Episode: - - -
-
- Reward: - - -
-
- Time Remaining: - 00:00 -
- + + + +
+
+ +
+ Mode: + - +
+ +
+ Episode: + + - + +
+ +
+ Reward: + - +
+ +
+ Time Remaining: + 00:00 +
+ + +
+ +
+
+ + +
+ +
+ +
+ + +
+ + +
+

Events

+
+
-
+
- -
- -
- -
- - -
-

Events

-
+ +
+ +
+

Robot State

+
+
- - -
- -
-

Robot State

-
-
- - -
-

Configuration

-
-
- - -
-

Controls

-
-
- - Exit Early -
-
- - Rerecord -
-
- Esc - Stop -
-
- Space - Toggle Reward -
-
+
+ + +
+

Configuration

+
+ +
+
+ + +
+

Controls

+
+
+ + Exit Early +
+
+ + Rerecord +
+
+ Esc + Stop +
+
+ Space + Toggle Reward +
+
+
- -
-

Logs

-
-
+ +
+

Logs

+
+
+
+
- + // Speak a message using speech synthesis + speakMessage(message) { + const utterance = new SpeechSynthesisUtterance(message); + speechSynthesis.speak(utterance); + }, + })); + }); + - \ No newline at end of file + From 5eefcbac145e76baf1f8b065f00b2f8ae40cc37c Mon Sep 17 00:00:00 2001 From: Jack Vial Date: Wed, 25 Dec 2024 14:16:39 -0500 Subject: [PATCH 28/40] Aligning voice and text episode indices --- .../common/robot_devices/control_context.py | 6 +- lerobot/scripts/control_robot.py | 2 +- lerobot/templates/browser_ui.html | 69 +++---------------- 3 files changed, 14 insertions(+), 63 deletions(-) diff --git a/lerobot/common/robot_devices/control_context.py b/lerobot/common/robot_devices/control_context.py index 528b2a5ed..c3726a682 100644 --- a/lerobot/common/robot_devices/control_context.py +++ b/lerobot/common/robot_devices/control_context.py @@ -116,7 +116,7 @@ def _publish_config_update(self): "play_sounds": self.config.play_sounds, "assign_rewards": self.config.assign_rewards, "control_phase": self.config.control_phase, - "num_episodes": self.config.num_episodes, + "num_episodes": self.config.num_episodes - 1, "current_episode": self.current_episode_index, } @@ -150,7 +150,7 @@ def calculate_window_size(self, images: Dict[str, np.ndarray]): def draw_top_bar(self, window_width: int): top_text_str = f"Mode: {self.config.control_phase}" if self.config.control_phase == ControlPhase.RECORD: - top_text_str += f" | Episode: {self.current_episode_index}/{self.config.num_episodes}" + top_text_str += f" | Episode: {self.current_episode_index}/{self.config.num_episodes - 1}" if self.config.assign_rewards: next_reward = self.events["next_reward"] top_text_str += f" | Reward: {next_reward}" @@ -260,7 +260,7 @@ def publish_observations(self, observation: Dict[str, np.ndarray], log_items: li "play_sounds": self.config.play_sounds, "assign_rewards": self.config.assign_rewards, "control_phase": self.config.control_phase, - "num_episodes": self.config.num_episodes, + "num_episodes": self.config.num_episodes - 1, "current_episode": self.current_episode_index, } diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 7082bdb7f..96fc7dd55 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -295,7 +295,7 @@ def record( if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - recorded_episodes = 1 + recorded_episodes = 0 while True: if recorded_episodes >= num_episodes: break diff --git a/lerobot/templates/browser_ui.html b/lerobot/templates/browser_ui.html index 8ce618086..098ab51fd 100644 --- a/lerobot/templates/browser_ui.html +++ b/lerobot/templates/browser_ui.html @@ -169,7 +169,6 @@

Logs

- @@ -11,7 +11,8 @@ - -
+
@@ -31,7 +32,8 @@
Episode: -
Reward: - - + + - +
Time Remaining: - 00:00 + + 00:00 +