Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ui): Browser Based UI for Control Robot #601

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
99bce6c
Add control context for record phase
jackvial Dec 19, 2024
565479b
Replace stop recording with control context cleanup
jackvial Dec 19, 2024
7235e0e
Added todos
jackvial Dec 19, 2024
c1c5c73
format todos. very important
jackvial Dec 19, 2024
edcbf67
Setup test bed for iterating on UI
jackvial Dec 20, 2024
86e8263
Move mode to top bar
jackvial Dec 20, 2024
5f6c62f
Remove side bar
jackvial Dec 21, 2024
98231ea
Remove side bar
jackvial Dec 21, 2024
997ad3e
support rendering multiple cameras
jackvial Dec 22, 2024
f7cdd1d
Test testing with multiple cameras
jackvial Dec 22, 2024
5b21451
display camera labels
jackvial Dec 22, 2024
c6342d0
Merge branch 'main' into pygame-for-rendering-and-event-handling
jackvial Dec 22, 2024
84e2cd7
Added ability to stream video to browser
jackvial Dec 23, 2024
b0bc294
Use zero mq for inter process communication
jackvial Dec 23, 2024
63fe164
Publish more data to zmq and render in browser template
jackvial Dec 23, 2024
db37696
Add ability to handle keyboard events from browser
jackvial Dec 23, 2024
f969ebb
Handle events from browser
jackvial Dec 23, 2024
040eff5
Fix browser event handler
jackvial Dec 23, 2024
6bf4add
Stream logs to browser
jackvial Dec 23, 2024
926223f
remove debug flag. always print logs and send them to the browser
jackvial Dec 23, 2024
e6fda95
Removed display_cameras from control loop args. Rename browser ui rel…
jackvial Dec 24, 2024
3c54bf4
Add new dependencies
jackvial Dec 24, 2024
9108649
Fix updating control context config
jackvial Dec 24, 2024
ccf53e4
Show reset and saving episode
jackvial Dec 24, 2024
496adf0
Update config for other modes
jackvial Dec 25, 2024
990d5e7
Added speech synthesis
jackvial Dec 25, 2024
f106cf6
Add button to enable voice
jackvial Dec 25, 2024
26a92fa
Refactor to alpine js
jackvial Dec 25, 2024
5eefcba
Aligning voice and text episode indices
jackvial Dec 25, 2024
2c84202
Add buttons for the keyboard controls
jackvial Dec 25, 2024
bb69fbe
Added dark theme
jackvial Dec 25, 2024
d614611
Remove pygame rendering from control context
jackvial Dec 27, 2024
b636718
Remove display cameras and control sounds options. This will be contr…
jackvial Dec 27, 2024
24b43e1
Fix countdown_time inf bug
jackvial Dec 27, 2024
242802d
Add logs back
jackvial Dec 27, 2024
1eca586
Improve button styling
jackvial Dec 27, 2024
1b8e800
remove control icon
jackvial Dec 27, 2024
3cbc3f7
Fix log formatting
jackvial Dec 28, 2024
9fc696a
Remove old blocking false arg
jackvial Dec 28, 2024
29460c2
Fix episode count in UI. Fix Space bar action not being set to contro…
jackvial Dec 28, 2024
35fa3de
Merge branch 'main' of github.com:huggingface/lerobot into control-ro…
jackvial Dec 28, 2024
6715331
Make ruff linter happy
jackvial Dec 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 299 additions & 0 deletions lerobot/common/robot_devices/control_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
import base64
import time
from dataclasses import dataclass
from typing import Dict, Optional

import cv2
import numpy as np
import torch
import zmq

from lerobot.common.robot_devices.control_utils import log_control_info, serialize_log_items
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.robot_devices.utils import busy_wait


class ControlPhase:
    """Human-readable names of the phases a control session can be in.

    The values are plain strings; they are stored in
    ``ControlContextConfig.control_phase`` and sent as-is in published messages.
    """

    # Phases during which the robot is being driven.
    TELEOPERATE = "Teleoperate"
    WARMUP = "Warmup"
    RECORD = "Record"

    # Phases listed in ``ControlContext.modes_with_no_observation``.
    RESET = "Reset"
    SAVING = "Saving"
    PROCESSING_DATASET = "Processing Dataset"
    UPLOADING_DATASET_TO_HUB = "Uploading Dataset to Hub"
    RECORDING_COMPLETE = "Recording Complete"


@dataclass
class ControlContextConfig:
    """Settings of the current control session consumed by ``ControlContext``.

    ``assign_rewards``, ``control_phase`` and ``num_episodes`` are included in
    every config and observation message that ``ControlContext`` publishes.
    """

    # Whether reward assignment is enabled for this session.
    assign_rewards: bool = False
    # One of the ``ControlPhase`` string constants.
    control_phase: str = ControlPhase.TELEOPERATE
    # Episode count reported alongside the current episode index.
    num_episodes: int = 0
    # Robot handed to ``log_control_info``; may be left unset (None).
    robot: Optional[Robot] = None
    # Target loop rate; when None, ``ControlContext.log_control_info`` does not wait.
    fps: Optional[int] = None


class ControlContext:
    """Bridge between a robot control loop and a browser UI over ZeroMQ.

    Observations, configuration, log items and speech messages are published
    as JSON on a PUB socket. Keyboard commands are read from a SUB socket and
    translated into the flags stored in ``self.events``.
    """

    # Endpoint the PUB socket binds to.
    PUBLISH_ADDRESS = "tcp://127.0.0.1:5555"
    # Endpoint the SUB socket connects to in order to receive commands.
    COMMAND_ADDRESS = "tcp://127.0.0.1:5556"
    # Value published when the countdown is not a finite number (max signed 32-bit int).
    MAX_COUNTDOWN = 2**31 - 1

    def __init__(self, config: "ControlContextConfig"):
        """Store ``config``, open the ZeroMQ sockets and reset the event state."""
        self.config = config
        # Phases in which the last seen observation is re-published.
        self.modes_with_no_observation = [
            ControlPhase.RESET,
            ControlPhase.SAVING,
            ControlPhase.PROCESSING_DATASET,
            ControlPhase.UPLOADING_DATASET_TO_HUB,
            ControlPhase.RECORDING_COMPLETE,
        ]
        self.last_observation = None
        self._initialize_communication()
        self._initialize_state()

    def _initialize_state(self):
        """Reset event flags, the episode index and the list of key bindings."""
        self.events = {
            "exit_early": False,
            "rerecord_episode": False,
            "stop_recording": False,
            "next_reward": 0,
        }

        self.current_episode_index = 0

        # (key label, action label) pairs describing the control instructions.
        self.controls = [
            ("Right Arrow", "Exit Early"),
            ("Left Arrow", "Rerecord"),
            ("Escape", "Stop"),
            ("Space", "Toggle Reward"),
        ]

    def _initialize_communication(self):
        """Create the ZeroMQ context, bind the publisher and connect the command subscriber."""
        self.zmq_context = zmq.Context()
        self.publisher_socket = self.zmq_context.socket(zmq.PUB)
        self.publisher_socket.bind(self.PUBLISH_ADDRESS)

        self.command_sub_socket = self.zmq_context.socket(zmq.SUB)
        self.command_sub_socket.connect(self.COMMAND_ADDRESS)
        # An empty prefix subscribes to every message.
        self.command_sub_socket.setsockopt_string(zmq.SUBSCRIBE, "")

    def _handle_browser_events(self):
        """Apply every command currently queued on the command socket.

        Never blocks: ``poll(timeout=0)`` returns immediately, and the loop
        stops as soon as no message is pending. Errors are reported with
        ``print`` and do not propagate, so a bad message cannot stop the
        control loop.
        """
        try:
            # Drain the queue so several key presses between two loop
            # iterations are all applied now instead of one per iteration.
            while self.command_sub_socket.poll(timeout=0):
                self._apply_command(self.command_sub_socket.recv_json())
        except zmq.Again:
            # No message received within timeout
            pass
        except Exception as e:
            print(f"Error while polling for commands: {e}")

    def _apply_command(self, msg):
        """Update ``self.events`` from one decoded command message; ignore anything but keydown."""
        if msg.get("type") != "command" or msg.get("command") != "keydown":
            return

        key_pressed = msg.get("key_pressed")
        if key_pressed == "ArrowRight":
            print("Received 'ArrowRight' from browser -> Exit Early")
            self.events["exit_early"] = True
        elif key_pressed == "ArrowLeft":
            print("Received 'ArrowLeft' from browser -> Rerecord Episode")
            self.events["rerecord_episode"] = True
            self.events["exit_early"] = True
        elif key_pressed == "Escape":
            print("Received 'Escape' from browser -> Stop")
            self.events["stop_recording"] = True
            self.events["exit_early"] = True
        elif key_pressed == "Space":
            # Toggle "next_reward" between 0 and 1
            self.events["next_reward"] = 1 if self.events["next_reward"] == 0 else 0
            print(f"Space toggled reward to {self.events['next_reward']}")

    def update_config(self, config: "ControlContextConfig"):
        """Replace the stored configuration and publish it. Returns ``self``."""
        self.config = config
        self._publish_config_update()
        return self

    def _config_payload(self):
        """Return the JSON-serializable view of the config included in published messages."""
        return {
            "assign_rewards": self.config.assign_rewards,
            "control_phase": self.config.control_phase,
            "num_episodes": self.config.num_episodes,
            "current_episode": self.current_episode_index,
        }

    def _publish_config_update(self):
        """Publish a ``config_update`` message to ZMQ subscribers."""
        message = {
            "type": "config_update",
            "timestamp": time.time(),
            "config": self._config_payload(),
        }

        self.publisher_socket.send_json(message)

    def update_with_observations(
        self,
        observation: Optional[Dict[str, np.ndarray]],
        start_loop_t: float,
        countdown_time: float,
    ):
        """Pace the loop, publish one observation message and process pending commands.

        Args:
            observation: Mapping of observation name to array/tensor, or None.
                When None, or when the current phase is one of
                ``modes_with_no_observation``, the last non-None observation is
                published instead (nothing is published as data if there is none yet).
            start_loop_t: ``time.perf_counter()`` value taken at the start of the loop iteration.
            countdown_time: Remaining time to report; may be ``float("inf")``.

        Returns:
            ``self``.
        """
        if observation is not None:
            self.last_observation = observation

        if observation is None or self.config.control_phase in self.modes_with_no_observation:
            observation = self.last_observation

        log_items = self.log_control_info(start_loop_t)
        self._publish_observations(observation, log_items, countdown_time)
        self._handle_browser_events()
        return self

    @staticmethod
    def _encode_entry(key, value):
        """Convert one observation entry to a JSON-serializable dict.

        Keys containing ``"image"`` are JPEG-encoded and base64'd; everything
        else is sent as a nested list. Returns None when JPEG encoding fails.
        """
        if "image" in key:
            # detach/cpu so tensors on an accelerator or with grad can be converted.
            image = value.detach().cpu().numpy() if torch.is_tensor(value) else value
            bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            success, buffer = cv2.imencode(".jpg", bgr_image)
            if not success:
                return None
            return {
                "type": "image",
                "encoding": "jpeg_base64",
                "data": base64.b64encode(buffer).decode("utf-8"),
                "shape": image.shape,
            }

        # np.asarray also accepts plain lists and scalars.
        tensor_data = value.detach().cpu().numpy() if torch.is_tensor(value) else np.asarray(value)
        return {
            "type": "tensor",
            "data": tensor_data.tolist(),
            "shape": tensor_data.shape,
        }

    @staticmethod
    def _sanitize_countdown(countdown_time) -> int:
        """Return ``countdown_time`` as an int; inf/NaN become ``MAX_COUNTDOWN``."""
        if not np.isfinite(countdown_time):
            return ControlContext.MAX_COUNTDOWN
        return int(countdown_time)

    def _publish_observations(
        self, observation: Optional[Dict[str, np.ndarray]], log_items: list, countdown_time: float
    ):
        """Encode and publish observation data with current configuration"""
        processed_data = {}
        for key, value in (observation or {}).items():
            if value is None:
                # e.g. a camera frame that could not be read; nothing to encode.
                continue
            encoded = self._encode_entry(key, value)
            if encoded is not None:
                processed_data[key] = encoded

        # No countdown is reported while teleoperating.
        if self.config.control_phase == ControlPhase.TELEOPERATE:
            countdown_time = 0
        else:
            countdown_time = self._sanitize_countdown(countdown_time)

        message = {
            "type": "observation_update",
            "timestamp": time.time(),
            "data": processed_data,
            "events": self.get_events(),
            "config": self._config_payload(),
            "log_items": serialize_log_items(log_items),
            "countdown_time": countdown_time,
        }

        self.publisher_socket.send_json(message)

    def update_current_episode(self, episode_index):
        """Record the index of the episode in progress. Returns ``self``."""
        self.current_episode_index = episode_index
        return self

    def get_events(self):
        """Return a shallow copy of the event flags so callers cannot mutate internal state."""
        return self.events.copy()

    def log_control_info(self, start_loop_t):
        """Wait out the rest of the frame period (if ``fps`` is set) and log loop timing.

        Returns whatever the module-level ``log_control_info`` helper returns
        for the measured iteration duration.
        """
        fps = self.config.fps
        # Skip pacing when fps is unset; also guards against division by zero.
        if fps is not None and fps > 0:
            dt_s = time.perf_counter() - start_loop_t
            busy_wait(1 / fps - dt_s)

        dt_s = time.perf_counter() - start_loop_t
        # Resolves to the imported module-level function, not this method.
        return log_control_info(self.config.robot, dt_s, fps=fps)

    def log_say(self, message):
        """Publish ``message`` as a ``log_say`` message."""
        self._publish_log_say(message)

    def _publish_log_say(self, message):
        """Send a ``log_say`` message carrying the text ``message``."""
        payload = {
            "type": "log_say",
            "timestamp": time.time(),
            "message": message,
        }

        self.publisher_socket.send_json(payload)

    def cleanup(self, robot=None):
        """Disconnect ``robot`` (if given) and release the ZeroMQ sockets and context.

        The sockets and context are released even if ``robot.disconnect()``
        raises; the exception still propagates to the caller.
        """
        try:
            if robot:
                robot.disconnect()
        finally:
            self.publisher_socket.close()
            self.command_sub_socket.close()
            self.zmq_context.term()


if __name__ == "__main__":
    # Manual test bed: publishes frames from two local cameras plus a fixed
    # state vector through a ControlContext until "exit_early" is set.

    def read_image_from_camera(cap):
        """Read one frame from ``cap``; return it as a float RGB tensor, or None on failure."""
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            return None
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return torch.tensor(frame_rgb).float()

    config = ControlContextConfig(
        assign_rewards=True,
        control_phase=ControlPhase.RECORD,
        num_episodes=200,
        fps=30,
    )
    context = ControlContext(config)
    context.update_current_episode(199)

    cameras = {"main": cv2.VideoCapture(0), "top": cv2.VideoCapture(4)}

    # Constant state tensor (simulating follower positions); built once.
    state = torch.tensor([10.0195, 128.9355, 173.0566, -13.2715, -7.2070, 34.4531])

    try:
        for name, cap in cameras.items():
            if not cap.isOpened():
                raise RuntimeError(f"Error: Could not open {name} camera")

        while True:
            obs_dict = {"observation.state": state}

            for name, cap in cameras.items():
                image = read_image_from_camera(cap)
                # A failed grab is skipped rather than published as None,
                # which the JPEG encoding step cannot handle.
                if image is not None:
                    obs_dict[f"observation.images.{name}"] = image

            # Update context with observations
            context.update_with_observations(obs_dict, time.perf_counter(), countdown_time=10)

            if context.get_events()["exit_early"]:
                break
    finally:
        # Release devices and sockets even on errors or Ctrl-C.
        for cap in cameras.values():
            cap.release()
        cv2.destroyAllWindows()
        context.cleanup()
Loading