rail-berkeley · jianlanluo · Apr 25, 2024 · Apr 20, 2024 · Apr 20, 2024 · Apr 20, 2024
diff --git a/examples/async_bin_relocation_fwbw_drq/record_bc_demos.py b/examples/async_bin_relocation_fwbw_drq/record_bc_demos.py
@@ -75,8 +75,10 @@ def on_esc(key):
     transitions = []
     while demos_count < demos_needed:
 
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((7,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((7,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/examples/async_bin_relocation_fwbw_drq/record_demo.py b/examples/async_bin_relocation_fwbw_drq/record_demo.py
@@ -85,8 +85,10 @@
         len(fw_transitions) < transitions_needed
         or len(bw_transitions) < transitions_needed
     ):
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((7,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((7,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/examples/async_bin_relocation_fwbw_drq/record_transitions.py b/examples/async_bin_relocation_fwbw_drq/record_transitions.py
@@ -98,9 +98,11 @@ def check_all_done():
 
     # Loop until we have enough transitions
     while not check_all_done():
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((7,)))
+        actions = np.zeros((7,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
         next_obs = env.get_front_cam_obs()
-        actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/examples/async_bin_relocation_fwbw_drq/test_classifier.py b/examples/async_bin_relocation_fwbw_drq/test_classifier.py
@@ -55,8 +55,10 @@
     obs, _ = env.reset()
 
     for i in tqdm(range(1000)):
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((7,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((7,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         obs = next_obs
 

diff --git a/examples/async_cable_route_drq/record_demo.py b/examples/async_cable_route_drq/record_demo.py
@@ -64,8 +64,10 @@
         raise PermissionError(f"No permission to write to {file_dir}")
 
     while success_count < success_needed:
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((6,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((6,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/examples/async_cable_route_drq/test_classifier.py b/examples/async_cable_route_drq/test_classifier.py
@@ -43,8 +43,10 @@
     obs, _ = env.reset()
 
     for i in tqdm(range(1000)):
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((6,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((6,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         obs = next_obs
 

diff --git a/examples/async_pcb_insert_drq/record_demo.py b/examples/async_pcb_insert_drq/record_demo.py
@@ -49,8 +49,10 @@
         raise PermissionError(f"No permission to write to {file_dir}")
 
     while success_count < success_needed:
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((6,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((6,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/examples/async_peg_insert_drq/record_demo.py b/examples/async_peg_insert_drq/record_demo.py
@@ -49,8 +49,10 @@
         raise PermissionError(f"No permission to write to {file_dir}")
 
     while success_count < success_needed:
-        next_obs, rew, done, truncated, info = env.step(action=np.zeros((6,)))
-        actions = info["intervene_action"]
+        actions = np.zeros((6,))
+        next_obs, rew, done, truncated, info = env.step(action=actions)
+        if "intervene_action" in info:
+            actions = info["intervene_action"]
 
         transition = copy.deepcopy(
             dict(

diff --git a/serl_robot_infra/franka_env/envs/relative_env.py b/serl_robot_infra/franka_env/envs/relative_env.py
@@ -23,7 +23,9 @@ class RelativeFrame(gym.Wrapper):
             }
         ),
         ......
-    }, and at least 6 DoF action space with (x, y, z, rx, ry, rz, ...)
+    }, and at least 6 DoF action space with (x, y, z, rx, ry, rz, ...).
+    By convention, the 7th dimension of the action space is used for the gripper.
+
     """
 
     def __init__(self, env: Env, include_relative_pose=True):
@@ -44,7 +46,9 @@ def step(self, action: np.ndarray):
 
         # this is to convert the spacemouse intervention action
         if "intervene_action" in info:
-            info["intervene_action"] = self.transform_action(info["intervene_action"])
+            info["intervene_action"] = self.transform_action_inv(
+                info["intervene_action"]
+            )
 
         # Update adjoint matrix
         self.adjoint_matrix = construct_adjoint_matrix(obs["state"]["tcp_pose"])
@@ -94,3 +98,12 @@ def transform_action(self, action: np.ndarray):
         action = np.array(action)  # in case action is a jax read-only array
         action[:6] = self.adjoint_matrix @ action[:6]
         return action
+
+    def transform_action_inv(self, action: np.ndarray):
+        """
+        Transform action from spatial (base) frame into body (end-effector) frame
+        using the inverse adjoint; entries past the first 6 are passed through as-is.
+        """
+        action = np.array(action)  # copy, so the caller's (possibly read-only) array is untouched
+        action[:6] = np.linalg.solve(self.adjoint_matrix, action[:6])  # inv(A) @ v without forming inv(A)
+        return action
diff --git a/serl_robot_infra/franka_env/envs/wrappers.py b/serl_robot_infra/franka_env/envs/wrappers.py
@@ -202,14 +202,17 @@ def action(self, action: np.ndarray) -> np.ndarray:
             expert_a = np.concatenate((expert_a, gripper_action), axis=0)
 
         if time.time() - self.last_intervene < 0.5:
-            return expert_a
+            return expert_a, True
 
-        return action
+        return action, False
 
     def step(self, action):
-        new_action = self.action(action)
+        # self.action() returns (action_to_execute, replaced); replaced is True when expert_a was chosen.
+        new_action, replaced = self.action(action)
+        # Step the wrapped env with whichever action was selected.
         obs, rew, done, truncated, info = self.env.step(new_action)
-        info["intervene_action"] = new_action
+        if replaced:  # expose the key only when the expert action was actually executed
+            info["intervene_action"] = new_action
         info["left"] = self.left
         info["right"] = self.right
         return obs, rew, done, truncated, info