MyoHub · Vittorio-Caggiano · Aug 27, 2024 · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024
diff --git a/myosuite/envs/myo/myochallenge/bimanual_v0.py b/myosuite/envs/myo/myochallenge/bimanual_v0.py
@@ -111,7 +111,7 @@ def _setup(self,
 
         # check whether the object experience force over max force
         self.over_max = False
-        self.max_force = max_force
+        self.max_force = 0
 
         self.touch_history = []
 
@@ -177,7 +177,8 @@ def get_obs_dict(self, sim):
 
         current_force = sim.data.sensordata[0]
         if current_force > self.max_force:
-            self.over_max = True
+            self.max_force = current_force
+        obs_dict['max_force'] = np.array([self.max_force])
 
         obs_vec = self._obj_label_to_obs(touching_objects)
         obs_dict["touching_body"] = obs_vec
@@ -236,7 +237,8 @@ def get_reward_dict(self, obs_dict):
                 ("pass_err", pass_dist + np.log(pass_dist + 1e-3)),
                 # Must keys
                 ("sparse", 0),
-                ("solved", self.check_solve(goal_dis)),
+                ("goal_dist", goal_dis), 
+                ("solved", goal_dis < self.proximity_th),
                 ("done", False),
             )
         )
@@ -258,12 +260,6 @@ def step(self, a, **kwargs):
                                                       - self.sim.model.actuator_ctrlrange[robotic_act_ind, 0]) / 2.0)
         return super().step(processed_controls, **kwargs)
 
-    def check_solve(self, goal_dis):
-        if goal_dis > 0.01:
-            return False
-        if self.over_max == True:
-            return False
-        return True
 
     def get_metrics(self, paths, successful_steps=5):
         """
@@ -280,15 +276,19 @@ def get_metrics(self, paths, successful_steps=5):
                 num_success += 1
         score = num_success / num_paths
 
-        times = np.mean([np.round(p['env_infos']['obs_dict']['time'][-1], 2) for p in paths])
+        times = np.mean([np.round(p['env_infos']['obs_dict']['time'][-1], 5) for p in paths])
+        max_force = np.mean([np.round(p['env_infos']['obs_dict']['max_force'][-1], 5) for p in paths])
+        goal_dist = np.mean([np.mean(p['env_infos']['rwd_dict']['goal_dist']) for p in paths])
 
         # average activations over entire trajectory (can be shorter than horizon, if done) realized
-        effort = -1.0 * np.mean([np.mean(p['env_infos']['rwd_dict']['act_reg']) for p in paths])
+        effort = -1.0 * np.mean([np.mean(p['env_infos']['rwd_dict']['act']) for p in paths])
 
         metrics = {
             'score': score,
             'time': times,
             'effort': effort,
+            'peak force': max_force,
+            'goal dist': goal_dist, 
         }
         return metrics