diff --git a/docs/source/challenge-doc.rst b/docs/source/challenge-doc.rst
index 936d5c9d..0fdc137b 100644
--- a/docs/source/challenge-doc.rst
+++ b/docs/source/challenge-doc.rst
@@ -174,7 +174,7 @@ Success Condition
   - The object moved from start position to goal position. Both the MPL hand, and MyoHand, is required to touch the object for certain timesteps
   - Exerting a maximum contact force on the object, less than 1500N (subject to change in final EVALUATION environment)
-  - Placing the object within 0.05 meters of the goal site on the pillar
+  - Placing the object within 0.17 meters of the goal site on the pillar (0.1 m in each of the x, y, and z axes)
 
 Ranking Criteria
diff --git a/myosuite/agents/sb3_job_script.py b/myosuite/agents/sb3_job_script.py
index c9e1d433..a2f112e3 100644
--- a/myosuite/agents/sb3_job_script.py
+++ b/myosuite/agents/sb3_job_script.py
@@ -62,6 +62,7 @@ def train_loop(job_data) -> None:
                     learning_rate=job_data.learning_rate,
                     batch_size=job_data.batch_size,
                     policy_kwargs=policy_kwargs,
+                    tensorboard_log=f"wandb/{run.id}",
                     gamma=job_data.gamma, **job_data.alg_hyper_params)
     elif algo == 'SAC':
         model = SAC(job_data.policy, env,
@@ -70,6 +71,7 @@
                     learning_starts=job_data.learning_starts,
                     batch_size=job_data.batch_size,
                     tau=job_data.tau,
+                    tensorboard_log=f"wandb/{run.id}",
                     gamma=job_data.gamma, **job_data.alg_hyper_params)
diff --git a/myosuite/envs/myo/myochallenge/bimanual_v0.py b/myosuite/envs/myo/myochallenge/bimanual_v0.py
index 26bbffd0..7bf225ee 100644
--- a/myosuite/envs/myo/myochallenge/bimanual_v0.py
+++ b/myosuite/envs/myo/myochallenge/bimanual_v0.py
@@ -50,7 +50,7 @@ def _setup(self,
                goal_center=np.array([0.4, -0.25, 1.05]),
                max_force=1500,  # Max force against throwing
-               proximity_th=0.015,  # object-target proximity threshold
+               proximity_th=0.17,  # object-target proximity threshold: Euclidean norm of a 10 cm offset in each axis
                start_shifts=np.array([0.055, 0.055, 0]),  # shift factor for start/goal random generation with z-axis fixed
@@ -78,6 +78,7 @@
         self.start_shifts = start_shifts
         self.goal_shifts = goal_shifts
+        self.PILLAR_HEIGHT = 1.09
         self.id_info = IdInfo(self.sim.model)
@@ -246,6 +247,7 @@ def get_reward_dict(self, obs_dict):
         obj_pos = obs_dict["obj_pos"][0][0] if obs_dict['obj_pos'].ndim == 3 else obs_dict['obj_pos']
         palm_pos = obs_dict["palm_pos"][0][0] if obs_dict["palm_pos"].ndim == 3 else obs_dict["palm_pos"]
         goal_pos = obs_dict["goal_pos"][0][0] if obs_dict["goal_pos"].ndim == 3 else obs_dict["goal_pos"]
+        goal_pos = np.concatenate((goal_pos[:2], np.array([self.PILLAR_HEIGHT])))
         lift_height = np.linalg.norm(np.array([[[obj_pos[-1], palm_pos[-1]]]]) - np.array([[[self.init_obj_z, self.init_palm_z]]]), axis=-1)
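
Note on the new proximity threshold (not part of the patch): 0.17 m is roughly the Euclidean norm of a 0.1 m offset along every axis, since sqrt(3 * 0.1**2) is about 0.173 m. The patch also clamps the goal's z-coordinate to PILLAR_HEIGHT before the distance is computed, so the check is effectively taken against the top of the pillar. The sketch below only illustrates that arithmetic; the helper name object_at_goal and the example coordinates are hypothetical and not taken from bimanual_v0.py.

import numpy as np

def object_at_goal(obj_pos, goal_pos, proximity_th=0.17):
    # Hypothetical success check: object within proximity_th metres of the goal.
    # 0.17 m ~ sqrt(3 * 0.1**2), i.e. roughly a 0.1 m offset in each axis at once.
    return float(np.linalg.norm(np.asarray(obj_pos) - np.asarray(goal_pos))) < proximity_th

goal = np.array([0.4, -0.25, 1.09])                 # illustrative goal site at pillar height
print(object_at_goal(goal + 0.09, goal))            # True  (offset norm ~0.156 m < 0.17 m)
print(object_at_goal(goal + [0.18, 0.0, 0.0], goal))  # False (offset norm 0.18 m > 0.17 m)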