From 4c01023e5ee1dc9edbf283adb7ac43d3b81c7756 Mon Sep 17 00:00:00 2001
From: Vikash Kumar
Date: Wed, 27 Dec 2023 18:02:07 -0500
Subject: [PATCH 01/19] FEATURE: Adding XML models. MyoChallenge is over.
 Staging models for release. All envs have been upgraded to point to the
 model XMLs

---
 .../envs/myo/assets/arm/myoarm_relocate.xml   | 74 +++++++++++++++++++
 .../envs/myo/assets/leg/myolegs_chasetag.xml  | 57 ++++++++++++++
 myosuite/envs/myo/myochallenge/__init__.py    |  8 +-
 myosuite/envs/myo/myochallenge/chasetag_v0.py |  3 +-
 myosuite/simhive/furniture_sim                |  2 +-
 myosuite/simhive/myo_sim                      |  2 +-
 6 files changed, 138 insertions(+), 8 deletions(-)
 create mode 100644 myosuite/envs/myo/assets/arm/myoarm_relocate.xml
 create mode 100644 myosuite/envs/myo/assets/leg/myolegs_chasetag.xml

diff --git a/myosuite/envs/myo/assets/arm/myoarm_relocate.xml b/myosuite/envs/myo/assets/arm/myoarm_relocate.xml
new file mode 100644
index 00000000..2e8414e1
--- /dev/null
+++ b/myosuite/envs/myo/assets/arm/myoarm_relocate.xml
@@ -0,0 +1,74 @@
+<!-- 74 lines of MuJoCo XML were added here; element markup lost in extraction -->
\ No newline at end of file
diff --git a/myosuite/envs/myo/assets/leg/myolegs_chasetag.xml b/myosuite/envs/myo/assets/leg/myolegs_chasetag.xml
new file mode 100644
index 00000000..7f6f1dc6
--- /dev/null
+++ b/myosuite/envs/myo/assets/leg/myolegs_chasetag.xml
@@ -0,0 +1,57 @@
+<!-- 57 lines of MuJoCo XML were added here; element markup lost in extraction -->
\ No newline at end of file
diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py
index 3bb528da..4f55b4d9 100644
--- a/myosuite/envs/myo/myochallenge/__init__.py
+++ b/myosuite/envs/myo/myochallenge/__init__.py
@@ -11,7 +11,7 @@
         entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0',
         max_episode_steps=150,
         kwargs={
-            'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb',
+            'model_path': curr_dir+'/../assets/arm/myoarm_relocate.xml',
             'normalize_act': True,
             'frame_skip': 5,
             'pos_th': 0.1,              # cover entire base of the receptacle
@@ -26,7 +26,7 @@
         entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0',
         max_episode_steps=150,
         kwargs={
-            'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb',
+            'model_path': curr_dir+'/../assets/arm/myoarm_relocate.xml',
             'normalize_act': True,
             'frame_skip': 5,
             'pos_th': 0.1,              # cover entire base of the receptacle
@@ -47,7 +47,7 @@
         entry_point='myosuite.envs.myo.myochallenge.chasetag_v0:ChaseTagEnvV0',
         max_episode_steps=2000,
         kwargs={
-            'model_path': curr_dir+'/../../../simhive/myo_sim/leg/myolegs_chasetag_v0.11(mj237).mjb',
+            'model_path': curr_dir+'/../assets/leg/myolegs_chasetag.xml',
             'normalize_act': True,
             'win_distance': 0.5,
             'min_spawn_distance': 2,
@@ -66,7 +66,7 @@
         entry_point='myosuite.envs.myo.myochallenge.chasetag_v0:ChaseTagEnvV0',
         max_episode_steps=2000,
         kwargs={
-            'model_path': curr_dir+'/../../../simhive/myo_sim/leg/myolegs_chasetag_v0.11(mj237).mjb',
+            'model_path': curr_dir+'/../assets/leg/myolegs_chasetag.xml',
             'normalize_act': True,
             'win_distance': 0.5,
             'min_spawn_distance': 2,
diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index 7441c2e9..dd837c53 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -533,8 +533,7 @@ def get_reward_dict(self, obs_dict):
         rwd_dict['dense'] = np.sum([wt*rwd_dict[key] for key, wt in self.rwd_keys_wt.items()], axis=0)
 
         # Success Indicator
-        self.sim.model.site_rgba[self.success_indicator_sid, :] = np.array([0, 2, 0, 0.1]) if rwd_dict['solved'] else np.array([2, 0, 0, 0])
-
+        self.sim.model.site_rgba[self.success_indicator_sid, :] = np.array([0, 2, 0, 0.2]) if rwd_dict['solved'] else np.array([2, 0, 0, 0])
         return rwd_dict
 
     def get_metrics(self, paths):
diff --git a/myosuite/simhive/furniture_sim b/myosuite/simhive/furniture_sim
index 434ad64c..45f94f72 160000
--- a/myosuite/simhive/furniture_sim
+++ b/myosuite/simhive/furniture_sim
@@ -1 +1 @@
-Subproject commit 434ad64ca51088f430371af66468b6baf9a6c066
+Subproject commit 45f94f72cb3a30b22dea0176a9daa1a092fc55f4
diff --git a/myosuite/simhive/myo_sim b/myosuite/simhive/myo_sim
index aff0bc09..052c335b 160000
--- a/myosuite/simhive/myo_sim
+++ b/myosuite/simhive/myo_sim
@@ -1 +1 @@
-Subproject commit aff0bc096d98085ee0a6befd613cc9fbff024944
+Subproject commit 052c335b7ea63d212316ea480125e351bb5631d8

From cd9a25e90317c0ccc4c32e231ecf123da428c99a Mon Sep 17 00:00:00 2001
From: Vikash Kumar
Date: Wed, 27 Dec 2023 18:21:22 -0500
Subject: [PATCH 02/19] FEATURE: Moving the rest of the stack from mjbs to
 XMLs

---
 myosuite/agents/baseline_Reflex/ReflexCtrInterface.py | 2 +-
 myosuite/envs/myo/myobase/__init__.py                 | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/myosuite/agents/baseline_Reflex/ReflexCtrInterface.py b/myosuite/agents/baseline_Reflex/ReflexCtrInterface.py
index a7d649f2..9e92de99 100644
--- a/myosuite/agents/baseline_Reflex/ReflexCtrInterface.py
+++ b/myosuite/agents/baseline_Reflex/ReflexCtrInterface.py
@@ -56,7 +56,7 @@ def __init__(self, init_dict=DEFAULT_INIT_POSE, dt=0.01, mode='3D', sim_time=2.0
     curr_dir = os.getcwd()
     register_env_variant(
         env_id='myoLegDemo-v0',
-        variants={'model_path': curr_dir+'/../../simhive/myo_sim/myoleg/myoleg_v0.52(mj120).mjb',
+        variants={'model_path': curr_dir+'/../../simhive/myo_sim/leg/myolegs.xml',
                   'normalize_act':False},
         variant_id='MyoLegReflex-v0',
         silent=False
diff --git a/myosuite/envs/myo/myobase/__init__.py b/myosuite/envs/myo/myobase/__init__.py
index 13d70ef9..7720c257 100644
--- a/myosuite/envs/myo/myobase/__init__.py
+++ b/myosuite/envs/myo/myobase/__init__.py
@@ -287,11 +287,7 @@ def register_env_with_variants(id, entry_point, max_episode_steps, kwargs):
 # Gait Torso Reaching ==============================
 from myosuite.physics.sim_scene import SimBackend
 sim_backend = SimBackend.get_sim_backend()
-if sim_backend == SimBackend.MUJOCO_PY:
-    leg_model='/../../../simhive/myo_sim/leg/myolegs_v0.54(mj210).mjb'
-elif sim_backend == SimBackend.MUJOCO:
-    leg_model='/../../../simhive/myo_sim/leg/myolegs_v0.56(mj237).mjb'
-
+leg_model='/../../../simhive/myo_sim/leg/myolegs.xml'
 
 register_env_with_variants(id='myoLegStandRandom-v0',
     entry_point='myosuite.envs.myo.myobase.walk_v0:ReachEnvV0',

From 9083c23a83832a27c87447279d3e01ca854783d5 Mon Sep 17 00:00:00 2001
From: elladyr
Date: Fri, 6 Oct 2023 10:37:00 -0400
Subject: [PATCH 03/19] Updated velocity randomization

---
 myosuite/envs/myo/myochallenge/chasetag_v0.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index dd837c53..b6196ed2 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -42,6 +42,7 @@ def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float):
         self.sim = sim
self.opponent_probabilities = probabilities self.min_spawn_distance = min_spawn_distance + self.linear_velocity = 1.0 self.reset_opponent(rng=rng) def reset_noise_process(self): @@ -151,6 +152,9 @@ def reset_opponent(self, player_task='CHASE', rng=None): self.set_opponent_pose(pose) self.opponent_vel[:] = 0.0 + # Randomize opponent forward velocity + self.linear_velocity = self.rng.uniform(1, 5) + def chase_player(self): """ This moves the opponent randomly in a correlated @@ -163,7 +167,7 @@ def chase_player(self): new_vec = np.array([np.cos(theta), np.sin(theta)]) new_vec2 = pel - vec vel = np.dot(new_vec, new_vec2) - return np.array([1.0, vel]) + return np.array([self.linear_velocity, vel]) class HeightField: From bdbf0d221922c241cfe96dbea080539832aba2b2 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Mon, 18 Sep 2023 21:32:49 +0200 Subject: [PATCH 04/19] added repeller policy --- myosuite/envs/myo/myochallenge/chasetag_v0.py | 157 +++++++++++++++++- 1 file changed, 156 insertions(+), 1 deletion(-) diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index b6196ed2..ac663ef5 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -378,6 +378,161 @@ def ncol(self): return self.hfield.ncol +class RepellerChallengeOpponent(ChallengeOpponent): + # Repeller parameters + DIST_INFLUENCE = 3.5 # Distance of influence by the repeller + ETA = 20.0 # Scaling factor + MIN_SPAWN_DIST = 1.5 + + def __init__(self, sim, rng, probabilities, min_spawn_distance): + self.dt = 0.01 + self.sim = sim + self.rng = rng + self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2] + self.min_spawn_distance = min_spawn_distance + self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng) + self.reset_opponent() + + def get_agent_pos(self): + """ + Get agent Pose + :param pose: Pose of the agent, measured from the pelvis. + :type pose: array -> [x, y] + """ + return self.sim.data.body('pelvis').xpos[:2] + + def get_wall_pos(self): + """ + Get location of quad boundaries. + :param pose: Pose of points along quad boundaries. + :type pose: array -> [x, y] + """ + bound_resolution = np.linspace(-8.7, 8.7, 25) + right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]), + np.array([[-8.7,x] for x in bound_resolution])) ) + all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) ) + + return all_bounds + + def get_repellers(self): + """ + Get location of all repellers. 
+ :param pose: Pose of all repellers + :type pose: array -> [x, y] + """ + agent_pos = self.get_agent_pos() + wall_pos = self.get_wall_pos() + + obstacle_list = np.vstack( (agent_pos, wall_pos) ) + return obstacle_list + + def repeller_stochastic(self): + """ + Returns the linear velocity for the opponent + :param pose: Pose of points of all repellers + :type pose: array -> [x, y, rotation] + """ + obstacle_pos = self.get_repellers() + opponent_pos = self.get_opponent_pose().copy() + + # Calculate over all the workspace + distance = np.array([np.linalg.norm(diff) for diff in (obstacle_pos - opponent_pos[0:2])]) + + # Check if any obstacles are around + dist_idx = np.where(distance < self.DIST_INFLUENCE)[0] + + # Take a random step if no repellers are close by, making it a non-stationary target + if len(dist_idx) == 0: + #print("Random - no repellers close") + lin, rot = self.noise_process.sample() + escape_linear = np.clip(lin, 0.5, 1) + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot) + return np.hstack((escape_linear, escape_ang_rot)) + + repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0) + # Use repeller force as linear velocity to escape + repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2 + escape_linear = np.clip(np.mean(repel_force), 0.3, 1) + escape_xpos = opponent_pos[0:2] - repel_COM + + equil_idx = np.where(np.abs(escape_xpos) <= 0.1 )[0] + if len(equil_idx) != 0: + for idx in equil_idx: + escape_xpos[idx] = -1*np.sign(escape_xpos[idx]) * self.rng.uniform(low=0.3, high=0.9) + + escape_direction = np.arctan2(escape_xpos[1], escape_xpos[0]) # Direction + escape_direction = escape_direction + 1.57 # Account for rotation in world frame + + # Determines turning direction + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], escape_direction) + + return np.hstack((escape_linear, escape_ang_rot)) + + def _calc_angular_vel(self, current_pos, desired_pos): + # Checking for sign of the current position and escape position to prevent inefficient turning + # E.g. 3.14 and -3.14 are pointing in the same direction, so a simple substraction of facing direction will make the opponent turn a lot + + # Bring the current pos and desired pos to be between 0 to 2pi + if current_pos > (2*np.pi): + while current_pos > (2*np.pi): + current_pos = current_pos - (2*np.pi) + elif np.sign(current_pos) < 0: + while np.sign(current_pos) < 0: + current_pos = current_pos + (2*np.pi) + + if desired_pos > (2*np.pi): + while desired_pos > (2*np.pi): + desired_pos = desired_pos - (2*np.pi) + elif np.sign(desired_pos) < 0: + while np.sign(desired_pos) < 0: + desired_pos = desired_pos + (2*np.pi) + + direction_clock = np.abs(0 - current_pos) + (2*np.pi - desired_pos) # Clockwise rotation + direction_anticlock = (2*np.pi - current_pos) + (0 + desired_pos) # Anticlockwise rotation + + if direction_clock < direction_anticlock: + return 1 + else: + return -1 + + def repeller_policy(self): + """ + This uses the repeller policy to move the opponent. + """ + return self.repeller_stochastic() + + def sample_opponent_policy(self): + """ + Takes in three probabilities and returns the policies with the given frequency. 
+        """
+        rand_num = self.rng.uniform()
+        if rand_num < self.opponent_probabilities[0]:
+            self.opponent_policy = 'static_stationary'
+        elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1]:
+            self.opponent_policy = 'stationary'
+        elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1] + self.opponent_probabilities[2]:
+            self.opponent_policy = 'random'
+        else:
+            self.opponent_policy = 'repeller'
+
+    def update_opponent_state(self):
+        """
+        This function executes an opponent step with
+        one of the control policies.
+        """
+        if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary':
+            opponent_vel = np.zeros(2,)
+
+        elif self.opponent_policy == 'random':
+            opponent_vel = self.random_movement()
+
+        elif self.opponent_policy == 'repeller':
+            opponent_vel = self.repeller_policy()
+        else:
+            raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. Policy was: {self.opponent_policy}")
+        self.move_opponent(opponent_vel)
+
+
 class ChaseTagEnvV0(WalkEnvV0):
 
     DEFAULT_OBS_KEYS = [
@@ -451,7 +606,7 @@ def _setup(self,
         self.win_distance = win_distance
         self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes']
-        self.opponent = ChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance)
+        self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance)
         self.success_indicator_sid = self.sim.model.site_name2id("opponent_indicator")
         self.current_task = Task.CHASE
         super()._setup(obs_keys=obs_keys,

From 101ba44c23e13b35469eb4743d05e898fe388551 Mon Sep 17 00:00:00 2001
From: Pierre Schumacher
Date: Sat, 7 Oct 2023 20:16:40 +0200
Subject: [PATCH 05/19] repeller and random velocity are added. P1 and P2
 compatibility works.

---
 myosuite/envs/myo/myochallenge/chasetag_v0.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index ac663ef5..64883082 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -517,7 +517,7 @@ def sample_opponent_policy(self):
 
     def update_opponent_state(self):
         """
-        This function executes an opponent step with 
+        This function executes an opponent step with
         one of the control policies.
         """
         if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary':
@@ -528,8 +528,11 @@ def update_opponent_state(self):
 
         elif self.opponent_policy == 'random':
             opponent_vel = self.random_movement()
 
         elif self.opponent_policy == 'repeller':
             opponent_vel = self.repeller_policy()
+
+        elif self.opponent_policy == 'chase_player':
+            opponent_vel = self.chase_player()
         else:
-            raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. Policy was: {self.opponent_policy}")
+            raise NotImplementedError(f"This opponent policy doesn't exist. Choose: static_stationary, stationary or random. Policy was: {self.opponent_policy}")
         self.move_opponent(opponent_vel)

From 66a673aadc24d97af80d0cf77b901d7b7aa83d0a Mon Sep 17 00:00:00 2001
From: Pierre Schumacher
Date: Mon, 18 Sep 2023 21:32:49 +0200
Subject: [PATCH 06/19] added repeller policy

---
 myosuite/envs/myo/myochallenge/chasetag_v0.py | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index 64883082..e9d76d56 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -536,6 +536,161 @@ def update_opponent_state(self):
         self.move_opponent(opponent_vel)
 
 
+class RepellerChallengeOpponent(ChallengeOpponent):
+    # Repeller parameters
+    DIST_INFLUENCE = 3.5    # Distance of influence by the repeller
+    ETA = 20.0              # Scaling factor
+    MIN_SPAWN_DIST = 1.5
+
+    def __init__(self, sim, rng, probabilities, min_spawn_distance):
+        self.dt = 0.01
+        self.sim = sim
+        self.rng = rng
+        self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2]
+        self.min_spawn_distance = min_spawn_distance
+        self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng)
+        self.reset_opponent()
+
+    def get_agent_pos(self):
+        """
+        Get agent Pose
+        :param pose: Pose of the agent, measured from the pelvis.
+        :type pose: array -> [x, y]
+        """
+        return self.sim.data.body('pelvis').xpos[:2]
+
+    def get_wall_pos(self):
+        """
+        Get location of quad boundaries.
+        :param pose: Pose of points along quad boundaries.
+        :type pose: array -> [x, y]
+        """
+        bound_resolution = np.linspace(-8.7, 8.7, 25)
+        right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]),
+                                        np.array([[-8.7,x] for x in bound_resolution])) )
+        all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) )
+
+        return all_bounds
+
+    def get_repellers(self):
+        """
+        Get location of all repellers.
+ :param pose: Pose of all repellers + :type pose: array -> [x, y] + """ + agent_pos = self.get_agent_pos() + wall_pos = self.get_wall_pos() + + obstacle_list = np.vstack( (agent_pos, wall_pos) ) + return obstacle_list + + def repeller_stochastic(self): + """ + Returns the linear velocity for the opponent + :param pose: Pose of points of all repellers + :type pose: array -> [x, y, rotation] + """ + obstacle_pos = self.get_repellers() + opponent_pos = self.get_opponent_pose().copy() + + # Calculate over all the workspace + distance = np.array([np.linalg.norm(diff) for diff in (obstacle_pos - opponent_pos[0:2])]) + + # Check if any obstacles are around + dist_idx = np.where(distance < self.DIST_INFLUENCE)[0] + + # Take a random step if no repellers are close by, making it a non-stationary target + if len(dist_idx) == 0: + #print("Random - no repellers close") + lin, rot = self.noise_process.sample() + escape_linear = np.clip(lin, 0.5, 1) + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot) + return np.hstack((escape_linear, escape_ang_rot)) + + repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0) + # Use repeller force as linear velocity to escape + repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2 + escape_linear = np.clip(np.mean(repel_force), 0.3, 1) + escape_xpos = opponent_pos[0:2] - repel_COM + + equil_idx = np.where(np.abs(escape_xpos) <= 0.1 )[0] + if len(equil_idx) != 0: + for idx in equil_idx: + escape_xpos[idx] = -1*np.sign(escape_xpos[idx]) * self.rng.uniform(low=0.3, high=0.9) + + escape_direction = np.arctan2(escape_xpos[1], escape_xpos[0]) # Direction + escape_direction = escape_direction + 1.57 # Account for rotation in world frame + + # Determines turning direction + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], escape_direction) + + return np.hstack((escape_linear, escape_ang_rot)) + + def _calc_angular_vel(self, current_pos, desired_pos): + # Checking for sign of the current position and escape position to prevent inefficient turning + # E.g. 3.14 and -3.14 are pointing in the same direction, so a simple substraction of facing direction will make the opponent turn a lot + + # Bring the current pos and desired pos to be between 0 to 2pi + if current_pos > (2*np.pi): + while current_pos > (2*np.pi): + current_pos = current_pos - (2*np.pi) + elif np.sign(current_pos) < 0: + while np.sign(current_pos) < 0: + current_pos = current_pos + (2*np.pi) + + if desired_pos > (2*np.pi): + while desired_pos > (2*np.pi): + desired_pos = desired_pos - (2*np.pi) + elif np.sign(desired_pos) < 0: + while np.sign(desired_pos) < 0: + desired_pos = desired_pos + (2*np.pi) + + direction_clock = np.abs(0 - current_pos) + (2*np.pi - desired_pos) # Clockwise rotation + direction_anticlock = (2*np.pi - current_pos) + (0 + desired_pos) # Anticlockwise rotation + + if direction_clock < direction_anticlock: + return 1 + else: + return -1 + + def repeller_policy(self): + """ + This uses the repeller policy to move the opponent. + """ + return self.repeller_stochastic() + + def sample_opponent_policy(self): + """ + Takes in three probabilities and returns the policies with the given frequency. 
+ """ + rand_num = self.rng.uniform() + if rand_num < self.opponent_probabilities[0]: + self.opponent_policy = 'static_stationary' + elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1]: + self.opponent_policy = 'stationary' + elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1] + self.opponent_probabilities[2]: + self.opponent_policy = 'random' + else: + self.opponent_policy = 'repeller' + + def update_opponent_state(self): + """ + This function executes an opponent step with + one of the control policies. + """ + if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary': + opponent_vel = np.zeros(2,) + + elif self.opponent_policy == 'random': + opponent_vel = self.random_movement() + + elif self.opponent_policy == 'repeller': + opponent_vel = self.repeller_policy() + else: + raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. Policy was: {self.opponent_policy}") + self.move_opponent(opponent_vel) + + class ChaseTagEnvV0(WalkEnvV0): DEFAULT_OBS_KEYS = [ From 38a819d723ff321c5f015a95dfcc97c4c3932066 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Sat, 7 Oct 2023 20:29:11 +0200 Subject: [PATCH 07/19] postmerge cleanup --- myosuite/envs/myo/myochallenge/chasetag_v0.py | 155 ------------------ 1 file changed, 155 deletions(-) diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index e9d76d56..64883082 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -536,161 +536,6 @@ def update_opponent_state(self): self.move_opponent(opponent_vel) -class RepellerChallengeOpponent(ChallengeOpponent): - # Repeller parameters - DIST_INFLUENCE = 3.5 # Distance of influence by the repeller - ETA = 20.0 # Scaling factor - MIN_SPAWN_DIST = 1.5 - - def __init__(self, sim, rng, probabilities, min_spawn_distance): - self.dt = 0.01 - self.sim = sim - self.rng = rng - self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2] - self.min_spawn_distance = min_spawn_distance - self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng) - self.reset_opponent() - - def get_agent_pos(self): - """ - Get agent Pose - :param pose: Pose of the agent, measured from the pelvis. - :type pose: array -> [x, y] - """ - return self.sim.data.body('pelvis').xpos[:2] - - def get_wall_pos(self): - """ - Get location of quad boundaries. - :param pose: Pose of points along quad boundaries. - :type pose: array -> [x, y] - """ - bound_resolution = np.linspace(-8.7, 8.7, 25) - right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]), - np.array([[-8.7,x] for x in bound_resolution])) ) - all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) ) - - return all_bounds - - def get_repellers(self): - """ - Get location of all repellers. 
- :param pose: Pose of all repellers - :type pose: array -> [x, y] - """ - agent_pos = self.get_agent_pos() - wall_pos = self.get_wall_pos() - - obstacle_list = np.vstack( (agent_pos, wall_pos) ) - return obstacle_list - - def repeller_stochastic(self): - """ - Returns the linear velocity for the opponent - :param pose: Pose of points of all repellers - :type pose: array -> [x, y, rotation] - """ - obstacle_pos = self.get_repellers() - opponent_pos = self.get_opponent_pose().copy() - - # Calculate over all the workspace - distance = np.array([np.linalg.norm(diff) for diff in (obstacle_pos - opponent_pos[0:2])]) - - # Check if any obstacles are around - dist_idx = np.where(distance < self.DIST_INFLUENCE)[0] - - # Take a random step if no repellers are close by, making it a non-stationary target - if len(dist_idx) == 0: - #print("Random - no repellers close") - lin, rot = self.noise_process.sample() - escape_linear = np.clip(lin, 0.5, 1) - escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot) - return np.hstack((escape_linear, escape_ang_rot)) - - repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0) - # Use repeller force as linear velocity to escape - repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2 - escape_linear = np.clip(np.mean(repel_force), 0.3, 1) - escape_xpos = opponent_pos[0:2] - repel_COM - - equil_idx = np.where(np.abs(escape_xpos) <= 0.1 )[0] - if len(equil_idx) != 0: - for idx in equil_idx: - escape_xpos[idx] = -1*np.sign(escape_xpos[idx]) * self.rng.uniform(low=0.3, high=0.9) - - escape_direction = np.arctan2(escape_xpos[1], escape_xpos[0]) # Direction - escape_direction = escape_direction + 1.57 # Account for rotation in world frame - - # Determines turning direction - escape_ang_rot = self._calc_angular_vel(opponent_pos[2], escape_direction) - - return np.hstack((escape_linear, escape_ang_rot)) - - def _calc_angular_vel(self, current_pos, desired_pos): - # Checking for sign of the current position and escape position to prevent inefficient turning - # E.g. 3.14 and -3.14 are pointing in the same direction, so a simple substraction of facing direction will make the opponent turn a lot - - # Bring the current pos and desired pos to be between 0 to 2pi - if current_pos > (2*np.pi): - while current_pos > (2*np.pi): - current_pos = current_pos - (2*np.pi) - elif np.sign(current_pos) < 0: - while np.sign(current_pos) < 0: - current_pos = current_pos + (2*np.pi) - - if desired_pos > (2*np.pi): - while desired_pos > (2*np.pi): - desired_pos = desired_pos - (2*np.pi) - elif np.sign(desired_pos) < 0: - while np.sign(desired_pos) < 0: - desired_pos = desired_pos + (2*np.pi) - - direction_clock = np.abs(0 - current_pos) + (2*np.pi - desired_pos) # Clockwise rotation - direction_anticlock = (2*np.pi - current_pos) + (0 + desired_pos) # Anticlockwise rotation - - if direction_clock < direction_anticlock: - return 1 - else: - return -1 - - def repeller_policy(self): - """ - This uses the repeller policy to move the opponent. - """ - return self.repeller_stochastic() - - def sample_opponent_policy(self): - """ - Takes in three probabilities and returns the policies with the given frequency. 
- """ - rand_num = self.rng.uniform() - if rand_num < self.opponent_probabilities[0]: - self.opponent_policy = 'static_stationary' - elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1]: - self.opponent_policy = 'stationary' - elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1] + self.opponent_probabilities[2]: - self.opponent_policy = 'random' - else: - self.opponent_policy = 'repeller' - - def update_opponent_state(self): - """ - This function executes an opponent step with - one of the control policies. - """ - if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary': - opponent_vel = np.zeros(2,) - - elif self.opponent_policy == 'random': - opponent_vel = self.random_movement() - - elif self.opponent_policy == 'repeller': - opponent_vel = self.repeller_policy() - else: - raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. Policy was: {self.opponent_policy}") - self.move_opponent(opponent_vel) - - class ChaseTagEnvV0(WalkEnvV0): DEFAULT_OBS_KEYS = [ From c25deface46c0d858d8230877668cc5e2156fe6f Mon Sep 17 00:00:00 2001 From: elladyr Date: Sat, 7 Oct 2023 14:56:51 -0400 Subject: [PATCH 08/19] Registered Relocate P2eval --- myosuite/envs/myo/myochallenge/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py index 4f55b4d9..af17a8bc 100644 --- a/myosuite/envs/myo/myochallenge/__init__.py +++ b/myosuite/envs/myo/myochallenge/__init__.py @@ -41,6 +41,26 @@ } ) +# Register MyoChallenge Manipulation P2 Evals +register(id='myoChallengeRelocateP2eval-v0', + entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0', + max_episode_steps=150, + kwargs={ + 'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb', + 'normalize_act': True, + 'frame_skip': 5, + 'pos_th': 0.1, # cover entire base of the receptacle + 'rot_th': np.inf, # ignore rotation errors + 'qpos_noise_range':0.015, # jnt initialization range + 'target_xyz_range': {'high':[0.4, -.1, 1.1], 'low':[-.5, -.5, .9]}, + 'target_rxryrz_range': {'high':[.3, .3, .3], 'low':[-.3, -.3, -.3]}, + 'obj_xyz_range': {'high':[0.15, -.10, 1.0], 'low':[-0.20, -.40, 1.0]}, + 'obj_geom_range': {'high':[.025, .025, .035], 'low':[.015, 0.015, 0.015]}, + 'obj_mass_range': {'high':0.300, 'low':0.050},# 50gms to 250 gms + 'obj_friction_range': {'high':[1.2, 0.006, 0.00012], 'low':[0.8, 0.004, 0.00008]} + } +) + ## MyoChallenge Locomotion P1 register(id='myoChallengeChaseTagP1-v0', From 250bcc64fac17cc9718084ccb0d860b3a7b1daa9 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Mon, 18 Sep 2023 21:32:49 +0200 Subject: [PATCH 09/19] added repeller policy --- myosuite/envs/myo/myochallenge/chasetag_v0.py | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index 64883082..e9d76d56 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -536,6 +536,161 @@ def update_opponent_state(self): self.move_opponent(opponent_vel) +class RepellerChallengeOpponent(ChallengeOpponent): + # Repeller parameters + DIST_INFLUENCE = 3.5 # Distance of influence by the repeller + ETA = 20.0 # Scaling factor + MIN_SPAWN_DIST = 1.5 + + def __init__(self, sim, rng, probabilities, min_spawn_distance): + 
self.dt = 0.01 + self.sim = sim + self.rng = rng + self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2] + self.min_spawn_distance = min_spawn_distance + self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng) + self.reset_opponent() + + def get_agent_pos(self): + """ + Get agent Pose + :param pose: Pose of the agent, measured from the pelvis. + :type pose: array -> [x, y] + """ + return self.sim.data.body('pelvis').xpos[:2] + + def get_wall_pos(self): + """ + Get location of quad boundaries. + :param pose: Pose of points along quad boundaries. + :type pose: array -> [x, y] + """ + bound_resolution = np.linspace(-8.7, 8.7, 25) + right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]), + np.array([[-8.7,x] for x in bound_resolution])) ) + all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) ) + + return all_bounds + + def get_repellers(self): + """ + Get location of all repellers. + :param pose: Pose of all repellers + :type pose: array -> [x, y] + """ + agent_pos = self.get_agent_pos() + wall_pos = self.get_wall_pos() + + obstacle_list = np.vstack( (agent_pos, wall_pos) ) + return obstacle_list + + def repeller_stochastic(self): + """ + Returns the linear velocity for the opponent + :param pose: Pose of points of all repellers + :type pose: array -> [x, y, rotation] + """ + obstacle_pos = self.get_repellers() + opponent_pos = self.get_opponent_pose().copy() + + # Calculate over all the workspace + distance = np.array([np.linalg.norm(diff) for diff in (obstacle_pos - opponent_pos[0:2])]) + + # Check if any obstacles are around + dist_idx = np.where(distance < self.DIST_INFLUENCE)[0] + + # Take a random step if no repellers are close by, making it a non-stationary target + if len(dist_idx) == 0: + #print("Random - no repellers close") + lin, rot = self.noise_process.sample() + escape_linear = np.clip(lin, 0.5, 1) + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot) + return np.hstack((escape_linear, escape_ang_rot)) + + repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0) + # Use repeller force as linear velocity to escape + repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2 + escape_linear = np.clip(np.mean(repel_force), 0.3, 1) + escape_xpos = opponent_pos[0:2] - repel_COM + + equil_idx = np.where(np.abs(escape_xpos) <= 0.1 )[0] + if len(equil_idx) != 0: + for idx in equil_idx: + escape_xpos[idx] = -1*np.sign(escape_xpos[idx]) * self.rng.uniform(low=0.3, high=0.9) + + escape_direction = np.arctan2(escape_xpos[1], escape_xpos[0]) # Direction + escape_direction = escape_direction + 1.57 # Account for rotation in world frame + + # Determines turning direction + escape_ang_rot = self._calc_angular_vel(opponent_pos[2], escape_direction) + + return np.hstack((escape_linear, escape_ang_rot)) + + def _calc_angular_vel(self, current_pos, desired_pos): + # Checking for sign of the current position and escape position to prevent inefficient turning + # E.g. 
3.14 and -3.14 are pointing in the same direction, so a simple substraction of facing direction will make the opponent turn a lot + + # Bring the current pos and desired pos to be between 0 to 2pi + if current_pos > (2*np.pi): + while current_pos > (2*np.pi): + current_pos = current_pos - (2*np.pi) + elif np.sign(current_pos) < 0: + while np.sign(current_pos) < 0: + current_pos = current_pos + (2*np.pi) + + if desired_pos > (2*np.pi): + while desired_pos > (2*np.pi): + desired_pos = desired_pos - (2*np.pi) + elif np.sign(desired_pos) < 0: + while np.sign(desired_pos) < 0: + desired_pos = desired_pos + (2*np.pi) + + direction_clock = np.abs(0 - current_pos) + (2*np.pi - desired_pos) # Clockwise rotation + direction_anticlock = (2*np.pi - current_pos) + (0 + desired_pos) # Anticlockwise rotation + + if direction_clock < direction_anticlock: + return 1 + else: + return -1 + + def repeller_policy(self): + """ + This uses the repeller policy to move the opponent. + """ + return self.repeller_stochastic() + + def sample_opponent_policy(self): + """ + Takes in three probabilities and returns the policies with the given frequency. + """ + rand_num = self.rng.uniform() + if rand_num < self.opponent_probabilities[0]: + self.opponent_policy = 'static_stationary' + elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1]: + self.opponent_policy = 'stationary' + elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1] + self.opponent_probabilities[2]: + self.opponent_policy = 'random' + else: + self.opponent_policy = 'repeller' + + def update_opponent_state(self): + """ + This function executes an opponent step with + one of the control policies. + """ + if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary': + opponent_vel = np.zeros(2,) + + elif self.opponent_policy == 'random': + opponent_vel = self.random_movement() + + elif self.opponent_policy == 'repeller': + opponent_vel = self.repeller_policy() + else: + raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. 
Policy was: {self.opponent_policy}") + self.move_opponent(opponent_vel) + + class ChaseTagEnvV0(WalkEnvV0): DEFAULT_OBS_KEYS = [ From 5f834e90ac208a91cad4740947717a4e97967cee Mon Sep 17 00:00:00 2001 From: elladyr Date: Tue, 10 Oct 2023 10:05:42 -0400 Subject: [PATCH 10/19] Added wheel creation script --- create_wheel.sh | 1 + 1 file changed, 1 insertion(+) create mode 100644 create_wheel.sh diff --git a/create_wheel.sh b/create_wheel.sh new file mode 100644 index 00000000..7576a81e --- /dev/null +++ b/create_wheel.sh @@ -0,0 +1 @@ +python3 setup.py bdist_wheel --universal \ No newline at end of file From befe09cb4297ec2f88513555cc43f2202eb7bf6a Mon Sep 17 00:00:00 2001 From: elladyr Date: Tue, 10 Oct 2023 11:22:46 -0400 Subject: [PATCH 11/19] Fixed rebasing duplicates --- myosuite/envs/myo/myochallenge/chasetag_v0.py | 155 ------------------ 1 file changed, 155 deletions(-) diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index e9d76d56..64883082 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -536,161 +536,6 @@ def update_opponent_state(self): self.move_opponent(opponent_vel) -class RepellerChallengeOpponent(ChallengeOpponent): - # Repeller parameters - DIST_INFLUENCE = 3.5 # Distance of influence by the repeller - ETA = 20.0 # Scaling factor - MIN_SPAWN_DIST = 1.5 - - def __init__(self, sim, rng, probabilities, min_spawn_distance): - self.dt = 0.01 - self.sim = sim - self.rng = rng - self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2] - self.min_spawn_distance = min_spawn_distance - self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng) - self.reset_opponent() - - def get_agent_pos(self): - """ - Get agent Pose - :param pose: Pose of the agent, measured from the pelvis. - :type pose: array -> [x, y] - """ - return self.sim.data.body('pelvis').xpos[:2] - - def get_wall_pos(self): - """ - Get location of quad boundaries. - :param pose: Pose of points along quad boundaries. - :type pose: array -> [x, y] - """ - bound_resolution = np.linspace(-8.7, 8.7, 25) - right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]), - np.array([[-8.7,x] for x in bound_resolution])) ) - all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) ) - - return all_bounds - - def get_repellers(self): - """ - Get location of all repellers. 
- :param pose: Pose of all repellers - :type pose: array -> [x, y] - """ - agent_pos = self.get_agent_pos() - wall_pos = self.get_wall_pos() - - obstacle_list = np.vstack( (agent_pos, wall_pos) ) - return obstacle_list - - def repeller_stochastic(self): - """ - Returns the linear velocity for the opponent - :param pose: Pose of points of all repellers - :type pose: array -> [x, y, rotation] - """ - obstacle_pos = self.get_repellers() - opponent_pos = self.get_opponent_pose().copy() - - # Calculate over all the workspace - distance = np.array([np.linalg.norm(diff) for diff in (obstacle_pos - opponent_pos[0:2])]) - - # Check if any obstacles are around - dist_idx = np.where(distance < self.DIST_INFLUENCE)[0] - - # Take a random step if no repellers are close by, making it a non-stationary target - if len(dist_idx) == 0: - #print("Random - no repellers close") - lin, rot = self.noise_process.sample() - escape_linear = np.clip(lin, 0.5, 1) - escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot) - return np.hstack((escape_linear, escape_ang_rot)) - - repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0) - # Use repeller force as linear velocity to escape - repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2 - escape_linear = np.clip(np.mean(repel_force), 0.3, 1) - escape_xpos = opponent_pos[0:2] - repel_COM - - equil_idx = np.where(np.abs(escape_xpos) <= 0.1 )[0] - if len(equil_idx) != 0: - for idx in equil_idx: - escape_xpos[idx] = -1*np.sign(escape_xpos[idx]) * self.rng.uniform(low=0.3, high=0.9) - - escape_direction = np.arctan2(escape_xpos[1], escape_xpos[0]) # Direction - escape_direction = escape_direction + 1.57 # Account for rotation in world frame - - # Determines turning direction - escape_ang_rot = self._calc_angular_vel(opponent_pos[2], escape_direction) - - return np.hstack((escape_linear, escape_ang_rot)) - - def _calc_angular_vel(self, current_pos, desired_pos): - # Checking for sign of the current position and escape position to prevent inefficient turning - # E.g. 3.14 and -3.14 are pointing in the same direction, so a simple substraction of facing direction will make the opponent turn a lot - - # Bring the current pos and desired pos to be between 0 to 2pi - if current_pos > (2*np.pi): - while current_pos > (2*np.pi): - current_pos = current_pos - (2*np.pi) - elif np.sign(current_pos) < 0: - while np.sign(current_pos) < 0: - current_pos = current_pos + (2*np.pi) - - if desired_pos > (2*np.pi): - while desired_pos > (2*np.pi): - desired_pos = desired_pos - (2*np.pi) - elif np.sign(desired_pos) < 0: - while np.sign(desired_pos) < 0: - desired_pos = desired_pos + (2*np.pi) - - direction_clock = np.abs(0 - current_pos) + (2*np.pi - desired_pos) # Clockwise rotation - direction_anticlock = (2*np.pi - current_pos) + (0 + desired_pos) # Anticlockwise rotation - - if direction_clock < direction_anticlock: - return 1 - else: - return -1 - - def repeller_policy(self): - """ - This uses the repeller policy to move the opponent. - """ - return self.repeller_stochastic() - - def sample_opponent_policy(self): - """ - Takes in three probabilities and returns the policies with the given frequency. 
- """ - rand_num = self.rng.uniform() - if rand_num < self.opponent_probabilities[0]: - self.opponent_policy = 'static_stationary' - elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1]: - self.opponent_policy = 'stationary' - elif rand_num < self.opponent_probabilities[0] + self.opponent_probabilities[1] + self.opponent_probabilities[2]: - self.opponent_policy = 'random' - else: - self.opponent_policy = 'repeller' - - def update_opponent_state(self): - """ - This function executes an opponent step with - one of the control policies. - """ - if self.opponent_policy == 'stationary' or self.opponent_policy == 'static_stationary': - opponent_vel = np.zeros(2,) - - elif self.opponent_policy == 'random': - opponent_vel = self.random_movement() - - elif self.opponent_policy == 'repeller': - opponent_vel = self.repeller_policy() - else: - raise NotImplementedError(f"This opponent policy doesn't exist. Chose: static_stationary, stationary, random or repeller. Policy was: {self.opponent_policy}") - self.move_opponent(opponent_vel) - - class ChaseTagEnvV0(WalkEnvV0): DEFAULT_OBS_KEYS = [ From b925b7c50eeea4647cc6b5b3dd6b167c7bf53569 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Sat, 30 Dec 2023 10:07:41 +0100 Subject: [PATCH 12/19] removed file --- create_wheel.sh | 1 - 1 file changed, 1 deletion(-) delete mode 100644 create_wheel.sh diff --git a/create_wheel.sh b/create_wheel.sh deleted file mode 100644 index 7576a81e..00000000 --- a/create_wheel.sh +++ /dev/null @@ -1 +0,0 @@ -python3 setup.py bdist_wheel --universal \ No newline at end of file From 9295f5171636644463cc67a3c18cd16d20697d11 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Sat, 30 Dec 2023 10:46:50 +0100 Subject: [PATCH 13/19] extracted locomotion evaluation env into separate class from public evaluation env --- myosuite/envs/myo/myochallenge/__init__.py | 21 +++++++++++++++++++ myosuite/envs/myo/myochallenge/chasetag_v0.py | 19 ++++++++++++----- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py index af17a8bc..a62e3954 100644 --- a/myosuite/envs/myo/myochallenge/__init__.py +++ b/myosuite/envs/myo/myochallenge/__init__.py @@ -96,9 +96,30 @@ 'hills_range': (0.03, 0.23), 'rough_range': (0.05, 0.1), 'relief_range': (0.1, 0.3), + 'velocity': 1.0, + 'repeller_opponent': False } ) +# Register MyoChallenge Locomotion P2 Evals +register(id='myoChallengeChaseTagP2eval-v0', + entry_point='myosuite.envs.myo.myochallenge.chasetag_v0:ChaseTagEnvV0', + max_episode_steps=2000, + kwargs={ + 'model_path': curr_dir+'/../assets/leg/myolegs_chasetag.xml', + 'normalize_act': True, + 'win_distance': 0.5, + 'min_spawn_distance': 2, + 'reset_type': 'random', # none, init, random + 'terrain': 'random', # FLAT, random + 'task_choice': 'random', # CHASE, EVADE, random + 'hills_range': (0.03, 0.23), + 'rough_range': (0.05, 0.1), + 'relief_range': (0.1, 0.3), + 'velocity': 'random', + 'repeller_opponent': True + } + ) # MyoChallenge 2022 envs ============================================== # MyoChallenge Die: Trial env diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index 64883082..59bd3697 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -37,12 +37,13 @@ class ChallengeOpponent: Contains several different policies. For the final evaluation, an additional non-disclosed policy will be used. 
""" - def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float): + def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float, linear_velocity: float): self.dt = 0.01 self.sim = sim self.opponent_probabilities = probabilities self.min_spawn_distance = min_spawn_distance - self.linear_velocity = 1.0 + self.randomize_vel = True if linear_velocity == 'random' else False + self.linear_velocity = linear_velocity if type(linear_velocity) == float or type(linear_velocity) == int else 1.0 self.reset_opponent(rng=rng) def reset_noise_process(self): @@ -153,7 +154,8 @@ def reset_opponent(self, player_task='CHASE', rng=None): self.opponent_vel[:] = 0.0 # Randomize opponent forward velocity - self.linear_velocity = self.rng.uniform(1, 5) + if self.randomize_vel: + self.linear_velocity = self.rng.uniform(1, 5) def chase_player(self): """ @@ -384,13 +386,15 @@ class RepellerChallengeOpponent(ChallengeOpponent): ETA = 20.0 # Scaling factor MIN_SPAWN_DIST = 1.5 - def __init__(self, sim, rng, probabilities, min_spawn_distance): + def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float, linear_velocity: float): self.dt = 0.01 self.sim = sim self.rng = rng self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2] self.min_spawn_distance = min_spawn_distance self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng) + self.randomize_vel = True if linear_velocity == 'random' else False + self.linear_velocity = linear_velocity if type(linear_velocity) == float or type(linear_velocity) == int else 1.0 self.reset_opponent() def get_agent_pos(self): @@ -591,6 +595,8 @@ def _setup(self, hills_range=(0,0), rough_range=(0,0), relief_range=(0,0), + velocity=1.0, + repeller_opponent=False, **kwargs, ): @@ -609,7 +615,10 @@ def _setup(self, self.win_distance = win_distance self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes'] - self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance) + if repeller_opponent: + self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, linear_velocity=velocity) + else: + self.opponent = ChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, linear_velocity=velocity) self.success_indicator_sid = self.sim.model.site_name2id("opponent_indicator") self.current_task = Task.CHASE super()._setup(obs_keys=obs_keys, From 1d3c3ffc2b484d32dde68ab2e208b263ba00f331 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Sun, 31 Dec 2023 16:24:29 +0100 Subject: [PATCH 14/19] modified construction arguments for chasetag tasks and default arguments --- myosuite/envs/myo/myochallenge/__init__.py | 11 +++--- myosuite/envs/myo/myochallenge/chasetag_v0.py | 34 ++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py index a62e3954..a0be4348 100644 --- a/myosuite/envs/myo/myochallenge/__init__.py +++ b/myosuite/envs/myo/myochallenge/__init__.py @@ -77,6 +77,7 @@ 'hills_range': (0.0, 0.0), 'rough_range': (0.0, 0.0), 'relief_range': (0.0, 0.0), + 'opponent_probabilities': [0.1, 0.45, 0.45], } ) @@ -96,8 +97,9 @@ 'hills_range': (0.03, 0.23), 'rough_range': (0.05, 0.1), 'relief_range': (0.1, 0.3), - 'velocity': 1.0, - 'repeller_opponent': False + 
+         'vel_range': (1.0, 1.0),  # only for chase
+         'opponent_probabilities': [0.1, 0.45, 0.45],
      }
 )
@@ -116,8 +118,9 @@
          'hills_range': (0.03, 0.23),
          'rough_range': (0.05, 0.1),
          'relief_range': (0.1, 0.3),
-         'velocity': 'random',
-         'repeller_opponent': True
+         'repeller_opponent': True,
+         'vel_range': (1, 5),  # only for chase
+         'opponent_probabilities': [0.1, 0.35, 0.35, 0.2],
      }
 )
diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index 59bd3697..30a8239d 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -37,13 +37,17 @@ class ChallengeOpponent:
     Contains several different policies. For the final evaluation, an additional
     non-disclosed policy will be used.
     """
-    def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float, linear_velocity: float):
+    def __init__(self,
+                 sim,
+                 rng,
+                 probabilities: list[float],
+                 min_spawn_distance: float,
+                 vel_range: tuple[float]):
         self.dt = 0.01
         self.sim = sim
         self.opponent_probabilities = probabilities
         self.min_spawn_distance = min_spawn_distance
-        self.randomize_vel = True if linear_velocity == 'random' else False
-        self.linear_velocity = linear_velocity if type(linear_velocity) == float or type(linear_velocity) == int else 1.0
+        self.vel_range = vel_range
         self.reset_opponent(rng=rng)
 
     def reset_noise_process(self):
@@ -154,8 +158,7 @@ def reset_opponent(self, player_task='CHASE', rng=None):
         self.opponent_vel[:] = 0.0
 
         # Randomize opponent forward velocity
-        if self.randomize_vel:
-            self.linear_velocity = self.rng.uniform(1, 5)
+        self.linear_velocity = self.rng.uniform(self.vel_range[0], self.vel_range[1])
 
     def chase_player(self):
         """
@@ -386,15 +389,20 @@ class RepellerChallengeOpponent(ChallengeOpponent):
     ETA = 20.0              # Scaling factor
     MIN_SPAWN_DIST = 1.5
 
-    def __init__(self, sim, rng, probabilities: list, min_spawn_distance: float, linear_velocity: float):
+    def __init__(self,
+                 sim,
+                 rng,
+                 probabilities: list[float],
+                 min_spawn_distance: float,
+                 vel_range: list[float]):
         self.dt = 0.01
         self.sim = sim
         self.rng = rng
-        self.opponent_probabilities = [0.1, 0.35, 0.35, 0.2]
+        self.opponent_probabilities = probabilities
+
         self.min_spawn_distance = min_spawn_distance
         self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng)
-        self.randomize_vel = True if linear_velocity == 'random' else False
-        self.linear_velocity = linear_velocity if type(linear_velocity) == float or type(linear_velocity) == int else 1.0
+        self.vel_range = vel_range
         self.reset_opponent()
 
     def get_agent_pos(self):
@@ -594,7 +602,6 @@ def __init__(self, model_path, obsd_model_path=None, seed=None, **kwargs):
     def _setup(self,
                obs_keys: list = DEFAULT_OBS_KEYS,
                weighted_reward_keys: dict = DEFAULT_RWD_KEYS_AND_WEIGHTS,
-               opponent_probabilities=[0.1, 0.45, 0.45],
                reset_type='none',
                win_distance=0.5,
                min_spawn_distance=2,
@@ -602,8 +609,9 @@ def _setup(self,
                hills_range=(0,0),
                rough_range=(0,0),
                relief_range=(0,0),
-               velocity=1.0,
                repeller_opponent=False,
+               vel_range=(1.0, 1.0),
+               opponent_probabilities=[0.1, 0.45, 0.45],
                **kwargs,
                ):
 
@@ -616,9 +624,9 @@ def _setup(self,
         self.win_distance = win_distance
         self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes']
         if repeller_opponent:
-            self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, linear_velocity=velocity)
+            self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, vel_range=vel_range)
         else:
-            self.opponent = ChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, linear_velocity=velocity)
+            self.opponent = ChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, vel_range=vel_range)
         self.success_indicator_sid = self.sim.model.site_name2id("opponent_indicator")
         self.current_task = Task.CHASE
         super()._setup(obs_keys=obs_keys,

From 05740fc04404ea8cbc3b4c34991716978690a62f Mon Sep 17 00:00:00 2001
From: Pierre Schumacher
Date: Sun, 31 Dec 2023 16:31:18 +0100
Subject: [PATCH 15/19] made bound_resolutions modifiable in class definition

---
 myosuite/envs/myo/myochallenge/chasetag_v0.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index 30a8239d..d01b054c 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -388,6 +388,7 @@ class RepellerChallengeOpponent(ChallengeOpponent):
     DIST_INFLUENCE = 3.5    # Distance of influence by the repeller
     ETA = 20.0              # Scaling factor
     MIN_SPAWN_DIST = 1.5
+    BOUND_RESOLUTIONS = [-8.7, 8.7, 25]
 
     def __init__(self,
@@ -419,7 +420,7 @@ def get_wall_pos(self):
         :param pose: Pose of points along quad boundaries.
         :type pose: array -> [x, y]
         """
-        bound_resolution = np.linspace(-8.7, 8.7, 25)
+        bound_resolution = np.linspace(self.BOUND_RESOLUTIONS[0], self.BOUND_RESOLUTIONS[1], self.BOUND_RESOLUTIONS[2])
         right_left_bounds = np.vstack( (np.array([[8.7,x] for x in bound_resolution]),
                                         np.array([[-8.7,x] for x in bound_resolution])) )
         all_bounds = np.vstack( (right_left_bounds, right_left_bounds[:,[1,0]]) )

From 345b3f828f874b587ae639e2e66fb2332ef25574 Mon Sep 17 00:00:00 2001
From: Pierre Schumacher
Date: Mon, 1 Jan 2024 16:33:03 +0100
Subject: [PATCH 16/19] different parametrization for opponent velocities

---
 myosuite/envs/myo/myochallenge/__init__.py    | 13 ++--
 myosuite/envs/myo/myochallenge/chasetag_v0.py | 64 ++++++++++++++-----
 2 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py
index a0be4348..5cd59c36 100644
--- a/myosuite/envs/myo/myochallenge/__init__.py
+++ b/myosuite/envs/myo/myochallenge/__init__.py
@@ -77,7 +77,7 @@
          'hills_range': (0.0, 0.0),
          'rough_range': (0.0, 0.0),
          'relief_range': (0.0, 0.0),
-         'opponent_probabilities': [0.1, 0.45, 0.45],
+         'opponent_probabilities': (0.1, 0.45, 0.45),
      }
 )
@@ -97,8 +97,9 @@
          'rough_range': (0.05, 0.1),
          'relief_range': (0.1, 0.3),
          'repeller_opponent': False,
-         'vel_range': (1.0, 1.0),  # only for chase
-         'opponent_probabilities': [0.1, 0.45, 0.45],
+         'chase_vel_range': (1.0, 1.0),  # randomly drawn
+         'random_vel_range': (-2, 2),  # clipped
+         'opponent_probabilities': (0.1, 0.45, 0.45),
      }
 )
@@ -118,8 +119,10 @@
          'rough_range': (0.05, 0.1),
          'relief_range': (0.1, 0.3),
          'repeller_opponent': True,
-         'vel_range': (1, 5),  # only for chase
-         'opponent_probabilities': [0.1, 0.35, 0.35, 0.2],
+         'chase_vel_range': (1, 5),  # randomly drawn
+         'random_vel_range': (-2, 2),  # clipped
+         'repeller_vel_range': (0.3, 1),  # clipped
+         'opponent_probabilities': (0.1, 0.35, 0.35, 0.2),
      }
 )
diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index d01b054c..a78c4432 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -40,14 +40,17 @@ class ChallengeOpponent:
     def __init__(self,
                  sim,
                  rng,
-                 probabilities: list[float],
+                 probabilities: tuple[float],
                  min_spawn_distance: float,
-                 vel_range: tuple[float]):
+                 chase_vel_range: tuple[float],
+                 random_vel_range: tuple[float],
+                 ):
         self.dt = 0.01
         self.sim = sim
         self.opponent_probabilities = probabilities
         self.min_spawn_distance = min_spawn_distance
-        self.vel_range = vel_range
+        self.chase_vel_range = chase_vel_range
+        self.random_vel_range = random_vel_range
         self.reset_opponent(rng=rng)
 
     def reset_noise_process(self):
@@ -98,7 +101,7 @@ def random_movement(self):
         This moves the opponent randomly in a correlated
         pattern.
         """
-        return self.noise_process.sample()
+        return np.clip(self.noise_process.sample(), self.random_vel_range[0], self.random_vel_range[1])
 
     def sample_opponent_policy(self):
@@ -158,7 +161,7 @@ def reset_opponent(self, player_task='CHASE', rng=None):
         self.opponent_vel[:] = 0.0
 
         # Randomize opponent forward velocity
-        self.linear_velocity = self.rng.uniform(self.vel_range[0], self.vel_range[1])
+        self.chase_velocity = self.rng.uniform(self.chase_vel_range[0], self.chase_vel_range[1])
 
     def chase_player(self):
@@ -172,7 +175,7 @@ def chase_player(self):
         new_vec = np.array([np.cos(theta), np.sin(theta)])
         new_vec2 = pel - vec
         vel = np.dot(new_vec, new_vec2)
-        return np.array([self.linear_velocity, vel])
+        return np.array([self.chase_velocity, vel])
 
 
 class HeightField:
@@ -393,9 +396,12 @@ class RepellerChallengeOpponent(ChallengeOpponent):
     def __init__(self,
                  sim,
                  rng,
-                 probabilities: list[float],
+                 probabilities: tuple[float],
                  min_spawn_distance: float,
-                 vel_range: list[float]):
+                 chase_vel_range: tuple[float],
+                 random_vel_range: tuple[float],
+                 repeller_vel_range: tuple[float],
+                 ):
         self.dt = 0.01
         self.sim = sim
         self.rng = rng
@@ -403,7 +409,9 @@ def __init__(self,
         self.min_spawn_distance = min_spawn_distance
         self.noise_process = pink.ColoredNoiseProcess(beta=2, size=(2, 2000), scale=10, rng=rng)
-        self.vel_range = vel_range
+        self.chase_vel_range = chase_vel_range
+        self.random_vel_range = random_vel_range
+        self.repeller_vel_range = repeller_vel_range
         self.reset_opponent()
 
     def get_agent_pos(self):
@@ -456,16 +464,15 @@ def repeller_stochastic(self):
         # Take a random step if no repellers are close by, making it a non-stationary target
         if len(dist_idx) == 0:
-            #print("Random - no repellers close")
             lin, rot = self.noise_process.sample()
-            escape_linear = np.clip(lin, 0.5, 1)
+            escape_linear = np.clip(lin, self.repeller_vel_range[0], self.repeller_vel_range[1])
             escape_ang_rot = self._calc_angular_vel(opponent_pos[2], rot)
             return np.hstack((escape_linear, escape_ang_rot))
 
         repel_COM = np.mean(obstacle_pos[dist_idx,:], axis=0)
         # Use repeller force as linear velocity to escape
         repel_force = 0.5 * self.ETA * ( 1/np.maximum(distance[dist_idx], 0.00001) - 1/self.DIST_INFLUENCE )**2
-        escape_linear = np.clip(np.mean(repel_force), 0.3, 1)
+        escape_linear = np.clip(np.mean(repel_force), self.repeller_vel_range[0], self.repeller_vel_range[1])
         escape_xpos = opponent_pos[0:2] - repel_COM
 
@@ -604,8 +611,10 @@ def _setup(self,
                rough_range=(0,0),
                relief_range=(0,0),
                repeller_opponent=False,
-               vel_range=(1.0, 1.0),
-               opponent_probabilities=[0.1, 0.45, 0.45],
+               chase_vel_range=(1.0, 1.0),
+               random_vel_range=(1.0, 1.0),
repeller_vel_range=(1.0, 1.0), + opponent_probabilities=(0.1, 0.45, 0.45), **kwargs, ): @@ -625,9 +634,22 @@ def _setup(self, self.win_distance = win_distance self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes'] if repeller_opponent: - self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, vel_range=vel_range) + self.opponent = RepellerChallengeOpponent(sim=self.sim, + rng=self.np_random, + probabilities=opponent_probabilities, + min_spawn_distance=min_spawn_distance, + chase_vel_range=chase_vel_range, + random_vel_range=random_vel_range, + repeller_vel_range=repeller_vel_range) else: - self.opponent = ChallengeOpponent(sim=self.sim, rng=self.np_random, probabilities=opponent_probabilities, min_spawn_distance = min_spawn_distance, vel_range=vel_range) + self.opponent = ChallengeOpponent(sim=self.sim, + rng=self.np_random, + probabilities=opponent_probabilities, + min_spawn_distance=min_spawn_distance, + chase_vel_range=chase_vel_range, + random_vel_range=random_vel_range) + + self.success_indicator_sid = self.sim.model.site_name2id("opponent_indicator") self.current_task = Task.CHASE super()._setup(obs_keys=obs_keys, @@ -638,6 +660,16 @@ def _setup(self, self.init_qpos[:] = self.sim.model.key_qpos[0] self.init_qvel[:] = 0.0 self.startFlag = True + self.assert_settings() + + def assert_settings(self): + # chase always positive + assert self.opponent.chase_vel_range[0] >= 0 and self.opponent.chase_vel_range[1] > 0, f"Chase velocity range should be positive. {self.opponent.chase_vel_range}" + # others assert that range end is bigger than range start + assert self.opponent.chase_vel_range[0] <= self.opponent.chase_vel_range[1], f"Chase velocity range is not valid. {self.opponent.chase_vel_range}" + assert self.opponent.random_vel_range[0] <= self.opponent.random_vel_range[1], f"Random movement velocity range is not valid {self.opponent.random_vel_range}" + if hasattr(self.opponent, 'repeller_vel_range'): + assert self.opponent.repeller_vel_range[0] <= self.opponent.repeller_vel_range[1], f"Repeller velocity range is not valid {self.opponent.repeller_vel_range}" def get_obs_dict(self, sim): obs_dict = {} From 6aa5bca49d160efee1c6656ccae202dea63bbdec Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Mon, 1 Jan 2024 19:26:38 +0100 Subject: [PATCH 17/19] wrote documentation for ChaseTag classes. opponent timestep is now given by environment and not hardcoded anymore. added assertions for opponent probabilities. 
--- myosuite/envs/myo/myochallenge/__init__.py | 10 ++--- myosuite/envs/myo/myochallenge/chasetag_v0.py | 44 ++++++++++++++++--- 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py
index 5cd59c36..6f6d67b0 100644
--- a/myosuite/envs/myo/myochallenge/__init__.py
+++ b/myosuite/envs/myo/myochallenge/__init__.py
@@ -98,8 +98,8 @@
 'rough_range': (0.05, 0.1),
 'relief_range': (0.1, 0.3),
 'repeller_opponent': False,
- 'chase_vel_range': (1.0, 1.0), # randomly drawn
- 'random_vel_range': (-2, 2), # clipped
+ 'chase_vel_range': (1.0, 1.0),
+ 'random_vel_range': (-2, 2),
 'opponent_probabilities': (0.1, 0.45, 0.45),
 }
 )
@@ -120,9 +120,9 @@
 'rough_range': (0.05, 0.1),
 'relief_range': (0.1, 0.3),
 'repeller_opponent': True,
- 'chase_vel_range': (1, 5), # randomly drawn
- 'random_vel_range': (-2, 2), # clipped
- 'repeller_vel_range': (0.3, 1), # clipped
+ 'chase_vel_range': (1, 5),
+ 'random_vel_range': (-2, 2),
+ 'repeller_vel_range': (0.3, 1),
 'opponent_probabilities': (0.1, 0.35, 0.35, 0.2),
 }
 )
diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py
index a78c4432..6ba5ebdd 100644
--- a/myosuite/envs/myo/myochallenge/chasetag_v0.py
+++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py
@@ -44,8 +44,19 @@ def __init__(self,
 min_spawn_distance: float,
 chase_vel_range: tuple[float],
 random_vel_range: tuple[float],
+ dt=0.01,
 ):
- self.dt = 0.01
+ """
+ Initialize the opponent class.
+ :param sim: Mujoco sim object.
+ :param rng: np_random generator.
+ :param probabilities: Probabilities for the different policies, (static_stationary, stationary, random).
+ :param min_spawn_distance: Minimum distance for opponent to spawn from the model.
+ :param chase_vel_range: Range of velocities for the chase policy. Randomly drawn.
+ :param random_vel_range: Range of velocities for the random policy. Clipped.
+ :param dt: Simulation timestep.
+ """
+ self.dt = dt
 self.sim = sim
 self.opponent_probabilities = probabilities
 self.min_spawn_distance = min_spawn_distance
@@ -401,8 +412,21 @@ def __init__(self,
 chase_vel_range: tuple[float],
 random_vel_range: tuple[float],
 repeller_vel_range: tuple[float],
+ dt=0.01,
 ):
- self.dt = 0.01
+ """
+ Initialize the opponent class. This class additionally contains a repeller policy which always runs away from the
+ agent.
+ :param sim: Mujoco sim object.
+ :param rng: np_random generator.
+ :param probabilities: Probabilities for the different policies, (static_stationary, stationary, random, repeller).
+ :param min_spawn_distance: Minimum distance for opponent to spawn from the model.
+ :param chase_vel_range: Range of velocities for the chase policy. Randomly drawn.
+ :param random_vel_range: Range of velocities for the random policy. Clipped.
+ :param repeller_vel_range: Range of velocities for the repeller policy. Clipped.
+ :param dt: Simulation timestep.
+ """ + self.dt = dt self.sim = sim self.rng = rng self.opponent_probabilities = probabilities @@ -630,9 +653,6 @@ def _setup(self, self.task_choice = task_choice self.terrain = terrain self.maxTime = 20 - - self.win_distance = win_distance - self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes'] if repeller_opponent: self.opponent = RepellerChallengeOpponent(sim=self.sim, rng=self.np_random, @@ -649,9 +669,11 @@ def _setup(self, chase_vel_range=chase_vel_range, random_vel_range=random_vel_range) - + self.win_distance = win_distance + self.grf_sensor_names = ['r_foot', 'r_toes', 'l_foot', 'l_toes'] self.success_indicator_sid = self.sim.model.site_name2id("opponent_indicator") self.current_task = Task.CHASE + self.repeller_opponent = repeller_opponent super()._setup(obs_keys=obs_keys, weighted_reward_keys=weighted_reward_keys, reset_type=reset_type, @@ -661,6 +683,9 @@ def _setup(self, self.init_qvel[:] = 0.0 self.startFlag = True self.assert_settings() + self.opponent.dt = self.sim.model.opt.timestep * self.frame_skip + + def assert_settings(self): # chase always positive @@ -670,6 +695,12 @@ def assert_settings(self): assert self.opponent.random_vel_range[0] <= self.opponent.random_vel_range[1], f"Random movement velocity range is not valid {self.opponent.random_vel_range}" if hasattr(self.opponent, 'repeller_vel_range'): assert self.opponent.repeller_vel_range[0] <= self.opponent.repeller_vel_range[1], f"Repeller velocity range is not valid {self.opponent.repeller_vel_range}" + if self.repeller_opponent == True: + assert len(self.opponent.opponent_probabilities) == 4, "Repeller opponent requires 4 probabilities" + else: + assert len(self.opponent.opponent_probabilities) == 3, "Standard opponent requires 3 probabilities" + for x in self.opponent.opponent_probabilities: + assert 0 <= x <= 1, "Probabilities should be between 0 and 1" def get_obs_dict(self, sim): obs_dict = {} From 1f67416646b44583259b31039be0fb8187218614 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Mon, 1 Jan 2024 19:28:32 +0100 Subject: [PATCH 18/19] changed mjb to xml in RelocateEval --- myosuite/envs/myo/myochallenge/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/myosuite/envs/myo/myochallenge/__init__.py b/myosuite/envs/myo/myochallenge/__init__.py index 6f6d67b0..629c0950 100644 --- a/myosuite/envs/myo/myochallenge/__init__.py +++ b/myosuite/envs/myo/myochallenge/__init__.py @@ -46,7 +46,7 @@ entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0', max_episode_steps=150, kwargs={ - 'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb', + 'model_path': curr_dir + '/../assets/arm/myoarm_relocate.xml', 'normalize_act': True, 'frame_skip': 5, 'pos_th': 0.1, # cover entire base of the receptacle From 88ce7bdf56b312f86ef652d9da3cc968adcbed68 Mon Sep 17 00:00:00 2001 From: Pierre Schumacher Date: Mon, 1 Jan 2024 20:56:54 +0100 Subject: [PATCH 19/19] changed type hinting to typing module to pass python 3.8 tests --- myosuite/envs/myo/myochallenge/chasetag_v0.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/myosuite/envs/myo/myochallenge/chasetag_v0.py b/myosuite/envs/myo/myochallenge/chasetag_v0.py index 6ba5ebdd..79cc8c86 100644 --- a/myosuite/envs/myo/myochallenge/chasetag_v0.py +++ b/myosuite/envs/myo/myochallenge/chasetag_v0.py @@ -9,7 +9,7 @@ import pink import os from enum import Enum -from typing import Optional +from typing import Optional, Tuple from myosuite.envs.myo.base_v0 import 
BaseV0
 from myosuite.envs.myo.myobase.walk_v0 import WalkEnvV0
@@ -40,10 +40,10 @@ class ChallengeOpponent:
 def __init__(self,
 sim,
 rng,
- probabilities: tuple[float],
+ probabilities: Tuple[float, ...],
 min_spawn_distance: float,
- chase_vel_range: tuple[float],
- random_vel_range: tuple[float],
+ chase_vel_range: Tuple[float, float],
+ random_vel_range: Tuple[float, float],
 dt=0.01,
 ):
 """
@@ -407,11 +407,11 @@ class RepellerChallengeOpponent(ChallengeOpponent):
 def __init__(self,
 sim,
 rng,
- probabilities: tuple[float],
+ probabilities: Tuple[float, ...],
 min_spawn_distance: float,
- chase_vel_range: tuple[float],
- random_vel_range: tuple[float],
- repeller_vel_range: tuple[float],
+ chase_vel_range: Tuple[float, float],
+ random_vel_range: Tuple[float, float],
+ repeller_vel_range: Tuple[float, float],
 dt=0.01,
 ):
 """
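
Usage sketch for the opponent-velocity parametrization introduced in PATCH 16-17 (editor's illustration, not part of the patch series): the new kwargs flow through the env registrations in myosuite/envs/myo/myochallenge/__init__.py, so a variant with custom ranges can be layered on top of an existing ChaseTag env. The env id 'myoChallengeChaseTagP1-v0', the import path of register_env_variant, and the range values below are assumptions for illustration; only the kwarg names and the assertion rules come from the patches.

    import gym
    import myosuite  # registers the MyoChallenge envs on import
    from myosuite.envs.env_variants import register_env_variant  # path assumed

    # chase_vel_range is drawn uniformly at every opponent reset, while
    # random_vel_range clips the correlated-noise velocities (see
    # ChallengeOpponent.reset_opponent and random_movement). assert_settings()
    # additionally requires exactly 3 probabilities for the standard opponent
    # (4 with repeller_opponent=True), each in [0, 1].
    register_env_variant(
        env_id='myoChallengeChaseTagP1-v0',      # assumed base env id
        variants={
            'chase_vel_range': (0.5, 2.0),       # illustrative values
            'random_vel_range': (-1.0, 1.0),
            'opponent_probabilities': (0.2, 0.4, 0.4),
        },
        variant_id='myoChaseTagSlowOpponent-v0',
    )
    env = gym.make('myoChaseTagSlowOpponent-v0')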
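Why PATCH 19 is needed, in one self-contained example (editor's sketch): annotations are evaluated when the def statement executes, and subscripting the builtin tuple there fails on Python 3.8, whereas typing.Tuple works; PEP 585 only made builtin generics subscriptable from Python 3.9.

    from typing import Tuple

    # OK on Python 3.8+: typing.Tuple supports subscription.
    def span(vel_range: Tuple[float, float]) -> float:
        return vel_range[1] - vel_range[0]

    # On Python 3.8 the following raises
    #   TypeError: 'type' object is not subscriptable
    # as soon as the def statement is executed, which is why the chasetag
    # signatures had to move from tuple[...] to typing.Tuple:
    # def span38(vel_range: tuple[float, float]) -> float: ...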