
Commit

Merge pull request #99 from MyoHub/dev
MyoChallenge Phase 2
Vittorio-Caggiano authored Oct 5, 2023
2 parents a6ad6f6 + b58e604 commit b25023e
Showing 117 changed files with 619 additions and 103 deletions.
2 changes: 1 addition & 1 deletion docs/source/baselines.rst
@@ -90,7 +90,7 @@ Launch training

DEP-RL baseline
```````````````
We provide `deprl <https://github.com/martius-lab/depRL>`_ as an additional baseline for locomotion policies. The controller was adapted from the original paper and produces robust locomotion policies with the MyoLeg through the use of a self-organizing exploration method.
We provide `deprl <https://github.com/martius-lab/depRL>`_ as an additional baseline for locomotion policies. You can find more detailed explanations and documentation on how to use it `here <https://deprl.readthedocs.io/en/latest/index.html>`__. The controller was adapted from the original paper and produces robust locomotion policies with the MyoLeg through the use of a self-organizing exploration method.
While DEP-RL can be used for any kind of RL task, we provide a pre-trained controller and training settings for the `myoLegWalk-v0` task.
See `this tutorial <https://github.com/facebookresearch/myosuite/blob/main/docs/source/tutorials/4a_deprl.ipynb>`_ for a more detailed walkthrough.

1 change: 1 addition & 0 deletions docs/source/tutorials.rst
@@ -165,6 +165,7 @@ When using ``mjrl`` it might be needed to resume training of a policy locally. I

Load DEP-RL Baseline
====================
See `here <https://deprl.readthedocs.io/en/latest/index.html>`__ for more detailed documentation of ``deprl``.

If you want to load and execute the pre-trained DEP-RL baseline, make sure that the ``deprl`` package is installed.
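
A minimal loading sketch (assuming the classic Gym step/reset API and that ``deprl`` exposes a ``load_baseline`` helper returning a policy callable on observations; see the deprl documentation linked above for the exact API):

.. code-block:: python

    import gym
    import myosuite  # noqa: registers the MyoSuite environments
    import deprl

    env = gym.make('myoLegWalk-v0')
    policy = deprl.load_baseline(env)  # pre-trained locomotion baseline (assumed helper)

    obs = env.reset()
    for _ in range(1000):
        action = policy(obs)
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()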

10 changes: 5 additions & 5 deletions myosuite/__init__.py
@@ -23,12 +23,12 @@
myosuite_env_suite = myosuite_env_suite | myosuite_myochal_suite
myosuite_myochal_suite = sorted(myosuite_myochal_suite)

# Register MyoDex Suite
# Register MyoDM Suite
import myosuite.envs.myo # noqa
import myosuite.envs.myo.myodex # noqa
myosuite_myodex_suite = set(gym.envs.registration.registry.env_specs.keys())-myosuite_env_suite-_current_gym_envs
myosuite_env_suite = myosuite_env_suite | myosuite_myodex_suite
myosuite_myodex_suite = sorted(myosuite_myodex_suite)
import myosuite.envs.myo.myodm # noqa
myosuite_myodm_suite = set(gym.envs.registration.registry.env_specs.keys())-myosuite_env_suite-_current_gym_envs
myosuite_env_suite = myosuite_env_suite | myosuite_myodm_suite
myosuite_myodm_suite = sorted(myosuite_myodm_suite)

# All myosuite Envs
myosuite_env_suite = sorted(myosuite_env_suite)
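
As a quick sanity check (a sketch, not part of the commit), the suite lists assembled above are plain module-level attributes and can be inspected directly:

import myosuite

# sorted lists of env ids built in myosuite/__init__.py
print(len(myosuite.myosuite_env_suite), "MyoSuite envs registered")
print(myosuite.myosuite_myodm_suite[:5])  # first few MyoDM env ids
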
5 changes: 3 additions & 2 deletions myosuite/envs/env_base.py
@@ -439,7 +439,7 @@ def get_env_infos(self):
- NOTE: Returned dict contains pointers that will be updated by the env. Deepcopy returned data if you want it to persist
- Essential keys are added below. Users can add more keys by overriding this function in their task-env
- Requires necessary keys (dense, sparse, solved, done) in rwd_dict to be populated
- Visual_dict can be {} if users hasn't explicitely updated it explicitely for current time
- Visual_dict can be {} if the user hasn't explicitly updated it for the current time
"""

# resolve if current visuals are available
@@ -455,7 +455,7 @@ def get_env_infos(self):
'solved': self.rwd_dict['solved'][()], # MDP(t)
'done': self.rwd_dict['done'][()], # MDP(t)
'obs_dict': self.obs_dict, # MDP(t)
'visual_dict': visual_dict, # MDP(t), will be {} if user hasn't explicitely updated self.visual_dict at the current time
'visual_dict': visual_dict, # MDP(t), will be {} if user hasn't explicitly updated self.visual_dict at the current time
'proprio_dict': self.proprio_dict, # MDP(t)
'rwd_dict': self.rwd_dict, # MDP(t)
'state': self.get_env_state(), # MDP(t)
@@ -657,6 +657,7 @@ def viewer_setup(self, distance=2.5, azimuth=90, elevation=-30, lookat=None, ren
)


# Methods on policy (should it be a part of utils?) =================================
def examine_policy(self,
policy,
horizon=1000,
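
To make the get_env_infos keys above concrete, a hypothetical consumer could read them from the step info (a sketch; it assumes the classic Gym API, that MyoSuite forwards this dict as the `info` return value, and 'myoLegWalk-v0' is only an example env id):

import gym
import myosuite  # registers the MyoSuite env ids

env = gym.make('myoLegWalk-v0')
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())

# keys populated by get_env_infos(); visual_dict may be {} unless explicitly updated
print(info['solved'], info['done'])
print(sorted(info['rwd_dict'].keys()))  # includes dense, sparse, solved, done
print(info['visual_dict'])
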
Binary file added myosuite/envs/myo/assets/myo_relief.npy
12 changes: 6 additions & 6 deletions myosuite/envs/myo/myobase/__init__.py
@@ -290,17 +290,17 @@ def register_env_with_variants(id, entry_point, max_episode_steps, kwargs):
if sim_backend == SimBackend.MUJOCO_PY:
leg_model='/../../../simhive/myo_sim/leg/myolegs_v0.54(mj210).mjb'
elif sim_backend == SimBackend.MUJOCO:
leg_model='/../../../simhive/myo_sim/leg/myolegs_v0.55(mj236).mjb'
leg_model='/../../../simhive/myo_sim/leg/myolegs_v0.56(mj237).mjb'

register_env_with_variants(id='myoLegReachFixed-v0',

register_env_with_variants(id='myoLegStandRandom-v0',
entry_point='myosuite.envs.myo.myobase.walk_v0:ReachEnvV0',
max_episode_steps=150,
kwargs={
'model_path': curr_dir+leg_model,
'joint_random_range': (0.2, -0.2), # range of joint randomization (jnt = init_qpos + random(range))
'target_reach_range': {
# 'pelvis': ((-.05, -.05, .92), (0.05, 0.05, .92)),
'pelvis': ((-.005, -.005, .9), (0.005, 0.005, .9)),
# 'pelvis': ((-.005, -.005, .75), (0.005, 0.005, .9)),
'pelvis': ((-.05, -.05, 0), (0.05, 0.05, 0)),
},
'normalize_act': True,
'far_th': 0.44
@@ -377,7 +377,7 @@ def register_env_with_variants(id, entry_point, max_episode_steps, kwargs):
'target_y_vel':1.2, # desired y velocity in m/s
'target_rot': None, # if None then the initial root pos will be taken, otherwise provide quat
'terrain':'stairs',
'variant':'fixed'
'variant':'fixed',
}
)
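
To illustrate the 'myoLegStandRandom-v0' registration above (with its joint_random_range initial-pose randomization), instantiation might look like this (a sketch assuming the classic Gym API; not part of the commit):

import gym
import myosuite  # executes the register_env_with_variants(...) calls above

env = gym.make('myoLegStandRandom-v0')
obs = env.reset()  # initial joint positions are perturbed within joint_random_range
obs, reward, done, info = env.step(env.action_space.sample())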

38 changes: 32 additions & 6 deletions myosuite/envs/myo/myobase/walk_v0.py
@@ -41,13 +41,15 @@ def __init__(self, model_path, obsd_model_path=None, seed=None, **kwargs):

def _setup(self,
target_reach_range:dict,
joint_random_range:tuple=(0.0,0.0),
far_th = .35,
obs_keys:list = DEFAULT_OBS_KEYS,
weighted_reward_keys:dict = DEFAULT_RWD_KEYS_AND_WEIGHTS,
**kwargs,
):
self.far_th = far_th
self.target_reach_range = target_reach_range
self.joint_random_range = joint_random_range
super()._setup(obs_keys=obs_keys,
weighted_reward_keys=weighted_reward_keys,
sites=self.target_reach_range.keys(),
@@ -78,6 +80,7 @@ def get_obs_dict(self, sim):
obs_dict['reach_err'] = np.array(obs_dict['target_pos'])-np.array(obs_dict['tip_pos'])
return obs_dict


def get_reward_dict(self, obs_dict):
reach_dist = np.linalg.norm(obs_dict['reach_err'], axis=-1)
vel_dist = np.linalg.norm(obs_dict['qvel'], axis=-1)
@@ -87,7 +90,7 @@ def get_reward_dict(self, obs_dict):
near_th = len(self.tip_sids)*.050
rwd_dict = collections.OrderedDict((
# Optional Keys
('reach', -1.*reach_dist -10.*vel_dist),
('reach', 10.-1.*reach_dist -10.*vel_dist),
('bonus', 1.*(reach_dist<2*near_th) + 1.*(reach_dist<near_th)),
('act_reg', -100.*act_mag),
('penalty', -1.*(reach_dist>far_th)),
@@ -96,21 +99,44 @@
('solved', reach_dist<near_th),
('done', reach_dist > far_th),
))
# print(f"reach_dist:{reach_dist}, far_th:{far_th}")
rwd_dict['dense'] = np.sum([wt*rwd_dict[key] for key, wt in self.rwd_keys_wt.items()], axis=0)
return rwd_dict


# generate a valid target
def generate_target_pose(self):
def generate_targets(self):
for site, span in self.target_reach_range.items():
sid = self.sim.model.site_name2id(site+'_target')
self.sim.model.site_pos[sid] = self.np_random.uniform(low=span[0], high=span[1])
sid = self.sim.model.site_name2id(site)
sid_target = self.sim.model.site_name2id(site+'_target')
self.sim.model.site_pos[sid_target] = self.sim.data.site_xpos[sid].copy() + self.np_random.uniform(low=span[0], high=span[1])
self.sim.forward()


# generate random qpos for targets (only at linear joints)
def generate_qpos(self):
qpos_rand = self.np_random.uniform(low= self.joint_random_range[0], high= self.joint_random_range[1], size=self.init_qpos.shape)
qpos_new = self.init_qpos.copy()
qpos_new[self.sim.model.jnt_qposadr] += qpos_rand[self.sim.model.jnt_qposadr] # only linear joints
qpos_new[self.sim.model.jnt_qposadr] = np.clip(qpos_new[self.sim.model.jnt_qposadr], self.sim.model.jnt_range[:,0], self.sim.model.jnt_range[:,1])
return qpos_new


def reset(self):
self.generate_target_pose()
# generate random targets
if np.ptp(self.joint_random_range)>0:
self.sim.data.qpos = self.generate_qpos()
self.sim.forward()
self.generate_targets()

# sync targets to sim_obsd
self.robot.sync_sims(self.sim, self.sim_obsd)
obs = super().reset()

# generate resets
if np.ptp(self.joint_random_range)>0:
obs = super().reset(reset_qpos= self.generate_qpos())
else:
obs = super().reset()
return obs

class WalkEnvV0(BaseV0):
56 changes: 50 additions & 6 deletions myosuite/envs/myo/myochallenge/__init__.py
@@ -6,12 +6,12 @@


# MyoChallenge 2023 envs ==============================================
# MyoChallenge Manipulation
# MyoChallenge Manipulation P1
register(id='myoChallengeRelocateP1-v0',
entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0',
max_episode_steps=150,
kwargs={
'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.14(mj236).mjb',
'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb',
'normalize_act': True,
'frame_skip': 5,
'pos_th': 0.1, # cover entire base of the receptacle
@@ -21,17 +21,61 @@
}
)

# MyoChallenge Manipulation P2
register(id='myoChallengeRelocateP2-v0',
entry_point='myosuite.envs.myo.myochallenge.relocate_v0:RelocateEnvV0',
max_episode_steps=150,
kwargs={
'model_path': curr_dir+'/../../../simhive/myo_sim/arm/myoarm_object_v0.16(mj237).mjb',
'normalize_act': True,
'frame_skip': 5,
'pos_th': 0.1, # cover entire base of the receptacle
'rot_th': np.inf, # ignore rotation errors
'qpos_noise_range':0.01, # jnt initialization range
'target_xyz_range': {'high':[0.3, -.45, 0.9], 'low':[0.0, -.1, 1.05]},
'target_rxryrz_range': {'high':[-.2, -.2, -.2], 'low':[0.2, 0.2, 0.2]},
'obj_xyz_range': {'high':[0.1, -.15, 1.0], 'low':[-0.1, -.35, 1.0]},
'obj_geom_range': {'high':[.025, .025, .025], 'low':[.015, 0.015, 0.015]},
'obj_mass_range': {'high':0.200, 'low':0.050}, # 50 g to 200 g
'obj_friction_range': {'high':[1.2, 0.006, 0.00012], 'low':[0.8, 0.004, 0.00008]}
}
)


# MyoChallenge Locomotion
## MyoChallenge Locomotion P1
register(id='myoChallengeChaseTagP1-v0',
entry_point='myosuite.envs.myo.myochallenge.chasetag_v0:ChaseTagEnvV0',
max_episode_steps=2000,
kwargs={
'model_path': curr_dir+'/../../../simhive/myo_sim/leg/myolegs_chasetag_v0.10(mj236).mjb',
'model_path': curr_dir+'/../../../simhive/myo_sim/leg/myolegs_chasetag_v0.11(mj237).mjb',
'normalize_act': True,
'win_distance': 0.5,
'min_spawn_distance': 2,
'reset_type': 'init', # none, init, random
'terrain': 'flat',
'task_choice': 'chase',
'hills_range': (0.0, 0.0),
'rough_range': (0.0, 0.0),
'relief_range': (0.0, 0.0),
}
)


# MyoChallenge Locomotion P2
register(id='myoChallengeChaseTagP2-v0',
entry_point='myosuite.envs.myo.myochallenge.chasetag_v0:ChaseTagEnvV0',
max_episode_steps=2000,
kwargs={
'model_path': curr_dir+'/../../../simhive/myo_sim/leg/myolegs_chasetag_v0.11(mj237).mjb',
'normalize_act': True,
'reset_type':'init', # none, init, random
'win_distance': 0.5,
'min_spawn_distance': 2
'min_spawn_distance': 2,
'reset_type': 'random', # none, init, random
'terrain': 'random', # flat, random
'task_choice': 'random', # chase, evade, random
'hills_range': (0.03, 0.23),
'rough_range': (0.05, 0.1),
'relief_range': (0.1, 0.3),
}
)
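
For reference, the newly registered MyoChallenge Phase 2 environments can be instantiated like any other MyoSuite env; a minimal smoke-test sketch (assuming the classic Gym API and that importing myosuite runs the register(...) calls above):

import gym
import myosuite  # triggers the registrations above

for env_id in ['myoChallengeRelocateP2-v0', 'myoChallengeChaseTagP2-v0']:
    env = gym.make(env_id)
    obs = env.reset()
    for _ in range(5):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()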
