diff --git a/docs/source/tutorials/7_Fatigue_Modeling.ipynb b/docs/source/tutorials/7_Fatigue_Modeling.ipynb index 7d0e54c7..3df31324 100644 --- a/docs/source/tutorials/7_Fatigue_Modeling.ipynb +++ b/docs/source/tutorials/7_Fatigue_Modeling.ipynb @@ -6,6 +6,8 @@ "metadata": {}, "outputs": [], "source": [ + "%env MUJOCO_GL=egl\n", + "import myosuite\n", "from myosuite.utils import gym\n", "import skvideo.io\n", "import numpy as np\n", @@ -13,9 +15,6 @@ "import pickle\n", "import time\n", "import os\n", - "\n", - "os.environ['MUJOCO_GL'] = 'egl'\n", - "\n", "import mujoco" ] }, @@ -107,9 +106,9 @@ "metadata": {}, "outputs": [], "source": [ - "envFatigue.muscle_fatigue.MF #percentage of fatigued motor units for each muscle\n", - "envFatigue.muscle_fatigue.MR #percentage of resting motor units for each muscle\n", - "envFatigue.muscle_fatigue.MA #percentage of active motor units for each muscle" + "envFatigue.unwrapped.muscle_fatigue.MF #percentage of fatigued motor units for each muscle\n", + "envFatigue.unwrapped.muscle_fatigue.MR #percentage of resting motor units for each muscle\n", + "envFatigue.unwrapped.muscle_fatigue.MA #percentage of active motor units for each muscle" ] }, { @@ -125,11 +124,11 @@ "metadata": {}, "outputs": [], "source": [ - "envFatigue.muscle_fatigue.set_RecoveryMultiplier(10)\n", - "envFatigue.muscle_fatigue.set_RecoveryCoefficient(0.0022)\n", - "envFatigue.muscle_fatigue.set_FatigueCoefficient(0.0146)\n", + "envFatigue.unwrapped.muscle_fatigue.set_RecoveryMultiplier(10)\n", + "envFatigue.unwrapped.muscle_fatigue.set_RecoveryCoefficient(0.0022)\n", + "envFatigue.unwrapped.muscle_fatigue.set_FatigueCoefficient(0.0146)\n", "\n", - "envFatigue.muscle_fatigue.r, envFatigue.muscle_fatigue.R, envFatigue.muscle_fatigue.F" + "envFatigue.unwrapped.muscle_fatigue.r, envFatigue.unwrapped.muscle_fatigue.R, envFatigue.unwrapped.muscle_fatigue.F" ] }, { @@ -162,7 +161,7 @@ "outputs": [], "source": [ "envFatigue.reset()\n", - "envFatigue.muscle_fatigue.MF, 
envFatigue.muscle_fatigue.MR, envFatigue.muscle_fatigue.MA" + "envFatigue.unwrapped.muscle_fatigue.MF, envFatigue.unwrapped.muscle_fatigue.MR, envFatigue.unwrapped.muscle_fatigue.MA" ] }, { @@ -178,10 +177,10 @@ "metadata": {}, "outputs": [], "source": [ - "envFatigue.set_fatigue_reset_random(True)\n", + "envFatigue.unwrapped.set_fatigue_reset_random(True)\n", "\n", "envFatigue.reset()\n", - "envFatigue.muscle_fatigue.MF, envFatigue.muscle_fatigue.MR, envFatigue.muscle_fatigue.MA" + "envFatigue.unwrapped.muscle_fatigue.MF, envFatigue.unwrapped.muscle_fatigue.MR, envFatigue.unwrapped.muscle_fatigue.MA" ] }, { @@ -197,8 +196,8 @@ "metadata": {}, "outputs": [], "source": [ - "envFatigue.set_fatigue_reset_random(False)\n", - "a = np.zeros(envFatigue.sim.model.nu,)\n", + "envFatigue.unwrapped.set_fatigue_reset_random(False)\n", + "a = np.zeros(envFatigue.unwrapped.sim.model.nu,)\n", "a[0] = 1\n", "\n", "envFatigue.reset()\n", @@ -210,7 +209,7 @@ "for i in range(10):\n", " next_o, r, done, *_, ifo = env.step(a) # take an action\n", "\n", - "env.last_ctrl, envFatigue.last_ctrl, envFatigue.muscle_fatigue.MF" + "env.unwrapped.last_ctrl, envFatigue.unwrapped.last_ctrl, envFatigue.unwrapped.muscle_fatigue.MF" ] }, { @@ -231,7 +230,7 @@ "data_store = []\n", "data_store_f = []\n", "for i in range(7*3): # 7 batches of 3 episodes, with 2 episodes of maximum muscle controls for some muscles followed by a resting episode (i.e., zero muscle controls) in each batch\n", - " a = np.zeros(env.sim.model.nu,)\n", + " a = np.zeros(env.unwrapped.sim.model.nu,)\n", " if i%3!=2:\n", " a[3:]=1\n", " else:\n", @@ -242,47 +241,47 @@ " next_f_o, r_f, done_F, *_, ifo_f = envFatigue.step(a) # take an action\n", " \n", " data_store.append({\"action\":a.copy(), \n", - " \"jpos\":env.sim.data.qpos.copy(), \n", - " \"mlen\":env.sim.data.actuator_length.copy(), \n", - " \"act\":env.sim.data.act.copy()})\n", + " \"jpos\":env.unwrapped.sim.data.qpos.copy(), \n", + " 
\"mlen\":env.unwrapped.sim.data.actuator_length.copy(), \n", + " \"act\":env.unwrapped.sim.data.act.copy()})\n", " data_store_f.append({\"action\":a.copy(), \n", - " \"jpos\":envFatigue.sim.data.qpos.copy(), \n", - " \"mlen\":envFatigue.sim.data.actuator_length.copy(),\n", - " \"MF\":envFatigue.muscle_fatigue.MF.copy(),\n", - " \"MR\":envFatigue.muscle_fatigue.MR.copy(),\n", - " \"MA\":envFatigue.muscle_fatigue.MA.copy(), \n", - " \"act\":envFatigue.sim.data.act.copy()})\n", + " \"jpos\":envFatigue.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":envFatigue.unwrapped.sim.data.actuator_length.copy(),\n", + " \"MF\":envFatigue.unwrapped.muscle_fatigue.MF.copy(),\n", + " \"MR\":envFatigue.unwrapped.muscle_fatigue.MR.copy(),\n", + " \"MA\":envFatigue.unwrapped.muscle_fatigue.MA.copy(), \n", + " \"act\":envFatigue.unwrapped.sim.data.act.copy()})\n", "\n", "env.close()\n", "envFatigue.close()\n", "\n", - "muscle_names = [env.sim.model.id2name(i, \"actuator\") for i in range(env.sim.model.nu) if env.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", + "muscle_names = [env.unwrapped.sim.model.id2name(i, \"actuator\") for i in range(env.unwrapped.sim.model.nu) if env.unwrapped.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", "muscle_id = -1\n", "\n", "plt.figure(figsize=(12, 6))\n", "plt.subplot(221)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store]), label=\"Normal model/Desired activations\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store_f]), label='Fatigued model')\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store]), label=\"Normal model/Desired activations\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store_f]), label='Fatigued model')\n", "plt.legend()\n", "plt.title(f'Muscle activations over time 
({muscle_names[muscle_id]})')\n", "plt.xlabel('time (s)'),plt.ylabel('act')\n", "\n", "plt.subplot(222)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store]), label=\"Normal model\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store_f]), label=\"Fatigued model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store]), label=\"Normal model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store_f]), label=\"Fatigued model\")\n", "plt.legend()\n", "plt.title('Joint angle over time')\n", "plt.xlabel('time (s)'),plt.ylabel('angle')\n", "\n", "plt.subplot(223)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store]), label=\"Normal model\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store_f]), label=\"Fatigued model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store]), label=\"Normal model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store_f]), label=\"Fatigued model\")\n", "plt.legend()\n", "plt.title(f'Muscle lengths over time ({muscle_names[muscle_id]})')\n", "plt.xlabel('time (s)'),plt.ylabel('muscle length')\n", "\n", "plt.subplot(224)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['MF'][muscle_id] for d in data_store_f]), color=\"tab:orange\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['MF'][muscle_id] for d in data_store_f]), color=\"tab:orange\")\n", "plt.title(f'Fatigued motor units over time ({muscle_names[muscle_id]})')\n", "plt.xlabel('time (s)'),plt.ylabel('%MVC')\n", "\n", @@ -301,7 +300,7 @@ "data_store = []\n", "data_store_f = []\n", "for i in range(2*3): # 2 batches of 3 episodes, with 0.5*MVC in first and 1*MVC 
in second episode, followed by a resting episode (i.e., zero muscle controls) in each batch\n", - " a = np.zeros(env.sim.model.nu,)\n", + " a = np.zeros(env.unwrapped.sim.model.nu,)\n", " if i%3==0:\n", " a[3:]=0.5\n", " elif i%3==1:\n", @@ -314,47 +313,47 @@ " next_f_o, r_f, done_F, *_, ifo_f = envFatigue.step(a) # take an action\n", " \n", " data_store.append({\"action\":a.copy(), \n", - " \"jpos\":env.sim.data.qpos.copy(), \n", - " \"mlen\":env.sim.data.actuator_length.copy(), \n", - " \"act\":env.sim.data.act.copy()})\n", + " \"jpos\":env.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":env.unwrapped.sim.data.actuator_length.copy(), \n", + " \"act\":env.unwrapped.sim.data.act.copy()})\n", " data_store_f.append({\"action\":a.copy(), \n", - " \"jpos\":envFatigue.sim.data.qpos.copy(), \n", - " \"mlen\":envFatigue.sim.data.actuator_length.copy(),\n", - " \"MF\":envFatigue.muscle_fatigue.MF.copy(),\n", - " \"MR\":envFatigue.muscle_fatigue.MR.copy(),\n", - " \"MA\":envFatigue.muscle_fatigue.MA.copy(),\n", - " \"act\":envFatigue.sim.data.act.copy()})\n", + " \"jpos\":envFatigue.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":envFatigue.unwrapped.sim.data.actuator_length.copy(),\n", + " \"MF\":envFatigue.unwrapped.muscle_fatigue.MF.copy(),\n", + " \"MR\":envFatigue.unwrapped.muscle_fatigue.MR.copy(),\n", + " \"MA\":envFatigue.unwrapped.muscle_fatigue.MA.copy(),\n", + " \"act\":envFatigue.unwrapped.sim.data.act.copy()})\n", "\n", "env.close()\n", "envFatigue.close()\n", "\n", - "muscle_names = [env.sim.model.id2name(i, \"actuator\") for i in range(env.sim.model.nu) if env.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", + "muscle_names = [env.unwrapped.sim.model.id2name(i, \"actuator\") for i in range(env.unwrapped.sim.model.nu) if env.unwrapped.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", "muscle_id = -1\n", "\n", "plt.figure(figsize=(12, 6))\n", "plt.subplot(221)\n", - "plt.plot(env.dt*np.arange(len(data_store)), 
np.array([d['act'][muscle_id] for d in data_store]), label=\"Normal model/Desired activations\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store_f]), label='Fatigued model')\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store]), label=\"Normal model/Desired activations\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['act'][muscle_id] for d in data_store_f]), label='Fatigued model')\n", "plt.legend()\n", "plt.title(f'Muscle activations over time ({muscle_names[muscle_id]})')\n", "plt.xlabel('time (s)'),plt.ylabel('act')\n", "\n", "plt.subplot(222)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store]), label=\"Normal model\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store_f]), label=\"Fatigued model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store]), label=\"Normal model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['jpos'] for d in data_store_f]), label=\"Fatigued model\")\n", "plt.legend()\n", "plt.title('Joint angle over time')\n", "plt.xlabel('time (s)'),plt.ylabel('angle')\n", "\n", "plt.subplot(223)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store]), label=\"Normal model\")\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store_f]), label=\"Fatigued model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store]), label=\"Normal model\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['mlen'][muscle_id] for d in data_store_f]), label=\"Fatigued model\")\n", "plt.legend()\n", "plt.title(f'Muscle lengths over time ({muscle_names[muscle_id]})')\n", "plt.xlabel('time 
(s)'),plt.ylabel('muscle length')\n", "\n", "plt.subplot(224)\n", - "plt.plot(env.dt*np.arange(len(data_store)), np.array([d['MF'][muscle_id] for d in data_store_f]), color=\"tab:orange\")\n", + "plt.plot(env.unwrapped.dt*np.arange(len(data_store)), np.array([d['MF'][muscle_id] for d in data_store_f]), color=\"tab:orange\")\n", "plt.title(f'Fatigued motor units over time ({muscle_names[muscle_id]})')\n", "plt.xlabel('time (s)'),plt.ylabel('%MVC')\n", "\n", @@ -369,78 +368,52 @@ "## Train Agents with Fatigue" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**NOTE:** By default, random fatigue states are sampled at the beginning of each training episode. \\\n", - "To start with a specific, fixed fatigue state, set `fatigue_reset_random=False` and define `fatigue_reset_vec` as the vector MF of fatigued motor units per muscle.\n", - "\n", - "Best practice is to create a new of the desired environment, i.e., calling `register_env_variant()` with\n", - "`variants={'muscle_condition': 'fatigue',\n", - " 'fatigue_reset_vec': np.array([0., 0., 0.]),\n", - " 'fatigue_reset_random': False}`.\n" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from mjrl.utils.gym_env import GymEnv\n", - "from mjrl.policies.gaussian_mlp import MLP\n", - "from mjrl.baselines.mlp_baseline import MLPBaseline\n", - "from mjrl.algos.npg_cg import NPG\n", - "from mjrl.utils.train_agent import train_agent" + "from stable_baselines3 import PPO\n", + "from stable_baselines3.common.callbacks import CheckpointCallback" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "env_name = \"myoFatiElbowPose1D6MRandom-v0\"\n", "\n", "env = gym.make(env_name)\n", - "env.set_fatigue_reset_random(True)\n", + "env.unwrapped.set_fatigue_reset_random(True)\n", "env.reset()\n", "\n", - "policy_size = (32, 32)\n", - "vf_hidden_size = (128, 128)\n", - "seed = 123\n", - 
"rl_step_size = 0.1\n", - "e = GymEnv(env)\n", - "\n", - "policy = MLP(e.spec, hidden_sizes=policy_size, seed=seed, init_log_std=-0.25, min_log_std=-1.0)\n", - "\n", - "baseline = MLPBaseline(e.spec, reg_coef=1e-3, batch_size=64, hidden_sizes=vf_hidden_size, \\\n", - " epochs=2, use_gpu=True, learn_rate=1e-3)\n", + "# Save a checkpoint every 50000 steps\n", + "checkpoint_callback = CheckpointCallback(\n", + " save_freq=50000,\n", + " save_path=f\"./{env_name}/iterations/\",\n", + " name_prefix=\"rl_model\",\n", + " save_replay_buffer=True,\n", + " save_vecnormalize=True,\n", + ")\n", "\n", - "agent = NPG(e, policy, baseline, normalized_step_size=rl_step_size, \\\n", - " seed=seed, save_logs=True)\n", - "\n", - "print(\"========================================\")\n", - "print(\"Starting policy learning\")\n", - "print(\"========================================\")\n", - "\n", - "train_agent(job_name=f'./{env_name}',\n", - " agent=agent,\n", - " seed=seed,\n", - " niter=200000,\n", - " gamma=0.995,\n", - " gae_lambda=0.97,\n", - " num_cpu=8,\n", - " sample_mode=\"trajectories\",\n", - " num_traj=96,\n", - " num_samples=0,\n", - " save_freq=500,\n", - " evaluation_rollouts=10)\n", + "model = PPO(\"MlpPolicy\", env, verbose=0)\n", + "model.learn(total_timesteps=200000, callback=checkpoint_callback)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**NOTE:** By default, random fatigue states are sampled at the beginning of each training episode. 
\\\n", + "To start with a specific, fixed fatigue state, set `fatigue_reset_random=False` and define `fatigue_reset_vec` as the vector MF of fatigued motor units per muscle.\n", "\n", - "print(\"========================================\")\n", - "print(\"Job Finished.\") \n", - "print(\"========================================\")" + "Best practice is to create a new variant of the desired environment, i.e., calling `register_env_variant()` with\n", + "`variants={'muscle_condition': 'fatigue',\n", + " 'fatigue_reset_vec': np.array([0., 0., 0.]),\n", + " 'fatigue_reset_random': False}`.\n" ] }, { @@ -485,21 +458,21 @@ "\n", "env = gym.make(env_name)\n", "\n", - "policy = f\"{env_name}/iterations/best_policy.pickle\"\n", - "pi = pickle.load(open(policy, 'rb'))\n", + "from stable_baselines3 import PPO\n", + "model = PPO.load(f\"{env_name}/iterations/rl_model_200000_steps\")\n", "\n", - "env.set_fatigue_reset_random(False)\n", + "env.unwrapped.set_fatigue_reset_random(False)\n", "env.reset(fatigue_reset=True) #ensure that fatigue is reset before the simulation starts\n", "\n", - "env.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", + "env.unwrapped.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", "\n", "data_store = []\n", "if GENERATE_VIDEO:\n", " frames = []\n", "\n", - "env.env.target_jnt_value = env.target_jnt_range[:, 1]\n", - "env.env.target_type = 'fixed'\n", - "env.env.update_target(restore_sim=True)\n", + "env.unwrapped.target_jnt_value = env.unwrapped.target_jnt_range[:, 1]\n", + "env.unwrapped.target_type = 'fixed'\n", + "env.unwrapped.update_target(restore_sim=True)\n", "\n", "start_time = time.time()\n", "for ep in range(n_eps):\n", @@ -507,30 +480,30 @@ " \n", " for _cstep in range(env.spec.max_episode_steps):\n", " if GENERATE_VIDEO and (ep in range(GENERATE_VIDEO_EPS) or ep in range(n_eps-GENERATE_VIDEO_EPS, n_eps)):\n", - " frame = env.sim.renderer.render_offscreen(width=400, height=400, camera_id=0)\n", + " frame = 
env.unwrapped.sim.renderer.render_offscreen(width=400, height=400, camera_id=0)\n", " \n", " # Add text overlay\n", - " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.dt\n", + " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.unwrapped.dt\n", " frame = np.array(add_text_to_frame(frame,\n", " f\"t={str(int(_current_time//60)).zfill(2)}:{str(int(_current_time%60)).zfill(2)}min\",\n", " pos=(285, 3), color=(0, 0, 0), fontsize=18))\n", " \n", " frames.append(frame)\n", - " o = env.get_obs()\n", - " a = pi.get_action(o)[0]\n", - " next_o, r, done, ifo = env.step(a) # take an action based on the current observation\n", + " o = env.unwrapped.get_obs()\n", + " a = model.predict(o)[0]\n", + " next_o, r, done, _, ifo = env.step(a) # take an action based on the current observation\n", "\n", " data_store.append({\"action\":a.copy(), \n", - " \"jpos\":env.sim.data.qpos.copy(), \n", - " \"mlen\":env.sim.data.actuator_length.copy(), \n", - " \"act\":env.sim.data.act.copy(),\n", + " \"jpos\":env.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":env.unwrapped.sim.data.actuator_length.copy(), \n", + " \"act\":env.unwrapped.sim.data.act.copy(),\n", " \"reward\":r,\n", - " \"solved\":env.rwd_dict['solved'].item(),\n", - " \"pose_err\":env.get_obs_dict(env.sim)[\"pose_err\"],\n", - " \"MA\":env.muscle_fatigue.MA.copy(),\n", - " \"MR\":env.muscle_fatigue.MR.copy(),\n", - " \"MF\":env.muscle_fatigue.MF.copy(),\n", - " \"ctrl\":env.last_ctrl.copy()})\n", + " \"solved\":env.unwrapped.rwd_dict['solved'].item(),\n", + " \"pose_err\":env.unwrapped.get_obs_dict(env.unwrapped.sim)[\"pose_err\"],\n", + " \"MA\":env.unwrapped.muscle_fatigue.MA.copy(),\n", + " \"MR\":env.unwrapped.muscle_fatigue.MR.copy(),\n", + " \"MF\":env.unwrapped.muscle_fatigue.MF.copy(),\n", + " \"ctrl\":env.unwrapped.last_ctrl.copy()})\n", "env.close()\n", "\n", "## OPTIONALLY: Stored simulated data\n", @@ -542,7 +515,7 @@ "if GENERATE_VIDEO:\n", " os.makedirs(f'{env_name}/videos', 
exist_ok=True)\n", " # make a local copy\n", - " skvideo.io.vwrite(f'{env_name}/videos/fatitest.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", + " skvideo.io.vwrite(f'{env_name}/videos/fatitest.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.unwrapped.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", "\n", "end_time = time.time()\n", "print(f\"DURATION: {end_time - start_time:.2f}s\")\n", @@ -562,8 +535,8 @@ "####################\n", "\n", "env_test = gym.make(env_name, normalize_act=False)\n", - "muscle_names = [env_test.sim.model.id2name(i, \"actuator\") for i in range(env_test.sim.model.nu) if env_test.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", - "_env_dt = env_test.dt #0.02\n", + "muscle_names = [env_test.unwrapped.sim.model.id2name(i, \"actuator\") for i in range(env_test.unwrapped.sim.model.nu) if env_test.unwrapped.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", + "_env_dt = env_test.unwrapped.dt #0.02\n", "\n", "data_store = np.load(f\"{env_name}/logs/fatitest.npy\", allow_pickle=True)\n", "\n", @@ -626,18 +599,18 @@ "policy = \"../../../myosuite/agents/baslines_NPG/myoElbowPose1D6MRandom-v0/2022-02-26_21-16-27/33_env=myoElbowPose1D6MRandom-v0,seed=1/iterations/best_policy.pickle\"\n", "pi = pickle.load(open(policy, 'rb'))\n", "\n", - "env.set_fatigue_reset_random(False)\n", + "env.unwrapped.set_fatigue_reset_random(False)\n", "env.reset(fatigue_reset=True) #ensure that fatigue is reset before the simulation starts\n", "\n", - "env.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", + "env.unwrapped.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", "\n", "data_store = []\n", "if GENERATE_VIDEO:\n", " frames = []\n", "\n", - "env.env.target_jnt_value = env.target_jnt_range[:, 1]\n", - "env.env.target_type = 'fixed'\n", - "env.env.update_target(restore_sim=True)\n", + "env.unwrapped.target_jnt_value = 
env.unwrapped.target_jnt_range[:, 1]\n", + "env.unwrapped.target_type = 'fixed'\n", + "env.unwrapped.update_target(restore_sim=True)\n", "\n", "start_time = time.time()\n", "for ep in range(n_eps):\n", @@ -645,30 +618,30 @@ "\n", " for _cstep in range(env.spec.max_episode_steps):\n", " if GENERATE_VIDEO and (ep in range(GENERATE_VIDEO_EPS) or ep in range(n_eps-GENERATE_VIDEO_EPS, n_eps)):\n", - " frame = env.sim.renderer.render_offscreen(width=400, height=400, camera_id=0)\n", + " frame = env.unwrapped.sim.renderer.render_offscreen(width=400, height=400, camera_id=0)\n", " \n", " # Add text overlay\n", - " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.dt\n", + " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.unwrapped.dt\n", " frame = np.array(add_text_to_frame(frame,\n", " f\"t={str(int(_current_time//60)).zfill(2)}:{str(int(_current_time%60)).zfill(2)}min\",\n", " pos=(285, 3), color=(0, 0, 0), fontsize=18))\n", " \n", " frames.append(frame)\n", - " o = env.get_obs()\n", + " o = env.unwrapped.get_obs()\n", " a = pi.get_action(o)[0]\n", - " next_o, r, done, ifo = env.step(a) # take an action based on the current observation\n", + " next_o, r, done, _, ifo = env.step(a) # take an action based on the current observation\n", "\n", " data_store.append({\"action\":a.copy(), \n", - " \"jpos\":env.sim.data.qpos.copy(), \n", - " \"mlen\":env.sim.data.actuator_length.copy(), \n", - " \"act\":env.sim.data.act.copy(),\n", + " \"jpos\":env.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":env.unwrapped.sim.data.actuator_length.copy(), \n", + " \"act\":env.unwrapped.sim.data.act.copy(),\n", " \"reward\":r,\n", - " \"solved\":env.rwd_dict['solved'].item(),\n", - " \"pose_err\":env.get_obs_dict(env.sim)[\"pose_err\"],\n", - " \"MA\":env.muscle_fatigue.MA.copy(),\n", - " \"MR\":env.muscle_fatigue.MR.copy(),\n", - " \"MF\":env.muscle_fatigue.MF.copy(),\n", - " \"ctrl\":env.last_ctrl.copy()})\n", + " 
\"solved\":env.unwrapped.rwd_dict['solved'].item(),\n", + " \"pose_err\":env.unwrapped.get_obs_dict(env.unwrapped.sim)[\"pose_err\"],\n", + " \"MA\":env.unwrapped.muscle_fatigue.MA.copy(),\n", + " \"MR\":env.unwrapped.muscle_fatigue.MR.copy(),\n", + " \"MF\":env.unwrapped.muscle_fatigue.MF.copy(),\n", + " \"ctrl\":env.unwrapped.last_ctrl.copy()})\n", "env.close()\n", "\n", "## OPTIONALLY: Stored simulated data\n", @@ -680,7 +653,7 @@ "if GENERATE_VIDEO:\n", " os.makedirs(f'{env_name}/videos', exist_ok=True)\n", " # make a local copy\n", - " skvideo.io.vwrite(f'{env_name}/videos/fatitest_trained_wo_fatigue.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", + " skvideo.io.vwrite(f'{env_name}/videos/fatitest_trained_wo_fatigue.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.unwrapped.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", "\n", "end_time = time.time()\n", "print(f\"DURATION: {end_time - start_time:.2f}s\")\n", @@ -700,8 +673,8 @@ "####################\n", "\n", "env_test = gym.make(env_name, normalize_act=False)\n", - "muscle_names = [env_test.sim.model.id2name(i, \"actuator\") for i in range(env_test.sim.model.nu) if env_test.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", - "_env_dt = env_test.dt #0.02\n", + "muscle_names = [env_test.unwrapped.sim.model.id2name(i, \"actuator\") for i in range(env_test.unwrapped.sim.model.nu) if env_test.unwrapped.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", + "_env_dt = env_test.unwrapped.dt #0.02\n", "\n", "data_store = np.load(f\"{env_name}/logs/fatitest_trained_wo_fatigue.npy\", allow_pickle=True)\n", "\n", @@ -768,21 +741,21 @@ "\n", "env = gym.make(env_name)\n", "\n", - "policy = f\"{env_name}/iterations/best_policy.pickle\"\n", - "pi = pickle.load(open(policy, 'rb'))\n", + "from stable_baselines3 import PPO\n", + "model = PPO.load(f\"{env_name}/iterations/rl_model_200000_steps\")\n", "\n", - 
"env.set_fatigue_reset_random(False)\n", + "env.unwrapped.set_fatigue_reset_random(False)\n", "env.reset(fatigue_reset=True) #ensure that fatigue is reset before the simulation starts\n", "\n", - "env.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", + "env.unwrapped.sim.model.cam_poscom0[0]= np.array([-1.3955, -0.3287, 0.6579])\n", "\n", "data_store = []\n", "if GENERATE_VIDEO:\n", " frames = []\n", "\n", - "env.env.target_jnt_value = env.target_jnt_range[:, 1]\n", - "env.env.target_type = 'fixed'\n", - "env.env.update_target(restore_sim=True)\n", + "env.unwrapped.target_jnt_value = env.unwrapped.target_jnt_range[:, 1]\n", + "env.unwrapped.target_type = 'fixed'\n", + "env.unwrapped.update_target(restore_sim=True)\n", "\n", "start_time = time.time()\n", "for ep in range(n_eps):\n", @@ -790,10 +763,10 @@ " \n", " for _cstep in range(env.spec.max_episode_steps):\n", " if GENERATE_VIDEO and (ep in range(GENERATE_VIDEO_EPS) or ep in range(n_eps-GENERATE_VIDEO_EPS, n_eps)):\n", - " frame = env.sim.renderer.render_offscreen(width=480, height=480, camera_id=0)\n", + " frame = env.unwrapped.sim.renderer.render_offscreen(width=480, height=480, camera_id=0)\n", " \n", " # Add text overlay\n", - " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.dt\n", + " _current_time = (ep*env.spec.max_episode_steps + _cstep)*env.unwrapped.dt\n", " frame = np.array(add_text_to_frame(frame,\n", " f\"t={str(int(_current_time//60)).zfill(2)}:{str(int(_current_time%60)).zfill(2)}min\",\n", " pos=(365, 3), color=(0, 0, 0), fontsize=18))\n", @@ -804,30 +777,30 @@ " pos=(320, 450), color=(84, 184, 81), fontsize=18))\n", "\n", " frames.append(frame)\n", - " o = env.get_obs()\n", - " a = pi.get_action(o)[0]\n", + " o = env.unwrapped.get_obs()\n", + " a = model.predict(o)[0]\n", "\n", " if ep >= n_eps*0.5 and ep < n_eps*0.75:\n", " a[:] = -100000 #resting period (corresponds to zero muscle activations)\n", - " env.sim.model.site_rgba[env.target_sids[0]][-1] = 0 #hide 
target during resting period\n", - " env.sim.model.tendon_rgba[-1][-1] = 0 #hide error line during resting period\n", + " env.unwrapped.sim.model.site_rgba[env.unwrapped.target_sids[0]][-1] = 0 #hide target during resting period\n", + " env.unwrapped.sim.model.tendon_rgba[-1][-1] = 0 #hide error line during resting period\n", " else:\n", - " env.sim.model.site_rgba[env.target_sids[0]][-1] = 0.2 #visualise target during task\n", - " env.sim.model.tendon_rgba[-1][-1] = 0.2 #visualise error line during task\n", + " env.unwrapped.sim.model.site_rgba[env.unwrapped.target_sids[0]][-1] = 0.2 #visualise target during task\n", + " env.unwrapped.sim.model.tendon_rgba[-1][-1] = 0.2 #visualise error line during task\n", "\n", - " next_o, r, done, ifo = env.step(a) # take an action based on the current observation\n", + " next_o, r, done, _, ifo = env.step(a) # take an action based on the current observation\n", "\n", " data_store.append({\"action\":a.copy(), \n", - " \"jpos\":env.sim.data.qpos.copy(), \n", - " \"mlen\":env.sim.data.actuator_length.copy(), \n", - " \"act\":env.sim.data.act.copy(),\n", + " \"jpos\":env.unwrapped.sim.data.qpos.copy(), \n", + " \"mlen\":env.unwrapped.sim.data.actuator_length.copy(), \n", + " \"act\":env.unwrapped.sim.data.act.copy(),\n", " \"reward\":r,\n", - " \"solved\":env.rwd_dict['solved'].item(),\n", - " \"pose_err\":env.get_obs_dict(env.sim)[\"pose_err\"],\n", - " \"MA\":env.muscle_fatigue.MA.copy(),\n", - " \"MR\":env.muscle_fatigue.MR.copy(),\n", - " \"MF\":env.muscle_fatigue.MF.copy(),\n", - " \"ctrl\":env.last_ctrl.copy()})\n", + " \"solved\":env.unwrapped.rwd_dict['solved'].item(),\n", + " \"pose_err\":env.unwrapped.get_obs_dict(env.unwrapped.sim)[\"pose_err\"],\n", + " \"MA\":env.unwrapped.muscle_fatigue.MA.copy(),\n", + " \"MR\":env.unwrapped.muscle_fatigue.MR.copy(),\n", + " \"MF\":env.unwrapped.muscle_fatigue.MF.copy(),\n", + " \"ctrl\":env.unwrapped.last_ctrl.copy()})\n", "env.close()\n", "\n", "## OPTIONALLY: Stored simulated 
data\n", @@ -839,7 +812,7 @@ "if GENERATE_VIDEO:\n", " os.makedirs(f'{env_name}/videos', exist_ok=True)\n", " # make a local copy\n", - " skvideo.io.vwrite(f'{env_name}/videos/fatitest_recovery.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", + " skvideo.io.vwrite(f'{env_name}/videos/fatitest_recovery.mp4', np.asarray(frames),inputdict={'-r': str(int(1/env.unwrapped.dt))},outputdict={\"-pix_fmt\": \"yuv420p\"})\n", "\n", "end_time = time.time()\n", "print(f\"DURATION: {end_time - start_time:.2f}s\")\n", @@ -859,8 +832,8 @@ "####################\n", "\n", "env_test = gym.make(env_name, normalize_act=False)\n", - "muscle_names = [env_test.sim.model.id2name(i, \"actuator\") for i in range(env_test.sim.model.nu) if env_test.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", - "_env_dt = env_test.dt #0.02\n", + "muscle_names = [env_test.unwrapped.sim.model.id2name(i, \"actuator\") for i in range(env_test.unwrapped.sim.model.nu) if env_test.unwrapped.sim.model.actuator_dyntype[i] == mujoco.mjtDyn.mjDYN_MUSCLE]\n", + "_env_dt = env_test.unwrapped.dt #0.02\n", "\n", "data_store = np.load(f\"{env_name}/logs/fatitest_recovery.npy\", allow_pickle=True)\n", "\n",