Quad 2D with 5 states and attitude ctrl
svsawant committed Jul 17, 2024
1 parent 5e980ec commit bfd0d31
Showing 12 changed files with 269 additions and 35 deletions.
@@ -60,7 +60,7 @@ task_config:
obs_goal_horizon: 1

# RL Reward
- rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.5]
+ rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
rew_act_weight: 0.1
rew_exponential: True

@@ -0,0 +1,28 @@
algo_config:
# model args
hidden_dim: 128
activation: "relu"

# loss args
use_gae: True
entropy_coef: 0.01

# optim args
opt_epochs: 20
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 480000
rollout_batch_size: 4
rollout_steps: 600
eval_batch_size: 50

# misc
log_interval: 9600
save_interval: 480000
num_checkpoints: 0
eval_interval: 9600
eval_save_best: True
tensorboard: False
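
As a quick sanity check on this budget (a sketch only; the dictionary below just mirrors the YAML keys above and is not code from this commit): with 4 rollout environments collecting 600 steps each, every PPO iteration gathers 2,400 environment steps, so 480,000 steps correspond to 200 iterations, and logging/evaluating every 9,600 steps gives 50 such events. At the 60 Hz control rate used in the task config that follows, 600 steps are exactly one 10-second episode per environment.

# Sketch only: bookkeeping implied by the PPO settings above (not repository code).
ppo_cfg = {
    'max_env_steps': 480_000,
    'rollout_batch_size': 4,   # parallel rollout environments
    'rollout_steps': 600,      # steps collected per environment per iteration
    'log_interval': 9600,
}
steps_per_iteration = ppo_cfg['rollout_batch_size'] * ppo_cfg['rollout_steps']   # 2400
num_iterations = ppo_cfg['max_env_steps'] // steps_per_iteration                 # 200
num_log_points = ppo_cfg['max_env_steps'] // ppo_cfg['log_interval']             # 50
print(steps_per_iteration, num_iterations, num_log_points)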
@@ -0,0 +1,74 @@
task_config:
seed: 1337
info_in_reset: True
ctrl_freq: 60
pyb_freq: 1200
physics: pyb
quad_type: 5
normalized_rl_action_space: True

init_state:
init_x: 0
init_x_dot: 0
init_z: 1.15
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -0.01
high: 0.01
init_x_dot:
distrib: 'uniform'
low: -0.01
high: 0.01
init_z:
distrib: 'uniform'
low: -0.01
high: 0.01
init_z_dot:
distrib: 'uniform'
low: -0.01
high: 0.01
init_theta:
distrib: 'uniform'
low: -0.02
high: 0.02

task: traj_tracking
task_info:
trajectory_type: figure8
num_cycles: 1
trajectory_plane: 'xz'
trajectory_position_offset: [0, 1.2]
trajectory_scale: 0.5

inertial_prop:
M: 0.027
Iyy: 1.4e-05

episode_len_sec: 10
cost: rl_reward
obs_goal_horizon: 1

# RL Reward
rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1]
rew_act_weight: 0.1
rew_exponential: True

# constraints:
# - constraint_form: default_constraint
# constrained_variable: state
# upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
# lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
# - constraint_form: default_constraint
# constrained_variable: input
# upper_bounds: [0.58, 0.8]
# lower_bounds: [0.06, -0.8]

done_on_out_of_bound: True
done_on_violation: False
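
The five entries of rew_state_weight match the reduced five-dimensional state of quad_type 5 (the earlier 2D quadrotor config above keeps six). Below is a minimal sketch of how such weights typically enter an exponential tracking reward, assuming the state ordering [x, x_dot, z, z_dot, theta] and the scalar rew_act_weight broadcast over a two-dimensional attitude action; the actual reward is computed inside the environment and is not part of this diff.

import numpy as np

# Hedged sketch of an exponential tracking reward; the state ordering
# [x, x_dot, z, z_dot, theta] is assumed, not taken from this diff.
rew_state_weight = np.array([1.0, 0.01, 1.0, 0.01, 0.1])
rew_act_weight = np.array([0.1, 0.1])  # scalar 0.1 broadcast over the 2-dim action (assumed)

def exponential_reward(state, action, state_ref, action_ref):
    cost = np.sum(rew_state_weight * (state - state_ref) ** 2)
    cost += np.sum(rew_act_weight * (action - action_ref) ** 2)
    return np.exp(-cost)  # rew_exponential: True keeps the reward in (0, 1]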
@@ -0,0 +1,29 @@
algo_config:
# model args
hidden_dim: 128
activation: "relu"
use_entropy_tuning: True

# optim args
train_interval: 100
train_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001
entropy_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 50000
deque_size: 50
eval_batch_size: 50

# misc
log_interval: 4000
save_interval: 0
num_checkpoints: 0
eval_interval: 4000
eval_save_best: True
tensorboard: False
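
The two SAC configurations added in this commit differ mainly in use_entropy_tuning (True above, False below). A minimal sketch of what automatic temperature tuning usually looks like, assuming the common target entropy of -action_dim and a two-dimensional action; this is illustrative and not the repository's exact update.

import torch

# Illustrative alpha (temperature) update for use_entropy_tuning: True;
# the target entropy of -action_dim is an assumption, not taken from this diff.
action_dim = 2
target_entropy = -float(action_dim)
log_alpha = torch.zeros(1, requires_grad=True)
alpha_opt = torch.optim.Adam([log_alpha], lr=1e-3)  # entropy_lr: 0.001

def update_alpha(log_prob):
    # log_prob: log-probabilities of actions sampled from the current policy.
    alpha_loss = -(log_alpha * (log_prob + target_entropy).detach()).mean()
    alpha_opt.zero_grad()
    alpha_loss.backward()
    alpha_opt.step()
    return log_alpha.exp().item()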
@@ -0,0 +1,29 @@
algo: sac
algo_config:
# model args
hidden_dim: 128
activation: "relu"
use_entropy_tuning: False

# optim args
train_interval: 100
train_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 50000
deque_size: 10
eval_batch_size: 50

# misc
log_interval: 2000
save_interval: 0
num_checkpoints: 0
eval_interval: 2000
eval_save_best: True
tensorboard: False
20 changes: 10 additions & 10 deletions examples/rl/rl_experiment.py
@@ -124,17 +124,17 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):

if config.task == Environment.QUADROTOR and system == 'quadrotor_4D':
_, ax4 = plt.subplots()
- ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
- ax4.set_ylabel(r'Thrust')
+ ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Action: Thrust')
+ ax4.set_ylabel(r'Action: Thrust')
_, ax5 = plt.subplots()
- ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
- ax5.set_ylabel(r'Pitch')
+ ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Action: Pitch')
+ ax5.set_ylabel(r'Action: Pitch')
_, ax6 = plt.subplots()
- ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
- ax6.set_ylabel(r'Pitch')
+ ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Obs: Pitch')
+ ax6.set_ylabel(r'Obs: Pitch')
_, ax7 = plt.subplots()
- ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
- ax7.set_ylabel(r'Pitch rate')
+ ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Obs: Pitch rate')
+ ax7.set_ylabel(r'Obs: Pitch rate')

if config.task == Environment.QUADROTOR and system == 'quadrotor_2D':
# _, ax4 = plt.subplots()
@@ -144,10 +144,10 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
# ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
# ax5.set_ylabel(r'Pitch')
_, ax6 = plt.subplots()
- ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Thrust')
+ ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
ax6.set_ylabel(r'Pitch')
_, ax7 = plt.subplots()
- ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 6], 'r', label='Pitch')
+ ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
ax7.set_ylabel(r'Pitch rate')

plt.tight_layout()
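
The index shifts above (observation column 5 to 4 and 6 to 5 in the quadrotor_2D plots) are consistent with a six-dimensional state ordered [x, x_dot, z, z_dot, theta, theta_dot], in which columns 4 and 5 hold pitch and pitch rate. A small sketch of that assumed layout follows; verify against the environment's state labels rather than treating it as authoritative.

import numpy as np

# Assumed planar-quadrotor observation layout behind the index change above.
OBS_2D = {'x': 0, 'x_dot': 1, 'z': 2, 'z_dot': 3, 'theta': 4, 'theta_dot': 5}

obs = np.zeros((601, 6))                     # stand-in for results['obs'][0]
pitch = obs[1:, OBS_2D['theta']]             # what ax6 now plots
pitch_rate = obs[1:, OBS_2D['theta_dot']]    # what ax7 now plots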
7 changes: 4 additions & 3 deletions examples/rl/rl_experiment.sh
@@ -2,14 +2,15 @@

#SYS='cartpole'
#SYS='quadrotor_2D'
- SYS='quadrotor_2D_attitude'
+ #SYS='quadrotor_2D_attitude'
+ SYS='quadrotor_2D_attitude_5s'
#SYS='quadrotor_3D'

#TASK='stab'
TASK='track'

- #ALGO='ppo'
- ALGO='sac'
+ ALGO='ppo'
+ #ALGO='sac'
#ALGO='td3'
#ALGO='safe_explorer_ppo'

3 changes: 2 additions & 1 deletion examples/rl/train_rl_model.sh
@@ -2,7 +2,8 @@

#SYS='cartpole'
#SYS='quadrotor_2D'
- SYS='quadrotor_2D_attitude'
+ #SYS='quadrotor_2D_attitude'
+ SYS='quadrotor_2D_attitude_5s'
#SYS='quadrotor_3D'

#TASK='stab'
9 changes: 3 additions & 6 deletions safe_control_gym/controllers/pid/pid.py
@@ -94,7 +94,7 @@ def select_action(self, obs, info=None):
step = self.extract_step(info)

# Step the environment and print all returned information.
- if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+ if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
cur_pos = np.array([obs[0], 0, obs[2]])
cur_quat = np.array(p.getQuaternionFromEuler([0, obs[4], 0]))
cur_vel = np.array([obs[1], 0, obs[3]])
@@ -103,17 +103,14 @@ def select_action(self, obs, info=None):
cur_quat = np.array(p.getQuaternionFromEuler([obs[6], obs[7], obs[8]]))
cur_vel = np.array([obs[1], obs[3], obs[5]])

- if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+ if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
if self.env.TASK == Task.TRAJ_TRACKING:
target_pos = np.array([self.reference[step, 0],
0,
self.reference[step, 2]])
target_vel = np.array([self.reference[step, 1],
0,
self.reference[step, 3]])

- # target_pos = np.array([0.2, 0, 0.2])
- # target_vel = np.array([0, 0, 0])
elif self.env.TASK == Task.STABILIZATION:
target_pos = np.array([self.reference[0], 0, self.reference[2]])
target_vel = np.array([0, 0, 0])
@@ -148,7 +145,7 @@ def select_action(self, obs, info=None):
action = rpm
action = self.KF * action ** 2
action = np.array([action[0] + action[3], action[1] + action[2]])
- elif self.env.QUAD_TYPE == 4:  # 2D quadrotor with attitude control
+ elif self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:  # 2D quadrotor with attitude control
action = np.array([self.env.attitude_control.pwm2thrust(thrust / 3) * 4, computed_target_rpy[1]])
#### Extra checks
# rpm = self.env.before_step(action)
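
The branches extended above route the new quad_type 5 through the same attitude interface as type 4: the PID's collective thrust and target pitch become a two-element action instead of per-motor RPMs. A simplified sketch of that mapping follows, with a placeholder standing in for the environment's pwm2thrust conversion (the real calibration is not reproduced here).

import numpy as np

def pwm2thrust_placeholder(pwm):
    # Stand-in for self.env.attitude_control.pwm2thrust; the real
    # PWM-to-thrust calibration is not reproduced in this sketch.
    return pwm

def attitude_interface_action(thrust, computed_target_rpy, quad_type):
    # Mirrors the structure of the diff above for the attitude-controlled 2D quadrotors.
    if quad_type in (4, 5):
        return np.array([pwm2thrust_placeholder(thrust / 3) * 4, computed_target_rpy[1]])
    raise NotImplementedError('only the attitude-interface branch is sketched here')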
5 changes: 3 additions & 2 deletions safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
@@ -263,6 +263,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
time_before_stepping = time.time()
# clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4))
clipped_action = np.expand_dims(clipped_action, axis=0)
+ # rpm = self._preprocess_control(clipped_action[0, :])

# Repeat for as many as the aggregate physics steps.
for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -313,8 +314,8 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
p.stepSimulation(physicsClientId=self.PYB_CLIENT)
# Save the last applied action (e.g. to compute drag).
self.last_clipped_action = clipped_action
# Update and store the drones kinematic information.
self._update_and_store_kinematic_information()
# Update and store the drones kinematic information.
self._update_and_store_kinematic_information()

time_after_stepping = time.time()
# print('time stepping', time_after_stepping - time_before_stepping)
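
For context on the last hunk: with ctrl_freq 60 and pyb_freq 1200 from the task config above, every control step aggregates 20 PyBullet sub-steps, and the duplicated comment/call pair above is the kinematics update being removed at one indentation level and re-added at another. A minimal sketch of that aggregated-stepping pattern follows; the function names are placeholders, and exactly where the kinematics refresh sits relative to the loop is what this hunk adjusts.

# Illustrative sketch of aggregated physics stepping (placeholder names, not repository code).
PYB_FREQ, CTRL_FREQ = 1200, 60
PYB_STEPS_PER_CTRL = PYB_FREQ // CTRL_FREQ  # 20 sub-steps per control step

def advance_simulation(step_physics_once, update_kinematics, clipped_action):
    for _ in range(PYB_STEPS_PER_CTRL):
        step_physics_once(clipped_action)   # p.stepSimulation under the hood
    update_kinematics()                     # one possible placement: once per control step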