diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
index 524db0880..45015a868 100644
--- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
@@ -60,7 +60,7 @@ task_config:
   obs_goal_horizon: 1

   # RL Reward
-  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.5]
+  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
   rew_act_weight: 0.1
   rew_exponential: True

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..1723971d4
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,28 @@
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+
+  # loss args
+  use_gae: True
+  entropy_coef: 0.01
+
+  # optim args
+  opt_epochs: 20
+  mini_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+
+  # runner args
+  max_env_steps: 480000
+  rollout_batch_size: 4
+  rollout_steps: 600
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 9600
+  save_interval: 480000
+  num_checkpoints: 0
+  eval_interval: 9600
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml
new file mode 100644
index 000000000..dc42afe78
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml
@@ -0,0 +1,74 @@
+task_config:
+  seed: 1337
+  info_in_reset: True
+  ctrl_freq: 60
+  pyb_freq: 1200
+  physics: pyb
+  quad_type: 5
+  normalized_rl_action_space: True
+
+  init_state:
+    init_x: 0
+    init_x_dot: 0
+    init_z: 1.15
+    init_z_dot: 0
+    init_theta: 0
+    init_theta_dot: 0
+  randomized_init: True
+  randomized_inertial_prop: False
+
+  init_state_randomization_info:
+    init_x:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_x_dot:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_z:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_z_dot:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_theta:
+      distrib: 'uniform'
+      low: -0.02
+      high: 0.02
+
+  task: traj_tracking
+  task_info:
+    trajectory_type: figure8
+    num_cycles: 1
+    trajectory_plane: 'xz'
+    trajectory_position_offset: [0, 1.2]
+    trajectory_scale: 0.5
+
+  inertial_prop:
+    M: 0.027
+    Iyy: 1.4e-05
+
+  episode_len_sec: 10
+  cost: rl_reward
+  obs_goal_horizon: 1
+
+  # RL Reward
+  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1]
+  rew_act_weight: 0.1
+  rew_exponential: True
+
+#  constraints:
+#    - constraint_form: default_constraint
+#      constrained_variable: state
+#      upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
+#      lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
+#    - constraint_form: default_constraint
+#      constrained_variable: input
+#      upper_bounds: [0.58, 0.8]
+#      lower_bounds: [0.06, -0.8]
+
+  done_on_out_of_bound: True
+  done_on_violation: False
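A note on the reward block above: with `cost: rl_reward` and `rew_exponential: True`, the tracking reward is an exponentiated negative quadratic cost over the five states and two inputs. A minimal sketch of that shape (the exact error terms used by the environment may differ slightly):

```python
import numpy as np

# Sketch of the exponential RL reward for the 5-state model, assuming
# the quadratic-cost convention used by safe-control-gym.
rew_state_weight = np.array([1.0, 0.01, 1.0, 0.01, 0.1])  # x, x_dot, z, z_dot, theta
rew_act_weight = np.array([0.1, 0.1])                     # T_c, P_c

def tracking_reward(state, state_goal, action, action_goal):
    # Quadratic state + action cost, exponentiated (rew_exponential: True).
    cost = np.sum(rew_state_weight * (state - state_goal) ** 2)
    cost += np.sum(rew_act_weight * (action - action_goal) ** 2)
    return np.exp(-cost)

# Perfect tracking at the goal yields the maximum reward of 1.
print(tracking_reward(np.zeros(5), np.zeros(5), np.zeros(2), np.zeros(2)))  # 1.0
```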
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..2e7f2db4e
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,29 @@
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: True
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+  entropy_lr: 0.001
+
+  # runner args
+  max_env_steps: 200000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 50000
+  deque_size: 50
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 4000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 4000
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..b968a24ca
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,29 @@
+algo: sac
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: False
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+
+  # runner args
+  max_env_steps: 200000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 50000
+  deque_size: 10
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 2000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 2000
+  eval_save_best: True
+  tensorboard: False
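These overrides are consumed by the standard safe-control-gym launch pattern, roughly as below; `ConfigFactory` and `make` are the repo's configuration and registration utilities, and the exact experiment wiring in the scripts may differ:

```python
from functools import partial

from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.registration import make

# Merge the base config with the YAML overrides passed on the command line
# (e.g. ppo_quadrotor_2D_attitude_5s.yaml + quadrotor_2D_attitude_5s_track.yaml).
config = ConfigFactory().merge()

# Environment factory: quad_type: 5 selects the new 5-state attitude model.
env_func = partial(make, config.task, **config.task_config)

# Controller (ppo/sac/td3) built from algo_config.
ctrl = make(config.algo, env_func, **config.algo_config)
```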
diff --git a/examples/rl/rl_experiment.py b/examples/rl/rl_experiment.py
index dc08aa849..37c7c4090 100644
--- a/examples/rl/rl_experiment.py
+++ b/examples/rl/rl_experiment.py
@@ -124,17 +124,17 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):

     if config.task == Environment.QUADROTOR and system == 'quadrotor_4D':
         _, ax4 = plt.subplots()
-        ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
-        ax4.set_ylabel(r'Thrust')
+        ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Action: Thrust')
+        ax4.set_ylabel(r'Action: Thrust')
         _, ax5 = plt.subplots()
-        ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
-        ax5.set_ylabel(r'Pitch')
+        ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Action: Pitch')
+        ax5.set_ylabel(r'Action: Pitch')
         _, ax6 = plt.subplots()
-        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
-        ax6.set_ylabel(r'Pitch')
+        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Obs: Pitch')
+        ax6.set_ylabel(r'Obs: Pitch')
         _, ax7 = plt.subplots()
-        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
-        ax7.set_ylabel(r'Pitch rate')
+        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Obs: Pitch rate')
+        ax7.set_ylabel(r'Obs: Pitch rate')

     if config.task == Environment.QUADROTOR and system == 'quadrotor_2D':
         # _, ax4 = plt.subplots()
@@ -144,10 +144,10 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
         # ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
         # ax4.set_ylabel(r'Thrust')
         # _, ax5 = plt.subplots()
         # ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
         # ax5.set_ylabel(r'Pitch')
         _, ax6 = plt.subplots()
-        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Thrust')
+        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Pitch')
         ax6.set_ylabel(r'Pitch')
         _, ax7 = plt.subplots()
-        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 6], 'r', label='Pitch')
+        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch rate')
         ax7.set_ylabel(r'Pitch rate')

     plt.tight_layout()
diff --git a/examples/rl/rl_experiment.sh b/examples/rl/rl_experiment.sh
index cb137d74a..eeead6a93 100755
--- a/examples/rl/rl_experiment.sh
+++ b/examples/rl/rl_experiment.sh
@@ -2,14 +2,15 @@

 #SYS='cartpole'
 #SYS='quadrotor_2D'
-SYS='quadrotor_2D_attitude'
+#SYS='quadrotor_2D_attitude'
+SYS='quadrotor_2D_attitude_5s'
 #SYS='quadrotor_3D'

 #TASK='stab'
 TASK='track'

-#ALGO='ppo'
-ALGO='sac'
+ALGO='ppo'
+#ALGO='sac'
 #ALGO='td3'
 #ALGO='safe_explorer_ppo'
diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh
index f5393ec1d..354d7f192 100755
--- a/examples/rl/train_rl_model.sh
+++ b/examples/rl/train_rl_model.sh
@@ -2,7 +2,8 @@

 #SYS='cartpole'
 #SYS='quadrotor_2D'
-SYS='quadrotor_2D_attitude'
+#SYS='quadrotor_2D_attitude'
+SYS='quadrotor_2D_attitude_5s'
 #SYS='quadrotor_3D'

 #TASK='stab'
diff --git a/safe_control_gym/controllers/pid/pid.py b/safe_control_gym/controllers/pid/pid.py
index 4132f1559..9558977ad 100644
--- a/safe_control_gym/controllers/pid/pid.py
+++ b/safe_control_gym/controllers/pid/pid.py
@@ -94,7 +94,7 @@ def select_action(self, obs, info=None):
         step = self.extract_step(info)

         # Step the environment and print all returned information.
-        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
             cur_pos = np.array([obs[0], 0, obs[2]])
             cur_quat = np.array(p.getQuaternionFromEuler([0, obs[4], 0]))
             cur_vel = np.array([obs[1], 0, obs[3]])
@@ -103,7 +103,7 @@ def select_action(self, obs, info=None):
             cur_quat = np.array(p.getQuaternionFromEuler([obs[6], obs[7], obs[8]]))
             cur_vel = np.array([obs[1], obs[3], obs[5]])

-        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
             if self.env.TASK == Task.TRAJ_TRACKING:
                 target_pos = np.array([self.reference[step, 0],
                                        0,
@@ -111,9 +111,6 @@ def select_action(self, obs, info=None):
                 target_vel = np.array([self.reference[step, 1],
                                        0,
                                        self.reference[step, 3]])
-
-                # target_pos = np.array([0.2, 0, 0.2])
-                # target_vel = np.array([0, 0, 0])
             elif self.env.TASK == Task.STABILIZATION:
                 target_pos = np.array([self.reference[0], 0, self.reference[2]])
                 target_vel = np.array([0, 0, 0])
@@ -148,7 +145,7 @@ def select_action(self, obs, info=None):
             action = rpm
             action = self.KF * action ** 2
             action = np.array([action[0] + action[3], action[1] + action[2]])
-        elif self.env.QUAD_TYPE == 4:  # 2D quadrotor with attitude control
+        elif self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:  # 2D quadrotor with attitude control
             action = np.array([self.env.attitude_control.pwm2thrust(thrust / 3) * 4, computed_target_rpy[1]])
         #### Extra checks
         # rpm = self.env.before_step(action)
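The chained `QUAD_TYPE == 2 or == 4 or == 5` guards above (and the similar chains in quadrotor.py below) could be collapsed into one membership test. A self-contained sketch of that cleanup; `PLANAR_QUAD_TYPES` is a hypothetical constant, not part of this patch:

```python
from enum import IntEnum

class QuadType(IntEnum):
    # Mirrors safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils.QuadType.
    ONE_D = 1
    TWO_D = 2
    THREE_D = 3
    TWO_D_ATTITUDE = 4
    TWO_D_ATTITUDE_5S = 5

# Hypothetical constant: every planar (x-z plane) model, including both
# attitude-controlled variants, so guards need one membership test instead
# of chained `== 2 or == 4 or == 5` comparisons.
PLANAR_QUAD_TYPES = (QuadType.TWO_D, QuadType.TWO_D_ATTITUDE, QuadType.TWO_D_ATTITUDE_5S)

quad_type = QuadType(5)
assert quad_type in PLANAR_QUAD_TYPES
```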
diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
index 89c8718ad..c8641c14e 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
@@ -263,6 +263,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
         time_before_stepping = time.time()
         # clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4))
         clipped_action = np.expand_dims(clipped_action, axis=0)
+        # rpm = self._preprocess_control(clipped_action[0, :])
         # Repeat for as many as the aggregate physics steps.
         for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -313,8 +314,8 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
                 p.stepSimulation(physicsClientId=self.PYB_CLIENT)
             # Save the last applied action (e.g. to compute drag).
             self.last_clipped_action = clipped_action
-            # Update and store the drones kinematic information.
-            self._update_and_store_kinematic_information()
+        # Update and store the drones kinematic information.
+        self._update_and_store_kinematic_information()
         time_after_stepping = time.time()
         # print('time stepping', time_after_stepping - time_before_stepping)
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
index 7621728b9..deebde8f5 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
@@ -189,6 +189,8 @@
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0, 0], ndmin=1, dtype=float)
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0, 0], ndmin=1, dtype=float)
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0], ndmin=1, dtype=float)
         elif self.QUAD_TYPE == QuadType.THREE_D:
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0], ndmin=1, dtype=float)
         else:
@@ -197,7 +199,8 @@
             if (self.QUAD_TYPE == QuadType.ONE_D and len(info_mse_metric_state_weight) == 2) or \
                     (self.QUAD_TYPE == QuadType.TWO_D and len(info_mse_metric_state_weight) == 6) or \
                     (self.QUAD_TYPE == QuadType.THREE_D and len(info_mse_metric_state_weight) == 12) or \
-                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and len(info_mse_metric_state_weight) == 6):
+                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and len(info_mse_metric_state_weight) == 6) or \
+                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and len(info_mse_metric_state_weight) == 5):
                 self.info_mse_metric_state_weight = np.array(info_mse_metric_state_weight, ndmin=1, dtype=float)
             else:
                 raise ValueError('[ERROR] in Quadrotor.__init__(), wrong info_mse_metric_state_weight argument size.')
@@ -211,6 +214,7 @@
             QuadType.ONE_D: ['init_x', 'init_x_dot'],
             QuadType.TWO_D: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta', 'init_theta_dot'],
             QuadType.TWO_D_ATTITUDE: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta', 'init_theta_dot'],
+            QuadType.TWO_D_ATTITUDE_5S: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta'],
             QuadType.THREE_D: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot',
                                'init_phi', 'init_theta', 'init_psi', 'init_p', 'init_q', 'init_r']
         }
@@ -237,7 +241,9 @@
             self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Iyy', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Izz', None)
-        elif self.QUAD_TYPE == QuadType.TWO_D or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        elif self.QUAD_TYPE == QuadType.TWO_D or \
+                self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or \
+                self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             # Only randomize Iyy for the 2D quadrotor.
             self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Izz', None)
@@ -251,6 +257,8 @@
             self.MASS, self.J[1, 1] = inertial_prop
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and np.array(inertial_prop).shape == (2,):
             self.MASS, self.J[1, 1] = inertial_prop
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and np.array(inertial_prop).shape == (2,):
+            self.MASS, self.J[1, 1] = inertial_prop
         elif self.QUAD_TYPE == QuadType.THREE_D and np.array(inertial_prop).shape == (4,):
             self.MASS, self.J[0, 0], self.J[1, 1], self.J[2, 2] = inertial_prop
         elif isinstance(inertial_prop, dict):
@@ -262,7 +270,7 @@
             raise ValueError('[ERROR] in Quadrotor.__init__(), inertial_prop incorrect format.')

         # Create X_GOAL and U_GOAL references for the assigned task.
-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             self.U_GOAL = np.array([self.MASS * self.GRAVITY_ACC, 0.0])
         else:
             self.U_GOAL = np.ones(self.action_dim) * self.MASS * self.GRAVITY_ACC / self.action_dim
@@ -281,6 +289,11 @@
                 self.TASK_INFO['stabilization_goal'][0], 0.0,
                 self.TASK_INFO['stabilization_goal'][1], 0.0, 0.0, 0.0
             ])  # x = {x, x_dot, z, z_dot, theta, theta_dot}.
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.X_GOAL = np.hstack([
+                self.TASK_INFO['stabilization_goal'][0], 0.0,
+                self.TASK_INFO['stabilization_goal'][1], 0.0, 0.0
+            ])  # x = {x, x_dot, z, z_dot, theta}.
         elif self.QUAD_TYPE == QuadType.THREE_D:
             self.X_GOAL = np.hstack([
                 self.TASK_INFO['stabilization_goal'][0], 0.0,
@@ -320,6 +333,14 @@
                 np.zeros(POS_REF.shape[0]),  # zeros
                 np.zeros(VEL_REF.shape[0])
             ]).transpose()
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.X_GOAL = np.vstack([
+                POS_REF[:, 0],  # x
+                VEL_REF[:, 0],  # x_dot
+                POS_REF[:, 2],  # z
+                VEL_REF[:, 2],  # z_dot
+                np.zeros(POS_REF.shape[0]),  # zeros
+            ]).transpose()
         elif self.QUAD_TYPE == QuadType.THREE_D:
             # Additional transformation of the originally planar trajectory.
             POS_REF_TRANS, VEL_REF_TRANS = transform_trajectory(
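For a quick sanity check of the references created here: with `M: 0.027` from the task config, and taking `GRAVITY_ACC` as 9.8 m/s² (an assumed value), the hover input and the 5-state tracking reference have the following shape:

```python
import numpy as np

# Hover input U_GOAL = [M * g, 0.0] for the attitude-controlled models;
# M = 0.027 kg from inertial_prop, g = 9.8 m/s^2 assumed.
M, g = 0.027, 9.8
U_GOAL = np.array([M * g, 0.0])  # [collective thrust (N), pitch (rad)]
print(U_GOAL)                    # [0.2646 0.    ]

# For traj_tracking at ctrl_freq 60 Hz over episode_len_sec 10, the 5-state
# reference X_GOAL = [x, x_dot, z, z_dot, theta] stacks to a (600, 5) array,
# with the theta column identically zero.
n_steps = 60 * 10
print((n_steps, 5))              # (600, 5)
```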
@@ -343,7 +364,7 @@
             ]).transpose()

         # Set attitude controller if quadtype is QuadType.TWO_D_ATTITUDE
-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             self.attitude_control = AttitudeControl(self.CTRL_TIMESTEP, self.PYB_TIMESTEP)

         # Set prior/symbolic info.
@@ -399,8 +420,8 @@ def reset(self, seed=None):
         INIT_RPY = [init_values.get('init_' + k, 0.) for k in ['phi', 'theta', 'psi']]
         if self.QUAD_TYPE == QuadType.TWO_D:
             INIT_ANG_VEL = [0, init_values.get('init_theta_dot', 0.), 0]
-        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
-            INIT_ANG_VEL = [0, 0, 0]
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            INIT_ANG_VEL = [0, init_values.get('init_theta_dot', 0.), 0]
             self.attitude_control.reset()
         else:
             INIT_ANG_VEL = [init_values.get('init_' + k, 0.) for k in ['p', 'q', 'r']]  # TODO: transform from body rates.
@@ -464,6 +485,9 @@ def step(self, action):
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             # Only disturb on x-z plane.
             disturb_force = [float(disturb_force[0]), 0, float(disturb_force[1])]
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # Only disturb on x-z plane.
+            disturb_force = [float(disturb_force[0]), 0, float(disturb_force[1])]
         elif self.QUAD_TYPE == QuadType.THREE_D:
             disturb_force = np.asarray(disturb_force).flatten()
@@ -575,6 +599,29 @@ def _setup_symbolic(self, prior_prop={}, **kwargs):
                                -60.00143727772195 * theta + 60.00143727772195 * P)
             # Define observation.
             Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot)
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            nx, nu = 5, 2
+            # Define states.
+            x = cs.MX.sym('x')
+            x_dot = cs.MX.sym('x_dot')
+            theta = cs.MX.sym('theta')  # pitch angle [rad]
+            X = cs.vertcat(x, x_dot, z, z_dot, theta)
+            # Define input collective thrust and theta.
+            T = cs.MX.sym('T_c')  # normalized thrust [N]
+            P = cs.MX.sym('P_c')  # desired pitch angle [rad]
+            U = cs.vertcat(T, P)
+            # The thrust in PWM is converted from the normalized thrust
+            # with the formula F_desired = b_F * T + a_F.
+
+            # Define dynamics equations.
+            # TODO: create a parameter for the new quad model
+            X_dot = cs.vertcat(x_dot,
+                               (18.112984649321753 * T + 3.7613154938448576) * cs.sin(theta),
+                               z_dot,
+                               (18.112984649321753 * T + 3.7613154938448576) * cs.cos(theta) - g,
+                               -60.00143727772195 * theta + 60.00143727772195 * P)
+            # Define observation.
+            Y = cs.vertcat(x, x_dot, z, z_dot, theta)
         elif self.QUAD_TYPE == QuadType.THREE_D:
             nx, nu = 12, 4
             Ixx = prior_prop.get('Ixx', self.J[0, 0])
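The identified affine thrust map in the comment above (`F_desired = b_F * T + a_F`) pins down the hover input: at `theta = 0`, vertical equilibrium requires `b_F * T + a_F = g`. A quick check, with g ≈ 9.8 m/s² assumed:

```python
# Coefficients taken from the 5-state dynamics above.
b_F = 18.112984649321753
a_F = 3.7613154938448576
g = 9.8  # assumed value of GRAVITY_ACC

# Hover input: vertical acceleration (b_F * T + a_F) - g = 0 at theta = 0.
T_hover = (g - a_F) / b_F
print(f'{T_hover:.4f}')  # ~0.3334, inside the commented input bounds [0.06, 0.58]

# The pitch channel is first-order: theta_dot = 60 * (P - theta),
# i.e. the commanded pitch is tracked with a ~1/60 s time constant.
```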
@@ -684,12 +731,16 @@ def _set_action_space(self):
             action_dim = 2
             self.ACTION_LABELS = ['T_c', 'P_c']
             self.ACTION_UNITS = ['N', 'rad'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-']
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            action_dim = 2
+            self.ACTION_LABELS = ['T_c', 'P_c']
+            self.ACTION_UNITS = ['N', 'rad'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-']
         elif self.QUAD_TYPE == QuadType.THREE_D:
             action_dim = 4
             self.ACTION_LABELS = ['T1', 'T2', 'T3', 'T4']
             self.ACTION_UNITS = ['N', 'N', 'N', 'N'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-', '-', '-']

-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             n_mot = 4  # due to collective thrust
             a_low = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MIN_PWM + self.PWM2RPM_CONST)**2
             a_high = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MAX_PWM + self.PWM2RPM_CONST)**2
@@ -704,7 +755,7 @@

         if self.NORMALIZED_RL_ACTION_SPACE:
             # Normalized thrust (around hover thrust).
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 self.hover_thrust = self.GRAVITY_ACC * self.MASS
             else:
                 self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim
@@ -767,6 +818,20 @@ def _set_observation_space(self):
             ])
             self.STATE_LABELS = ['x', 'x_dot', 'z', 'z_dot', 'theta', 'theta_dot']
             self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'rad', 'rad/s']
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # obs/state = {x, x_dot, z, z_dot, theta}.
+            low = np.array([
+                -self.x_threshold, -self.x_dot_threshold,
+                self.GROUND_PLANE_Z, -self.z_dot_threshold,
+                -self.theta_threshold_radians
+            ])
+            high = np.array([
+                self.x_threshold, self.x_dot_threshold,
+                self.z_threshold, self.z_dot_threshold,
+                self.theta_threshold_radians
+            ])
+            self.STATE_LABELS = ['x', 'x_dot', 'z', 'z_dot', 'theta']
+            self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'rad']
         elif self.QUAD_TYPE == QuadType.THREE_D:
             # obs/state = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p_body, q_body, r_body}.
             low = np.array([
@@ -833,7 +898,7 @@ def _preprocess_control(self, action):
             self.current_physical_action = self.current_physical_action + self.adv_action
         self.current_noisy_physical_action = self.current_physical_action

-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             collective_thrust, pitch = action
             # rpm = self.attitude_control._dslPIDAttitudeControl(individual_thrust,
@@ -843,8 +908,9 @@
             #                                                    self.quat[0], np.array([0, pitch, 0]))
             # input thrust is in Newton
             # print(f"collective_thrust: {collective_thrust}, pitch: {pitch}")
+            _, quat = p.getBasePositionAndOrientation(self.DRONE_IDS[0], physicsClientId=self.PYB_CLIENT)
             thrust_action = self.attitude_control._dslPIDAttitudeControl(collective_thrust / 4,
-                                                                         self.quat[0], np.array([0, pitch, 0]))
+                                                                         quat, np.array([0, pitch, 0]))
             # input thrust is in Newton
             thrust = np.array([thrust_action[0] + thrust_action[3], thrust_action[1] + thrust_action[2]])
             thrust = np.clip(thrust, np.full(2, self.physical_action_bounds[0][0] / 2),
@@ -869,7 +935,7 @@ def normalize_action(self, action):
             normalized_action (ndarray): The action in the correct action space.
         """
         if self.NORMALIZED_RL_ACTION_SPACE:
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 action = np.array([(action[0] / self.hover_thrust - 1) / self.norm_act_scale, action[1]])
             else:
                 action = (action / self.hover_thrust - 1) / self.norm_act_scale
@@ -887,7 +953,7 @@ def denormalize_action(self, action):
         """
         if self.NORMALIZED_RL_ACTION_SPACE:
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 # # divided by 4 as action[0] is a collective thrust
                 # thrust = action[0] / 4
                 # hover_pwm = (self.HOVER_RPM - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE
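The thrust channel's normalization above inverts cleanly; a round-trip sketch with assumed values for `hover_thrust` and `norm_act_scale` (the repo's defaults may differ):

```python
import numpy as np

# Round-trip sketch of the attitude-model action normalization: only the
# collective-thrust channel is scaled; pitch passes through unchanged.
# hover_thrust = M * g and norm_act_scale = 0.1 are assumed values here.
hover_thrust, norm_act_scale = 0.2646, 0.1

def normalize(action):
    return np.array([(action[0] / hover_thrust - 1) / norm_act_scale, action[1]])

def denormalize(action):
    return np.array([(1 + norm_act_scale * action[0]) * hover_thrust, action[1]])

a = np.array([0.30, 0.05])                        # [collective thrust (N), pitch (rad)]
assert np.allclose(denormalize(normalize(a)), a)  # exact inverse
print(normalize(np.array([hover_thrust, 0.0])))   # [0. 0.]: hover maps to zero
```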
@@ -930,6 +996,11 @@ def _get_observation(self):
             self.state = np.hstack(
                 [pos[0], vel[0], pos[2], vel[2], rpy[1], ang_v[1]]
             ).reshape((6,))
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # {x, x_dot, z, z_dot, theta}.
+            self.state = np.hstack(
+                [pos[0], vel[0], pos[2], vel[2], rpy[1]]
+            ).reshape((5,))
         elif self.QUAD_TYPE == QuadType.THREE_D:
             Rob = np.array(p.getMatrixFromQuaternion(self.quat[0])).reshape((3, 3))
             Rbo = Rob.T
@@ -1026,6 +1097,8 @@ def _get_done(self):
             mask = np.array([1, 0, 1, 0, 1, 0])
         if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             mask = np.array([1, 0, 1, 0, 1, 0])
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            mask = np.array([1, 0, 1, 0, 1])
         if self.QUAD_TYPE == QuadType.THREE_D:
             mask = np.array([1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0])
         # Element-wise or to check out-of-bound conditions.
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
index 33d91cfb8..6e395d702 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
@@ -15,6 +15,7 @@ class QuadType(IntEnum):
     TWO_D = 2  # Two-dimensional (in the x-z plane) movement.
     THREE_D = 3  # Three-dimensional movement.
     TWO_D_ATTITUDE = 4  # Two-dimensional (in the x-z plane) movement with attitude control.
+    TWO_D_ATTITUDE_5S = 5  # Two-dimensional (in the x-z plane) movement with attitude control with 5 states.


 def cmd2pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max):
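For context on `cmd2pwm` (the context line above), a hedged sketch of the thrust-to-PWM inversion such a helper performs in gym-pybullet-drones-style models, consistent with the bound computation in `_set_action_space` (thrust = ct·rpm², rpm = pwm2rpm_scale·pwm + pwm2rpm_const); the usage constants are illustrative Crazyflie-like values, not taken from this patch:

```python
import numpy as np

def thrust_to_pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max):
    rpm = np.sqrt(thrust / ct)                   # invert thrust = ct * rpm^2 (per motor)
    pwm = (rpm - pwm2rpm_const) / pwm2rpm_scale  # invert the affine PWM -> RPM map
    return np.clip(pwm, pwm_min, pwm_max)

# Illustrative per-motor thrust of 0.07 N with assumed Crazyflie-like constants.
print(thrust_to_pwm(0.07, 0.2685, 4070.3, 3.16e-10, 20000, 65535))  # ~40272
```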