diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
index 524db0880..45015a868 100644
--- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
@@ -60,7 +60,7 @@ task_config:
   obs_goal_horizon: 1

   # RL Reward
-  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.5]
+  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
   rew_act_weight: 0.1
   rew_exponential: True

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..1723971d4
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/ppo_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,28 @@
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+
+  # loss args
+  use_gae: True
+  entropy_coef: 0.01
+
+  # optim args
+  opt_epochs: 20
+  mini_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+
+  # runner args
+  max_env_steps: 480000
+  rollout_batch_size: 4
+  rollout_steps: 600
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 9600
+  save_interval: 480000
+  num_checkpoints: 0
+  eval_interval: 9600
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml
new file mode 100644
index 000000000..dc42afe78
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/quadrotor_2D_attitude_5s_track.yaml
@@ -0,0 +1,74 @@
+task_config:
+  seed: 1337
+  info_in_reset: True
+  ctrl_freq: 60
+  pyb_freq: 1200
+  physics: pyb
+  quad_type: 5
+  normalized_rl_action_space: True
+
+  init_state:
+    init_x: 0
+    init_x_dot: 0
+    init_z: 1.15
+    init_z_dot: 0
+    init_theta: 0
+    init_theta_dot: 0
+  randomized_init: True
+  randomized_inertial_prop: False
+
+  init_state_randomization_info:
+    init_x:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_x_dot:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_z:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_z_dot:
+      distrib: 'uniform'
+      low: -0.01
+      high: 0.01
+    init_theta:
+      distrib: 'uniform'
+      low: -0.02
+      high: 0.02
+
+  task: traj_tracking
+  task_info:
+    trajectory_type: figure8
+    num_cycles: 1
+    trajectory_plane: 'xz'
+    trajectory_position_offset: [0, 1.2]
+    trajectory_scale: 0.5
+
+  inertial_prop:
+    M: 0.027
+    Iyy: 1.4e-05
+
+  episode_len_sec: 10
+  cost: rl_reward
+  obs_goal_horizon: 1
+
+  # RL Reward
+  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1]
+  rew_act_weight: 0.1
+  rew_exponential: True
+
+#  constraints:
+#    - constraint_form: default_constraint
+#      constrained_variable: state
+#      upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
+#      lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
+#    - constraint_form: default_constraint
+#      constrained_variable: input
+#      upper_bounds: [0.58, 0.8]
+#      lower_bounds: [0.06, -0.8]
+
+  done_on_out_of_bound: True
+  done_on_violation: False
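A note on the reward block above: with `cost: rl_reward` and `rew_exponential: True`, the tracking reward is an exponentiated negative quadratic cost over the five states and two inputs. A minimal sketch of that shape (the exact error terms used by the environment may differ slightly):

```python
import numpy as np

# Sketch of the exponential RL reward for the 5-state model, assuming
# the quadratic-cost convention used by safe-control-gym.
rew_state_weight = np.array([1.0, 0.01, 1.0, 0.01, 0.1])  # x, x_dot, z, z_dot, theta
rew_act_weight = np.array([0.1, 0.1])                     # T_c, P_c

def tracking_reward(state, state_goal, action, action_goal):
    # Quadratic state + action cost, exponentiated (rew_exponential: True).
    cost = np.sum(rew_state_weight * (state - state_goal) ** 2)
    cost += np.sum(rew_act_weight * (action - action_goal) ** 2)
    return np.exp(-cost)

# Perfect tracking at the goal yields the maximum reward of 1.
print(tracking_reward(np.zeros(5), np.zeros(5), np.zeros(2), np.zeros(2)))  # 1.0
```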
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..2e7f2db4e
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/sac_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,29 @@
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: True
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+  entropy_lr: 0.001
+
+  # runner args
+  max_env_steps: 200000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 50000
+  deque_size: 50
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 4000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 4000
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml
new file mode 100644
index 000000000..b968a24ca
--- /dev/null
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude_5s/td3_quadrotor_2D_attitude_5s.yaml
@@ -0,0 +1,29 @@
+algo: sac
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: False
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+
+  # runner args
+  max_env_steps: 200000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 50000
+  deque_size: 10
+  eval_batch_size: 50
+
+  # misc
+  log_interval: 2000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 2000
+  eval_save_best: True
+  tensorboard: False
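These overrides are consumed by the standard safe-control-gym launch pattern, roughly as below; `ConfigFactory` and `make` are the repo's configuration and registration utilities, and the exact experiment wiring in the scripts may differ:

```python
from functools import partial

from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.registration import make

# Merge the base config with the YAML overrides passed on the command line
# (e.g. ppo_quadrotor_2D_attitude_5s.yaml + quadrotor_2D_attitude_5s_track.yaml).
config = ConfigFactory().merge()

# Environment factory: quad_type: 5 selects the new 5-state attitude model.
env_func = partial(make, config.task, **config.task_config)

# Controller (ppo/sac/td3) built from algo_config.
ctrl = make(config.algo, env_func, **config.algo_config)
```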
diff --git a/examples/rl/rl_experiment.py b/examples/rl/rl_experiment.py
index dc08aa849..37c7c4090 100644
--- a/examples/rl/rl_experiment.py
+++ b/examples/rl/rl_experiment.py
@@ -124,17 +124,17 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):

     if config.task == Environment.QUADROTOR and system == 'quadrotor_4D':
         _, ax4 = plt.subplots()
-        ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
-        ax4.set_ylabel(r'Thrust')
+        ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Action: Thrust')
+        ax4.set_ylabel(r'Action: Thrust')
         _, ax5 = plt.subplots()
-        ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
-        ax5.set_ylabel(r'Pitch')
+        ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Action: Pitch')
+        ax5.set_ylabel(r'Action: Pitch')
         _, ax6 = plt.subplots()
-        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
-        ax6.set_ylabel(r'Pitch')
+        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Obs: Pitch')
+        ax6.set_ylabel(r'Obs: Pitch')
         _, ax7 = plt.subplots()
-        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
-        ax7.set_ylabel(r'Pitch rate')
+        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Obs: Pitch rate')
+        ax7.set_ylabel(r'Obs: Pitch rate')

     if config.task == Environment.QUADROTOR and system == 'quadrotor_2D':
         # _, ax4 = plt.subplots()
@@ -144,10 +144,10 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
         # ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
         # ax4.set_ylabel(r'Thrust')
         # _, ax5 = plt.subplots()
         # ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
         # ax5.set_ylabel(r'Pitch')
         _, ax6 = plt.subplots()
-        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Thrust')
+        ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Pitch')
         ax6.set_ylabel(r'Pitch')
         _, ax7 = plt.subplots()
-        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 6], 'r', label='Pitch')
+        ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch rate')
         ax7.set_ylabel(r'Pitch rate')

     plt.tight_layout()
diff --git a/examples/rl/rl_experiment.sh b/examples/rl/rl_experiment.sh
index cb137d74a..eeead6a93 100755
--- a/examples/rl/rl_experiment.sh
+++ b/examples/rl/rl_experiment.sh
@@ -2,14 +2,15 @@

 #SYS='cartpole'
 #SYS='quadrotor_2D'
-SYS='quadrotor_2D_attitude'
+#SYS='quadrotor_2D_attitude'
+SYS='quadrotor_2D_attitude_5s'
 #SYS='quadrotor_3D'

 #TASK='stab'
 TASK='track'

-#ALGO='ppo'
-ALGO='sac'
+ALGO='ppo'
+#ALGO='sac'
 #ALGO='td3'
 #ALGO='safe_explorer_ppo'
diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh
index f5393ec1d..354d7f192 100755
--- a/examples/rl/train_rl_model.sh
+++ b/examples/rl/train_rl_model.sh
@@ -2,7 +2,8 @@

 #SYS='cartpole'
 #SYS='quadrotor_2D'
-SYS='quadrotor_2D_attitude'
+#SYS='quadrotor_2D_attitude'
+SYS='quadrotor_2D_attitude_5s'
 #SYS='quadrotor_3D'

 #TASK='stab'
diff --git a/safe_control_gym/controllers/pid/pid.py b/safe_control_gym/controllers/pid/pid.py
index 4132f1559..9558977ad 100644
--- a/safe_control_gym/controllers/pid/pid.py
+++ b/safe_control_gym/controllers/pid/pid.py
@@ -94,7 +94,7 @@ def select_action(self, obs, info=None):
         step = self.extract_step(info)

         # Step the environment and print all returned information.
-        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
             cur_pos = np.array([obs[0], 0, obs[2]])
             cur_quat = np.array(p.getQuaternionFromEuler([0, obs[4], 0]))
             cur_vel = np.array([obs[1], 0, obs[3]])
@@ -103,7 +103,7 @@ def select_action(self, obs, info=None):
             cur_quat = np.array(p.getQuaternionFromEuler([obs[6], obs[7], obs[8]]))
             cur_vel = np.array([obs[1], obs[3], obs[5]])

-        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+        if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
             if self.env.TASK == Task.TRAJ_TRACKING:
                 target_pos = np.array([self.reference[step, 0],
                                        0,
@@ -111,9 +111,6 @@ def select_action(self, obs, info=None):
                 target_vel = np.array([self.reference[step, 1],
                                        0,
                                        self.reference[step, 3]])
-
-                # target_pos = np.array([0.2, 0, 0.2])
-                # target_vel = np.array([0, 0, 0])
             elif self.env.TASK == Task.STABILIZATION:
                 target_pos = np.array([self.reference[0], 0, self.reference[2]])
                 target_vel = np.array([0, 0, 0])
@@ -148,7 +145,7 @@ def select_action(self, obs, info=None):
             action = rpm
             action = self.KF * action ** 2
             action = np.array([action[0] + action[3], action[1] + action[2]])
-        elif self.env.QUAD_TYPE == 4:  # 2D quadrotor with attitude control
+        elif self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:  # 2D quadrotor with attitude control
             action = np.array([self.env.attitude_control.pwm2thrust(thrust / 3) * 4, computed_target_rpy[1]])
         #### Extra checks
         # rpm = self.env.before_step(action)
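The chained `QUAD_TYPE == 2 or == 4 or == 5` guards above (and the similar chains in quadrotor.py below) could be collapsed into one membership test. A self-contained sketch of that cleanup; `PLANAR_QUAD_TYPES` is a hypothetical constant, not part of this patch:

```python
from enum import IntEnum

class QuadType(IntEnum):
    # Mirrors safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils.QuadType.
    ONE_D = 1
    TWO_D = 2
    THREE_D = 3
    TWO_D_ATTITUDE = 4
    TWO_D_ATTITUDE_5S = 5

# Hypothetical constant: every planar (x-z plane) model, including both
# attitude-controlled variants, so guards need one membership test instead
# of chained `== 2 or == 4 or == 5` comparisons.
PLANAR_QUAD_TYPES = (QuadType.TWO_D, QuadType.TWO_D_ATTITUDE, QuadType.TWO_D_ATTITUDE_5S)

quad_type = QuadType(5)
assert quad_type in PLANAR_QUAD_TYPES
```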
diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
index 89c8718ad..c8641c14e 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
@@ -263,6 +263,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
         time_before_stepping = time.time()
         # clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4))
         clipped_action = np.expand_dims(clipped_action, axis=0)
+        # rpm = self._preprocess_control(clipped_action[0, :])
         # Repeat for as many as the aggregate physics steps.
         for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -313,8 +314,8 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
                 p.stepSimulation(physicsClientId=self.PYB_CLIENT)
             # Save the last applied action (e.g. to compute drag).
             self.last_clipped_action = clipped_action
-            # Update and store the drones kinematic information.
-            self._update_and_store_kinematic_information()
+        # Update and store the drones kinematic information.
+        self._update_and_store_kinematic_information()
         time_after_stepping = time.time()
         # print('time stepping', time_after_stepping - time_before_stepping)
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
index 7621728b9..deebde8f5 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
@@ -189,6 +189,8 @@
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0, 0], ndmin=1, dtype=float)
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0, 0], ndmin=1, dtype=float)
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 0], ndmin=1, dtype=float)
         elif self.QUAD_TYPE == QuadType.THREE_D:
             self.info_mse_metric_state_weight = np.array([1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0], ndmin=1, dtype=float)
         else:
@@ -197,7 +199,8 @@
             if (self.QUAD_TYPE == QuadType.ONE_D and len(info_mse_metric_state_weight) == 2) or \
                     (self.QUAD_TYPE == QuadType.TWO_D and len(info_mse_metric_state_weight) == 6) or \
                     (self.QUAD_TYPE == QuadType.THREE_D and len(info_mse_metric_state_weight) == 12) or \
-                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and len(info_mse_metric_state_weight) == 6):
+                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and len(info_mse_metric_state_weight) == 6) or \
+                    (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and len(info_mse_metric_state_weight) == 5):
                 self.info_mse_metric_state_weight = np.array(info_mse_metric_state_weight, ndmin=1, dtype=float)
             else:
                 raise ValueError('[ERROR] in Quadrotor.__init__(), wrong info_mse_metric_state_weight argument size.')
@@ -211,6 +214,7 @@
             QuadType.ONE_D: ['init_x', 'init_x_dot'],
             QuadType.TWO_D: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta', 'init_theta_dot'],
             QuadType.TWO_D_ATTITUDE: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta', 'init_theta_dot'],
+            QuadType.TWO_D_ATTITUDE_5S: ['init_x', 'init_x_dot', 'init_z', 'init_z_dot', 'init_theta'],
             QuadType.THREE_D: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot',
                                'init_phi', 'init_theta', 'init_psi', 'init_p', 'init_q', 'init_r']
         }
@@ -237,7 +241,9 @@
             self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Iyy', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Izz', None)
-        elif self.QUAD_TYPE == QuadType.TWO_D or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        elif self.QUAD_TYPE == QuadType.TWO_D or \
+                self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or \
+                self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             # Only randomize Iyy for the 2D quadrotor.
             self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None)
             self.INERTIAL_PROP_RAND_INFO.pop('Izz', None)
@@ -251,6 +257,8 @@
             self.MASS, self.J[1, 1] = inertial_prop
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE and np.array(inertial_prop).shape == (2,):
             self.MASS, self.J[1, 1] = inertial_prop
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and np.array(inertial_prop).shape == (2,):
+            self.MASS, self.J[1, 1] = inertial_prop
         elif self.QUAD_TYPE == QuadType.THREE_D and np.array(inertial_prop).shape == (4,):
             self.MASS, self.J[0, 0], self.J[1, 1], self.J[2, 2] = inertial_prop
         elif isinstance(inertial_prop, dict):
@@ -262,7 +270,7 @@
             raise ValueError('[ERROR] in Quadrotor.__init__(), inertial_prop incorrect format.')

         # Create X_GOAL and U_GOAL references for the assigned task.
-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             self.U_GOAL = np.array([self.MASS * self.GRAVITY_ACC, 0.0])
         else:
             self.U_GOAL = np.ones(self.action_dim) * self.MASS * self.GRAVITY_ACC / self.action_dim
@@ -281,6 +289,11 @@
                 self.TASK_INFO['stabilization_goal'][0], 0.0,
                 self.TASK_INFO['stabilization_goal'][1], 0.0, 0.0, 0.0
             ])  # x = {x, x_dot, z, z_dot, theta, theta_dot}.
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.X_GOAL = np.hstack([
+                self.TASK_INFO['stabilization_goal'][0], 0.0,
+                self.TASK_INFO['stabilization_goal'][1], 0.0, 0.0
+            ])  # x = {x, x_dot, z, z_dot, theta}.
         elif self.QUAD_TYPE == QuadType.THREE_D:
             self.X_GOAL = np.hstack([
                 self.TASK_INFO['stabilization_goal'][0], 0.0,
@@ -320,6 +333,14 @@
                 np.zeros(POS_REF.shape[0]),  # zeros
                 np.zeros(VEL_REF.shape[0])
             ]).transpose()
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            self.X_GOAL = np.vstack([
+                POS_REF[:, 0],  # x
+                VEL_REF[:, 0],  # x_dot
+                POS_REF[:, 2],  # z
+                VEL_REF[:, 2],  # z_dot
+                np.zeros(POS_REF.shape[0]),  # zeros
+            ]).transpose()
         elif self.QUAD_TYPE == QuadType.THREE_D:
             # Additional transformation of the originally planar trajectory.
             POS_REF_TRANS, VEL_REF_TRANS = transform_trajectory(
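For a quick sanity check of the references created here: with `M: 0.027` from the task config, and taking `GRAVITY_ACC` as 9.8 m/s² (an assumed value), the hover input and the 5-state tracking reference have the following shape:

```python
import numpy as np

# Hover input U_GOAL = [M * g, 0.0] for the attitude-controlled models;
# M = 0.027 kg from inertial_prop, g = 9.8 m/s^2 assumed.
M, g = 0.027, 9.8
U_GOAL = np.array([M * g, 0.0])  # [collective thrust (N), pitch (rad)]
print(U_GOAL)                    # [0.2646 0.    ]

# For traj_tracking at ctrl_freq 60 Hz over episode_len_sec 10, the 5-state
# reference X_GOAL = [x, x_dot, z, z_dot, theta] stacks to a (600, 5) array,
# with the theta column identically zero.
n_steps = 60 * 10
print((n_steps, 5))              # (600, 5)
```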
@@ -343,7 +364,7 @@
             ]).transpose()

         # Set attitude controller if quadtype is QuadType.TWO_D_ATTITUDE
-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             self.attitude_control = AttitudeControl(self.CTRL_TIMESTEP, self.PYB_TIMESTEP)

         # Set prior/symbolic info.
@@ -399,8 +420,8 @@ def reset(self, seed=None):
         INIT_RPY = [init_values.get('init_' + k, 0.) for k in ['phi', 'theta', 'psi']]
         if self.QUAD_TYPE == QuadType.TWO_D:
             INIT_ANG_VEL = [0, init_values.get('init_theta_dot', 0.), 0]
-        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
-            INIT_ANG_VEL = [0, 0, 0]
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            INIT_ANG_VEL = [0, init_values.get('init_theta_dot', 0.), 0]
             self.attitude_control.reset()
         else:
             INIT_ANG_VEL = [init_values.get('init_' + k, 0.) for k in ['p', 'q', 'r']]  # TODO: transform from body rates.
@@ -464,6 +485,9 @@ def step(self, action):
         elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             # Only disturb on x-z plane.
             disturb_force = [float(disturb_force[0]), 0, float(disturb_force[1])]
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # Only disturb on x-z plane.
+            disturb_force = [float(disturb_force[0]), 0, float(disturb_force[1])]
         elif self.QUAD_TYPE == QuadType.THREE_D:
             disturb_force = np.asarray(disturb_force).flatten()
@@ -575,6 +599,29 @@ def _setup_symbolic(self, prior_prop={}, **kwargs):
                                -60.00143727772195 * theta + 60.00143727772195 * P)
             # Define observation.
             Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot)
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            nx, nu = 5, 2
+            # Define states.
+            x = cs.MX.sym('x')
+            x_dot = cs.MX.sym('x_dot')
+            theta = cs.MX.sym('theta')  # pitch angle [rad]
+            X = cs.vertcat(x, x_dot, z, z_dot, theta)
+            # Define input collective thrust and theta.
+            T = cs.MX.sym('T_c')  # normalized thrust [N]
+            P = cs.MX.sym('P_c')  # desired pitch angle [rad]
+            U = cs.vertcat(T, P)
+            # The thrust in PWM is converted from the normalized thrust
+            # with the formula F_desired = b_F * T + a_F.
+
+            # Define dynamics equations.
+            # TODO: create a parameter for the new quad model
+            X_dot = cs.vertcat(x_dot,
+                               (18.112984649321753 * T + 3.7613154938448576) * cs.sin(theta),
+                               z_dot,
+                               (18.112984649321753 * T + 3.7613154938448576) * cs.cos(theta) - g,
+                               -60.00143727772195 * theta + 60.00143727772195 * P)
+            # Define observation.
+            Y = cs.vertcat(x, x_dot, z, z_dot, theta)
         elif self.QUAD_TYPE == QuadType.THREE_D:
             nx, nu = 12, 4
             Ixx = prior_prop.get('Ixx', self.J[0, 0])
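The identified affine thrust map in the comment above (`F_desired = b_F * T + a_F`) pins down the hover input: at `theta = 0`, vertical equilibrium requires `b_F * T + a_F = g`. A quick check, with g ≈ 9.8 m/s² assumed:

```python
# Coefficients taken from the 5-state dynamics above.
b_F = 18.112984649321753
a_F = 3.7613154938448576
g = 9.8  # assumed value of GRAVITY_ACC

# Hover input: vertical acceleration (b_F * T + a_F) - g = 0 at theta = 0.
T_hover = (g - a_F) / b_F
print(f'{T_hover:.4f}')  # ~0.3334, inside the commented input bounds [0.06, 0.58]

# The pitch channel is first-order: theta_dot = 60 * (P - theta),
# i.e. the commanded pitch is tracked with a ~1/60 s time constant.
```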
@@ -684,12 +731,16 @@ def _set_action_space(self):
             action_dim = 2
             self.ACTION_LABELS = ['T_c', 'P_c']
             self.ACTION_UNITS = ['N', 'rad'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-']
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            action_dim = 2
+            self.ACTION_LABELS = ['T_c', 'P_c']
+            self.ACTION_UNITS = ['N', 'rad'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-']
         elif self.QUAD_TYPE == QuadType.THREE_D:
             action_dim = 4
             self.ACTION_LABELS = ['T1', 'T2', 'T3', 'T4']
             self.ACTION_UNITS = ['N', 'N', 'N', 'N'] if not self.NORMALIZED_RL_ACTION_SPACE else ['-', '-', '-', '-']

-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             n_mot = 4  # due to collective thrust
             a_low = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MIN_PWM + self.PWM2RPM_CONST)**2
             a_high = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MAX_PWM + self.PWM2RPM_CONST)**2
@@ -704,7 +755,7 @@

         if self.NORMALIZED_RL_ACTION_SPACE:
             # Normalized thrust (around hover thrust).
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 self.hover_thrust = self.GRAVITY_ACC * self.MASS
             else:
                 self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim
@@ -767,6 +818,20 @@ def _set_observation_space(self):
             ])
             self.STATE_LABELS = ['x', 'x_dot', 'z', 'z_dot', 'theta', 'theta_dot']
             self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'rad', 'rad/s']
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # obs/state = {x, x_dot, z, z_dot, theta}.
+            low = np.array([
+                -self.x_threshold, -self.x_dot_threshold,
+                self.GROUND_PLANE_Z, -self.z_dot_threshold,
+                -self.theta_threshold_radians
+            ])
+            high = np.array([
+                self.x_threshold, self.x_dot_threshold,
+                self.z_threshold, self.z_dot_threshold,
+                self.theta_threshold_radians
+            ])
+            self.STATE_LABELS = ['x', 'x_dot', 'z', 'z_dot', 'theta']
+            self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'rad']
         elif self.QUAD_TYPE == QuadType.THREE_D:
             # obs/state = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p_body, q_body, r_body}.
             low = np.array([
@@ -833,7 +898,7 @@ def _preprocess_control(self, action):
             self.current_physical_action = self.current_physical_action + self.adv_action
         self.current_noisy_physical_action = self.current_physical_action

-        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
             collective_thrust, pitch = action
             # rpm = self.attitude_control._dslPIDAttitudeControl(individual_thrust,
@@ -843,8 +908,9 @@
             #                                                    self.quat[0], np.array([0, pitch, 0]))
             # input thrust is in Newton
             # print(f"collective_thrust: {collective_thrust}, pitch: {pitch}")
+            _, quat = p.getBasePositionAndOrientation(self.DRONE_IDS[0], physicsClientId=self.PYB_CLIENT)
             thrust_action = self.attitude_control._dslPIDAttitudeControl(collective_thrust / 4,
-                                                                         self.quat[0], np.array([0, pitch, 0]))
+                                                                         quat, np.array([0, pitch, 0]))
             # input thrust is in Newton
             thrust = np.array([thrust_action[0] + thrust_action[3], thrust_action[1] + thrust_action[2]])
             thrust = np.clip(thrust, np.full(2, self.physical_action_bounds[0][0] / 2),
@@ -869,7 +935,7 @@ def normalize_action(self, action):
             normalized_action (ndarray): The action in the correct action space.
         """
         if self.NORMALIZED_RL_ACTION_SPACE:
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 action = np.array([(action[0] / self.hover_thrust - 1) / self.norm_act_scale, action[1]])
             else:
                 action = (action / self.hover_thrust - 1) / self.norm_act_scale
@@ -887,7 +953,7 @@ def denormalize_action(self, action):
         """
         if self.NORMALIZED_RL_ACTION_SPACE:
-            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
+            if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
                 # # divided by 4 as action[0] is a collective thrust
                 # thrust = action[0] / 4
                 # hover_pwm = (self.HOVER_RPM - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE
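The thrust channel's normalization above inverts cleanly; a round-trip sketch with assumed values for `hover_thrust` and `norm_act_scale` (the repo's defaults may differ):

```python
import numpy as np

# Round-trip sketch of the attitude-model action normalization: only the
# collective-thrust channel is scaled; pitch passes through unchanged.
# hover_thrust = M * g and norm_act_scale = 0.1 are assumed values here.
hover_thrust, norm_act_scale = 0.2646, 0.1

def normalize(action):
    return np.array([(action[0] / hover_thrust - 1) / norm_act_scale, action[1]])

def denormalize(action):
    return np.array([(1 + norm_act_scale * action[0]) * hover_thrust, action[1]])

a = np.array([0.30, 0.05])                        # [collective thrust (N), pitch (rad)]
assert np.allclose(denormalize(normalize(a)), a)  # exact inverse
print(normalize(np.array([hover_thrust, 0.0])))   # [0. 0.]: hover maps to zero
```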
@@ -930,6 +996,11 @@ def _get_observation(self):
             self.state = np.hstack(
                 [pos[0], vel[0], pos[2], vel[2], rpy[1], ang_v[1]]
             ).reshape((6,))
+        elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            # {x, x_dot, z, z_dot, theta}.
+            self.state = np.hstack(
+                [pos[0], vel[0], pos[2], vel[2], rpy[1]]
+            ).reshape((5,))
         elif self.QUAD_TYPE == QuadType.THREE_D:
             Rob = np.array(p.getMatrixFromQuaternion(self.quat[0])).reshape((3, 3))
             Rbo = Rob.T
@@ -1026,6 +1097,8 @@ def _get_done(self):
             mask = np.array([1, 0, 1, 0, 1, 0])
         if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE:
             mask = np.array([1, 0, 1, 0, 1, 0])
+        if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S:
+            mask = np.array([1, 0, 1, 0, 1])
         if self.QUAD_TYPE == QuadType.THREE_D:
             mask = np.array([1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0])
         # Element-wise or to check out-of-bound conditions.
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
index 33d91cfb8..6e395d702 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py
@@ -15,6 +15,7 @@ class QuadType(IntEnum):
     TWO_D = 2  # Two-dimensional (in the x-z plane) movement.
     THREE_D = 3  # Three-dimensional movement.
     TWO_D_ATTITUDE = 4  # Two-dimensional (in the x-z plane) movement with attitude control.
+    TWO_D_ATTITUDE_5S = 5  # Two-dimensional (in the x-z plane) movement with attitude control with 5 states.


 def cmd2pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max):
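For context on `cmd2pwm` (the context line above), a hedged sketch of the thrust-to-PWM inversion such a helper performs in gym-pybullet-drones-style models, consistent with the bound computation in `_set_action_space` (thrust = ct·rpm², rpm = pwm2rpm_scale·pwm + pwm2rpm_const); the usage constants are illustrative Crazyflie-like values, not taken from this patch:

```python
import numpy as np

def thrust_to_pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max):
    rpm = np.sqrt(thrust / ct)                   # invert thrust = ct * rpm^2 (per motor)
    pwm = (rpm - pwm2rpm_const) / pwm2rpm_scale  # invert the affine PWM -> RPM map
    return np.clip(pwm, pwm_min, pwm_max)

# Illustrative per-motor thrust of 0.07 N with assumed Crazyflie-like constants.
print(thrust_to_pwm(0.07, 0.2685, 4070.3, 3.16e-10, 20000, 65535))  # ~40272
```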