Quad 2D with 5 states and attitude ctrl
svsawant committed Jul 17, 2024
1 parent 5e980ec commit bfd0d31
Showing 12 changed files with 269 additions and 35 deletions.
@@ -60,7 +60,7 @@ task_config:
obs_goal_horizon: 1

# RL Reward
- rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.5]
+ rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
rew_act_weight: 0.1
rew_exponential: True

@@ -0,0 +1,28 @@
algo_config:
# model args
hidden_dim: 128
activation: "relu"

# loss args
use_gae: True
entropy_coef: 0.01

# optim args
opt_epochs: 20
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 480000
rollout_batch_size: 4
rollout_steps: 600
eval_batch_size: 50

# misc
log_interval: 9600
save_interval: 480000
num_checkpoints: 0
eval_interval: 9600
eval_save_best: True
tensorboard: False
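
As a quick sanity check on this budget (a sketch only; the dictionary below just mirrors the YAML keys above and is not code from this commit): with 4 rollout environments collecting 600 steps each, every PPO iteration gathers 2,400 environment steps, so 480,000 steps correspond to 200 iterations, and logging/evaluating every 9,600 steps gives 50 such events. At the 60 Hz control rate used in the task config that follows, 600 steps are exactly one 10-second episode per environment.

# Sketch only: bookkeeping implied by the PPO settings above (not repository code).
ppo_cfg = {
    'max_env_steps': 480_000,
    'rollout_batch_size': 4,   # parallel rollout environments
    'rollout_steps': 600,      # steps collected per environment per iteration
    'log_interval': 9600,
}
steps_per_iteration = ppo_cfg['rollout_batch_size'] * ppo_cfg['rollout_steps']   # 2400
num_iterations = ppo_cfg['max_env_steps'] // steps_per_iteration                 # 200
num_log_points = ppo_cfg['max_env_steps'] // ppo_cfg['log_interval']             # 50
print(steps_per_iteration, num_iterations, num_log_points)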
@@ -0,0 +1,74 @@
task_config:
seed: 1337
info_in_reset: True
ctrl_freq: 60
pyb_freq: 1200
physics: pyb
quad_type: 5
normalized_rl_action_space: True

init_state:
init_x: 0
init_x_dot: 0
init_z: 1.15
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -0.01
high: 0.01
init_x_dot:
distrib: 'uniform'
low: -0.01
high: 0.01
init_z:
distrib: 'uniform'
low: -0.01
high: 0.01
init_z_dot:
distrib: 'uniform'
low: -0.01
high: 0.01
init_theta:
distrib: 'uniform'
low: -0.02
high: 0.02

task: traj_tracking
task_info:
trajectory_type: figure8
num_cycles: 1
trajectory_plane: 'xz'
trajectory_position_offset: [0, 1.2]
trajectory_scale: 0.5

inertial_prop:
M: 0.027
Iyy: 1.4e-05

episode_len_sec: 10
cost: rl_reward
obs_goal_horizon: 1

# RL Reward
rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1]
rew_act_weight: 0.1
rew_exponential: True

# constraints:
# - constraint_form: default_constraint
# constrained_variable: state
# upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
# lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
# - constraint_form: default_constraint
# constrained_variable: input
# upper_bounds: [0.58, 0.8]
# lower_bounds: [0.06, -0.8]

done_on_out_of_bound: True
done_on_violation: False
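
The five entries of rew_state_weight match the reduced five-dimensional state of quad_type 5 (the earlier 2D quadrotor config above keeps six). Below is a minimal sketch of how such weights typically enter an exponential tracking reward, assuming the state ordering [x, x_dot, z, z_dot, theta] and the scalar rew_act_weight broadcast over a two-dimensional attitude action; the actual reward is computed inside the environment and is not part of this diff.

import numpy as np

# Hedged sketch of an exponential tracking reward; the state ordering
# [x, x_dot, z, z_dot, theta] is assumed, not taken from this diff.
rew_state_weight = np.array([1.0, 0.01, 1.0, 0.01, 0.1])
rew_act_weight = np.array([0.1, 0.1])  # scalar 0.1 broadcast over the 2-dim action (assumed)

def exponential_reward(state, action, state_ref, action_ref):
    cost = np.sum(rew_state_weight * (state - state_ref) ** 2)
    cost += np.sum(rew_act_weight * (action - action_ref) ** 2)
    return np.exp(-cost)  # rew_exponential: True keeps the reward in (0, 1]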
@@ -0,0 +1,29 @@
algo_config:
# model args
hidden_dim: 128
activation: "relu"
use_entropy_tuning: True

# optim args
train_interval: 100
train_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001
entropy_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 50000
deque_size: 50
eval_batch_size: 50

# misc
log_interval: 4000
save_interval: 0
num_checkpoints: 0
eval_interval: 4000
eval_save_best: True
tensorboard: False
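
The two SAC configurations added in this commit differ mainly in use_entropy_tuning (True above, False below). A minimal sketch of what automatic temperature tuning usually looks like, assuming the common target entropy of -action_dim and a two-dimensional action; this is illustrative and not the repository's exact update.

import torch

# Illustrative alpha (temperature) update for use_entropy_tuning: True;
# the target entropy of -action_dim is an assumption, not taken from this diff.
action_dim = 2
target_entropy = -float(action_dim)
log_alpha = torch.zeros(1, requires_grad=True)
alpha_opt = torch.optim.Adam([log_alpha], lr=1e-3)  # entropy_lr: 0.001

def update_alpha(log_prob):
    # log_prob: log-probabilities of actions sampled from the current policy.
    alpha_loss = -(log_alpha * (log_prob + target_entropy).detach()).mean()
    alpha_opt.zero_grad()
    alpha_loss.backward()
    alpha_opt.step()
    return log_alpha.exp().item()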
@@ -0,0 +1,29 @@
algo: sac
algo_config:
# model args
hidden_dim: 128
activation: "relu"
use_entropy_tuning: False

# optim args
train_interval: 100
train_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 50000
deque_size: 10
eval_batch_size: 50

# misc
log_interval: 2000
save_interval: 0
num_checkpoints: 0
eval_interval: 2000
eval_save_best: True
tensorboard: False
20 changes: 10 additions & 10 deletions examples/rl/rl_experiment.py
@@ -124,17 +124,17 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):

if config.task == Environment.QUADROTOR and system == 'quadrotor_4D':
_, ax4 = plt.subplots()
- ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Thrust')
- ax4.set_ylabel(r'Thrust')
+ ax4.plot(results['timestamp'][0][:], results['action'][0][:, graph4_1], 'r', label='Action: Thrust')
+ ax4.set_ylabel(r'Action: Thrust')
_, ax5 = plt.subplots()
- ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
- ax5.set_ylabel(r'Pitch')
+ ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Action: Pitch')
+ ax5.set_ylabel(r'Action: Pitch')
_, ax6 = plt.subplots()
- ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
- ax6.set_ylabel(r'Pitch')
+ ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Obs: Pitch')
+ ax6.set_ylabel(r'Obs: Pitch')
_, ax7 = plt.subplots()
- ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
- ax7.set_ylabel(r'Pitch rate')
+ ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Obs: Pitch rate')
+ ax7.set_ylabel(r'Obs: Pitch rate')

if config.task == Environment.QUADROTOR and system == 'quadrotor_2D':
# _, ax4 = plt.subplots()
@@ -144,10 +144,10 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
# ax5.plot(results['timestamp'][0][:], results['action'][0][:, graph4_2], 'r', label='Pitch')
# ax5.set_ylabel(r'Pitch')
_, ax6 = plt.subplots()
- ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Thrust')
+ ax6.plot(results['timestamp'][0][:], results['obs'][0][1:, 4], 'r', label='Thrust')
ax6.set_ylabel(r'Pitch')
_, ax7 = plt.subplots()
- ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 6], 'r', label='Pitch')
+ ax7.plot(results['timestamp'][0][:], results['obs'][0][1:, 5], 'r', label='Pitch')
ax7.set_ylabel(r'Pitch rate')

plt.tight_layout()
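
The index shifts above (observation column 5 to 4 and 6 to 5 in the quadrotor_2D plots) are consistent with a six-dimensional state ordered [x, x_dot, z, z_dot, theta, theta_dot], in which columns 4 and 5 hold pitch and pitch rate. A small sketch of that assumed layout follows; verify against the environment's state labels rather than treating it as authoritative.

import numpy as np

# Assumed planar-quadrotor observation layout behind the index change above.
OBS_2D = {'x': 0, 'x_dot': 1, 'z': 2, 'z_dot': 3, 'theta': 4, 'theta_dot': 5}

obs = np.zeros((601, 6))                     # stand-in for results['obs'][0]
pitch = obs[1:, OBS_2D['theta']]             # what ax6 now plots
pitch_rate = obs[1:, OBS_2D['theta_dot']]    # what ax7 now plots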
7 changes: 4 additions & 3 deletions examples/rl/rl_experiment.sh
@@ -2,14 +2,15 @@

#SYS='cartpole'
#SYS='quadrotor_2D'
- SYS='quadrotor_2D_attitude'
+ #SYS='quadrotor_2D_attitude'
+ SYS='quadrotor_2D_attitude_5s'
#SYS='quadrotor_3D'

#TASK='stab'
TASK='track'

- #ALGO='ppo'
- ALGO='sac'
+ ALGO='ppo'
+ #ALGO='sac'
#ALGO='td3'
#ALGO='safe_explorer_ppo'

3 changes: 2 additions & 1 deletion examples/rl/train_rl_model.sh
@@ -2,7 +2,8 @@

#SYS='cartpole'
#SYS='quadrotor_2D'
- SYS='quadrotor_2D_attitude'
+ #SYS='quadrotor_2D_attitude'
+ SYS='quadrotor_2D_attitude_5s'
#SYS='quadrotor_3D'

#TASK='stab'
9 changes: 3 additions & 6 deletions safe_control_gym/controllers/pid/pid.py
@@ -94,7 +94,7 @@ def select_action(self, obs, info=None):
step = self.extract_step(info)

# Step the environment and print all returned information.
- if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+ if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
cur_pos = np.array([obs[0], 0, obs[2]])
cur_quat = np.array(p.getQuaternionFromEuler([0, obs[4], 0]))
cur_vel = np.array([obs[1], 0, obs[3]])
@@ -103,17 +103,14 @@ def select_action(self, obs, info=None):
cur_quat = np.array(p.getQuaternionFromEuler([obs[6], obs[7], obs[8]]))
cur_vel = np.array([obs[1], obs[3], obs[5]])

- if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4:
+ if self.env.QUAD_TYPE == 2 or self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:
if self.env.TASK == Task.TRAJ_TRACKING:
target_pos = np.array([self.reference[step, 0],
0,
self.reference[step, 2]])
target_vel = np.array([self.reference[step, 1],
0,
self.reference[step, 3]])

- # target_pos = np.array([0.2, 0, 0.2])
- # target_vel = np.array([0, 0, 0])
elif self.env.TASK == Task.STABILIZATION:
target_pos = np.array([self.reference[0], 0, self.reference[2]])
target_vel = np.array([0, 0, 0])
@@ -148,7 +145,7 @@ def select_action(self, obs, info=None):
action = rpm
action = self.KF * action ** 2
action = np.array([action[0] + action[3], action[1] + action[2]])
- elif self.env.QUAD_TYPE == 4:  # 2D quadrotor with attitude control
+ elif self.env.QUAD_TYPE == 4 or self.env.QUAD_TYPE == 5:  # 2D quadrotor with attitude control
action = np.array([self.env.attitude_control.pwm2thrust(thrust / 3) * 4, computed_target_rpy[1]])
#### Extra checks
# rpm = self.env.before_step(action)
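
The branches extended above route the new quad_type 5 through the same attitude interface as type 4: the PID's collective thrust and target pitch become a two-element action instead of per-motor RPMs. A simplified sketch of that mapping follows, with a placeholder standing in for the environment's pwm2thrust conversion (the real calibration is not reproduced here).

import numpy as np

def pwm2thrust_placeholder(pwm):
    # Stand-in for self.env.attitude_control.pwm2thrust; the real
    # PWM-to-thrust calibration is not reproduced in this sketch.
    return pwm

def attitude_interface_action(thrust, computed_target_rpy, quad_type):
    # Mirrors the structure of the diff above for the attitude-controlled 2D quadrotors.
    if quad_type in (4, 5):
        return np.array([pwm2thrust_placeholder(thrust / 3) * 4, computed_target_rpy[1]])
    raise NotImplementedError('only the attitude-interface branch is sketched here')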
5 changes: 3 additions & 2 deletions safe_control_gym/envs/gym_pybullet_drones/base_aviary.py
@@ -263,6 +263,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
time_before_stepping = time.time()
# clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4))
clipped_action = np.expand_dims(clipped_action, axis=0)
+ # rpm = self._preprocess_control(clipped_action[0, :])

# Repeat for as many as the aggregate physics steps.
for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -313,8 +314,8 @@ def _advance_simulation(self, clipped_action, disturbance_force=None):
p.stepSimulation(physicsClientId=self.PYB_CLIENT)
# Save the last applied action (e.g. to compute drag).
self.last_clipped_action = clipped_action
# Update and store the drones kinematic information.
self._update_and_store_kinematic_information()
# Update and store the drones kinematic information.
self._update_and_store_kinematic_information()

time_after_stepping = time.time()
# print('time stepping', time_after_stepping - time_before_stepping)
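
For context on the last hunk: with ctrl_freq 60 and pyb_freq 1200 from the task config above, every control step aggregates 20 PyBullet sub-steps, and the duplicated comment/call pair above is the kinematics update being removed at one indentation level and re-added at another. A minimal sketch of that aggregated-stepping pattern follows; the function names are placeholders, and exactly where the kinematics refresh sits relative to the loop is what this hunk adjusts.

# Illustrative sketch of aggregated physics stepping (placeholder names, not repository code).
PYB_FREQ, CTRL_FREQ = 1200, 60
PYB_STEPS_PER_CTRL = PYB_FREQ // CTRL_FREQ  # 20 sub-steps per control step

def advance_simulation(step_physics_once, update_kinematics, clipped_action):
    for _ in range(PYB_STEPS_PER_CTRL):
        step_physics_once(clipped_action)   # p.stepSimulation under the hood
    update_kinematics()                     # one possible placement: once per control step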