# AllegroHandDextremeADRPPO.yaml
params:
  seed: ${...seed}

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False

    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
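    # observations fed to the policy; the *_randomized inputs carry the noisy,
    # domain-randomized versions of the corresponding states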
    inputs:
      dof_pos_randomized: { }
      object_pose_cam_randomized: { }
      goal_pose: { }
      goal_relative_rot_cam_randomized: { }
      last_actions: { }
    mlp:
      units: [512, 512]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None
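    # single LSTM layer applied before the MLP head (before_mlp: True)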
    rnn:
      name: lstm
      units: 1024
      layers: 1
      before_mlp: True
      layer_norm: True

  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
  load_path: ${...checkpoint} # path to the checkpoint to load
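  # illustrative resume/eval invocation (assumed hydra-style overrides; the checkpoint path is hypothetical):
  #   python train.py task=AllegroHandDextremeADR checkpoint=runs/AllegroHandADRAsymmLSTM/nn/last.pth test=True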

  config:
    name: ${resolve_default:AllegroHandADRAsymmLSTM,${....experiment}}
    full_experiment_name: ${.name}
    env_name: rlgpu
    multi_gpu: ${....multi_gpu}
    ppo: True
    mixed_precision: False
    normalize_input: True
    normalize_value: True
    value_bootstrap: False
    num_actors: ${....task.env.numEnvs}
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.998
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: linear #adaptive
    schedule_type: standard
    kl_threshold: 0.01
    score_to_win: 1000000
    max_epochs: ${resolve_default:1000_000,${....max_iterations}}
    save_best_after: 10000
    save_frequency: 500
    print_stats: True
    grad_norm: 1.0
    entropy_coef: 0.002
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 16
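    # note: rl_games requires minibatch_size to divide horizon_length * num_actors evenly,
    # and seq_length sets the truncated-BPTT window used when training the LSTM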
    minibatch_size: 16384
    mini_epochs: 4
    critic_coef: 4
    clip_value: True
    seq_length: 16
    bound_loss_type: regularisation # rl_games matches this exact spelling
    bounds_loss_coef: 0.005
    zero_rnn_on_done: False

    # optimize summaries to prevent tf.event files from growing to gigabytes
    force_interval_writer: True

    central_value_config:
      minibatch_size: 16384
      mini_epochs: 4
      learning_rate: 5e-5
      kl_threshold: 0.016
      clip_value: True
      normalize_input: True
      truncate_grads: True

      network:
        name: actor_critic
        central_value: True
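        # asymmetric critic: in addition to the policy observations, the value
        # network receives privileged simulator state (true object pose and
        # velocities, forces, randomization parameters) available only in training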
        inputs:
          dof_pos: { }
          dof_vel: { }
          dof_force: { }
          object_pose: { }
          object_pose_cam_randomized: { }
          object_vels: { }
          goal_pose: { }
          goal_relative_rot: {}
          last_actions: { }
          stochastic_delay_params: { }
          affine_params: { }
          cube_random_params: {}
          hand_random_params: {}
          ft_force_torques: {}
          gravity_vec: {}
          ft_states: {}
          rot_dist: {}
          rb_forces: {}
        mlp:
          units: [1024, 512]
          activation: elu
          d2rl: False

          initializer:
            name: default
          regularizer:
            name: None

        rnn:
          name: lstm
          units: 2048
          layers: 1
          before_mlp: True
          layer_norm: True
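
    # settings used by the rl_games player when running the trained policy in test/inference mode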
    player:
      deterministic: True
      use_vecenv: True
      games_num: 1000000
      print_stats: False