-
Notifications
You must be signed in to change notification settings - Fork 438
/
AllegroHandDextremeManualDRPPO.yaml
executable file
·134 lines (115 loc) · 2.84 KB
/
AllegroHandDextremeManualDRPPO.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
params:
seed: ${...seed}
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
inputs:
dof_pos_randomized: { }
object_pose_cam_randomized: { }
goal_pose_randomized: { }
goal_relative_rot_cam_randomized: { }
last_actions_randomized: { }
mlp:
units: [256]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
rnn:
name: lstm
units: 512
layers: 1
before_mlp: True
layer_norm: True
load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
load_path: ${...checkpoint} # path to the checkpoint to load
config:
name: ${resolve_default:AllegroHandManualDRAsymmLSTM,${....experiment}}
full_experiment_name: ${.name}
env_name: rlgpu
multi_gpu: ${....multi_gpu}
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: False
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 1.0
normalize_advantage: True
gamma: 0.998
tau: 0.95
learning_rate: 1e-4
lr_schedule: adaptive
schedule_type: standard
kl_threshold: 0.016
score_to_win: 100000
max_epochs: ${resolve_default:50000,${....max_iterations}}
save_best_after: 200
save_frequency: 500
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 16
minibatch_size: 16384
mini_epochs: 4
critic_coef: 4
clip_value: True
seq_length: 16
bounds_loss_coef: 0.0001
zero_rnn_on_done: False
central_value_config:
minibatch_size: 16384
mini_epochs: 4
learning_rate: 1e-4
kl_threshold: 0.016
clip_value: True
normalize_input: True
truncate_grads: True
network:
name: actor_critic
central_value: True
inputs:
dof_pos: { }
dof_vel: { }
dof_force: { }
object_pose: { }
object_pose_cam_randomized: { }
object_vels: { }
goal_pose: { }
goal_relative_rot: {}
last_actions: { }
ft_force_torques: {}
gravity_vec: {}
ft_states: {}
mlp:
units: [512, 256, 128]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
player:
deterministic: True
use_vecenv: True
games_num: 1000000
print_stats: False