-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
137 lines (111 loc) · 4.47 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from ppo import PPO
import torch
import matplotlib.pyplot as plt
import arguements
import pickle
from os.path import dirname, join, abspath
from pyrep import PyRep
def main():
    """Train a PPO agent on a UR10 reach task inside a PyRep/CoppeliaSim scene.

    Runs up to 1000 PPO update iterations.  Every 10 iterations the model is
    checkpointed and the per-iteration average rewards are appended to pickle
    files; every 5 iterations the full reward history is plotted and saved as
    a PNG.  All file names are derived from ``args.model_name``.
    """
    # ppo.update() works in double precision; make torch tensors default to it.
    torch.set_default_tensor_type('torch.DoubleTensor')
    args = arguements.achieve_args()

    # --- launch the simulation environment ---
    SCENE_FILE = join(dirname(abspath(__file__)), 'UR10_reach_003.ttt')
    # SCENE_FILE = join(dirname(abspath(__file__)), 'UR10_reach_002.ttt')
    pr = PyRep()
    pr.launch(SCENE_FILE, headless=False)  # launch the .ttt scene file
    pr.start()

    # --- instance of PPO ---
    ppo = PPO()
    # load trained models ---
    # ignore01, ignore02 = ppo.load()

    # Reward-log file names, derived once up front (previously they were only
    # assigned inside the save branch, so the plot branch depended on a save
    # having already happened in an earlier iteration).
    # NOTE(review): 'tor_rewards.txt' looks like a typo for 'rot_rewards.txt',
    # but it is kept so reward files from earlier runs keep loading.
    tot_rewards_name = args.model_name + 'rewards.txt'
    tra_rewards_name = args.model_name + 'tra_rewards.txt'
    rot_rewards_name = args.model_name + 'tor_rewards.txt'

    # In-memory buffers of per-iteration average rewards not yet on disk.
    # The *_avgs triple feeds the live plot; the saved_*_avgs triple feeds
    # the pickle files.  Both are cleared whenever their contents are flushed.
    tot_reward_avgs = []
    translation_reward_avgs = []
    rotation_reward_avgs = []
    saved_tot_reward_avgs = []
    saved_translation_reward_avgs = []
    saved_rotation_reward_avgs = []

    for i in range(1000):
        # Fourth return value (the policy) is unused here.
        tot_reward_avg, translation_reward_avg, rotation_reward_avg, _ = ppo.update(pr)
        tot_reward_avgs.append(tot_reward_avg)
        translation_reward_avgs.append(translation_reward_avg)
        rotation_reward_avgs.append(rotation_reward_avg)
        print('tot_reward_avgs:', tot_reward_avgs)
        saved_tot_reward_avgs.append(tot_reward_avg)
        saved_translation_reward_avgs.append(translation_reward_avg)
        saved_rotation_reward_avgs.append(rotation_reward_avg)

        # Offset for checkpoint numbering. MUST CHANGE THIS WHEN RESUMING
        # TRAINING so checkpoints/plots continue from the previous run.
        idx = i + 0
        print("------------------------------------------- Iter ", idx, " -----------------------------------------")

        # --- save model every 10 iterations; append rewards to pickle files ---
        if idx % 10 == 0 and idx != 0:
            print('--- saving models ---')
            ppo.save(idx, filename=args.model_name)
            if idx == 10:
                # First flush: create the reward files from scratch.
                print('--- creating new reward.txt file ---')
                with open(tot_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(saved_tot_reward_avgs, fp)
                print('rewards in the newly created reward.txt:', saved_tot_reward_avgs)
                with open(tra_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(saved_translation_reward_avgs, fp)
                print('rewards in the newly created tra_reward.txt:', saved_translation_reward_avgs)
                with open(rot_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(saved_rotation_reward_avgs, fp)
                print('rewards in the newly created rot_reward.txt:', saved_rotation_reward_avgs)
            else:
                # Later flushes: read back the history, extend, rewrite.
                print('--- updating reward.txt file ---')
                with open(tot_rewards_name, "rb") as fp:  # Unpickling
                    prev_r = pickle.load(fp)
                with open(tot_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(prev_r + saved_tot_reward_avgs, fp)
                print('--- updating tra_reward.txt file ---')
                with open(tra_rewards_name, "rb") as fp:  # Unpickling
                    prev_r = pickle.load(fp)
                with open(tra_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(prev_r + saved_translation_reward_avgs, fp)
                print('--- updating rot_reward.txt file ---')
                with open(rot_rewards_name, "rb") as fp:  # Unpickling
                    prev_r = pickle.load(fp)
                with open(rot_rewards_name, "wb") as fp:  # Pickling
                    pickle.dump(prev_r + saved_rotation_reward_avgs, fp)
            # BUG FIX: clear *all* just-flushed buffers, in both branches.
            # Previously the saved_* buffers were only cleared in the update
            # branch (so the first 11 entries were pickled twice at idx=20),
            # and the plot buffers were never cleared at save time (so every
            # plot at a multiple of 10 double-counted the flushed entries).
            saved_tot_reward_avgs = []
            saved_translation_reward_avgs = []
            saved_rotation_reward_avgs = []
            tot_reward_avgs = []
            translation_reward_avgs = []
            rotation_reward_avgs = []

        # --- plot the rewards every 5 iterations ---
        if idx % 5 == 0:
            if idx >= 10:
                # Persisted history plus whatever is still only in memory.
                with open(tot_rewards_name, "rb") as fp:  # Unpickling
                    tot_r = pickle.load(fp) + tot_reward_avgs
                with open(tra_rewards_name, "rb") as fp:  # Unpickling
                    tra_r = pickle.load(fp) + translation_reward_avgs
                with open(rot_rewards_name, "rb") as fp:  # Unpickling
                    rot_r = pickle.load(fp) + rotation_reward_avgs
                tot_reward_avgs = []
                translation_reward_avgs = []
                rotation_reward_avgs = []
            else:
                # No reward files exist yet; plot from the in-memory buffers.
                tot_r = tot_reward_avgs
                tra_r = translation_reward_avgs
                rot_r = rotation_reward_avgs
            iters = list(range(len(tot_r)))  # renamed from 'iter' (shadowed builtin)
            # BUG FIX: clear the figure first, otherwise each plotting pass
            # stacks new lines and a duplicate legend onto the previous ones.
            plt.clf()
            plt.plot(iters, tot_r)
            plt.plot(iters, tra_r)
            plt.plot(iters, rot_r)
            plt.legend(['total_r', 'translation_r', 'rotation_r'], loc='upper left')
            plt.savefig(args.model_name + 'plot.png')
            # NOTE(review): plt.show() blocks training until the window is
            # closed with most backends — confirm this is intentional.
            plt.show()

    pr.shutdown()  # shut down the simulator
# Script entry point: run training when executed directly (not when imported).
if __name__ == '__main__':
    # print('make sure to execute: [export OMP_NUM_THREADS=1] already.')
    main()