-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFuturesEnv.py
141 lines (126 loc) · 5.78 KB
/
FuturesEnv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/python3
import pickle;
import numpy as np;
import tensorflow as tf;
from tf_agents.environments import py_environment;
from tf_agents.environments import tf_environment;
from tf_agents.environments import tf_py_environment;
from tf_agents.environments import utils;
from tf_agents.specs import array_spec;
from tf_agents.environments import wrappers;
from tf_agents.environments import suite_gym;
from tf_agents.trajectories import time_step as ts;
class FuturesEnv(py_environment.PyEnvironment):
    """TF-Agents environment simulating a simple futures-trading account.

    Observation: a (sell price, buy price) row taken from a pre-recorded
    price dataset (see ``observation_spec``). Action: a length-3 int32
    vector ``(lever, stop_profit_ticks, stop_loss_ticks)`` where
    ``lever > 0`` opens a short (sell) position, ``lever < 0`` opens a
    long (buy) position and ``lever == 0`` opens nothing; one tick is
    worth 0.01. Reward at every step is realized profit plus the
    unrealized profit of positions still open.
    """

    def __init__(self, capital=10000.0, dataset='dataset.pkl'):
        """Create the environment.

        Args:
            capital: initial account capital; must be a positive float.
            dataset: path to a pickled array of (sell, buy) price rows.

        Raises:
            ValueError: if ``capital`` is not a positive float.
        """
        # Raise instead of assert: asserts are stripped under `python -O`.
        if not isinstance(capital, float) or capital <= 0:
            raise ValueError('capital must be a positive float')
        self._action_spec = array_spec.BoundedArraySpec(
            (3,),
            dtype=np.int32,
            # action[0] lever: n * 0.01 contracts; sell (n>0), buy (n<0), none (n=0)
            # action[1] stop-profit ticks: buy -> price + n * 0.01; sell -> price - n * 0.01
            # action[2] stop-loss ticks:   buy -> price - n * 0.01; sell -> price + n * 0.01
            minimum=[-100, 1, 1],
            maximum=[100, 10000, 10000],
            name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            (2,),
            dtype=np.float32,
            # observation = (sell price, buy price)
            minimum=[0, 0],
            maximum=[np.inf, np.inf],
            name='observation')
        self._positions = list()  # open positions; see _step for the dict layout
        self._profit = 0          # realized (settled) profit so far
        self._episode_ended = False
        # customized members
        self.capital = capital
        # NOTE(review): unpickling is only safe for trusted files -- do not
        # point this at untrusted input.
        with open(dataset, 'rb') as f:
            self.dataset = pickle.loads(f.read())
        self.index = self._random_start_index()

    def _random_start_index(self):
        # The original hard-coded high=100000, which raises IndexError for
        # datasets shorter than that; clamp to the dataset length.
        high = min(100000, len(self.dataset))
        return np.random.randint(low=0, high=high, size=())

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        """Start a new episode at a random position in the dataset."""
        self._positions = list()
        self._profit = 0
        self._episode_ended = False
        # customized member
        self.index = self._random_start_index()
        # Return the bare (sell, buy) observation: the original wrapped it in
        # a tuple with the profit, which matches neither observation_spec nor
        # the observations emitted by _step.
        return ts.restart(self.dataset[self.index])

    def _step(self, action):
        """Apply one trading action and advance one row in the dataset.

        Args:
            action: ``(lever, stop_profit_ticks, stop_loss_ticks)``; see the
                class docstring and the action-spec comments in ``__init__``.

        Returns:
            A tf_agents TimeStep (transition or termination).

        Raises:
            ValueError: if a stored position has a zero lever (corrupt state).
        """
        # 1) after a terminal step the next call starts a new episode
        if self._episode_ended:
            return self.reset()
        # 2) state transition
        sell_price = self.dataset[self.index, 0]
        buy_price = self.dataset[self.index, 1]
        # open a new position when lever != 0
        if action[0] != 0:
            # a sane quote always has sell (bid) <= buy (ask)
            assert sell_price <= buy_price
            if action[0] > 0:
                # short: stop prices are measured against the future buy price.
                # Fixed: the original appended to self._state, an attribute
                # that never exists (the list is self._positions).
                self._positions.append({
                    'lever': action[0],
                    'position': sell_price,
                    'stop profit price': buy_price - 0.01 * action[1],
                    'stop loss price': buy_price + 0.01 * action[2]})
            else:
                # long: stop prices are measured against the future sell price
                self._positions.append({
                    'lever': action[0],
                    'position': buy_price,
                    'stop profit price': sell_price + 0.01 * action[1],
                    'stop loss price': sell_price - 0.01 * action[2]})
        # close out every position whose stop-profit or stop-loss was hit
        left_positions = list()
        unsettled_profit = 0
        for position in self._positions:
            stop_profit_price = position['stop profit price']
            stop_loss_price = position['stop loss price']
            if position['lever'] > 0:
                # short position settles against the current buy price
                prev_sell_price = position['position']
                step_profit = abs(position['lever']) * (prev_sell_price - buy_price)
                close_out = (buy_price <= stop_profit_price
                             or buy_price >= stop_loss_price)
            elif position['lever'] < 0:
                # long position settles against the current sell price
                prev_buy_price = position['position']
                step_profit = abs(position['lever']) * (sell_price - prev_buy_price)
                close_out = (sell_price >= stop_profit_price
                             or sell_price <= stop_loss_price)
            else:
                # positions are only created with lever != 0; a zero lever
                # means corrupted state. Fixed: the original raised a bare
                # string, which is itself a TypeError in Python 3.
                raise ValueError('invalid position lever: 0')
            if close_out:
                self._profit += step_profit
            else:
                unsettled_profit += step_profit
                left_positions.append(position)
        self._positions = left_positions
        # 3) episode-end conditions
        next_index = self.index + 1
        if next_index >= len(self.dataset):
            # end of dataset; clamp so the terminal observation stays in
            # range (the original compared index == len(dataset) and then
            # indexed one past the end of the dataset)
            self._episode_ended = True
            next_index = len(self.dataset) - 1
        if self.capital + self._profit + unsettled_profit <= 0:
            # mandatory liquidation: the account is wiped out
            self._episode_ended = True
        # 4) update dataset iterator
        self.index = next_index
        # 5) emit the next time step
        reward = self._profit + unsettled_profit
        if self._episode_ended:
            return ts.termination(self.dataset[self.index], reward)
        return ts.transition(self.dataset[self.index], reward=reward, discount=0.3)
if __name__ == "__main__":
    # TF-Agents PyEnvironments are driven eagerly; fail fast with a clear
    # error instead of the Yoda-style `True == ...` comparison.
    if not tf.executing_eagerly():
        raise RuntimeError('eager execution is required')
    # Wrap the Python environment so TF-Agents drivers/agents can use it.
    env = tf_py_environment.TFPyEnvironment(FuturesEnv())