Spaces:
Runtime error
Runtime error
fix env
Browse files- rl_agent/env.py +24 -30
rl_agent/env.py
CHANGED
@@ -3,50 +3,39 @@ import pandas as pd
|
|
3 |
|
4 |
class Environment:
|
5 |
|
6 |
-
def __init__(self, data, history_t=
|
7 |
self.data = data
|
|
|
8 |
self.history_t = history_t
|
|
|
9 |
self.reset()
|
10 |
|
11 |
def reset(self):
|
12 |
self.t = 0
|
13 |
self.done = False
|
14 |
self.profits = 0
|
15 |
-
self.
|
16 |
-
self.
|
17 |
-
self.history = [0 for _ in range(self.history_t)]
|
18 |
return [self.position_value] + self.history # obs
|
19 |
|
20 |
def step(self, act):
|
21 |
reward = 0
|
22 |
|
23 |
-
# act = 0: stay,
|
24 |
-
|
25 |
-
|
26 |
-
elif act == 2: # sell
|
27 |
-
if len(self.positions) == 0:
|
28 |
-
reward = -1
|
29 |
-
else:
|
30 |
-
profits = 0
|
31 |
-
for p in self.positions:
|
32 |
-
profits += (self.data.iloc[self.t, :]['Close'] - p)
|
33 |
-
reward += profits
|
34 |
-
self.profits += profits
|
35 |
-
self.positions = []
|
36 |
|
|
|
|
|
|
|
|
|
|
|
37 |
# set next time
|
38 |
self.t += 1
|
39 |
-
self.position_value =
|
40 |
-
for p in self.positions:
|
41 |
-
self.position_value += (self.data.iloc[self.t, :]['Close'] - p)
|
42 |
-
self.history.pop(0)
|
43 |
-
self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close'])
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
reward = 1
|
48 |
-
elif reward < 0:
|
49 |
-
reward = -1
|
50 |
|
51 |
return [self.position_value] + self.history, reward, self.done # obs, reward, done
|
52 |
|
@@ -64,9 +53,14 @@ if __name__ == "__main__":
|
|
64 |
test = data[date_split:]
|
65 |
print(train.head(10))
|
66 |
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
68 |
print(env.reset())
|
69 |
-
for _ in range(
|
70 |
pact = np.random.randint(3)
|
71 |
-
print(env.step(pact))
|
72 |
|
|
|
3 |
|
4 |
class Environment:
    """Trading environment over a price DataFrame with a 'Close' column.

    Observation is ``[position_value] + history`` where history holds
    successive one-step 'Close' price differences.  The action passed to
    :meth:`step` is interpreted as the target position: 0 = flat,
    act > 0 = long, act < 0 = short (per the comment in the original code).
    """

    # Default seed history used when the caller does not supply one.
    # Kept identical to the original default argument value.
    _DEFAULT_HISTORY = [0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]

    def __init__(self, data, history_t=8, history=None):
        """
        Args:
            data: DataFrame (time-indexed) with a 'Close' column.
            history_t: nominal history length.
            history: optional initial list of price differences.  A copy is
                stored, fixing the original's shared-mutable-default bug
                (``history=[...]`` was mutated in place by step()).
        """
        self.data = data
        # Keep a pristine copy so reset() can truly reset: step() mutates
        # self.history in place via pop(0)/append().
        if history is None:
            history = self._DEFAULT_HISTORY
        self._init_history = list(history)
        self.history_t = history_t
        self.cost_rate = 0.0001  # proportional transaction-cost rate
        self.reset()

    def reset(self):
        """Restore the initial state and return the first observation."""
        self.t = 0
        self.done = False
        self.profits = 0
        self.position_value = 0.
        # NOTE(review): the hard-coded 7 (rather than history_t - 1) matches
        # the original code; confirm whether history_t should drive this.
        self.history = self._init_history[:7]
        return [self.position_value] + self.history  # obs

    def step(self, act):
        """Advance one time step with target position ``act``.

        Returns:
            (obs, reward, done) — reward is the mark-to-market profit of the
            position held coming into this step, minus a transaction cost
            proportional to the change of position.
        """
        # act = 0: stay, act > 0: buy, act < 0: sell
        # Additive profits: cost scales with how far the position moves.
        cost_amount = np.abs(act - self.position_value)

        # One-step price change.  At t == 0 the (t-1) index wraps to the
        # last row via iloc[-1], exactly as in the original code.
        Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t - 1), :]['Close']
        reward = (self.position_value * Zt) - (self.cost_rate * cost_amount)
        profit = self.position_value * Zt
        self.profits += profit

        # set next time
        self.t += 1
        self.position_value = act

        self.history.pop(0)
        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t - 1), :]['Close'])  # the price being traded

        return [self.position_value] + self.history, reward, self.done  # obs, reward, done
|
41 |
|
|
|
53 |
test = data[date_split:]
|
54 |
print(train.head(10))
|
55 |
|
56 |
+
history = []
|
57 |
+
for i in range(1, 9):
|
58 |
+
c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
|
59 |
+
history.append(c)
|
60 |
+
|
61 |
+
env = Environment(train, history=history)
|
62 |
print(env.reset())
|
63 |
+
for _ in range(9, 12):
|
64 |
pact = np.random.randint(3)
|
65 |
+
print(env.step(pact)[1])
|
66 |
|