ttn0011 committed
Commit 7bfc81f · 1 Parent(s): a4539de
Files changed (1)
  1. rl_agent/env.py +24 -30
rl_agent/env.py CHANGED
@@ -3,50 +3,39 @@ import pandas as pd
 
 class Environment:
 
-    def __init__(self, data, history_t=90):
+    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]):
         self.data = data
+        self.history = history
         self.history_t = history_t
+        self.cost_rate = 0.0001
         self.reset()
 
     def reset(self):
         self.t = 0
         self.done = False
         self.profits = 0
-        self.positions = []
-        self.position_value = 0
-        self.history = [0 for _ in range(self.history_t)]
+        self.position_value = 0.
+        self.history = self.history[:7]
         return [self.position_value] + self.history # obs
 
     def step(self, act):
         reward = 0
 
-        # act = 0: stay, 1: buy, -1: sell
-        if act == 1:
-            self.positions.append(self.data.iloc[self.t, :]['Close'])
-        elif act == 2: # sell
-            if len(self.positions) == 0:
-                reward = -1
-            else:
-                profits = 0
-                for p in self.positions:
-                    profits += (self.data.iloc[self.t, :]['Close'] - p)
-                reward += profits
-                self.profits += profits
-                self.positions = []
+        # act = 0: stay, act > 0: buy, act < 0: sell
+        # Additive profits
+        cost_amount = np.abs(act - self.position_value)
+
+        Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
+        reward = (self.position_value * Zt) - (self.cost_rate * cost_amount)
+        profit = self.position_value * Zt
+        self.profits += profit
+
         # set next time
         self.t += 1
-        self.position_value = 0
-        for p in self.positions:
-            self.position_value += (self.data.iloc[self.t, :]['Close'] - p)
-        self.history.pop(0)
-        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close'])
-
-        # clipping reward
-        if reward > 0:
-            reward = 1
-        elif reward < 0:
-            reward = -1
+        self.position_value = act
+
+        self.history.pop(0)
+        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']) # the latest price change
 
         return [self.position_value] + self.history, reward, self.done # obs, reward, done
@@ -64,9 +53,14 @@ if __name__ == "__main__":
     test = data[date_split:]
     print(train.head(10))
 
-    env = Environment(train)
+    history = []
+    for i in range(1, 9):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history)
     print(env.reset())
-    for _ in range(3):
+    for _ in range(9, 12):
         pact = np.random.randint(3)
-        print(env.step(pact))
+        print(env.step(pact)[1])
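
The rewritten step() drops the per-trade bookkeeping and reward clipping in favor of an additive-profits reward in the style of direct-reinforcement trading (Moody and Saffell): the agent earns its previous position times the one-step price change Zt, minus a transaction cost proportional to how far the position moves. A minimal sketch of that arithmetic on toy prices (the prices and loop below are illustrative, not part of the commit):

import numpy as np

# Sketch of the commit's reward: R_t = F_{t-1} * Z_t - cost_rate * |F_t - F_{t-1}|,
# where F is the position held and Z_t is the one-step change in 'Close'.
prices = [100.0, 101.5, 101.0, 102.0]   # toy closes, not real data
cost_rate = 0.0001                       # same constant as Environment.cost_rate
position = 0.0                           # F_{t-1}: start flat, as reset() does
for t in range(1, len(prices)):
    act = np.random.randint(3)           # 0: stay, >0: buy, mirroring __main__
    Zt = prices[t] - prices[t - 1]       # price change over the last step
    reward = position * Zt - cost_rate * abs(act - position)
    position = act                       # the chosen action becomes the new position
    print(f"t={t} act={act} Zt={Zt:+.1f} reward={reward:+.5f}")

Note that np.random.randint(3) samples act from {0, 1, 2}, so this loop, like the one in __main__, only ever holds flat or long positions; a signed action would be needed to exercise the act < 0 sell case. The warm-up in __main__ plays the same role as the sketch's starting state: it seeds history with the first eight price changes so that reset() can truncate it to seven and the first observation [position_value] + history already has its full length of eight.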