ttn0011 committed
Commit 7bfc81f · 1 Parent(s): a4539de
Files changed (1)
  1. rl_agent/env.py +24 -30
rl_agent/env.py CHANGED
@@ -3,50 +3,39 @@ import pandas as pd
 
 class Environment:
 
-    def __init__(self, data, history_t=90):
+    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]):
         self.data = data
+        self.history = history
         self.history_t = history_t
+        self.cost_rate = 0.0001
         self.reset()
 
     def reset(self):
         self.t = 0
         self.done = False
         self.profits = 0
-        self.positions = []
-        self.position_value = 0
-        self.history = [0 for _ in range(self.history_t)]
+        self.position_value = 0.
+        self.history = self.history[:7]
         return [self.position_value] + self.history # obs
 
     def step(self, act):
         reward = 0
 
-        # act = 0: stay, 1: buy, -1: sell
-        if act == 1:
-            self.positions.append(self.data.iloc[self.t, :]['Close'])
-        elif act == 2: # sell
-            if len(self.positions) == 0:
-                reward = -1
-            else:
-                profits = 0
-                for p in self.positions:
-                    profits += (self.data.iloc[self.t, :]['Close'] - p)
-                reward += profits
-                self.profits += profits
-                self.positions = []
+        # act = 0: stay, act > 0: buy, act < 0: sell
+        # Additive profits
+        cost_amount = np.abs(act - self.position_value)
+
+        Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
+        reward = (self.position_value * Zt) - (self.cost_rate * cost_amount)
+        profit = self.position_value * Zt
+        self.profits += profit
+
         # set next time
         self.t += 1
-        self.position_value = 0
-        for p in self.positions:
-            self.position_value += (self.data.iloc[self.t, :]['Close'] - p)
-        self.history.pop(0)
-        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close'])
-
-        # clipping reward
-        if reward > 0:
-            reward = 1
-        elif reward < 0:
-            reward = -1
+        self.position_value = act
+
+        self.history.pop(0)
+        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']) # the latest price change
 
         return [self.position_value] + self.history, reward, self.done # obs, reward, done
@@ -64,9 +53,14 @@ if __name__ == "__main__":
     test = data[date_split:]
     print(train.head(10))
 
-    env = Environment(train)
+    history = []
+    for i in range(1, 9):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history)
     print(env.reset())
-    for _ in range(3):
+    for _ in range(9, 12):
         pact = np.random.randint(3)
-        print(env.step(pact))
+        print(env.step(pact)[1])
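
The rewritten step() drops the per-trade bookkeeping and reward clipping in favor of an additive-profits reward in the style of direct-reinforcement trading (Moody and Saffell): the agent earns its previous position times the one-step price change Zt, minus a transaction cost proportional to how far the position moves. A minimal sketch of that arithmetic on toy prices (the prices and loop below are illustrative, not part of the commit):

import numpy as np

# Sketch of the commit's reward: R_t = F_{t-1} * Z_t - cost_rate * |F_t - F_{t-1}|,
# where F is the position held and Z_t is the one-step change in 'Close'.
prices = [100.0, 101.5, 101.0, 102.0]   # toy closes, not real data
cost_rate = 0.0001                       # same constant as Environment.cost_rate
position = 0.0                           # F_{t-1}: start flat, as reset() does
for t in range(1, len(prices)):
    act = np.random.randint(3)           # 0: stay, >0: buy, mirroring __main__
    Zt = prices[t] - prices[t - 1]       # price change over the last step
    reward = position * Zt - cost_rate * abs(act - position)
    position = act                       # the chosen action becomes the new position
    print(f"t={t} act={act} Zt={Zt:+.1f} reward={reward:+.5f}")

Note that np.random.randint(3) samples act from {0, 1, 2}, so this loop, like the one in __main__, only ever holds flat or long positions; a signed action would be needed to exercise the act < 0 sell case. The warm-up in __main__ plays the same role as the sketch's starting state: it seeds history with the first eight price changes so that reset() can truncate it to seven and the first observation [position_value] + history already has its full length of eight.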