Commit 53b9021
Parent(s): c3e8fb1

Visualize total profits

Changed files:
- app.py +30 -16
- rl_agent/env.py +4 -1
- rl_agent/policy.py +0 -9
- rl_agent/utils.py +0 -5
app.py
CHANGED
@@ -23,6 +23,10 @@ def get_profit():
     return profit


+# def update_table():
+#     global
+
+
 def pretrain_rl_agent():
     global equity
     observations = env_train.reset()
@@ -31,7 +35,7 @@ def pretrain_rl_agent():
         observations = torch.as_tensor(observations).float()
         action = agent(observations)
         observations, reward, _ = env_train.step(action.data.to("cpu").numpy())
-        reward *= 1e3
+        # reward *= 1e3

         action.backward()

@@ -88,17 +92,19 @@ dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in list(dt

 df_data_test = df_data[df_data['Date'].dt.year == test_year]
 df_data_train = df_data[df_data['Date'].dt.year != test_year]
+
+df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])
 # ----------------------------------------------------------------------------------------------------------------------

 # ----------------------------------------------------------------------------------------------------------------------
 # For RL Agent
 # ----------------------------------------------------------------------------------------------------------------------
 data = pd.read_csv(f'./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023.csv')
-data = data.
+data = data.head(600000)
 data = data.set_index('Local time')
-date_split = '
+date_split = '31.01.2022 03:29:00.000 GMT-0600'

-learning_rate = 0.
+learning_rate = 0.001
 first_momentum = 0.0
 second_momentum = 0.0001
 transaction_cost = 0.0001
@@ -107,7 +113,7 @@ state_size = 15
 equity = 1.0

 train = data[:date_split]
-test = train.
+test = pd.concat([train.tail(state_size), data[date_split:]])

 # Initialize agent and optimizer
 agent = Policy(input_channels=state_size)
@@ -120,7 +126,13 @@ for i in range(1, state_size):

 # Initialize train and test environments
 env_train = Environment(train, history=history, state_size=state_size)
-
+
+history = []
+for i in range(1, state_size):
+    c = test.iloc[i, :]['Close'] - test.iloc[i - 1, :]['Close']
+    history.append(c)
+
+env_test = Environment(test, history=history, state_size=state_size)

 model_gradients_history = dict()
 checkpoint = OrderedDict()
@@ -149,8 +161,8 @@ def trading_plot():
             action, observations = make_prediction(observations)
             actions.append(action.item())
             position = statistics.mean(actions)
-            profit += -1.0 * (last_observation - observations[-1]) * position
-
+            # profit += -1.0 * (last_observation - observations[-1]) * position
+            profit = env_test.profits
     else:
         df_data_train = df_data

@@ -188,8 +200,8 @@ def trading_plot():

 # The UI of the demo defines here.
 with gr.Blocks() as demo:
-    gr.Markdown("Auto
-
+    gr.Markdown("Auto AI Trading Bot")
+    gr.Markdown(f"Investment: $100,000")

     dt = gr.Textbox(label="Total profit")
     demo.queue().load(get_profit, inputs=None, outputs=dt, every=1)
@@ -200,18 +212,20 @@ with gr.Blocks() as demo:

     with gr.Row():
         with gr.Column():
-            gr.Markdown("User Interactive panel
+            gr.Markdown("User Interactive panel")
             amount = gr.components.Textbox(value="", label="Amount", interactive=True)
             with gr.Row():
                 buy_btn = gr.components.Button("Buy", label="Buy", interactive=True, inputs=[amount])
                 sell_btn = gr.components.Button("Sell", label="Sell", interactive=True, inputs=[amount])
                 hold_btn = gr.components.Button("Hold", label="Hold", interactive=True, inputs=[amount])
+
         with gr.Column():
-            gr.Markdown("Trade bot history
-
-            trade_bot_table = gr.Dataframe(
-            #
-
+            gr.Markdown("Trade bot history")
+
+            # trade_bot_table = gr.Dataframe(df_data_train_viz)
+            # demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)
+            # Show trade box history in a table or something
+            # gr.components.Textbox(value="Some history? Need to decide how to show bot history", label="History", interactive=True)

 demo.launch()
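The table half of this change is still stubbed out: update_table exists only as a commented-out header, and the gr.Dataframe / demo.queue().load(...) wiring in the second column is commented out. A minimal sketch of how those stubs could fit together, assuming trades are appended to df_data_train_viz elsewhere in the app; the body of update_table is hypothetical and not part of this commit:

import gradio as gr
import pandas as pd

# Hypothetical completion of the commented-out stubs above; the names
# update_table, df_data_train_viz and trade_bot_table are taken from them.
df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])

def update_table():
    # Return the current history so Gradio redraws the table on every poll.
    global df_data_train_viz
    return df_data_train_viz

with gr.Blocks() as demo:
    gr.Markdown("Trade bot history")
    trade_bot_table = gr.Dataframe(df_data_train_viz)
    # Poll once per second, mirroring the get_profit textbox wiring in app.py.
    demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)

demo.launch()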
rl_agent/env.py
CHANGED
@@ -21,10 +21,13 @@ class Environment:
         return [self.position_value] + self.history  # obs

     def step(self, act):
+        # if self.t == len(self.data) - 1:
+        #     return self.history + [0], 0, False
+
         reward = 0

         # act = 0: stay, act > 0: buy, act < 0: sell
-        #Additive profits
+        # Additive profits
         cost_amount = np.abs(act-self.position_value)

         Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
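The "Additive profits" comment, together with cost_amount and Zt, points at the standard additive-profit reward from direct reinforcement trading: the position carried into step t earns the price change Zt, and changing the position costs a fee proportional to the trade size. A standalone illustration of that formula, assuming positions live in [-1, 1]; this is not the Space's actual step() body:

import numpy as np

def additive_profit(prev_position, new_position, price_change, transaction_cost=0.0001):
    # R_t = A_{t-1} * Z_t - c * |A_t - A_{t-1}|
    # price_change plays the role of Zt above; the |.| term mirrors cost_amount.
    trade_size = np.abs(new_position - prev_position)
    return prev_position * price_change - transaction_cost * trade_size

# Example: hold a 0.5 long through a +0.0008 move, then flip to a 0.2 short.
r = additive_profit(0.5, -0.2, 0.0008)  # 0.5 * 0.0008 - 0.0001 * 0.7 = 0.00033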
rl_agent/policy.py
CHANGED
@@ -1,27 +1,18 @@
-import numpy as np
-import torch
 import torch.nn as nn


 class Policy(nn.Module):
     def __init__(self, input_channels=8):
-
         super(Policy, self).__init__()
-
         self.layer1 = nn.Linear(input_channels, 1)
         self.tanh1 = nn.Tanh()
         # self.layer2 = nn.Linear(2 * input_channels, 1)
         # self.tanh2 = nn.Tanh()

     def forward(self, state):
-
         hidden = self.layer1(state)
         hidden = self.tanh1(hidden)
         # hidden = self.layer2(hidden)
         # action = self.tanh2(hidden)

         return hidden
-
-
-
-
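After this cleanup the policy is a single Linear(input_channels, 1) followed by Tanh, i.e. one squashed scalar per state vector. A small usage sketch, assuming the state layout that app.py and env.py build ([position_value] + history, state_size entries in total); the import path simply follows the repo layout:

import torch
from rl_agent.policy import Policy

state_size = 15                  # matches state_size in app.py
agent = Policy(input_channels=state_size)

obs = torch.randn(state_size)    # stand-in for [position_value] + price-change history
action = agent(obs)              # scalar in (-1, 1): > 0 buy, < 0 sell, ~0 stay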
rl_agent/utils.py
CHANGED
@@ -11,16 +11,11 @@ class myOptimizer():
         self.transaction_cost = transaction_cost

     def step(self, grad_n, reward, last_observation, last_gradient):
-
         numerator = self.mu_square - (self.mu * reward)
         denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
-
         gradient = numerator / denominator
-
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-
         previous_grad = (last_observation + self.transaction_cost) * last_gradient
-
         gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)

         return torch.as_tensor(self.lr * gradient)
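For reference, the numerator / denominator pair computed in step() matches, up to the 1/T averaging factor, the derivative of the Sharpe ratio with respect to a single return, assuming mu and mu_square track the running first and second moments of the rewards:

% Sharpe ratio over T returns, with A = mu = E[R] and B = mu_square = E[R^2]
S_T = \frac{A}{\sqrt{B - A^{2}}},
\qquad
\frac{\partial S_T}{\partial R_t} = \frac{1}{T}\cdot\frac{B - A\,R_t}{\left(B - A^{2}\right)^{3/2}}

The remaining factors, current_grad and previous_grad, then chain this through the dependence of the reward on the current and previous actions that the transaction-cost term introduces.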