Commit 53b9021
Parent(s): c3e8fb1

Visualize total profits

Changed files:
- app.py +30 -16
- rl_agent/env.py +4 -1
- rl_agent/policy.py +0 -9
- rl_agent/utils.py +0 -5
app.py
CHANGED
@@ -23,6 +23,10 @@ def get_profit():
     return profit


+# def update_table():
+#     global
+
+
 def pretrain_rl_agent():
     global equity
     observations = env_train.reset()
@@ -31,7 +35,7 @@ def pretrain_rl_agent():
         observations = torch.as_tensor(observations).float()
         action = agent(observations)
         observations, reward, _ = env_train.step(action.data.to("cpu").numpy())
-        reward *= 1e3
+        # reward *= 1e3

         action.backward()

@@ -88,17 +92,19 @@ dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in list(dt

 df_data_test = df_data[df_data['Date'].dt.year == test_year]
 df_data_train = df_data[df_data['Date'].dt.year != test_year]
+
+df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])
 # ----------------------------------------------------------------------------------------------------------------------

 # ----------------------------------------------------------------------------------------------------------------------
 # For RL Agent
 # ----------------------------------------------------------------------------------------------------------------------
 data = pd.read_csv(f'./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023.csv')
-data = data.
+data = data.head(600000)
 data = data.set_index('Local time')
-date_split = '
+date_split = '31.01.2022 03:29:00.000 GMT-0600'

-learning_rate = 0.
+learning_rate = 0.001
 first_momentum = 0.0
 second_momentum = 0.0001
 transaction_cost = 0.0001
@@ -107,7 +113,7 @@ state_size = 15
 equity = 1.0

 train = data[:date_split]
-test = train.
+test = pd.concat([train.tail(state_size), data[date_split:]])

 # Initialize agent and optimizer
 agent = Policy(input_channels=state_size)
@@ -120,7 +126,13 @@ for i in range(1, state_size):

 # Initialize train and test environments
 env_train = Environment(train, history=history, state_size=state_size)
-
+
+history = []
+for i in range(1, state_size):
+    c = test.iloc[i, :]['Close'] - test.iloc[i - 1, :]['Close']
+    history.append(c)
+
+env_test = Environment(test, history=history, state_size=state_size)

 model_gradients_history = dict()
 checkpoint = OrderedDict()
@@ -149,8 +161,8 @@ def trading_plot():
             action, observations = make_prediction(observations)
             actions.append(action.item())
             position = statistics.mean(actions)
-            profit += -1.0 * (last_observation - observations[-1]) * position
-
+            # profit += -1.0 * (last_observation - observations[-1]) * position
+            profit = env_test.profits
     else:
         df_data_train = df_data

@@ -188,8 +200,8 @@ def trading_plot():

 # The UI of the demo defines here.
 with gr.Blocks() as demo:
-    gr.Markdown("Auto
-
+    gr.Markdown("Auto AI Trading Bot")
+    gr.Markdown(f"Investment: $100,000")

     dt = gr.Textbox(label="Total profit")
     demo.queue().load(get_profit, inputs=None, outputs=dt, every=1)
@@ -200,18 +212,20 @@ with gr.Blocks() as demo:

     with gr.Row():
         with gr.Column():
-            gr.Markdown("User Interactive panel
+            gr.Markdown("User Interactive panel")
             amount = gr.components.Textbox(value="", label="Amount", interactive=True)
             with gr.Row():
                 buy_btn = gr.components.Button("Buy", label="Buy", interactive=True, inputs=[amount])
                 sell_btn = gr.components.Button("Sell", label="Sell", interactive=True, inputs=[amount])
                 hold_btn = gr.components.Button("Hold", label="Hold", interactive=True, inputs=[amount])
+
         with gr.Column():
-            gr.Markdown("Trade bot history
-
-            trade_bot_table = gr.Dataframe(
-            #
-
+            gr.Markdown("Trade bot history")
+
+            # trade_bot_table = gr.Dataframe(df_data_train_viz)
+            # demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)
+            # Show trade box history in a table or something
+            # gr.components.Textbox(value="Some history? Need to decide how to show bot history", label="History", interactive=True)

 demo.launch()
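The table half of this change is still stubbed out: update_table exists only as a commented-out header, and the gr.Dataframe / demo.queue().load(...) wiring in the second column is commented out. A minimal sketch of how those stubs could fit together, assuming trades are appended to df_data_train_viz elsewhere in the app; the body of update_table is hypothetical and not part of this commit:

import gradio as gr
import pandas as pd

# Hypothetical completion of the commented-out stubs above; the names
# update_table, df_data_train_viz and trade_bot_table are taken from them.
df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])

def update_table():
    # Return the current history so Gradio redraws the table on every poll.
    global df_data_train_viz
    return df_data_train_viz

with gr.Blocks() as demo:
    gr.Markdown("Trade bot history")
    trade_bot_table = gr.Dataframe(df_data_train_viz)
    # Poll once per second, mirroring the get_profit textbox wiring in app.py.
    demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)

demo.launch()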
rl_agent/env.py
CHANGED
@@ -21,10 +21,13 @@ class Environment:
         return [self.position_value] + self.history  # obs

     def step(self, act):
+        # if self.t == len(self.data) - 1:
+        #     return self.history + [0], 0, False
+
         reward = 0

         # act = 0: stay, act > 0: buy, act < 0: sell
-        #Additive profits
+        # Additive profits
         cost_amount = np.abs(act-self.position_value)

         Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
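The "Additive profits" comment, together with cost_amount and Zt, points at the standard additive-profit reward from direct reinforcement trading: the position carried into step t earns the price change Zt, and changing the position costs a fee proportional to the trade size. A standalone illustration of that formula, assuming positions live in [-1, 1]; this is not the Space's actual step() body:

import numpy as np

def additive_profit(prev_position, new_position, price_change, transaction_cost=0.0001):
    # R_t = A_{t-1} * Z_t - c * |A_t - A_{t-1}|
    # price_change plays the role of Zt above; the |.| term mirrors cost_amount.
    trade_size = np.abs(new_position - prev_position)
    return prev_position * price_change - transaction_cost * trade_size

# Example: hold a 0.5 long through a +0.0008 move, then flip to a 0.2 short.
r = additive_profit(0.5, -0.2, 0.0008)  # 0.5 * 0.0008 - 0.0001 * 0.7 = 0.00033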
rl_agent/policy.py
CHANGED
@@ -1,27 +1,18 @@
-import numpy as np
-import torch
 import torch.nn as nn


 class Policy(nn.Module):
     def __init__(self, input_channels=8):
-
         super(Policy, self).__init__()
-
         self.layer1 = nn.Linear(input_channels, 1)
         self.tanh1 = nn.Tanh()
         # self.layer2 = nn.Linear(2 * input_channels, 1)
         # self.tanh2 = nn.Tanh()

     def forward(self, state):
-
         hidden = self.layer1(state)
         hidden = self.tanh1(hidden)
         # hidden = self.layer2(hidden)
         # action = self.tanh2(hidden)

         return hidden
-
-
-
-
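After this cleanup the policy is a single Linear(input_channels, 1) followed by Tanh, i.e. one squashed scalar per state vector. A small usage sketch, assuming the state layout that app.py and env.py build ([position_value] + history, state_size entries in total); the import path simply follows the repo layout:

import torch
from rl_agent.policy import Policy

state_size = 15                  # matches state_size in app.py
agent = Policy(input_channels=state_size)

obs = torch.randn(state_size)    # stand-in for [position_value] + price-change history
action = agent(obs)              # scalar in (-1, 1): > 0 buy, < 0 sell, ~0 stay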
rl_agent/utils.py
CHANGED
@@ -11,16 +11,11 @@ class myOptimizer():
         self.transaction_cost = transaction_cost

     def step(self, grad_n, reward, last_observation, last_gradient):
-
         numerator = self.mu_square - (self.mu * reward)
         denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
-
         gradient = numerator / denominator
-
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-
         previous_grad = (last_observation + self.transaction_cost) * last_gradient
-
         gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)

         return torch.as_tensor(self.lr * gradient)
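For reference, the numerator / denominator pair computed in step() matches, up to the 1/T averaging factor, the derivative of the Sharpe ratio with respect to a single return, assuming mu and mu_square track the running first and second moments of the rewards:

% Sharpe ratio over T returns, with A = mu = E[R] and B = mu_square = E[R^2]
S_T = \frac{A}{\sqrt{B - A^{2}}},
\qquad
\frac{\partial S_T}{\partial R_t} = \frac{1}{T}\cdot\frac{B - A\,R_t}{\left(B - A^{2}\right)^{3/2}}

The remaining factors, current_grad and previous_grad, then chain this through the dependence of the reward on the current and previous actions that the transaction-cost term introduces.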