Update app.py
app.py CHANGED
@@ -3,99 +3,161 @@ import torch
  import torch.nn as nn
  import torch.optim as optim
  from torch.utils.data import Dataset, DataLoader
- from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler
- …
- # …
- data = …
- …
  class BankNiftyDataset(Dataset):
-     def __init__(self, data, seq_len, numerical_features):
          self.data = data
          self.seq_len = seq_len
-         self.numerical_features = numerical_features

      def __len__(self):
          return len(self.data) - self.seq_len

      def __getitem__(self, idx):
-         seq_data = self.data.iloc[idx:idx+self.seq_len][self.numerical_features].values
-         label = self.data['close'].iloc[idx+self.seq_len]
          return {
              'features': torch.tensor(seq_data, dtype=torch.float32),
              'label': torch.tensor(label, dtype=torch.float32)
          }

- # …
- …
- train_dataset = BankNiftyDataset(train_data, seq_len, numerical_features)
- val_dataset = BankNiftyDataset(val_data, seq_len, numerical_features)
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
- val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
-
- # Define the LSTM-RNN model
- class LSTMModel(nn.Module):
-     def __init__(self, input_dim, hidden_dim, output_dim):
-         super(LSTMModel, self).__init__()
-         self.hidden_dim = hidden_dim
          self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
          self.fc = nn.Linear(hidden_dim, output_dim)

      def forward(self, x):
-         h0 = torch.zeros(1, x.size(0), self.hidden_dim)
-         c0 = torch.zeros(1, x.size(0), self.hidden_dim)
-
          out, _ = self.lstm(x, (h0, c0))
          out = self.fc(out[:, -1, :])
          return out

- # …
- …
      model.eval()
-     total_loss = 0
      with torch.no_grad():
-         for batch in val_loader:
-             features = batch['features']
-             label = batch['label'].unsqueeze(1)
-             output = model(features)
-             loss = criterion(output, label)
-             total_loss += loss.item()
-     print(f'Iteration {i+1}, Val Loss: {total_loss / len(val_loader)}')
-
- # Use the final trained model to generate strategies
- def generate_strategies(data):
-     seq_data = data.iloc[-seq_len:][numerical_features].values
-     features = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
-     output = model(features)
      return output.item()

- …
  import torch.nn as nn
  import torch.optim as optim
  from torch.utils.data import Dataset, DataLoader
+ from sklearn.model_selection import TimeSeriesSplit
  from sklearn.preprocessing import StandardScaler
+ from sklearn.metrics import mean_squared_error
+ import numpy as np
+ import os
+ import gradio as gr
+ import time
+ import joblib
+
+ # Feature columns shared by the dataset, the model input size, and the Gradio handler
+ features = ['open', 'high', 'low', 'close', 'volume', 'oi', 'SMA_20', 'SMA_50', 'RSI']
+ scaler = StandardScaler()
+
+ # Load and preprocess data (updated every retrain)
+ def load_data():
+     # Load the latest CSV data (assume it's updated periodically)
+     data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
+
+     # Feature engineering: create technical indicators, lag features, etc.
+     data['SMA_20'] = data['close'].rolling(window=20).mean()
+     data['SMA_50'] = data['close'].rolling(window=50).mean()
+     # Crude whole-series RSI: mean gain over mean loss (loss negated so the ratio is positive)
+     data['RSI'] = 100 - (100 / (1 + (data['close'].diff(1).clip(lower=0).mean() /
+                                      -data['close'].diff(1).clip(upper=0).mean())))
+     data.fillna(0, inplace=True)
+
+     # Fit and persist the scaler so generate_strategy can apply the same transform
+     data[features] = scaler.fit_transform(data[features])
+     joblib.dump(scaler, 'scaler.pkl')
+     return data
+
+ # Define dataset class
  class BankNiftyDataset(Dataset):
+     def __init__(self, data, seq_len, features):
          self.data = data
          self.seq_len = seq_len
+         self.features = features

      def __len__(self):
          return len(self.data) - self.seq_len

      def __getitem__(self, idx):
+         seq_data = self.data.iloc[idx:idx + self.seq_len][self.features].values
+         label = self.data['close'].iloc[idx + self.seq_len]
          return {
              'features': torch.tensor(seq_data, dtype=torch.float32),
              'label': torch.tensor(label, dtype=torch.float32)
          }

+ # Transformer model with LSTM
+ class TransformerLSTMModel(nn.Module):
+     def __init__(self, input_dim, hidden_dim, output_dim, nhead=4, num_encoder_layers=2):
+         super(TransformerLSTMModel, self).__init__()
+         self.hidden_dim = hidden_dim
          self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
+         self.transformer_encoder = nn.TransformerEncoder(
+             # batch_first=True matches the LSTM's (batch, seq, hidden) output layout
+             nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nhead, batch_first=True),
+             num_layers=num_encoder_layers
+         )
          self.fc = nn.Linear(hidden_dim, output_dim)

      def forward(self, x):
+         h0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
+         c0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
          out, _ = self.lstm(x, (h0, c0))
+         out = self.transformer_encoder(out)
          out = self.fc(out[:, -1, :])
          return out

+ # Train the model and update it periodically
+ def retrain_model(data, seq_len=10, batch_size=32, n_splits=5):
+     input_dim = len(features)
+     model = TransformerLSTMModel(input_dim=input_dim, hidden_dim=128, output_dim=1)
+     optimizer = optim.Adam(model.parameters(), lr=0.001)
+     criterion = nn.MSELoss()
+
+     tscv = TimeSeriesSplit(n_splits=n_splits)
+     best_loss = float('inf')
+
+     for fold, (train_idx, val_idx) in enumerate(tscv.split(data)):
+         train_data, val_data = data.iloc[train_idx], data.iloc[val_idx]
+         train_dataset = BankNiftyDataset(train_data, seq_len, features)
+         val_dataset = BankNiftyDataset(val_data, seq_len, features)
+
+         train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+         val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+
+         for epoch in range(10):  # Train for 10 epochs per fold
+             model.train()
+             for batch in train_loader:
+                 # A distinct local name keeps the global `features` column list usable above
+                 batch_features = batch['features']
+                 labels = batch['label'].unsqueeze(1)
+
+                 optimizer.zero_grad()
+                 outputs = model(batch_features)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+
+             # Validation
+             model.eval()
+             val_loss = 0
+             with torch.no_grad():
+                 for batch in val_loader:
+                     batch_features = batch['features']
+                     labels = batch['label'].unsqueeze(1)
+                     outputs = model(batch_features)
+                     val_loss += criterion(outputs, labels).item()
+
+             val_loss /= len(val_loader)
+             print(f'Fold {fold + 1}, Epoch {epoch + 1}, Val Loss: {val_loss}')
+
+             # Save the best model
+             if val_loss < best_loss:
+                 best_loss = val_loss
+                 torch.save(model.state_dict(), 'best_model.pth')
+                 print("Model updated with new best performance.")
+
+ # Periodically check for new data and retrain
+ def schedule_retraining(interval_hours=24):
+     while True:
+         print("Retraining model...")
+         data = load_data()  # Load the latest data
+         retrain_model(data)  # Retrain the model
+         print(f"Next retraining scheduled in {interval_hours} hours.")
+         time.sleep(interval_hours * 3600)  # Sleep for the specified interval
+
+ # Gradio handler for user prediction after automatic retraining
+ def generate_strategy(open_, high, low, close, volume, oi, sma20, sma50, rsi):
+     # Prepare new data
+     new_data = pd.DataFrame({
+         'open': [open_], 'high': [high], 'low': [low], 'close': [close],
+         'volume': [volume], 'oi': [oi], 'SMA_20': [sma20], 'SMA_50': [sma50], 'RSI': [rsi]
+     })
+     # Reuse the scaler fitted during the last retrain
+     scaler = joblib.load('scaler.pkl')
+     new_data[features] = scaler.transform(new_data[features])
+     seq_data = new_data[features].values
+
+     # Load best model
+     model = TransformerLSTMModel(input_dim=len(features), hidden_dim=128, output_dim=1)
+     model.load_state_dict(torch.load('best_model.pth'))
      model.eval()
+
+     # Make prediction
      with torch.no_grad():
+         # One unsqueeze yields shape (batch=1, seq_len=1, n_features)
+         seq_tensor = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)
+         output = model(seq_tensor)
      return output.item()

+ # Gradio interface for real-time predictions (gr.Number/gr.Textbox are the current component API)
+ inputs = [
+     gr.Number(label="Open Price"),
+     gr.Number(label="High Price"),
+     gr.Number(label="Low Price"),
+     gr.Number(label="Close Price"),
+     gr.Number(label="Volume"),
+     gr.Number(label="Open Interest"),
+     gr.Number(label="SMA_20"),
+     gr.Number(label="SMA_50"),
+     gr.Number(label="RSI")
+ ]
+
+ outputs = gr.Textbox(label="Predicted Strategy")
+
+ # Launch Gradio interface for strategy prediction (launch() blocks while the app is served)
+ gr.Interface(fn=generate_strategy, inputs=inputs, outputs=outputs, title="BankNifty Strategy Generator").launch()
+
+ # Start automatic retraining (optional, can be run separately)
+ if __name__ == "__main__":
+     schedule_retraining(interval_hours=24)  # Retrain every 24 hours