# BankNifty option-chain price model: periodic retraining loop plus a Gradio
# prediction UI. (Page-scrape header removed.)
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import os
import gradio as gr
import time
import joblib
# Load and preprocess data (updated every retrain)
def load_data(path='BANKNIFTY_OPTION_CHAIN_data.csv', rsi_window=14):
    """Load the latest option-chain CSV and append technical indicators.

    Parameters
    ----------
    path : str
        CSV file to read; defaults to the periodically-updated data dump.
    rsi_window : int
        Look-back window for the RSI calculation.

    Returns
    -------
    pd.DataFrame
        Input data with 'SMA_20', 'SMA_50' and 'RSI' columns added; NaNs
        produced by the rolling windows are replaced with 0 (as before).
    """
    data = pd.read_csv(path)
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_50'] = data['close'].rolling(window=50).mean()
    # Proper per-row RSI: avg gain / avg loss over a rolling window.
    # The previous version reduced the whole series to a single scalar and
    # divided by the mean of the *negative* diffs, yielding nonsense values.
    delta = data['close'].diff()
    avg_gain = delta.clip(lower=0).rolling(window=rsi_window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=rsi_window).mean()
    rs = avg_gain / avg_loss.replace(0, np.nan)  # avoid div-by-zero
    data['RSI'] = 100 - (100 / (1 + rs))
    data.fillna(0, inplace=True)
    return data
# Sliding-window dataset over the OHLC DataFrame.
class BankNiftyDataset(Dataset):
    """Yields `seq_len`-row windows of `features` paired with the 'close'
    value of the row immediately following the window."""

    def __init__(self, data, seq_len, features):
        self.data = data
        self.seq_len = seq_len
        self.features = features

    def __len__(self):
        # One window per starting row that still leaves room for a label.
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        window = self.data.iloc[idx:idx + self.seq_len]
        target = self.data['close'].iloc[idx + self.seq_len]
        return {
            'features': torch.tensor(window[self.features].values, dtype=torch.float32),
            'label': torch.tensor(target, dtype=torch.float32),
        }
# Transformer model with LSTM
class TransformerLSTMModel(nn.Module):
    """LSTM encoder followed by a Transformer encoder and a linear head.

    Input:  (batch, seq_len, input_dim) float tensor.
    Output: (batch, output_dim) prediction taken from the last time step.
    Note: hidden_dim must be divisible by nhead.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, nhead=4, num_encoder_layers=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
        # batch_first=True so the encoder sees (batch, seq, hidden) exactly
        # as the LSTM emits it; without it the batch and time axes were
        # silently transposed.
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nhead, batch_first=True),
            num_layers=num_encoder_layers,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # nn.LSTM defaults to zero initial states, so we no longer build
        # (h0, c0) by hand — the old code hard-coded size 128 and crashed
        # for any other hidden_dim.
        out, _ = self.lstm(x)
        out = self.transformer_encoder(out)
        # Predict from the final time step only.
        return self.fc(out[:, -1, :])
# Function to train the model and update it periodically
def retrain_model(data, seq_len=10, batch_size=32, n_splits=5, features=None, epochs=10):
    """Train with walk-forward cross-validation; checkpoint the best model.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the feature columns and 'close'.
    seq_len : int
        Window length fed to the model.
    batch_size : int
        Mini-batch size for both loaders.
    n_splits : int
        Number of TimeSeriesSplit folds.
    features : list[str] | None
        Input columns; defaults to the full engineered feature set.
    epochs : int
        Training epochs per fold.

    Side effect: writes 'best_model.pth' whenever validation loss improves.
    """
    if features is None:
        # The old code read an undefined module-level `features` (NameError).
        features = ['open', 'high', 'low', 'close', 'volume', 'oi',
                    'SMA_20', 'SMA_50', 'RSI']
    model = TransformerLSTMModel(input_dim=len(features), hidden_dim=128, output_dim=1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    tscv = TimeSeriesSplit(n_splits=n_splits)
    best_loss = float('inf')
    for fold, (train_idx, val_idx) in enumerate(tscv.split(data)):
        train_data, val_data = data.iloc[train_idx], data.iloc[val_idx]
        train_loader = DataLoader(BankNiftyDataset(train_data, seq_len, features),
                                  batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(BankNiftyDataset(val_data, seq_len, features),
                                batch_size=batch_size, shuffle=False)
        for epoch in range(epochs):
            model.train()
            for batch in train_loader:
                # NOTE: the original assigned to the name `features` here,
                # clobbering the feature-column list for later folds.
                inputs = batch['features']
                labels = batch['label'].unsqueeze(1)
                optimizer.zero_grad()
                loss = criterion(model(inputs), labels)
                loss.backward()
                optimizer.step()
            # Validation
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch in val_loader:
                    inputs = batch['features']
                    labels = batch['label'].unsqueeze(1)
                    val_loss += criterion(model(inputs), labels).item()
            # Guard against an empty validation loader (fold shorter than
            # seq_len), which previously divided by zero.
            val_loss /= max(len(val_loader), 1)
            print(f'Fold {fold + 1}, Epoch {epoch + 1}, Val Loss: {val_loss}')
            # Save the best model
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), 'best_model.pth')
                print("Model updated with new best performance.")
# Periodically check for new data and retrain
def schedule_retraining(interval_hours=24):
    """Blocking loop: reload the CSV and retrain every `interval_hours` hours.

    Never returns; intended to run as the process's main job (see __main__).
    Each cycle re-reads the data file via load_data() and re-runs
    retrain_model(), which overwrites 'best_model.pth' on improvement.
    """
    while True:
        print("Retraining model...")
        data = load_data()  # Load the latest data
        retrain_model(data)  # Retrain the model
        print(f"Next retraining scheduled in {interval_hours} hours.")
        time.sleep(interval_hours * 3600)  # Sleep for the specified interval
# Gradio interface callback: predict from a single row of market features.
def generate_strategy(open_, high, low, close, volume, oi, sma20, sma50, rsi):
    """Predict the next close from one row of market features.

    Loads 'best_model.pth' (written by retrain_model). If a fitted scaler
    was persisted as 'scaler.pkl', the inputs are standardized with it;
    otherwise raw values are used. Returns the model's scalar prediction.
    """
    feature_cols = ['open', 'high', 'low', 'close', 'volume', 'oi',
                    'SMA_20', 'SMA_50', 'RSI']
    new_data = pd.DataFrame({
        'open': [open_], 'high': [high], 'low': [low], 'close': [close],
        'volume': [volume], 'oi': [oi], 'SMA_20': [sma20], 'SMA_50': [sma50], 'RSI': [rsi]
    })
    # The old code referenced an undefined module-level `scaler`; load one
    # from disk if it exists (joblib is already imported for this purpose).
    if os.path.exists('scaler.pkl'):
        scaler = joblib.load('scaler.pkl')
        new_data[feature_cols] = scaler.transform(new_data[feature_cols])
    seq_data = new_data[feature_cols].values  # shape (1, n_features)
    # Load best model
    model = TransformerLSTMModel(input_dim=len(feature_cols), hidden_dim=128, output_dim=1)
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    # Make prediction
    with torch.no_grad():
        # One unsqueeze -> (1, 1, n_features): batch of one, sequence length
        # one. The old double unsqueeze built a 4-D tensor the LSTM rejects.
        inputs = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)
        output = model(inputs)
    return output.item()
# Gradio interface for real-time predictions.
# NOTE: `gr.inputs.*` / `gr.outputs.*` were removed in Gradio 3.x; the
# top-level components below are the supported replacements.
inputs = [
    gr.Number(label="Open Price"),
    gr.Number(label="High Price"),
    gr.Number(label="Low Price"),
    gr.Number(label="Close Price"),
    gr.Number(label="Volume"),
    gr.Number(label="Open Interest"),
    gr.Number(label="SMA_20"),
    gr.Number(label="SMA_50"),
    gr.Number(label="RSI"),
]
outputs = gr.Textbox(label="Predicted Strategy")

# Launch Gradio interface for strategy prediction.
# NOTE(review): launch() blocks, so the retraining loop below only starts
# after the server is shut down — run retraining in a separate process for
# truly periodic updates.
gr.Interface(fn=generate_strategy, inputs=inputs, outputs=outputs,
             title="BankNifty Strategy Generator").launch()

# Start automatic retraining (optional, can be run separately)
if __name__ == "__main__":
    schedule_retraining(interval_hours=24)  # Retrain every 24 hours