# BN / app.py
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
import gradio as gr
import time
import joblib

# Feature columns shared by the dataset, the model, and the Gradio handler
features = ['open', 'high', 'low', 'close', 'volume', 'oi', 'SMA_20', 'SMA_50', 'RSI']
# Load and preprocess data (updated every retrain)
def load_data():
    # Load the latest CSV data (assumed to be refreshed periodically)
    data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
    # Feature engineering: moving averages and a simple (SMA-based) 14-period RSI
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_50'] = data['close'].rolling(window=50).mean()
    delta = data['close'].diff(1)
    gain = delta.clip(lower=0).rolling(window=14).mean()
    loss = (-delta.clip(upper=0)).rolling(window=14).mean()
    data['RSI'] = 100 - (100 / (1 + gain / loss))
    data.fillna(0, inplace=True)
    return data
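# Expected minimum CSV schema (assumed; only these columns are read here or in
# generate_strategy below): open, high, low, close, volume, oi. Example row:
#   open,high,low,close,volume,oi
#   47210.0,47285.5,47150.2,47230.1,183000,95400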
# Define dataset class
class BankNiftyDataset(Dataset):
    def __init__(self, data, seq_len, features):
        self.data = data
        self.seq_len = seq_len
        self.features = features

    def __len__(self):
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        seq_data = self.data.iloc[idx:idx + self.seq_len][self.features].values
        label = self.data['close'].iloc[idx + self.seq_len]
        return {
            'features': torch.tensor(seq_data, dtype=torch.float32),
            'label': torch.tensor(label, dtype=torch.float32)
        }
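# Shape check (illustrative): with seq_len=10 and the 9 feature columns above,
# dataset[i]['features'] is a (10, 9) float32 tensor and dataset[i]['label'] is
# the scalar close price of the row immediately after the window.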
# Transformer model with LSTM
class TransformerLSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, nhead=4, num_encoder_layers=2):
        super(TransformerLSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(
            # batch_first=True keeps the (batch, seq, feature) layout produced by the LSTM
            nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nhead, batch_first=True),
            num_layers=num_encoder_layers
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initial LSTM states sized from hidden_dim rather than a hardcoded 128
        h0 = torch.zeros(1, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.transformer_encoder(out)
        out = self.fc(out[:, -1, :])  # predict from the final time step
        return out
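# Data flow for an input x of shape (batch, seq_len, input_dim):
#   LSTM               -> (batch, seq_len, hidden_dim)
#   TransformerEncoder -> (batch, seq_len, hidden_dim)
#   fc on last step    -> (batch, output_dim)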
# Function to train the model and update it periodically
def retrain_model(data, seq_len=10, batch_size=32, n_splits=5):
    input_dim = len(features)
    model = TransformerLSTMModel(input_dim=input_dim, hidden_dim=128, output_dim=1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    # Fit the scaler on the full history and persist it so generate_strategy()
    # can apply the same transform at inference time. (Fitting on all rows leaks
    # some future statistics into earlier folds; fitting per fold would be
    # stricter, but would leave no single scaler to serve at inference.)
    scaler = StandardScaler()
    data = data.copy()
    data[features] = scaler.fit_transform(data[features])
    joblib.dump(scaler, 'scaler.pkl')
    tscv = TimeSeriesSplit(n_splits=n_splits)
    best_loss = float('inf')
    for fold, (train_idx, val_idx) in enumerate(tscv.split(data)):
        train_data, val_data = data.iloc[train_idx], data.iloc[val_idx]
        train_dataset = BankNiftyDataset(train_data, seq_len, features)
        val_dataset = BankNiftyDataset(val_data, seq_len, features)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        for epoch in range(10):  # Train for 10 epochs per fold
            model.train()
            for batch in train_loader:
                inputs = batch['features']  # renamed so the global feature list is not shadowed
                labels = batch['label'].unsqueeze(1)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
            # Validation
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch in val_loader:
                    inputs = batch['features']
                    labels = batch['label'].unsqueeze(1)
                    outputs = model(inputs)
                    val_loss += criterion(outputs, labels).item()
            val_loss /= len(val_loader)
            print(f'Fold {fold + 1}, Epoch {epoch + 1}, Val Loss: {val_loss:.6f}')
            # Save the best model
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), 'best_model.pth')
                print("Model updated with new best performance.")
# Periodically check for new data and retrain
def schedule_retraining(interval_hours=24):
    while True:
        print("Retraining model...")
        data = load_data()   # Load the latest data
        retrain_model(data)  # Retrain the model
        print(f"Next retraining scheduled in {interval_hours} hours.")
        time.sleep(interval_hours * 3600)  # Sleep for the specified interval
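# The loop above blocks, as does launch() below. To run both in one process,
# one option (a sketch, not from the original code) is a daemon thread:
#   import threading
#   threading.Thread(target=schedule_retraining,
#                    kwargs={'interval_hours': 24}, daemon=True).start()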
# Gradio handler for user predictions after automatic retraining
def generate_strategy(open_, high, low, close, volume, oi, sma20, sma50, rsi):
    # Assemble the new data row
    new_data = pd.DataFrame({
        'open': [open_], 'high': [high], 'low': [low], 'close': [close],
        'volume': [volume], 'oi': [oi], 'SMA_20': [sma20], 'SMA_50': [sma50], 'RSI': [rsi]
    })
    # Apply the scaler persisted by retrain_model()
    scaler = joblib.load('scaler.pkl')
    new_data[features] = scaler.transform(new_data[features])
    seq_data = new_data[features].values
    # Load best model
    model = TransformerLSTMModel(input_dim=len(features), hidden_dim=128, output_dim=1)
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    # Make prediction; the single row becomes a batch of one length-1 sequence
    with torch.no_grad():
        inputs = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)
        output = model(inputs)
    # The model was trained on scaled 'close' labels; map back to price units
    i = features.index('close')
    return output.item() * scaler.scale_[i] + scaler.mean_[i]
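# Note: the network is trained on windows of seq_len rows, but this handler
# feeds a single row as a length-1 sequence, so its output is a rough
# single-step estimate rather than a windowed prediction.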
# Gradio interface for real-time predictions; gr.Number / gr.Textbox replace
# the gr.inputs / gr.outputs namespaces removed in current Gradio releases
inputs = [
    gr.Number(label="Open Price"),
    gr.Number(label="High Price"),
    gr.Number(label="Low Price"),
    gr.Number(label="Close Price"),
    gr.Number(label="Volume"),
    gr.Number(label="Open Interest"),
    gr.Number(label="SMA_20"),
    gr.Number(label="SMA_50"),
    gr.Number(label="RSI")
]
outputs = gr.Textbox(label="Predicted Strategy")
# Launch Gradio interface for strategy prediction (this call blocks)
gr.Interface(fn=generate_strategy, inputs=inputs, outputs=outputs,
             title="BankNifty Strategy Generator").launch()
# Start automatic retraining (optional; can be run separately, or on a
# background thread as sketched above, since launch() blocks first)
if __name__ == "__main__":
    schedule_retraining(interval_hours=24)  # Retrain every 24 hours