File size: 6,405 Bytes
182c4ed
3776d99
 
 
 
a9a4e96
3776d99
a9a4e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3776d99
a9a4e96
3776d99
 
a9a4e96
7e2ed99
3776d99
 
7e2ed99
3776d99
a9a4e96
 
3776d99
 
 
 
7e2ed99
a9a4e96
 
 
 
3776d99
a9a4e96
 
 
3776d99
7e2ed99
3776d99
a9a4e96
 
3776d99
a9a4e96
3776d99
 
 
a9a4e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3776d99
a9a4e96
 
3776d99
a9a4e96
 
3776d99
7e2ed99
a9a4e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import os
import gradio as gr
import time
import joblib

# Load and preprocess data (updated every retrain)
def load_data(rsi_window=14):
    """Load the latest option-chain CSV and add technical-indicator columns.

    Parameters
    ----------
    rsi_window : int, optional
        Look-back window for the rolling RSI (default 14, the conventional
        Wilder period).

    Returns
    -------
    pandas.DataFrame
        The raw CSV columns plus ``SMA_20``, ``SMA_50`` and ``RSI``.
        Rows whose rolling windows are not yet full have their indicator
        NaNs replaced with 0.
    """
    # Load the latest CSV data (assume it's updated periodically)
    data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
    return _add_indicators(data, rsi_window)


def _add_indicators(data, rsi_window=14):
    """Add SMA_20, SMA_50 and a rolling RSI column to *data* (in place)."""
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_50'] = data['close'].rolling(window=50).mean()

    # Rolling RSI.  The original computed one scalar RSI from the mean
    # gain/loss of the *entire* series and used the (negative) mean loss
    # directly, which pushes the ratio below zero and the "RSI" outside
    # the 0-100 range.  Standard RSI is rolling mean gain over rolling
    # mean |loss|, per row.
    delta = data['close'].diff()
    avg_gain = delta.clip(lower=0).rolling(window=rsi_window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=rsi_window).mean()
    rs = avg_gain / avg_loss  # inf when avg_loss == 0 -> RSI saturates at 100
    data['RSI'] = 100 - (100 / (1 + rs))

    # Warm-up rows (incomplete windows) become 0, matching the original.
    data.fillna(0, inplace=True)
    return data

# Define dataset class
class BankNiftyDataset(Dataset):
    """Sliding-window dataset over an OHLC dataframe.

    Each sample pairs a window of ``seq_len`` consecutive rows of the
    ``features`` columns with the ``close`` value of the row immediately
    following the window (the prediction target).
    """

    def __init__(self, data, seq_len, features):
        self.data = data
        self.seq_len = seq_len
        self.features = features

    def __len__(self):
        # One sample per window start that still leaves a label row after it.
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        end = idx + self.seq_len
        window = self.data[self.features].iloc[idx:end].values
        target = self.data['close'].iloc[end]
        sample = {
            'features': torch.tensor(window, dtype=torch.float32),
            'label': torch.tensor(target, dtype=torch.float32),
        }
        return sample

# Transformer model with LSTM
class TransformerLSTMModel(nn.Module):
    """LSTM encoder followed by a Transformer encoder and a linear head.

    Input:  ``(batch, seq_len, input_dim)`` float tensor.
    Output: ``(batch, output_dim)`` prediction taken from the final step.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, nhead=4, num_encoder_layers=2):
        super(TransformerLSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
        # batch_first=True so the encoder consumes the LSTM's
        # (batch, seq, hidden) output directly.  Without it,
        # nn.TransformerEncoderLayer expects (seq, batch, hidden) and
        # silently attends across the wrong dimension.
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nhead, batch_first=True),
            num_layers=num_encoder_layers
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # nn.LSTM zero-initialises (h0, c0) when omitted, so there is no
        # need to build them by hand.  The original hard-coded a hidden
        # size of 128 here, which crashed for any other hidden_dim.
        out, _ = self.lstm(x)
        out = self.transformer_encoder(out)
        # Project only the representation of the last time step.
        out = self.fc(out[:, -1, :])
        return out

# Function to train the model and update it periodically
def retrain_model(data, seq_len=10, batch_size=32, n_splits=5):
    """Train on *data* with walk-forward cross-validation, checkpointing the best model.

    Uses TimeSeriesSplit so each validation fold is strictly later than its
    training data; the state dict with the lowest validation loss across all
    folds/epochs is saved to ``best_model.pth``.

    Relies on a module-level ``features`` list of feature-column names —
    NOTE(review): not defined in the visible portion of this file; confirm
    it exists before this runs.

    Parameters
    ----------
    data : pandas.DataFrame
        Preprocessed frame from ``load_data`` (must contain ``close`` and
        every column in ``features``).
    seq_len : int
        Window length fed to the model.
    batch_size : int
        Mini-batch size for both loaders.
    n_splits : int
        Number of walk-forward folds.
    """
    # BUG FIX: the original assigned a loop-local also called ``features``,
    # which made *every* use of the module-level ``features`` in this
    # function raise UnboundLocalError.  Batch tensors are now batch_x/batch_y.
    input_dim = len(features)
    model = TransformerLSTMModel(input_dim=input_dim, hidden_dim=128, output_dim=1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    tscv = TimeSeriesSplit(n_splits=n_splits)
    best_loss = float('inf')

    for fold, (train_idx, val_idx) in enumerate(tscv.split(data)):
        train_data, val_data = data.iloc[train_idx], data.iloc[val_idx]
        train_dataset = BankNiftyDataset(train_data, seq_len, features)
        val_dataset = BankNiftyDataset(val_data, seq_len, features)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        for epoch in range(10):  # Train for 10 epochs per fold
            model.train()
            for batch in train_loader:
                batch_x = batch['features']
                batch_y = batch['label'].unsqueeze(1)  # (B,) -> (B, 1) to match model output

                optimizer.zero_grad()
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

            # Validation
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch in val_loader:
                    batch_x = batch['features']
                    batch_y = batch['label'].unsqueeze(1)
                    outputs = model(batch_x)
                    val_loss += criterion(outputs, batch_y).item()

            val_loss /= len(val_loader)
            print(f'Fold {fold + 1}, Epoch {epoch + 1}, Val Loss: {val_loss}')

            # Save the best model
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), 'best_model.pth')
                print("Model updated with new best performance.")

# Periodically check for new data and retrain
def schedule_retraining(interval_hours=24):
    """Run forever: reload the latest CSV, retrain, then sleep *interval_hours* hours."""
    sleep_seconds = interval_hours * 3600
    while True:
        print("Retraining model...")
        fresh_data = load_data()   # pick up whatever the CSV currently holds
        retrain_model(fresh_data)  # full walk-forward retrain + checkpoint
        print(f"Next retraining scheduled in {interval_hours} hours.")
        time.sleep(sleep_seconds)

# Gradio interface for user prediction after automatic retraining
def generate_strategy(open_, high, low, close, volume, oi, sma20, sma50, rsi):
    """Predict the next close from one row of inputs via the saved model.

    Relies on module-level ``features`` (feature-column list) and ``scaler``
    (a fitted StandardScaler) — NOTE(review): neither is defined in the
    visible portion of this file; confirm both exist before the Gradio app
    starts serving.

    Returns
    -------
    float
        The model's scalar prediction.
    """
    # Prepare new data
    new_data = pd.DataFrame({
        'open': [open_], 'high': [high], 'low': [low], 'close': [close],
        'volume': [volume], 'oi': [oi], 'SMA_20': [sma20], 'SMA_50': [sma50], 'RSI': [rsi]
    })
    new_data[features] = scaler.transform(new_data[features])
    seq_data = new_data[features].values  # shape (1, n_features)

    # Load best model
    model = TransformerLSTMModel(input_dim=len(features), hidden_dim=128, output_dim=1)
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()

    # Make prediction
    with torch.no_grad():
        # BUG FIX 1: the original rebound the name ``features`` here, which
        # turned every earlier use of the module-level ``features`` in this
        # function into an UnboundLocalError.
        # BUG FIX 2: seq_data is already 2-D (1, n_features); the original's
        # double unsqueeze produced a 4-D tensor, which nn.LSTM rejects.
        # One unsqueeze gives (batch=1, seq=1, n_features).
        seq_tensor = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)
        output = model(seq_tensor)
    return output.item()

# Gradio interface for real-time predictions
# NOTE: the ``gr.inputs.*`` / ``gr.outputs.*`` namespaces were removed in
# Gradio 3.x — components now live directly under ``gr``.
inputs = [
    gr.Number(label="Open Price"),
    gr.Number(label="High Price"),
    gr.Number(label="Low Price"),
    gr.Number(label="Close Price"),
    gr.Number(label="Volume"),
    gr.Number(label="Open Interest"),
    gr.Number(label="SMA_20"),
    gr.Number(label="SMA_50"),
    gr.Number(label="RSI")
]

outputs = gr.Textbox(label="Predicted Strategy")

# Launch Gradio interface for strategy prediction
# NOTE(review): launch() blocks by default, so code after it in this module
# (including the retraining loop under __main__) does not run in the same
# process until the server exits — confirm the retraining loop is meant to
# be started separately.
gr.Interface(fn=generate_strategy, inputs=inputs, outputs=outputs, title="BankNifty Strategy Generator").launch()

# Start automatic retraining (optional, can be run separately)
if __name__ == "__main__":
    # NOTE(review): the Gradio launch() above blocks by default, so this
    # line is only reached after the UI server shuts down — confirm whether
    # the retraining loop is intended to run in a separate process instead.
    schedule_retraining(interval_hours=24)  # Retrain every 24 hours