Avinash109 committed on
Commit 8448555 · verified · 1 Parent(s): 49cea6e

Update app.py

Files changed (1)
  1. app.py +121 -245
app.py CHANGED
@@ -4,276 +4,152 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import Dataset, DataLoader
-from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
-import joblib
 import gradio as gr
-from apscheduler.schedulers.background import BackgroundScheduler
-from torch.optim.lr_scheduler import ReduceLROnPlateau
-from torch.nn import TransformerEncoder, TransformerEncoderLayer
-import optuna
-from sklearn.metrics import mean_squared_error
-import matplotlib.pyplot as plt
-import seaborn as sns
-import logging
-
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# Load and preprocess data
-try:
-    data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
-    scaler = StandardScaler()
-    scaled_data = scaler.fit_transform(data[['open', 'high', 'low', 'close', 'volume', 'oi']])
-    data[['open', 'high', 'low', 'close', 'volume', 'oi']] = scaled_data
-    joblib.dump(scaler, 'scaler.gz')
-    logging.info(f"Data loaded and preprocessed. Total data points: {len(data)}")
-except Exception as e:
-    logging.error(f"Error in data loading and preprocessing: {str(e)}")
-    raise
 
 class BankNiftyDataset(Dataset):
-    def __init__(self, data, seq_len, expiry_type, target_cols=['close']):
         self.data = data
         self.seq_len = seq_len
-        self.expiry_type = expiry_type
         self.target_cols = target_cols
 
-        if self.expiry_type == "weekly":
-            self.filtered_data = data[data['Expiry'].str.contains("W")]
-        elif self.expiry_type == "monthly":
-            self.filtered_data = data[~data['Expiry'].str.contains("W")]
-        else:
-            self.filtered_data = data
-
-        if len(self.filtered_data) < self.seq_len:
-            raise ValueError(f"Not enough data points for the specified sequence length. "
-                             f"Got {len(self.filtered_data)} data points, need at least {self.seq_len}.")
-
-        logging.info(f"{expiry_type.capitalize()} dataset created with {len(self.filtered_data)} data points")
-
     def __len__(self):
-        return max(0, len(self.filtered_data) - self.seq_len + 1)
 
     def __getitem__(self, idx):
-        if idx < 0 or idx >= len(self):
-            raise IndexError("Index out of range")
-
-        seq_data = self.filtered_data.iloc[idx:idx+self.seq_len]
         features = torch.tensor(seq_data[['open', 'high', 'low', 'close', 'volume', 'oi']].values, dtype=torch.float32)
         label = torch.tensor(seq_data[self.target_cols].iloc[-1].values, dtype=torch.float32)
         return features, label
 
-class AdvancedModel(nn.Module):
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, nhead=4, dropout=0.1):
-        super(AdvancedModel, self).__init__()
         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout)
-        self.gru = nn.GRU(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout)
-
-        transformer_dim = (input_dim // nhead) * nhead
-        self.input_proj = nn.Linear(input_dim, transformer_dim) if input_dim != transformer_dim else nn.Identity()
-
-        encoder_layers = TransformerEncoderLayer(d_model=transformer_dim, nhead=nhead, dim_feedforward=hidden_dim, dropout=dropout)
-        self.transformer = TransformerEncoder(encoder_layers, num_layers=num_layers)
-
-        self.attention = nn.MultiheadAttention(hidden_dim, num_heads=nhead, dropout=dropout)
-
         self.fc = nn.Sequential(
-            nn.Linear(hidden_dim * 3, hidden_dim),
             nn.ReLU(),
             nn.Dropout(dropout),
-            nn.Linear(hidden_dim, output_dim)
         )
 
     def forward(self, x):
         lstm_out, _ = self.lstm(x)
-        gru_out, _ = self.gru(x)
-
-        transformer_input = self.input_proj(x)
-        transformer_out = self.transformer(transformer_input.transpose(0, 1)).transpose(0, 1)
-
-        combined = torch.cat((lstm_out[:, -1, :], gru_out[:, -1, :], transformer_out[:, -1, :]), dim=1)
-
-        out = self.fc(combined)
         return out
 
-def objective(trial):
-    try:
-        input_dim = 6
-        hidden_dim = trial.suggest_int("hidden_dim", 64, 256)
-        output_dim = len(target_cols)
-        num_layers = trial.suggest_int("num_layers", 1, 4)
-
-        max_nhead = min(8, hidden_dim // 8)
-        nhead = trial.suggest_int("nhead", 2, max_nhead)
-        hidden_dim = (hidden_dim // nhead) * nhead
-
-        dropout = trial.suggest_float("dropout", 0.1, 0.5)
-        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
-
-        model = AdvancedModel(input_dim, hidden_dim, output_dim, num_layers, nhead, dropout)
-        optimizer = optim.Adam(model.parameters(), lr=lr)
-        criterion = nn.MSELoss()
-
-        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
-        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
-
-        for epoch in range(10):
-            train_model(model, optimizer, criterion, train_loader)
-            val_loss = evaluate_model(model, criterion, val_loader)
-
-        return val_loss
-    except Exception as e:
-        logging.error(f"Error in objective function: {str(e)}")
-        return float('inf')
-
-def train_model(model, optimizer, criterion, train_loader):
-    model.train()
-    for batch in train_loader:
-        features, label = batch
-        optimizer.zero_grad()
-        output = model(features)
-        loss = criterion(output, label)
-        loss.backward()
-        optimizer.step()
-
-def evaluate_model(model, criterion, val_loader):
-    model.eval()
-    total_loss = 0
-    with torch.no_grad():
-        for batch in val_loader:
-            features, label = batch
-            output = model(features)
-            loss = criterion(output, label)
-            total_loss += loss.item()
-    return total_loss / len(val_loader)
-
-def generate_strategy(model, expiry_type):
     model.eval()
-    dataset = BankNiftyDataset(data, seq_len, expiry_type, target_cols)
-    loader = DataLoader(dataset, batch_size=1, shuffle=False)
-
     with torch.no_grad():
-        predictions = []
-        for features, _ in loader:
-            output = model(features)
-            predictions.append(output.squeeze().tolist())
-    return predictions
-
-def retrain_model():
-    try:
-        new_data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
-        new_scaled_data = scaler.transform(new_data[['open', 'high', 'low', 'close', 'volume', 'oi']])
-        new_data[['open', 'high', 'low', 'close', 'volume', 'oi']] = new_scaled_data
-
-        new_train_data, new_val_data = train_test_split(new_data, test_size=0.2, random_state=42)
-        new_train_dataset = BankNiftyDataset(new_train_data, seq_len, "weekly", target_cols)
-        new_val_dataset = BankNiftyDataset(new_val_data, seq_len, "weekly", target_cols)
-
-        new_train_loader = DataLoader(new_train_dataset, batch_size=32, shuffle=True)
-        new_val_loader = DataLoader(new_val_dataset, batch_size=32, shuffle=False)
-
-        train_model(model, optimizer, criterion, new_train_loader)
-        val_loss = evaluate_model(model, criterion, new_val_loader)
-        logging.info(f'Validation Loss after retraining: {val_loss:.4f}')
-
-        torch.save(model.state_dict(), 'retrained_model.pth')
-    except Exception as e:
-        logging.error(f"Error in retraining model: {str(e)}")
-
-def plot_predictions(predictions, actual_values, title):
-    plt.figure(figsize=(12, 6))
-    plt.plot(predictions, label='Predictions')
-    plt.plot(actual_values, label='Actual Values')
-    plt.title(title)
-    plt.xlabel('Time')
-    plt.ylabel('Value')
-    plt.legend()
-    return plt
-
-def display_strategies():
-    try:
-        weekly_predictions = generate_strategy(model, "weekly")
-        monthly_predictions = generate_strategy(model, "monthly")
-
-        weekly_actual = data[data['Expiry'].str.contains("W")][target_cols].values[-len(weekly_predictions):]
-        monthly_actual = data[~data['Expiry'].str.contains("W")][target_cols].values[-len(monthly_predictions):]
-
-        weekly_plot = plot_predictions(weekly_predictions, weekly_actual, "Weekly Expiry Predictions vs Actual")
-        monthly_plot = plot_predictions(monthly_predictions, monthly_actual, "Monthly Expiry Predictions vs Actual")
-
-        weekly_mse = mean_squared_error(weekly_actual, weekly_predictions)
-        monthly_mse = mean_squared_error(monthly_actual, monthly_predictions)
-
-        return (
-            f"Weekly Expiry Strategy Predictions (MSE: {weekly_mse:.4f}):\n{weekly_predictions}\n\n"
-            f"Monthly Expiry Strategy Predictions (MSE: {monthly_mse:.4f}):\n{monthly_predictions}",
-            weekly_plot,
-            monthly_plot
-        )
-    except Exception as e:
-        logging.error(f"Error in displaying strategies: {str(e)}")
-        return "An error occurred while generating strategies.", None, None
-
-# Main execution
-if __name__ == "__main__":
-    try:
-        target_cols = ['close', 'volume', 'oi']
-        seq_len = 20
-
-        logging.info(f"Total data points: {len(data)}")
-        logging.info(f"Weekly data points: {len(data[data['Expiry'].str.contains('W')])}")
-        logging.info(f"Monthly data points: {len(data[~data['Expiry'].str.contains('W')])}")
-
-        train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
-        logging.info(f"Train data points: {len(train_data)}")
-        logging.info(f"Validation data points: {len(val_data)}")
-
-        train_dataset = BankNiftyDataset(train_data, seq_len, "weekly", target_cols)
-        val_dataset = BankNiftyDataset(val_data, seq_len, "weekly", target_cols)
-
-        study = optuna.create_study(direction="minimize")
-        study.optimize(objective, n_trials=50)
-
-        best_params = study.best_params
-        logging.info(f"Best hyperparameters: {best_params}")
-
-        input_dim = 6
-        output_dim = len(target_cols)
-        model = AdvancedModel(input_dim, best_params['hidden_dim'], output_dim, best_params['num_layers'], best_params['nhead'], best_params['dropout'])
-        optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
-        criterion = nn.MSELoss()
-
-        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
-
-        num_epochs = 100
-        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
-        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
-
-        for epoch in range(num_epochs):
-            train_model(model, optimizer, criterion, train_loader)
-            val_loss = evaluate_model(model, criterion, val_loader)
-            scheduler.step(val_loss)
-            logging.info(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")
-
-        torch.save(model.state_dict(), 'final_model.pth')
-
-        retraining_scheduler = BackgroundScheduler()
-        retraining_scheduler.add_job(retrain_model, 'interval', hours=1)
-        retraining_scheduler.start()
-
-        iface = gr.Interface(
-            fn=display_strategies,
-            inputs=None,
-            outputs=[
-                gr.Textbox(label="Strategy Predictions"),
-                gr.Plot(label="Weekly Expiry Predictions"),
-                gr.Plot(label="Monthly Expiry Predictions")
-            ],
-            title="Advanced BankNifty Option Chain Strategy Generator",
-            description="This model predicts close price, volume, and open interest for weekly and monthly expiries."
-        )
-
-        iface.launch()
-    except Exception as e:
-        logging.error(f"Error in main execution: {str(e)}")
 
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import Dataset, DataLoader
 from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
 import gradio as gr
+import os
 
+# Define the Dataset class
 class BankNiftyDataset(Dataset):
+    def __init__(self, data, seq_len, target_cols=['close']):
         self.data = data
         self.seq_len = seq_len
         self.target_cols = target_cols
 
     def __len__(self):
+        return max(0, len(self.data) - self.seq_len + 1)
 
     def __getitem__(self, idx):
+        seq_data = self.data.iloc[idx:idx+self.seq_len]
         features = torch.tensor(seq_data[['open', 'high', 'low', 'close', 'volume', 'oi']].values, dtype=torch.float32)
         label = torch.tensor(seq_data[self.target_cols].iloc[-1].values, dtype=torch.float32)
         return features, label
 
28
+ # Define the LSTM model
29
+ class LSTMModel(nn.Module):
30
+ def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, dropout=0.1):
31
+ super(LSTMModel, self).__init__()
32
  self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout)
 
 
 
 
 
 
 
 
 
 
33
  self.fc = nn.Sequential(
34
+ nn.Linear(hidden_dim, hidden_dim // 2),
35
  nn.ReLU(),
36
  nn.Dropout(dropout),
37
+ nn.Linear(hidden_dim // 2, output_dim)
38
  )
39
 
40
  def forward(self, x):
41
  lstm_out, _ = self.lstm(x)
42
+ out = self.fc(lstm_out[:, -1, :])
 
 
 
 
 
 
 
43
  return out
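+# Note: forward() reads out only the final time step of the LSTM sequence, so
+# each window yields a single prediction.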
 
+# Function to train the model
+def train_model(model, train_loader, val_loader, num_epochs=10):
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    for epoch in range(num_epochs):
+        model.train()
+        for features, labels in train_loader:
+            optimizer.zero_grad()
+            outputs = model(features)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+        model.eval()
+        val_loss = 0
+        with torch.no_grad():
+            for features, labels in val_loader:
+                outputs = model(features)
+                val_loss += criterion(outputs, labels).item()
+        val_loss /= len(val_loader)
+
+        print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")
+
+# Function to generate trading signals
+def generate_signals(predictions, actual_values, stop_loss_threshold=0.05):
+    signals = []
+    for pred, actual in zip(predictions, actual_values):
+        if pred > actual * (1 + stop_loss_threshold):
+            signals.append("Buy CE")
+        elif pred < actual * (1 - stop_loss_threshold):
+            signals.append("Buy PE")
+        else:
+            signals.append("Hold")
+    return signals
+
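+# Note: pred/actual here are standardized (z-scored) closes, not raw prices;
+# the +/-5% band is relative to the scaled value, so it is very narrow near
+# zero and its direction flips for negative values.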
+# Function to generate a report
+def generate_report(predictions, actual_values, signals):
+    report = []
+    cumulative_profit = 0
+    for i in range(len(signals)):
+        signal = signals[i]
+        profit = actual_values[i] - predictions[i]
+        if signal == "Buy CE":
+            cumulative_profit += profit
+        elif signal == "Buy PE":
+            cumulative_profit -= profit
+        report.append(f"Signal: {signal}, Actual: {actual_values[i]:.2f}, Predicted: {predictions[i]:.2f}, Profit: {profit:.2f}")
+
+    total_profit = cumulative_profit
+    report.append(f"Total Profit: {total_profit:.2f}")
+    return "\n".join(report)
+
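+# Note: "profit" above is actual minus predicted in scaled units -- a rough
+# proxy for prediction error, not an option P&L in rupees.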
+# Function to process data and make predictions
+def predict():
+    # Load the pre-existing CSV file
+    csv_path = 'BANKNIFTY_OPTION_CHAIN_data.csv'
+    if not os.path.exists(csv_path):
+        return "Error: CSV file not found in the expected location."
+
+    # Load and preprocess data
+    data = pd.read_csv(csv_path)
+    scaler = StandardScaler()
+    scaled_data = scaler.fit_transform(data[['open', 'high', 'low', 'close', 'volume', 'oi']])
+    data[['open', 'high', 'low', 'close', 'volume', 'oi']] = scaled_data
 
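+    # Note: the scaler is fit on the full file before the split below, so
+    # validation rows influence the scaling statistics, and the shuffled
+    # train_test_split mixes time order; a stricter setup would fit the
+    # scaler on train_data only and split chronologically.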
+    # Split data
+    train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
+
+    # Create datasets and dataloaders
+    seq_len = 20
+    target_cols = ['close']
+    train_dataset = BankNiftyDataset(train_data, seq_len, target_cols)
+    val_dataset = BankNiftyDataset(val_data, seq_len, target_cols)
+    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
+
+    # Initialize and train the model
+    input_dim = 6
+    hidden_dim = 64
+    output_dim = len(target_cols)
+    model = LSTMModel(input_dim, hidden_dim, output_dim)
+    train_model(model, train_loader, val_loader)
+
+    # Make predictions
     model.eval()
+    predictions = []
+    actual_values = val_data['close'].values[seq_len-1:]
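+    # Note: actual_values has len(val_data) - seq_len + 1 entries, one per
+    # window below; entry i is the (scaled) close of window i's final row,
+    # the same quantity the model is trained to predict for that window.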
     with torch.no_grad():
+        for i in range(len(val_dataset)):
+            features, _ = val_dataset[i]
+            pred = model(features.unsqueeze(0)).item()
+            predictions.append(pred)
+
+    # Generate signals and report
+    signals = generate_signals(predictions, actual_values)
+    report = generate_report(predictions, actual_values, signals)
+
+    return report
+
+# Set up the Gradio interface
+iface = gr.Interface(
+    fn=predict,
+    inputs=None,
+    outputs=gr.Textbox(label="Prediction Report"),
+    title="BankNifty Option Chain Predictor",
+    description="Click 'Submit' to generate predictions and trading signals based on the pre-loaded BankNifty option chain data."
+)
+
+# Launch the app
+iface.launch()
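As a quick sanity check of the signal rule added above, a toy run of generate_signals with made-up standardized closes (the numbers are hypothetical, not drawn from the CSV):

    preds   = [0.20, -0.30, 0.01]
    actuals = [0.10, -0.20, 0.01]
    print(generate_signals(preds, actuals))
    # -> ['Buy CE', 'Buy PE', 'Hold']
    # 0.20 > 0.10 * 1.05; -0.30 < -0.20 * 0.95; 0.01 falls inside its band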