Avinash109 committed on
Commit 49cea6e · verified · 1 Parent(s): e3fd241

Update app.py

Files changed (1): app.py +154 -121
app.py CHANGED
@@ -15,13 +15,22 @@ import optuna
 from sklearn.metrics import mean_squared_error
 import matplotlib.pyplot as plt
 import seaborn as sns
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Load and preprocess data
-data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
-scaler = StandardScaler()
-scaled_data = scaler.fit_transform(data[['open', 'high', 'low', 'close', 'volume', 'oi']])
-data[['open', 'high', 'low', 'close', 'volume', 'oi']] = scaled_data
-joblib.dump(scaler, 'scaler.gz')
+try:
+    data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
+    scaler = StandardScaler()
+    scaled_data = scaler.fit_transform(data[['open', 'high', 'low', 'close', 'volume', 'oi']])
+    data[['open', 'high', 'low', 'close', 'volume', 'oi']] = scaled_data
+    joblib.dump(scaler, 'scaler.gz')
+    logging.info(f"Data loaded and preprocessed. Total data points: {len(data)}")
+except Exception as e:
+    logging.error(f"Error in data loading and preprocessing: {str(e)}")
+    raise
 
 class BankNiftyDataset(Dataset):
     def __init__(self, data, seq_len, expiry_type, target_cols=['close']):
@@ -34,11 +43,22 @@ class BankNiftyDataset(Dataset):
             self.filtered_data = data[data['Expiry'].str.contains("W")]
         elif self.expiry_type == "monthly":
             self.filtered_data = data[~data['Expiry'].str.contains("W")]
+        else:
+            self.filtered_data = data
+
+        if len(self.filtered_data) < self.seq_len:
+            raise ValueError(f"Not enough data points for the specified sequence length. "
+                             f"Got {len(self.filtered_data)} data points, need at least {self.seq_len}.")
+
+        logging.info(f"{expiry_type.capitalize()} dataset created with {len(self.filtered_data)} data points")
 
     def __len__(self):
-        return len(self.filtered_data) - self.seq_len
+        return max(0, len(self.filtered_data) - self.seq_len + 1)
 
     def __getitem__(self, idx):
+        if idx < 0 or idx >= len(self):
+            raise IndexError("Index out of range")
+
         seq_data = self.filtered_data.iloc[idx:idx+self.seq_len]
         features = torch.tensor(seq_data[['open', 'high', 'low', 'close', 'volume', 'oi']].values, dtype=torch.float32)
         label = torch.tensor(seq_data[self.target_cols].iloc[-1].values, dtype=torch.float32)
@@ -50,7 +70,6 @@ class AdvancedModel(nn.Module):
         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout)
         self.gru = nn.GRU(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout)
 
-        # Adjust input_dim for transformer if it's not divisible by nhead
         transformer_dim = (input_dim // nhead) * nhead
         self.input_proj = nn.Linear(input_dim, transformer_dim) if input_dim != transformer_dim else nn.Identity()
 
@@ -70,7 +89,6 @@ class AdvancedModel(nn.Module):
         lstm_out, _ = self.lstm(x)
         gru_out, _ = self.gru(x)
 
-        # Project input for transformer if necessary
         transformer_input = self.input_proj(x)
         transformer_out = self.transformer(transformer_input.transpose(0, 1)).transpose(0, 1)
 
@@ -80,31 +98,34 @@ class AdvancedModel(nn.Module):
         return out
 
 def objective(trial):
-    input_dim = 6
-    hidden_dim = trial.suggest_int("hidden_dim", 64, 256)
-    output_dim = len(target_cols)
-    num_layers = trial.suggest_int("num_layers", 1, 4)
-
-    # Ensure that hidden_dim is divisible by nhead
-    max_nhead = min(8, hidden_dim // 8)  # Ensure at least 8 dimensions per head
-    nhead = trial.suggest_int("nhead", 2, max_nhead)
-    hidden_dim = (hidden_dim // nhead) * nhead  # Adjust hidden_dim to be divisible by nhead
-
-    dropout = trial.suggest_float("dropout", 0.1, 0.5)
-    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
-
-    model = AdvancedModel(input_dim, hidden_dim, output_dim, num_layers, nhead, dropout)
-    optimizer = optim.Adam(model.parameters(), lr=lr)
-    criterion = nn.MSELoss()
-
-    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
-
-    for epoch in range(10):  # Reduced epochs for faster optimization
-        train_model(model, optimizer, criterion, train_loader)
-        val_loss = evaluate_model(model, criterion, val_loader)
-
-    return val_loss
+    try:
+        input_dim = 6
+        hidden_dim = trial.suggest_int("hidden_dim", 64, 256)
+        output_dim = len(target_cols)
+        num_layers = trial.suggest_int("num_layers", 1, 4)
+
+        max_nhead = min(8, hidden_dim // 8)
+        nhead = trial.suggest_int("nhead", 2, max_nhead)
+        hidden_dim = (hidden_dim // nhead) * nhead
+
+        dropout = trial.suggest_float("dropout", 0.1, 0.5)
+        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
+
+        model = AdvancedModel(input_dim, hidden_dim, output_dim, num_layers, nhead, dropout)
+        optimizer = optim.Adam(model.parameters(), lr=lr)
+        criterion = nn.MSELoss()
+
+        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
+
+        for epoch in range(10):
+            train_model(model, optimizer, criterion, train_loader)
+            val_loss = evaluate_model(model, criterion, val_loader)
+
+        return val_loss
+    except Exception as e:
+        logging.error(f"Error in objective function: {str(e)}")
+        return float('inf')
 
 def train_model(model, optimizer, criterion, train_loader):
     model.train()
@@ -140,22 +161,25 @@ def generate_strategy(model, expiry_type):
     return predictions
 
 def retrain_model():
-    new_data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
-    new_scaled_data = scaler.transform(new_data[['open', 'high', 'low', 'close', 'volume', 'oi']])
-    new_data[['open', 'high', 'low', 'close', 'volume', 'oi']] = new_scaled_data
+    try:
+        new_data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')
+        new_scaled_data = scaler.transform(new_data[['open', 'high', 'low', 'close', 'volume', 'oi']])
+        new_data[['open', 'high', 'low', 'close', 'volume', 'oi']] = new_scaled_data
 
-    new_train_data, new_val_data = train_test_split(new_data, test_size=0.2, random_state=42)
-    new_train_dataset = BankNiftyDataset(new_train_data, seq_len, "weekly", target_cols)
-    new_val_dataset = BankNiftyDataset(new_val_data, seq_len, "weekly", target_cols)
+        new_train_data, new_val_data = train_test_split(new_data, test_size=0.2, random_state=42)
+        new_train_dataset = BankNiftyDataset(new_train_data, seq_len, "weekly", target_cols)
+        new_val_dataset = BankNiftyDataset(new_val_data, seq_len, "weekly", target_cols)
 
-    new_train_loader = DataLoader(new_train_dataset, batch_size=32, shuffle=True)
-    new_val_loader = DataLoader(new_val_dataset, batch_size=32, shuffle=False)
+        new_train_loader = DataLoader(new_train_dataset, batch_size=32, shuffle=True)
+        new_val_loader = DataLoader(new_val_dataset, batch_size=32, shuffle=False)
 
-    train_model(model, optimizer, criterion, new_train_loader)
-    val_loss = evaluate_model(model, criterion, new_val_loader)
-    print(f'Validation Loss after retraining: {val_loss:.4f}')
+        train_model(model, optimizer, criterion, new_train_loader)
+        val_loss = evaluate_model(model, criterion, new_val_loader)
+        logging.info(f'Validation Loss after retraining: {val_loss:.4f}')
 
-    torch.save(model.state_dict(), 'retrained_model.pth')
+        torch.save(model.state_dict(), 'retrained_model.pth')
+    except Exception as e:
+        logging.error(f"Error in retraining model: {str(e)}")
 
 def plot_predictions(predictions, actual_values, title):
     plt.figure(figsize=(12, 6))
@@ -168,79 +192,88 @@ def plot_predictions(predictions, actual_values, title):
     return plt
 
 def display_strategies():
-    weekly_predictions = generate_strategy(model, "weekly")
-    monthly_predictions = generate_strategy(model, "monthly")
-
-    weekly_actual = data[data['Expiry'].str.contains("W")][target_cols].values[-len(weekly_predictions):]
-    monthly_actual = data[~data['Expiry'].str.contains("W")][target_cols].values[-len(monthly_predictions):]
-
-    weekly_plot = plot_predictions(weekly_predictions, weekly_actual, "Weekly Expiry Predictions vs Actual")
-    monthly_plot = plot_predictions(monthly_predictions, monthly_actual, "Monthly Expiry Predictions vs Actual")
-
-    weekly_mse = mean_squared_error(weekly_actual, weekly_predictions)
-    monthly_mse = mean_squared_error(monthly_actual, monthly_predictions)
-
-    return (
-        f"Weekly Expiry Strategy Predictions (MSE: {weekly_mse:.4f}):\n{weekly_predictions}\n\n"
-        f"Monthly Expiry Strategy Predictions (MSE: {monthly_mse:.4f}):\n{monthly_predictions}",
-        weekly_plot,
-        monthly_plot
-    )
-
-# Hyperparameter optimization
-target_cols = ['close', 'volume', 'oi']  # Predicting multiple targets
-seq_len = 20  # Increased sequence length
-
-train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
-train_dataset = BankNiftyDataset(train_data, seq_len, "weekly", target_cols)
-val_dataset = BankNiftyDataset(val_data, seq_len, "weekly", target_cols)
-
-study = optuna.create_study(direction="minimize")
-study.optimize(objective, n_trials=50)
-
-best_params = study.best_params
-print("Best hyperparameters:", best_params)
-
-# Initialize the model with best parameters
-input_dim = 6
-output_dim = len(target_cols)
-model = AdvancedModel(input_dim, best_params['hidden_dim'], output_dim, best_params['num_layers'], best_params['nhead'], best_params['dropout'])
-optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
-criterion = nn.MSELoss()
-
-# Learning rate scheduler
-scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
-
-# Training loop
-num_epochs = 100
-train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
-val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
-
-for epoch in range(num_epochs):
-    train_model(model, optimizer, criterion, train_loader)
-    val_loss = evaluate_model(model, criterion, val_loader)
-    scheduler.step(val_loss)
-    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")
-
-# Save the final model
-torch.save(model.state_dict(), 'final_model.pth')
-
-# Scheduler for automatic retraining
-scheduler = BackgroundScheduler()
-scheduler.add_job(retrain_model, 'interval', hours=1)
-scheduler.start()
-
-# Gradio interface
-iface = gr.Interface(
-    fn=display_strategies,
-    inputs=None,
-    outputs=[
-        gr.Textbox(label="Strategy Predictions"),
-        gr.Plot(label="Weekly Expiry Predictions"),
-        gr.Plot(label="Monthly Expiry Predictions")
-    ],
-    title="Advanced BankNifty Option Chain Strategy Generator",
-    description="This model predicts close price, volume, and open interest for weekly and monthly expiries."
-)
-
-iface.launch()
+    try:
+        weekly_predictions = generate_strategy(model, "weekly")
+        monthly_predictions = generate_strategy(model, "monthly")
+
+        weekly_actual = data[data['Expiry'].str.contains("W")][target_cols].values[-len(weekly_predictions):]
+        monthly_actual = data[~data['Expiry'].str.contains("W")][target_cols].values[-len(monthly_predictions):]
+
+        weekly_plot = plot_predictions(weekly_predictions, weekly_actual, "Weekly Expiry Predictions vs Actual")
+        monthly_plot = plot_predictions(monthly_predictions, monthly_actual, "Monthly Expiry Predictions vs Actual")
+
+        weekly_mse = mean_squared_error(weekly_actual, weekly_predictions)
+        monthly_mse = mean_squared_error(monthly_actual, monthly_predictions)
+
+        return (
+            f"Weekly Expiry Strategy Predictions (MSE: {weekly_mse:.4f}):\n{weekly_predictions}\n\n"
+            f"Monthly Expiry Strategy Predictions (MSE: {monthly_mse:.4f}):\n{monthly_predictions}",
+            weekly_plot,
+            monthly_plot
+        )
+    except Exception as e:
+        logging.error(f"Error in displaying strategies: {str(e)}")
+        return "An error occurred while generating strategies.", None, None
+
+# Main execution
+if __name__ == "__main__":
+    try:
+        target_cols = ['close', 'volume', 'oi']
+        seq_len = 20
+
+        logging.info(f"Total data points: {len(data)}")
+        logging.info(f"Weekly data points: {len(data[data['Expiry'].str.contains('W')])}")
+        logging.info(f"Monthly data points: {len(data[~data['Expiry'].str.contains('W')])}")
+
+        train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
+        logging.info(f"Train data points: {len(train_data)}")
+        logging.info(f"Validation data points: {len(val_data)}")
+
+        train_dataset = BankNiftyDataset(train_data, seq_len, "weekly", target_cols)
+        val_dataset = BankNiftyDataset(val_data, seq_len, "weekly", target_cols)
+
+        study = optuna.create_study(direction="minimize")
+        study.optimize(objective, n_trials=50)
+
+        best_params = study.best_params
+        logging.info(f"Best hyperparameters: {best_params}")
+
+        input_dim = 6
+        output_dim = len(target_cols)
+        model = AdvancedModel(input_dim, best_params['hidden_dim'], output_dim, best_params['num_layers'], best_params['nhead'], best_params['dropout'])
+        optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
+        criterion = nn.MSELoss()
+
+        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
+
+        num_epochs = 100
+        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
+
+        for epoch in range(num_epochs):
+            train_model(model, optimizer, criterion, train_loader)
+            val_loss = evaluate_model(model, criterion, val_loader)
+            scheduler.step(val_loss)
+            logging.info(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")
+
+        torch.save(model.state_dict(), 'final_model.pth')
+
+        retraining_scheduler = BackgroundScheduler()
+        retraining_scheduler.add_job(retrain_model, 'interval', hours=1)
+        retraining_scheduler.start()
+
+        iface = gr.Interface(
+            fn=display_strategies,
+            inputs=None,
+            outputs=[
+                gr.Textbox(label="Strategy Predictions"),
+                gr.Plot(label="Weekly Expiry Predictions"),
+                gr.Plot(label="Monthly Expiry Predictions")
+            ],
+            title="Advanced BankNifty Option Chain Strategy Generator",
+            description="This model predicts close price, volume, and open interest for weekly and monthly expiries."
+        )
+
+        iface.launch()
    except Exception as e:
        logging.error(f"Error in main execution: {str(e)}")