robertselvam committed on
Commit
5bfe37b
·
verified ·
1 Parent(s): 275022b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -64
app.py CHANGED
@@ -83,19 +83,18 @@ class DemandForecasting:
83
  return None
84
 
85
 
86
- def find_date_column(self, df_data: pd.DataFrame, list_columns: list) -> str:
87
  """
88
- Find the column containing date information from the list of columns.
89
 
90
  Args:
91
  - df_data (pd.DataFrame): Input DataFrame.
92
- - list_columns (list): List of column names to search for date information.
93
 
94
  Returns:
95
- - str: Name of the column containing date information.
96
  """
97
- for column in list_columns:
98
- # Check if the column contains date-like values
99
  try:
100
  pd.to_datetime(df_data[column])
101
  return column
@@ -105,61 +104,53 @@ class DemandForecasting:
105
  # Return None if no date column is found
106
  return None
107
 
108
- def preprocess_data(self, df_data: pd.DataFrame, list_columns, target_column) -> pd.DataFrame:
109
  """
110
- Preprocess the input DataFrame.
111
 
112
  Args:
113
- - df_data (pd.DataFrame): Input DataFrame to preprocess.
 
 
114
 
115
  Returns:
116
- - pd.DataFrame: Preprocessed DataFrame.
117
  """
118
- try:
119
- print(type(list_columns))
120
- # Make a copy of the input DataFrame to avoid modifying the original data
121
- df_data = df_data.copy()
122
-
123
- list_columns.append(target_column)
124
-
125
- # Drop columns not in list_columns
126
- columns_to_drop = [col for col in df_data.columns if col not in list_columns]
127
- df_data.drop(columns=columns_to_drop, inplace=True)
128
 
129
- # Find the date column
130
- date_column = self.find_date_column(df_data, list_columns)
131
- if date_column is None:
132
- raise ValueError("No date column found in the provided list of columns.")
133
 
 
 
 
134
 
 
 
135
 
136
- # Parse date information
137
- df_data[date_column] = pd.to_datetime(df_data[date_column]) # Convert 'date' column to datetime format
138
- df_data['day'] = df_data[date_column].dt.day # Extract day of the month
139
- df_data['month'] = df_data[date_column].dt.month # Extract month
140
- df_data['year'] = df_data[date_column].dt.year # Extract year
 
 
 
 
141
 
142
  # Cyclical Encoding for Months
143
- df_data['month_sin'] = np.sin(2 * np.pi * df_data['month'] / 12) # Cyclical sine encoding for month
144
- df_data['month_cos'] = np.cos(2 * np.pi * df_data['month'] / 12) # Cyclical cosine encoding for month
145
 
146
  # Day of the Week
147
- df_data['day_of_week'] = df_data[date_column].dt.weekday # Extract day of the week (0 = Monday, 6 = Sunday)
148
 
149
  # Week of the Year
150
- df_data['week_of_year'] = df_data[date_column].dt.isocalendar().week.astype(int) # Extract week of the year as integer
151
 
152
- df_data.drop(columns=[date_column], inplace=True)
153
 
154
- print("df_data", df_data)
155
- return df_data
156
-
157
- except Exception as e:
158
- # Log an error message if an exception occurs during data preprocessing
159
- LOGGER.error(f"Error preprocessing data: {e}")
160
-
161
- # Return None in case of an error
162
- return None
163
 
164
  def train_model(self, train: pd.DataFrame, target_column, list_columns) -> tuple:
165
  """
@@ -215,24 +206,48 @@ class DemandForecasting:
215
  # Return None for all outputs in case of an error
216
  return None, None, None
217
 
218
- def plot_evaluation_interactive(self, y_true: np.ndarray, y_pred: np.ndarray, title: str) -> None:
219
- """
220
- Plot interactive evaluation using Plotly.
221
 
222
- Args:
223
- - y_true (np.ndarray): True values.
224
- - y_pred (np.ndarray): Predicted values.
225
- - title (str): Title of the plot.
226
- """
227
- try:
228
- # Create a scatter plot using Plotly
229
- fig = px.scatter(x=y_true, y=y_pred, labels={'x': 'True Values', 'y': 'Predictions'}, title=title, color_discrete_map={'': 'purple'})
230
- fig.show()
231
- return fig
232
 
233
- except Exception as e:
234
- # Log an error message if an exception occurs during plot generation
235
- LOGGER.error(f"Error plotting evaluation: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
 
238
  def predict_sales_for_date(self, input_data, model: xgb.Booster) -> float:
@@ -356,10 +371,6 @@ class DemandForecasting:
356
  # Split the string by comma and convert it into a list
357
  column_list = column_list.split(",")
358
 
359
- print("train_csv_path", train_csv_path)
360
- print("audio_path", audio_path)
361
- print("column_list", column_list)
362
- print("target_column", target_column)
363
 
364
  text = self.audio_to_text(audio_path)
365
 
@@ -375,7 +386,9 @@ class DemandForecasting:
375
  trained_model, y_val, y_val_pred, validation = self.train_model(train_df, target_column, column_list)
376
 
377
  # Plot interactive evaluation for training
378
- plot = self.plot_evaluation_interactive(y_val, y_val_pred, title='Validation Set Evaluation')
 
 
379
 
380
  # Predict sales for the specified date using the trained model
381
  predicted_value = self.predict_sales_for_date(input_data, trained_model)
 
83
  return None
84
 
85
 
86
+ def find_date_column(self, df_data: pd.DataFrame) -> str:
87
  """
88
+ Find the column containing date-type values from the DataFrame.
89
 
90
  Args:
91
  - df_data (pd.DataFrame): Input DataFrame.
 
92
 
93
  Returns:
94
+ - str: Name of the column containing date-type values.
95
  """
96
+ for column in df_data.columns:
97
+ # Check if the column can be converted to datetime
98
  try:
99
  pd.to_datetime(df_data[column])
100
  return column
 
104
  # Return None if no date column is found
105
  return None
106
 
107
+ def preprocess_data(self, df_data: pd.DataFrame, list_columns: list, target_column: str) -> pd.DataFrame:
108
  """
109
+ Transform date-related data in the DataFrame.
110
 
111
  Args:
112
+ - df_data (pd.DataFrame): Input DataFrame.
113
+ - list_columns (list): List of column names to retain.
114
+ - target_column (str): Name of the target column.
115
 
116
  Returns:
117
+ - pd.DataFrame: Transformed DataFrame.
118
  """
119
+ # Make a copy of the input DataFrame to avoid modifying the original data
120
+ df_data = df_data.copy()
 
 
 
 
 
 
 
 
121
 
122
+ list_columns.append(target_column)
 
 
 
123
 
124
+ # Drop columns not in list_columns
125
+ columns_to_drop = [col for col in df_data.columns if col not in list_columns]
126
+ df_data.drop(columns=columns_to_drop, inplace=True)
127
 
128
+ # Find the date column
129
+ date_column = self.find_date_column(df_data)
130
 
131
+ if date_column is None:
132
+ raise ValueError("No date column found in the provided list of columns.")
133
+ else:
134
+ print("date_column", date_column)
135
+ # Parse date information only if a valid date column is found
136
+ df_data[date_column] = pd.to_datetime(df_data[date_column]) # Convert 'date' column to datetime format
137
+ df_data['day'] = df_data[date_column].dt.day # Extract day of the month
138
+ df_data['month'] = df_data[date_column].dt.month # Extract month
139
+ df_data['year'] = df_data[date_column].dt.year # Extract year
140
 
141
  # Cyclical Encoding for Months
142
+ df_data['month_sin'] = np.sin(2 * np.pi * df_data['month'] / 12) # Cyclical sine encoding for month
143
+ df_data['month_cos'] = np.cos(2 * np.pi * df_data['month'] / 12) # Cyclical cosine encoding for month
144
 
145
  # Day of the Week
146
+ df_data['day_of_week'] = df_data[date_column].dt.weekday # Extract day of the week (0 = Monday, 6 = Sunday)
147
 
148
  # Week of the Year
149
+ df_data['week_of_year'] = df_data[date_column].dt.isocalendar().week.astype(int) # Extract week of the year as integer
150
 
151
+ df_data.drop(columns=[date_column], axis=1, inplace=True) # Drop the original date column
152
 
153
+ return df_data
 
 
 
 
 
 
 
 
154
 
155
  def train_model(self, train: pd.DataFrame, target_column, list_columns) -> tuple:
156
  """
 
206
  # Return None for all outputs in case of an error
207
  return None, None, None
208
 
209
+ def plot_line_graph(self, y_val, y_val_pred):
 
 
210
 
211
+ # Take only the first 1000 data points
212
+ num_data_points = 1000
213
+ y_val = y_val[:num_data_points]
214
+ y_val_pred = y_val_pred[:num_data_points]
 
 
 
 
 
 
215
 
216
+ # Create Plotly figure
217
+ fig = make_subplots(rows=1, cols=1)
218
+
219
+ # Add actual vs predicted traces to the figure (line plot)
220
+ fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val, mode='lines', name='Actual'), row=1, col=1)
221
+ fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val_pred, mode='lines', name='Predicted'), row=1, col=1)
222
+
223
+ # Update layout
224
+ fig.update_layout(title='Actual vs Predicted Over Time', xaxis_title='Time', yaxis_title='Value')
225
+
226
+ # Show interactive plot
227
+ fig.show()
228
+ return fig
229
+
230
+ def plot_scatter_plot(self, y_val, y_val_pred):
231
+
232
+ # Take only the first 1000 data points
233
+ num_data_points = 1000
234
+ y_val = y_val[:num_data_points]
235
+ y_val_pred = y_val_pred[:num_data_points]
236
+
237
+ # Create Plotly figure
238
+ fig = make_subplots(rows=1, cols=1)
239
+
240
+ # Add scatter plots for actual vs predicted (scatter plot)
241
+ fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val, mode='markers', name='Actual', marker=dict(color='blue', size=8)), row=1, col=1)
242
+ fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val_pred, mode='markers', name='Predicted', marker=dict(color='orange', size=8)), row=1, col=1)
243
+
244
+ # Update layout
245
+ fig.update_layout(title='Actual vs Predicted Over Time (Scatter Plot)', xaxis_title='Time', yaxis_title='Value')
246
+
247
+ # Show interactive plot
248
+ fig.show()
249
+
250
+ return fig
251
 
252
 
253
  def predict_sales_for_date(self, input_data, model: xgb.Booster) -> float:
 
371
  # Split the string by comma and convert it into a list
372
  column_list = column_list.split(",")
373
 
 
 
 
 
374
 
375
  text = self.audio_to_text(audio_path)
376
 
 
386
  trained_model, y_val, y_val_pred, validation = self.train_model(train_df, target_column, column_list)
387
 
388
  # Plot interactive evaluation for training
389
+ line_graph = self.plot_line_graph(y_val, y_val_pred)
390
+
391
+ scatter_plot = self.plot_scatter_plot(y_val, y_val_pred)
392
 
393
  # Predict sales for the specified date using the trained model
394
  predicted_value = self.predict_sales_for_date(input_data, trained_model)