demand-forecasting

Sleeping

App Files Community

robertselvam committed on May 2, 2024

Commit

5bfe37b

verified ·

1 Parent(s): 275022b

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -64

app.py CHANGED Viewed

@@ -83,19 +83,18 @@ class DemandForecasting:
             return None
-    def find_date_column(self, df_data: pd.DataFrame, list_columns: list) -> str:
         """
-        Find the column containing date information from the list of columns.
         Args:
         - df_data (pd.DataFrame): Input DataFrame.
-        - list_columns (list): List of column names to search for date information.
         Returns:
-        - str: Name of the column containing date information.
         """
-        for column in list_columns:
-            # Check if the column contains date-like values
             try:
                 pd.to_datetime(df_data[column])
                 return column
@@ -105,61 +104,53 @@ class DemandForecasting:
         # Return None if no date column is found
         return None
-    def preprocess_data(self, df_data: pd.DataFrame, list_columns, target_column) -> pd.DataFrame:
         """
-        Preprocess the input DataFrame.
         Args:
-        - df_data (pd.DataFrame): Input DataFrame to preprocess.
         Returns:
-        - pd.DataFrame: Preprocessed DataFrame.
         """
-        try:
-            print(type(list_columns))
-            # Make a copy of the input DataFrame to avoid modifying the original data
-            df_data = df_data.copy()
-            list_columns.append(target_column)
-            # Drop columns not in list_columns
-            columns_to_drop = [col for col in df_data.columns if col not in list_columns]
-            df_data.drop(columns=columns_to_drop, inplace=True)
-            # Find the date column
-            date_column = self.find_date_column(df_data, list_columns)
-            if date_column is None:
-                raise ValueError("No date column found in the provided list of columns.")
-            # Parse date information
-            df_data[date_column] = pd.to_datetime(df_data[date_column])     # Convert 'date' column to datetime format
-            df_data['day'] = df_data[date_column].dt.day        # Extract day of the month
-            df_data['month'] = df_data[date_column].dt.month         # Extract month
-            df_data['year'] = df_data[date_column].dt.year        # Extract year
             # Cyclical Encoding for Months
-            df_data['month_sin'] = np.sin(2 * np.pi * df_data['month'] / 12)   # Cyclical sine encoding for month
-            df_data['month_cos'] = np.cos(2 * np.pi * df_data['month'] / 12)   # Cyclical cosine encoding for month
             # Day of the Week
-            df_data['day_of_week'] = df_data[date_column].dt.weekday      # Extract day of the week (0 = Monday, 6 = Sunday)
             # Week of the Year
-            df_data['week_of_year'] = df_data[date_column].dt.isocalendar().week.astype(int)   # Extract week of the year as integer
-            df_data.drop(columns=[date_column], inplace=True)
-            print("df_data", df_data)
-            return df_data
-        except Exception as e:
-            # Log an error message if an exception occurs during data preprocessing
-            LOGGER.error(f"Error preprocessing data: {e}")
-            # Return None in case of an error
-            return None
     def train_model(self, train: pd.DataFrame, target_column, list_columns) -> tuple:
         """
@@ -215,24 +206,48 @@ class DemandForecasting:
             # Return None for all outputs in case of an error
             return None, None, None
-    def plot_evaluation_interactive(self, y_true: np.ndarray, y_pred: np.ndarray, title: str) -> None:
-        """
-        Plot interactive evaluation using Plotly.
-        Args:
-        - y_true (np.ndarray): True values.
-        - y_pred (np.ndarray): Predicted values.
-        - title (str): Title of the plot.
-        """
-        try:
-            # Create a scatter plot using Plotly
-            fig = px.scatter(x=y_true, y=y_pred, labels={'x': 'True Values', 'y': 'Predictions'}, title=title, color_discrete_map={'': 'purple'})
-            fig.show()
-            return fig
-        except Exception as e:
-            # Log an error message if an exception occurs during plot generation
-            LOGGER.error(f"Error plotting evaluation: {e}")
     def predict_sales_for_date(self, input_data, model: xgb.Booster) -> float:
@@ -356,10 +371,6 @@ class DemandForecasting:
             # Split the string by comma and convert it into a list
             column_list = column_list.split(",")
-            print("train_csv_path", train_csv_path)
-            print("audio_path", audio_path)
-            print("column_list", column_list)
-            print("target_column", target_column)
             text = self.audio_to_text(audio_path)
@@ -375,7 +386,9 @@ class DemandForecasting:
             trained_model, y_val, y_val_pred, validation = self.train_model(train_df, target_column, column_list)
             # Plot interactive evaluation for training
-            plot = self.plot_evaluation_interactive(y_val, y_val_pred, title='Validation Set Evaluation')
             # Predict sales for the specified date using the trained model
             predicted_value = self.predict_sales_for_date(input_data, trained_model)

             return None
+    def find_date_column(self, df_data: pd.DataFrame) -> str:
         """
+        Find the column containing date-type values from the DataFrame.
         Args:
         - df_data (pd.DataFrame): Input DataFrame.
         Returns:
+        - str: Name of the column containing date-type values.
         """
+        for column in df_data.columns:
+            # Check if the column can be converted to datetime
             try:
                 pd.to_datetime(df_data[column])
                 return column
         # Return None if no date column is found
         return None
+    def preprocess_data(self, df_data: pd.DataFrame, list_columns: list, target_column: str) -> pd.DataFrame:
         """
+        Transform date-related data in the DataFrame.
         Args:
+        - df_data (pd.DataFrame): Input DataFrame.
+        - list_columns (list): List of column names to retain.
+        - target_column (str): Name of the target column.
         Returns:
+        - pd.DataFrame: Transformed DataFrame.
         """
+        # Make a copy of the input DataFrame to avoid modifying the original data
+        df_data = df_data.copy()
+        list_columns.append(target_column)
+        # Drop columns not in list_columns
+        columns_to_drop = [col for col in df_data.columns if col not in list_columns]
+        df_data.drop(columns=columns_to_drop, inplace=True)
+        # Find the date column
+        date_column = self.find_date_column(df_data)
+        if date_column is None:
+            raise ValueError("No date column found in the provided list of columns.")
+        else:
+            print("date_column", date_column)
+            # Parse date information only if a valid date column is found
+            df_data[date_column] = pd.to_datetime(df_data[date_column])  # Convert 'date' column to datetime format
+            df_data['day'] = df_data[date_column].dt.day  # Extract day of the month
+            df_data['month'] = df_data[date_column].dt.month  # Extract month
+            df_data['year'] = df_data[date_column].dt.year  # Extract year
             # Cyclical Encoding for Months
+            df_data['month_sin'] = np.sin(2 * np.pi * df_data['month'] / 12)  # Cyclical sine encoding for month
+            df_data['month_cos'] = np.cos(2 * np.pi * df_data['month'] / 12)  # Cyclical cosine encoding for month
             # Day of the Week
+            df_data['day_of_week'] = df_data[date_column].dt.weekday  # Extract day of the week (0 = Monday, 6 = Sunday)
             # Week of the Year
+            df_data['week_of_year'] = df_data[date_column].dt.isocalendar().week.astype(int)  # Extract week of the year as integer
+            df_data.drop(columns=[date_column], axis=1, inplace=True)  # Drop the original date column
+        return df_data
     def train_model(self, train: pd.DataFrame, target_column, list_columns) -> tuple:
         """
             # Return None for all outputs in case of an error
             return None, None, None
+    def plot_line_graph(self, y_val, y_val_pred):
+        # Take only the first 1000 data points
+        num_data_points = 1000
+        y_val = y_val[:num_data_points]
+        y_val_pred = y_val_pred[:num_data_points]
+        # Create Plotly figure
+        fig = make_subplots(rows=1, cols=1)
+        # Add actual vs predicted traces to the figure (line plot)
+        fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val, mode='lines', name='Actual'), row=1, col=1)
+        fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val_pred, mode='lines', name='Predicted'), row=1, col=1)
+        # Update layout
+        fig.update_layout(title='Actual vs Predicted Over Time', xaxis_title='Time', yaxis_title='Value')
+        # Show interactive plot
+        fig.show()
+        return fig
+    def plot_scatter_plot(self, y_val, y_val_pred):
+        # Take only the first 1000 data points
+        num_data_points = 1000
+        y_val = y_val[:num_data_points]
+        y_val_pred = y_val_pred[:num_data_points]
+        # Create Plotly figure
+        fig = make_subplots(rows=1, cols=1)
+        # Add scatter plots for actual vs predicted (scatter plot)
+        fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val, mode='markers', name='Actual', marker=dict(color='blue', size=8)), row=1, col=1)
+        fig.add_trace(go.Scatter(x=np.arange(len(y_val)), y=y_val_pred, mode='markers', name='Predicted', marker=dict(color='orange', size=8)), row=1, col=1)
+        # Update layout
+        fig.update_layout(title='Actual vs Predicted Over Time (Scatter Plot)', xaxis_title='Time', yaxis_title='Value')
+        # Show interactive plot
+        fig.show()
+        return fig
     def predict_sales_for_date(self, input_data, model: xgb.Booster) -> float:
             # Split the string by comma and convert it into a list
             column_list = column_list.split(",")
             text = self.audio_to_text(audio_path)
             trained_model, y_val, y_val_pred, validation = self.train_model(train_df, target_column, column_list)
             # Plot interactive evaluation for training
+            line_graph = self.plot_line_graph(y_val, y_val_pred)
+            scatter_plot = self.plot_scatter_plot(y_val, y_val_pred)
             # Predict sales for the specified date using the trained model
             predicted_value = self.predict_sales_for_date(input_data, trained_model)