Spaces:

nikhilsingh
/

monte-carlo-simulation

Running

App Files Files Community

nikhilsingh committed 10 days ago

Commit

89edfc8

verified ·

1 Parent(s): ef2037b

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -51

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 # ----------------------------------------------------------------------------
 # Import necessary libraries
 # ----------------------------------------------------------------------------
@@ -25,6 +27,7 @@ plt.switch_backend('Agg')
 # We use a small, efficient model to ensure the app runs smoothly.
 try:
     explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
 except Exception as e:
     print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
     explanation_generator = None
@@ -42,6 +45,15 @@ sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False)
 # Core Logic Functions
 # ----------------------------------------------------------------------------
 def process_input_data(file_obj, example_choice, manual_mean, manual_std):
     """
     Processes the user's input from the UI.
@@ -71,21 +83,18 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
             source_info = f"from uploaded file: {os.path.basename(file_obj.name)}"
             data = df
         except Exception as e:
-            return None, None, None, f"Error reading file: {e}. Please ensure it's a valid CSV."
-    elif example_choice == "Project Cost Estimation":
         df = pd.read_csv(SAMPLE_CSV_PATH)
         source_info = "from the 'Project Cost Estimation' example"
         data = df
     elif manual_mean is not None and manual_std is not None:
-         # If manual input, we don't have raw data, just parameters.
-         # We'll return these params to be used directly in the simulation.
          if manual_std <= 0:
-             return None, None, None, "Manual Input Error: Standard Deviation must be positive."
          stats_text = (f"Source: Manual Input\n"
                        f"Mean: {manual_mean:.2f}\n"
                        f"Standard Deviation: {manual_std:.2f}")
-         # Create a dummy plot for manual input
          fig, ax = plt.subplots()
          ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
                  ha='center', va='center', fontsize=12)
@@ -93,19 +102,18 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
          ax.set_yticks([])
          plt.tight_layout()
-         # Use a special DataFrame to signal manual input downstream
          manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
          return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."
     if data is None:
-        return None, None, None, "No data source provided. Please upload a file, choose an example, or enter parameters."
     # 2. Validate data structure
     if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
         error_msg = (f"Data Error: The data {source_info} is not compatible. "
                      "The app requires a CSV with a single column of numerical data. "
                      f"Detected {data.shape[1]} columns.")
-        return None, None, None, error_msg
     # 3. Process valid data
     series = data.iloc[:, 0].dropna()
@@ -113,13 +121,13 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
     std = series.std()
     if std == 0:
-        return None, None, None, "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
     # 4. Generate visualization and stats
     fig, ax = plt.subplots(figsize=(6, 4))
     ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')
-    # Overlay a normal distribution curve
     xmin, xmax = plt.xlim()
     x = np.linspace(xmin, xmax, 100)
     p = norm.pdf(x, mean, std)
@@ -147,49 +155,34 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
 def run_monte_carlo_simulation(data, num_simulations, target_value):
     """
     Performs the Monte Carlo simulation based on the processed data.
-    Args:
-        data (pd.DataFrame): The validated input data.
-        num_simulations (int): The number of simulation iterations to run.
-        target_value (float): A user-defined target to calculate probability against.
-    Returns:
-        tuple: A tuple containing:
-               - A Matplotlib figure of the simulation results histogram.
-               - A Matplotlib figure of the cumulative distribution (CDF).
-               - A string containing detailed numerical results.
     """
     if data is None:
-        return None, None, "Please process valid data before running the simulation."
     num_simulations = int(num_simulations)
-    # Check if data is from manual input or from a file/example
     if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
         mean = data['mean'].iloc[0]
         std = data['std'].iloc[0]
-        data_name = "Value" # Generic name for manual input
     else:
         series = data.iloc[:, 0]
         mean = series.mean()
         std = series.std()
         data_name = series.name
-    # The core of the Monte Carlo simulation: generate random samples
-    # We assume the underlying uncertainty follows a Normal Distribution
-    # defined by the mean and standard deviation of the input data.
     simulation_results = np.random.normal(mean, std, num_simulations)
-    # --- Generate Results Histogram Plot ---
     fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
     ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')
-    # Calculate key statistics for plotting
     sim_mean = np.mean(simulation_results)
     p5 = np.percentile(simulation_results, 5)
     p95 = np.percentile(simulation_results, 95)
-    # Add vertical lines for key statistics
     ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
     ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
     ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')
@@ -201,13 +194,11 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
     ax_hist.grid(True, linestyle='--', alpha=0.6)
     plt.tight_layout()
-    # --- Generate Cumulative Distribution (CDF) Plot ---
     fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
     sorted_results = np.sort(simulation_results)
     yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
     ax_cdf.plot(sorted_results, yvals, label='CDF')
-    # Add markers for P5, P50, P95
     p50 = np.percentile(simulation_results, 50)
     ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
     ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
@@ -220,7 +211,6 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
     ax_cdf.legend()
     plt.tight_layout()
-    # --- Calculate Final Numerical Results ---
     prob_achieved = 0
     if target_value is not None:
         prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100
@@ -248,17 +238,12 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
 def generate_explanation(results_summary):
     """
     Uses a Hugging Face model to explain the simulation results in simple terms.
-    Args:
-        results_summary (str): The numerical summary from the simulation.
-    Returns:
-        str: A generated explanation of the results.
     """
     if explanation_generator is None:
         return "LLM model not loaded. Cannot generate explanation."
-    if not results_summary or "Please process valid data" in results_summary:
-        return "Run a successful simulation first to generate an explanation."
     prompt = f"""
     Explain the following Monte Carlo simulation results to a non-technical manager.
@@ -297,9 +282,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
     # --- Row 1: Data Input and Preparation ---
     with gr.Row():
-        # --- Column 1.1: Data Collection ---
         with gr.Column(scale=1):
-            with gr.Box():
                 gr.Markdown("### 1. Data Collection")
                 gr.Markdown("Choose **one** method below.")
@@ -317,9 +301,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
                 prepare_button = gr.Button("Prepare Simulation", variant="secondary")
-        # --- Column 1.2: Preparation Plots & Visualization ---
         with gr.Column(scale=2):
-            with gr.Box():
                 gr.Markdown("### 2. Preparation & Visualization")
                 validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
                 input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
@@ -327,7 +310,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
     # --- Row 2: Simulation Controls and Results ---
     with gr.Row():
-        with gr.Box():
             gr.Markdown("### 3. Simulation Run & Results")
             with gr.Row():
                 with gr.Column(scale=1, min_width=250):
@@ -353,7 +336,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
     # --- Row 3: AI-Powered Explanation ---
     with gr.Row():
-        with gr.Box():
             gr.Markdown("### 4. AI-Powered Explanation")
             explain_button = gr.Button("Explain the Takeaways", variant="secondary")
             explanation_output = gr.Textbox(
@@ -367,7 +350,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
     # Define UI Component Interactions
     # ----------------------------------------------------------------------------
-    # Hidden state to store the processed data between steps
     processed_data_state = gr.State()
     prepare_button.click(
@@ -392,6 +374,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
 # Launch the Gradio App
 # ----------------------------------------------------------------------------
 if __name__ == "__main__":
-    # To run this app, save the code as a Python file (e.g., main.py)
-    # and run `python main.py` from your terminal.
     app.launch(debug=True)

+# app.py
 # ----------------------------------------------------------------------------
 # Import necessary libraries
 # ----------------------------------------------------------------------------
 # We use a small, efficient model to ensure the app runs smoothly.
 try:
     explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
+    print("Hugging Face model loaded successfully.")
 except Exception as e:
     print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
     explanation_generator = None
 # Core Logic Functions
 # ----------------------------------------------------------------------------
+def create_error_plot(message):
+    """Build a placeholder Matplotlib figure that displays an error message.
+
+    The data-processing and simulation functions return this figure in the
+    plot slot of their result tuples when they cannot proceed.
+
+    Args:
+        message (str): Text to draw on the figure.
+
+    Returns:
+        The Matplotlib figure containing the message.
+    """
+    fig, ax = plt.subplots(figsize=(8, 5))
+    # Centre the text at (0.5, 0.5), the midpoint of the default 0-1 axes range;
+    # wrap=True asks Matplotlib to wrap long messages.
+    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red', fontsize=12)
+    # Hide the tick marks: the axes only serve as a canvas for the message.
+    ax.set_xticks([])
+    ax.set_yticks([])
+    plt.tight_layout()
+    return fig
 def process_input_data(file_obj, example_choice, manual_mean, manual_std):
     """
     Processes the user's input from the UI.
             source_info = f"from uploaded file: {os.path.basename(file_obj.name)}"
             data = df
         except Exception as e:
+            return None, create_error_plot(f"Error reading file: {e}"), None, f"Error reading file: {e}. Please ensure it's a valid CSV."
+    elif example_choice and example_choice == "Project Cost Estimation":
         df = pd.read_csv(SAMPLE_CSV_PATH)
         source_info = "from the 'Project Cost Estimation' example"
         data = df
     elif manual_mean is not None and manual_std is not None:
          if manual_std <= 0:
+             return None, create_error_plot("Standard Deviation must be positive."), None, "Manual Input Error: Standard Deviation must be positive."
          stats_text = (f"Source: Manual Input\n"
                        f"Mean: {manual_mean:.2f}\n"
                        f"Standard Deviation: {manual_std:.2f}")
          fig, ax = plt.subplots()
          ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
                  ha='center', va='center', fontsize=12)
          ax.set_yticks([])
          plt.tight_layout()
          manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
          return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."
     if data is None:
+        return None, create_error_plot("No data source provided."), None, "No data source provided. Please upload a file, choose an example, or enter parameters."
     # 2. Validate data structure
     if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
         error_msg = (f"Data Error: The data {source_info} is not compatible. "
                      "The app requires a CSV with a single column of numerical data. "
                      f"Detected {data.shape[1]} columns.")
+        return None, create_error_plot(error_msg), None, error_msg
     # 3. Process valid data
     series = data.iloc[:, 0].dropna()
     std = series.std()
     if std == 0:
+        error_msg = "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
+        return None, create_error_plot(error_msg), None, error_msg
     # 4. Generate visualization and stats
     fig, ax = plt.subplots(figsize=(6, 4))
     ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')
     xmin, xmax = plt.xlim()
     x = np.linspace(xmin, xmax, 100)
     p = norm.pdf(x, mean, std)
 def run_monte_carlo_simulation(data, num_simulations, target_value):
     """
     Performs the Monte Carlo simulation based on the processed data.
     """
+    # Guard: with no prepared data there is nothing to simulate, so return error plots and a failure message.
     if data is None:
+        error_message = "ERROR: No valid data available.\nPlease go to Step 1 & 2 and click 'Prepare Simulation' first."
+        error_plot = create_error_plot(error_message)
+        return error_plot, error_plot, "Simulation failed. See plot for details."
     num_simulations = int(num_simulations)
     if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
         mean = data['mean'].iloc[0]
         std = data['std'].iloc[0]
+        data_name = "Value"
     else:
         series = data.iloc[:, 0]
         mean = series.mean()
         std = series.std()
         data_name = series.name
     simulation_results = np.random.normal(mean, std, num_simulations)
     fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
     ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')
     sim_mean = np.mean(simulation_results)
     p5 = np.percentile(simulation_results, 5)
     p95 = np.percentile(simulation_results, 95)
     ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
     ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
     ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')
     ax_hist.grid(True, linestyle='--', alpha=0.6)
     plt.tight_layout()
     fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
     sorted_results = np.sort(simulation_results)
     yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
     ax_cdf.plot(sorted_results, yvals, label='CDF')
     p50 = np.percentile(simulation_results, 50)
     ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
     ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
     ax_cdf.legend()
     plt.tight_layout()
     prob_achieved = 0
     if target_value is not None:
         prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100
 def generate_explanation(results_summary):
     """
     Uses a Hugging Face model to explain the simulation results in simple terms.
     """
     if explanation_generator is None:
         return "LLM model not loaded. Cannot generate explanation."
+    # Skip the model call when the summary is empty or is a failure message from the simulation step.
+    if not results_summary or "Please process valid data" in results_summary or "Simulation failed" in results_summary:
+        return "Could not generate explanation. Please run a successful simulation first."
     prompt = f"""
     Explain the following Monte Carlo simulation results to a non-technical manager.
     # --- Row 1: Data Input and Preparation ---
     with gr.Row():
         with gr.Column(scale=1):
+            with gr.Group():
                 gr.Markdown("### 1. Data Collection")
                 gr.Markdown("Choose **one** method below.")
                 prepare_button = gr.Button("Prepare Simulation", variant="secondary")
         with gr.Column(scale=2):
+            with gr.Group():
                 gr.Markdown("### 2. Preparation & Visualization")
                 validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
                 input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
     # --- Row 2: Simulation Controls and Results ---
     with gr.Row():
+        with gr.Group():
             gr.Markdown("### 3. Simulation Run & Results")
             with gr.Row():
                 with gr.Column(scale=1, min_width=250):
     # --- Row 3: AI-Powered Explanation ---
     with gr.Row():
+        with gr.Group():
             gr.Markdown("### 4. AI-Powered Explanation")
             explain_button = gr.Button("Explain the Takeaways", variant="secondary")
             explanation_output = gr.Textbox(
     # Define UI Component Interactions
     # ----------------------------------------------------------------------------
     processed_data_state = gr.State()
     prepare_button.click(
 # Launch the Gradio App
 # ----------------------------------------------------------------------------
 if __name__ == "__main__":
     app.launch(debug=True)