# main.py # ---------------------------------------------------------------------------- # Import necessary libraries # ---------------------------------------------------------------------------- # pip install gradio numpy pandas matplotlib scipy transformers torch sentencepiece # ---------------------------------------------------------------------------- import gradio as gr import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy.stats import norm from transformers import pipeline import warnings import os # Suppress warnings for a cleaner output warnings.filterwarnings("ignore") # Set Matplotlib backend to a non-interactive one to avoid display issues in some environments plt.switch_backend('Agg') # ---------------------------------------------------------------------------- # Global Variables and Initial Setup # ---------------------------------------------------------------------------- # Initialize the Hugging Face pipeline for text generation. # We use a small, efficient model to ensure the app runs smoothly. try: explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small') print("Hugging Face model loaded successfully.") except Exception as e: print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}") explanation_generator = None # Create a sample dataset for demonstration purposes. # This simulates the uncertain costs (in thousands of $) for different tasks in a project. sample_project_costs = pd.DataFrame({ 'task_cost_thousands': [12, 15, 10, 13, 18, 9, 22, 14, 16, 11, 17, 20] }) SAMPLE_CSV_PATH = 'sample_project_costs.csv' sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False) # ---------------------------------------------------------------------------- # Core Logic Functions # ---------------------------------------------------------------------------- def create_error_plot(message): """Creates a matplotlib plot with a specified error message.""" fig, ax = plt.subplots(figsize=(8, 5)) ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red', fontsize=12) ax.set_xticks([]) ax.set_yticks([]) plt.tight_layout() return fig def process_input_data(file_obj, example_choice, manual_mean, manual_std): """ Processes the user's input from the UI. It prioritizes input in the order: File Upload > Example Dataset > Manual Entry. It validates the data to ensure it's a single column of numbers. Args: file_obj (File object): The uploaded file from gr.File. example_choice (str): The name of the chosen example dataset. manual_mean (float): Manually entered mean. manual_std (float): Manually entered standard deviation. Returns: tuple: A tuple containing: - A pandas DataFrame with the processed data. - A Matplotlib figure showing the data distribution. - A string with summary statistics. - A string with a validation message. """ data = None source_info = "" # 1. Prioritize input source if file_obj is not None: try: df = pd.read_csv(file_obj.name) source_info = f"from uploaded file: {os.path.basename(file_obj.name)}" data = df except Exception as e: return None, create_error_plot(f"Error reading file: {e}"), None, f"Error reading file: {e}. Please ensure it's a valid CSV." elif example_choice and example_choice == "Project Cost Estimation": df = pd.read_csv(SAMPLE_CSV_PATH) source_info = "from the 'Project Cost Estimation' example" data = df elif manual_mean is not None and manual_std is not None: if manual_std <= 0: return None, create_error_plot("Standard Deviation must be positive."), None, "Manual Input Error: Standard Deviation must be positive." stats_text = (f"Source: Manual Input\n" f"Mean: {manual_mean:.2f}\n" f"Standard Deviation: {manual_std:.2f}") fig, ax = plt.subplots() ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.', ha='center', va='center', fontsize=12) ax.set_xticks([]) ax.set_yticks([]) plt.tight_layout() manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]}) return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation." if data is None: return None, create_error_plot("No data source provided."), None, "No data source provided. Please upload a file, choose an example, or enter parameters." # 2. Validate data structure if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]): error_msg = (f"Data Error: The data {source_info} is not compatible. " "The app requires a CSV with a single column of numerical data. " f"Detected {data.shape[1]} columns.") return None, create_error_plot(error_msg), None, error_msg # 3. Process valid data series = data.iloc[:, 0].dropna() mean = series.mean() std = series.std() if std == 0: error_msg = "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty." return None, create_error_plot(error_msg), None, error_msg # 4. Generate visualization and stats fig, ax = plt.subplots(figsize=(6, 4)) ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution') xmin, xmax = plt.xlim() x = np.linspace(xmin, xmax, 100) p = norm.pdf(x, mean, std) ax.plot(x, p, 'k', linewidth=2, label='Fitted Normal Curve') ax.set_title(f"Distribution of Input Data") ax.set_xlabel(series.name) ax.set_ylabel("Density") ax.legend() ax.grid(True, linestyle='--', alpha=0.6) plt.tight_layout() stats_text = (f"Source: {source_info}\n" f"Number of Data Points: {len(series)}\n" f"Mean: {mean:.2f}\n" f"Standard Deviation: {std:.2f}\n" f"Min: {series.min():.2f}\n" f"Max: {series.max():.2f}") validation_message = "Data loaded and validated successfully! Ready to run the simulation." return data, fig, stats_text, validation_message def run_monte_carlo_simulation(data, num_simulations, target_value): """ Performs the Monte Carlo simulation based on the processed data. """ # **NEW**: Check for valid data at the beginning and return clear error plots if invalid. if data is None: error_message = "ERROR: No valid data available.\nPlease go to Step 1 & 2 and click 'Prepare Simulation' first." error_plot = create_error_plot(error_message) return error_plot, error_plot, "Simulation failed. See plot for details." num_simulations = int(num_simulations) if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1: mean = data['mean'].iloc[0] std = data['std'].iloc[0] data_name = "Value" else: series = data.iloc[:, 0] mean = series.mean() std = series.std() data_name = series.name simulation_results = np.random.normal(mean, std, num_simulations) fig_hist, ax_hist = plt.subplots(figsize=(8, 5)) ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black') sim_mean = np.mean(simulation_results) p5 = np.percentile(simulation_results, 5) p95 = np.percentile(simulation_results, 95) ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}') ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}') ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}') ax_hist.set_title(f'Monte Carlo Simulation Results ({num_simulations:,} Iterations)', fontsize=14) ax_hist.set_xlabel(f'Simulated {data_name}') ax_hist.set_ylabel('Probability Density') ax_hist.legend() ax_hist.grid(True, linestyle='--', alpha=0.6) plt.tight_layout() fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5)) sorted_results = np.sort(simulation_results) yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1) ax_cdf.plot(sorted_results, yvals, label='CDF') p50 = np.percentile(simulation_results, 50) ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}') ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}') ax_cdf.plot(p95, 0.95, 'go', ms=8, label=f'P95: {p95:.2f}') ax_cdf.set_title('Cumulative Distribution Function (CDF)', fontsize=14) ax_cdf.set_xlabel(f'Simulated {data_name}') ax_cdf.set_ylabel('Cumulative Probability') ax_cdf.grid(True, linestyle='--', alpha=0.6) ax_cdf.legend() plt.tight_layout() prob_achieved = 0 if target_value is not None: prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100 results_summary = ( f"Simulation Summary ({num_simulations:,} iterations):\n" f"--------------------------------------------------\n" f"Mean (Average Outcome): {sim_mean:.2f}\n" f"Standard Deviation: {np.std(simulation_results):.2f}\n\n" f"Percentiles (Confidence Range):\n" f" - 5th Percentile (P5): {p5:.2f}\n" f" - 50th Percentile (Median): {p50:.2f}\n" f" - 95th Percentile (P95): {p95:.2f}\n" f"This means there is a 90% probability the outcome will be between {p5:.2f} and {p95:.2f}.\n\n" ) if target_value is not None: results_summary += ( f"Probability Analysis:\n" f" - Probability of outcome being less than or equal to {target_value:.2f}: {prob_achieved:.2f}%\n" ) return fig_hist, fig_cdf, results_summary def generate_explanation(results_summary): """ Uses a Hugging Face model to explain the simulation results in simple terms. """ if explanation_generator is None: return "LLM model not loaded. Cannot generate explanation." # **NEW**: More robust check for failed simulation runs. if not results_summary or "Please process valid data" in results_summary or "Simulation failed" in results_summary: return "Could not generate explanation. Please run a successful simulation first." prompt = f""" Explain the following Monte Carlo simulation results to a non-technical manager. Focus on what the numbers mean in terms of risk and decision-making. Be concise and clear. Results: {results_summary} Explanation: """ try: response = explanation_generator(prompt, max_length=200, num_beams=3, no_repeat_ngram_size=2) return response[0]['generated_text'] except Exception as e: return f"Error generating explanation: {e}" # ---------------------------------------------------------------------------- # Gradio UI Layout # ---------------------------------------------------------------------------- with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer") as app: gr.Markdown( """ # Welcome to the Monte Carlo Simulation Explorer! This tool helps you understand and perform a Monte Carlo simulation, a powerful technique for modeling uncertainty. **How it works:** Instead of guessing a single outcome, you provide a range of possible inputs (or a distribution). The simulation then runs thousands of trials with random values from that input, creating a probability distribution of all possible outcomes. **Get started:** 1. **Provide Data:** Use one of the methods in the "Data Collection" box below. 2. **Prepare Simulation:** Click the "Prepare Simulation" button to validate and visualize your input. 3. **Run Simulation:** Adjust the settings and click "Run Simulation". 4. **Interpret:** Analyze the resulting plots and get an AI-powered explanation. """ ) # --- Row 1: Data Input and Preparation --- with gr.Row(): with gr.Column(scale=1): with gr.Group(): gr.Markdown("### 1. Data Collection") gr.Markdown("Choose **one** method below.") with gr.Tabs(): with gr.TabItem("Upload File"): file_input = gr.File(label="Upload a Single-Column CSV File", file_types=[".csv"]) with gr.TabItem("Use Example"): example_input = gr.Dropdown( ["Project Cost Estimation"], label="Select an Example Dataset" ) with gr.TabItem("Manual Input"): gr.Markdown("Define a normal distribution manually.") manual_mean_input = gr.Number(label="Mean (Average)") manual_std_input = gr.Number(label="Standard Deviation (Spread)") prepare_button = gr.Button("Prepare Simulation", variant="secondary") with gr.Column(scale=2): with gr.Group(): gr.Markdown("### 2. Preparation & Visualization") validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3) input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6) input_plot_output = gr.Plot(label="Input Data Distribution") # --- Row 2: Simulation Controls and Results --- with gr.Row(): with gr.Group(): gr.Markdown("### 3. Simulation Run & Results") with gr.Row(): with gr.Column(scale=1, min_width=250): gr.Markdown("**Simulation Settings**") num_simulations_input = gr.Slider( minimum=1000, maximum=50000, value=10000, step=1000, label="Number of Simulations" ) target_value_input = gr.Number( label="Target Value (Optional)", info="Calculate the probability of the result being <= this value." ) run_button = gr.Button("Run Simulation", variant="primary") with gr.Column(scale=3): with gr.Tabs(): with gr.TabItem("Results Histogram"): results_plot_output = gr.Plot(label="Simulation Outcome Distribution") with gr.TabItem("Cumulative Probability (CDF)"): cdf_plot_output = gr.Plot(label="Cumulative Distribution Function") with gr.TabItem("Numerical Summary"): results_summary_output = gr.Textbox(label="Detailed Results", interactive=False, lines=12) # --- Row 3: AI-Powered Explanation --- with gr.Row(): with gr.Group(): gr.Markdown("### 4. AI-Powered Explanation") explain_button = gr.Button("Explain the Takeaways", variant="secondary") explanation_output = gr.Textbox( label="Key Takeaways from the LLM", interactive=False, lines=5, placeholder="Click the button above to generate an explanation of the results..." ) # ---------------------------------------------------------------------------- # Define UI Component Interactions # ---------------------------------------------------------------------------- processed_data_state = gr.State() prepare_button.click( fn=process_input_data, inputs=[file_input, example_input, manual_mean_input, manual_std_input], outputs=[processed_data_state, input_plot_output, input_stats_output, validation_output] ) run_button.click( fn=run_monte_carlo_simulation, inputs=[processed_data_state, num_simulations_input, target_value_input], outputs=[results_plot_output, cdf_plot_output, results_summary_output] ) explain_button.click( fn=generate_explanation, inputs=[results_summary_output], outputs=[explanation_output] ) # ---------------------------------------------------------------------------- # Launch the Gradio App # ---------------------------------------------------------------------------- if __name__ == "__main__": app.launch(debug=True)