Spaces:

nikhilsingh
/

monte-carlo-simulation

Sleeping

App Files Files Community

nikhilsingh committed 11 days ago

Commit

b75c41f

verified ·

1 Parent(s): 5fb86bd

Create mcs-app.py

Browse files

Files changed (1) hide show

mcs-app.py +397 -0

mcs-app.py ADDED Viewed

	@@ -0,0 +1,397 @@

+# ----------------------------------------------------------------------------
+# Import necessary libraries
+# ----------------------------------------------------------------------------
+# pip install gradio numpy pandas matplotlib scipy transformers torch sentencepiece
+# ----------------------------------------------------------------------------
+import gradio as gr
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy.stats import norm
+from transformers import pipeline
+import warnings
+import os
+# Suppress warnings for a cleaner output
+warnings.filterwarnings("ignore")
+# Set Matplotlib backend to a non-interactive one to avoid display issues in some environments
+plt.switch_backend('Agg')
+# ----------------------------------------------------------------------------
+# Global Variables and Initial Setup
+# ----------------------------------------------------------------------------
+# Initialize the Hugging Face pipeline for text generation.
+# We use a small, efficient model to ensure the app runs smoothly.
+try:
+    explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
+except Exception as e:
+    print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
+    explanation_generator = None
+# Create a sample dataset for demonstration purposes.
+# This simulates the uncertain costs (in thousands of $) for different tasks in a project.
+sample_project_costs = pd.DataFrame({
+    'task_cost_thousands': [12, 15, 10, 13, 18, 9, 22, 14, 16, 11, 17, 20]
+})
+SAMPLE_CSV_PATH = 'sample_project_costs.csv'
+sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False)
+# ----------------------------------------------------------------------------
+# Core Logic Functions
+# ----------------------------------------------------------------------------
+def process_input_data(file_obj, example_choice, manual_mean, manual_std):
+    """
+    Processes the user's input from the UI.
+    It prioritizes input in the order: File Upload > Example Dataset > Manual Entry.
+    It validates the data to ensure it's a single column of numbers.
+    Args:
+        file_obj (File object): The uploaded file from gr.File.
+        example_choice (str): The name of the chosen example dataset.
+        manual_mean (float): Manually entered mean.
+        manual_std (float): Manually entered standard deviation.
+    Returns:
+        tuple: A tuple containing:
+               - A pandas DataFrame with the processed data.
+               - A Matplotlib figure showing the data distribution.
+               - A string with summary statistics.
+               - A string with a validation message.
+    """
+    data = None
+    source_info = ""
+    # 1. Prioritize input source
+    if file_obj is not None:
+        try:
+            df = pd.read_csv(file_obj.name)
+            source_info = f"from uploaded file: {os.path.basename(file_obj.name)}"
+            data = df
+        except Exception as e:
+            return None, None, None, f"Error reading file: {e}. Please ensure it's a valid CSV."
+    elif example_choice == "Project Cost Estimation":
+        df = pd.read_csv(SAMPLE_CSV_PATH)
+        source_info = "from the 'Project Cost Estimation' example"
+        data = df
+    elif manual_mean is not None and manual_std is not None:
+         # If manual input, we don't have raw data, just parameters.
+         # We'll return these params to be used directly in the simulation.
+         if manual_std <= 0:
+             return None, None, None, "Manual Input Error: Standard Deviation must be positive."
+         stats_text = (f"Source: Manual Input\n"
+                       f"Mean: {manual_mean:.2f}\n"
+                       f"Standard Deviation: {manual_std:.2f}")
+         # Create a dummy plot for manual input
+         fig, ax = plt.subplots()
+         ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
+                 ha='center', va='center', fontsize=12)
+         ax.set_xticks([])
+         ax.set_yticks([])
+         plt.tight_layout()
+         # Use a special DataFrame to signal manual input downstream
+         manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
+         return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."
+    if data is None:
+        return None, None, None, "No data source provided. Please upload a file, choose an example, or enter parameters."
+    # 2. Validate data structure
+    if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
+        error_msg = (f"Data Error: The data {source_info} is not compatible. "
+                     "The app requires a CSV with a single column of numerical data. "
+                     f"Detected {data.shape[1]} columns.")
+        return None, None, None, error_msg
+    # 3. Process valid data
+    series = data.iloc[:, 0].dropna()
+    mean = series.mean()
+    std = series.std()
+    if std == 0:
+        return None, None, None, "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
+    # 4. Generate visualization and stats
+    fig, ax = plt.subplots(figsize=(6, 4))
+    ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')
+    # Overlay a normal distribution curve
+    xmin, xmax = plt.xlim()
+    x = np.linspace(xmin, xmax, 100)
+    p = norm.pdf(x, mean, std)
+    ax.plot(x, p, 'k', linewidth=2, label='Fitted Normal Curve')
+    ax.set_title(f"Distribution of Input Data")
+    ax.set_xlabel(series.name)
+    ax.set_ylabel("Density")
+    ax.legend()
+    ax.grid(True, linestyle='--', alpha=0.6)
+    plt.tight_layout()
+    stats_text = (f"Source: {source_info}\n"
+                  f"Number of Data Points: {len(series)}\n"
+                  f"Mean: {mean:.2f}\n"
+                  f"Standard Deviation: {std:.2f}\n"
+                  f"Min: {series.min():.2f}\n"
+                  f"Max: {series.max():.2f}")
+    validation_message = "Data loaded and validated successfully! Ready to run the simulation."
+    return data, fig, stats_text, validation_message
+def run_monte_carlo_simulation(data, num_simulations, target_value):
+    """
+    Performs the Monte Carlo simulation based on the processed data.
+    Args:
+        data (pd.DataFrame): The validated input data.
+        num_simulations (int): The number of simulation iterations to run.
+        target_value (float): A user-defined target to calculate probability against.
+    Returns:
+        tuple: A tuple containing:
+               - A Matplotlib figure of the simulation results histogram.
+               - A Matplotlib figure of the cumulative distribution (CDF).
+               - A string containing detailed numerical results.
+    """
+    if data is None:
+        return None, None, "Please process valid data before running the simulation."
+    num_simulations = int(num_simulations)
+    # Check if data is from manual input or from a file/example
+    if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
+        mean = data['mean'].iloc[0]
+        std = data['std'].iloc[0]
+        data_name = "Value" # Generic name for manual input
+    else:
+        series = data.iloc[:, 0]
+        mean = series.mean()
+        std = series.std()
+        data_name = series.name
+    # The core of the Monte Carlo simulation: generate random samples
+    # We assume the underlying uncertainty follows a Normal Distribution
+    # defined by the mean and standard deviation of the input data.
+    simulation_results = np.random.normal(mean, std, num_simulations)
+    # --- Generate Results Histogram Plot ---
+    fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
+    ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')
+    # Calculate key statistics for plotting
+    sim_mean = np.mean(simulation_results)
+    p5 = np.percentile(simulation_results, 5)
+    p95 = np.percentile(simulation_results, 95)
+    # Add vertical lines for key statistics
+    ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
+    ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
+    ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')
+    ax_hist.set_title(f'Monte Carlo Simulation Results ({num_simulations:,} Iterations)', fontsize=14)
+    ax_hist.set_xlabel(f'Simulated {data_name}')
+    ax_hist.set_ylabel('Probability Density')
+    ax_hist.legend()
+    ax_hist.grid(True, linestyle='--', alpha=0.6)
+    plt.tight_layout()
+    # --- Generate Cumulative Distribution (CDF) Plot ---
+    fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
+    sorted_results = np.sort(simulation_results)
+    yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
+    ax_cdf.plot(sorted_results, yvals, label='CDF')
+    # Add markers for P5, P50, P95
+    p50 = np.percentile(simulation_results, 50)
+    ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
+    ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
+    ax_cdf.plot(p95, 0.95, 'go', ms=8, label=f'P95: {p95:.2f}')
+    ax_cdf.set_title('Cumulative Distribution Function (CDF)', fontsize=14)
+    ax_cdf.set_xlabel(f'Simulated {data_name}')
+    ax_cdf.set_ylabel('Cumulative Probability')
+    ax_cdf.grid(True, linestyle='--', alpha=0.6)
+    ax_cdf.legend()
+    plt.tight_layout()
+    # --- Calculate Final Numerical Results ---
+    prob_achieved = 0
+    if target_value is not None:
+        prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100
+    results_summary = (
+        f"Simulation Summary ({num_simulations:,} iterations):\n"
+        f"--------------------------------------------------\n"
+        f"Mean (Average Outcome): {sim_mean:.2f}\n"
+        f"Standard Deviation: {np.std(simulation_results):.2f}\n\n"
+        f"Percentiles (Confidence Range):\n"
+        f"  - 5th Percentile (P5): {p5:.2f}\n"
+        f"  - 50th Percentile (Median): {p50:.2f}\n"
+        f"  - 95th Percentile (P95): {p95:.2f}\n"
+        f"This means there is a 90% probability the outcome will be between {p5:.2f} and {p95:.2f}.\n\n"
+    )
+    if target_value is not None:
+        results_summary += (
+            f"Probability Analysis:\n"
+            f"  - Probability of outcome being less than or equal to {target_value:.2f}: {prob_achieved:.2f}%\n"
+        )
+    return fig_hist, fig_cdf, results_summary
+def generate_explanation(results_summary):
+    """
+    Uses a Hugging Face model to explain the simulation results in simple terms.
+    Args:
+        results_summary (str): The numerical summary from the simulation.
+    Returns:
+        str: A generated explanation of the results.
+    """
+    if explanation_generator is None:
+        return "LLM model not loaded. Cannot generate explanation."
+    if not results_summary or "Please process valid data" in results_summary:
+        return "Run a successful simulation first to generate an explanation."
+    prompt = f"""
+    Explain the following Monte Carlo simulation results to a non-technical manager.
+    Focus on what the numbers mean in terms of risk and decision-making. Be concise and clear.
+    Results:
+    {results_summary}
+    Explanation:
+    """
+    try:
+        response = explanation_generator(prompt, max_length=200, num_beams=3, no_repeat_ngram_size=2)
+        return response[0]['generated_text']
+    except Exception as e:
+        return f"Error generating explanation: {e}"
+# ----------------------------------------------------------------------------
+# Gradio UI Layout
+# ----------------------------------------------------------------------------
+with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer") as app:
+    gr.Markdown(
+        """
+        # Welcome to the Monte Carlo Simulation Explorer!
+        This tool helps you understand and perform a Monte Carlo simulation, a powerful technique for modeling uncertainty.
+        **How it works:** Instead of guessing a single outcome, you provide a range of possible inputs (or a distribution). The simulation then runs thousands of trials with random values from that input, creating a probability distribution of all possible outcomes.
+        **Get started:**
+        1.  **Provide Data:** Use one of the methods in the "Data Collection" box below.
+        2.  **Prepare Simulation:** Click the "Prepare Simulation" button to validate and visualize your input.
+        3.  **Run Simulation:** Adjust the settings and click "Run Simulation".
+        4.  **Interpret:** Analyze the resulting plots and get an AI-powered explanation.
+        """
+    )
+    # --- Row 1: Data Input and Preparation ---
+    with gr.Row():
+        # --- Column 1.1: Data Collection ---
+        with gr.Column(scale=1):
+            with gr.Box():
+                gr.Markdown("### 1. Data Collection")
+                gr.Markdown("Choose **one** method below.")
+                with gr.Tabs():
+                    with gr.TabItem("Upload File"):
+                        file_input = gr.File(label="Upload a Single-Column CSV File", file_types=[".csv"])
+                    with gr.TabItem("Use Example"):
+                        example_input = gr.Dropdown(
+                            ["Project Cost Estimation"], label="Select an Example Dataset"
+                        )
+                    with gr.TabItem("Manual Input"):
+                         gr.Markdown("Define a normal distribution manually.")
+                         manual_mean_input = gr.Number(label="Mean (Average)")
+                         manual_std_input = gr.Number(label="Standard Deviation (Spread)")
+                prepare_button = gr.Button("Prepare Simulation", variant="secondary")
+        # --- Column 1.2: Preparation Plots & Visualization ---
+        with gr.Column(scale=2):
+            with gr.Box():
+                gr.Markdown("### 2. Preparation & Visualization")
+                validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
+                input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
+                input_plot_output = gr.Plot(label="Input Data Distribution")
+    # --- Row 2: Simulation Controls and Results ---
+    with gr.Row():
+        with gr.Box():
+            gr.Markdown("### 3. Simulation Run & Results")
+            with gr.Row():
+                with gr.Column(scale=1, min_width=250):
+                    gr.Markdown("**Simulation Settings**")
+                    num_simulations_input = gr.Slider(
+                        minimum=1000, maximum=50000, value=10000, step=1000,
+                        label="Number of Simulations"
+                    )
+                    target_value_input = gr.Number(
+                        label="Target Value (Optional)",
+                        info="Calculate the probability of the result being <= this value."
+                    )
+                    run_button = gr.Button("Run Simulation", variant="primary")
+                with gr.Column(scale=3):
+                    with gr.Tabs():
+                        with gr.TabItem("Results Histogram"):
+                            results_plot_output = gr.Plot(label="Simulation Outcome Distribution")
+                        with gr.TabItem("Cumulative Probability (CDF)"):
+                            cdf_plot_output = gr.Plot(label="Cumulative Distribution Function")
+                        with gr.TabItem("Numerical Summary"):
+                            results_summary_output = gr.Textbox(label="Detailed Results", interactive=False, lines=12)
+    # --- Row 3: AI-Powered Explanation ---
+    with gr.Row():
+        with gr.Box():
+            gr.Markdown("### 4. AI-Powered Explanation")
+            explain_button = gr.Button("Explain the Takeaways", variant="secondary")
+            explanation_output = gr.Textbox(
+                label="Key Takeaways from the LLM",
+                interactive=False,
+                lines=5,
+                placeholder="Click the button above to generate an explanation of the results..."
+            )
+    # ----------------------------------------------------------------------------
+    # Define UI Component Interactions
+    # ----------------------------------------------------------------------------
+    # Hidden state to store the processed data between steps
+    processed_data_state = gr.State()
+    prepare_button.click(
+        fn=process_input_data,
+        inputs=[file_input, example_input, manual_mean_input, manual_std_input],
+        outputs=[processed_data_state, input_plot_output, input_stats_output, validation_output]
+    )
+    run_button.click(
+        fn=run_monte_carlo_simulation,
+        inputs=[processed_data_state, num_simulations_input, target_value_input],
+        outputs=[results_plot_output, cdf_plot_output, results_summary_output]
+    )
+    explain_button.click(
+        fn=generate_explanation,
+        inputs=[results_summary_output],
+        outputs=[explanation_output]
+    )
+# ----------------------------------------------------------------------------
+# Launch the Gradio App
+# ----------------------------------------------------------------------------
+if __name__ == "__main__":
+    # To run this app locally, save the code as a Python file (e.g., mcs-app.py)
+    # and run `python mcs-app.py` from your terminal.
+    app.launch(debug=True)