|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from scipy.stats import norm |
|
from transformers import pipeline |
|
import warnings |
|
import os |
|
|
|
|
|
# Suppress every Python warning process-wide (applies to all imported libraries too).
warnings.filterwarnings("ignore")

# Use the non-interactive Agg backend: figures are only rendered to images,
# never shown in a GUI window.
plt.switch_backend('Agg')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the text2text model once at import time. If anything goes wrong the
# error is printed and `explanation_generator` is set to None, which
# generate_explanation() checks for before trying to use the model.
try:
    explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
except Exception as e:
    print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
    explanation_generator = None
else:
    print("Hugging Face model loaded successfully.")
|
|
|
|
|
|
|
# Built-in example dataset: a single numeric column of twelve task costs.
sample_project_costs = pd.DataFrame({
    'task_cost_thousands': [12, 15, 10, 13, 18, 9, 22, 14, 16, 11, 17, 20],
})

# The example is written to a CSV in the current working directory at import
# time; process_input_data() later loads it back with pd.read_csv().
SAMPLE_CSV_PATH = 'sample_project_costs.csv'
sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_error_plot(message):
    """Create a Matplotlib figure that displays an error message.

    Args:
        message (str): Text to show, centred in the axes and drawn in red.

    Returns:
        matplotlib.figure.Figure: An 8x5 inch figure with no axis ticks that
        contains only the message.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red', fontsize=12)
    ax.set_xticks([])
    ax.set_yticks([])
    # Lay out this figure explicitly instead of relying on pyplot's notion of
    # the "current" figure.
    fig.tight_layout()
    # Remove the figure from pyplot's global registry so repeated calls do not
    # accumulate open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig
|
|
|
def process_input_data(file_obj, example_choice, manual_mean, manual_std):
    """Turn the user's chosen data source into validated simulation input.

    Sources are considered in priority order: uploaded file, then the example
    dataset, then manually entered parameters. File and example data must be a
    single numeric column with at least two non-missing values and a non-zero
    standard deviation.

    Args:
        file_obj: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path; ``None`` when nothing was uploaded.
        example_choice (str): Name of the chosen example dataset, if any.
        manual_mean (float): Manually entered mean, or ``None``.
        manual_std (float): Manually entered standard deviation, or ``None``.

    Returns:
        tuple: ``(data, figure, stats_text, validation_message)`` where

            - ``data`` is a pandas DataFrame (the loaded single-column data, or
              a one-row frame with ``mean``/``std`` columns for manual input),
              or ``None`` on failure;
            - ``figure`` is a Matplotlib figure showing the input distribution,
              an informational placeholder, or the error message;
            - ``stats_text`` is a summary-statistics string, or ``None`` on
              failure;
            - ``validation_message`` is a status string for the user.
    """

    def _failure(plot_message, status_message=None):
        # Shared shape of every error return: no data, error figure, no stats.
        status = plot_message if status_message is None else status_message
        return None, create_error_plot(plot_message), None, status

    data = None
    source_info = ""

    # --- 1. Pick the data source (file > example > manual) ---
    if file_obj is not None:
        # Accept both an object carrying the path in ``.name`` and a bare path.
        file_path = getattr(file_obj, "name", file_obj)
        try:
            data = pd.read_csv(file_path)
            source_info = f"from uploaded file: {os.path.basename(file_path)}"
        except Exception as e:
            return _failure(
                f"Error reading file: {e}",
                f"Error reading file: {e}. Please ensure it's a valid CSV.",
            )
    elif example_choice == "Project Cost Estimation":
        try:
            data = pd.read_csv(SAMPLE_CSV_PATH)
        except Exception as e:
            # The sample CSV is written at import time but may have been removed since.
            return _failure(f"Error loading example dataset: {e}")
        source_info = "from the 'Project Cost Estimation' example"
    elif manual_mean is not None and manual_std is not None:
        # ``not (x > 0)`` also rejects NaN, which ``x <= 0`` would let through.
        if not (manual_std > 0):
            return _failure(
                "Standard Deviation must be positive.",
                "Manual Input Error: Standard Deviation must be positive.",
            )

        stats_text = (f"Source: Manual Input\n"
                      f"Mean: {manual_mean:.2f}\n"
                      f"Standard Deviation: {manual_std:.2f}")

        # There is no sample to draw, so show an informational placeholder.
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
                ha='center', va='center', fontsize=12)
        ax.set_xticks([])
        ax.set_yticks([])
        fig.tight_layout()
        plt.close(fig)  # drop from pyplot's registry; the Figure stays renderable

        # A one-row frame with 'mean' and 'std' columns is how
        # run_monte_carlo_simulation() recognises manual parameters.
        manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
        return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."

    if data is None:
        return _failure(
            "No data source provided.",
            "No data source provided. Please upload a file, choose an example, or enter parameters.",
        )

    # --- 2. Validate shape and dtype ---
    if data.shape[1] != 1:
        problem = f"Detected {data.shape[1]} columns."
    elif not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
        problem = f"Detected a non-numeric column (dtype: {data.iloc[:, 0].dtype})."
    else:
        problem = None

    if problem is not None:
        error_msg = (f"Data Error: The data {source_info} is not compatible. "
                     "The app requires a CSV with a single column of numerical data. "
                     f"{problem}")
        return _failure(error_msg)

    # --- 3. Summary statistics on the non-missing values ---
    series = data.iloc[:, 0].dropna()

    # With fewer than two values the sample standard deviation is NaN, which
    # would slip past the ``std == 0`` check below.
    if len(series) < 2:
        return _failure(
            "Data Error: At least two numeric values are required to estimate a standard deviation."
        )

    mean = series.mean()
    std = series.std()

    if std == 0:
        return _failure(
            "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
        )

    # --- 4. Histogram of the data with the fitted normal curve on top ---
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')

    # Evaluate the normal PDF across the x-range the histogram ended up with.
    xmin, xmax = ax.get_xlim()
    x = np.linspace(xmin, xmax, 100)
    ax.plot(x, norm.pdf(x, mean, std), 'k', linewidth=2, label='Fitted Normal Curve')

    ax.set_title("Distribution of Input Data")
    ax.set_xlabel(series.name)
    ax.set_ylabel("Density")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.close(fig)  # drop from pyplot's registry; the Figure stays renderable

    stats_text = (f"Source: {source_info}\n"
                  f"Number of Data Points: {len(series)}\n"
                  f"Mean: {mean:.2f}\n"
                  f"Standard Deviation: {std:.2f}\n"
                  f"Min: {series.min():.2f}\n"
                  f"Max: {series.max():.2f}")

    validation_message = "Data loaded and validated successfully! Ready to run the simulation."

    return data, fig, stats_text, validation_message
|
|
|
|
|
def _simulation_failure(message):
    """Return the (histogram, cdf, summary) triple used for a failed run."""
    error_plot = create_error_plot(message)
    # The summary text starts with "Simulation failed", which
    # generate_explanation() looks for to refuse explaining a failed run.
    return error_plot, error_plot, "Simulation failed. See plot for details."


def run_monte_carlo_simulation(data, num_simulations, target_value):
    """Run a normal-distribution Monte Carlo simulation on the prepared data.

    Args:
        data (pandas.DataFrame | None): Output of ``process_input_data``.
            Either a one-row frame with ``mean`` and ``std`` columns (manual
            parameters) or a frame whose first column holds the sample the
            mean and standard deviation are estimated from. ``None`` means no
            data has been prepared.
        num_simulations (int | float): Number of random draws; converted with
            ``int()``.
        target_value (float | None): Optional threshold. When given, the
            summary includes the percentage of draws ``<=`` this value.

    Returns:
        tuple: ``(histogram_figure, cdf_figure, results_summary)``. On failure
        both figures are the same error figure and the summary is
        ``"Simulation failed. See plot for details."``.
    """
    if data is None:
        return _simulation_failure(
            "ERROR: No valid data available.\nPlease go to Step 1 & 2 and click 'Prepare Simulation' first."
        )

    num_simulations = int(num_simulations)
    if num_simulations < 1:
        return _simulation_failure("ERROR: Number of simulations must be at least 1.")

    # Manual input arrives as a single row with explicit 'mean'/'std' columns;
    # anything else is treated as a sample in the first column.
    if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
        mean = data['mean'].iloc[0]
        std = data['std'].iloc[0]
        data_name = "Value"
    else:
        series = data.iloc[:, 0]
        mean = series.mean()
        std = series.std()
        data_name = series.name

    # np.random.normal raises on a negative scale and silently yields NaNs for
    # NaN parameters, so reject both up front.
    if not (np.isfinite(mean) and np.isfinite(std)) or std < 0:
        return _simulation_failure(
            "ERROR: The mean and standard deviation of the input are not valid numbers."
        )

    simulation_results = np.random.normal(mean, std, num_simulations)

    sim_mean = np.mean(simulation_results)
    p5, p50, p95 = np.percentile(simulation_results, [5, 50, 95])

    # --- Histogram of simulated outcomes ---
    fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
    ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')

    ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
    ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
    ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')

    ax_hist.set_title(f'Monte Carlo Simulation Results ({num_simulations:,} Iterations)', fontsize=14)
    ax_hist.set_xlabel(f'Simulated {data_name}')
    ax_hist.set_ylabel('Probability Density')
    ax_hist.legend()
    ax_hist.grid(True, linestyle='--', alpha=0.6)
    fig_hist.tight_layout()
    plt.close(fig_hist)  # drop from pyplot's registry; the Figure stays renderable

    # --- Empirical cumulative distribution ---
    fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
    sorted_results = np.sort(simulation_results)
    # Evenly spaced cumulative probabilities from 0 to 1. linspace matches
    # arange(n) / (n - 1) for n >= 2 and avoids dividing by zero when n == 1.
    yvals = np.linspace(0.0, 1.0, len(sorted_results))
    ax_cdf.plot(sorted_results, yvals, label='CDF')

    ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
    ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
    ax_cdf.plot(p95, 0.95, 'go', ms=8, label=f'P95: {p95:.2f}')

    ax_cdf.set_title('Cumulative Distribution Function (CDF)', fontsize=14)
    ax_cdf.set_xlabel(f'Simulated {data_name}')
    ax_cdf.set_ylabel('Cumulative Probability')
    ax_cdf.grid(True, linestyle='--', alpha=0.6)
    ax_cdf.legend()
    fig_cdf.tight_layout()
    plt.close(fig_cdf)  # drop from pyplot's registry; the Figure stays renderable

    # --- Text summary ---
    results_summary = (
        f"Simulation Summary ({num_simulations:,} iterations):\n"
        f"--------------------------------------------------\n"
        f"Mean (Average Outcome): {sim_mean:.2f}\n"
        f"Standard Deviation: {np.std(simulation_results):.2f}\n\n"
        f"Percentiles (Confidence Range):\n"
        f" - 5th Percentile (P5): {p5:.2f}\n"
        f" - 50th Percentile (Median): {p50:.2f}\n"
        f" - 95th Percentile (P95): {p95:.2f}\n"
        f"This means there is a 90% probability the outcome will be between {p5:.2f} and {p95:.2f}.\n\n"
    )

    if target_value is not None:
        # Share of simulated outcomes at or below the target, as a percentage.
        prob_achieved = np.mean(simulation_results <= target_value) * 100
        results_summary += (
            f"Probability Analysis:\n"
            f" - Probability of outcome being less than or equal to {target_value:.2f}: {prob_achieved:.2f}%\n"
        )

    return fig_hist, fig_cdf, results_summary
|
|
|
|
|
def generate_explanation(results_summary):
    """Ask the Hugging Face model for a plain-language reading of the results.

    Args:
        results_summary (str): The text summary produced by a simulation run.

    Returns:
        str: The generated explanation, or a human-readable message when the
        model is unavailable, no successful simulation summary was supplied,
        or generation raised an error.
    """
    if explanation_generator is None:
        return "LLM model not loaded. Cannot generate explanation."

    # Refuse to explain nothing: empty/blank text or a failure message.
    has_no_results = (
        not results_summary
        or not results_summary.strip()
        or "Please process valid data" in results_summary
        or "Simulation failed" in results_summary
    )
    if has_no_results:
        return "Could not generate explanation. Please run a successful simulation first."

    # Assemble the prompt line by line so its text does not depend on how the
    # source code happens to be indented.
    prompt = (
        "\n"
        "Explain the following Monte Carlo simulation results to a non-technical manager.\n"
        "Focus on what the numbers mean in terms of risk and decision-making. Be concise and clear.\n"
        "\n"
        "Results:\n"
        f"{results_summary}\n"
        "\n"
        "Explanation:\n"
    )

    try:
        response = explanation_generator(prompt, max_length=200, num_beams=3, no_repeat_ngram_size=2)
        return response[0]['generated_text']
    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the handler.
        return f"Error generating explanation: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the Gradio interface. Everything, including the event wiring at the
# end, is declared inside the Blocks context.
with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer") as app:
    gr.Markdown(
        """
        # Welcome to the Monte Carlo Simulation Explorer!
        This tool helps you understand and perform a Monte Carlo simulation, a powerful technique for modeling uncertainty.
        **How it works:** Instead of guessing a single outcome, you provide a range of possible inputs (or a distribution). The simulation then runs thousands of trials with random values from that input, creating a probability distribution of all possible outcomes.
        **Get started:**
        1. **Provide Data:** Use one of the methods in the "Data Collection" box below.
        2. **Prepare Simulation:** Click the "Prepare Simulation" button to validate and visualize your input.
        3. **Run Simulation:** Adjust the settings and click "Run Simulation".
        4. **Interpret:** Analyze the resulting plots and get an AI-powered explanation.
        """
    )

    # --- Steps 1 & 2: data collection (left) and validation output (right) ---
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. Data Collection")
                gr.Markdown("Choose **one** method below.")

                with gr.Tabs():
                    with gr.TabItem("Upload File"):
                        file_input = gr.File(label="Upload a Single-Column CSV File", file_types=[".csv"])
                    with gr.TabItem("Use Example"):
                        example_input = gr.Dropdown(
                            ["Project Cost Estimation"], label="Select an Example Dataset"
                        )
                    with gr.TabItem("Manual Input"):
                        gr.Markdown("Define a normal distribution manually.")
                        manual_mean_input = gr.Number(label="Mean (Average)")
                        manual_std_input = gr.Number(label="Standard Deviation (Spread)")

                prepare_button = gr.Button("Prepare Simulation", variant="secondary")

        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### 2. Preparation & Visualization")
                validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
                input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
                input_plot_output = gr.Plot(label="Input Data Distribution")

    # --- Step 3: simulation settings and results ---
    with gr.Row():
        with gr.Group():
            gr.Markdown("### 3. Simulation Run & Results")
            with gr.Row():
                with gr.Column(scale=1, min_width=250):
                    gr.Markdown("**Simulation Settings**")
                    num_simulations_input = gr.Slider(
                        minimum=1000, maximum=50000, value=10000, step=1000,
                        label="Number of Simulations"
                    )
                    target_value_input = gr.Number(
                        label="Target Value (Optional)",
                        info="Calculate the probability of the result being <= this value."
                    )
                    run_button = gr.Button("Run Simulation", variant="primary")

                with gr.Column(scale=3):
                    with gr.Tabs():
                        with gr.TabItem("Results Histogram"):
                            results_plot_output = gr.Plot(label="Simulation Outcome Distribution")
                        with gr.TabItem("Cumulative Probability (CDF)"):
                            cdf_plot_output = gr.Plot(label="Cumulative Distribution Function")
                        with gr.TabItem("Numerical Summary"):
                            results_summary_output = gr.Textbox(label="Detailed Results", interactive=False, lines=12)

    # --- Step 4: model-generated explanation ---
    with gr.Row():
        with gr.Group():
            gr.Markdown("### 4. AI-Powered Explanation")
            explain_button = gr.Button("Explain the Takeaways", variant="secondary")
            explanation_output = gr.Textbox(
                label="Key Takeaways from the LLM",
                interactive=False,
                lines=5,
                placeholder="Click the button above to generate an explanation of the results..."
            )

    # --- Event wiring ---
    # Carries the prepared DataFrame from "Prepare Simulation" to "Run Simulation".
    processed_data_state = gr.State()

    prepare_button.click(
        fn=process_input_data,
        inputs=[file_input, example_input, manual_mean_input, manual_std_input],
        outputs=[processed_data_state, input_plot_output, input_stats_output, validation_output]
    )

    run_button.click(
        fn=run_monte_carlo_simulation,
        inputs=[processed_data_state, num_simulations_input, target_value_input],
        outputs=[results_plot_output, cdf_plot_output, results_summary_output]
    )

    explain_button.click(
        fn=generate_explanation,
        inputs=[results_summary_output],
        outputs=[explanation_output]
    )
|
|
|
|
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app.launch(debug=True)
|
|