nikhilsingh committed on
Commit
b75c41f
·
verified ·
1 Parent(s): 5fb86bd

Create mcs-app.py

Browse files
Files changed (1) hide show
  1. mcs-app.py +397 -0
mcs-app.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ----------------------------------------------------------------------------
2
+ # Import necessary libraries
3
+ # ----------------------------------------------------------------------------
4
+ # pip install gradio numpy pandas matplotlib scipy transformers torch sentencepiece
5
+ # ----------------------------------------------------------------------------
6
+ import gradio as gr
7
+ import numpy as np
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ from scipy.stats import norm
11
+ from transformers import pipeline
12
+ import warnings
13
+ import os
14
+
15
# Keep console output clean by silencing library warnings.
warnings.filterwarnings("ignore")
# Render plots off-screen; a non-interactive backend avoids display issues
# in environments without a GUI.
plt.switch_backend('Agg')

# ----------------------------------------------------------------------------
# Global Variables and Initial Setup
# ----------------------------------------------------------------------------

# Text-generation pipeline used for the plain-language explanations.
# It stays None when the model cannot be loaded, which disables explanations
# without preventing the rest of the app from starting.
explanation_generator = None
try:
    explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
except Exception as load_error:
    print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {load_error}")

# Demonstration dataset: uncertain costs (in thousands of $) for the tasks of
# a project, written to disk so it can be loaded like an uploaded CSV.
SAMPLE_CSV_PATH = 'sample_project_costs.csv'
sample_project_costs = pd.DataFrame(
    [12, 15, 10, 13, 18, 9, 22, 14, 16, 11, 17, 20],
    columns=['task_cost_thousands'],
)
sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False)
39
+
40
+
41
+ # ----------------------------------------------------------------------------
42
+ # Core Logic Functions
43
+ # ----------------------------------------------------------------------------
44
+
45
def process_input_data(file_obj, example_choice, manual_mean, manual_std):
    """
    Load, validate and summarise the user's input ahead of the simulation.

    Sources are tried in priority order: File Upload > Example Dataset > Manual Entry.
    File and example data must consist of a single column of numbers with at
    least two non-missing values and a non-zero spread.

    Args:
        file_obj: The uploaded file from gr.File. Either an object exposing the
            file path as ``.name`` or a plain path (``str`` / ``os.PathLike``).
        example_choice (str): The name of the chosen example dataset.
        manual_mean (float): Manually entered mean.
        manual_std (float): Manually entered standard deviation.

    Returns:
        tuple: A tuple containing:
            - A pandas DataFrame with the loaded data (for manual input, a
              one-row DataFrame with 'mean' and 'std' columns).
            - A Matplotlib figure showing the data distribution.
            - A string with summary statistics.
            - A string with a validation message.
        On any failure the first three items are None and the message
        describes the problem.
    """
    data = None
    source_info = ""

    # 1. Pick the input source (upload wins over example, example over manual).
    if file_obj is not None:
        # Accept a plain path as well as an object carrying the path in `.name`.
        # (A pathlib.Path must go through fspath: its `.name` is only the basename.)
        if isinstance(file_obj, (str, os.PathLike)):
            file_path = os.fspath(file_obj)
        else:
            file_path = file_obj.name
        try:
            data = pd.read_csv(file_path)
        except Exception as e:
            return None, None, None, f"Error reading file: {e}. Please ensure it's a valid CSV."
        source_info = f"from uploaded file: {os.path.basename(file_path)}"
    elif example_choice == "Project Cost Estimation":
        data = pd.read_csv(SAMPLE_CSV_PATH)
        source_info = "from the 'Project Cost Estimation' example"
    elif manual_mean is not None and manual_std is not None:
        # Manual input carries no raw data, only distribution parameters.
        # NaN/inf are rejected explicitly because `nan <= 0` is False.
        if not (np.isfinite(manual_std) and manual_std > 0):
            return None, None, None, "Manual Input Error: Standard Deviation must be positive."

        stats_text = (f"Source: Manual Input\n"
                      f"Mean: {manual_mean:.2f}\n"
                      f"Standard Deviation: {manual_std:.2f}")

        # Placeholder plot: there is no sample to draw a histogram from.
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
                ha='center', va='center', fontsize=12)
        ax.set_xticks([])
        ax.set_yticks([])
        fig.tight_layout()
        # Unregister from pyplot so figures do not pile up in a long-running
        # server; the Figure object itself stays usable for rendering.
        plt.close(fig)

        # A one-row 'mean'/'std' DataFrame signals manual input downstream.
        manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
        return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."

    if data is None:
        return None, None, None, "No data source provided. Please upload a file, choose an example, or enter parameters."

    # 2. Validate data structure
    if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
        error_msg = (f"Data Error: The data {source_info} is not compatible. "
                     "The app requires a CSV with a single column of numerical data. "
                     f"Detected {data.shape[1]} columns.")
        return None, None, None, error_msg

    # 3. Process valid data
    series = data.iloc[:, 0].dropna()

    # With fewer than two values the sample standard deviation is NaN, which
    # would slip past the zero check below and yield NaN simulations.
    if len(series) < 2:
        return None, None, None, ("Data Error: At least two numeric values are required "
                                  "to estimate the spread of the data.")

    mean = series.mean()
    std = series.std()

    if not (np.isfinite(mean) and np.isfinite(std)):
        return None, None, None, ("Data Error: The data contains non-finite values, "
                                  "so its mean and standard deviation cannot be computed.")

    if std == 0:
        return None, None, None, "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."

    # 4. Generate visualization and stats
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')

    # Overlay a normal curve across the histogram's own x-range. The limits are
    # read from this Axes rather than pyplot's global "current axes".
    xmin, xmax = ax.get_xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mean, std)
    ax.plot(x, p, 'k', linewidth=2, label='Fitted Normal Curve')

    ax.set_title("Distribution of Input Data")
    ax.set_xlabel(series.name)
    ax.set_ylabel("Density")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    # See the manual-input branch: avoid leaking figures in pyplot's registry.
    plt.close(fig)

    stats_text = (f"Source: {source_info}\n"
                  f"Number of Data Points: {len(series)}\n"
                  f"Mean: {mean:.2f}\n"
                  f"Standard Deviation: {std:.2f}\n"
                  f"Min: {series.min():.2f}\n"
                  f"Max: {series.max():.2f}")

    validation_message = "Data loaded and validated successfully! Ready to run the simulation."

    return data, fig, stats_text, validation_message
145
+
146
+
147
def run_monte_carlo_simulation(data, num_simulations, target_value):
    """
    Performs the Monte Carlo simulation based on the processed data.

    Samples are drawn from a Normal distribution whose mean and standard
    deviation come either from the one-row 'mean'/'std' DataFrame produced for
    manual input, or from the first column of the supplied data.

    Args:
        data (pd.DataFrame): The validated input data.
        num_simulations (int): The number of simulation iterations to run
            (must be at least 2).
        target_value (float): A user-defined target to calculate probability
            against, or None to skip the probability analysis.

    Returns:
        tuple: A tuple containing:
            - A Matplotlib figure of the simulation results histogram.
            - A Matplotlib figure of the cumulative distribution (CDF).
            - A string containing detailed numerical results.
        When the inputs are unusable, both figures are None and the string is
        an error message.
    """
    # NOTE: every error message below contains "Please process valid data";
    # generate_explanation matches on that phrase to detect a failed run.
    if data is None:
        return None, None, "Please process valid data before running the simulation."

    num_simulations = int(num_simulations)
    # The CDF y-axis divides by (n - 1), so a single draw (or fewer) cannot be plotted.
    if num_simulations < 2:
        return None, None, ("Please process valid data and request at least 2 simulations "
                            "before running the simulation.")

    # Check if data is from manual input or from a file/example
    if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
        mean = data['mean'].iloc[0]
        std = data['std'].iloc[0]
        data_name = "Value"  # Generic name for manual input
    else:
        series = data.iloc[:, 0]
        mean = series.mean()
        std = series.std()
        data_name = series.name

    # A NaN or non-positive spread would make np.random.normal fail or return NaNs.
    if not (np.isfinite(mean) and np.isfinite(std) and std > 0):
        return None, None, ("Please process valid data before running the simulation: "
                            "the mean must be finite and the standard deviation must be "
                            "a positive, finite number.")

    # The core of the Monte Carlo simulation: generate random samples.
    # The underlying uncertainty is assumed to follow a Normal distribution
    # defined by the mean and standard deviation of the input data.
    simulation_results = np.random.normal(mean, std, num_simulations)

    # Key statistics shared by both plots and the text summary
    sim_mean = np.mean(simulation_results)
    p5, p50, p95 = np.percentile(simulation_results, [5, 50, 95])

    # --- Generate Results Histogram Plot ---
    fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
    ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')

    # Add vertical lines for key statistics
    ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
    ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
    ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')

    ax_hist.set_title(f'Monte Carlo Simulation Results ({num_simulations:,} Iterations)', fontsize=14)
    ax_hist.set_xlabel(f'Simulated {data_name}')
    ax_hist.set_ylabel('Probability Density')
    ax_hist.legend()
    ax_hist.grid(True, linestyle='--', alpha=0.6)
    # Lay out via the Figure itself instead of pyplot's global "current figure".
    fig_hist.tight_layout()

    # --- Generate Cumulative Distribution (CDF) Plot ---
    fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
    sorted_results = np.sort(simulation_results)
    yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
    ax_cdf.plot(sorted_results, yvals, label='CDF')

    # Add markers for P5, P50, P95
    ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
    ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
    ax_cdf.plot(p95, 0.95, 'go', ms=8, label=f'P95: {p95:.2f}')

    ax_cdf.set_title('Cumulative Distribution Function (CDF)', fontsize=14)
    ax_cdf.set_xlabel(f'Simulated {data_name}')
    ax_cdf.set_ylabel('Cumulative Probability')
    ax_cdf.grid(True, linestyle='--', alpha=0.6)
    ax_cdf.legend()
    fig_cdf.tight_layout()

    # Unregister both figures from pyplot so repeated runs do not accumulate
    # open figures; the Figure objects stay usable for rendering.
    plt.close(fig_hist)
    plt.close(fig_cdf)

    # --- Calculate Final Numerical Results ---
    results_summary = (
        f"Simulation Summary ({num_simulations:,} iterations):\n"
        f"--------------------------------------------------\n"
        f"Mean (Average Outcome): {sim_mean:.2f}\n"
        f"Standard Deviation: {np.std(simulation_results):.2f}\n\n"
        f"Percentiles (Confidence Range):\n"
        f" - 5th Percentile (P5): {p5:.2f}\n"
        f" - 50th Percentile (Median): {p50:.2f}\n"
        f" - 95th Percentile (P95): {p95:.2f}\n"
        f"This means there is a 90% probability the outcome will be between {p5:.2f} and {p95:.2f}.\n\n"
    )
    if target_value is not None:
        # Share of simulated outcomes at or below the target, as a percentage.
        prob_achieved = np.mean(simulation_results <= target_value) * 100
        results_summary += (
            f"Probability Analysis:\n"
            f" - Probability of outcome being less than or equal to {target_value:.2f}: {prob_achieved:.2f}%\n"
        )

    return fig_hist, fig_cdf, results_summary
246
+
247
+
248
def generate_explanation(results_summary):
    """
    Turn the numerical simulation summary into a plain-language explanation
    using the module-level Hugging Face text generator.

    Args:
        results_summary (str): The numerical summary from the simulation.

    Returns:
        str: The generated explanation, or a short message when the model is
        unavailable, no successful simulation has been run, or generation fails.
    """
    # Without a loaded model there is nothing to ask.
    if explanation_generator is None:
        return "LLM model not loaded. Cannot generate explanation."

    # An empty summary, or the simulation's "no data" message, means there are
    # no results to explain yet.
    nothing_to_explain = not results_summary or "Please process valid data" in results_summary
    if nothing_to_explain:
        return "Run a successful simulation first to generate an explanation."

    prompt = f"""
    Explain the following Monte Carlo simulation results to a non-technical manager.
    Focus on what the numbers mean in terms of risk and decision-making. Be concise and clear.

    Results:
    {results_summary}

    Explanation:
    """

    try:
        generations = explanation_generator(
            prompt,
            max_length=200,
            num_beams=3,
            no_repeat_ngram_size=2,
        )
        first_generation = generations[0]
        return first_generation['generated_text']
    except Exception as generation_error:
        return f"Error generating explanation: {generation_error}"
278
+
279
+
280
+ # ----------------------------------------------------------------------------
281
+ # Gradio UI Layout
282
+ # ----------------------------------------------------------------------------
283
+
284
# Layout containers use gr.Group rather than gr.Box: gr.Box was removed in
# Gradio 4, while gr.Group is available in both the 3.x and 4.x series.
with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer") as app:
    gr.Markdown(
        """
        # Welcome to the Monte Carlo Simulation Explorer!
        This tool helps you understand and perform a Monte Carlo simulation, a powerful technique for modeling uncertainty.
        **How it works:** Instead of guessing a single outcome, you provide a range of possible inputs (or a distribution). The simulation then runs thousands of trials with random values from that input, creating a probability distribution of all possible outcomes.
        **Get started:**
        1. **Provide Data:** Use one of the methods in the "Data Collection" box below.
        2. **Prepare Simulation:** Click the "Prepare Simulation" button to validate and visualize your input.
        3. **Run Simulation:** Adjust the settings and click "Run Simulation".
        4. **Interpret:** Analyze the resulting plots and get an AI-powered explanation.
        """
    )

    # --- Row 1: Data Input and Preparation ---
    with gr.Row():
        # --- Column 1.1: Data Collection ---
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. Data Collection")
                gr.Markdown("Choose **one** method below.")

                with gr.Tabs():
                    with gr.TabItem("Upload File"):
                        file_input = gr.File(label="Upload a Single-Column CSV File", file_types=[".csv"])
                    with gr.TabItem("Use Example"):
                        example_input = gr.Dropdown(
                            ["Project Cost Estimation"], label="Select an Example Dataset"
                        )
                    with gr.TabItem("Manual Input"):
                        gr.Markdown("Define a normal distribution manually.")
                        manual_mean_input = gr.Number(label="Mean (Average)")
                        manual_std_input = gr.Number(label="Standard Deviation (Spread)")

                prepare_button = gr.Button("Prepare Simulation", variant="secondary")

        # --- Column 1.2: Preparation Plots & Visualization ---
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### 2. Preparation & Visualization")
                validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
                input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
                input_plot_output = gr.Plot(label="Input Data Distribution")

    # --- Row 2: Simulation Controls and Results ---
    with gr.Row():
        with gr.Group():
            gr.Markdown("### 3. Simulation Run & Results")
            with gr.Row():
                with gr.Column(scale=1, min_width=250):
                    gr.Markdown("**Simulation Settings**")
                    num_simulations_input = gr.Slider(
                        minimum=1000, maximum=50000, value=10000, step=1000,
                        label="Number of Simulations"
                    )
                    target_value_input = gr.Number(
                        label="Target Value (Optional)",
                        info="Calculate the probability of the result being <= this value."
                    )
                    run_button = gr.Button("Run Simulation", variant="primary")

                with gr.Column(scale=3):
                    with gr.Tabs():
                        with gr.TabItem("Results Histogram"):
                            results_plot_output = gr.Plot(label="Simulation Outcome Distribution")
                        with gr.TabItem("Cumulative Probability (CDF)"):
                            cdf_plot_output = gr.Plot(label="Cumulative Distribution Function")
                        with gr.TabItem("Numerical Summary"):
                            results_summary_output = gr.Textbox(label="Detailed Results", interactive=False, lines=12)

    # --- Row 3: AI-Powered Explanation ---
    with gr.Row():
        with gr.Group():
            gr.Markdown("### 4. AI-Powered Explanation")
            explain_button = gr.Button("Explain the Takeaways", variant="secondary")
            explanation_output = gr.Textbox(
                label="Key Takeaways from the LLM",
                interactive=False,
                lines=5,
                placeholder="Click the button above to generate an explanation of the results..."
            )

    # ----------------------------------------------------------------------------
    # Define UI Component Interactions
    # ----------------------------------------------------------------------------

    # Hidden state to store the processed data between steps
    processed_data_state = gr.State()

    # Step 1 -> 2: validate the chosen input and show its statistics and plot.
    prepare_button.click(
        fn=process_input_data,
        inputs=[file_input, example_input, manual_mean_input, manual_std_input],
        outputs=[processed_data_state, input_plot_output, input_stats_output, validation_output]
    )

    # Step 3: run the simulation on the stored, validated data.
    run_button.click(
        fn=run_monte_carlo_simulation,
        inputs=[processed_data_state, num_simulations_input, target_value_input],
        outputs=[results_plot_output, cdf_plot_output, results_summary_output]
    )

    # Step 4: explain the textual summary produced by the simulation.
    explain_button.click(
        fn=generate_explanation,
        inputs=[results_summary_output],
        outputs=[explanation_output]
    )

# ----------------------------------------------------------------------------
# Launch the Gradio App
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    # To run this app, save the code as a Python file (e.g., main.py)
    # and run `python main.py` from your terminal.
    app.launch(debug=True)