nikhilsingh committed on
Commit
89edfc8
·
verified ·
1 Parent(s): ef2037b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -51
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  # ----------------------------------------------------------------------------
2
  # Import necessary libraries
3
  # ----------------------------------------------------------------------------
@@ -25,6 +27,7 @@ plt.switch_backend('Agg')
25
  # We use a small, efficient model to ensure the app runs smoothly.
26
  try:
27
  explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
 
28
  except Exception as e:
29
  print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
30
  explanation_generator = None
@@ -42,6 +45,15 @@ sample_project_costs.to_csv(SAMPLE_CSV_PATH, index=False)
42
  # Core Logic Functions
43
  # ----------------------------------------------------------------------------
44
 
 
 
 
 
 
 
 
 
 
45
  def process_input_data(file_obj, example_choice, manual_mean, manual_std):
46
  """
47
  Processes the user's input from the UI.
@@ -71,21 +83,18 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
71
  source_info = f"from uploaded file: {os.path.basename(file_obj.name)}"
72
  data = df
73
  except Exception as e:
74
- return None, None, None, f"Error reading file: {e}. Please ensure it's a valid CSV."
75
- elif example_choice == "Project Cost Estimation":
76
  df = pd.read_csv(SAMPLE_CSV_PATH)
77
  source_info = "from the 'Project Cost Estimation' example"
78
  data = df
79
  elif manual_mean is not None and manual_std is not None:
80
- # If manual input, we don't have raw data, just parameters.
81
- # We'll return these params to be used directly in the simulation.
82
  if manual_std <= 0:
83
- return None, None, None, "Manual Input Error: Standard Deviation must be positive."
84
 
85
  stats_text = (f"Source: Manual Input\n"
86
  f"Mean: {manual_mean:.2f}\n"
87
  f"Standard Deviation: {manual_std:.2f}")
88
- # Create a dummy plot for manual input
89
  fig, ax = plt.subplots()
90
  ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
91
  ha='center', va='center', fontsize=12)
@@ -93,19 +102,18 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
93
  ax.set_yticks([])
94
  plt.tight_layout()
95
 
96
- # Use a special DataFrame to signal manual input downstream
97
  manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
98
  return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."
99
 
100
  if data is None:
101
- return None, None, None, "No data source provided. Please upload a file, choose an example, or enter parameters."
102
 
103
  # 2. Validate data structure
104
  if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
105
  error_msg = (f"Data Error: The data {source_info} is not compatible. "
106
  "The app requires a CSV with a single column of numerical data. "
107
  f"Detected {data.shape[1]} columns.")
108
- return None, None, None, error_msg
109
 
110
  # 3. Process valid data
111
  series = data.iloc[:, 0].dropna()
@@ -113,13 +121,13 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
113
  std = series.std()
114
 
115
  if std == 0:
116
- return None, None, None, "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
 
117
 
118
  # 4. Generate visualization and stats
119
  fig, ax = plt.subplots(figsize=(6, 4))
120
  ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')
121
 
122
- # Overlay a normal distribution curve
123
  xmin, xmax = plt.xlim()
124
  x = np.linspace(xmin, xmax, 100)
125
  p = norm.pdf(x, mean, std)
@@ -147,49 +155,34 @@ def process_input_data(file_obj, example_choice, manual_mean, manual_std):
147
  def run_monte_carlo_simulation(data, num_simulations, target_value):
148
  """
149
  Performs the Monte Carlo simulation based on the processed data.
150
-
151
- Args:
152
- data (pd.DataFrame): The validated input data.
153
- num_simulations (int): The number of simulation iterations to run.
154
- target_value (float): A user-defined target to calculate probability against.
155
-
156
- Returns:
157
- tuple: A tuple containing:
158
- - A Matplotlib figure of the simulation results histogram.
159
- - A Matplotlib figure of the cumulative distribution (CDF).
160
- - A string containing detailed numerical results.
161
  """
 
162
  if data is None:
163
- return None, None, "Please process valid data before running the simulation."
 
 
164
 
165
  num_simulations = int(num_simulations)
166
 
167
- # Check if data is from manual input or from a file/example
168
  if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
169
  mean = data['mean'].iloc[0]
170
  std = data['std'].iloc[0]
171
- data_name = "Value" # Generic name for manual input
172
  else:
173
  series = data.iloc[:, 0]
174
  mean = series.mean()
175
  std = series.std()
176
  data_name = series.name
177
 
178
- # The core of the Monte Carlo simulation: generate random samples
179
- # We assume the underlying uncertainty follows a Normal Distribution
180
- # defined by the mean and standard deviation of the input data.
181
  simulation_results = np.random.normal(mean, std, num_simulations)
182
 
183
- # --- Generate Results Histogram Plot ---
184
  fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
185
  ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')
186
 
187
- # Calculate key statistics for plotting
188
  sim_mean = np.mean(simulation_results)
189
  p5 = np.percentile(simulation_results, 5)
190
  p95 = np.percentile(simulation_results, 95)
191
 
192
- # Add vertical lines for key statistics
193
  ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
194
  ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
195
  ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')
@@ -201,13 +194,11 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
201
  ax_hist.grid(True, linestyle='--', alpha=0.6)
202
  plt.tight_layout()
203
 
204
- # --- Generate Cumulative Distribution (CDF) Plot ---
205
  fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
206
  sorted_results = np.sort(simulation_results)
207
  yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
208
  ax_cdf.plot(sorted_results, yvals, label='CDF')
209
 
210
- # Add markers for P5, P50, P95
211
  p50 = np.percentile(simulation_results, 50)
212
  ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
213
  ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
@@ -220,7 +211,6 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
220
  ax_cdf.legend()
221
  plt.tight_layout()
222
 
223
- # --- Calculate Final Numerical Results ---
224
  prob_achieved = 0
225
  if target_value is not None:
226
  prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100
@@ -248,17 +238,12 @@ def run_monte_carlo_simulation(data, num_simulations, target_value):
248
  def generate_explanation(results_summary):
249
  """
250
  Uses a Hugging Face model to explain the simulation results in simple terms.
251
-
252
- Args:
253
- results_summary (str): The numerical summary from the simulation.
254
-
255
- Returns:
256
- str: A generated explanation of the results.
257
  """
258
  if explanation_generator is None:
259
  return "LLM model not loaded. Cannot generate explanation."
260
- if not results_summary or "Please process valid data" in results_summary:
261
- return "Run a successful simulation first to generate an explanation."
 
262
 
263
  prompt = f"""
264
  Explain the following Monte Carlo simulation results to a non-technical manager.
@@ -297,9 +282,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
297
 
298
  # --- Row 1: Data Input and Preparation ---
299
  with gr.Row():
300
- # --- Column 1.1: Data Collection ---
301
  with gr.Column(scale=1):
302
- with gr.Box():
303
  gr.Markdown("### 1. Data Collection")
304
  gr.Markdown("Choose **one** method below.")
305
 
@@ -317,9 +301,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
317
 
318
  prepare_button = gr.Button("Prepare Simulation", variant="secondary")
319
 
320
- # --- Column 1.2: Preparation Plots & Visualization ---
321
  with gr.Column(scale=2):
322
- with gr.Box():
323
  gr.Markdown("### 2. Preparation & Visualization")
324
  validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
325
  input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
@@ -327,7 +310,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
327
 
328
  # --- Row 2: Simulation Controls and Results ---
329
  with gr.Row():
330
- with gr.Box():
331
  gr.Markdown("### 3. Simulation Run & Results")
332
  with gr.Row():
333
  with gr.Column(scale=1, min_width=250):
@@ -353,7 +336,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
353
 
354
  # --- Row 3: AI-Powered Explanation ---
355
  with gr.Row():
356
- with gr.Box():
357
  gr.Markdown("### 4. AI-Powered Explanation")
358
  explain_button = gr.Button("Explain the Takeaways", variant="secondary")
359
  explanation_output = gr.Textbox(
@@ -367,7 +350,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
367
  # Define UI Component Interactions
368
  # ----------------------------------------------------------------------------
369
 
370
- # Hidden state to store the processed data between steps
371
  processed_data_state = gr.State()
372
 
373
  prepare_button.click(
@@ -392,6 +374,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Monte Carlo Simulation Explorer")
392
  # Launch the Gradio App
393
  # ----------------------------------------------------------------------------
394
  if __name__ == "__main__":
395
- # To run this app, save the code as a Python file (e.g., main.py)
396
- # and run `python main.py` from your terminal.
397
  app.launch(debug=True)
 
1
+ # main.py
2
+
3
  # ----------------------------------------------------------------------------
4
  # Import necessary libraries
5
  # ----------------------------------------------------------------------------
 
27
  # We use a small, efficient model to ensure the app runs smoothly.
28
  try:
29
  explanation_generator = pipeline('text2text-generation', model='google/flan-t5-small')
30
+ print("Hugging Face model loaded successfully.")
31
  except Exception as e:
32
  print(f"Could not load Hugging Face model. Explanations will be disabled. Error: {e}")
33
  explanation_generator = None
 
45
  # Core Logic Functions
46
  # ----------------------------------------------------------------------------
47
 
48
def create_error_plot(message):
    """Build a placeholder Matplotlib figure that displays an error message.

    Args:
        message (str): Text to render, centered and in red, inside an
            otherwise empty set of axes.

    Returns:
        The figure object created by ``plt.subplots`` (8x5 inches) with the
        axis ticks removed and the message drawn on it.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.text(0.5, 0.5, message, ha='center', va='center', wrap=True, color='red', fontsize=12)
    ax.set_xticks([])
    ax.set_yticks([])
    # Lay out this specific figure. plt.tight_layout() operates on pyplot's
    # global "current figure", which may no longer be this one if other code
    # created or selected a figure in the meantime.
    fig.tight_layout()
    return fig
56
+
57
  def process_input_data(file_obj, example_choice, manual_mean, manual_std):
58
  """
59
  Processes the user's input from the UI.
 
83
  source_info = f"from uploaded file: {os.path.basename(file_obj.name)}"
84
  data = df
85
  except Exception as e:
86
+ return None, create_error_plot(f"Error reading file: {e}"), None, f"Error reading file: {e}. Please ensure it's a valid CSV."
87
+ elif example_choice and example_choice == "Project Cost Estimation":
88
  df = pd.read_csv(SAMPLE_CSV_PATH)
89
  source_info = "from the 'Project Cost Estimation' example"
90
  data = df
91
  elif manual_mean is not None and manual_std is not None:
 
 
92
  if manual_std <= 0:
93
+ return None, create_error_plot("Standard Deviation must be positive."), None, "Manual Input Error: Standard Deviation must be positive."
94
 
95
  stats_text = (f"Source: Manual Input\n"
96
  f"Mean: {manual_mean:.2f}\n"
97
  f"Standard Deviation: {manual_std:.2f}")
 
98
  fig, ax = plt.subplots()
99
  ax.text(0.5, 0.5, 'Manual input:\nNo data to plot.\nSimulation will use\nthe provided Mean/Std.',
100
  ha='center', va='center', fontsize=12)
 
102
  ax.set_yticks([])
103
  plt.tight_layout()
104
 
 
105
  manual_df = pd.DataFrame({'mean': [manual_mean], 'std': [manual_std]})
106
  return manual_df, fig, stats_text, "Manual parameters accepted. Ready to run simulation."
107
 
108
  if data is None:
109
+ return None, create_error_plot("No data source provided."), None, "No data source provided. Please upload a file, choose an example, or enter parameters."
110
 
111
  # 2. Validate data structure
112
  if data.shape[1] != 1 or not pd.api.types.is_numeric_dtype(data.iloc[:, 0]):
113
  error_msg = (f"Data Error: The data {source_info} is not compatible. "
114
  "The app requires a CSV with a single column of numerical data. "
115
  f"Detected {data.shape[1]} columns.")
116
+ return None, create_error_plot(error_msg), None, error_msg
117
 
118
  # 3. Process valid data
119
  series = data.iloc[:, 0].dropna()
 
121
  std = series.std()
122
 
123
  if std == 0:
124
+ error_msg = "Data Error: All values are the same. Standard deviation is zero, cannot simulate uncertainty."
125
+ return None, create_error_plot(error_msg), None, error_msg
126
 
127
  # 4. Generate visualization and stats
128
  fig, ax = plt.subplots(figsize=(6, 4))
129
  ax.hist(series, bins='auto', density=True, alpha=0.7, label='Input Data Distribution')
130
 
 
131
  xmin, xmax = plt.xlim()
132
  x = np.linspace(xmin, xmax, 100)
133
  p = norm.pdf(x, mean, std)
 
155
  def run_monte_carlo_simulation(data, num_simulations, target_value):
156
  """
157
  Performs the Monte Carlo simulation based on the processed data.
 
 
 
 
 
 
 
 
 
 
 
158
  """
159
+ # **NEW**: Check for valid data at the beginning and return clear error plots if invalid.
160
  if data is None:
161
+ error_message = "ERROR: No valid data available.\nPlease go to Step 1 & 2 and click 'Prepare Simulation' first."
162
+ error_plot = create_error_plot(error_message)
163
+ return error_plot, error_plot, "Simulation failed. See plot for details."
164
 
165
  num_simulations = int(num_simulations)
166
 
 
167
  if 'mean' in data.columns and 'std' in data.columns and data.shape[0] == 1:
168
  mean = data['mean'].iloc[0]
169
  std = data['std'].iloc[0]
170
+ data_name = "Value"
171
  else:
172
  series = data.iloc[:, 0]
173
  mean = series.mean()
174
  std = series.std()
175
  data_name = series.name
176
 
 
 
 
177
  simulation_results = np.random.normal(mean, std, num_simulations)
178
 
 
179
  fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
180
  ax_hist.hist(simulation_results, bins=50, density=True, alpha=0.8, color='skyblue', edgecolor='black')
181
 
 
182
  sim_mean = np.mean(simulation_results)
183
  p5 = np.percentile(simulation_results, 5)
184
  p95 = np.percentile(simulation_results, 95)
185
 
 
186
  ax_hist.axvline(sim_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {sim_mean:.2f}')
187
  ax_hist.axvline(p5, color='green', linestyle=':', linewidth=2, label=f'5th Percentile (P5): {p5:.2f}')
188
  ax_hist.axvline(p95, color='green', linestyle=':', linewidth=2, label=f'95th Percentile (P95): {p95:.2f}')
 
194
  ax_hist.grid(True, linestyle='--', alpha=0.6)
195
  plt.tight_layout()
196
 
 
197
  fig_cdf, ax_cdf = plt.subplots(figsize=(8, 5))
198
  sorted_results = np.sort(simulation_results)
199
  yvals = np.arange(len(sorted_results)) / float(len(sorted_results) - 1)
200
  ax_cdf.plot(sorted_results, yvals, label='CDF')
201
 
 
202
  p50 = np.percentile(simulation_results, 50)
203
  ax_cdf.plot(p5, 0.05, 'go', ms=8, label=f'P5: {p5:.2f}')
204
  ax_cdf.plot(p50, 0.50, 'ro', ms=8, label=f'Median (P50): {p50:.2f}')
 
211
  ax_cdf.legend()
212
  plt.tight_layout()
213
 
 
214
  prob_achieved = 0
215
  if target_value is not None:
216
  prob_achieved = np.sum(simulation_results <= target_value) / num_simulations * 100
 
238
  def generate_explanation(results_summary):
239
  """
240
  Uses a Hugging Face model to explain the simulation results in simple terms.
 
 
 
 
 
 
241
  """
242
  if explanation_generator is None:
243
  return "LLM model not loaded. Cannot generate explanation."
244
+ # **NEW**: More robust check for failed simulation runs.
245
+ if not results_summary or "Please process valid data" in results_summary or "Simulation failed" in results_summary:
246
+ return "Could not generate explanation. Please run a successful simulation first."
247
 
248
  prompt = f"""
249
  Explain the following Monte Carlo simulation results to a non-technical manager.
 
282
 
283
  # --- Row 1: Data Input and Preparation ---
284
  with gr.Row():
 
285
  with gr.Column(scale=1):
286
+ with gr.Group():
287
  gr.Markdown("### 1. Data Collection")
288
  gr.Markdown("Choose **one** method below.")
289
 
 
301
 
302
  prepare_button = gr.Button("Prepare Simulation", variant="secondary")
303
 
 
304
  with gr.Column(scale=2):
305
+ with gr.Group():
306
  gr.Markdown("### 2. Preparation & Visualization")
307
  validation_output = gr.Textbox(label="Validation Status", interactive=False, lines=3)
308
  input_stats_output = gr.Textbox(label="Input Data Statistics", interactive=False, lines=6)
 
310
 
311
  # --- Row 2: Simulation Controls and Results ---
312
  with gr.Row():
313
+ with gr.Group():
314
  gr.Markdown("### 3. Simulation Run & Results")
315
  with gr.Row():
316
  with gr.Column(scale=1, min_width=250):
 
336
 
337
  # --- Row 3: AI-Powered Explanation ---
338
  with gr.Row():
339
+ with gr.Group():
340
  gr.Markdown("### 4. AI-Powered Explanation")
341
  explain_button = gr.Button("Explain the Takeaways", variant="secondary")
342
  explanation_output = gr.Textbox(
 
350
  # Define UI Component Interactions
351
  # ----------------------------------------------------------------------------
352
 
 
353
  processed_data_state = gr.State()
354
 
355
  prepare_button.click(
 
374
  # Launch the Gradio App
375
  # ----------------------------------------------------------------------------
376
  if __name__ == "__main__":
 
 
377
  app.launch(debug=True)