rohansampath committed on
Commit
33231b0
·
verified ·
1 Parent(s): 671bd95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -15
app.py CHANGED
@@ -68,7 +68,7 @@ def run_toy_evaluation():
68
  # 3. MMLU Evaluation call
69
  # ---------------------------------------------------------------------------
70
  @spaces.GPU(duration=120) # Allow up to 2 minutes for full evaluation
71
- def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, num_questions):
72
  """
73
  Runs the MMLU evaluation with the specified parameters.
74
 
@@ -78,13 +78,15 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, nu
78
  num_shots (int): Number of few-shot examples (0-5)
79
  all_questions (bool): Whether to evaluate all questions per subject
80
  num_questions (int): Number of examples per subject (1-20 or -1 for all)
 
81
  """
82
 
83
  if not model_loaded:
84
  load_model()
85
 
86
  if not model_loaded:
87
- return "⚠️ Model not loaded. Please load the model first.", None
 
88
 
89
  # Convert num_subjects to -1 if all_subjects is True
90
  if all_subjects:
@@ -142,7 +144,9 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, nu
142
  f"* Evaluation completed in {elapsed_time:.2f} seconds\n"
143
  )
144
 
145
- return report, results_df # Return both text report and dataframe
 
 
146
 
147
  # ---------------------------------------------------------------------------
148
  # 4. Gradio Interface
@@ -200,20 +204,31 @@ with gr.Blocks() as demo:
200
  value=False, # Default is unchecked
201
  info="When checked, evaluates all available questions for each subject"
202
  )
 
 
 
 
 
 
 
203
  num_questions_slider = gr.Slider(
204
  minimum=1,
205
  maximum=20,
206
  value=10, # Default is 10 questions
207
  step=1,
208
  label="Questions per Subject",
209
- info="Choose a subset of questions (1-20), or click the checkbox for All Questions",
210
  interactive=True
211
  )
212
 
213
  with gr.Row():
214
- eval_mmlu_button = gr.Button("Run MMLU Evaluation", variant="primary")
 
 
215
  results_output = gr.Markdown(label="Evaluation Results")
216
- results_table = gr.DataFrame(interactive=True, label="Detailed Results (Sortable)")
 
 
217
 
218
  # Connect components
219
  load_button.click(fn=load_model, inputs=None, outputs=load_status)
@@ -238,21 +253,61 @@ with gr.Blocks() as demo:
238
  outputs=[num_subjects_slider]
239
  )
240
 
241
- # Update num_questions_slider interactivity based on all_questions checkbox
242
def update_questions_slider(checked):
    """Return a Gradio update that locks the questions slider while "all questions" is ticked.

    Args:
        checked: Current value of the "all questions" checkbox.

    Returns:
        A ``gr.update`` whose ``interactive`` flag is False when ``checked`` is
        truthy and True otherwise.
    """
    # The slider is only meaningful when the user has NOT asked for every question.
    slider_enabled = not checked
    return gr.update(interactive=slider_enabled)
247
 
248
  all_questions_checkbox.change(
249
- fn=update_questions_slider,
250
  inputs=[all_questions_checkbox],
251
- outputs=[num_questions_slider]
252
  )
253
 
254
- # Connect MMLU evaluation button
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  eval_mmlu_button.click(
 
 
 
 
 
 
 
 
 
 
 
 
256
  fn=run_mmlu_evaluation,
257
  inputs=[
258
  all_subjects_checkbox,
@@ -261,7 +316,30 @@ with gr.Blocks() as demo:
261
  all_questions_checkbox,
262
  num_questions_slider
263
  ],
264
- outputs=[results_output, results_table]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  )
266
 
267
- demo.launch()
 
68
  # 3. MMLU Evaluation call
69
  # ---------------------------------------------------------------------------
70
  @spaces.GPU(duration=120) # Allow up to 2 minutes for full evaluation
71
+ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, num_questions, progress=gr.Progress()):
72
  """
73
  Runs the MMLU evaluation with the specified parameters.
74
 
 
78
  num_shots (int): Number of few-shot examples (0-5)
79
  all_questions (bool): Whether to evaluate all questions per subject
80
  num_questions (int): Number of examples per subject (1-20 or -1 for all)
81
+ progress (gr.Progress): Progress indicator
82
  """
83
 
84
  if not model_loaded:
85
  load_model()
86
 
87
  if not model_loaded:
88
+ return "⚠️ Model not loaded. Please load the model first.", None, gr.update(interactive=True), gr.update(visible=False), \
89
+ [gr.update(interactive=True) for _ in range(5)]
90
 
91
  # Convert num_subjects to -1 if all_subjects is True
92
  if all_subjects:
 
144
  f"* Evaluation completed in {elapsed_time:.2f} seconds\n"
145
  )
146
 
147
+ # Return values that re-enable UI components after completion
148
+ return report, results_df, gr.update(interactive=True), gr.update(visible=False), \
149
+ [gr.update(interactive=True) for _ in range(5)]
150
 
151
  # ---------------------------------------------------------------------------
152
  # 4. Gradio Interface
 
204
  value=False, # Default is unchecked
205
  info="When checked, evaluates all available questions for each subject"
206
  )
207
+ questions_info_text = gr.Markdown(visible=False, value="**All 14,042 questions across all subjects will be evaluated**")
208
+
209
+ with gr.Row(elem_id="questions_selection_row"):
210
+ questions_container = gr.Column(scale=1, elem_id="questions_slider_container")
211
+
212
+ # Move the slider into the container for easier visibility toggling
213
+ with questions_container:
214
  num_questions_slider = gr.Slider(
215
  minimum=1,
216
  maximum=20,
217
  value=10, # Default is 10 questions
218
  step=1,
219
  label="Questions per Subject",
220
+ info="Choose a subset of questions (1-20)",
221
  interactive=True
222
  )
223
 
224
  with gr.Row():
225
+ with gr.Column(scale=1):
226
+ eval_mmlu_button = gr.Button("Run MMLU Evaluation", variant="primary", interactive=True)
227
+ cancel_mmlu_button = gr.Button("Cancel MMLU Evaluation", variant="stop", visible=False)
228
  results_output = gr.Markdown(label="Evaluation Results")
229
+
230
+ with gr.Row():
231
+ results_table = gr.DataFrame(interactive=True, label="Detailed Results (Sortable)", visible=True)
232
 
233
  # Connect components
234
  load_button.click(fn=load_model, inputs=None, outputs=load_status)
 
253
  outputs=[num_subjects_slider]
254
  )
255
 
256
+ # Update interface based on all_questions checkbox
257
def update_questions_interface(checked):
    """Swap visibility between the question-count slider and the "all questions" notice.

    Args:
        checked: Current value of the "all questions" checkbox.

    Returns:
        A 2-tuple of ``gr.update`` objects. The first carries the visibility for
        the slider container, the second for the informational text; exactly one
        of the two is visible at any time.
    """
    # Slider is shown only when the checkbox is off; the notice is its inverse.
    show_slider = not checked
    return gr.update(visible=show_slider), gr.update(visible=not show_slider)
262
 
263
  all_questions_checkbox.change(
264
+ fn=update_questions_interface,
265
  inputs=[all_questions_checkbox],
266
+ outputs=[questions_container, questions_info_text]
267
  )
268
 
269
+ # Function to disable UI components during evaluation
270
def disable_ui_for_evaluation():
    """Build the updates that lock the evaluation controls and reveal the cancel button.

    Returns:
        A list of seven ``gr.update`` objects, in this order: five input
        controls (made non-interactive and given an "in progress" info text),
        the run button (made non-interactive), and the cancel button (made
        visible).
    """
    busy_note = "MMLU Evaluation currently in progress"

    # One identical "locked" update per input control:
    # all_subjects_checkbox, num_subjects_slider, num_shots_slider,
    # all_questions_checkbox, num_questions_slider.
    # NOTE(review): this replaces each control's info text; whatever re-enables
    # the controls afterwards must restore the original text as well.
    locked_inputs = [
        gr.update(interactive=False, info=busy_note) for _ in range(5)
    ]

    return [
        *locked_inputs,
        gr.update(interactive=False),  # eval_mmlu_button
        gr.update(visible=True),  # cancel_mmlu_button
    ]
280
+
281
+ # Function to handle cancel button click
282
def cancel_evaluation():
    """Build the updates that return the evaluation UI to its idle state.

    This only resets the interface: it does not stop a GPU job that is already
    running (that would need additional backend support).

    Returns:
        A list of nine values, in this order: five ``gr.update`` objects that
        re-enable the input controls and restore their info text, an update
        re-enabling the run button, an update hiding the cancel button, the
        cancellation message for the results text, and ``None`` for the
        results table.
    """
    # Info texts to restore, in the same order as the input controls:
    # all_subjects_checkbox, num_subjects_slider, num_shots_slider,
    # all_questions_checkbox, num_questions_slider.
    restored_info = (
        "When checked, evaluates all 57 MMLU subjects",
        "Number of subjects to evaluate (1-57). They will be loaded in alphabetical order.",
        "Number of examples to use for few-shot learning (0-5). They will be loaded in alphabetical order.",
        "When checked, evaluates all available questions for each subject",
        "Choose a subset of questions (1-20)",
    )

    ui_updates = [
        gr.update(interactive=True, info=text) for text in restored_info
    ]
    ui_updates.append(gr.update(interactive=True))  # eval_mmlu_button
    ui_updates.append(gr.update(visible=False))  # cancel_mmlu_button
    ui_updates.append("⚠️ Evaluation canceled by user")  # results_output
    ui_updates.append(None)  # results_table
    return ui_updates
296
+
297
+ # Connect MMLU evaluation button - now disables UI and shows cancel button
298
  eval_mmlu_button.click(
299
+ fn=disable_ui_for_evaluation,
300
+ inputs=None,
301
+ outputs=[
302
+ all_subjects_checkbox,
303
+ num_subjects_slider,
304
+ num_shots_slider,
305
+ all_questions_checkbox,
306
+ num_questions_slider,
307
+ eval_mmlu_button,
308
+ cancel_mmlu_button
309
+ ]
310
+ ).then(
311
  fn=run_mmlu_evaluation,
312
  inputs=[
313
  all_subjects_checkbox,
 
316
  all_questions_checkbox,
317
  num_questions_slider
318
  ],
319
+ outputs=[
320
+ results_output,
321
+ results_table,
322
+ eval_mmlu_button,
323
+ cancel_mmlu_button,
324
+ [all_subjects_checkbox, num_subjects_slider, num_shots_slider, all_questions_checkbox, num_questions_slider]
325
+ ]
326
+ )
327
+
328
+ # Connect cancel button
329
+ cancel_mmlu_button.click(
330
+ fn=cancel_evaluation,
331
+ inputs=None,
332
+ outputs=[
333
+ all_subjects_checkbox,
334
+ num_subjects_slider,
335
+ num_shots_slider,
336
+ all_questions_checkbox,
337
+ num_questions_slider,
338
+ eval_mmlu_button,
339
+ cancel_mmlu_button,
340
+ results_output,
341
+ results_table
342
+ ]
343
  )
344
 
345
+ demo.launch()