joshause committed on
Commit
29c2d64
·
verified ·
1 Parent(s): 259005b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -170,6 +170,13 @@ def run_comparison(target_model, target_adapter, count):
170
  ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
171
 
172
  progress(1, desc="Evaluation complete.")
 
 
 
 
 
 
 
173
 
174
  output = ''
175
 
@@ -273,6 +280,16 @@ def run_comparison(target_model, target_adapter, count):
273
  if other_changes:
274
  output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
275
 
 
 
 
 
 
 
 
 
 
 
276
  return output
277
 
278
  def get_base_model(adapter_repo):
@@ -420,7 +437,7 @@ with gr.Blocks() as interface:
420
  output_text_markdown = gr.Markdown("""
421
 
422
  """)
423
-
424
  evaluate_button.click(
425
  fn=disable_button,
426
  inputs=None,
 
170
  ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
171
 
172
  progress(1, desc="Evaluation complete.")
173
+
174
+ data = {
175
+ "base_accuracy": base_accuracy,
176
+ "base_subjects": base_subjects,
177
+ "adapter_accuracy": ft_accuracy,
178
+ "adapter_subjects": ft_subjects
179
+ }
180
 
181
  output = ''
182
 
 
280
  if other_changes:
281
  output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
282
 
283
+ print(data)
284
+ print(output)
285
+
286
+ # Create a temporary file
287
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
288
+
289
+ # Save JSON
290
+ with open(temp_file.name, "w", encoding="utf-8") as f:
291
+ json.dump(data, f, indent=2, ensure_ascii=False)
292
+
293
  return output
294
 
295
  def get_base_model(adapter_repo):
 
437
  output_text_markdown = gr.Markdown("""
438
 
439
  """)
440
+
441
  evaluate_button.click(
442
  fn=disable_button,
443
  inputs=None,