Spaces:

joshause
/

flex-mmlu-fine-tune-vs-base-model-evaluator

Running

joshause committed 9 days ago

Commit

29c2d64

verified ·

1 Parent(s): 259005b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -170,6 +170,13 @@ def run_comparison(target_model, target_adapter, count):
     ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
     progress(1, desc="Evaluation complete.")
     output = ''
@@ -273,6 +280,16 @@ def run_comparison(target_model, target_adapter, count):
     if other_changes:
       output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
     return output
 def get_base_model(adapter_repo):
@@ -420,7 +437,7 @@ with gr.Blocks() as interface:
         output_text_markdown = gr.Markdown("""
         """)
         evaluate_button.click(
             fn=disable_button,
             inputs=None,

     ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
     progress(1, desc="Evaluation complete.")
+    data = {
+        "base_accuracy": base_accuracy,
+        "base_subjects": base_subjects,
+        "adapter_accuracy": ft_accuracy,
+        "adapter_subjects": ft_subjects
+    }
     output = ''
     if other_changes:
       output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
+    print(data)
+    print(output)
+    # Create a temporary file
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
+    # Save JSON
+    with open(temp_file.name, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
     return output
 def get_base_model(adapter_repo):
         output_text_markdown = gr.Markdown("""
         """)
         evaluate_button.click(
             fn=disable_button,
             inputs=None,