Update app.py
Browse files
app.py
CHANGED
@@ -170,6 +170,13 @@ def run_comparison(target_model, target_adapter, count):
|
|
170 |
ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
|
171 |
|
172 |
progress(1, desc="Evaluation complete.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
output = ''
|
175 |
|
@@ -273,6 +280,16 @@ def run_comparison(target_model, target_adapter, count):
|
|
273 |
if other_changes:
|
274 |
output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
return output
|
277 |
|
278 |
def get_base_model(adapter_repo):
|
@@ -420,7 +437,7 @@ with gr.Blocks() as interface:
|
|
420 |
output_text_markdown = gr.Markdown("""
|
421 |
|
422 |
""")
|
423 |
-
|
424 |
evaluate_button.click(
|
425 |
fn=disable_button,
|
426 |
inputs=None,
|
|
|
170 |
ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
|
171 |
|
172 |
progress(1, desc="Evaluation complete.")
|
173 |
+
|
174 |
+
data = {
|
175 |
+
"base_accuracy": base_accuracy,
|
176 |
+
"base_subjects": base_subjects,
|
177 |
+
"adapter_accuracy": ft_accuracy,
|
178 |
+
"adapter_subjects": ft_subjects
|
179 |
+
}
|
180 |
|
181 |
output = ''
|
182 |
|
|
|
280 |
if other_changes:
|
281 |
output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
|
282 |
|
283 |
+
print(data)
|
284 |
+
print(output)
|
285 |
+
|
286 |
+
# Create a temporary file
|
287 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
|
288 |
+
|
289 |
+
# Save JSON
|
290 |
+
with open(temp_file.name, "w", encoding="utf-8") as f:
|
291 |
+
json.dump(data, f, indent=2, ensure_ascii=False)
|
292 |
+
|
293 |
return output
|
294 |
|
295 |
def get_base_model(adapter_repo):
|
|
|
437 |
output_text_markdown = gr.Markdown("""
|
438 |
|
439 |
""")
|
440 |
+
|
441 |
evaluate_button.click(
|
442 |
fn=disable_button,
|
443 |
inputs=None,
|