H2H-eval-comparator

Sleeping

rohansampath committed on Feb 26

Commit

dfececa

verified ·

1 Parent(s): 2010e21

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -151,9 +151,11 @@ def enhanced_format_preview_for_display(preview_data):
 # 3. Gradio Interface
 # ---------------------------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Mistral-7B Evaluation Demo")
     gr.Markdown("""
-    This demo evaluates [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on various benchmark datasets.
     """)
     # Dataset Selection Section
@@ -164,7 +166,7 @@ with gr.Blocks() as demo:
             choices=["(Select Dataset)", "MMLU-Pro"],
             value="(Select Dataset)",
             label="Dataset",
-            info="Select a dataset to evaluate the model on"
         )
         preview_toggle = gr.Button("Show Preview", interactive=False, variant="secondary")

 # 3. Gradio Interface
 # ---------------------------------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# Head-to-Head Model Evaluation Comparator")
     gr.Markdown("""
+    This demo evaluates two models (or one model with two different configs) on a benchmark dataset.
+    Available Datasets:[MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)
+    Available Models: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
     """)
     # Dataset Selection Section
             choices=["(Select Dataset)", "MMLU-Pro"],
             value="(Select Dataset)",
             label="Dataset",
+            info="Select a dataset to perform the Head to Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"
         )
         preview_toggle = gr.Button("Show Preview", interactive=False, variant="secondary")