# Spaces: Sleeping
import gradio as gr
# CSS styling for the application.
# Selectors starting with "#" target components by element id; selectors
# starting with "." target components by CSS class name.
CSS = """
#preview_header {
    margin-bottom: 10px;
    margin-top: 5px;
}
#preview_table {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 10px;
}
h1 {
    text-align: center;
}
.section-divider {
    border-top: 1px solid #ddd;
    margin: 12px 0;
}
.config-box {
    border: 1px solid #ddd;
    border-radius: 8px;
    padding: 15px;
    margin: 10px;
    background-color: #f9f9f9;
}
.center-divider {
    display: flex;
    justify-content: center;
    height: 100%;
}
.error-message {
    color: #d32f2f;
    background-color: #ffebee;
    padding: 10px;
    border-radius: 4px;
    margin: 10px 0;
}
"""
def create_header() -> list:
    """
    Creates the main application header.

    Instantiates two ``gr.Markdown`` components: the page title and a short
    description that links to the available dataset and model.

    Returns:
        list: ``[title, description]``, in that order.
    """
    title = gr.Markdown("# Head-to-Head Model Evaluation Comparator")
    # NOTE(review): the description text is kept flush-left inside the literal
    # so that no leading whitespace is introduced into the Markdown source.
    description = gr.Markdown("""
This demo / proof of concept evaluates two models (or one model with two different configs), head-to-head, on a benchmark dataset.
Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)
Available Models: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
""")
    return [title, description]
def create_results_section() -> dict:
    """
    Creates the results section of the UI.

    Builds a hidden outer column holding a Markdown summary, a hidden inner
    column holding the detailed results table, and a ``gr.State`` component
    for temporary data.

    Returns:
        dict: Dictionary containing results components, keyed as:
            'container': outer column (created with ``visible=False``).
            'output': Markdown component labelled "Evaluation Results".
            'table_container': column wrapping the table
                (created with ``visible=False``).
            'table': interactive DataFrame labelled
                "Detailed Results (Sortable)".
            'tmp_data': ``gr.State`` initialised to ``None``.
    """
    # NOTE(review): the nesting of the layout blocks below is reconstructed;
    # confirm that the table container belongs inside the results container.
    with gr.Column(visible=False) as results_container:
        results_output = gr.Markdown(label="Evaluation Results")

        # Results table - initially hidden until evaluation completes
        with gr.Column(visible=False) as results_table_container:
            with gr.Row():
                results_table = gr.DataFrame(
                    interactive=True,
                    label="Detailed Results (Sortable)",
                )

    # State component to store temporary data
    tmp_data = gr.State(None)

    return {
        'container': results_container,
        'output': results_output,
        'table_container': results_table_container,
        'table': results_table,
        'tmp_data': tmp_data,
    }
def create_action_buttons() -> dict:
    """
    Creates the action buttons for evaluation.

    The "Run Evaluation" button is created non-interactive and the
    "Cancel Evaluation" button is created hidden; this function does not
    change either state afterwards.

    Returns:
        dict: Dictionary containing button components, keyed as:
            'eval_button': primary-variant "Run Evaluation" button.
            'cancel_button': stop-variant "Cancel Evaluation" button.
    """
    # NOTE(review): both buttons are assumed to live in the same column;
    # confirm against the intended layout.
    with gr.Row():
        with gr.Column(scale=1):
            eval_button = gr.Button(
                "Run Evaluation",
                variant="primary",
                interactive=False,
            )
            cancel_button = gr.Button(
                "Cancel Evaluation",
                variant="stop",
                visible=False,
            )

    return {
        'eval_button': eval_button,
        'cancel_button': cancel_button,
    }