H2H-eval-comparator / modules /ui_components.py
rohansampath's picture
Update modules/ui_components.py
6805c47 verified
import gradio as gr
# CSS styling for the application.
#
# A single stylesheet string with rules for:
#   - #preview_header / #preview_table : elements addressed by element id
#   - h1                               : centered top-level headings
#   - .section-divider                 : thin horizontal rule between sections
#   - .config-box                      : bordered, padded box
#   - .center-divider                  : full-height, horizontally centered flex box
#   - .error-message                   : red-on-pink message styling
#
# NOTE(review): this module only defines the string; presumably the caller
# hands it to Gradio (e.g. gr.Blocks(css=CSS)) and assigns the matching
# elem_id / elem_classes on components -- confirm against the app entry point.
CSS = """
#preview_header {
margin-bottom: 10px;
margin-top: 5px;
}
#preview_table {
background-color: #f8f9fa;
border-radius: 8px;
padding: 10px;
}
h1 {
text-align: center;
}
.section-divider {
border-top: 1px solid #ddd;
margin: 12px 0;
}
.config-box {
border: 1px solid #ddd;
border-radius: 8px;
padding: 15px;
margin: 10px;
background-color: #f9f9f9;
}
.center-divider {
display: flex;
justify-content: center;
height: 100%;
}
.error-message {
color: #d32f2f;
background-color: #ffebee;
padding: 10px;
border-radius: 4px;
margin: 10px 0;
}
"""
def create_header():
    """
    Build the Markdown components displayed at the top of the application.

    The heading component is created first and the introductory text second,
    so they are registered with the active Gradio layout in that order.

    Returns:
        list: ``[title, description]`` -- the heading Markdown followed by
            the Markdown block describing the demo, datasets and models.
    """
    heading = gr.Markdown("# Head-to-Head Model Evaluation Comparator")

    # Assembled piece by piece; the resulting text (including the leading
    # and trailing newline) is the same as one multi-line literal.
    intro_text = (
        "\n"
        "This demo / proof of concept evaluates two models (or one model with two different configs), head-to-head, on a benchmark dataset.\n"
        "Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)\n"
        "Available Models: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)\n"
    )
    blurb = gr.Markdown(intro_text)

    return [heading, blurb]
def create_results_section():
    """
    Build the results area of the UI.

    Every visual container is created with ``visible=False``; nothing in this
    function reveals them, so callers are responsible for toggling visibility.

    NOTE(review): the nesting below (table column inside the outer results
    column, state created after the layout contexts close) was reconstructed
    from a flattened copy of this file -- confirm against the running app.

    Returns:
        dict: Components keyed by ``'container'``, ``'output'``,
            ``'table_container'``, ``'table'`` and ``'tmp_data'``.
    """
    with gr.Column(visible=False) as outer_column:
        summary_md = gr.Markdown(label="Evaluation Results")

        # Table wrapper starts hidden as well; the table itself is marked
        # visible so that it shows as soon as its wrapper is revealed.
        with gr.Column(visible=False) as table_column, gr.Row():
            detail_table = gr.DataFrame(
                label="Detailed Results (Sortable)",
                interactive=True,
                visible=True,
            )

    # Non-visual State component for temporary data; starts out as None.
    scratch_state = gr.State(None)

    return dict(
        container=outer_column,
        output=summary_md,
        table_container=table_column,
        table=detail_table,
        tmp_data=scratch_state,
    )
def create_action_buttons():
    """
    Build the buttons that start and cancel an evaluation run.

    The "Run Evaluation" button is created non-interactive and the
    "Cancel Evaluation" button is created hidden; this function does not
    change either state afterwards.

    Returns:
        dict: ``{'eval_button': ..., 'cancel_button': ...}`` holding the two
            ``gr.Button`` components.
    """
    # One row containing a single column that stacks both buttons.
    with gr.Row(), gr.Column(scale=1):
        run_btn = gr.Button("Run Evaluation", variant="primary", interactive=False)
        stop_btn = gr.Button("Cancel Evaluation", variant="stop", visible=False)

    return dict(eval_button=run_btn, cancel_button=stop_btn)