Spaces:
Sleeping
Sleeping
File size: 2,759 Bytes
e136af0 6805c47 e136af0 6805c47 e136af0 921f3a0 e136af0 921f3a0 e136af0 921f3a0 e136af0 921f3a0 e136af0 fdb8962 e136af0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import gradio as gr
# Custom stylesheet for the application, kept as a single CSS string.
# It styles the #preview_header and #preview_table element ids, centres h1
# headings, and defines the .section-divider, .config-box, .center-divider
# and .error-message classes.
# NOTE(review): nothing in this section consumes CSS; presumably it is handed
# to the Gradio app by whichever module assembles the UI - confirm at the
# call site.
CSS = """
#preview_header {
margin-bottom: 10px;
margin-top: 5px;
}
#preview_table {
background-color: #f8f9fa;
border-radius: 8px;
padding: 10px;
}
h1 {
text-align: center;
}
.section-divider {
border-top: 1px solid #ddd;
margin: 12px 0;
}
.config-box {
border: 1px solid #ddd;
border-radius: 8px;
padding: 15px;
margin: 10px;
background-color: #f9f9f9;
}
.center-divider {
display: flex;
justify-content: center;
height: 100%;
}
.error-message {
color: #d32f2f;
background-color: #ffebee;
padding: 10px;
border-radius: 4px;
margin: 10px 0;
}
"""
def create_header():
    """
    Build the title and the introductory blurb shown at the top of the app.

    Returns:
        list: Two gr.Markdown components, the title first and the
            description second.
    """
    heading = gr.Markdown("# Head-to-Head Model Evaluation Comparator")

    # The blurb is assembled from adjacent literals, one per line of text,
    # so the leading and trailing newlines of the markdown are explicit.
    blurb_text = (
        "\n"
        "This demo / proof of concept evaluates two models (or one model with two different configs), head-to-head, on a benchmark dataset.\n"
        "Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)\n"
        "Available Models: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)\n"
    )
    blurb = gr.Markdown(blurb_text)

    return [heading, blurb]
def create_results_section():
"""
Creates the results section of the UI.

Two gr.Column containers are opened, both with visible=False:
`results_container`, which holds a gr.Markdown labelled
"Evaluation Results", and `results_table_container`, which holds a
gr.Row wrapping an interactive gr.DataFrame. A gr.State initialised
to None is created as well. Nothing in this function changes the
visibility flags afterwards.

NOTE(review): indentation is missing in this copy of the file, so it
cannot be determined here whether `results_table_container` is nested
inside `results_container`, nor at which level the gr.State is
created - confirm against the original layout.

Returns:
dict: Dictionary containing results components under the keys
'container', 'output', 'table_container', 'table' and 'tmp_data'.
"""
with gr.Column(visible=False) as results_container:
results_output = gr.Markdown(label="Evaluation Results")
# Results table container, created hidden (visible=False).
# Presumably revealed by the caller once an evaluation has finished -
# that logic is not in this function.
with gr.Column(visible=False) as results_table_container:
with gr.Row():
results_table = gr.DataFrame(
interactive=True,
label="Detailed Results (Sortable)",
visible=True
)
# Create a State component to store temporary data (starts as None)
tmp_data = gr.State(None)
return {
'container': results_container,
'output': results_output,
'table_container': results_table_container,
'table': results_table,
'tmp_data': tmp_data # the gr.State created above
}
def create_action_buttons():
"""
Creates the action buttons for evaluation.

Builds a "Run Evaluation" button (variant "primary", created with
interactive=False) and a "Cancel Evaluation" button (variant "stop",
created with visible=False) under a gr.Row / gr.Column(scale=1) layout.
Nothing in this function changes those flags afterwards.

NOTE(review): indentation is missing in this copy of the file. The
run button is necessarily inside the gr.Column, but whether the cancel
button sits in the same column or directly in the row cannot be
determined here - confirm against the original layout.

Returns:
dict: Dictionary containing button components under the keys
'eval_button' and 'cancel_button'.
"""
with gr.Row():
with gr.Column(scale=1):
# Created non-interactive; presumably enabled by the caller once the
# configuration is valid - confirm where it is wired up.
eval_button = gr.Button("Run Evaluation", variant="primary", interactive=False)
# Created hidden; presumably shown only while an evaluation is running.
cancel_button = gr.Button("Cancel Evaluation", variant="stop", visible=False)
return {
'eval_button': eval_button,
'cancel_button': cancel_button
}
|