"""Gradio UI building blocks: the application stylesheet and component factories."""

import gradio as gr

# CSS styling for the application
CSS = """
#preview_header {
    margin-bottom: 10px;
    margin-top: 5px;
}
#preview_table {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 10px;
}
h1 {
    text-align: center;
}
.section-divider {
    border-top: 1px solid #ddd;
    margin: 12px 0;
}
.config-box {
    border: 1px solid #ddd;
    border-radius: 8px;
    padding: 15px;
    margin: 10px;
    background-color: #f9f9f9;
}
.center-divider {
    display: flex;
    justify-content: center;
    height: 100%;
}
.error-message {
    color: #d32f2f;
    background-color: #ffebee;
    padding: 10px;
    border-radius: 4px;
    margin: 10px 0;
}
"""


def create_header() -> list:
    """
    Creates the main application header.

    Builds a Markdown title and a Markdown description listing the
    available datasets and models.

    NOTE(review): presumably called inside an active ``gr.Blocks`` context
    so the components are attached to the page -- confirm against caller.

    Returns:
        list: ``[title, description]`` Markdown components, in that order.
    """
    title = gr.Markdown("# Head-to-Head Model Evaluation Comparator")

    # Built with explicit "\n\n" paragraph breaks (instead of an indented
    # triple-quoted literal) so the rendered text does not depend on how
    # this source file is indented.
    description = gr.Markdown(
        "This demo / proof of concept evaluates two models "
        "(or one model with two different configs), head-to-head, "
        "on a benchmark dataset.\n\n"
        "Available Datasets: "
        "[MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)\n\n"
        "Available Models: "
        "[Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)"
    )

    return [title, description]


def create_results_section() -> dict:
    """
    Creates the results section of the UI.

    Both the outer results column and the table column are created with
    ``visible=False``; nothing in this function makes them visible.

    Returns:
        dict: Dictionary containing results components, keyed by:
            'container': outer results column,
            'output': Markdown component labelled "Evaluation Results",
            'table_container': column wrapping the results table,
            'table': the results DataFrame,
            'tmp_data': State component initialised to None.
    """
    # NOTE(review): the nesting of the two columns and the placement of the
    # State component were reconstructed from whitespace-collapsed source --
    # confirm against the intended layout.
    with gr.Column(visible=False) as results_container:
        results_output = gr.Markdown(label="Evaluation Results")

        # Results table - Initially hidden until evaluation completes
        with gr.Column(visible=False) as results_table_container:
            with gr.Row():
                results_table = gr.DataFrame(
                    interactive=True,
                    label="Detailed Results (Sortable)",
                    visible=True,
                )

    # Create a State component to store temporary data
    tmp_data = gr.State(None)

    return {
        'container': results_container,
        'output': results_output,
        'table_container': results_table_container,
        'table': results_table,
        'tmp_data': tmp_data,
    }


def create_action_buttons() -> dict:
    """
    Creates the action buttons for evaluation.

    The "Run Evaluation" button is created non-interactive and the
    "Cancel Evaluation" button is created hidden; nothing in this function
    changes either state afterwards.

    Returns:
        dict: Dictionary containing button components, keyed by
            'eval_button' and 'cancel_button'.
    """
    # NOTE(review): both buttons are assumed to live in the same column;
    # the original indentation was lost -- confirm.
    with gr.Row():
        with gr.Column(scale=1):
            eval_button = gr.Button(
                "Run Evaluation",
                variant="primary",
                interactive=False,
            )
            cancel_button = gr.Button(
                "Cancel Evaluation",
                variant="stop",
                visible=False,
            )

    return {
        'eval_button': eval_button,
        'cancel_button': cancel_button,
    }