"""Gradio entry point: builds the UI sections and wires their events to the MMLU evaluation run."""

import os

import gradio as gr
from huggingface_hub import login

from modules.ui_components import (
    CSS,
    create_action_buttons,
    create_header,
    create_results_section,
)
from configs.dataset_config import (
    create_dataset_section,
    create_mmlu_config_section,
    get_subject_mode_param,
    get_subject_names,
    load_dataset_preview,
    toggle_preview,
    update_interface_based_on_dataset,
    update_questions_interface,
    update_subject_selection_ui,
)
from configs.models_config import (
    create_model_config_section,
    get_model_configs,
    update_eval_button_state,
)
from run_evaluation import run_mmlu_evaluation
from utils.state_management import (
    cancel_evaluation,
    finish_evaluation,
    handle_evaluation_results,
    start_evaluation,
)

# Log in to the Hugging Face Hub with the token from the environment; when the
# variable is unset or empty, only print a warning and continue.
hf_token = os.getenv("HF_READ_WRITE_TOKEN")
if not hf_token:
    print("⚠️ No HF_READ_WRITE_TOKEN found in environment")
else:
    login(hf_token)

# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
with gr.Blocks(css=CSS) as demo:
    # Build the sections in display order. Each builder returns a mapping of
    # component handles that the event wiring below indexes by key.
    header_components = create_header()
    dataset_components = create_dataset_section()     # Section A
    mmlu_components = create_mmlu_config_section()    # Section B
    model_components = create_model_config_section()  # Section C
    results_components = create_results_section()
    action_components = create_action_buttons()

    # Per-session state values.
    evaluation_state = gr.State({"running": False})
    preview_visibility = gr.State(False)

    # Handles that are referenced by several events.
    dataset_dropdown = dataset_components["dropdown"]
    preview_toggle = dataset_components["preview_toggle"]
    eval_button = action_components["eval_button"]
    cancel_button = action_components["cancel_button"]

    # MMLU selection widgets, in the positional order the evaluation call uses.
    mmlu_selection_widgets = [
        mmlu_components["subject_selection_mode"],
        mmlu_components["num_subjects_slider"],
        mmlu_components["specific_subjects"],
        mmlu_components["all_questions_checkbox"],
        mmlu_components["num_questions_slider"],
    ]

    # Every model-configuration widget, model 1 first and then model 2. The
    # same ordering feeds both update_eval_button_state and get_model_configs.
    model_config_widgets = [
        model_components["model1_dropdown"],
        model_components["model1_shots"],
        model_components["model1_regex"],
        model_components["model1_flash_attn"],
        model_components["model2_dropdown"],
        model_components["model2_shots"],
        model_components["model2_regex"],
        model_components["model2_flash_attn"],
    ]

    # Components written by both start_evaluation and cancel_evaluation.
    run_control_outputs = [
        evaluation_state,
        *mmlu_selection_widgets,
        model_components["model1_dropdown"],
        model_components["model2_dropdown"],
        eval_button,
        cancel_button,
        results_components["output"],
        results_components["table"],
        results_components["table_container"],
    ]

    # Dataset dropdown: first load the preview data, then refresh which
    # sections are shown.
    preview_loaded = dataset_dropdown.change(
        fn=load_dataset_preview,
        inputs=[dataset_dropdown],
        outputs=[
            dataset_components["preview_data_state"],
            mmlu_components["specific_subjects"],
            mmlu_components["num_subjects_slider"],
        ],
    )
    # NOTE(review): preview_toggle is listed twice in these outputs; presumably
    # update_interface_based_on_dataset returns seven values -- confirm before
    # de-duplicating.
    preview_loaded.then(
        fn=update_interface_based_on_dataset,
        inputs=[dataset_dropdown, preview_visibility],
        outputs=[
            mmlu_components["container"],
            model_components["container"],
            results_components["container"],
            preview_toggle,
            dataset_components["preview_container"],
            preview_visibility,
            preview_toggle,
        ],
    )

    # Preview toggle button.
    preview_toggle.click(
        fn=toggle_preview,
        inputs=[
            dataset_dropdown,
            preview_visibility,
            dataset_components["preview_data_state"],
        ],
        outputs=[
            preview_visibility,
            dataset_components["preview_container"],
            dataset_components["preview_output"],
            preview_toggle,
        ],
    )

    # Subject selection mode drives the two subject containers.
    mmlu_components["subject_selection_mode"].change(
        fn=update_subject_selection_ui,
        inputs=[mmlu_components["subject_selection_mode"]],
        outputs=[
            mmlu_components["num_subjects_container"],
            mmlu_components["specific_subjects_container"],
        ],
    )

    # The "all questions" checkbox drives the question slider and info text.
    mmlu_components["all_questions_checkbox"].change(
        fn=update_questions_interface,
        inputs=[mmlu_components["all_questions_checkbox"]],
        outputs=[
            mmlu_components["num_questions_slider"],
            mmlu_components["questions_info_text"],
        ],
    )

    # A change to any model-configuration widget re-runs validation over the
    # full set of model settings.
    for component in model_config_widgets:
        component.change(
            fn=update_eval_button_state,
            inputs=model_config_widgets,
            outputs=[model_components["error_message"], eval_button],
        )

    # Evaluation button: start -> run -> present results -> finish.
    evaluation_started = eval_button.click(
        fn=start_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )
    # The lambda adapts raw widget values into the arguments expected by
    # run_mmlu_evaluation; its result is stored in results_components["tmp_data"].
    evaluation_ran = evaluation_started.then(
        fn=lambda mode, num, subjects, all_q, num_q, m1, m1_shots, m1_regex, m1_flash, m2, m2_shots, m2_regex, m2_flash: run_mmlu_evaluation(
            get_subject_mode_param(mode),
            num,
            get_subject_names(subjects),
            all_q,
            num_q,
            get_model_configs(
                m1, m1_shots, m1_regex, m1_flash,
                m2, m2_shots, m2_regex, m2_flash
            )
        ),
        inputs=[*mmlu_selection_widgets, *model_config_widgets],
        outputs=[results_components["tmp_data"]],
    )
    # NOTE(review): unlike run_control_outputs, this list omits
    # mmlu_components["specific_subjects"] and model_components["model2_dropdown"];
    # confirm against handle_evaluation_results that those two do not need
    # updating once a run completes.
    results_presented = evaluation_ran.then(
        fn=handle_evaluation_results,
        inputs=[results_components["tmp_data"]],  # the value written by the previous step
        outputs=[
            results_components["output"],
            results_components["table"],
            eval_button,
            cancel_button,
            mmlu_components["subject_selection_mode"],
            mmlu_components["num_subjects_slider"],
            mmlu_components["all_questions_checkbox"],
            mmlu_components["num_questions_slider"],
            model_components["model1_dropdown"],
            results_components["table_container"],
        ],
    )
    results_presented.then(
        fn=finish_evaluation,
        inputs=[evaluation_state],
        outputs=[evaluation_state],
    )

    # Cancel button writes to the same components as start_evaluation.
    # NOTE(review): no `cancels=` argument is passed, so this listener does not
    # itself ask Gradio to cancel the evaluation chain -- confirm that is intended.
    cancel_button.click(
        fn=cancel_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )

if __name__ == "__main__":
    demo.launch()