# Source page header (Hugging Face Spaces status at capture time): Sleeping
import os | |
import gradio as gr | |
from huggingface_hub import login | |
from modules.ui_components import CSS, create_header, create_results_section, create_action_buttons | |
from configs.dataset_config import (create_dataset_section, create_mmlu_config_section, | |
load_dataset_preview, update_interface_based_on_dataset, | |
toggle_preview, update_subject_selection_ui, | |
update_questions_interface, get_subject_mode_param, | |
get_subject_names) | |
from configs.models_config import (create_model_config_section, update_eval_button_state, | |
get_model_configs) | |
from run_evaluation import run_mmlu_evaluation | |
from utils.state_management import (start_evaluation, finish_evaluation, | |
cancel_evaluation, handle_evaluation_results) | |
# Read the Hugging Face token from the environment and log in with it.
# When the variable is unset or empty, no login is attempted and a warning
# is printed instead, so the app still starts.
hf_token = os.getenv("HF_READ_WRITE_TOKEN")
if hf_token:
    login(hf_token)
else:
    print("⚠️ No HF_READ_WRITE_TOKEN found in environment")
# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
# Builds the Blocks app: creates each UI section, then wires component events
# to the handlers imported from configs/, utils/ and run_evaluation.
with gr.Blocks(css=CSS) as demo:
    # ----- Layout: sections are created in display order -----
    header_components = create_header()
    dataset_components = create_dataset_section()      # Section A
    mmlu_components = create_mmlu_config_section()     # Section B
    model_components = create_model_config_section()   # Section C
    results_components = create_results_section()
    action_components = create_action_buttons()

    # ----- State tracking -----
    evaluation_state = gr.State({"running": False})
    preview_visibility = gr.State(False)

    # ----- Shared component lists -----
    # Per-model settings, in the positional order expected by both
    # update_eval_button_state and get_model_configs (model 1, then model 2).
    model_config_inputs = [
        model_components['model1_dropdown'],
        model_components['model1_shots'],
        model_components['model1_regex'],
        model_components['model1_flash_attn'],
        model_components['model2_dropdown'],
        model_components['model2_shots'],
        model_components['model2_regex'],
        model_components['model2_flash_attn'],
    ]

    # Components updated by both start_evaluation and cancel_evaluation;
    # the two handlers write to exactly the same outputs, in this order.
    evaluation_control_outputs = [
        evaluation_state,
        mmlu_components['subject_selection_mode'],
        mmlu_components['num_subjects_slider'],
        mmlu_components['specific_subjects'],
        mmlu_components['all_questions_checkbox'],
        mmlu_components['num_questions_slider'],
        model_components['model1_dropdown'],
        model_components['model2_dropdown'],
        action_components['eval_button'],
        action_components['cancel_button'],
        results_components['output'],
        results_components['table'],
        results_components['table_container'],
    ]

    # ----- Dataset selection -----
    # Changing the dataset first loads the preview data, then updates which
    # sections are shown.
    dataset_components['dropdown'].change(
        fn=load_dataset_preview,
        inputs=[dataset_components['dropdown']],
        outputs=[
            dataset_components['preview_data_state'],
            mmlu_components['specific_subjects'],
            mmlu_components['num_subjects_slider'],
        ],
    ).then(
        fn=update_interface_based_on_dataset,
        inputs=[
            dataset_components['dropdown'],
            preview_visibility,
        ],
        # NOTE(review): 'preview_toggle' appears twice (first and last entry
        # below). Kept as-is because the handler's return arity is defined
        # elsewhere -- confirm this is intentional.
        outputs=[
            mmlu_components['container'],
            model_components['container'],
            results_components['container'],
            dataset_components['preview_toggle'],
            dataset_components['preview_container'],
            preview_visibility,
            dataset_components['preview_toggle'],
        ],
    )

    # ----- Dataset preview toggle -----
    dataset_components['preview_toggle'].click(
        fn=toggle_preview,
        inputs=[
            dataset_components['dropdown'],
            preview_visibility,
            dataset_components['preview_data_state'],
        ],
        outputs=[
            preview_visibility,
            dataset_components['preview_container'],
            dataset_components['preview_output'],
            dataset_components['preview_toggle'],
        ],
    )

    # ----- Subject selection mode -----
    mmlu_components['subject_selection_mode'].change(
        fn=update_subject_selection_ui,
        inputs=[mmlu_components['subject_selection_mode']],
        outputs=[
            mmlu_components['num_subjects_container'],
            mmlu_components['specific_subjects_container'],
        ],
    )

    # ----- "All questions" checkbox -----
    mmlu_components['all_questions_checkbox'].change(
        fn=update_questions_interface,
        inputs=[mmlu_components['all_questions_checkbox']],
        outputs=[
            mmlu_components['num_questions_slider'],
            mmlu_components['questions_info_text'],
        ],
    )

    # ----- Model configuration validation -----
    # Any change to any model setting re-runs validation over all of them.
    for component in model_config_inputs:
        component.change(
            fn=update_eval_button_state,
            inputs=model_config_inputs,
            outputs=[
                model_components['error_message'],
                action_components['eval_button'],
            ],
        )

    # ----- Evaluation pipeline -----
    # Four chained steps: start_evaluation -> run_mmlu_evaluation ->
    # handle_evaluation_results -> finish_evaluation.
    action_components['eval_button'].click(
        fn=start_evaluation,
        inputs=[evaluation_state],
        outputs=evaluation_control_outputs,
    ).then(
        # Adapt raw widget values to run_mmlu_evaluation's arguments: the
        # subject mode and subject list are converted by their helpers and
        # the eight per-model values are bundled by get_model_configs.
        fn=lambda mode, num, subjects, all_q, num_q,
                  m1, m1_shots, m1_regex, m1_flash,
                  m2, m2_shots, m2_regex, m2_flash: run_mmlu_evaluation(
            get_subject_mode_param(mode),
            num,
            get_subject_names(subjects),
            all_q,
            num_q,
            get_model_configs(
                m1, m1_shots, m1_regex, m1_flash,
                m2, m2_shots, m2_regex, m2_flash
            )
        ),
        inputs=[
            mmlu_components['subject_selection_mode'],
            mmlu_components['num_subjects_slider'],
            mmlu_components['specific_subjects'],
            mmlu_components['all_questions_checkbox'],
            mmlu_components['num_questions_slider'],
            *model_config_inputs,
        ],
        # The evaluation result is parked in tmp_data for the next step.
        outputs=[results_components['tmp_data']],
    ).then(
        fn=handle_evaluation_results,
        inputs=[results_components['tmp_data']],
        # NOTE(review): unlike the start/cancel outputs, this list does not
        # include 'specific_subjects' or 'model2_dropdown'. Left unchanged
        # because it must match the handler's return values -- confirm those
        # two controls are restored somewhere after a successful run.
        outputs=[
            results_components['output'],
            results_components['table'],
            action_components['eval_button'],
            action_components['cancel_button'],
            mmlu_components['subject_selection_mode'],
            mmlu_components['num_subjects_slider'],
            mmlu_components['all_questions_checkbox'],
            mmlu_components['num_questions_slider'],
            model_components['model1_dropdown'],
            results_components['table_container'],
        ],
    ).then(
        fn=finish_evaluation,
        inputs=[evaluation_state],
        outputs=[evaluation_state],
    )

    # ----- Cancel button -----
    action_components['cancel_button'].click(
        fn=cancel_evaluation,
        inputs=[evaluation_state],
        outputs=evaluation_control_outputs,
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()