H2H-eval-comparator / utils /state_management.py
rohansampath's picture
Update utils/state_management.py
b369d74 verified
import gradio as gr
import logging
# Configure the root logger at import time so INFO-level (and above) records
# emitted through the module-level `logging.*` helpers are shown.
# Per the stdlib docs, basicConfig does nothing if the root logger already
# has handlers configured.
logging.basicConfig(level=logging.INFO)
def start_evaluation(state):
    """
    Lock the input controls and show the cancel button for an evaluation run.

    If ``state["running"]` is already truthy the state is left untouched and
    the status text reports that a run is in progress; otherwise the flag is
    set to ``True`` and the status text announces the start. The component
    updates returned are the same in both cases.

    Args:
        state (dict): Current evaluation state; must contain a "running" key.

    Returns:
        list: ``[state, <8 interactive=False updates>, cancel_button update,
        results_output text, results_table value (None),
        results_table_container update]`` in that order.
    """
    if state["running"]:
        status_text = "Evaluation already in progress. Please wait."
    else:
        # Mark the run as started only when no run is currently flagged.
        state["running"] = True
        status_text = "Starting evaluation..."

    # Positional labels for the controls that get disabled, in output order.
    locked_controls = (
        "subject_selection_mode",
        "num_subjects_slider",
        "specific_subjects",
        "all_questions_checkbox",
        "num_questions_slider",
        "model1_dropdown",
        "model2_dropdown",
        "eval_button",
    )
    disabled = [gr.update(interactive=False) for _ in locked_controls]

    return [
        state,                     # state
        *disabled,                 # one update per entry in locked_controls
        gr.update(visible=True),   # cancel_button
        status_text,               # results_output
        None,                      # results_table
        gr.update(visible=False),  # results_table_container
    ]
def finish_evaluation(state):
    """
    Clear the "running" flag once an evaluation has completed.

    The dictionary is modified in place and the same object is handed back.

    Args:
        state (dict): Current evaluation state.

    Returns:
        dict: The same ``state`` object with ``state["running"]`` set to False.
    """
    flag_key = "running"
    state[flag_key] = False
    return state
def cancel_evaluation(state):
    """
    Unlock the input controls and hide the cancel button after a user cancel.

    This function only clears ``state["running"]`` and returns UI updates; it
    does not itself stop any work (the status text it returns warns that the
    backend process may keep running).

    Args:
        state (dict): Current evaluation state.

    Returns:
        list: ``[state, <8 interactive=True updates>, cancel_button update,
        results_output text, results_table value (None),
        results_table_container update]`` in that order.
    """
    state["running"] = False

    # Positional labels for the controls that get re-enabled, in output order.
    unlocked_controls = (
        "subject_selection_mode",
        "num_subjects_slider",
        "specific_subjects",
        "all_questions_checkbox",
        "num_questions_slider",
        "model1_dropdown",
        "model2_dropdown",
        "eval_button",
    )
    enabled = [gr.update(interactive=True) for _ in unlocked_controls]

    trailing = [
        gr.update(visible=False),  # cancel_button
        "⚠️ Evaluation canceled by user (note: backend process may continue running)",  # results_output
        None,                      # results_table
        gr.update(visible=False),  # results_table_container
    ]
    return [state] + enabled + trailing
def handle_evaluation_results(eval_results):
    """
    Translate an evaluation result into UI component updates.

    The report text is always shown. The comparison table and its container
    are populated/visible only when ``eval_results['success']`` is truthy;
    otherwise the table is cleared and the container hidden. The evaluate
    button and the listed input controls are re-enabled and the cancel button
    is hidden in both cases.

    Args:
        eval_results (dict): Results from evaluation. Reads the 'success' and
            'report' keys, plus 'comparison_df' on success.

    Returns:
        list: ``[results_output, results_table, eval_button, cancel_button,
        subject_selection_mode, num_subjects_slider, all_questions_checkbox,
        num_questions_slider, model1_dropdown, results_table_container]``.
    """
    logging.info("Eval results: %s", eval_results)

    succeeded = bool(eval_results['success'])
    report_text = eval_results['report']
    # 'comparison_df' is only looked up on success, so failures need not supply it.
    table_value = eval_results['comparison_df'] if succeeded else None

    # NOTE(review): these labels omit specific_subjects and model2_dropdown,
    # which start_evaluation disables — confirm the caller re-enables them
    # elsewhere before changing this list (its length is part of the contract).
    reenabled_inputs = (
        "subject_selection_mode",
        "num_subjects_slider",
        "all_questions_checkbox",
        "num_questions_slider",
        "model1_dropdown",
    )

    updates = [
        report_text,                  # results_output
        table_value,                  # results_table
        gr.update(interactive=True),  # eval_button
        gr.update(visible=False),     # cancel_button
    ]
    updates.extend(gr.update(interactive=True) for _ in reenabled_inputs)
    updates.append(gr.update(visible=succeeded))  # results_table_container
    return updates