File size: 5,009 Bytes
528d9fb
b369d74
528d9fb
ff7943b
 
2061520
528d9fb
 
 
 
 
 
 
 
 
 
 
 
2061520
 
 
 
 
 
 
 
 
528d9fb
2061520
 
528d9fb
 
 
 
 
2061520
 
528d9fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2061520
528d9fb
 
 
 
 
 
 
 
 
 
 
 
2061520
 
528d9fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2061520
528d9fb
 
 
 
 
 
 
 
 
ff7943b
528d9fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2061520
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
import logging

# Configure the root logger once at import time so the module-level
# logging.info(...) calls below are actually emitted at INFO and above.
logging.basicConfig(level=logging.INFO)

def start_evaluation(state):
    """
    Disables UI components when evaluation starts.

    If an evaluation is already running, the state is left untouched and the
    user is asked to wait; otherwise the state is flipped to running. In both
    cases every input widget is disabled and the cancel button is shown, so
    the component updates are built once and only the status message differs.

    Args:
        state (dict): Current evaluation state; key "running" (bool) is read
            and may be set to True. Mutated in place.

    Returns:
        list: [state, 8 x gr.update (inputs disabled), cancel-button update,
            status message (str), results_table value (None),
            results_table_container visibility update] — 13 items total,
            matching the outputs wired to this callback.
    """
    if state["running"]:
        # Re-entrant click while a run is active: do not touch the flag.
        message = "Evaluation already in progress. Please wait."
    else:
        state["running"] = True
        message = "Starting evaluation..."

    return [
        state,  # Updated state
        gr.update(interactive=False),  # subject_selection_mode
        gr.update(interactive=False),  # num_subjects_slider
        gr.update(interactive=False),  # specific_subjects
        gr.update(interactive=False),  # all_questions_checkbox
        gr.update(interactive=False),  # num_questions_slider
        gr.update(interactive=False),  # model1_dropdown
        gr.update(interactive=False),  # model2_dropdown
        gr.update(interactive=False),  # eval_button
        gr.update(visible=True),       # cancel_button
        message,                       # results_output
        None,                          # results_table
        gr.update(visible=False)       # results_table_container
    ]

def finish_evaluation(state):
    """
    Marks the evaluation as no longer running.

    Args:
        state (dict): Current evaluation state; key "running" is set to
            False. Mutated in place.

    Returns:
        dict: The same state object, updated.
    """
    state.update(running=False)
    return state

def cancel_evaluation(state):
    """
    Re-enables UI components when evaluation is canceled.

    Clears the running flag, unlocks all eight input widgets, hides the
    cancel button, and surfaces a cancellation notice in the results pane.

    Args:
        state (dict): Current evaluation state; key "running" is set to
            False. Mutated in place.

    Returns:
        list: [state, 8 x gr.update (inputs re-enabled), cancel-button
            update, notice string, results_table value (None),
            results_table_container visibility update] — 13 items total.
    """
    state["running"] = False

    # subject_selection_mode, num_subjects_slider, specific_subjects,
    # all_questions_checkbox, num_questions_slider, model1_dropdown,
    # model2_dropdown, eval_button — all unlocked identically.
    reenabled_inputs = [gr.update(interactive=True) for _ in range(8)]

    return [
        state,
        *reenabled_inputs,
        gr.update(visible=False),     # cancel_button
        "⚠️ Evaluation canceled by user (note: backend process may continue running)",  # results_output
        None,                         # results_table
        gr.update(visible=False)      # results_table_container
    ]

def handle_evaluation_results(eval_results):
    """
    Updates UI components based on evaluation results.

    The success and failure paths were previously two near-identical
    10-element lists; they differ only in the results-table value and the
    table container's visibility, so a single return is built from the
    success flag.

    Args:
        eval_results (dict): Results from evaluation. Keys read: 'success'
            (bool), 'report' (shown in results_output), and — on success —
            'comparison_df' (shown in results_table).

    Returns:
        list: [results_output, results_table, eval_button, cancel_button,
            subject_selection_mode, num_subjects_slider,
            all_questions_checkbox, num_questions_slider, model1_dropdown,
            results_table_container] — 10 items total.

    NOTE(review): unlike start_evaluation, this callback never re-enables
    specific_subjects or model2_dropdown — confirm against the Gradio
    wiring whether those stay locked intentionally; fixing it would change
    the output-list length this callback is bound to.
    """
    logging.info("Eval results: %s", eval_results)
    succeeded = eval_results['success']
    return [
        eval_results['report'],  # results_output
        eval_results['comparison_df'] if succeeded else None,  # results_table
        gr.update(interactive=True),   # eval_button
        gr.update(visible=False),      # cancel_button
        gr.update(interactive=True),   # subject_selection_mode
        gr.update(interactive=True),   # num_subjects_slider
        gr.update(interactive=True),   # all_questions_checkbox
        gr.update(interactive=True),   # num_questions_slider
        gr.update(interactive=True),   # model1_dropdown
        gr.update(visible=succeeded)   # results_table_container
    ]