File size: 8,922 Bytes
4c36941
0b21fdd
4c36941
0b21fdd
6997696
0b21fdd
 
 
 
6997696
0b21fdd
6997696
0b21fdd
 
4c36941
e8d7a5b
940cf49
e8d7a5b
 
 
bd05b7b
c5224d3
3195f7f
0b21fdd
3195f7f
0b21fdd
 
 
df31ae3
0b21fdd
 
b03c5d3
0b21fdd
 
b03c5d3
0b21fdd
 
6896b10
0b21fdd
 
7bf88c1
0b21fdd
 
6896b10
0b21fdd
bd05b7b
fc5f4bd
bd05b7b
110a507
0b21fdd
110a507
0b21fdd
 
 
 
 
 
110a507
5ea0bec
0b21fdd
 
 
 
 
 
 
 
 
 
 
 
 
6896b10
 
2010e21
0b21fdd
2010e21
0b21fdd
 
 
 
 
 
 
 
 
 
 
5ea0bec
 
a5202a8
0b21fdd
a5202a8
0b21fdd
 
 
 
 
 
 
df31ae3
 
33231b0
0b21fdd
33231b0
0b21fdd
 
 
 
 
 
 
ed9a008
 
0b21fdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd05b7b
0b21fdd
33231b0
a033447
0b21fdd
bd05b7b
 
33231b0
bd05b7b
0b21fdd
 
 
 
 
 
 
 
 
 
 
 
33231b0
 
0b21fdd
a5202a8
 
 
 
 
0b21fdd
 
 
 
 
a5202a8
df31ae3
0b21fdd
 
 
 
 
 
 
 
 
 
 
 
 
a033447
61b4a88
e1b71fe
cec8405
61b4a88
33231b0
0b21fdd
 
 
 
 
 
 
 
 
 
33231b0
bd05b7b
 
 
 
33231b0
 
 
0b21fdd
33231b0
bd05b7b
33231b0
bd05b7b
0b21fdd
 
 
 
 
 
 
 
 
 
 
 
33231b0
df31ae3
ee60006
0b21fdd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import os
import gradio as gr
from huggingface_hub import login
from modules.ui_components import CSS, create_header, create_results_section, create_action_buttons
from configs.dataset_config import (create_dataset_section, create_mmlu_config_section, 
                                  load_dataset_preview, update_interface_based_on_dataset,
                                  toggle_preview, update_subject_selection_ui, 
                                  update_questions_interface, get_subject_mode_param,
                                  get_subject_names)
from configs.models_config import (create_model_config_section, update_eval_button_state,
                                      get_model_configs)
from run_evaluation import run_mmlu_evaluation
from utils.state_management import (start_evaluation, finish_evaluation, 
                                   cancel_evaluation, handle_evaluation_results)

# Log in to the Hugging Face Hub when a token is configured; otherwise
# print a warning and continue without logging in.
hf_token = os.environ.get("HF_READ_WRITE_TOKEN")
if not hf_token:
    print("⚠️ No HF_READ_WRITE_TOKEN found in environment")
else:
    login(hf_token)

# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
with gr.Blocks(css=CSS) as demo:
    # ----- Layout ----------------------------------------------------------
    # The section factories are called in page order. Apart from the header,
    # their return values are indexed by string key below to reach the
    # individual components.
    header_components = create_header()
    dataset_components = create_dataset_section()        # Section A
    mmlu_components = create_mmlu_config_section()       # Section B
    model_components = create_model_config_section()     # Section C
    results_components = create_results_section()
    action_components = create_action_buttons()

    # ----- State holders ---------------------------------------------------
    evaluation_state = gr.State({"running": False})
    preview_visibility = gr.State(False)

    # ----- Shared handles and component lists ------------------------------
    dataset_dropdown = dataset_components['dropdown']
    preview_toggle = dataset_components['preview_toggle']
    eval_button = action_components['eval_button']
    cancel_button = action_components['cancel_button']

    # The eight per-model settings, model 1 first. The same ordered list is
    # used as the watch list for validation, as the validator's inputs, and
    # as the tail of the evaluation inputs.
    model_config_inputs = [
        model_components[key]
        for key in (
            'model1_dropdown',
            'model1_shots',
            'model1_regex',
            'model1_flash_attn',
            'model2_dropdown',
            'model2_shots',
            'model2_regex',
            'model2_flash_attn',
        )
    ]

    # Outputs written by both start_evaluation and cancel_evaluation.
    run_control_outputs = [
        evaluation_state,
        mmlu_components['subject_selection_mode'],
        mmlu_components['num_subjects_slider'],
        mmlu_components['specific_subjects'],
        mmlu_components['all_questions_checkbox'],
        mmlu_components['num_questions_slider'],
        model_components['model1_dropdown'],
        model_components['model2_dropdown'],
        eval_button,
        cancel_button,
        results_components['output'],
        results_components['table'],
        results_components['table_container'],
    ]

    # ----- Dataset selection -----------------------------------------------
    # Step 1: load the preview data for the chosen dataset.
    preview_loaded = dataset_dropdown.change(
        fn=load_dataset_preview,
        inputs=[dataset_dropdown],
        outputs=[
            dataset_components['preview_data_state'],
            mmlu_components['specific_subjects'],
            mmlu_components['num_subjects_slider'],
        ],
    )
    # Step 2: once the preview is loaded, update the dependent sections.
    preview_loaded.then(
        fn=update_interface_based_on_dataset,
        inputs=[dataset_dropdown, preview_visibility],
        outputs=[
            mmlu_components['container'],
            model_components['container'],
            results_components['container'],
            preview_toggle,
            dataset_components['preview_container'],
            preview_visibility,
            # NOTE(review): preview_toggle is listed a second time here,
            # presumably to line up with the handler's return values --
            # confirm against update_interface_based_on_dataset.
            preview_toggle,
        ],
    )

    # ----- Dataset preview toggle ------------------------------------------
    preview_toggle.click(
        fn=toggle_preview,
        inputs=[
            dataset_dropdown,
            preview_visibility,
            dataset_components['preview_data_state'],
        ],
        outputs=[
            preview_visibility,
            dataset_components['preview_container'],
            dataset_components['preview_output'],
            preview_toggle,
        ],
    )

    # ----- MMLU configuration ----------------------------------------------
    # Changing the subject selection mode updates the two subject containers.
    mmlu_components['subject_selection_mode'].change(
        fn=update_subject_selection_ui,
        inputs=[mmlu_components['subject_selection_mode']],
        outputs=[
            mmlu_components['num_subjects_container'],
            mmlu_components['specific_subjects_container'],
        ],
    )

    # Changing the "all questions" checkbox updates the question-count slider
    # and its info text.
    mmlu_components['all_questions_checkbox'].change(
        fn=update_questions_interface,
        inputs=[mmlu_components['all_questions_checkbox']],
        outputs=[
            mmlu_components['num_questions_slider'],
            mmlu_components['questions_info_text'],
        ],
    )

    # ----- Model configuration validation ----------------------------------
    # A change to any model setting re-runs validation over all of them.
    for watched in model_config_inputs:
        watched.change(
            fn=update_eval_button_state,
            inputs=model_config_inputs,
            outputs=[model_components['error_message'], eval_button],
        )

    # ----- Evaluation pipeline ---------------------------------------------
    # Step 1: start_evaluation receives the state and writes the shared
    # run-control outputs.
    eval_started = eval_button.click(
        fn=start_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )

    # Step 2: convert the raw UI values with the config helpers, run the
    # evaluation, and store its return value in tmp_data.
    eval_computed = eval_started.then(
        fn=lambda mode, num, subjects, all_q, num_q, m1, m1_shots, m1_regex, m1_flash, m2, m2_shots, m2_regex, m2_flash: (
            run_mmlu_evaluation(
                get_subject_mode_param(mode),
                num,
                get_subject_names(subjects),
                all_q,
                num_q,
                get_model_configs(
                    m1, m1_shots, m1_regex, m1_flash,
                    m2, m2_shots, m2_regex, m2_flash,
                ),
            )
        ),
        inputs=[
            mmlu_components['subject_selection_mode'],
            mmlu_components['num_subjects_slider'],
            mmlu_components['specific_subjects'],
            mmlu_components['all_questions_checkbox'],
            mmlu_components['num_questions_slider'],
            *model_config_inputs,
        ],
        outputs=[results_components['tmp_data']],
    )

    # Step 3: hand the stored result to handle_evaluation_results, which
    # writes the results widgets and the controls listed below.
    # NOTE(review): unlike the start/cancel output list, this one does not
    # include specific_subjects or model2_dropdown -- confirm that is intended.
    results_shown = eval_computed.then(
        fn=handle_evaluation_results,
        inputs=[results_components['tmp_data']],
        outputs=[
            results_components['output'],
            results_components['table'],
            eval_button,
            cancel_button,
            mmlu_components['subject_selection_mode'],
            mmlu_components['num_subjects_slider'],
            mmlu_components['all_questions_checkbox'],
            mmlu_components['num_questions_slider'],
            model_components['model1_dropdown'],
            results_components['table_container'],
        ],
    )

    # Step 4: finish_evaluation receives the state and writes it back.
    results_shown.then(
        fn=finish_evaluation,
        inputs=[evaluation_state],
        outputs=[evaluation_state],
    )

    # ----- Cancel ----------------------------------------------------------
    # NOTE(review): this listener passes no `cancels=` argument; confirm that
    # updating the state/UI here is enough to stop an in-flight run.
    cancel_button.click(
        fn=cancel_evaluation,
        inputs=[evaluation_state],
        outputs=run_control_outputs,
    )

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()