Spaces:
Sleeping
Sleeping
File size: 6,684 Bytes
24af1c0 4114529 24af1c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import gradio as gr
# Choices offered by both model dropdowns. The leading "(Select Model)" entry
# is the placeholder the dropdowns start on; the validation helpers in this
# module compare against that exact string to detect "nothing chosen yet".
AVAILABLE_MODELS = [
    "(Select Model)",
    "mistralai/Mistral-7B-v0.1",
]
def _build_model_config_box(index, ordinal):
    """Render one model's configuration box in the current Gradio context.

    Args:
        index: Number shown in the headings and labels (1 or 2).
        ordinal: Word used in the dropdown help text ("first" / "second").

    Returns:
        Tuple ``(dropdown, shots, regex, flash_attn)`` of the created
        components, in the order they are rendered.
    """
    with gr.Column(scale=1):
        with gr.Group(elem_classes=["config-box"]):
            gr.Markdown(f"### Model {index}")
            dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value="(Select Model)",
                label=f"Select Model {index}",
                info=f"Choose the {ordinal} model for head-to-head comparison",
            )
            shots = gr.Slider(
                minimum=0,
                maximum=5,
                value=5,
                step=1,
                label="Number of Few-shot Examples",
                info="Number of examples to use for few-shot learning (0-5)",
            )
            regex = gr.Textbox(
                label="Regex Pattern",
                placeholder="Optional: Apply regex pattern to model outputs",
                info="Leave empty for no regex pattern",
            )
            flash_attn = gr.Checkbox(
                label="Use FlashAttention",
                value=True,
                info="Use FlashAttention for better performance (if supported by model)",
            )
    return dropdown, shots, regex, flash_attn


def create_model_config_section():
    """Create the "Head to Head - Choose Models" section.

    Lays out two identical model configuration boxes side by side, separated
    by a thin vertical divider, followed by an initially hidden error message.

    Returns:
        dict mapping these keys to the created Gradio components:
        ``container``, ``model1_dropdown``, ``model1_shots``,
        ``model1_regex``, ``model1_flash_attn``, ``model2_dropdown``,
        ``model2_shots``, ``model2_regex``, ``model2_flash_attn`` and
        ``error_message``.
    """
    with gr.Column() as model_config_container:
        gr.Markdown("## (C) Head to Head - Choose Models to evaluate against each other")
        with gr.Row():
            # Left column - Model 1 configuration
            (model1_dropdown, model1_shots,
             model1_regex, model1_flash_attn) = _build_model_config_box(1, "first")

            # Divider in the middle. Gradio expects an integer ``scale``
            # (a fractional value such as 0.1 is not supported), and the
            # default ``min_width`` of 320px would make the divider a wide
            # column; scale=0 with a tiny min_width keeps it a thin line.
            with gr.Column(scale=0, min_width=1):
                gr.Markdown(
                    '<div style="border-left: 1px solid #ddd; height: 100%;"></div>',
                    elem_classes=["center-divider"],
                )

            # Right column - Model 2 configuration
            (model2_dropdown, model2_shots,
             model2_regex, model2_flash_attn) = _build_model_config_box(2, "second")

        # Error message area - initially hidden
        model_config_error = gr.Markdown(
            visible=False,
            value="⚠️ **Error**: Both models and configurations are identical. Please select different models or configurations for comparison.",
            elem_classes=["error-message"],
        )

    return {
        'container': model_config_container,
        'model1_dropdown': model1_dropdown,
        'model1_shots': model1_shots,
        'model1_regex': model1_regex,
        'model1_flash_attn': model1_flash_attn,
        'model2_dropdown': model2_dropdown,
        'model2_shots': model2_shots,
        'model2_regex': model2_regex,
        'model2_flash_attn': model2_flash_attn,
        'error_message': model_config_error,
    }
def validate_model_configs(model1, model1_shots, model1_regex, model1_flash,
                           model2, model2_shots, model2_regex, model2_flash):
    """Check that the two model configurations differ in at least one setting.

    A pair where either model is still the "(Select Model)" placeholder is
    reported as valid; there is nothing to compare yet.

    Returns:
        tuple ``(is_valid, message)``: ``(True, "")`` when the configurations
        are acceptable, otherwise ``(False, <error text>)``.
    """
    placeholder = "(Select Model)"
    if placeholder in (model1, model2):
        return True, ""

    # Compare the settings pairwise, in the same order for both sides.
    left = (model1, model1_shots, model1_regex, model1_flash)
    right = (model2, model2_shots, model2_regex, model2_flash)
    identical = all(a == b for a, b in zip(left, right))

    if not identical:
        return True, ""
    return False, "⚠️ **Error**: Both configurations are identical. Please select different configurations (e.g., number of few-shot examples) for comparison."
def update_eval_button_state(model1, model1_shots, model1_regex, model1_flash,
                             model2, model2_shots, model2_regex, model2_flash):
    """Derive the error-message and eval-button updates from the current inputs.

    Returns:
        tuple of two ``gr.update`` results: the first sets the error
        message's visibility (and its text when shown), the second sets
        whether the eval button is interactive.

        - Either model still on "(Select Model)": error hidden, button disabled.
        - Configurations rejected by ``validate_model_configs``: error shown
          with the returned message, button disabled.
        - Otherwise: error hidden, button enabled.
    """
    configs_ok, message = validate_model_configs(
        model1, model1_shots, model1_regex, model1_flash,
        model2, model2_shots, model2_regex, model2_flash
    )
    awaiting_selection = "(Select Model)" in (model1, model2)

    if awaiting_selection or configs_ok:
        error_update = gr.update(visible=False)
    else:
        error_update = gr.update(visible=True, value=message)

    # The button is usable only once both models are chosen and they differ.
    button_update = gr.update(interactive=configs_ok and not awaiting_selection)
    return error_update, button_update
def get_model_configs(model1, model1_shots, model1_regex, model1_flash,
                      model2, model2_shots, model2_regex, model2_flash):
    """Bundle the two models' settings into a nested dictionary.

    The values are passed through unchanged; no validation is performed here.

    Returns:
        dict with keys ``"model1"`` and ``"model2"``, each mapping to a dict
        with keys ``"name"``, ``"shots"``, ``"regex"`` and
        ``"flash_attention"``.
    """
    return {
        "model1": {
            "name": model1,
            "shots": model1_shots,
            "regex": model1_regex,
            "flash_attention": model1_flash,
        },
        "model2": {
            "name": model2,
            "shots": model2_shots,
            "regex": model2_regex,
            "flash_attention": model2_flash,
        },
    }