H2H-eval-comparator / configs /models_config.py
rohansampath's picture
Update configs/models_config.py
4114529 verified
import gradio as gr
# Available models
AVAILABLE_MODELS = [
"(Select Model)",
"mistralai/Mistral-7B-v0.1",
]
def create_model_config_section():
"""
Creates the "Head to Head - Choose Models" section with two model configurations side by side.
Returns the components needed for the main app.
"""
with gr.Column() as model_config_container:
gr.Markdown("## (C) Head to Head - Choose Models to evaluate against each other")
with gr.Row():
# Left column - Model 1 configuration
with gr.Column(scale=1) as model1_column:
with gr.Group(elem_classes=["config-box"]):
gr.Markdown("### Model 1")
model1_dropdown = gr.Dropdown(
choices=AVAILABLE_MODELS,
value="(Select Model)",
label="Select Model 1",
info="Choose the first model for head-to-head comparison"
)
model1_shots = gr.Slider(
minimum=0,
maximum=5,
value=5,
step=1,
label="Number of Few-shot Examples",
info="Number of examples to use for few-shot learning (0-5)"
)
model1_regex = gr.Textbox(
label="Regex Pattern",
placeholder="Optional: Apply regex pattern to model outputs",
info="Leave empty for no regex pattern"
)
model1_flash_attn = gr.Checkbox(
label="Use FlashAttention",
value=True,
info="Use FlashAttention for better performance (if supported by model)"
)
# Divider in the middle
with gr.Column(scale=0.1):
gr.Markdown('<div style="border-left: 1px solid #ddd; height: 100%;"></div>', elem_classes=["center-divider"])
# Right column - Model 2 configuration
with gr.Column(scale=1) as model2_column:
with gr.Group(elem_classes=["config-box"]):
gr.Markdown("### Model 2")
model2_dropdown = gr.Dropdown(
choices=AVAILABLE_MODELS,
value="(Select Model)",
label="Select Model 2",
info="Choose the second model for head-to-head comparison"
)
model2_shots = gr.Slider(
minimum=0,
maximum=5,
value=5,
step=1,
label="Number of Few-shot Examples",
info="Number of examples to use for few-shot learning (0-5)"
)
model2_regex = gr.Textbox(
label="Regex Pattern",
placeholder="Optional: Apply regex pattern to model outputs",
info="Leave empty for no regex pattern"
)
model2_flash_attn = gr.Checkbox(
label="Use FlashAttention",
value=True,
info="Use FlashAttention for better performance (if supported by model)"
)
# Error message area - initially hidden
model_config_error = gr.Markdown(
visible=False,
value="⚠️ **Error**: Both models and configurations are identical. Please select different models or configurations for comparison.",
elem_classes=["error-message"]
)
return {
'container': model_config_container,
'model1_dropdown': model1_dropdown,
'model1_shots': model1_shots,
'model1_regex': model1_regex,
'model1_flash_attn': model1_flash_attn,
'model2_dropdown': model2_dropdown,
'model2_shots': model2_shots,
'model2_regex': model2_regex,
'model2_flash_attn': model2_flash_attn,
'error_message': model_config_error
}
def validate_model_configs(model1, model1_shots, model1_regex, model1_flash,
model2, model2_shots, model2_regex, model2_flash):
"""
Validates that the two model configurations are not identical.
Returns:
- bool: Whether the configurations are valid (not identical)
- str: Error message if invalid, otherwise empty string
"""
if model1 == "(Select Model)" or model2 == "(Select Model)":
return True, ""
# Check if models and all configs are identical
if (model1 == model2 and
model1_shots == model2_shots and
model1_regex == model2_regex and
model1_flash == model2_flash):
return False, "⚠️ **Error**: Both configurations are identical. Please select different configurations (e.g., number of few-shot examples) for comparison."
return True, ""
def update_eval_button_state(model1, model1_shots, model1_regex, model1_flash,
model2, model2_shots, model2_regex, model2_flash):
"""
Checks model configurations and updates the error message visibility and eval button state.
"""
is_valid, error_msg = validate_model_configs(
model1, model1_shots, model1_regex, model1_flash,
model2, model2_shots, model2_regex, model2_flash
)
if model1 == "(Select Model)" or model2 == "(Select Model)":
return gr.update(visible=False), gr.update(interactive=False)
if not is_valid:
return gr.update(visible=True, value=error_msg), gr.update(interactive=False)
return gr.update(visible=False), gr.update(interactive=True)
def get_model_configs(model1, model1_shots, model1_regex, model1_flash,
model2, model2_shots, model2_regex, model2_flash):
"""
Returns the model configurations as structured data for the evaluation function.
"""
return {
"model1": {
"name": model1,
"shots": model1_shots,
"regex": model1_regex,
"flash_attention": model1_flash
},
"model2": {
"name": model2,
"shots": model2_shots,
"regex": model2_regex,
"flash_attention": model2_flash
}
}