H2H-eval-comparator

Sleeping

App Files Files Community

rohansampath commited on Feb 26

Commit

24af1c0

verified ·

1 Parent(s): c9f6bdd

Create app_models_config.py

Browse files

Files changed (1) hide show

app_models_config.py +167 -0

app_models_config.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import gradio as gr
+# Available models
+AVAILABLE_MODELS = [
+    "(Select Model)",
+    "mistralai/Mistral-7B-v0.1",
+    "meta-llama/Llama-2-7b-hf",
+    "google/flan-t5-xxl",
+    "tiiuae/falcon-7b"
+]
+def create_model_config_section():
+    """
+    Creates the "Head to Head - Choose Models" section with two model configurations side by side.
+    Returns the components needed for the main app.
+    """
+    with gr.Column() as model_config_container:
+        gr.Markdown("## (C) Head to Head - Choose Models to evaluate against each other")
+        with gr.Row():
+            # Left column - Model 1 configuration
+            with gr.Column(scale=1) as model1_column:
+                with gr.Group(elem_classes=["config-box"]):
+                    gr.Markdown("### Model 1")
+                    model1_dropdown = gr.Dropdown(
+                        choices=AVAILABLE_MODELS,
+                        value="(Select Model)",
+                        label="Select Model 1",
+                        info="Choose the first model for head-to-head comparison"
+                    )
+                    model1_shots = gr.Slider(
+                        minimum=0,
+                        maximum=5,
+                        value=5,
+                        step=1,
+                        label="Number of Few-shot Examples",
+                        info="Number of examples to use for few-shot learning (0-5)"
+                    )
+                    model1_regex = gr.Textbox(
+                        label="Regex Pattern",
+                        placeholder="Optional: Apply regex pattern to model outputs",
+                        info="Leave empty for no regex pattern"
+                    )
+                    model1_flash_attn = gr.Checkbox(
+                        label="Use FlashAttention",
+                        value=True,
+                        info="Use FlashAttention for better performance (if supported by model)"
+                    )
+            # Divider in the middle
+            with gr.Column(scale=0.1):
+                gr.Markdown('<div style="border-left: 1px solid #ddd; height: 100%;"></div>', elem_classes=["center-divider"])
+            # Right column - Model 2 configuration
+            with gr.Column(scale=1) as model2_column:
+                with gr.Group(elem_classes=["config-box"]):
+                    gr.Markdown("### Model 2")
+                    model2_dropdown = gr.Dropdown(
+                        choices=AVAILABLE_MODELS,
+                        value="(Select Model)",
+                        label="Select Model 2",
+                        info="Choose the second model for head-to-head comparison"
+                    )
+                    model2_shots = gr.Slider(
+                        minimum=0,
+                        maximum=5,
+                        value=5,
+                        step=1,
+                        label="Number of Few-shot Examples",
+                        info="Number of examples to use for few-shot learning (0-5)"
+                    )
+                    model2_regex = gr.Textbox(
+                        label="Regex Pattern",
+                        placeholder="Optional: Apply regex pattern to model outputs",
+                        info="Leave empty for no regex pattern"
+                    )
+                    model2_flash_attn = gr.Checkbox(
+                        label="Use FlashAttention",
+                        value=True,
+                        info="Use FlashAttention for better performance (if supported by model)"
+                    )
+        # Error message area - initially hidden
+        model_config_error = gr.Markdown(
+            visible=False,
+            value="⚠️ **Error**: Both models and configurations are identical. Please select different models or configurations for comparison.",
+            elem_classes=["error-message"]
+        )
+    return {
+        'container': model_config_container,
+        'model1_dropdown': model1_dropdown,
+        'model1_shots': model1_shots,
+        'model1_regex': model1_regex,
+        'model1_flash_attn': model1_flash_attn,
+        'model2_dropdown': model2_dropdown,
+        'model2_shots': model2_shots,
+        'model2_regex': model2_regex,
+        'model2_flash_attn': model2_flash_attn,
+        'error_message': model_config_error
+    }
+def validate_model_configs(model1, model1_shots, model1_regex, model1_flash,
+                          model2, model2_shots, model2_regex, model2_flash):
+    """
+    Validates that the two model configurations are not identical.
+    Returns:
+    - bool: Whether the configurations are valid (not identical)
+    - str: Error message if invalid, otherwise empty string
+    """
+    if model1 == "(Select Model)" or model2 == "(Select Model)":
+        return True, ""
+    # Check if models and all configs are identical
+    if (model1 == model2 and
+        model1_shots == model2_shots and
+        model1_regex == model2_regex and
+        model1_flash == model2_flash):
+        return False, "⚠️ **Error**: Both models and configurations are identical. Please select different models or configurations for comparison."
+    return True, ""
+def update_eval_button_state(model1, model1_shots, model1_regex, model1_flash,
+                            model2, model2_shots, model2_regex, model2_flash):
+    """
+    Checks model configurations and updates the error message visibility and eval button state.
+    """
+    is_valid, error_msg = validate_model_configs(
+        model1, model1_shots, model1_regex, model1_flash,
+        model2, model2_shots, model2_regex, model2_flash
+    )
+    if model1 == "(Select Model)" or model2 == "(Select Model)":
+        return gr.update(visible=False), gr.update(interactive=False)
+    if not is_valid:
+        return gr.update(visible=True, value=error_msg), gr.update(interactive=False)
+    return gr.update(visible=False), gr.update(interactive=True)
+def get_model_configs(model1, model1_shots, model1_regex, model1_flash,
+                     model2, model2_shots, model2_regex, model2_flash):
+    """
+    Returns the model configurations as structured data for the evaluation function.
+    """
+    return {
+        "model1": {
+            "name": model1,
+            "shots": model1_shots,
+            "regex": model1_regex,
+            "flash_attention": model1_flash
+        },
+        "model2": {
+            "name": model2,
+            "shots": model2_shots,
+            "regex": model2_regex,
+            "flash_attention": model2_flash
+        }
+    }