Commit fbc8f04 · 1 parent 8459fca · update

app.py CHANGED
````diff
@@ -1,9 +1,11 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+import time
 
-# Initialize the client
+# Initialize the client with your model
 client = InferenceClient("zhangchenxu/TinyV-1.5B")
 
+# The prompt template for the LLM verifier
 LV_PROMPT = """
 You are an AI tasked with identifying false negatives in answer verification. A false negative occurs when a model's answer is essentially correct but is marked as incorrect due to minor discrepancies or formatting issues. Your job is to analyze the given question, ground truth answer, and model answer to determine if the model's answer is actually correct despite appearing different from the ground truth.
 
@@ -25,7 +27,7 @@ EXAMPLES = [
         "model_answer": "-10, -8, -6",
         "temp": 0.3,
         "top_p": 0.95,
-        "tokens":
+        "tokens": 1
     },
     {
         "name": "Latex Expression",
@@ -34,7 +36,7 @@ EXAMPLES = [
         "model_answer": "4/7",
         "temp": 0.3,
         "top_p": 0.95,
-        "tokens":
+        "tokens": 1
     },
     {
         "name": "Variable Labeling",
@@ -43,7 +45,7 @@ EXAMPLES = [
         "model_answer": "b=-3, c=0",
         "temp": 0.3,
         "top_p": 0.95,
-        "tokens":
+        "tokens": 1
     },
     {
         "name": "Paraphrase",
@@ -52,7 +54,7 @@ EXAMPLES = [
         "model_answer": "Yes, Peter can guarantee finding 5 genuine coins while ensuring that none of these genuine coins are paid to Vasya.",
         "temp": 0.3,
         "top_p": 0.95,
-        "tokens":
+        "tokens": 1
     },
     {
         "name": "False Example",
@@ -61,19 +63,27 @@ EXAMPLES = [
         "model_answer": "K2 is the tallest mountain on Earth.",
         "temp": 0.3,
         "top_p": 0.95,
-        "tokens":
+        "tokens": 1
     }
 ]
 
+# Verification function
 def verify_answer(question, ground_truth, model_answer, temperature, top_p, max_tokens):
+    # Format the prompt with user inputs
     prompt = LV_PROMPT.format(
         question=question,
         ground_truth=ground_truth,
         model_answer=model_answer
     )
+
+    # Prepare messages for the API
     messages = [{"role": "user", "content": prompt}]
+
+    # Initialize response
     response_text = ""
+
     try:
+        # Stream the response for better UX
         for message in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -88,6 +98,7 @@ def verify_answer(question, ground_truth, model_answer, temperature, top_p, max_
     except Exception as e:
         yield f"Error: {str(e)}"
 
+# Function to load an example
 def load_example(example_index):
     example = EXAMPLES[example_index]
     return (
@@ -99,44 +110,128 @@ def load_example(example_index):
         example["tokens"]
     )
 
-# (15 deleted lines of the previous interface code; their content is not captured in this view)
+# Create the Gradio interface
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
+    ),
+    css="""
+    .container { max-width: 1000px; margin: auto; }
+    .example-btn { min-width: 140px; }
+    .title { text-align: center; margin-bottom: 1rem; }
+    .result-box { min-height: 100px; }
+    """
+) as demo:
+    # Header
+    with gr.Group(elem_classes="container"):
+        gr.Markdown(
+            """
+            # 🧠 TinyV - Answer Verification Tool
+
+            This tool verifies if a model-generated answer is semantically correct compared to a ground truth answer using a fine-tuned LLM.
+            """,
+            elem_classes="title"
+        )
+
+    # Main input area
+    with gr.Row(equal_height=True):
+        # Left column - Inputs
+        with gr.Column():
+            question = gr.Textbox(
+                lines=3,
+                label="📘 Question",
+                placeholder="Enter the question here..."
+            )
+            ground_truth = gr.Textbox(
+                lines=3,
+                label="✅ Ground Truth Answer",
+                placeholder="Enter the correct answer here..."
+            )
+            model_answer = gr.Textbox(
+                lines=3,
+                label="🤖 Model Answer",
+                placeholder="Enter the answer to verify here..."
+            )
+
+        # Right column - Result and verification button
+        with gr.Column():
+            verify_btn = gr.Button("✅ Verify Answer", variant="primary", size="lg")
+            result = gr.Textbox(
+                label="🧾 Verification Result",
+                placeholder="Result will appear here...",
+                lines=9,
+                elem_classes="result-box"
+            )
+
+    # Examples section
+    gr.Markdown("### 🔍 Try Examples:")
+    with gr.Row() as example_row:
+        for i, ex in enumerate(EXAMPLES):
+            btn = gr.Button(ex["name"], size="sm", elem_classes="example-btn")
+            btn.click(
+                fn=lambda idx=i: load_example(idx),
+                outputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens]
+            )
+            # Also run verification when example is loaded
+            btn.click(
+                fn=verify_answer,
+                inputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens],
+                outputs=result,
+                queue=False
+            )
+
+    # Advanced Settings (hidden at the bottom)
+    with gr.Accordion("⚙️ Advanced Settings", open=False):
         with gr.Row():
-# (16 deleted lines of the previous interface code; their content is not captured in this view)
+            temperature = gr.Slider(0, 1, value=0.3, step=0.1, label="Temperature")
+            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+            max_tokens = gr.Slider(1, 128, value=1, step=1, label="Max Tokens")
+
+    # About section
+    with gr.Accordion("ℹ️ About This Tool", open=False):
+        gr.Markdown(
+            """
+            ### What This Tool Does
+
+            This verification tool uses the TinyV-1.5B model to determine if answers are semantically equivalent,
+            even when they have different:
+
+            - **Formatting** (LaTeX vs. plain text, spacing, etc.)
+            - **Ordering** (e.g., listing items in different orders)
+            - **Phrasing** (paraphrases with the same meaning)
+            - **Variable labeling** (with or without variable names)
+
+            ### API Usage
+
+            ```python
+            from gradio_client import Client
+
+            client = Client("zhangchenxu/TinyV")
+            result = client.predict(
+                question="What is the capital of France?",
+                ground_truth="The capital of France is Paris.",
+                model_answer="Paris is the capital of France.",
+                temperature=0.3,
+                top_p=0.95,
+                max_tokens=2,
+                api_name="/verify_answer"
+            )
+            print(result)
+            ```
+            """
+        )
+
+    # Connect the interface to the verification function
     verify_btn.click(
         fn=verify_answer,
         inputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens],
         outputs=result
     )
 
+# Define the public API
 demo.queue()
+
+# Launch the app
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
````
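The hunks above elide the body of the streaming loop inside `verify_answer` (old lines 80-87, new lines 90-97), so only the loop header and the `except` clause are visible. Below is a minimal sketch of how such a loop is typically written against `huggingface_hub`'s streaming `chat_completion` API; it is a guess based on the visible `response_text = ""` initialization and the generator-style `yield` in the error path, not the committed implementation.

```python
# Hypothetical completion of the elided loop body (new lines 90-97);
# the actual committed code may differ.
for message in client.chat_completion(
    messages,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
):
    # Each streamed chunk carries an incremental piece of the reply.
    token = message.choices[0].delta.content
    if token is not None:
        response_text += token
        # Yield the running text so the Gradio result box updates live.
        yield response_text
```

A few details in the diff are worth flagging. Every example now sets `"tokens": 1` and the Max Tokens slider defaults to 1, which suggests the verifier is expected to answer with a single-token verdict. The example buttons list `temperature`, `top_p`, and `max_tokens` as outputs before those sliders are created in the Advanced Settings accordion, so as written that forward reference would raise a `NameError` when the interface is built, unless the sliders are defined somewhere not shown in this diff. The added `import time` is also unused in the visible code.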