zhangchenxu commited on
Commit
5852c53
·
1 Parent(s): b17581e
Files changed (1) hide show
  1. app.py +25 -104
app.py CHANGED
@@ -1,11 +1,9 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import time
4
 
5
- # Initialize the client with your model
6
  client = InferenceClient("zhangchenxu/TinyV-1.5B")
7
 
8
- # The prompt template for the LLM verifier
9
  LV_PROMPT = """
10
  You are an AI tasked with identifying false negatives in answer verification. A false negative occurs when a model's answer is essentially correct but is marked as incorrect due to minor discrepancies or formatting issues. Your job is to analyze the given question, ground truth answer, and model answer to determine if the model's answer is actually correct despite appearing different from the ground truth.
11
 
@@ -67,25 +65,15 @@ EXAMPLES = [
67
  }
68
  ]
69
 
70
- # Main verification function
71
  def verify_answer(question, ground_truth, model_answer, temperature, top_p, max_tokens):
72
- # Format the prompt with user inputs
73
  prompt = LV_PROMPT.format(
74
  question=question,
75
  ground_truth=ground_truth,
76
  model_answer=model_answer
77
  )
78
-
79
- # Prepare the message format required by the API
80
- messages = [
81
- {"role": "user", "content": prompt}
82
- ]
83
-
84
- # Initialize response
85
  response_text = ""
86
-
87
  try:
88
- # Stream the response for better UX
89
  for message in client.chat_completion(
90
  messages,
91
  max_tokens=max_tokens,
@@ -100,7 +88,6 @@ def verify_answer(question, ground_truth, model_answer, temperature, top_p, max_
100
  except Exception as e:
101
  yield f"Error: {str(e)}"
102
 
103
- # Function to load an example when its button is clicked
104
  def load_example(example_index):
105
  example = EXAMPLES[example_index]
106
  return (
@@ -112,108 +99,42 @@ def load_example(example_index):
112
  example["tokens"]
113
  )
114
 
115
- # Create the Gradio interface
116
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]), title="TinyV") as demo:
117
- # Header with title and description
118
- with gr.Row():
119
- with gr.Column():
120
- gr.Markdown(
121
- """
122
- # TinyV - Answer Verification Tool
123
-
124
- This tool verifies if an answer is correct compared to a ground truth answer for RL.
125
- """
126
- )
127
 
128
- # Main interface
129
  with gr.Row():
130
  with gr.Column(scale=1):
131
- gr.Markdown(
132
- """
133
- ## How to Use
134
-
135
- 1. Enter the question in the first box
136
- 2. Enter the ground truth answer
137
- 3. Enter the model's answer to verify
138
- 4. Adjust model parameters if needed
139
- 5. Click "Verify Answer" to see the result
140
-
141
- ### What this tool does
142
-
143
- This tool determines if a model's answer is semantically correct compared to a ground truth answer using a fine-tuned LLM.
144
-
145
- The model analyzes both answers and returns:
146
- - **True** if the model answer is correct
147
- - **False** if the model answer is incorrect
148
-
149
- ### API Usage Example
150
- ```python
151
- from gradio_client import Client
152
-
153
- client = Client("zhangchenxu/TinyV")
154
- result = client.predict(
155
- question="Determine all real values of $x$ for which $(x+8)^{4}=(2 x+16)^{2}$.",
156
- ground_truth="-6,-8,-10",
157
- model_answer="-10, -8, -6",
158
- temperature=0.3,
159
- top_p=0.95,
160
- max_tokens=1,
161
- api_name="/verify_answer"
162
- )
163
- print(result)
164
- ```
165
- """
166
- )
167
-
168
- # Model parameters (hidden in a collapsible section)
169
- with gr.Accordion("Advanced Settings", open=False):
170
- temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.1, label="Temperature")
171
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
172
- max_tokens = gr.Slider(minimum=1, maximum=256, value=1, step=1, label="Max Tokens")
173
-
174
- with gr.Column(scale=1):
175
- gr.Markdown("## Input")
176
- question = gr.Textbox(lines=3, label="Question", placeholder="Enter the question here...")
177
- ground_truth = gr.Textbox(lines=5, label="Ground Truth Answer", placeholder="Enter the correct answer here...")
178
- model_answer = gr.Textbox(lines=5, label="Model Answer", placeholder="Enter the answer to verify here...")
179
-
180
- # Examples section as buttons
181
- gr.Markdown("### Try an example:")
182
  with gr.Row():
183
- example_buttons = []
184
- for i, example in enumerate(EXAMPLES):
185
- btn = gr.Button(example["name"], size="sm")
186
- example_buttons.append(btn)
187
- # Connect each button to the load_example function
188
  btn.click(
189
  fn=lambda idx=i: load_example(idx),
190
  outputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens]
191
  )
192
-
193
- verify_btn = gr.Button("Verify Answer", variant="primary")
194
-
195
- gr.Markdown("## Result")
196
- result = gr.Textbox(label="Verification Result", placeholder="Result will appear here...", lines=5)
197
-
198
- # Connect the interface to the verification function
 
 
 
 
199
  verify_btn.click(
200
- verify_answer,
201
  inputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens],
202
  outputs=result
203
  )
204
-
205
- # Run verification when an example is loaded (optional)
206
- for btn in example_buttons:
207
- btn.click(
208
- fn=verify_answer,
209
- inputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens],
210
- outputs=result,
211
- _js="() => {setTimeout(() => document.querySelector('#verify-btn').click(), 100)}",
212
- queue=False
213
- )
214
 
215
- # Define the public API
216
- demo.queue()
217
  # Launch the app
 
218
  if __name__ == "__main__":
219
  demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
+ # Initialize the client
5
  client = InferenceClient("zhangchenxu/TinyV-1.5B")
6
 
 
7
  LV_PROMPT = """
8
  You are an AI tasked with identifying false negatives in answer verification. A false negative occurs when a model's answer is essentially correct but is marked as incorrect due to minor discrepancies or formatting issues. Your job is to analyze the given question, ground truth answer, and model answer to determine if the model's answer is actually correct despite appearing different from the ground truth.
9
 
 
65
  }
66
  ]
67
 
 
68
  def verify_answer(question, ground_truth, model_answer, temperature, top_p, max_tokens):
 
69
  prompt = LV_PROMPT.format(
70
  question=question,
71
  ground_truth=ground_truth,
72
  model_answer=model_answer
73
  )
74
+ messages = [{"role": "user", "content": prompt}]
 
 
 
 
 
 
75
  response_text = ""
 
76
  try:
 
77
  for message in client.chat_completion(
78
  messages,
79
  max_tokens=max_tokens,
 
88
  except Exception as e:
89
  yield f"Error: {str(e)}"
90
 
 
91
  def load_example(example_index):
92
  example = EXAMPLES[example_index]
93
  return (
 
99
  example["tokens"]
100
  )
101
 
102
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
103
+ gr.Markdown("## 🧠 TinyV - Answer Verification Tool\nThis tool verifies model-generated answers for correctness.")
 
 
 
 
 
 
 
 
 
 
104
 
 
105
  with gr.Row():
106
  with gr.Column(scale=1):
107
+ question = gr.Textbox(lines=3, label="πŸ“˜ Question")
108
+ ground_truth = gr.Textbox(lines=3, label="βœ… Ground Truth Answer")
109
+ model_answer = gr.Textbox(lines=3, label="πŸ€– Model Answer")
110
+
111
+ gr.Markdown("### πŸ” Try Examples:")
112
+ example_buttons = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  with gr.Row():
114
+ for i, ex in enumerate(EXAMPLES):
115
+ btn = gr.Button(ex["name"], size="sm")
 
 
 
116
  btn.click(
117
  fn=lambda idx=i: load_example(idx),
118
  outputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens]
119
  )
120
+ example_buttons.append(btn)
121
+
122
+ with gr.Column(scale=1):
123
+ with gr.Accordion("βš™οΈ Advanced Settings", open=False):
124
+ temperature = gr.Slider(0, 1, value=0.3, step=0.1, label="Temperature")
125
+ top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
126
+ max_tokens = gr.Slider(1, 128, value=2, step=1, label="Max Tokens")
127
+
128
+ verify_btn = gr.Button("βœ… Verify Answer", variant="primary")
129
+ result = gr.Textbox(label="🧾 Verification Result", lines=5, placeholder="Result will appear here...")
130
+
131
  verify_btn.click(
132
+ fn=verify_answer,
133
  inputs=[question, ground_truth, model_answer, temperature, top_p, max_tokens],
134
  outputs=result
135
  )
 
 
 
 
 
 
 
 
 
 
136
 
 
 
137
  # Launch the app
138
+ demo.queue()
139
  if __name__ == "__main__":
140
  demo.launch()