Add session statistics
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import random
 import json
 import os
+import time
 from datetime import datetime
 from datasets import load_dataset, Dataset
 from huggingface_hub import HfApi, create_repo, dataset_info
@@ -142,19 +143,45 @@ def get_random_example():

     return prompt_display, completion_1_display, completion_2_display, idx, instruction, completion_1, completion_2

-def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def format_stats_display(judgment_times, num_judgments, num_skips):
+    """Format the statistics display"""
+    if num_judgments == 0:
+        return "📊 **Session Statistics:** No judgments made yet."
+
+    avg_time = sum(judgment_times) / len(judgment_times)
+
+    stats = f"""📊 **Session Statistics:** {num_judgments} judgments made, {num_skips} items skipped. Average time per judgment: {avg_time:.1f} seconds."""
+
+    return stats
+
+def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                       start_time, judgment_times, num_judgments, num_skips):
     """Handle when user selects left completion as better"""
     print(f"User selected LEFT completion as better for example {current_idx}")

+    # Calculate time taken for this judgment
+    end_time = time.time()
+    time_taken = end_time - start_time
+    judgment_times.append(time_taken)
+    num_judgments += 1
+
+    print(f"Time taken for judgment: {time_taken:.1f} seconds")
+
     # Save the annotation
     success = save_annotation(current_idx, instruction, completion_1, completion_2, "left")

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

     message = "✅ Annotation saved! Left completion selected as better." if success else "❌ Left completion selected (save failed - check console)"
     gr.Info(message)

+    # Reset timer for new example
+    new_start_time = time.time()
+
     return (
         new_prompt,
         new_comp_1,
@@ -162,22 +189,42 @@ def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

-def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                        start_time, judgment_times, num_judgments, num_skips):
     """Handle when user selects right completion as better"""
     print(f"User selected RIGHT completion as better for example {current_idx}")

+    # Calculate time taken for this judgment
+    end_time = time.time()
+    time_taken = end_time - start_time
+    judgment_times.append(time_taken)
+    num_judgments += 1
+
+    print(f"Time taken for judgment: {time_taken:.1f} seconds")
+
     # Save the annotation
     success = save_annotation(current_idx, instruction, completion_1, completion_2, "right")

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

     message = "✅ Annotation saved! Right completion selected as better." if success else "❌ Right completion selected (save failed - check console)"
     gr.Info(message)

+    # Reset timer for new example
+    new_start_time = time.time()
+
     return (
         new_prompt,
         new_comp_1,
@@ -185,18 +232,31 @@ def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

-def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                start_time, judgment_times, num_judgments, num_skips):
     """Handle when user skips the current example"""
     print(f"User skipped example {current_idx}")

-    #
+    # Increment skip counter (don't track time for skips)
+    num_skips += 1

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()

+    # Reset timer for new example
+    new_start_time = time.time()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)
+
     gr.Info("⏭️ Skipped example (not saved).")

     return (
@@ -206,7 +266,12 @@ def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

 # Initialize dataset on startup
@@ -237,6 +302,12 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning from Human Feedback)
     current_completion_1 = gr.State(init_completion_1)
     current_completion_2 = gr.State(init_completion_2)

+    # State to track timing and statistics
+    start_time = gr.State(time.time())  # When current example was loaded
+    judgment_times = gr.State([])  # List of times taken for each judgment
+    num_judgments = gr.State(0)  # Number of judgments made
+    num_skips = gr.State(0)  # Number of examples skipped
+
     # Display prompt
     prompt_display = gr.Markdown(init_prompt, label="Prompt")

@@ -259,23 +330,32 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning from Human Feedback)
     status_msg = f"**Status:** ✅ Connected. Annotations are being saved to [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})"
     gr.Markdown(status_msg)

+    # Statistics display
+    stats_display = gr.Markdown("📊 **Session Statistics:** No judgments made yet.", label="Performance Stats")
+
     # Wire up the buttons
     left_better_btn.click(
         handle_left_better,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
     )

     right_better_btn.click(
         handle_right_better,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
    )

     skip_btn.click(
         handle_skip,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
     )

 if __name__ == "__main__":
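A minimal, self-contained sketch of the pattern this commit relies on — per-session timing kept in `gr.State` values that are passed through a button's `inputs` and returned via its `outputs` — is shown below. It is illustrative only; the component and function names are assumptions, not taken from `app.py`.

```python
# Illustrative sketch (assumed names, not from app.py): track how long each item
# stays on screen using gr.State, updating the stats text on every click.
import time

import gradio as gr


def record_judgment(start_time, judgment_times, num_judgments):
    # Time elapsed since the current item was shown.
    judgment_times.append(time.time() - start_time)
    num_judgments += 1
    avg_time = sum(judgment_times) / len(judgment_times)
    stats = f"📊 {num_judgments} judgments, average {avg_time:.1f}s each"
    # Return a fresh start time so the timer resets for the next item.
    return time.time(), judgment_times, num_judgments, stats


with gr.Blocks() as demo:
    start_time = gr.State(time.time())  # when the current item appeared
    judgment_times = gr.State([])       # seconds taken per judgment
    num_judgments = gr.State(0)         # total judgments this session
    stats_display = gr.Markdown("📊 No judgments made yet.")
    judge_btn = gr.Button("Record judgment")

    judge_btn.click(
        record_judgment,
        inputs=[start_time, judgment_times, num_judgments],
        outputs=[start_time, judgment_times, num_judgments, stats_display],
    )

if __name__ == "__main__":
    demo.launch()
```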