aaronsnoswell committed on
Commit
5a5d1ac
·
verified ·
1 Parent(s): 92f26e3

Add session statistics

Browse files
Files changed (1) hide show
  1. app.py +93 -13
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import random
3
  import json
4
  import os
 
5
  from datetime import datetime
6
  from datasets import load_dataset, Dataset
7
  from huggingface_hub import HfApi, create_repo, dataset_info
@@ -142,19 +143,45 @@ def get_random_example():
142
 
143
  return prompt_display, completion_1_display, completion_2_display, idx, instruction, completion_1, completion_2
144
 
145
- def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
 
 
 
 
 
 
 
 
 
 
 
 
146
  """Handle when user selects left completion as better"""
147
  print(f"User selected LEFT completion as better for example {current_idx}")
148
 
 
 
 
 
 
 
 
 
149
  # Save the annotation
150
  success = save_annotation(current_idx, instruction, completion_1, completion_2, "left")
151
 
152
  # Get new random example
153
  new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
 
 
 
154
 
155
  message = "βœ… Annotation saved! Left completion selected as better." if success else "βœ… Left completion selected (save failed - check console)"
156
  gr.Info(message)
157
 
 
 
 
158
  return (
159
  new_prompt,
160
  new_comp_1,
@@ -162,22 +189,42 @@ def handle_left_better(prompt, completion_1_display, completion_2_display, curre
162
  new_idx,
163
  new_instruction,
164
  new_completion_1,
165
- new_completion_2
 
 
 
 
 
166
  )
167
 
168
- def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
 
169
  """Handle when user selects right completion as better"""
170
  print(f"User selected RIGHT completion as better for example {current_idx}")
171
 
 
 
 
 
 
 
 
 
172
  # Save the annotation
173
  success = save_annotation(current_idx, instruction, completion_1, completion_2, "right")
174
 
175
  # Get new random example
176
  new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
 
 
 
177
 
178
  message = "βœ… Annotation saved! Right completion selected as better." if success else "βœ… Right completion selected (save failed - check console)"
179
  gr.Info(message)
180
 
 
 
 
181
  return (
182
  new_prompt,
183
  new_comp_1,
@@ -185,18 +232,31 @@ def handle_right_better(prompt, completion_1_display, completion_2_display, curr
185
  new_idx,
186
  new_instruction,
187
  new_completion_1,
188
- new_completion_2
 
 
 
 
 
189
  )
190
 
191
- def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
 
192
  """Handle when user skips the current example"""
193
  print(f"User skipped example {current_idx}")
194
 
195
- # Don't save skipped annotations
 
196
 
197
  # Get new random example
198
  new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
199
 
 
 
 
 
 
 
200
  gr.Info("⏭️ Skipped example (not saved).")
201
 
202
  return (
@@ -206,7 +266,12 @@ def handle_skip(prompt, completion_1_display, completion_2_display, current_idx,
206
  new_idx,
207
  new_instruction,
208
  new_completion_1,
209
- new_completion_2
 
 
 
 
 
210
  )
211
 
212
  # Initialize dataset on startup
@@ -237,6 +302,12 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning
237
  current_completion_1 = gr.State(init_completion_1)
238
  current_completion_2 = gr.State(init_completion_2)
239
 
 
 
 
 
 
 
240
  # Display prompt
241
  prompt_display = gr.Markdown(init_prompt, label="Prompt")
242
 
@@ -259,23 +330,32 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning
259
  status_msg = f"**Status:** βœ… Connected. Annotations are being saved to [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})"
260
  gr.Markdown(status_msg)
261
 
 
 
 
262
  # Wire up the buttons
263
  left_better_btn.click(
264
  handle_left_better,
265
- inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
266
- outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
 
 
267
  )
268
 
269
  right_better_btn.click(
270
  handle_right_better,
271
- inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
272
- outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
 
 
273
  )
274
 
275
  skip_btn.click(
276
  handle_skip,
277
- inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
278
- outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
 
 
279
  )
280
 
281
  if __name__ == "__main__":
 
2
  import random
3
  import json
4
  import os
5
+ import time
6
  from datetime import datetime
7
  from datasets import load_dataset, Dataset
8
  from huggingface_hub import HfApi, create_repo, dataset_info
 
143
 
144
  return prompt_display, completion_1_display, completion_2_display, idx, instruction, completion_1, completion_2
145
 
146
def format_stats_display(judgment_times, num_judgments, num_skips):
    """Build the Markdown session-statistics summary shown under the buttons.

    Args:
        judgment_times: List of per-judgment durations in seconds.
        num_judgments: Number of judgments made this session.
        num_skips: Number of examples skipped this session.

    Returns:
        A Markdown string summarising the session so far.
    """
    # Guard the empty session; also protects the division below should the
    # timing list ever fall out of sync with the judgment counter.
    if num_judgments == 0 or not judgment_times:
        return "📊 **Session Statistics:** No judgments made yet."

    avg_time = sum(judgment_times) / len(judgment_times)

    # Bug fix: the original message ended with a stray unmatched ")" —
    # "... {avg_time:.1f} seconds)." — which rendered literally in the UI.
    return (
        f"📊 **Session Statistics:** {num_judgments} judgements made, "
        f"{num_skips} items skipped. Average time per judgement "
        f"{avg_time:.1f} seconds."
    )
156
+
157
def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
                       start_time, judgment_times, num_judgments, num_skips):
    """Record a 'left is better' judgment, then advance to a fresh example.

    Times the judgment (elapsed since *start_time*), appends it to the
    per-session history, saves the annotation, and returns the next example
    plus refreshed timing/statistics state for the Gradio outputs.
    """
    print(f"User selected LEFT completion as better for example {current_idx}")

    # Record how long the annotator spent on this pair.
    elapsed = time.time() - start_time
    judgment_times.append(elapsed)
    num_judgments += 1
    print(f"Time taken for judgment: {elapsed:.1f} seconds")

    # Persist the preference ("left" wins).
    saved_ok = save_annotation(current_idx, instruction, completion_1, completion_2, "left")

    # Pull the next pair to show.
    (new_prompt, new_comp_1, new_comp_2, new_idx,
     new_instruction, new_completion_1, new_completion_2) = get_random_example()

    # Refresh the on-page statistics line.
    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

    if saved_ok:
        gr.Info("✅ Annotation saved! Left completion selected as better.")
    else:
        gr.Info("✅ Left completion selected (save failed - check console)")

    # The clock for the newly shown example starts now.
    new_start_time = time.time()

    return (
        new_prompt,
        new_comp_1,
        new_comp_2,
        new_idx,
        new_instruction,
        new_completion_1,
        new_completion_2,
        new_start_time,
        judgment_times,
        num_judgments,
        num_skips,
        stats_display,
    )
199
 
200
def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
                        start_time, judgment_times, num_judgments, num_skips):
    """Record a 'right is better' judgment, then advance to a fresh example.

    Times the judgment (elapsed since *start_time*), appends it to the
    per-session history, saves the annotation, and returns the next example
    plus refreshed timing/statistics state for the Gradio outputs.
    """
    print(f"User selected RIGHT completion as better for example {current_idx}")

    # Record how long the annotator spent on this pair.
    elapsed = time.time() - start_time
    judgment_times.append(elapsed)
    num_judgments += 1
    print(f"Time taken for judgment: {elapsed:.1f} seconds")

    # Persist the preference ("right" wins).
    saved_ok = save_annotation(current_idx, instruction, completion_1, completion_2, "right")

    # Pull the next pair to show.
    (new_prompt, new_comp_1, new_comp_2, new_idx,
     new_instruction, new_completion_1, new_completion_2) = get_random_example()

    # Refresh the on-page statistics line.
    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

    if saved_ok:
        gr.Info("✅ Annotation saved! Right completion selected as better.")
    else:
        gr.Info("✅ Right completion selected (save failed - check console)")

    # The clock for the newly shown example starts now.
    new_start_time = time.time()

    return (
        new_prompt,
        new_comp_1,
        new_comp_2,
        new_idx,
        new_instruction,
        new_completion_1,
        new_completion_2,
        new_start_time,
        judgment_times,
        num_judgments,
        num_skips,
        stats_display,
    )
242
 
243
def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
                start_time, judgment_times, num_judgments, num_skips):
    """Skip the current example without saving an annotation.

    Counts the skip (skips are not timed), loads the next example, restarts
    the per-example timer, and returns refreshed state plus the updated
    statistics line for the Gradio outputs.
    """
    print(f"User skipped example {current_idx}")

    # Skips are counted but deliberately excluded from the timing history.
    num_skips += 1

    # Pull the next pair to show.
    (new_prompt, new_comp_1, new_comp_2, new_idx,
     new_instruction, new_completion_1, new_completion_2) = get_random_example()

    # The clock for the newly shown example starts now.
    new_start_time = time.time()

    # Refresh the on-page statistics line.
    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

    gr.Info("⏭️ Skipped example (not saved).")

    return (
        new_prompt,
        new_comp_1,
        new_comp_2,
        new_idx,
        new_instruction,
        new_completion_1,
        new_completion_2,
        new_start_time,
        judgment_times,
        num_judgments,
        num_skips,
        stats_display,
    )
276
 
277
  # Initialize dataset on startup
 
302
  current_completion_1 = gr.State(init_completion_1)
303
  current_completion_2 = gr.State(init_completion_2)
304
 
305
+ # State to track timing and statistics
306
+ start_time = gr.State(time.time()) # When current example was loaded
307
+ judgment_times = gr.State([]) # List of times taken for each judgment
308
+ num_judgments = gr.State(0) # Number of judgments made
309
+ num_skips = gr.State(0) # Number of examples skipped
310
+
311
  # Display prompt
312
  prompt_display = gr.Markdown(init_prompt, label="Prompt")
313
 
 
330
  status_msg = f"**Status:** βœ… Connected. Annotations are being saved to [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})"
331
  gr.Markdown(status_msg)
332
 
333
+ # Statistics display
334
+ stats_display = gr.Markdown("πŸ“Š **Session Statistics:** No judgments made yet.", label="Performance Stats")
335
+
336
  # Wire up the buttons
337
  left_better_btn.click(
338
  handle_left_better,
339
+ inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
340
+ start_time, judgment_times, num_judgments, num_skips],
341
+ outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
342
+ start_time, judgment_times, num_judgments, num_skips, stats_display]
343
  )
344
 
345
  right_better_btn.click(
346
  handle_right_better,
347
+ inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
348
+ start_time, judgment_times, num_judgments, num_skips],
349
+ outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
350
+ start_time, judgment_times, num_judgments, num_skips, stats_display]
351
  )
352
 
353
  skip_btn.click(
354
  handle_skip,
355
+ inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
356
+ start_time, judgment_times, num_judgments, num_skips],
357
+ outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
358
+ start_time, judgment_times, num_judgments, num_skips, stats_display]
359
  )
360
 
361
  if __name__ == "__main__":