Add session statistics
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import random
 import json
 import os
+import time
 from datetime import datetime
 from datasets import load_dataset, Dataset
 from huggingface_hub import HfApi, create_repo, dataset_info
@@ -142,19 +143,45 @@ def get_random_example():

     return prompt_display, completion_1_display, completion_2_display, idx, instruction, completion_1, completion_2

-def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def format_stats_display(judgment_times, num_judgments, num_skips):
+    """Format the statistics display"""
+    if num_judgments == 0:
+        return "📊 **Session Statistics:** No judgments made yet."
+
+    avg_time = sum(judgment_times) / len(judgment_times)
+
+    stats = f"""📊 **Session Statistics:** {num_judgments} judgments made, {num_skips} items skipped. Average time per judgment: {avg_time:.1f} seconds."""
+
+    return stats
+
+def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                       start_time, judgment_times, num_judgments, num_skips):
     """Handle when user selects left completion as better"""
     print(f"User selected LEFT completion as better for example {current_idx}")

+    # Calculate time taken for this judgment
+    end_time = time.time()
+    time_taken = end_time - start_time
+    judgment_times.append(time_taken)
+    num_judgments += 1
+
+    print(f"Time taken for judgment: {time_taken:.1f} seconds")
+
     # Save the annotation
     success = save_annotation(current_idx, instruction, completion_1, completion_2, "left")

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

     message = "✅ Annotation saved! Left completion selected as better." if success else "❌ Left completion selected (save failed - check console)"
     gr.Info(message)

+    # Reset timer for new example
+    new_start_time = time.time()
+
     return (
         new_prompt,
         new_comp_1,
@@ -162,22 +189,42 @@ def handle_left_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

-def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                        start_time, judgment_times, num_judgments, num_skips):
     """Handle when user selects right completion as better"""
     print(f"User selected RIGHT completion as better for example {current_idx}")

+    # Calculate time taken for this judgment
+    end_time = time.time()
+    time_taken = end_time - start_time
+    judgment_times.append(time_taken)
+    num_judgments += 1
+
+    print(f"Time taken for judgment: {time_taken:.1f} seconds")
+
     # Save the annotation
     success = save_annotation(current_idx, instruction, completion_1, completion_2, "right")

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)

     message = "✅ Annotation saved! Right completion selected as better." if success else "❌ Right completion selected (save failed - check console)"
     gr.Info(message)

+    # Reset timer for new example
+    new_start_time = time.time()
+
     return (
         new_prompt,
         new_comp_1,
@@ -185,18 +232,31 @@ def handle_right_better(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

-def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
+def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2,
+                start_time, judgment_times, num_judgments, num_skips):
     """Handle when user skips the current example"""
     print(f"User skipped example {current_idx}")

-    #
+    # Increment skip counter (don't track time for skips)
+    num_skips += 1

     # Get new random example
     new_prompt, new_comp_1, new_comp_2, new_idx, new_instruction, new_completion_1, new_completion_2 = get_random_example()

+    # Reset timer for new example
+    new_start_time = time.time()
+
+    # Update stats display
+    stats_display = format_stats_display(judgment_times, num_judgments, num_skips)
+
     gr.Info("⏭️ Skipped example (not saved).")

     return (
@@ -206,7 +266,12 @@ def handle_skip(prompt, completion_1_display, completion_2_display, current_idx, instruction, completion_1, completion_2):
         new_idx,
         new_instruction,
         new_completion_1,
-        new_completion_2
+        new_completion_2,
+        new_start_time,
+        judgment_times,
+        num_judgments,
+        num_skips,
+        stats_display
     )

 # Initialize dataset on startup
@@ -237,6 +302,12 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning from Human Feedback)
     current_completion_1 = gr.State(init_completion_1)
     current_completion_2 = gr.State(init_completion_2)

+    # State to track timing and statistics
+    start_time = gr.State(time.time())  # When current example was loaded
+    judgment_times = gr.State([])  # List of times taken for each judgment
+    num_judgments = gr.State(0)  # Number of judgments made
+    num_skips = gr.State(0)  # Number of examples skipped
+
     # Display prompt
     prompt_display = gr.Markdown(init_prompt, label="Prompt")

@@ -259,23 +330,32 @@ This simulates the data annotation process used in RLHF (Reinforcement Learning from Human Feedback)
     status_msg = f"**Status:** ✅ Connected. Annotations are being saved to [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})"
     gr.Markdown(status_msg)

+    # Statistics display
+    stats_display = gr.Markdown("📊 **Session Statistics:** No judgments made yet.", label="Performance Stats")
+
     # Wire up the buttons
     left_better_btn.click(
         handle_left_better,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
     )

     right_better_btn.click(
         handle_right_better,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
    )

     skip_btn.click(
         handle_skip,
-        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2],
-        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2]
+        inputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                start_time, judgment_times, num_judgments, num_skips],
+        outputs=[prompt_display, completion_1_display, completion_2_display, current_idx, current_instruction, current_completion_1, current_completion_2,
+                 start_time, judgment_times, num_judgments, num_skips, stats_display]
     )

 if __name__ == "__main__":
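A minimal, self-contained sketch of the pattern this commit relies on — per-session timing kept in `gr.State` values that are passed through a button's `inputs` and returned via its `outputs` — is shown below. It is illustrative only; the component and function names are assumptions, not taken from `app.py`.

```python
# Illustrative sketch (assumed names, not from app.py): track how long each item
# stays on screen using gr.State, updating the stats text on every click.
import time

import gradio as gr


def record_judgment(start_time, judgment_times, num_judgments):
    # Time elapsed since the current item was shown.
    judgment_times.append(time.time() - start_time)
    num_judgments += 1
    avg_time = sum(judgment_times) / len(judgment_times)
    stats = f"📊 {num_judgments} judgments, average {avg_time:.1f}s each"
    # Return a fresh start time so the timer resets for the next item.
    return time.time(), judgment_times, num_judgments, stats


with gr.Blocks() as demo:
    start_time = gr.State(time.time())  # when the current item appeared
    judgment_times = gr.State([])       # seconds taken per judgment
    num_judgments = gr.State(0)         # total judgments this session
    stats_display = gr.Markdown("📊 No judgments made yet.")
    judge_btn = gr.Button("Record judgment")

    judge_btn.click(
        record_judgment,
        inputs=[start_time, judgment_times, num_judgments],
        outputs=[start_time, judgment_times, num_judgments, stats_display],
    )

if __name__ == "__main__":
    demo.launch()
```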