Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed on Aug 9

Commit

4d1a43b

1 Parent(s): 8dcb829

Resume Feature Implementation

Browse files

Files changed (1) hide show

components/review_dashboard_page.py +95 -41

components/review_dashboard_page.py CHANGED Viewed

@@ -278,6 +278,42 @@ class ReviewDashboardPage:
                     log.error(f"Error calculating review progress for user {user_id}: {e}")
                     return f"⚠️ **Error calculating progress**"
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
@@ -302,7 +338,7 @@ class ReviewDashboardPage:
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-            # Load annotations from target annotator with FAST INITIAL LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -312,33 +348,60 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                # FAST INITIAL QUERY: Load only essential data without complex validation processing
-                # Reduced batch size for instant loading in HuggingFace spaces
-                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
-                # Simple query to get basic annotation data quickly
-                initial_query = db.query(
-                    Annotation,
-                    TTSData.filename,
-                    TTSData.sentence
-                ).join(
-                    TTSData, Annotation.tts_data_id == TTSData.id
-                ).filter(
-                    Annotation.annotator_id == target_annotator_obj.id
-                ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
-                initial_results = initial_query.all()
-                # Get total count for progress info (this is fast)
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
-                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
-                for annotation, filename, sentence in initial_results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -355,29 +418,20 @@ class ReviewDashboardPage:
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
-                # Find the first item that is not reviewed (prioritize non-deleted annotations)
                 initial_idx = 0
                 if items:
-                    found_unreviewed = False
-                    # First, try to find unreviewed non-deleted annotations
-                    for i, item_data in enumerate(items):
-                        if (item_data["validation_status"] == "Not Reviewed" and
-                            not item_data.get("is_deleted", False)):
-                            initial_idx = i
-                            found_unreviewed = True
-                            break
-                    # If no unreviewed non-deleted items, look for any unreviewed items
-                    if not found_unreviewed:
-                        for i, item_data in enumerate(items):
-                            if item_data["validation_status"].startswith("Not Reviewed"):
-                                initial_idx = i
-                                found_unreviewed = True
-                                break
-                    # If no unreviewed items at all, use the last item
-                    if not found_unreviewed:
-                        initial_idx = len(items) - 1 if items else 0
                 # Set initial display
                 if items:

                     log.error(f"Error calculating review progress for user {user_id}: {e}")
                     return f"⚠️ **Error calculating progress**"
+        def find_first_unreviewed_annotation_index(db, target_annotator_obj, reviewer_user_id):
+            """
+            Find the index (0-based) of the first annotation that hasn't been reviewed by the current reviewer.
+            Returns the global index within all annotations for the target annotator.
+            """
+            try:
+                # Query to find the first annotation that doesn't have a validation record from this reviewer
+                # We use LEFT JOIN to include annotations without validations
+                first_unreviewed = db.query(Annotation).outerjoin(
+                    Validation,
+                    (Annotation.id == Validation.annotation_id) &
+                    (Validation.validator_id == reviewer_user_id)
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id,
+                    Validation.id.is_(None)  # No validation record exists
+                ).order_by(Annotation.id).first()
+                if first_unreviewed:
+                    # Find the index of this annotation among all annotations for the target annotator
+                    all_annotations = db.query(Annotation).filter(
+                        Annotation.annotator_id == target_annotator_obj.id
+                    ).order_by(Annotation.id).all()
+                    for idx, annotation in enumerate(all_annotations):
+                        if annotation.id == first_unreviewed.id:
+                            log.info(f"Found first unreviewed annotation at index {idx} (ID: {first_unreviewed.id})")
+                            return idx
+                # If no unreviewed annotation found, return -1
+                log.info("No unreviewed annotations found")
+                return -1
+            except Exception as e:
+                log.error(f"Error finding first unreviewed annotation: {e}")
+                return -1
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+            # Load annotations from target annotator with RESUME LOGIC
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # --- RESUME LOGIC: Find first unreviewed annotation ---
+                resume_index = find_first_unreviewed_annotation_index(db, target_annotator_obj, user_id)
+                # Get total count for progress info
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
+                # Calculate how many items to load initially to include the resume index
+                INITIAL_BATCH_SIZE = 5  # Minimum batch size
+                if resume_index >= 0:
+                    # Load enough items to include the resume index, but at least INITIAL_BATCH_SIZE
+                    items_to_load = max(INITIAL_BATCH_SIZE, resume_index + 1)
+                    # Cap it to avoid loading too many items at once
+                    items_to_load = min(items_to_load, 20)
+                else:
+                    # No unreviewed items found, load standard batch from the end
+                    items_to_load = INITIAL_BATCH_SIZE
+                # Load items based on resume logic
+                if resume_index >= 0:
+                    # Load from the beginning to include the resume index
+                    query = db.query(
+                        Annotation,
+                        TTSData.filename,
+                        TTSData.sentence
+                    ).join(
+                        TTSData, Annotation.tts_data_id == TTSData.id
+                    ).filter(
+                        Annotation.annotator_id == target_annotator_obj.id
+                    ).order_by(Annotation.id).limit(items_to_load)
+                else:
+                    # All items reviewed, load from the end
+                    query = db.query(
+                        Annotation,
+                        TTSData.filename,
+                        TTSData.sentence
+                    ).join(
+                        TTSData, Annotation.tts_data_id == TTSData.id
+                    ).filter(
+                        Annotation.annotator_id == target_annotator_obj.id
+                    ).order_by(Annotation.id.desc()).limit(items_to_load)
+                results = query.all()
+                # If we loaded from the end (all reviewed), reverse the results to maintain chronological order
+                if resume_index < 0:
+                    results = list(reversed(results))
+                log.info(f"Loaded {len(results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
+                for annotation, filename, sentence in results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
+                # --- Calculate initial index based on resume logic ---
                 initial_idx = 0
                 if items:
+                    if resume_index >= 0 and resume_index < len(items):
+                        # Resume from the first unreviewed item
+                        initial_idx = resume_index
+                        log.info(f"User '{username}' resuming at first unreviewed item, index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
+                    else:
+                        # All items reviewed or no unreviewed items in current batch, start from the last item
+                        initial_idx = len(items) - 1 if items else 0
+                        if items:
+                            log.info(f"User '{username}' has all loaded items reviewed, starting at last item index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
+                else:
+                    log.info(f"User '{username}' has no items assigned, starting at index 0.")
                 # Set initial display
                 if items: