Spaces:

navidved
/

tts_labeling

Running

App Files Community

vargha committed 2 days ago

Commit

79f51f8

1 Parent(s): e4c3230

Resume Feature Implementation

Browse files

Files changed (1) hide show

components/review_dashboard_page.py +59 -106

components/review_dashboard_page.py CHANGED Viewed

@@ -278,48 +278,6 @@ class ReviewDashboardPage:
                     log.error(f"Error calculating review progress for user {user_id}: {e}")
                     return f"⚠️ **Error calculating progress**"
-        def find_first_unreviewed_annotation_index(db, target_annotator_obj, reviewer_user_id):
-            """
-            Find the index (0-based) of the first annotation that hasn't been reviewed by the current reviewer.
-            Returns the global index within all annotations for the target annotator.
-            Uses the same JOIN as load_review_items_fn to ensure consistent indexing.
-            """
-            try:
-                # Query to find the first annotation that doesn't have a validation record from this reviewer
-                # Use the same JOIN as load_review_items_fn to ensure we're counting the same set of annotations
-                first_unreviewed = db.query(Annotation).join(
-                    TTSData, Annotation.tts_data_id == TTSData.id
-                ).outerjoin(
-                    Validation,
-                    (Annotation.id == Validation.annotation_id) &
-                    (Validation.validator_id == reviewer_user_id)
-                ).filter(
-                    Annotation.annotator_id == target_annotator_obj.id,
-                    Validation.id.is_(None)  # No validation record exists
-                ).order_by(Annotation.id).first()
-                if first_unreviewed:
-                    # Find the index of this annotation among all valid annotations (with TTSData) for the target annotator
-                    # Use the same query structure as load_review_items_fn
-                    all_annotations = db.query(Annotation).join(
-                        TTSData, Annotation.tts_data_id == TTSData.id
-                    ).filter(
-                        Annotation.annotator_id == target_annotator_obj.id
-                    ).order_by(Annotation.id).all()
-                    for idx, annotation in enumerate(all_annotations):
-                        if annotation.id == first_unreviewed.id:
-                            log.info(f"Found first unreviewed annotation at index {idx} (ID: {first_unreviewed.id})")
-                            return idx
-                # If no unreviewed annotation found, return -1
-                log.info("No unreviewed annotations found")
-                return -1
-            except Exception as e:
-                log.error(f"Error finding first unreviewed annotation: {e}")
-                return -1
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
@@ -344,7 +302,7 @@ class ReviewDashboardPage:
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-            # Load annotations from target annotator with RESUME LOGIC
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -354,59 +312,33 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                # --- RESUME LOGIC: Find first unreviewed annotation ---
-                resume_index = find_first_unreviewed_annotation_index(db, target_annotator_obj, user_id)
-                # Get total count for progress info
-                total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
-                ).count()
-                # Calculate loading strategy based on resume logic
-                INITIAL_BATCH_SIZE = 10  # Standard batch size
-                CONTEXT_ITEMS = 1  # Number of reviewed items to include for context
-                load_offset = 0  # Initialize load_offset
-                if resume_index >= 0:
-                    # Load items starting from around the first unreviewed annotation
-                    # Include some context (already reviewed items) before the unreviewed one
-                    load_offset = max(0, resume_index - CONTEXT_ITEMS)
-                    items_to_load = INITIAL_BATCH_SIZE
-                    # Load items starting from the calculated offset
-                    query = db.query(
-                        Annotation,
-                        TTSData.filename,
-                        TTSData.sentence
-                    ).join(
-                        TTSData, Annotation.tts_data_id == TTSData.id
-                    ).filter(
-                        Annotation.annotator_id == target_annotator_obj.id
-                    ).order_by(Annotation.id).offset(load_offset).limit(items_to_load)
-                    log.info(f"Resume mode: Loading {items_to_load} items starting from offset {load_offset} (first unreviewed at global index {resume_index})")
-                else:
-                    # No unreviewed items found, load standard batch from the end
-                    load_offset = max(0, total_count - INITIAL_BATCH_SIZE)
-                    query = db.query(
-                        Annotation,
-                        TTSData.filename,
-                        TTSData.sentence
-                    ).join(
-                        TTSData, Annotation.tts_data_id == TTSData.id
-                    ).filter(
-                        Annotation.annotator_id == target_annotator_obj.id
-                    ).order_by(Annotation.id).offset(load_offset).limit(INITIAL_BATCH_SIZE)
-                    log.info(f"All reviewed mode: Loading {INITIAL_BATCH_SIZE} items from offset {load_offset}")
-                results = query.all()
-                log.info(f"Loaded {len(results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
-                for annotation, filename, sentence in results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -423,27 +355,48 @@ class ReviewDashboardPage:
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
-                # --- Calculate initial index based on resume logic ---
                 initial_idx = 0
                 if items:
-                    if resume_index >= 0:
-                        # Calculate the local index within loaded items
-                        # The first unreviewed annotation should be at: resume_index - load_offset
-                        local_resume_index = resume_index - load_offset
-                        if 0 <= local_resume_index < len(items):
-                            initial_idx = local_resume_index
-                            log.info(f"User '{username}' resuming at first unreviewed item, local index: {initial_idx} (global index: {resume_index}, annotation ID: {items[initial_idx]['annotation_id']})")
-                        else:
-                            # Fallback to first item if calculation is off
-                            initial_idx = 0
-                            log.warning(f"Resume index calculation off, starting at first loaded item. Local index: {local_resume_index}, loaded items: {len(items)}")
-                    else:
-                        # All items reviewed, start from the last item
                         initial_idx = len(items) - 1 if items else 0
-                        if items:
-                            log.info(f"User '{username}' has all items reviewed, starting at last item index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
-                else:
-                    log.info(f"User '{username}' has no items assigned, starting at index 0.")
                 # Set initial display
                 if items:

                     log.error(f"Error calculating review progress for user {user_id}: {e}")
                     return f"⚠️ **Error calculating progress**"
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+            # Load annotations from target annotator with FAST INITIAL LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # FAST INITIAL QUERY: Load only essential data without complex validation processing
+                # Reduced batch size for instant loading in HuggingFace spaces
+                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
+                # Simple query to get basic annotation data quickly
+                initial_query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).filter(
                     Annotation.annotator_id == target_annotator_obj.id
+                ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
+                initial_results = initial_query.all()
+                # Get total count for progress info (this is fast)
+                total_count = db.query(Annotation).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).count()
+                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
+                for annotation, filename, sentence in initial_results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
+                # Find the first item that is not reviewed (prioritize non-deleted annotations)
                 initial_idx = 0
                 if items:
+                    found_unreviewed = False
+                    # Check database for validation status to find first unreviewed item
+                    for i, item_data in enumerate(items):
+                        # Check if this annotation has been validated by current user
+                        existing_validation = db.query(Validation).filter_by(
+                            annotation_id=item_data["annotation_id"],
+                            validator_id=user_id
+                        ).first()
+                        # If no validation exists, this is unreviewed
+                        if not existing_validation:
+                            # Prioritize non-deleted annotations
+                            if not item_data.get("is_deleted", False):
+                                initial_idx = i
+                                found_unreviewed = True
+                                log.info(f"Found first unreviewed non-deleted item at index {i} (annotation_id: {item_data['annotation_id']})")
+                                break
+                    # If no unreviewed non-deleted items found, look for any unreviewed items (including deleted)
+                    if not found_unreviewed:
+                        for i, item_data in enumerate(items):
+                            existing_validation = db.query(Validation).filter_by(
+                                annotation_id=item_data["annotation_id"],
+                                validator_id=user_id
+                            ).first()
+                            if not existing_validation:
+                                initial_idx = i
+                                found_unreviewed = True
+                                log.info(f"Found first unreviewed item at index {i} (annotation_id: {item_data['annotation_id']}) - may be deleted")
+                                break
+                    # If all items have been reviewed, continue from the end
+                    if not found_unreviewed:
                         initial_idx = len(items) - 1 if items else 0
+                        log.info(f"All loaded items have been reviewed, starting from index {initial_idx}")
+                    log.info(f"Set initial review index to {initial_idx} out of {len(items)} loaded items")
                 # Set initial display
                 if items: