Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed 23 days ago

Commit

aad2e16

1 Parent(s): 1a410ee

revert automated progress bar

Browse files

Files changed (4) hide show

components/review_dashboard_page.py +55 -52
data/models.py +1 -20
utils/database.py +14 -23
utils/user_progress.py +0 -113

components/review_dashboard_page.py CHANGED Viewed

@@ -12,7 +12,6 @@ from config import conf
 from utils.database import get_db
 from data.models import Annotation, TTSData, Annotator, Validation
 from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
-from utils.user_progress import get_user_progress, update_user_progress, get_next_unreviewed_annotation, get_annotations_from_position
 log = Logger()
 LOADER = CloudServerAudioLoader(conf.FTP_URL)
@@ -303,7 +302,7 @@ class ReviewDashboardPage:
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-            # Load annotations from target annotator with PROGRESS-AWARE LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -313,34 +312,33 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                # Get the next unreviewed annotation position for this user
-                next_annotation_id, next_position = get_next_unreviewed_annotation(user_id, target_annotator_obj.id)
-                if next_annotation_id is None:
-                    # All annotations have been reviewed
-                    total_count = db.query(Annotation).filter(
-                        Annotation.annotator_id == target_annotator_obj.id
-                    ).count()
-                    return [], 0, f"🎉 **All Done!** You have reviewed all {total_count} annotations for {target_annotator}.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-                # Load annotations starting from the next unreviewed position
-                INITIAL_BATCH_SIZE = 10  # Load 10 items starting from where user left off
-                annotations_data = get_annotations_from_position(
-                    target_annotator_obj.id,
-                    next_position,
-                    INITIAL_BATCH_SIZE
-                )
-                # Get total count for progress info
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
-                log.info(f"Progress-aware load: Starting from position {next_position}, loaded {len(annotations_data)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
-                for annotation, filename, sentence in annotations_data:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -357,12 +355,36 @@ class ReviewDashboardPage:
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
-                # Set initial display - start from the first item (which should be unreviewed)
                 if items:
-                    initial_item = items[0]  # Always start from the first item in the loaded batch
-                    review_info_text = f"🔍 **Phase 2 Review Mode** - Continuing from where you left off. Loaded {len(items)} items starting from position {next_position + 1} of {total_count} total items."
-                    # Ensure correct order of return values for 14 outputs
                     rejection_reason_val = ""
                     rejection_visible_val = False
                     if initial_item["validation_status"].startswith("Rejected"):
@@ -374,7 +396,7 @@ class ReviewDashboardPage:
                     return (
                         items,
-                        0,  # Always start from index 0 of the loaded batch
                         review_info_text,
                         str(initial_item["tts_id"]),
                         initial_item["filename"],
@@ -392,6 +414,13 @@ class ReviewDashboardPage:
                     # Ensure correct order and number of return values for empty items (14 outputs)
                     return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
                 # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
@@ -520,32 +549,6 @@ class ReviewDashboardPage:
                 db.commit()
                 log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
-                # Update user progress tracking
-                try:
-                    # Find target annotator for this user
-                    username = session.get("username")
-                    target_annotator = None
-                    for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
-                        if reviewer_name == username:
-                            target_annotator = annotator_name
-                            break
-                    if target_annotator:
-                        target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
-                        if target_annotator_obj:
-                            # Calculate the current position in the review list
-                            current_position = db.query(Annotation).filter(
-                                Annotation.annotator_id == target_annotator_obj.id,
-                                Annotation.id <= annotation_id
-                            ).count() - 1  # Convert to 0-based index
-                            # Update user progress
-                            update_user_progress(user_id, target_annotator_obj.id, annotation_id, current_position)
-                            log.info(f"Updated user progress: user {user_id} at position {current_position} for annotation {annotation_id}")
-                except Exception as e:
-                    log.warning(f"Failed to update user progress: {e}")
-                    # Don't fail the validation save if progress tracking fails
                 items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
                 # Show rejection reason input only if rejected, otherwise hide and clear

 from utils.database import get_db
 from data.models import Annotation, TTSData, Annotator, Validation
 from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
 log = Logger()
 LOADER = CloudServerAudioLoader(conf.FTP_URL)
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+            # Load annotations from target annotator with FAST INITIAL LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # FAST INITIAL QUERY: Load only essential data without complex validation processing
+                # Reduced batch size for instant loading in HuggingFace spaces
+                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
+                # Simple query to get basic annotation data quickly
+                initial_query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
+                initial_results = initial_query.all()
+                # Get total count for progress info (this is fast)
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
+                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
+                for annotation, filename, sentence in initial_results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
+                # Find the first item that is not reviewed (prioritize non-deleted annotations)
+                initial_idx = 0
                 if items:
+                    found_unreviewed = False
+                    # First, try to find unreviewed non-deleted annotations
+                    for i, item_data in enumerate(items):
+                        if (item_data["validation_status"] == "Not Reviewed" and
+                            not item_data.get("is_deleted", False)):
+                            initial_idx = i
+                            found_unreviewed = True
+                            break
+                    # If no unreviewed non-deleted items, look for any unreviewed items
+                    if not found_unreviewed:
+                        for i, item_data in enumerate(items):
+                            if item_data["validation_status"].startswith("Not Reviewed"):
+                                initial_idx = i
+                                found_unreviewed = True
+                                break
+                    # If no unreviewed items at all, use the last item
+                    if not found_unreviewed:
+                        initial_idx = len(items) - 1 if items else 0
+                # Set initial display
+                if items:
+                    initial_item = items[initial_idx]
+                    review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
+                    # Ensure correct order of return values for 14 outputs
+                    # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
                     rejection_reason_val = ""
                     rejection_visible_val = False
                     if initial_item["validation_status"].startswith("Rejected"):
                     return (
                         items,
+                        initial_idx,
                         review_info_text,
                         str(initial_item["tts_id"]),
                         initial_item["filename"],
                     # Ensure correct order and number of return values for empty items (14 outputs)
                     return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+                # except Exception as e:
+                #     log.error(f"Error loading review items: {e}")
+                #     sentry_sdk.capture_exception(e)
+                #     gr.Error(f"Failed to load review data: {e}")
+                #     # Ensure correct order and number of return values for error case (14 outputs)
+                #     return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
                 # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
                 db.commit()
                 log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
                 items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
                 # Show rejection reason input only if rejected, otherwise hide and clear

data/models.py CHANGED Viewed

@@ -158,23 +158,4 @@ class Validation(Base):
     validated_at = Column(DateTime, nullable=False)
     annotation = relationship("Annotation")
-    validator = relationship("Annotator", foreign_keys=[validator_id])  # Fixed: should reference Annotator
-# --------------------------------------------------------------------------- #
-#                                 UserProgress                                #
-# --------------------------------------------------------------------------- #
-class UserProgress(Base):
-    __tablename__ = "user_progress"
-    id = Column(Integer, primary_key=True)
-    user_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
-    target_annotator_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
-    last_reviewed_annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=True)
-    last_position = Column(Integer, default=0)  # Position in the review list
-    updated_at = Column(DateTime, nullable=False)
-    # Relationships
-    user = relationship("Annotator", foreign_keys=[user_id])
-    target_annotator = relationship("Annotator", foreign_keys=[target_annotator_id])
-    last_reviewed_annotation = relationship("Annotation", foreign_keys=[last_reviewed_annotation_id])

     validated_at = Column(DateTime, nullable=False)
     annotation = relationship("Annotation")
+    validator = relationship("Annotator", foreign_keys=[validator_id])  # Fixed: should reference Annotator

utils/database.py CHANGED Viewed

@@ -21,29 +21,20 @@ log = Logger()
 def get_db_engine():
     """Create DB engine with error handling for HF Spaces"""
-    try:
-        # SQLite doesn't support some MySQL-specific parameters
-        if conf.db_url.startswith("sqlite"):
-            engine = create_engine(
-                conf.db_url,
-                pool_pre_ping=True,
-                pool_size=5,
-                max_overflow=10,
-            )
-        else:
-            engine = create_engine(
-                conf.db_url,
-                pool_pre_ping=True,
-                pool_size=5,
-                max_overflow=10,
-                connect_args={"connect_timeout": 10},
-            )
-        log.info("Database engine created successfully")
-        return engine
-    except Exception as e:
-        log.error(f"Failed to create database engine: {e}")
-        sentry_sdk.capture_exception(e)
-        raise
 engine = get_db_engine()

 def get_db_engine():
     """Create DB engine with error handling for HF Spaces"""
+    # try:
+    engine = create_engine(
+        conf.db_url,
+        pool_pre_ping=True,
+        pool_size=5,
+        max_overflow=10,
+        connect_args={"connect_timeout": 10},
+    )
+    log.info("Database engine created successfully")
+    return engine
+    # except Exception as e:
+    #     log.error(f"Failed to create database engine: {e}")
+    #     sentry_sdk.capture_exception(e)
+    #     raise
 engine = get_db_engine()

utils/user_progress.py DELETED Viewed

@@ -1,113 +0,0 @@
-# utils/user_progress.py
-import datetime
-from sqlalchemy import and_
-from utils.database import get_db
-from data.models import UserProgress, Annotation, Validation, Annotator, TTSData
-from utils.logger import Logger
-log = Logger()
-def get_user_progress(user_id: int, target_annotator_id: int) -> UserProgress:
-    """Get or create user progress record for a specific target annotator"""
-    with get_db() as db:
-        progress = db.query(UserProgress).filter(
-            and_(
-                UserProgress.user_id == user_id,
-                UserProgress.target_annotator_id == target_annotator_id
-            )
-        ).first()
-        if not progress:
-            # Create new progress record
-            progress = UserProgress(
-                user_id=user_id,
-                target_annotator_id=target_annotator_id,
-                last_reviewed_annotation_id=None,
-                last_position=0,
-                updated_at=datetime.datetime.now()
-            )
-            db.add(progress)
-            db.commit()
-            log.info(f"Created new progress record for user {user_id} on target annotator {target_annotator_id}")
-        return progress
-def update_user_progress(user_id: int, target_annotator_id: int, annotation_id: int, position: int):
-    """Update user progress with the latest reviewed annotation"""
-    with get_db() as db:
-        progress = get_user_progress(user_id, target_annotator_id)
-        progress.last_reviewed_annotation_id = annotation_id
-        progress.last_position = position
-        progress.updated_at = datetime.datetime.now()
-        db.commit()
-        log.info(f"Updated progress for user {user_id}: annotation {annotation_id}, position {position}")
-def get_next_unreviewed_annotation(user_id: int, target_annotator_id: int) -> tuple[int, int]:
-    """
-    Get the next unreviewed annotation ID and its position in the review list.
-    Returns (annotation_id, position) or (None, 0) if no unreviewed items found.
-    """
-    with get_db() as db:
-        # Get all annotations for the target annotator, ordered by ID
-        annotations = db.query(Annotation).filter(
-            Annotation.annotator_id == target_annotator_id
-        ).order_by(Annotation.id).all()
-        if not annotations:
-            return None, 0
-        # Find the first annotation that hasn't been reviewed by this user
-        for position, annotation in enumerate(annotations):
-            # Check if this annotation has been validated by the current user
-            validation = db.query(Validation).filter(
-                and_(
-                    Validation.annotation_id == annotation.id,
-                    Validation.validator_id == user_id
-                )
-            ).first()
-            if not validation:
-                # This annotation hasn't been reviewed yet
-                return annotation.id, position
-        # All annotations have been reviewed
-        return None, len(annotations)
-def get_last_reviewed_position(user_id: int, target_annotator_id: int) -> int:
-    """Get the position of the last reviewed annotation by the user"""
-    with get_db() as db:
-        progress = db.query(UserProgress).filter(
-            and_(
-                UserProgress.user_id == user_id,
-                UserProgress.target_annotator_id == target_annotator_id
-            )
-        ).first()
-        if progress and progress.last_position is not None:
-            return progress.last_position
-        return 0
-def get_annotations_from_position(target_annotator_id: int, start_position: int, batch_size: int = 10) -> list:
-    """
-    Get annotations starting from a specific position with pagination.
-    Returns list of (annotation, filename, sentence) tuples.
-    """
-    with get_db() as db:
-        # Get annotations ordered by ID, starting from the specified position
-        annotations = db.query(
-            Annotation,
-            TTSData.filename,
-            TTSData.sentence
-        ).join(
-            TTSData, Annotation.tts_data_id == TTSData.id
-        ).filter(
-            Annotation.annotator_id == target_annotator_id
-        ).order_by(Annotation.id).offset(start_position).limit(batch_size).all()
-        return annotations