vargha committed on
Commit
8bec8f1
·
1 Parent(s): 71830dc

automated progress bar

Browse files
components/review_dashboard_page.py CHANGED
@@ -12,6 +12,7 @@ from config import conf
12
  from utils.database import get_db
13
  from data.models import Annotation, TTSData, Annotator, Validation
14
  from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
 
15
 
16
  log = Logger()
17
  LOADER = CloudServerAudioLoader(conf.FTP_URL)
@@ -302,7 +303,7 @@ class ReviewDashboardPage:
302
  log.warning(f"No target annotator found for reviewer {username}")
303
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
304
 
305
- # Load ALL annotations from target annotator to properly implement "continue from where left off"
306
  with get_db() as db:
307
  # Get target annotator's ID
308
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -312,70 +313,56 @@ class ReviewDashboardPage:
312
 
313
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
314
 
315
- # Load ALL annotations to properly implement resume functionality
316
- # This is similar to phase 1 dashboard approach
317
- all_annotations_query = db.query(
318
- Annotation,
319
- TTSData.filename,
320
- TTSData.sentence
321
- ).join(
322
- TTSData, Annotation.tts_data_id == TTSData.id
323
- ).filter(
 
 
 
 
 
 
 
 
 
 
 
324
  Annotation.annotator_id == target_annotator_obj.id
325
- ).order_by(Annotation.id)
326
 
327
- all_results = all_annotations_query.all()
328
- total_count = len(all_results)
329
 
330
- log.info(f"Loaded {total_count} annotations for target annotator ID {target_annotator_obj.id}")
331
-
332
- # Process all items and determine validation status immediately
333
  items = []
334
- for annotation, filename, sentence in all_results:
335
- # Check if annotation is deleted
336
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
337
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
338
 
339
- # Get validation status immediately (not on-demand)
340
- validation_status, is_deleted_updated = get_validation_status_for_item(db, annotation.id, user_id, annotation)
341
-
342
  items.append({
343
  "annotation_id": annotation.id,
344
  "tts_id": annotation.tts_data_id,
345
  "filename": filename,
346
  "sentence": sentence,
347
  "annotated_sentence": annotated_sentence_display,
348
- "is_deleted": is_deleted_updated,
349
  "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
350
- "validation_status": validation_status,
351
- "validation_loaded": True # Already loaded
352
  })
353
 
354
- # --- Resume Logic: Find first unreviewed item (similar to phase 1 dashboard) ---
355
- initial_idx = 0
356
  if items:
357
- first_unreviewed_idx = -1
358
- for i, item_data in enumerate(items):
359
- if item_data["validation_status"] == "Not Reviewed":
360
- first_unreviewed_idx = i
361
- break
362
 
363
- if first_unreviewed_idx != -1:
364
- initial_idx = first_unreviewed_idx
365
- log.info(f"Resuming at first unreviewed item, index: {initial_idx} (ID: {items[initial_idx]['tts_id']})")
366
- else: # All items are reviewed
367
- initial_idx = len(items) - 1
368
- log.info(f"All items reviewed, starting at last item, index: {initial_idx} (ID: {items[initial_idx]['tts_id']})")
369
- else: # No items assigned
370
- initial_idx = 0
371
- log.info("No items assigned to user, starting at index 0.")
372
-
373
- # Set initial display
374
- if items:
375
- initial_item = items[initial_idx]
376
- review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} items."
377
  # Ensure correct order of return values for 14 outputs
378
- # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
379
  rejection_reason_val = ""
380
  rejection_visible_val = False
381
  if initial_item["validation_status"].startswith("Rejected"):
@@ -387,7 +374,7 @@ class ReviewDashboardPage:
387
 
388
  return (
389
  items,
390
- initial_idx,
391
  review_info_text,
392
  str(initial_item["tts_id"]),
393
  initial_item["filename"],
@@ -405,13 +392,6 @@ class ReviewDashboardPage:
405
  # Ensure correct order and number of return values for empty items (14 outputs)
406
  return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
407
 
408
- # except Exception as e:
409
- # log.error(f"Error loading review items: {e}")
410
- # sentry_sdk.capture_exception(e)
411
- # gr.Error(f"Failed to load review data: {e}")
412
- # # Ensure correct order and number of return values for error case (14 outputs)
413
- # return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
414
-
415
  def show_current_review_item_fn(items, idx, session):
416
  if not items or idx >= len(items) or idx < 0:
417
  # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
@@ -419,7 +399,28 @@ class ReviewDashboardPage:
419
 
420
  current_item = items[idx]
421
 
422
- # Validation status is already loaded, no need for on-demand loading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
  rejection_reason = ""
425
  rejection_visible = False
@@ -452,19 +453,29 @@ class ReviewDashboardPage:
452
  def update_review_info_fn(items, total_count):
453
  """Update the review info banner with current loaded items count"""
454
  if items:
455
- return f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} items."
456
  else:
457
  return f"🔍 **Phase 2 Review Mode** - No annotations found for review."
458
 
459
  def navigate_and_load_fn(items, current_idx, direction, session):
460
- """Simple navigation function - all items are already loaded"""
461
  if not items:
462
  return items, 0, ""
463
 
464
  # Navigate
465
  if direction == "next":
466
  new_idx = min(current_idx + 1, len(items) - 1)
467
- return items, new_idx, "" # No review info update needed
 
 
 
 
 
 
 
 
 
 
468
  else: # prev
469
  new_idx = max(current_idx - 1, 0)
470
  return items, new_idx, "" # No review info update needed
@@ -509,6 +520,32 @@ class ReviewDashboardPage:
509
  db.commit()
510
  log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
513
 
514
  # Show rejection reason input only if rejected, otherwise hide and clear
@@ -578,7 +615,73 @@ class ReviewDashboardPage:
578
  # gr.Warning(f"Invalid Data ID format: {target_data_id}")
579
  return current_idx
580
 
581
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
  # Output definitions
584
  review_display_outputs = [
 
12
  from utils.database import get_db
13
  from data.models import Annotation, TTSData, Annotator, Validation
14
  from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
15
+ from utils.user_progress import get_user_progress, update_user_progress, get_next_unreviewed_annotation, get_annotations_from_position
16
 
17
  log = Logger()
18
  LOADER = CloudServerAudioLoader(conf.FTP_URL)
 
303
  log.warning(f"No target annotator found for reviewer {username}")
304
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
305
 
306
+ # Load annotations from target annotator with PROGRESS-AWARE LOADING
307
  with get_db() as db:
308
  # Get target annotator's ID
309
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
 
313
 
314
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
315
 
316
+ # Get the next unreviewed annotation position for this user
317
+ next_annotation_id, next_position = get_next_unreviewed_annotation(user_id, target_annotator_obj.id)
318
+
319
+ if next_annotation_id is None:
320
+ # All annotations have been reviewed
321
+ total_count = db.query(Annotation).filter(
322
+ Annotation.annotator_id == target_annotator_obj.id
323
+ ).count()
324
+ return [], 0, f"🎉 **All Done!** You have reviewed all {total_count} annotations for {target_annotator}.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
325
+
326
+ # Load annotations starting from the next unreviewed position
327
+ INITIAL_BATCH_SIZE = 10 # Load 10 items starting from where user left off
328
+ annotations_data = get_annotations_from_position(
329
+ target_annotator_obj.id,
330
+ next_position,
331
+ INITIAL_BATCH_SIZE
332
+ )
333
+
334
+ # Get total count for progress info
335
+ total_count = db.query(Annotation).filter(
336
  Annotation.annotator_id == target_annotator_obj.id
337
+ ).count()
338
 
339
+ log.info(f"Progress-aware load: Starting from position {next_position}, loaded {len(annotations_data)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
 
340
 
341
+ # Process items with minimal data - validation status will be loaded on-demand
 
 
342
  items = []
343
+ for annotation, filename, sentence in annotations_data:
344
+ # Check if annotation is deleted (minimal processing)
345
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
346
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
347
 
 
 
 
348
  items.append({
349
  "annotation_id": annotation.id,
350
  "tts_id": annotation.tts_data_id,
351
  "filename": filename,
352
  "sentence": sentence,
353
  "annotated_sentence": annotated_sentence_display,
354
+ "is_deleted": is_deleted,
355
  "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
356
+ "validation_status": "Loading...", # Will be loaded on-demand
357
+ "validation_loaded": False # Track if validation status has been loaded
358
  })
359
 
360
+ # Set initial display - start from the first item (which should be unreviewed)
 
361
  if items:
362
+ initial_item = items[0] # Always start from the first item in the loaded batch
363
+ review_info_text = f"🔍 **Phase 2 Review Mode** - Continuing from where you left off. Loaded {len(items)} items starting from position {next_position + 1} of {total_count} total items."
 
 
 
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  # Ensure correct order of return values for 14 outputs
 
366
  rejection_reason_val = ""
367
  rejection_visible_val = False
368
  if initial_item["validation_status"].startswith("Rejected"):
 
374
 
375
  return (
376
  items,
377
+ 0, # Always start from index 0 of the loaded batch
378
  review_info_text,
379
  str(initial_item["tts_id"]),
380
  initial_item["filename"],
 
392
  # Ensure correct order and number of return values for empty items (14 outputs)
393
  return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
394
 
 
 
 
 
 
 
 
395
  def show_current_review_item_fn(items, idx, session):
396
  if not items or idx >= len(items) or idx < 0:
397
  # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
 
399
 
400
  current_item = items[idx]
401
 
402
+ # Load validation status on-demand if not already loaded
403
+ if not current_item.get("validation_loaded", False):
404
+ user_id = session.get("user_id")
405
+ if user_id:
406
+ with get_db() as db:
407
+ try:
408
+ # Get the full annotation object for validation processing
409
+ annotation_obj = db.query(Annotation).filter_by(id=current_item["annotation_id"]).first()
410
+ if annotation_obj:
411
+ validation_status, is_deleted = get_validation_status_for_item(db, current_item["annotation_id"], user_id, annotation_obj)
412
+ current_item["validation_status"] = validation_status
413
+ current_item["is_deleted"] = is_deleted
414
+ current_item["validation_loaded"] = True
415
+
416
+ # Update displayed annotation if deleted
417
+ if is_deleted:
418
+ current_item["annotated_sentence"] = "[DELETED ANNOTATION]"
419
+
420
+ log.info(f"Loaded validation status for item {idx}: {validation_status}")
421
+ except Exception as e:
422
+ log.error(f"Error loading validation status for item {idx}: {e}")
423
+ current_item["validation_status"] = "Error loading status"
424
 
425
  rejection_reason = ""
426
  rejection_visible = False
 
453
  def update_review_info_fn(items, total_count):
454
  """Update the review info banner with current loaded items count"""
455
  if items:
456
+ return f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
457
  else:
458
  return f"🔍 **Phase 2 Review Mode** - No annotations found for review."
459
 
460
  def navigate_and_load_fn(items, current_idx, direction, session):
461
+ """Combined navigation and loading function"""
462
  if not items:
463
  return items, 0, ""
464
 
465
  # Navigate
466
  if direction == "next":
467
  new_idx = min(current_idx + 1, len(items) - 1)
468
+ # Only load more items when user reaches the LAST item of a batch
469
+ should_load_more = (new_idx == len(items) - 1 and len(items) % 5 == 0)
470
+ if should_load_more:
471
+ log.info(f"User reached end of loaded items ({new_idx}/{len(items)}), will load more items")
472
+ # Load more items
473
+ updated_items, total_count = load_more_items_fn(items, session, current_batch_size=10)
474
+ # Update review info with new count
475
+ review_info = update_review_info_fn(updated_items, total_count)
476
+ return updated_items, new_idx, review_info
477
+ else:
478
+ return items, new_idx, "" # No review info update needed
479
  else: # prev
480
  new_idx = max(current_idx - 1, 0)
481
  return items, new_idx, "" # No review info update needed
 
520
  db.commit()
521
  log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
522
 
523
+ # Update user progress tracking
524
+ try:
525
+ # Find target annotator for this user
526
+ username = session.get("username")
527
+ target_annotator = None
528
+ for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
529
+ if reviewer_name == username:
530
+ target_annotator = annotator_name
531
+ break
532
+
533
+ if target_annotator:
534
+ target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
535
+ if target_annotator_obj:
536
+ # Calculate the current position in the review list
537
+ current_position = db.query(Annotation).filter(
538
+ Annotation.annotator_id == target_annotator_obj.id,
539
+ Annotation.id <= annotation_id
540
+ ).count() - 1 # Convert to 0-based index
541
+
542
+ # Update user progress
543
+ update_user_progress(user_id, target_annotator_obj.id, annotation_id, current_position)
544
+ log.info(f"Updated user progress: user {user_id} at position {current_position} for annotation {annotation_id}")
545
+ except Exception as e:
546
+ log.warning(f"Failed to update user progress: {e}")
547
+ # Don't fail the validation save if progress tracking fails
548
+
549
  items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
550
 
551
  # Show rejection reason input only if rejected, otherwise hide and clear
 
615
  # gr.Warning(f"Invalid Data ID format: {target_data_id}")
616
  return current_idx
617
 
618
def load_more_items_fn(items, session, current_batch_size=10):
    """Append the next batch of review items after the ones already loaded.

    The next batch is selected by annotation ID (keyset pagination): rows
    whose ``Annotation.id`` is greater than the highest ID already present
    in ``items``. A plain ``offset(len(items))`` is only correct when the
    loaded list starts at the first annotation; the initial batch starts at
    the reviewer's resume position, so an offset counted from the start
    would fetch the wrong (or duplicate) rows.

    Args:
        items: Review item dicts loaded so far; each carries "annotation_id".
        session: Mapping providing "user_id" and "username".
        current_batch_size: Maximum number of rows to fetch.

    Returns:
        ``(all_items, total_count)``: the combined list and the total number
        of annotations for the target annotator. When the session is
        incomplete or no target annotator can be resolved, the input list is
        returned unchanged with a count of 0.
    """
    user_id = session.get("user_id")
    username = session.get("username")

    if not user_id or not username:
        return items, 0  # Return existing items if no user session

    # Find the annotator this reviewer is mapped to (first match wins).
    target_annotator = next(
        (
            annotator_name
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items()
            if reviewer_name == username
        ),
        None,
    )
    if not target_annotator:
        return items, 0

    with get_db() as db:
        target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
        if not target_annotator_obj:
            return items, 0

        # Total count for the updated review banner.
        total_count = db.query(Annotation).filter(
            Annotation.annotator_id == target_annotator_obj.id
        ).count()

        query = db.query(
            Annotation,
            TTSData.filename,
            TTSData.sentence,
        ).join(
            TTSData, Annotation.tts_data_id == TTSData.id
        ).filter(
            Annotation.annotator_id == target_annotator_obj.id
        )

        if items:
            # Continue strictly after the newest annotation already loaded.
            last_loaded_id = max(item["annotation_id"] for item in items)
            query = query.filter(Annotation.id > last_loaded_id)

        results = query.order_by(Annotation.id).limit(current_batch_size).all()

        # Minimal per-row processing; validation status is loaded on demand
        # when the item is actually displayed.
        new_items = []
        for annotation, filename, sentence in results:
            is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
            annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence

            new_items.append({
                "annotation_id": annotation.id,
                "tts_id": annotation.tts_data_id,
                "filename": filename,
                "sentence": sentence,
                "annotated_sentence": annotated_sentence_display,
                "is_deleted": is_deleted,
                "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                "validation_status": "Loading...",  # Will be loaded on-demand
                "validation_loaded": False,  # Track if validation status has been loaded
            })

        # Combine with existing items
        all_items = items + new_items
        log.info(f"Loaded {len(new_items)} more items, total now: {len(all_items)}")
        return all_items, total_count
685
 
686
  # Output definitions
687
  review_display_outputs = [
data/models.py CHANGED
@@ -158,4 +158,23 @@ class Validation(Base):
158
  validated_at = Column(DateTime, nullable=False)
159
 
160
  annotation = relationship("Annotation")
161
- validator = relationship("Annotator", foreign_keys=[validator_id]) # Fixed: should reference Annotator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  validated_at = Column(DateTime, nullable=False)
159
 
160
  annotation = relationship("Annotation")
161
+ validator = relationship("Annotator", foreign_keys=[validator_id]) # Fixed: should reference Annotator
162
+
163
+
164
+ # --------------------------------------------------------------------------- #
165
+ # UserProgress #
166
+ # --------------------------------------------------------------------------- #
167
class UserProgress(Base):
    """Review-progress row linking a reviewer to a target annotator.

    Each row references the reviewing user and the annotator under review
    (both foreign keys into ``annotators``), the last reviewed annotation
    (nullable) and an integer position in the review list.
    """

    __tablename__ = "user_progress"

    id = Column(Integer, primary_key=True)

    # Reviewer and the annotator whose work is being reviewed.
    user_id = Column(
        Integer,
        ForeignKey("annotators.id"),
        nullable=False,
    )
    target_annotator_id = Column(
        Integer,
        ForeignKey("annotators.id"),
        nullable=False,
    )

    # Nullable: no annotation reference is required for a row to exist.
    last_reviewed_annotation_id = Column(
        Integer,
        ForeignKey("annotations.id"),
        nullable=True,
    )

    # Position in the review list.
    last_position = Column(Integer, default=0)

    # Required and has no column default, so writers must supply it.
    updated_at = Column(DateTime, nullable=False)

    # Relationships. Both annotator links target the same table, so each one
    # names its foreign key explicitly.
    user = relationship("Annotator", foreign_keys=[user_id])
    target_annotator = relationship(
        "Annotator",
        foreign_keys=[target_annotator_id],
    )
    last_reviewed_annotation = relationship(
        "Annotation",
        foreign_keys=[last_reviewed_annotation_id],
    )
utils/database.py CHANGED
@@ -21,20 +21,29 @@ log = Logger()
21
 
22
  def get_db_engine():
23
  """Create DB engine with error handling for HF Spaces"""
24
- # try:
25
- engine = create_engine(
26
- conf.db_url,
27
- pool_pre_ping=True,
28
- pool_size=5,
29
- max_overflow=10,
30
- connect_args={"connect_timeout": 10},
31
- )
32
- log.info("Database engine created successfully")
33
- return engine
34
- # except Exception as e:
35
- # log.error(f"Failed to create database engine: {e}")
36
- # sentry_sdk.capture_exception(e)
37
- # raise
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  engine = get_db_engine()
 
21
 
22
  def get_db_engine():
23
  """Create DB engine with error handling for HF Spaces"""
24
+ try:
25
+ # SQLite doesn't support some MySQL-specific parameters
26
+ if conf.db_url.startswith("sqlite"):
27
+ engine = create_engine(
28
+ conf.db_url,
29
+ pool_pre_ping=True,
30
+ pool_size=5,
31
+ max_overflow=10,
32
+ )
33
+ else:
34
+ engine = create_engine(
35
+ conf.db_url,
36
+ pool_pre_ping=True,
37
+ pool_size=5,
38
+ max_overflow=10,
39
+ connect_args={"connect_timeout": 10},
40
+ )
41
+ log.info("Database engine created successfully")
42
+ return engine
43
+ except Exception as e:
44
+ log.error(f"Failed to create database engine: {e}")
45
+ sentry_sdk.capture_exception(e)
46
+ raise
47
 
48
 
49
  engine = get_db_engine()