vargha committed on
Commit
aad2e16
·
1 Parent(s): 1a410ee

revert automated progress bar

Browse files
components/review_dashboard_page.py CHANGED
@@ -12,7 +12,6 @@ from config import conf
12
  from utils.database import get_db
13
  from data.models import Annotation, TTSData, Annotator, Validation
14
  from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
15
- from utils.user_progress import get_user_progress, update_user_progress, get_next_unreviewed_annotation, get_annotations_from_position
16
 
17
  log = Logger()
18
  LOADER = CloudServerAudioLoader(conf.FTP_URL)
@@ -303,7 +302,7 @@ class ReviewDashboardPage:
303
  log.warning(f"No target annotator found for reviewer {username}")
304
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
305
 
306
- # Load annotations from target annotator with PROGRESS-AWARE LOADING
307
  with get_db() as db:
308
  # Get target annotator's ID
309
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -313,34 +312,33 @@ class ReviewDashboardPage:
313
 
314
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
315
 
316
- # Get the next unreviewed annotation position for this user
317
- next_annotation_id, next_position = get_next_unreviewed_annotation(user_id, target_annotator_obj.id)
 
318
 
319
- if next_annotation_id is None:
320
- # All annotations have been reviewed
321
- total_count = db.query(Annotation).filter(
322
- Annotation.annotator_id == target_annotator_obj.id
323
- ).count()
324
- return [], 0, f"πŸŽ‰ **All Done!** You have reviewed all {total_count} annotations for {target_annotator}.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
325
-
326
- # Load annotations starting from the next unreviewed position
327
- INITIAL_BATCH_SIZE = 10 # Load 10 items starting from where user left off
328
- annotations_data = get_annotations_from_position(
329
- target_annotator_obj.id,
330
- next_position,
331
- INITIAL_BATCH_SIZE
332
- )
333
 
334
- # Get total count for progress info
335
  total_count = db.query(Annotation).filter(
336
  Annotation.annotator_id == target_annotator_obj.id
337
  ).count()
338
 
339
- log.info(f"Progress-aware load: Starting from position {next_position}, loaded {len(annotations_data)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
340
 
341
  # Process items with minimal data - validation status will be loaded on-demand
342
  items = []
343
- for annotation, filename, sentence in annotations_data:
344
  # Check if annotation is deleted (minimal processing)
345
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
346
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -357,12 +355,36 @@ class ReviewDashboardPage:
357
  "validation_loaded": False # Track if validation status has been loaded
358
  })
359
 
360
- # Set initial display - start from the first item (which should be unreviewed)
 
361
  if items:
362
- initial_item = items[0] # Always start from the first item in the loaded batch
363
- review_info_text = f"πŸ” **Phase 2 Review Mode** - Continuing from where you left off. Loaded {len(items)} items starting from position {next_position + 1} of {total_count} total items."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
- # Ensure correct order of return values for 14 outputs
 
 
 
 
 
 
 
 
 
366
  rejection_reason_val = ""
367
  rejection_visible_val = False
368
  if initial_item["validation_status"].startswith("Rejected"):
@@ -374,7 +396,7 @@ class ReviewDashboardPage:
374
 
375
  return (
376
  items,
377
- 0, # Always start from index 0 of the loaded batch
378
  review_info_text,
379
  str(initial_item["tts_id"]),
380
  initial_item["filename"],
@@ -392,6 +414,13 @@ class ReviewDashboardPage:
392
  # Ensure correct order and number of return values for empty items (14 outputs)
393
  return [], 0, f"πŸ” **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
394
 
 
 
 
 
 
 
 
395
  def show_current_review_item_fn(items, idx, session):
396
  if not items or idx >= len(items) or idx < 0:
397
  # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
@@ -520,32 +549,6 @@ class ReviewDashboardPage:
520
  db.commit()
521
  log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
522
 
523
- # Update user progress tracking
524
- try:
525
- # Find target annotator for this user
526
- username = session.get("username")
527
- target_annotator = None
528
- for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
529
- if reviewer_name == username:
530
- target_annotator = annotator_name
531
- break
532
-
533
- if target_annotator:
534
- target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
535
- if target_annotator_obj:
536
- # Calculate the current position in the review list
537
- current_position = db.query(Annotation).filter(
538
- Annotation.annotator_id == target_annotator_obj.id,
539
- Annotation.id <= annotation_id
540
- ).count() - 1 # Convert to 0-based index
541
-
542
- # Update user progress
543
- update_user_progress(user_id, target_annotator_obj.id, annotation_id, current_position)
544
- log.info(f"Updated user progress: user {user_id} at position {current_position} for annotation {annotation_id}")
545
- except Exception as e:
546
- log.warning(f"Failed to update user progress: {e}")
547
- # Don't fail the validation save if progress tracking fails
548
-
549
  items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
550
 
551
  # Show rejection reason input only if rejected, otherwise hide and clear
 
12
  from utils.database import get_db
13
  from data.models import Annotation, TTSData, Annotator, Validation
14
  from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
 
15
 
16
  log = Logger()
17
  LOADER = CloudServerAudioLoader(conf.FTP_URL)
 
302
  log.warning(f"No target annotator found for reviewer {username}")
303
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
304
 
305
+ # Load annotations from target annotator with FAST INITIAL LOADING
306
  with get_db() as db:
307
  # Get target annotator's ID
308
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
 
312
 
313
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
314
 
315
+ # FAST INITIAL QUERY: Load only essential data without complex validation processing
316
+ # Reduced batch size for instant loading in HuggingFace spaces
317
+ INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
318
 
319
+ # Simple query to get basic annotation data quickly
320
+ initial_query = db.query(
321
+ Annotation,
322
+ TTSData.filename,
323
+ TTSData.sentence
324
+ ).join(
325
+ TTSData, Annotation.tts_data_id == TTSData.id
326
+ ).filter(
327
+ Annotation.annotator_id == target_annotator_obj.id
328
+ ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
329
+
330
+ initial_results = initial_query.all()
 
 
331
 
332
+ # Get total count for progress info (this is fast)
333
  total_count = db.query(Annotation).filter(
334
  Annotation.annotator_id == target_annotator_obj.id
335
  ).count()
336
 
337
+ log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
338
 
339
  # Process items with minimal data - validation status will be loaded on-demand
340
  items = []
341
+ for annotation, filename, sentence in initial_results:
342
  # Check if annotation is deleted (minimal processing)
343
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
344
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
 
355
  "validation_loaded": False # Track if validation status has been loaded
356
  })
357
 
358
+ # Find the first item that is not reviewed (prioritize non-deleted annotations)
359
+ initial_idx = 0
360
  if items:
361
+ found_unreviewed = False
362
+ # First, try to find unreviewed non-deleted annotations
363
+ for i, item_data in enumerate(items):
364
+ if (item_data["validation_status"] == "Not Reviewed" and
365
+ not item_data.get("is_deleted", False)):
366
+ initial_idx = i
367
+ found_unreviewed = True
368
+ break
369
+
370
+ # If no unreviewed non-deleted items, look for any unreviewed items
371
+ if not found_unreviewed:
372
+ for i, item_data in enumerate(items):
373
+ if item_data["validation_status"].startswith("Not Reviewed"):
374
+ initial_idx = i
375
+ found_unreviewed = True
376
+ break
377
 
378
+ # If no unreviewed items at all, use the last item
379
+ if not found_unreviewed:
380
+ initial_idx = len(items) - 1 if items else 0
381
+
382
+ # Set initial display
383
+ if items:
384
+ initial_item = items[initial_idx]
385
+ review_info_text = f"πŸ” **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
386
+ # Ensure correct order of return values for 12 outputs
387
+ # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update
388
  rejection_reason_val = ""
389
  rejection_visible_val = False
390
  if initial_item["validation_status"].startswith("Rejected"):
 
396
 
397
  return (
398
  items,
399
+ initial_idx,
400
  review_info_text,
401
  str(initial_item["tts_id"]),
402
  initial_item["filename"],
 
414
  # Ensure correct order and number of return values for empty items (14 outputs)
415
  return [], 0, f"πŸ” **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
416
 
417
+ # except Exception as e:
418
+ # log.error(f"Error loading review items: {e}")
419
+ # sentry_sdk.capture_exception(e)
420
+ # gr.Error(f"Failed to load review data: {e}")
421
+ # # Ensure correct order and number of return values for error case (14 outputs)
422
+ # return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
423
+
424
  def show_current_review_item_fn(items, idx, session):
425
  if not items or idx >= len(items) or idx < 0:
426
  # tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
 
549
  db.commit()
550
  log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
551
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
  items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
553
 
554
  # Show rejection reason input only if rejected, otherwise hide and clear
data/models.py CHANGED
@@ -158,23 +158,4 @@ class Validation(Base):
158
  validated_at = Column(DateTime, nullable=False)
159
 
160
  annotation = relationship("Annotation")
161
- validator = relationship("Annotator", foreign_keys=[validator_id]) # Fixed: should reference Annotator
162
-
163
-
164
- # --------------------------------------------------------------------------- #
165
- # UserProgress #
166
- # --------------------------------------------------------------------------- #
167
- class UserProgress(Base):
168
- __tablename__ = "user_progress"
169
-
170
- id = Column(Integer, primary_key=True)
171
- user_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
172
- target_annotator_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
173
- last_reviewed_annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=True)
174
- last_position = Column(Integer, default=0) # Position in the review list
175
- updated_at = Column(DateTime, nullable=False)
176
-
177
- # Relationships
178
- user = relationship("Annotator", foreign_keys=[user_id])
179
- target_annotator = relationship("Annotator", foreign_keys=[target_annotator_id])
180
- last_reviewed_annotation = relationship("Annotation", foreign_keys=[last_reviewed_annotation_id])
 
158
  validated_at = Column(DateTime, nullable=False)
159
 
160
  annotation = relationship("Annotation")
161
+ validator = relationship("Annotator", foreign_keys=[validator_id]) # Fixed: should reference Annotator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/database.py CHANGED
@@ -21,29 +21,20 @@ log = Logger()
21
 
22
  def get_db_engine():
23
  """Create DB engine with error handling for HF Spaces"""
24
- try:
25
- # SQLite doesn't support some MySQL-specific parameters
26
- if conf.db_url.startswith("sqlite"):
27
- engine = create_engine(
28
- conf.db_url,
29
- pool_pre_ping=True,
30
- pool_size=5,
31
- max_overflow=10,
32
- )
33
- else:
34
- engine = create_engine(
35
- conf.db_url,
36
- pool_pre_ping=True,
37
- pool_size=5,
38
- max_overflow=10,
39
- connect_args={"connect_timeout": 10},
40
- )
41
- log.info("Database engine created successfully")
42
- return engine
43
- except Exception as e:
44
- log.error(f"Failed to create database engine: {e}")
45
- sentry_sdk.capture_exception(e)
46
- raise
47
 
48
 
49
  engine = get_db_engine()
 
21
 
22
  def get_db_engine():
23
  """Create DB engine with error handling for HF Spaces"""
24
+ # try:
25
+ engine = create_engine(
26
+ conf.db_url,
27
+ pool_pre_ping=True,
28
+ pool_size=5,
29
+ max_overflow=10,
30
+ connect_args={"connect_timeout": 10},
31
+ )
32
+ log.info("Database engine created successfully")
33
+ return engine
34
+ # except Exception as e:
35
+ # log.error(f"Failed to create database engine: {e}")
36
+ # sentry_sdk.capture_exception(e)
37
+ # raise
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  engine = get_db_engine()
utils/user_progress.py DELETED
@@ -1,113 +0,0 @@
1
- # utils/user_progress.py
2
-
3
- import datetime
4
- from sqlalchemy import and_
5
- from utils.database import get_db
6
- from data.models import UserProgress, Annotation, Validation, Annotator, TTSData
7
- from utils.logger import Logger
8
-
9
- log = Logger()
10
-
11
-
12
- def get_user_progress(user_id: int, target_annotator_id: int) -> UserProgress:
13
- """Get or create user progress record for a specific target annotator"""
14
- with get_db() as db:
15
- progress = db.query(UserProgress).filter(
16
- and_(
17
- UserProgress.user_id == user_id,
18
- UserProgress.target_annotator_id == target_annotator_id
19
- )
20
- ).first()
21
-
22
- if not progress:
23
- # Create new progress record
24
- progress = UserProgress(
25
- user_id=user_id,
26
- target_annotator_id=target_annotator_id,
27
- last_reviewed_annotation_id=None,
28
- last_position=0,
29
- updated_at=datetime.datetime.now()
30
- )
31
- db.add(progress)
32
- db.commit()
33
- log.info(f"Created new progress record for user {user_id} on target annotator {target_annotator_id}")
34
-
35
- return progress
36
-
37
-
38
- def update_user_progress(user_id: int, target_annotator_id: int, annotation_id: int, position: int):
39
- """Update user progress with the latest reviewed annotation"""
40
- with get_db() as db:
41
- progress = get_user_progress(user_id, target_annotator_id)
42
- progress.last_reviewed_annotation_id = annotation_id
43
- progress.last_position = position
44
- progress.updated_at = datetime.datetime.now()
45
- db.commit()
46
- log.info(f"Updated progress for user {user_id}: annotation {annotation_id}, position {position}")
47
-
48
-
49
- def get_next_unreviewed_annotation(user_id: int, target_annotator_id: int) -> tuple[int, int]:
50
- """
51
- Get the next unreviewed annotation ID and its position in the review list.
52
- Returns (annotation_id, position) or (None, 0) if no unreviewed items found.
53
- """
54
- with get_db() as db:
55
- # Get all annotations for the target annotator, ordered by ID
56
- annotations = db.query(Annotation).filter(
57
- Annotation.annotator_id == target_annotator_id
58
- ).order_by(Annotation.id).all()
59
-
60
- if not annotations:
61
- return None, 0
62
-
63
- # Find the first annotation that hasn't been reviewed by this user
64
- for position, annotation in enumerate(annotations):
65
- # Check if this annotation has been validated by the current user
66
- validation = db.query(Validation).filter(
67
- and_(
68
- Validation.annotation_id == annotation.id,
69
- Validation.validator_id == user_id
70
- )
71
- ).first()
72
-
73
- if not validation:
74
- # This annotation hasn't been reviewed yet
75
- return annotation.id, position
76
-
77
- # All annotations have been reviewed
78
- return None, len(annotations)
79
-
80
-
81
- def get_last_reviewed_position(user_id: int, target_annotator_id: int) -> int:
82
- """Get the position of the last reviewed annotation by the user"""
83
- with get_db() as db:
84
- progress = db.query(UserProgress).filter(
85
- and_(
86
- UserProgress.user_id == user_id,
87
- UserProgress.target_annotator_id == target_annotator_id
88
- )
89
- ).first()
90
-
91
- if progress and progress.last_position is not None:
92
- return progress.last_position
93
- return 0
94
-
95
-
96
- def get_annotations_from_position(target_annotator_id: int, start_position: int, batch_size: int = 10) -> list:
97
- """
98
- Get annotations starting from a specific position with pagination.
99
- Returns list of (annotation, filename, sentence) tuples.
100
- """
101
- with get_db() as db:
102
- # Get annotations ordered by ID, starting from the specified position
103
- annotations = db.query(
104
- Annotation,
105
- TTSData.filename,
106
- TTSData.sentence
107
- ).join(
108
- TTSData, Annotation.tts_data_id == TTSData.id
109
- ).filter(
110
- Annotation.annotator_id == target_annotator_id
111
- ).order_by(Annotation.id).offset(start_position).limit(batch_size).all()
112
-
113
- return annotations