vargha committed on
Commit
79f51f8
·
1 Parent(s): e4c3230

Resume Feature Implementation

Browse files
Files changed (1) hide show
  1. components/review_dashboard_page.py +59 -106
components/review_dashboard_page.py CHANGED
@@ -278,48 +278,6 @@ class ReviewDashboardPage:
278
  log.error(f"Error calculating review progress for user {user_id}: {e}")
279
  return f"⚠️ **Error calculating progress**"
280
 
281
- def find_first_unreviewed_annotation_index(db, target_annotator_obj, reviewer_user_id):
282
- """
283
- Find the index (0-based) of the first annotation that hasn't been reviewed by the current reviewer.
284
- Returns the global index within all annotations for the target annotator.
285
- Uses the same JOIN as load_review_items_fn to ensure consistent indexing.
286
- """
287
- try:
288
- # Query to find the first annotation that doesn't have a validation record from this reviewer
289
- # Use the same JOIN as load_review_items_fn to ensure we're counting the same set of annotations
290
- first_unreviewed = db.query(Annotation).join(
291
- TTSData, Annotation.tts_data_id == TTSData.id
292
- ).outerjoin(
293
- Validation,
294
- (Annotation.id == Validation.annotation_id) &
295
- (Validation.validator_id == reviewer_user_id)
296
- ).filter(
297
- Annotation.annotator_id == target_annotator_obj.id,
298
- Validation.id.is_(None) # No validation record exists
299
- ).order_by(Annotation.id).first()
300
-
301
- if first_unreviewed:
302
- # Find the index of this annotation among all valid annotations (with TTSData) for the target annotator
303
- # Use the same query structure as load_review_items_fn
304
- all_annotations = db.query(Annotation).join(
305
- TTSData, Annotation.tts_data_id == TTSData.id
306
- ).filter(
307
- Annotation.annotator_id == target_annotator_obj.id
308
- ).order_by(Annotation.id).all()
309
-
310
- for idx, annotation in enumerate(all_annotations):
311
- if annotation.id == first_unreviewed.id:
312
- log.info(f"Found first unreviewed annotation at index {idx} (ID: {first_unreviewed.id})")
313
- return idx
314
-
315
- # If no unreviewed annotation found, return -1
316
- log.info("No unreviewed annotations found")
317
- return -1
318
-
319
- except Exception as e:
320
- log.error(f"Error finding first unreviewed annotation: {e}")
321
- return -1
322
-
323
  def load_review_items_fn(session):
324
  user_id = session.get("user_id")
325
  username = session.get("username")
@@ -344,7 +302,7 @@ class ReviewDashboardPage:
344
  log.warning(f"No target annotator found for reviewer {username}")
345
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
346
 
347
- # Load annotations from target annotator with RESUME LOGIC
348
  with get_db() as db:
349
  # Get target annotator's ID
350
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -354,59 +312,33 @@ class ReviewDashboardPage:
354
 
355
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
356
 
357
- # --- RESUME LOGIC: Find first unreviewed annotation ---
358
- resume_index = find_first_unreviewed_annotation_index(db, target_annotator_obj, user_id)
 
359
 
360
- # Get total count for progress info
361
- total_count = db.query(Annotation).filter(
 
 
 
 
 
 
362
  Annotation.annotator_id == target_annotator_obj.id
363
- ).count()
364
 
365
- # Calculate loading strategy based on resume logic
366
- INITIAL_BATCH_SIZE = 10 # Standard batch size
367
- CONTEXT_ITEMS = 1 # Number of reviewed items to include for context
368
- load_offset = 0 # Initialize load_offset
369
 
370
- if resume_index >= 0:
371
- # Load items starting from around the first unreviewed annotation
372
- # Include some context (already reviewed items) before the unreviewed one
373
- load_offset = max(0, resume_index - CONTEXT_ITEMS)
374
- items_to_load = INITIAL_BATCH_SIZE
375
-
376
- # Load items starting from the calculated offset
377
- query = db.query(
378
- Annotation,
379
- TTSData.filename,
380
- TTSData.sentence
381
- ).join(
382
- TTSData, Annotation.tts_data_id == TTSData.id
383
- ).filter(
384
- Annotation.annotator_id == target_annotator_obj.id
385
- ).order_by(Annotation.id).offset(load_offset).limit(items_to_load)
386
-
387
- log.info(f"Resume mode: Loading {items_to_load} items starting from offset {load_offset} (first unreviewed at global index {resume_index})")
388
- else:
389
- # No unreviewed items found, load standard batch from the end
390
- load_offset = max(0, total_count - INITIAL_BATCH_SIZE)
391
- query = db.query(
392
- Annotation,
393
- TTSData.filename,
394
- TTSData.sentence
395
- ).join(
396
- TTSData, Annotation.tts_data_id == TTSData.id
397
- ).filter(
398
- Annotation.annotator_id == target_annotator_obj.id
399
- ).order_by(Annotation.id).offset(load_offset).limit(INITIAL_BATCH_SIZE)
400
-
401
- log.info(f"All reviewed mode: Loading {INITIAL_BATCH_SIZE} items from offset {load_offset}")
402
 
403
- results = query.all()
404
-
405
- log.info(f"Loaded {len(results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
406
 
407
  # Process items with minimal data - validation status will be loaded on-demand
408
  items = []
409
- for annotation, filename, sentence in results:
410
  # Check if annotation is deleted (minimal processing)
411
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
412
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -423,27 +355,48 @@ class ReviewDashboardPage:
423
  "validation_loaded": False # Track if validation status has been loaded
424
  })
425
 
426
- # --- Calculate initial index based on resume logic ---
427
  initial_idx = 0
428
  if items:
429
- if resume_index >= 0:
430
- # Calculate the local index within loaded items
431
- # The first unreviewed annotation should be at: resume_index - load_offset
432
- local_resume_index = resume_index - load_offset
433
- if 0 <= local_resume_index < len(items):
434
- initial_idx = local_resume_index
435
- log.info(f"User '{username}' resuming at first unreviewed item, local index: {initial_idx} (global index: {resume_index}, annotation ID: {items[initial_idx]['annotation_id']})")
436
- else:
437
- # Fallback to first item if calculation is off
438
- initial_idx = 0
439
- log.warning(f"Resume index calculation off, starting at first loaded item. Local index: {local_resume_index}, loaded items: {len(items)}")
440
- else:
441
- # All items reviewed, start from the last item
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  initial_idx = len(items) - 1 if items else 0
443
- if items:
444
- log.info(f"User '{username}' has all items reviewed, starting at last item index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
445
- else:
446
- log.info(f"User '{username}' has no items assigned, starting at index 0.")
447
 
448
  # Set initial display
449
  if items:
 
278
  log.error(f"Error calculating review progress for user {user_id}: {e}")
279
  return f"⚠️ **Error calculating progress**"
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  def load_review_items_fn(session):
282
  user_id = session.get("user_id")
283
  username = session.get("username")
 
302
  log.warning(f"No target annotator found for reviewer {username}")
303
  return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
304
 
305
+ # Load annotations from target annotator with FAST INITIAL LOADING
306
  with get_db() as db:
307
  # Get target annotator's ID
308
  target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
 
312
 
313
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
314
 
315
+ # FAST INITIAL QUERY: Load only essential data without complex validation processing
316
+ # Reduced batch size for instant loading in HuggingFace spaces
317
+ INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
318
 
319
+ # Simple query to get basic annotation data quickly
320
+ initial_query = db.query(
321
+ Annotation,
322
+ TTSData.filename,
323
+ TTSData.sentence
324
+ ).join(
325
+ TTSData, Annotation.tts_data_id == TTSData.id
326
+ ).filter(
327
  Annotation.annotator_id == target_annotator_obj.id
328
+ ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
329
 
330
+ initial_results = initial_query.all()
 
 
 
331
 
332
+ # Get total count for progress info (this is fast)
333
+ total_count = db.query(Annotation).filter(
334
+ Annotation.annotator_id == target_annotator_obj.id
335
+ ).count()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
+ log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
 
 
338
 
339
  # Process items with minimal data - validation status will be loaded on-demand
340
  items = []
341
+ for annotation, filename, sentence in initial_results:
342
  # Check if annotation is deleted (minimal processing)
343
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
344
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
 
355
  "validation_loaded": False # Track if validation status has been loaded
356
  })
357
 
358
+ # Find the first item that is not reviewed (prioritize non-deleted annotations)
359
  initial_idx = 0
360
  if items:
361
+ found_unreviewed = False
362
+
363
+ # Check database for validation status to find first unreviewed item
364
+ for i, item_data in enumerate(items):
365
+ # Check if this annotation has been validated by current user
366
+ existing_validation = db.query(Validation).filter_by(
367
+ annotation_id=item_data["annotation_id"],
368
+ validator_id=user_id
369
+ ).first()
370
+
371
+ # If no validation exists, this is unreviewed
372
+ if not existing_validation:
373
+ # Prioritize non-deleted annotations
374
+ if not item_data.get("is_deleted", False):
375
+ initial_idx = i
376
+ found_unreviewed = True
377
+ log.info(f"Found first unreviewed non-deleted item at index {i} (annotation_id: {item_data['annotation_id']})")
378
+ break
379
+
380
+ # If no unreviewed non-deleted items found, look for any unreviewed items (including deleted)
381
+ if not found_unreviewed:
382
+ for i, item_data in enumerate(items):
383
+ existing_validation = db.query(Validation).filter_by(
384
+ annotation_id=item_data["annotation_id"],
385
+ validator_id=user_id
386
+ ).first()
387
+
388
+ if not existing_validation:
389
+ initial_idx = i
390
+ found_unreviewed = True
391
+ log.info(f"Found first unreviewed item at index {i} (annotation_id: {item_data['annotation_id']}) - may be deleted")
392
+ break
393
+
394
+ # If all items have been reviewed, continue from the end
395
+ if not found_unreviewed:
396
  initial_idx = len(items) - 1 if items else 0
397
+ log.info(f"All loaded items have been reviewed, starting from index {initial_idx}")
398
+
399
+ log.info(f"Set initial review index to {initial_idx} out of {len(items)} loaded items")
 
400
 
401
  # Set initial display
402
  if items: