# components/review_dashboard_page.py

import gradio as gr
import datetime
import sentry_sdk
from sqlalchemy import orm

from components.header import Header
from utils.logger import Logger
from utils.cloud_server_audio_loader import CloudServerAudioLoader
from config import conf
from utils.database import get_db
from data.models import Annotation, TTSData, Annotator, Validation
from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo

log = Logger()
LOADER = CloudServerAudioLoader(conf.FTP_URL)

# Number of annotations fetched on first load; kept small so the page opens fast.
_INITIAL_BATCH_SIZE = 5
_DELETED_PLACEHOLDER = "[DELETED ANNOTATION]"
_REJECT_LABEL = "❌ Reject"
_CONFIRM_REJECT_LABEL = "⚠️ Confirm Reject"


def _is_deleted_sentence(annotated_sentence) -> bool:
    """Return True when an annotated sentence is missing or whitespace-only."""
    return not annotated_sentence or annotated_sentence.strip() == ""


def _format_validation_status(reviewed: bool, approved: bool, description, is_deleted: bool) -> str:
    """Build the status text shown in the "Current Status" box."""
    if not reviewed:
        return "Not Reviewed (Deleted)" if is_deleted else "Not Reviewed"
    if approved:
        return "Approved"
    return f"Rejected ({description})" if description else "Rejected"


def _split_rejection_status(status: str):
    """Return ``(is_rejected, reason)`` parsed from a status string.

    The reason is the text between the first "(" and the LAST ")" so that
    reasons which themselves contain parentheses are not truncated.
    """
    if not status.startswith("Rejected"):
        return False, ""
    start = status.find("(")
    end = status.rfind(")")
    if start == -1 or end <= start:
        return True, ""
    return True, status[start + 1:end]


def _pick_initial_index(items) -> int:
    """Pick the item to show first.

    Preference order: first unreviewed non-deleted item, then any unreviewed
    item, then the last loaded item. Returns 0 for an empty list.
    """
    if not items:
        return 0
    for i, item in enumerate(items):
        if item["validation_status"] == "Not Reviewed" and not item.get("is_deleted", False):
            return i
    for i, item in enumerate(items):
        if item["validation_status"].startswith("Not Reviewed"):
            return i
    return len(items) - 1


def _build_item(annotation, filename, sentence, validation_status: str, is_deleted: bool) -> dict:
    """Convert one query row into the plain dict kept in ``items_state``."""
    return {
        "annotation_id": annotation.id,
        "tts_id": annotation.tts_data_id,
        "filename": filename,
        "sentence": sentence,
        "annotated_sentence": _DELETED_PLACEHOLDER if is_deleted else annotation.annotated_sentence,
        "is_deleted": is_deleted,
        "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
        "validation_status": validation_status,
        "validation_loaded": True,
    }


def _empty_display_values():
    """Values for ``review_display_outputs`` (11 outputs) when nothing can be shown."""
    return (
        "", "", "", "", "", "",
        "",  # placeholder for the anonymized annotator name
        gr.update(value=None, autoplay=False),
        gr.update(visible=False, value=""),
        False,  # reset rejection mode
        gr.update(value=_REJECT_LABEL),
    )


def _item_display_values(item):
    """Values for ``review_display_outputs`` (11 outputs) for one item dict."""
    rejection_visible, rejection_reason = _split_rejection_status(item["validation_status"])
    return (
        str(item["tts_id"]),
        item["filename"],
        item["sentence"],
        item["annotated_sentence"],
        item["annotated_at"],
        item["validation_status"],
        "",  # placeholder for the anonymized annotator name
        gr.update(value=None, autoplay=False),
        gr.update(visible=rejection_visible, value=rejection_reason),
        False,  # reset rejection mode
        gr.update(value=_REJECT_LABEL),
    )


def _find_target_annotator(username):
    """Return the annotator name mapped to reviewer ``username`` in conf.REVIEW_MAPPING, or None."""
    return next(
        (
            annotator_name
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items()
            if reviewer_name == username
        ),
        None,
    )


class ReviewDashboardPage:
    """Gradio page on which a reviewer approves or rejects another annotator's work.

    ``__init__`` builds the (initially hidden) layout; ``register_callbacks``
    wires the event handlers. Loading is started by changing ``load_trigger``.
    """

    def __init__(self) -> None:
        with gr.Column(visible=False) as self.container:
            self.header = Header()
            # Hidden component; a change of its value starts the data load.
            self.load_trigger = gr.Number(value=0, visible=False)

            # Review info banner
            with gr.Row():
                self.review_info = gr.Markdown("", elem_classes="review-banner")

            with gr.Row():
                # Left Column - Review Content
                with gr.Column(scale=3):
                    with gr.Row():
                        self.tts_id = gr.Textbox(label="ID", interactive=False, scale=1)
                        self.filename = gr.Textbox(label="Filename", interactive=False, scale=3)

                    self.sentence = gr.Textbox(
                        label="Original Sentence", interactive=False, max_lines=5, rtl=True
                    )
                    self.ann_sentence = gr.Textbox(
                        label="Annotated Sentence (by Original Annotator)",
                        interactive=False,
                        max_lines=5,
                        rtl=True,
                    )

                    with gr.Row():
                        # The original annotator's name is intentionally not shown (anonymization).
                        self.annotated_at = gr.Textbox(label="Annotated At", interactive=False, scale=2)

                    # Review Actions
                    with gr.Row():
                        self.btn_approve = gr.Button("✅ Approve", variant="primary", min_width=120)
                        self.btn_reject = gr.Button(_REJECT_LABEL, variant="stop", min_width=120)
                        self.btn_skip = gr.Button("⏭️ Skip (No Decision)", min_width=150)

                    # Navigation
                    with gr.Row():
                        self.btn_prev = gr.Button("⬅️ Previous", min_width=120)
                        self.btn_next = gr.Button("Next ➡️", min_width=120)

                    # Jump controls
                    with gr.Row():
                        self.jump_data_id_input = gr.Number(
                            label="Jump to ID",
                            value=None,
                            precision=0,
                            interactive=True,
                            min_width=120,
                        )
                        self.btn_jump = gr.Button("Go to ID", min_width=70)

                # Right Column - Audio
                with gr.Column(scale=2):
                    self.btn_load_voice = gr.Button("Load Audio & Play", min_width=150)
                    self.audio = gr.Audio(
                        label="🔊 Audio", interactive=False, autoplay=True
                    )

                    # Review status display
                    with gr.Group():
                        gr.Markdown("### Review Status")
                        self.current_validation_status = gr.Textbox(
                            label="Current Status", interactive=False
                        )
                        self.rejection_reason_input = gr.Textbox(
                            label="Rejection Reason",
                            placeholder="Enter reason and press Enter or click away...",
                            interactive=True,
                            visible=False,
                            max_lines=3,
                            elem_id="rejection_reason_input",
                        )

            # State variables
            self.items_state = gr.State([])
            self.idx_state = gr.State(0)
            self.original_audio_state = gr.State(None)
            # True while the reject button is waiting for a reason to be confirmed.
            self.rejection_mode_active = gr.State(False)

            # Interactive elements that are disabled while a handler chain runs.
            self.interactive_ui_elements = [
                self.btn_prev, self.btn_next, self.btn_approve, self.btn_reject,
                self.btn_skip, self.btn_jump, self.jump_data_id_input, self.btn_load_voice,
            ]

    def register_callbacks(self, login_page, session_state: gr.State, root_blocks: gr.Blocks):
        """Attach all event handlers of the page and return its container.

        Args:
            login_page: Passed through to ``Header.register_callbacks``.
            session_state: ``gr.State`` holding a dict read via ``user_id`` / ``username``.
            root_blocks: Accepted for interface compatibility; not used here.
        """
        self.header.register_callbacks(login_page, self, session_state)

        def update_ui_interactive_state(is_interactive: bool):
            """Return one update per element of ``interactive_ui_elements``."""
            updates = []
            for elem in self.interactive_ui_elements:
                if elem is self.btn_load_voice:
                    label = "Load Audio & Play" if is_interactive else "⏳ Loading Audio..."
                    updates.append(gr.update(value=label, interactive=is_interactive))
                else:
                    updates.append(gr.update(interactive=is_interactive))
            return updates

        def download_voice_fn(filename_to_load):
            """Fetch the audio for ``filename_to_load`` via LOADER.

            Returns three values for ``[audio, original_audio_state, audio]``.
            Loader exceptions are not caught here and propagate to the caller.
            """
            if not filename_to_load:
                return None, None, gr.update(value=None, autoplay=False)

            log.info(f"Downloading voice for review: {filename_to_load}")
            sr, wav = LOADER.load_audio(filename_to_load)
            log.info(f"Successfully loaded audio: {filename_to_load} (SR: {sr}, Length: {len(wav)} samples)")
            return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=True)

        def get_validation_status_for_item(db, annotation_id, user_id, annotation_obj):
            """Get validation status for a specific item - called on-demand.

            Returns ``(status_text, is_deleted)``.
            """
            validation = db.query(Validation).filter_by(
                annotation_id=annotation_id,
                validator_id=user_id,
            ).first()
            is_deleted = _is_deleted_sentence(annotation_obj.annotated_sentence)
            status = _format_validation_status(
                reviewed=bool(validation),
                approved=bool(validation.validated) if validation else False,
                description=validation.description if validation else None,
                is_deleted=is_deleted,
            )
            return status, is_deleted

        def empty_load_response(review_info_text: str = ""):
            """14 outputs for a load that yields no item: items, idx, banner + display values."""
            return ([], 0, review_info_text) + _empty_display_values()

        def load_review_items_fn(session):
            """Load the first batch of annotations assigned to the logged-in reviewer.

            Returns 14 values: ``items``, ``idx``, the banner text and the 11
            values of ``review_display_outputs``.
            """
            user_id = session.get("user_id")
            username = session.get("username")
            if not user_id or not username:
                log.warning("load_review_items_fn: user not found in session")
                return empty_load_response()

            # Check if user is in Phase 2 (should be a reviewer)
            if username not in conf.REVIEW_MAPPING.values():
                log.warning(f"User {username} is not assigned as a reviewer")
                return empty_load_response()

            target_annotator = _find_target_annotator(username)
            if not target_annotator:
                log.warning(f"No target annotator found for reviewer {username}")
                return empty_load_response()

            with get_db() as db:
                target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                if not target_annotator_obj:
                    log.error(f"Target annotator {target_annotator} not found in database")
                    return empty_load_response(
                        f"Review Target Error: Annotator '{target_annotator}' not found."
                    )

                log.info(f"Found target annotator with ID: {target_annotator_obj.id}")

                initial_results = db.query(
                    Annotation,
                    TTSData.filename,
                    TTSData.sentence,
                ).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                ).filter(
                    Annotation.annotator_id == target_annotator_obj.id
                ).order_by(Annotation.id).limit(_INITIAL_BATCH_SIZE).all()

                total_count = db.query(Annotation).filter(
                    Annotation.annotator_id == target_annotator_obj.id
                ).count()

                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")

                # Fetch this reviewer's validations for the whole batch in one
                # query. Without real statuses the "first unreviewed item"
                # selection below can never match and the first item would be
                # displayed with a permanent "Loading..." status.
                annotation_ids = [annotation.id for annotation, _, _ in initial_results]
                validations_by_annotation = {}
                if annotation_ids:
                    validation_rows = db.query(Validation).filter(
                        Validation.validator_id == user_id,
                        Validation.annotation_id.in_(annotation_ids),
                    ).all()
                    validations_by_annotation = {row.annotation_id: row for row in validation_rows}

                items = []
                for annotation, filename, sentence in initial_results:
                    is_deleted = _is_deleted_sentence(annotation.annotated_sentence)
                    validation = validations_by_annotation.get(annotation.id)
                    status = _format_validation_status(
                        reviewed=validation is not None,
                        approved=bool(validation.validated) if validation is not None else False,
                        description=validation.description if validation is not None else None,
                        is_deleted=is_deleted,
                    )
                    items.append(_build_item(annotation, filename, sentence, status, is_deleted))

            if not items:
                return empty_load_response(
                    "🔍 **Phase 2 Review Mode** - No annotations found for review."
                )

            initial_idx = _pick_initial_index(items)
            review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
            return (items, initial_idx, review_info_text) + _item_display_values(items[initial_idx])

        def show_current_review_item_fn(items, idx, session):
            """Return the 11 ``review_display_outputs`` values for ``items[idx]``.

            If the item's status has not been loaded yet it is fetched here;
            a failure of that lookup is logged and shown as "Error loading status".
            """
            if not items or idx >= len(items) or idx < 0:
                return _empty_display_values()

            current_item = items[idx]

            if not current_item.get("validation_loaded", False):
                user_id = session.get("user_id")
                if user_id:
                    with get_db() as db:
                        try:
                            annotation_obj = db.query(Annotation).filter_by(
                                id=current_item["annotation_id"]
                            ).first()
                            if annotation_obj:
                                validation_status, is_deleted = get_validation_status_for_item(
                                    db, current_item["annotation_id"], user_id, annotation_obj
                                )
                                current_item["validation_status"] = validation_status
                                current_item["is_deleted"] = is_deleted
                                current_item["validation_loaded"] = True
                                if is_deleted:
                                    current_item["annotated_sentence"] = _DELETED_PLACEHOLDER
                                log.info(f"Loaded validation status for item {idx}: {validation_status}")
                        except Exception as e:
                            log.error(f"Error loading validation status for item {idx}: {e}")
                            current_item["validation_status"] = "Error loading status"

            return _item_display_values(current_item)

        def navigate_review_fn(items, current_idx, direction):
            """Return the index one step in ``direction`` ("next" or anything else = previous), clamped."""
            if not items:
                return 0
            if direction == "next":
                return min(current_idx + 1, len(items) - 1)
            return max(current_idx - 1, 0)

        def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
            """Create or update the reviewer's Validation row for ``items[idx]``.

            Returns ``(items, status_text, rejection_input_update)``. Database
            exceptions are not caught here and propagate to the caller.
            """
            if not items or idx >= len(items):
                # gr.Error only has an effect when raised; gr.Warning shows the
                # message while still letting us return values for the outputs.
                gr.Warning("Invalid item index")
                return items, "Error: Invalid item index", gr.update(visible=False)

            user_id = session.get("user_id")
            if not user_id:
                gr.Warning("User not logged in")
                return items, "Error: User not logged in", gr.update(visible=False)

            annotation_id = items[idx]["annotation_id"]
            log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")

            description = rejection_reason if not approved else None
            with get_db() as db:
                existing_validation = db.query(Validation).filter_by(
                    annotation_id=annotation_id,
                    validator_id=user_id,
                ).first()

                if existing_validation:
                    log.info(f"Updating existing validation for annotation_id: {annotation_id}")
                    existing_validation.validated = approved
                    existing_validation.description = description
                    existing_validation.validated_at = datetime.datetime.utcnow()
                else:
                    log.info(f"Creating new validation for annotation_id: {annotation_id}")
                    db.add(Validation(
                        annotation_id=annotation_id,
                        validator_id=user_id,
                        validated=approved,
                        description=description,
                        validated_at=datetime.datetime.utcnow(),
                    ))

                db.commit()
                log.info(f"Validation saved successfully for annotation_id: {annotation_id}")

            if approved:
                new_status = "Approved"
            elif rejection_reason:
                new_status = f"Rejected ({rejection_reason})"
            else:
                new_status = "Rejected"
            items[idx]["validation_status"] = new_status
            items[idx]["validation_loaded"] = True

            # Show the reason box only for a rejection; hide and clear it otherwise.
            rejection_input_update = gr.update(
                visible=not approved, value="" if approved else rejection_reason
            )
            return items, new_status, rejection_input_update

        def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
            """Handle rejection button click - two-step process.

            First click reveals the reason box; the second click saves the
            rejection if a non-blank reason was entered. Returns
            ``(items, status, reason_box_update, rejection_mode, reject_button_update)``.
            """
            if not items or idx >= len(items):
                return items, "Error: Invalid item", gr.update(visible=False), False, gr.update(value=_REJECT_LABEL)

            current_status = items[idx]["validation_status"]

            if not rejection_mode_active:
                return (
                    items,
                    current_status,
                    gr.update(visible=True, value=""),
                    True,
                    gr.update(value=_CONFIRM_REJECT_LABEL),
                )

            if not rejection_reason or rejection_reason.strip() == "":
                gr.Warning("Rejection reason cannot be empty. Please provide a reason before confirming rejection.")
                return (
                    items,
                    current_status,
                    gr.update(visible=True, value=rejection_reason),
                    True,
                    gr.update(value=_CONFIRM_REJECT_LABEL),
                )

            updated_items, validation_status, _ = save_validation_fn(
                items, idx, session, approved=False, rejection_reason=rejection_reason.strip()
            )
            return (
                updated_items,
                validation_status,
                gr.update(visible=False, value=""),
                False,
                gr.update(value=_REJECT_LABEL),
            )

        def jump_by_data_id_fn(items, target_data_id, current_idx):
            """Return the index of the loaded item whose ``tts_id`` equals ``target_data_id``.

            Only already-loaded items are searched; the current index is kept
            (with a warning) when there is no match.
            """
            if not target_data_id:
                return current_idx
            target_id = int(target_data_id)
            for i, item in enumerate(items):
                if item["tts_id"] == target_id:
                    return i
            gr.Warning(f"Data ID {target_id} not found in review items")
            return current_idx

        def load_more_items_fn(items, session, current_batch_size=100):
            """Load more items when user needs them (pagination support).

            Returns ``items`` extended by up to ``current_batch_size`` rows that
            follow the already-loaded ones (same ordering as the initial load).
            """
            user_id = session.get("user_id")
            username = session.get("username")
            if not user_id or not username:
                return items

            target_annotator = _find_target_annotator(username)
            if not target_annotator:
                return items

            with get_db() as db:
                target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                if not target_annotator_obj:
                    return items

                results = db.query(
                    Annotation,
                    TTSData.filename,
                    TTSData.sentence,
                    Validation.validated,
                    Validation.description,
                ).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                ).outerjoin(
                    Validation,
                    (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
                ).filter(
                    Annotation.annotator_id == target_annotator_obj.id
                ).order_by(Annotation.id).offset(len(items)).limit(current_batch_size).all()

                new_items = []
                for annotation, filename, sentence, validated, validation_description in results:
                    is_deleted = _is_deleted_sentence(annotation.annotated_sentence)
                    status = _format_validation_status(
                        reviewed=validated is not None,
                        approved=bool(validated),
                        description=validation_description,
                        is_deleted=is_deleted,
                    )
                    # Status comes from the join, so the item is marked as
                    # loaded and no per-item lookup is needed when it is shown.
                    new_items.append(_build_item(annotation, filename, sentence, status, is_deleted))

            all_items = items + new_items
            log.info(f"Loaded {len(new_items)} more items, total now: {len(all_items)}")
            return all_items

        def advance_to_next_fn(items, idx, session):
            """Move one item forward, fetching the next batch when on the last loaded item.

            Returns ``(items, new_idx)``.
            """
            if items and idx >= len(items) - 1:
                log.info(f"User is at the end of loaded items ({idx}/{len(items)}), loading more items")
                items = load_more_items_fn(items, session)
            return items, navigate_review_fn(items, idx, "next")

        def go_to_previous_fn(items, idx, session):
            """Move one item backward. Returns ``(items, new_idx)``."""
            return items, navigate_review_fn(items, idx, "prev")

        def advance_after_reject_fn(items, idx, session, rejection_mode):
            """Advance only when the rejection was completed (mode no longer active)."""
            if rejection_mode:
                return items, idx
            return advance_to_next_fn(items, idx, session)

        def refresh_after_reject_fn(items, idx, session, rejection_mode):
            """Refresh the display after a reject click.

            While a reason is still awaited, the reason box, rejection mode and
            reject button are left untouched so the two-step flow is not reset.
            """
            if not rejection_mode:
                return show_current_review_item_fn(items, idx, session)

            has_item = bool(items) and idx < len(items)
            item = items[idx] if has_item else None
            return (
                str(item["tts_id"]) if has_item else "",
                item["filename"] if has_item else "",
                item["sentence"] if has_item else "",
                item["annotated_sentence"] if has_item else "",
                item["annotated_at"] if has_item else "",
                item["validation_status"] if has_item else "",
                "",  # annotator placeholder
                gr.update(value=None, autoplay=False),
                gr.update(),  # rejection_reason_input - don't change
                rejection_mode,  # keep rejection mode as is
                gr.update(),  # btn_reject - don't change
            )

        def disable_ui():
            return update_ui_interactive_state(False)

        def enable_ui():
            return update_ui_interactive_state(True)

        # Output definitions
        review_display_outputs = [
            self.tts_id, self.filename, self.sentence, self.ann_sentence,
            self.annotated_at, self.current_validation_status,
            # NOTE(review): used as the slot for the anonymized annotator name;
            # every handler writes "" into it - confirm this is intended for
            # the header's welcome component.
            self.header.welcome,
            self.audio,
            self.rejection_reason_input,
            self.rejection_mode_active,
            self.btn_reject,
        ]
        step_inputs = [self.items_state, self.idx_state, session_state]
        step_outputs = [self.items_state, self.idx_state]

        # Trigger data loading when load_trigger changes (after successful login for a reviewer)
        self.load_trigger.change(
            fn=disable_ui,
            outputs=self.interactive_ui_elements
        ).then(
            fn=load_review_items_fn,
            inputs=[session_state],
            outputs=[self.items_state, self.idx_state, self.review_info] + review_display_outputs
        ).then(
            fn=lambda: (None, gr.update(value=None)),  # Clear audio state
            outputs=[self.original_audio_state, self.audio]
        ).then(
            fn=enable_ui,
            outputs=self.interactive_ui_elements
        )

        # Audio is loaded only on demand via the Load Audio button.

        # Navigation buttons
        for btn, step_fn in ((self.btn_prev, go_to_previous_fn), (self.btn_next, advance_to_next_fn)):
            btn.click(
                fn=disable_ui,
                outputs=self.interactive_ui_elements
            ).then(
                fn=step_fn,
                inputs=step_inputs,
                outputs=step_outputs
            ).then(
                fn=show_current_review_item_fn,
                inputs=step_inputs,
                outputs=review_display_outputs
            ).then(
                lambda: gr.update(value=None),
                outputs=self.jump_data_id_input
            ).then(
                fn=enable_ui,
                outputs=self.interactive_ui_elements
            )

        # Approve button
        self.btn_approve.click(
            fn=lambda items, idx, session: save_validation_fn(items, idx, session, approved=True, rejection_reason=""),
            inputs=step_inputs,
            outputs=[self.items_state, self.current_validation_status, self.rejection_reason_input]
        ).then(
            fn=lambda: False,  # Reset rejection mode
            outputs=[self.rejection_mode_active]
        ).then(
            fn=lambda: gr.update(value=_REJECT_LABEL),  # Reset reject button
            outputs=[self.btn_reject]
        ).then(
            fn=advance_to_next_fn,
            inputs=step_inputs,
            outputs=step_outputs
        ).then(
            fn=show_current_review_item_fn,
            inputs=step_inputs,
            outputs=review_display_outputs
        )

        # Reject button (two-step)
        self.btn_reject.click(
            fn=handle_rejection_fn,
            inputs=[self.items_state, self.idx_state, session_state, self.rejection_reason_input, self.rejection_mode_active],
            outputs=[self.items_state, self.current_validation_status, self.rejection_reason_input, self.rejection_mode_active, self.btn_reject]
        ).then(
            fn=advance_after_reject_fn,
            inputs=step_inputs + [self.rejection_mode_active],
            outputs=step_outputs
        ).then(
            fn=refresh_after_reject_fn,
            inputs=step_inputs + [self.rejection_mode_active],
            outputs=review_display_outputs
        )

        # Skip button (just navigate to next)
        self.btn_skip.click(
            fn=advance_to_next_fn,
            inputs=step_inputs,
            outputs=step_outputs
        ).then(
            fn=show_current_review_item_fn,
            inputs=step_inputs,
            outputs=review_display_outputs
        )

        # Jump button
        self.btn_jump.click(
            fn=jump_by_data_id_fn,
            inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
            outputs=self.idx_state
        ).then(
            fn=show_current_review_item_fn,
            inputs=step_inputs,
            outputs=review_display_outputs
        ).then(
            lambda: gr.update(value=None),
            outputs=self.jump_data_id_input
        )

        # Load audio button
        self.btn_load_voice.click(
            fn=disable_ui,
            outputs=self.interactive_ui_elements
        ).then(
            fn=download_voice_fn,
            inputs=[self.filename],
            outputs=[self.audio, self.original_audio_state, self.audio]
        ).then(
            fn=enable_ui,
            outputs=self.interactive_ui_elements
        )

        return self.container