Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed on Jun 12

Commit

86cf81a

1 Parent(s): 1000353

Sentry integration

Browse files

Files changed (10) hide show

app.py +16 -5
components/dashboard_page.py +14 -8
components/review_dashboard_page.py +172 -165
config.py +6 -0
data/repository/annotator_repo.py +41 -37
requirements.txt +2 -1
utils/database.py +30 -27
utils/logger.py +31 -1
utils/security.py +13 -13
utils/sentry_integration.py +298 -0

app.py CHANGED Viewed

@@ -1,6 +1,17 @@
 import gradio as gr
 from pathlib import Path
 from utils.logger import Logger
 from components.login_page import LoginPage
 from components.dashboard_page import DashboardPage
@@ -44,8 +55,8 @@ def build_app() -> gr.Blocks:
 if __name__ == "__main__":
-    # try:
-    log.info("Launching App ...")
-    build_app().launch()
-    # except Exception as err:
-    #     log.error(err)

 import gradio as gr
 from pathlib import Path
+# Initialize Sentry first (before other imports)
+try:
+    from utils.sentry_integration import initialize_sentry
+    sentry_initialized = initialize_sentry()
+    if sentry_initialized:
+        print("✅ Sentry monitoring enabled")
+    else:
+        print("⚠️ Sentry monitoring disabled (no DSN configured)")
+except Exception as e:
+    raise
 from utils.logger import Logger
 from components.login_page import LoginPage
 from components.dashboard_page import DashboardPage
 if __name__ == "__main__":
+    try:
+        log.info("Launching App ...")
+        build_app().launch()
+    except Exception as err:
+        raise

components/dashboard_page.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import numpy as np
 import datetime
 from sqlalchemy import orm, func # Added func for count
 from components.header import Header
@@ -185,7 +186,8 @@ class DashboardPage:
                         return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
                 except Exception as e:
                     log.error(f"Error fetching progress for user {user_id}: {e}")
-                    return "Annotation Progress: Error" # Added label
         def download_voice_fn(filename_to_load, autoplay_on_load=True): # Autoplay here is for the btn_load_voice click
             if not filename_to_load:
@@ -196,8 +198,8 @@ class DashboardPage:
                 return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
             except Exception as e:
                 log.error(f"GDrive download failed for {filename_to_load}: {e}")
-                gr.Error(f"Failed to load audio: {filename_to_load}. Error: {e}")
-                return None, None, gr.update(value=None, autoplay=False)
         def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
             annotator_id = session.get("user_id")
@@ -260,8 +262,8 @@ class DashboardPage:
                 except Exception as e:
                     db.rollback()
                     log.error(f"Failed to save annotation for {current_tts_id}: {e}") # Removed exc_info=True
-                    gr.Error(f"Save failed: {e}")
-                    # Removed 'return False'
         def show_current_item_fn(items, idx, session):
             initial_trims_list_sec = []
@@ -298,7 +300,8 @@ class DashboardPage:
                                 initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
                     except Exception as e:
                         log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
-                        gr.Error(f"Error loading annotation details: {e}")
             return (
                 current_item.get("id", ""), current_item.get("filename", ""),
@@ -363,7 +366,8 @@ class DashboardPage:
                     except Exception as e:
                         log.error(f"Failed to load items or determine resume index for user {user_name}: {e}")
-                        gr.Error(f"Could not load your assigned data: {e}")
             initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
             progress_str = get_user_progress_fn(sess)
@@ -377,6 +381,7 @@ class DashboardPage:
                     if item_dict.get("id") == target_id: return i
                 gr.Warning(f"Data ID {target_id} not found in your assigned items.")
             except ValueError:
                 gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
             return current_idx
@@ -441,7 +446,8 @@ class DashboardPage:
                     except Exception as e:
                         db.rollback()
                         log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}") # Removed exc_info=True
-                        gr.Error(f"Failed to delete annotation from database: {e}")
             else:
                  gr.Error("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")

 import gradio as gr
 import numpy as np
 import datetime
+import sentry_sdk
 from sqlalchemy import orm, func # Added func for count
 from components.header import Header
                         return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
                 except Exception as e:
                     log.error(f"Error fetching progress for user {user_id}: {e}")
+                    sentry_sdk.capture_exception(e)
+                    raise
         def download_voice_fn(filename_to_load, autoplay_on_load=True): # Autoplay here is for the btn_load_voice click
             if not filename_to_load:
                 return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
             except Exception as e:
                 log.error(f"GDrive download failed for {filename_to_load}: {e}")
+                sentry_sdk.capture_exception(e)
+                raise
         def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
             annotator_id = session.get("user_id")
                 except Exception as e:
                     db.rollback()
                     log.error(f"Failed to save annotation for {current_tts_id}: {e}") # Removed exc_info=True
+                    sentry_sdk.capture_exception(e)
+                    raise
         def show_current_item_fn(items, idx, session):
             initial_trims_list_sec = []
                                 initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
                     except Exception as e:
                         log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
+                        sentry_sdk.capture_exception(e)
+                        raise
             return (
                 current_item.get("id", ""), current_item.get("filename", ""),
                     except Exception as e:
                         log.error(f"Failed to load items or determine resume index for user {user_name}: {e}")
+                        sentry_sdk.capture_exception(e)
+                        raise
             initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
             progress_str = get_user_progress_fn(sess)
                     if item_dict.get("id") == target_id: return i
                 gr.Warning(f"Data ID {target_id} not found in your assigned items.")
             except ValueError:
+                sentry_sdk.capture_exception()
                 gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
             return current_idx
                     except Exception as e:
                         db.rollback()
                         log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}") # Removed exc_info=True
+                        sentry_sdk.capture_exception(e)
+                        raise
             else:
                  gr.Error("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")

components/review_dashboard_page.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import gradio as gr
 import datetime
 from sqlalchemy import orm
 from components.header import Header
@@ -132,18 +133,21 @@ class ReviewDashboardPage:
                 return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=True)
             except TimeoutError as e:
                 log.error(f"Audio download timeout for {filename_to_load}: {e}")
-                gr.Error(f"⏱️ Timeout loading audio: {filename_to_load}. The server may be slow or unreachable.")
-                return None, None, gr.update(value=None, autoplay=False)
             except ConnectionError as e:
                 log.error(f"Audio download connection error for {filename_to_load}: {e}")
                 gr.Error(f"🌐 Connection error loading audio: {filename_to_load}. Please check your internet connection.")
                 return None, None, gr.update(value=None, autoplay=False)
             except FileNotFoundError as e:
                 log.error(f"Audio file not found for {filename_to_load}: {e}")
                 gr.Error(f"📁 Audio file not found: {filename_to_load}")
                 return None, None, gr.update(value=None, autoplay=False)
             except Exception as e:
                 log.error(f"Audio download failed for {filename_to_load}: {e}")
                 gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
                 return None, None, gr.update(value=None, autoplay=False)
@@ -174,134 +178,135 @@ class ReviewDashboardPage:
             # Load annotations from target annotator
             with get_db() as db:
-                try:
-                    # Get target annotator's ID
-                    target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
-                    if not target_annotator_obj:
-                        log.error(f"Target annotator {target_annotator} not found in database")
-                        return [], 0, f"Review Target Error: Annotator '{target_annotator}' not found.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-                    log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                    # Get all annotations by target annotator (including deleted ones)
-                    annotations = db.query(Annotation).join(TTSData).filter(
-                        Annotation.annotator_id == target_annotator_obj.id
-                    ).options(
-                        orm.joinedload(Annotation.tts_data),
-                        orm.joinedload(Annotation.annotator)
-                    ).order_by(Annotation.id).all() # Added order_by for consistency
-                    log.info(f"Fetched {len(annotations)} annotations for target annotator ID {target_annotator_obj.id}")
-                    items = []
-                    for annotation in annotations:
-                        # Check if annotation is deleted (no annotated_sentence or empty)
-                        is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
-                        # Check if this annotation has been reviewed by current user
-                        existing_validation = db.query(Validation).filter_by(
-                            annotation_id=annotation.id,
-                            validator_id=user_id
-                        ).first()
-                        validation_status = "Not Reviewed"
-                        rejection_reason_val = "" # For the input box
-                        rejection_visible_val = False # For the input box
-                        if existing_validation:
-                            if existing_validation.validated:
-                                validation_status = "Approved"
-                            else:
-                                validation_status = f"Rejected"
-                                if existing_validation.description:
-                                    validation_status += f" ({existing_validation.description})"
-                                    rejection_reason_val = existing_validation.description
-                                    rejection_visible_val = True
-                        # For deleted annotations, show special status
-                        if is_deleted:
-                            annotated_sentence_display = "[DELETED ANNOTATION]"
-                            if validation_status == "Not Reviewed":
-                                validation_status = "Not Reviewed (Deleted)"
                         else:
-                            annotated_sentence_display = annotation.annotated_sentence
-                        items.append({
-                            "annotation_id": annotation.id,
-                            "tts_id": annotation.tts_data.id,
-                            "filename": annotation.tts_data.filename,
-                            "sentence": annotation.tts_data.sentence,
-                            "annotated_sentence": annotated_sentence_display,
-                            "is_deleted": is_deleted,
-                            # "annotator_name": annotation.annotator.name, # Anonymized
-                            "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
-                            "validation_status": validation_status
-                        })
-                    # Find the first item that is not reviewed (prioritize non-deleted annotations)
-                    initial_idx = 0
-                    if items:
-                        found_unreviewed = False
-                        # First, try to find unreviewed non-deleted annotations
                         for i, item_data in enumerate(items):
-                            if (item_data["validation_status"] == "Not Reviewed" and
-                                not item_data.get("is_deleted", False)):
                                 initial_idx = i
                                 found_unreviewed = True
                                 break
-                        # If no unreviewed non-deleted items, look for any unreviewed items
-                        if not found_unreviewed:
-                            for i, item_data in enumerate(items):
-                                if item_data["validation_status"].startswith("Not Reviewed"):
-                                    initial_idx = i
-                                    found_unreviewed = True
-                                    break
-                        # If no unreviewed items at all, use the last item
-                        if not found_unreviewed:
-                            initial_idx = len(items) - 1 if items else 0
-                    # Set initial display
-                    if items:
-                        initial_item = items[initial_idx]
-                        review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations."
-                        # Ensure correct order of return values for 12 outputs
-                        # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update
-                        rejection_reason_val = ""
-                        rejection_visible_val = False
-                        if initial_item["validation_status"].startswith("Rejected"):
-                            start_paren = initial_item["validation_status"].find("(")
-                            end_paren = initial_item["validation_status"].find(")")
-                            if start_paren != -1 and end_paren != -1:
-                                rejection_reason_val = initial_item["validation_status"][start_paren+1:end_paren]
-                            rejection_visible_val = True
-                        return (
-                            items,
-                            initial_idx,
-                            review_info_text,
-                            str(initial_item["tts_id"]),
-                            initial_item["filename"],
-                            initial_item["sentence"],
-                            initial_item["annotated_sentence"],
-                            initial_item["annotated_at"],
-                            initial_item["validation_status"],
-                            "", # Placeholder for the original annotator name (maps to header.welcome)
-                            gr.update(value=None, autoplay=False), # audio_update
-                            gr.update(visible=rejection_visible_val, value=rejection_reason_val), # rejection_reason_input update
-                            False,  # Reset rejection mode
-                            gr.update(value="❌ Reject")  # Reset reject button
-                        )
-                    else:
-                        # Ensure correct order and number of return values for empty items (14 outputs)
-                        return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-                except Exception as e:
-                    log.error(f"Error loading review items: {e}")
-                    gr.Error(f"Failed to load review data: {e}")
-                    # Ensure correct order and number of return values for error case (14 outputs)
-                    return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
@@ -360,43 +365,44 @@ class ReviewDashboardPage:
             log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")
             with get_db() as db:
-                try:
-                    existing_validation = db.query(Validation).filter_by(
                         annotation_id=annotation_id,
-                        validator_id=user_id
-                    ).first()
-                    if existing_validation:
-                        log.info(f"Updating existing validation for annotation_id: {annotation_id}")
-                        existing_validation.validated = approved
-                        existing_validation.description = rejection_reason if not approved else None
-                        existing_validation.validated_at = datetime.datetime.utcnow()
-                    else:
-                        log.info(f"Creating new validation for annotation_id: {annotation_id}")
-                        new_validation = Validation(
-                            annotation_id=annotation_id,
-                            validator_id=user_id,
-                            validated=approved,
-                            description=rejection_reason if not approved else None,
-                            validated_at=datetime.datetime.utcnow(),
-                        )
-                        db.add(new_validation)
-                    db.commit()
-                    log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
-                    items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
-                    # Show rejection reason input only if rejected, otherwise hide and clear
-                    rejection_input_update = gr.update(visible=not approved, value="" if approved else rejection_reason)
-                    return items, items[idx]["validation_status"], rejection_input_update
-                except Exception as e:
-                    db.rollback()
-                    log.error(f"Error saving validation: {e}")
-                    gr.Error(f"Failed to save validation: {e}")
-                    return items, current_item["validation_status"], gr.update(visible=False) # Return original status and hide input on error
         def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
             """Handle rejection button click - two-step process"""
@@ -442,14 +448,15 @@ class ReviewDashboardPage:
         def jump_by_data_id_fn(items, target_data_id, current_idx):
             if not target_data_id:
                 return current_idx
-            try:
-                target_id = int(target_data_id)
-                for i, item in enumerate(items):
-                    if item["tts_id"] == target_id:
-                        return i
-                gr.Warning(f"Data ID {target_id} not found in review items")
-            except ValueError:
-                gr.Warning(f"Invalid Data ID format: {target_data_id}")
             return current_idx
         # Output definitions

 import gradio as gr
 import datetime
+import sentry_sdk
 from sqlalchemy import orm
 from components.header import Header
                 return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=True)
             except TimeoutError as e:
                 log.error(f"Audio download timeout for {filename_to_load}: {e}")
+                sentry_sdk.capture_exception(e)
+                raise
             except ConnectionError as e:
                 log.error(f"Audio download connection error for {filename_to_load}: {e}")
+                sentry_sdk.capture_exception(e)
                 gr.Error(f"🌐 Connection error loading audio: {filename_to_load}. Please check your internet connection.")
                 return None, None, gr.update(value=None, autoplay=False)
             except FileNotFoundError as e:
                 log.error(f"Audio file not found for {filename_to_load}: {e}")
+                sentry_sdk.capture_exception(e)
                 gr.Error(f"📁 Audio file not found: {filename_to_load}")
                 return None, None, gr.update(value=None, autoplay=False)
             except Exception as e:
                 log.error(f"Audio download failed for {filename_to_load}: {e}")
+                sentry_sdk.capture_exception(e)
                 gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
                 return None, None, gr.update(value=None, autoplay=False)
             # Load annotations from target annotator
             with get_db() as db:
+                # try:
+                # Get target annotator's ID
+                target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
+                if not target_annotator_obj:
+                    log.error(f"Target annotator {target_annotator} not found in database")
+                    return [], 0, f"Review Target Error: Annotator '{target_annotator}' not found.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+                log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # Get all annotations by target annotator (including deleted ones)
+                annotations = db.query(Annotation).join(TTSData).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).options(
+                    orm.joinedload(Annotation.tts_data),
+                    orm.joinedload(Annotation.annotator)
+                ).order_by(Annotation.id).all() # Added order_by for consistency
+                log.info(f"Fetched {len(annotations)} annotations for target annotator ID {target_annotator_obj.id}")
+                items = []
+                for annotation in annotations:
+                    # Check if annotation is deleted (no annotated_sentence or empty)
+                    is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
+                    # Check if this annotation has been reviewed by current user
+                    existing_validation = db.query(Validation).filter_by(
+                        annotation_id=annotation.id,
+                        validator_id=user_id
+                    ).first()
+                    validation_status = "Not Reviewed"
+                    rejection_reason_val = "" # For the input box
+                    rejection_visible_val = False # For the input box
+                    if existing_validation:
+                        if existing_validation.validated:
+                            validation_status = "Approved"
                         else:
+                            validation_status = f"Rejected"
+                            if existing_validation.description:
+                                validation_status += f" ({existing_validation.description})"
+                                rejection_reason_val = existing_validation.description
+                                rejection_visible_val = True
+                    # For deleted annotations, show special status
+                    if is_deleted:
+                        annotated_sentence_display = "[DELETED ANNOTATION]"
+                        if validation_status == "Not Reviewed":
+                            validation_status = "Not Reviewed (Deleted)"
+                    else:
+                        annotated_sentence_display = annotation.annotated_sentence
+                    items.append({
+                        "annotation_id": annotation.id,
+                        "tts_id": annotation.tts_data.id,
+                        "filename": annotation.tts_data.filename,
+                        "sentence": annotation.tts_data.sentence,
+                        "annotated_sentence": annotated_sentence_display,
+                        "is_deleted": is_deleted,
+                        # "annotator_name": annotation.annotator.name, # Anonymized
+                        "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
+                        "validation_status": validation_status
+                    })
+                # Find the first item that is not reviewed (prioritize non-deleted annotations)
+                initial_idx = 0
+                if items:
+                    found_unreviewed = False
+                    # First, try to find unreviewed non-deleted annotations
+                    for i, item_data in enumerate(items):
+                        if (item_data["validation_status"] == "Not Reviewed" and
+                            not item_data.get("is_deleted", False)):
+                            initial_idx = i
+                            found_unreviewed = True
+                            break
+                    # If no unreviewed non-deleted items, look for any unreviewed items
+                    if not found_unreviewed:
                         for i, item_data in enumerate(items):
+                            if item_data["validation_status"].startswith("Not Reviewed"):
                                 initial_idx = i
                                 found_unreviewed = True
                                 break
+                    # If no unreviewed items at all, use the last item
+                    if not found_unreviewed:
+                        initial_idx = len(items) - 1 if items else 0
+                # Set initial display
+                if items:
+                    initial_item = items[initial_idx]
+                    review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations."
+                    # Ensure correct order of return values for 14 outputs
+                    # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, reject_button_update
+                    rejection_reason_val = ""
+                    rejection_visible_val = False
+                    if initial_item["validation_status"].startswith("Rejected"):
+                        start_paren = initial_item["validation_status"].find("(")
+                        end_paren = initial_item["validation_status"].find(")")
+                        if start_paren != -1 and end_paren != -1:
+                            rejection_reason_val = initial_item["validation_status"][start_paren+1:end_paren]
+                        rejection_visible_val = True
+                    return (
+                        items,
+                        initial_idx,
+                        review_info_text,
+                        str(initial_item["tts_id"]),
+                        initial_item["filename"],
+                        initial_item["sentence"],
+                        initial_item["annotated_sentence"],
+                        initial_item["annotated_at"],
+                        initial_item["validation_status"],
+                        "", # Placeholder for the original annotator name (maps to header.welcome)
+                        gr.update(value=None, autoplay=False), # audio_update
+                        gr.update(visible=rejection_visible_val, value=rejection_reason_val), # rejection_reason_input update
+                        False,  # Reset rejection mode
+                        gr.update(value="❌ Reject")  # Reset reject button
+                    )
+                else:
+                    # Ensure correct order and number of return values for empty items (14 outputs)
+                    return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+                # except Exception as e:
+                #     log.error(f"Error loading review items: {e}")
+                #     sentry_sdk.capture_exception(e)
+                #     gr.Error(f"Failed to load review data: {e}")
+                #     # Ensure correct order and number of return values for error case (14 outputs)
+                #     return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
             log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")
             with get_db() as db:
+                # try:
+                existing_validation = db.query(Validation).filter_by(
+                    annotation_id=annotation_id,
+                    validator_id=user_id
+                ).first()
+                if existing_validation:
+                    log.info(f"Updating existing validation for annotation_id: {annotation_id}")
+                    existing_validation.validated = approved
+                    existing_validation.description = rejection_reason if not approved else None
+                    existing_validation.validated_at = datetime.datetime.utcnow()
+                else:
+                    log.info(f"Creating new validation for annotation_id: {annotation_id}")
+                    new_validation = Validation(
                         annotation_id=annotation_id,
+                        validator_id=user_id,
+                        validated=approved,
+                        description=rejection_reason if not approved else None,
+                        validated_at=datetime.datetime.utcnow(),
+                    )
+                    db.add(new_validation)
+                db.commit()
+                log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
+                items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
+                # Show rejection reason input only if rejected, otherwise hide and clear
+                rejection_input_update = gr.update(visible=not approved, value="" if approved else rejection_reason)
+                return items, items[idx]["validation_status"], rejection_input_update
+                # except Exception as e:
+                #     db.rollback()
+                #     log.error(f"Error saving validation: {e}")
+                #     sentry_sdk.capture_exception(e)
+                #     gr.Error(f"Failed to save validation: {e}")
+                #     return items, current_item["validation_status"], gr.update(visible=False) # Return original status and hide input on error
         def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
             """Handle rejection button click - two-step process"""
         def jump_by_data_id_fn(items, target_data_id, current_idx):
             if not target_data_id:
                 return current_idx
+            # try:
+            target_id = int(target_data_id)
+            for i, item in enumerate(items):
+                if item["tts_id"] == target_id:
+                    return i
+            gr.Warning(f"Data ID {target_id} not found in review items")
+            # except ValueError:
+            #     sentry_sdk.capture_exception()
+            #     gr.Warning(f"Invalid Data ID format: {target_data_id}")
             return current_idx
         # Output definitions

config.py CHANGED Viewed

@@ -15,6 +15,12 @@ class Config(BaseSettings):
     FTP_URL: str = os.environ.get("FTP_URL")
     APP_TITLE: str = "Gooya TTS Annotation Tools"
     # Phase 2 Review Mapping: Defines who reviews whose work.
     # Key: Original annotator's username, Value: Reviewer's username

     FTP_URL: str = os.environ.get("FTP_URL")
     APP_TITLE: str = "Gooya TTS Annotation Tools"
+    # Sentry Configuration
+    SENTRY_DSN: str = os.environ.get("SENTRY_DSN", "")
+    SENTRY_ENVIRONMENT: str = os.environ.get("SENTRY_ENVIRONMENT", "development")
+    SENTRY_TRACES_SAMPLE_RATE: float = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1"))
+    SENTRY_PROFILES_SAMPLE_RATE: float = float(os.environ.get("SENTRY_PROFILES_SAMPLE_RATE", "0.1"))
     # Phase 2 Review Mapping: Defines who reviews whose work.
     # Key: Original annotator's username, Value: Reviewer's username

data/repository/annotator_repo.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from typing import Optional
 from sqlalchemy.orm import Session
 from data.models import Annotator
 from utils.logger import Logger
@@ -20,26 +21,28 @@ class AnnotatorRepo:
     # READ METHODS
     # ------------------------------------------------------------------ #
     def get_annotator_by_name(self, name: str) -> Optional[Annotator]:
-        try:
-            return (
-                self.db.query(Annotator)
-                .filter(Annotator.name == name)
-                .first()
-            )
-        except Exception as exc:
-            log.error(f"Unable to fetch annotator <name={name}> : {exc}")
-            raise
     def get_annotator_by_id(self, user_id: int) -> Optional[Annotator]:
-        try:
-            return (
-                self.db.query(Annotator)
-                .filter(Annotator.id == user_id)
-                .first()
-            )
-        except Exception as exc:
-            log.error(f"Unable to fetch annotator <id={user_id}> : {exc}")
-            raise
     # ------------------------------------------------------------------ #
     # WRITE METHODS
@@ -56,25 +59,26 @@ class AnnotatorRepo:
         Raises:
             ValueError: if name already exists.
         """
-        try:
-            if self.get_annotator_by_name(name):
-                raise ValueError(f"name `{name}` already exists.")
-            # ------------------ HASH PASSWORD ------------------ #
-            hashed_pass = hash_password(password)
-            annotator = Annotator(
-                name=name,
-                password=hashed_pass,
-                is_active=is_active,
-            )
-            self.db.add(annotator)
-            self.db.flush()    # Ensure PK generated
-            self.db.refresh(annotator)
-            log.info(f"New annotator created <id={annotator.id} name={name}>")
-            return annotator
-        except Exception as exc:
-            self.db.rollback()
-            log.error(f"Unable to create annotator `{name}` : {exc}")
-            raise

 from typing import Optional
 from sqlalchemy.orm import Session
+import sentry_sdk
 from data.models import Annotator
 from utils.logger import Logger
     # READ METHODS
     # ------------------------------------------------------------------ #
     def get_annotator_by_name(self, name: str) -> Optional[Annotator]:
+        # try:
+        return (
+            self.db.query(Annotator)
+            .filter(Annotator.name == name)
+            .first()
+        )
+        # except Exception as exc:
+        #     log.error(f"Unable to fetch annotator <name={name}> : {exc}")
+        #     sentry_sdk.capture_exception(exc)
+        #     raise
     def get_annotator_by_id(self, user_id: int) -> Optional[Annotator]:
+        # try:
+        return (
+            self.db.query(Annotator)
+            .filter(Annotator.id == user_id)
+            .first()
+        )
+        # except Exception as exc:
+        #     log.error(f"Unable to fetch annotator <id={user_id}> : {exc}")
+        #     sentry_sdk.capture_exception(exc)
+        #     raise
     # ------------------------------------------------------------------ #
     # WRITE METHODS
         Raises:
             ValueError: if name already exists.
         """
+        # try:
+        if self.get_annotator_by_name(name):
+            raise ValueError(f"name `{name}` already exists.")
+        # ------------------ HASH PASSWORD ------------------ #
+        hashed_pass = hash_password(password)
+        annotator = Annotator(
+            name=name,
+            password=hashed_pass,
+            is_active=is_active,
+        )
+        self.db.add(annotator)
+        self.db.flush()    # Ensure PK generated
+        self.db.refresh(annotator)
+        log.info(f"New annotator created <id={annotator.id} name={name}>")
+        return annotator
+        # except Exception as exc:
+        #     self.db.rollback()
+        #     log.error(f"Unable to create annotator `{name}` : {exc}")
+        #     sentry_sdk.capture_exception(exc)
+        #     raise

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ bcrypt
 google-api-python-client
 pydub
 numpy
-requests

 google-api-python-client
 pydub
 numpy
+requests
+sentry-sdk

utils/database.py CHANGED Viewed

@@ -5,6 +5,7 @@ from sqlalchemy.orm import sessionmaker
 from contextlib import contextmanager
 import sys  # Add sys import
 import os  # Add os import
 # Add project root to Python path to ensure local modules are prioritized
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -20,18 +21,20 @@ log = Logger()
 def get_db_engine():
     """Create DB engine with error handling for HF Spaces"""
-    try:
-        engine = create_engine(
-            conf.db_url,
-            pool_pre_ping=True,
-            pool_size=5,
-            max_overflow=10,
-            connect_args={"connect_timeout": 10},
-        )
-        log.info("Database engine created successfully")
-        return engine
-    except Exception as e:
-        log.error(f"Failed to create database engine: {e}")
 engine = get_db_engine()
@@ -42,23 +45,23 @@ SessionLocal = sessionmaker(bind=engine)
 def get_db():
     """Session manager for HF Spaces"""
     db = SessionLocal()
-    try:
-        yield db
-        db.commit()
-    except Exception as e:
-        db.rollback()
-        log.error(f"Database error: {e}")
-        raise
-    finally:
-        db.close()
 def initialize_database():
     """Initialize tables with HF Spaces compatibility"""
-    try:
-        from data.models import Base
-        Base.metadata.create_all(bind=engine)
-        log.info("Tables created successfully")
-    except Exception as e:
-        log.error(f"Table creation failed: {e}")

 from contextlib import contextmanager
 import sys  # Add sys import
 import os  # Add os import
+import sentry_sdk
 # Add project root to Python path to ensure local modules are prioritized
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 def get_db_engine():
     """Create DB engine with error handling for HF Spaces"""
+    # try:
+    engine = create_engine(
+        conf.db_url,
+        pool_pre_ping=True,
+        pool_size=5,
+        max_overflow=10,
+        connect_args={"connect_timeout": 10},
+    )
+    log.info("Database engine created successfully")
+    return engine
+    # except Exception as e:
+    #     log.error(f"Failed to create database engine: {e}")
+    #     sentry_sdk.capture_exception(e)
+    #     raise
 engine = get_db_engine()
 def get_db():
     """Session manager for HF Spaces"""
     db = SessionLocal()
+    # try:
+    yield db
+    db.commit()
+    # except Exception as e:
+    #     db.rollback()
+    #     log.error(f"Database error: {e}")
+    #     raise
+    # finally:
+    db.close()
 def initialize_database():
     """Initialize tables with HF Spaces compatibility"""
+    # try:
+    from data.models import Base
+    Base.metadata.create_all(bind=engine)
+    log.info("Tables created successfully")
+    # except Exception as e:
+    #     log.error(f"Table creation failed: {e}")

utils/logger.py CHANGED Viewed

@@ -1,9 +1,17 @@
 from datetime import datetime
 class Logger:
     def _log(self, level, message):
         now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         print(f"[{now}] [{level.upper()}] {message}")
     def info(self, message):
         self._log('info', message)
@@ -12,4 +20,26 @@ class Logger:
         self._log('warning', message)
     def error(self, message):
-        self._log('error', message)

 from datetime import datetime
+import sentry_sdk
 class Logger:
     def _log(self, level, message):
         now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         print(f"[{now}] [{level.upper()}] {message}")
+        # Send to Sentry based on level
+        if level == 'error':
+            sentry_sdk.capture_message(message, level='error')
+        elif level == 'warning':
+            sentry_sdk.capture_message(message, level='warning')
+        # Info messages are not sent to Sentry to avoid noise
     def info(self, message):
         self._log('info', message)
         self._log('warning', message)
     def error(self, message):
+        self._log('error', message)
+    def exception(self, message, exc_info=None):
+        """Log an exception with full traceback"""
+        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        print(f"[{now}] [ERROR] {message}")
+        # Capture exception in Sentry
+        if exc_info:
+            sentry_sdk.capture_exception(exc_info)
+        else:
+            sentry_sdk.capture_exception()
+    def capture_user_action(self, action, user_id=None, **kwargs):
+        """Capture user actions for analytics"""
+        # try:
+        from utils.sentry_integration import capture_annotation_event
+        capture_annotation_event(action, user_id=user_id, **kwargs)
+        # except ImportError:
+        #     # Fallback if sentry_integration is not available
+        #     self.info(f"User action: {action} (user_id: {user_id})")
+        # except Exception as e:
+        #     self.error(f"Failed to capture user action: {e}")

utils/security.py CHANGED Viewed

@@ -8,22 +8,22 @@ def hash_password(plain_password: str) -> str:
     """
     Hash a plaintext password using bcrypt.
     """
-    try:
-        hashed = bcrypt.hashpw(plain_password.encode("utf-8"), bcrypt.gensalt())
-        return hashed.decode("utf-8")
-    except Exception as exc:
-        log.error(f"Password hashing failed: {exc}")
-        raise
 def verify_password(plain_password: str, hashed_password: str) -> bool:
     """
     Verify a plaintext password against its bcrypt hash.
     """
-    try:
-        return bcrypt.checkpw(
-            plain_password.encode("utf-8"), hashed_password.encode("utf-8")
-        )
-    except Exception as exc:
-        log.error(f"Password verification failed: {exc}")
-        return False

     """
     Hash a plaintext password using bcrypt.
     """
+    # try:
+    hashed = bcrypt.hashpw(plain_password.encode("utf-8"), bcrypt.gensalt())
+    return hashed.decode("utf-8")
+    # except Exception as exc:
+    #     log.error(f"Password hashing failed: {exc}")
+    #     raise
 def verify_password(plain_password: str, hashed_password: str) -> bool:
     """
     Verify a plaintext password against its bcrypt hash.
     """
+    # try:
+    return bcrypt.checkpw(
+        plain_password.encode("utf-8"), hashed_password.encode("utf-8")
+    )
+    # except Exception as exc:
+    #     log.error(f"Password verification failed: {exc}")
+    #     return False

utils/sentry_integration.py ADDED Viewed

	@@ -0,0 +1,298 @@

+# utils/sentry_integration.py
+import os
+import sentry_sdk
+from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
+from sentry_sdk.integrations.logging import LoggingIntegration
+from sentry_sdk.integrations.threading import ThreadingIntegration
+from utils.logger import Logger
+log = Logger()
def initialize_sentry():
    """
    Initialize Sentry for error tracking and performance monitoring.

    Configuration is read from environment variables: ``SENTRY_DSN``
    (required), ``SENTRY_ENVIRONMENT`` (default ``"development"``),
    ``SENTRY_TRACES_SAMPLE_RATE`` and ``SENTRY_PROFILES_SAMPLE_RATE``
    (default ``"0.1"`` each).

    Returns:
        bool: False when ``SENTRY_DSN`` is unset or empty (nothing is
        initialized); True once ``sentry_sdk.init()`` has been called.

    Raises:
        ValueError: if a sample-rate variable cannot be parsed as a float.
    """
    sentry_dsn = os.environ.get("SENTRY_DSN")
    if not sentry_dsn:
        log.info("Sentry DSN not configured, skipping Sentry initialization")
        return False
    # Environment configuration
    environment = os.environ.get("SENTRY_ENVIRONMENT", "development")
    traces_sample_rate = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1"))
    profiles_sample_rate = float(os.environ.get("SENTRY_PROFILES_SAMPLE_RATE", "0.1"))
    # Logging integration: both levels are None, which turns off automatic
    # capture from the stdlib ``logging`` module.
    logging_integration = LoggingIntegration(
        level=None,        # No breadcrumbs recorded from stdlib log records
        event_level=None   # No events sent from stdlib log records
    )
    # SQLAlchemy integration for database monitoring
    sqlalchemy_integration = SqlalchemyIntegration()
    # Threading integration for multi-threaded apps
    threading_integration = ThreadingIntegration(propagate_hub=True)
    sentry_sdk.init(
        dsn=sentry_dsn,
        environment=environment,
        traces_sample_rate=traces_sample_rate,
        profiles_sample_rate=profiles_sample_rate,
        integrations=[
            logging_integration,
            sqlalchemy_integration,
            threading_integration,
        ],
        # Additional configuration
        send_default_pii=False,  # Don't send personally identifiable information
        attach_stacktrace=True,  # Attach stack traces to messages
        before_send=before_send_filter,  # Custom filter function
        release=get_app_version(),  # App version for release tracking
    )
    log.info(f"Sentry initialized successfully for environment: {environment}")
    return True
def before_send_filter(event, hint):
    """
    Filter function to modify or drop events before sending to Sentry.

    Args:
        event (dict): Event payload about to be sent; modified in place.
        hint (dict): May contain ``'exc_info'`` as an
            ``(exc_type, exc_value, traceback)`` tuple.

    Returns:
        The (tagged) event, or None to drop it.
    """
    # Don't send events for certain error types
    if 'exc_info' in hint:
        exc_type, exc_value, tb = hint['exc_info']
        # Skip common/expected errors
        if isinstance(exc_value, (KeyboardInterrupt, SystemExit)):
            return None
        # Skip database connection timeouts in development. The default
        # matches the one used at init time, so an unset variable also
        # counts as "development".
        environment = os.environ.get("SENTRY_ENVIRONMENT", "development")
        if "connect_timeout" in str(exc_value) and environment == "development":
            return None
    # Add the default component tag without clobbering a more specific one
    # that was already set on the event (e.g. by the capture_* helpers).
    tags = event.setdefault('tags', {})
    tags.setdefault('component', 'tts_labeling')
    # Add a placeholder user when none is attached (no real user IDs)
    if 'user' not in event:
        event['user'] = {
            'id': 'anonymous',
        }
    return event
def get_app_version():
    """
    Get the application version for release tracking.

    Runs ``git rev-parse --short HEAD`` in the directory two levels above
    this file.

    Returns:
        str: The short commit hash, or ``"unknown"`` when git is missing,
        cannot be run, times out, or exits with a non-zero status.
    """
    import subprocess
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    try:
        result = subprocess.run(
            ['git', 'rev-parse', '--short', 'HEAD'],
            capture_output=True,
            text=True,
            cwd=repo_root,
            timeout=5,  # never let version lookup stall startup
        )
    except (OSError, subprocess.SubprocessError):
        # git not installed / not executable / timed out: version lookup is
        # best-effort and must not prevent Sentry from initializing.
        return "unknown"
    if result.returncode == 0:
        return result.stdout.strip()
    # Fallback to a default version
    return "unknown"
def capture_custom_event(message, level="info", extra=None, tags=None):
    """
    Capture custom events to Sentry with structured data.

    The extras and tags are applied on a temporary scope so they are attached
    to this event only and do not leak onto later, unrelated events.

    Args:
        message (str): Event message
        level (str): Event level (debug, info, warning, error, fatal)
        extra (dict): Additional context data
        tags (dict): Tags for filtering/grouping
    """
    with sentry_sdk.push_scope() as scope:
        for key, value in (extra or {}).items():
            scope.set_extra(key, value)
        for key, value in (tags or {}).items():
            scope.set_tag(key, value)
        sentry_sdk.capture_message(message, level=level)
def capture_annotation_event(action, user_id=None, annotation_id=None, tts_id=None, success=True):
    """
    Capture annotation-specific events for analytics.

    Args:
        action (str): Action performed (create, update, delete, review_approve, review_reject)
        user_id (int): User ID; only a SHA-256 digest of it is sent
        annotation_id (int): Annotation ID
        tts_id (int): TTS data ID
        success (bool): Whether the action was successful
    """
    import hashlib

    tags = {
        'action_type': action,
        'success': str(success),
        'component': 'annotation'
    }
    extra = {}
    # Compare against None so that a legitimate ID of 0 is still recorded.
    if annotation_id is not None:
        extra['annotation_id'] = annotation_id
    if tts_id is not None:
        extra['tts_id'] = tts_id
    if user_id is not None:
        # Built-in hash() of a str is salted per process, so it would give a
        # different value after every restart; a SHA-256 digest is stable.
        # This is pseudonymization only (unsalted), not strong anonymization.
        extra['user_hash'] = hashlib.sha256(str(user_id).encode("utf-8")).hexdigest()
    message = f"Annotation {action}: {'success' if success else 'failed'}"
    capture_custom_event(
        message=message,
        level="info" if success else "warning",
        extra=extra,
        tags=tags
    )
def capture_database_performance(operation, duration, table=None, success=True):
    """
    Report how long a database operation took as a Sentry event.

    Operations longer than 2.0 seconds are categorised as 'slow'; those longer
    than 5.0 seconds are reported at warning level instead of info.

    Args:
        operation (str): Database operation (select, insert, update, delete)
        duration (float): Operation duration in seconds
        table (str): Table name; added as a tag only when truthy
        success (bool): Whether operation was successful
    """
    event_tags = {
        'db_operation': operation,
        'success': str(success),
        'component': 'database',
    }
    if table:
        event_tags['table'] = table

    if duration > 2.0:
        category = 'slow'
    else:
        category = 'normal'
    event_extra = {
        'duration_seconds': duration,
        'performance_category': category,
    }

    if duration > 5.0:
        severity = "warning"
    else:
        severity = "info"

    capture_custom_event(
        message=f"Database {operation} took {duration:.2f}s",
        level=severity,
        extra=event_extra,
        tags=event_tags,
    )
def capture_user_activity(activity, user_id=None, session_duration=None, items_processed=None):
    """
    Capture user activity metrics.

    Args:
        activity (str): Activity type (login, logout, annotation_session)
        user_id (int): User ID; only a SHA-256 digest of it is sent
        session_duration (float): Session duration in seconds
        items_processed (int): Number of items processed
    """
    import hashlib

    tags = {
        'activity_type': activity,
        'component': 'user_activity'
    }
    extra = {}
    # Compare against None so that a real value of 0 (an empty session, zero
    # items processed) is still reported.
    if session_duration is not None:
        extra['session_duration_seconds'] = session_duration
    if items_processed is not None:
        extra['items_processed'] = items_processed
    if user_id is not None:
        # Built-in hash() of a str is salted per process, so it would give a
        # different value after every restart; a SHA-256 digest is stable.
        # This is pseudonymization only (unsalted), not strong anonymization.
        extra['user_hash'] = hashlib.sha256(str(user_id).encode("utf-8")).hexdigest()
    message = f"User {activity}"
    capture_custom_event(
        message=message,
        level="info",
        extra=extra,
        tags=tags
    )
# Context manager for capturing performance
class SentryPerformanceMonitor:
    """
    Context manager for monitoring operation performance.

    On exit it sends one event through ``capture_custom_event`` carrying the
    elapsed time and whether the wrapped block raised. Exceptions from the
    wrapped block are never suppressed.
    """

    def __init__(self, operation_name, tags=None):
        """Store the operation name and a private copy of *tags*."""
        self.operation_name = operation_name
        # Copy so that the tags added on exit never modify the caller's dict.
        self.tags = dict(tags) if tags else {}
        self.start_time = None

    def __enter__(self):
        """Record the start time and return the monitor itself."""
        import time
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Report duration and outcome; return False so exceptions propagate."""
        import time
        duration = time.time() - self.start_time
        success = exc_type is None
        # Update tags
        self.tags.update({
            'operation': self.operation_name,
            'success': str(success)
        })
        extra = {
            'duration_seconds': duration,
            'operation_name': self.operation_name
        }
        # Failures are errors; successful but slow (> 5s) operations warn.
        level = "error" if not success else ("warning" if duration > 5.0 else "info")
        if success:
            message = f"Operation '{self.operation_name}' completed in {duration:.2f}s"
        else:
            # Do not claim completion when the wrapped block raised.
            message = f"Operation '{self.operation_name}' failed after {duration:.2f}s"
        capture_custom_event(
            message=message,
            level=level,
            extra=extra,
            tags=self.tags
        )
        # Don't suppress exceptions
        return False