Spaces:
Running
Running
sentry integration
Browse files- app.py +16 -5
- components/dashboard_page.py +14 -8
- components/review_dashboard_page.py +172 -165
- config.py +6 -0
- data/repository/annotator_repo.py +41 -37
- requirements.txt +2 -1
- utils/database.py +30 -27
- utils/logger.py +31 -1
- utils/security.py +13 -13
- utils/sentry_integration.py +298 -0
app.py
CHANGED
@@ -1,6 +1,17 @@
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from utils.logger import Logger
|
5 |
from components.login_page import LoginPage
|
6 |
from components.dashboard_page import DashboardPage
|
@@ -44,8 +55,8 @@ def build_app() -> gr.Blocks:
|
|
44 |
|
45 |
|
46 |
if __name__ == "__main__":
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
|
4 |
+
# Initialize Sentry first (before other imports)
|
5 |
+
try:
|
6 |
+
from utils.sentry_integration import initialize_sentry
|
7 |
+
sentry_initialized = initialize_sentry()
|
8 |
+
if sentry_initialized:
|
9 |
+
print("✅ Sentry monitoring enabled")
|
10 |
+
else:
|
11 |
+
print("⚠️ Sentry monitoring disabled (no DSN configured)")
|
12 |
+
except Exception as e:
|
13 |
+
raise
|
14 |
+
|
15 |
from utils.logger import Logger
|
16 |
from components.login_page import LoginPage
|
17 |
from components.dashboard_page import DashboardPage
|
|
|
55 |
|
56 |
|
57 |
if __name__ == "__main__":
|
58 |
+
try:
|
59 |
+
log.info("Launching App ...")
|
60 |
+
build_app().launch()
|
61 |
+
except Exception as err:
|
62 |
+
raise
|
components/dashboard_page.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import datetime
|
|
|
4 |
from sqlalchemy import orm, func # Added func for count
|
5 |
|
6 |
from components.header import Header
|
@@ -185,7 +186,8 @@ class DashboardPage:
|
|
185 |
return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
|
186 |
except Exception as e:
|
187 |
log.error(f"Error fetching progress for user {user_id}: {e}")
|
188 |
-
|
|
|
189 |
|
190 |
def download_voice_fn(filename_to_load, autoplay_on_load=True): # Autoplay here is for the btn_load_voice click
|
191 |
if not filename_to_load:
|
@@ -196,8 +198,8 @@ class DashboardPage:
|
|
196 |
return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
|
197 |
except Exception as e:
|
198 |
log.error(f"GDrive download failed for {filename_to_load}: {e}")
|
199 |
-
|
200 |
-
|
201 |
|
202 |
def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
|
203 |
annotator_id = session.get("user_id")
|
@@ -260,8 +262,8 @@ class DashboardPage:
|
|
260 |
except Exception as e:
|
261 |
db.rollback()
|
262 |
log.error(f"Failed to save annotation for {current_tts_id}: {e}") # Removed exc_info=True
|
263 |
-
|
264 |
-
|
265 |
|
266 |
def show_current_item_fn(items, idx, session):
|
267 |
initial_trims_list_sec = []
|
@@ -298,7 +300,8 @@ class DashboardPage:
|
|
298 |
initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
|
299 |
except Exception as e:
|
300 |
log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
|
301 |
-
|
|
|
302 |
|
303 |
return (
|
304 |
current_item.get("id", ""), current_item.get("filename", ""),
|
@@ -363,7 +366,8 @@ class DashboardPage:
|
|
363 |
|
364 |
except Exception as e:
|
365 |
log.error(f"Failed to load items or determine resume index for user {user_name}: {e}")
|
366 |
-
|
|
|
367 |
|
368 |
initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
|
369 |
progress_str = get_user_progress_fn(sess)
|
@@ -377,6 +381,7 @@ class DashboardPage:
|
|
377 |
if item_dict.get("id") == target_id: return i
|
378 |
gr.Warning(f"Data ID {target_id} not found in your assigned items.")
|
379 |
except ValueError:
|
|
|
380 |
gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
|
381 |
return current_idx
|
382 |
|
@@ -441,7 +446,8 @@ class DashboardPage:
|
|
441 |
except Exception as e:
|
442 |
db.rollback()
|
443 |
log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}") # Removed exc_info=True
|
444 |
-
|
|
|
445 |
else:
|
446 |
gr.Error("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")
|
447 |
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import datetime
|
4 |
+
import sentry_sdk
|
5 |
from sqlalchemy import orm, func # Added func for count
|
6 |
|
7 |
from components.header import Header
|
|
|
186 |
return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
|
187 |
except Exception as e:
|
188 |
log.error(f"Error fetching progress for user {user_id}: {e}")
|
189 |
+
sentry_sdk.capture_exception(e)
|
190 |
+
raise
|
191 |
|
192 |
def download_voice_fn(filename_to_load, autoplay_on_load=True): # Autoplay here is for the btn_load_voice click
|
193 |
if not filename_to_load:
|
|
|
198 |
return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
|
199 |
except Exception as e:
|
200 |
log.error(f"GDrive download failed for {filename_to_load}: {e}")
|
201 |
+
sentry_sdk.capture_exception(e)
|
202 |
+
raise
|
203 |
|
204 |
def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
|
205 |
annotator_id = session.get("user_id")
|
|
|
262 |
except Exception as e:
|
263 |
db.rollback()
|
264 |
log.error(f"Failed to save annotation for {current_tts_id}: {e}") # Removed exc_info=True
|
265 |
+
sentry_sdk.capture_exception(e)
|
266 |
+
raise
|
267 |
|
268 |
def show_current_item_fn(items, idx, session):
|
269 |
initial_trims_list_sec = []
|
|
|
300 |
initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
|
301 |
except Exception as e:
|
302 |
log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
|
303 |
+
sentry_sdk.capture_exception(e)
|
304 |
+
raise
|
305 |
|
306 |
return (
|
307 |
current_item.get("id", ""), current_item.get("filename", ""),
|
|
|
366 |
|
367 |
except Exception as e:
|
368 |
log.error(f"Failed to load items or determine resume index for user {user_name}: {e}")
|
369 |
+
sentry_sdk.capture_exception(e)
|
370 |
+
raise
|
371 |
|
372 |
initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
|
373 |
progress_str = get_user_progress_fn(sess)
|
|
|
381 |
if item_dict.get("id") == target_id: return i
|
382 |
gr.Warning(f"Data ID {target_id} not found in your assigned items.")
|
383 |
except ValueError:
|
384 |
+
sentry_sdk.capture_exception()
|
385 |
gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
|
386 |
return current_idx
|
387 |
|
|
|
446 |
except Exception as e:
|
447 |
db.rollback()
|
448 |
log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}") # Removed exc_info=True
|
449 |
+
sentry_sdk.capture_exception(e)
|
450 |
+
raise
|
451 |
else:
|
452 |
gr.Error("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")
|
453 |
|
components/review_dashboard_page.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import datetime
|
|
|
5 |
from sqlalchemy import orm
|
6 |
|
7 |
from components.header import Header
|
@@ -132,18 +133,21 @@ class ReviewDashboardPage:
|
|
132 |
return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=True)
|
133 |
except TimeoutError as e:
|
134 |
log.error(f"Audio download timeout for {filename_to_load}: {e}")
|
135 |
-
|
136 |
-
|
137 |
except ConnectionError as e:
|
138 |
log.error(f"Audio download connection error for {filename_to_load}: {e}")
|
|
|
139 |
gr.Error(f"🌐 Connection error loading audio: {filename_to_load}. Please check your internet connection.")
|
140 |
return None, None, gr.update(value=None, autoplay=False)
|
141 |
except FileNotFoundError as e:
|
142 |
log.error(f"Audio file not found for {filename_to_load}: {e}")
|
|
|
143 |
gr.Error(f"📁 Audio file not found: {filename_to_load}")
|
144 |
return None, None, gr.update(value=None, autoplay=False)
|
145 |
except Exception as e:
|
146 |
log.error(f"Audio download failed for {filename_to_load}: {e}")
|
|
|
147 |
gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
|
148 |
return None, None, gr.update(value=None, autoplay=False)
|
149 |
|
@@ -174,134 +178,135 @@ class ReviewDashboardPage:
|
|
174 |
|
175 |
# Load annotations from target annotator
|
176 |
with get_db() as db:
|
177 |
-
try:
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
for annotation in annotations:
|
198 |
-
# Check if annotation is deleted (no annotated_sentence or empty)
|
199 |
-
is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
|
200 |
-
|
201 |
-
# Check if this annotation has been reviewed by current user
|
202 |
-
existing_validation = db.query(Validation).filter_by(
|
203 |
-
annotation_id=annotation.id,
|
204 |
-
validator_id=user_id
|
205 |
-
).first()
|
206 |
-
|
207 |
-
validation_status = "Not Reviewed"
|
208 |
-
rejection_reason_val = "" # For the input box
|
209 |
-
rejection_visible_val = False # For the input box
|
210 |
-
|
211 |
-
if existing_validation:
|
212 |
-
if existing_validation.validated:
|
213 |
-
validation_status = "Approved"
|
214 |
-
else:
|
215 |
-
validation_status = f"Rejected"
|
216 |
-
if existing_validation.description:
|
217 |
-
validation_status += f" ({existing_validation.description})"
|
218 |
-
rejection_reason_val = existing_validation.description
|
219 |
-
rejection_visible_val = True
|
220 |
-
|
221 |
-
# For deleted annotations, show special status
|
222 |
-
if is_deleted:
|
223 |
-
annotated_sentence_display = "[DELETED ANNOTATION]"
|
224 |
-
if validation_status == "Not Reviewed":
|
225 |
-
validation_status = "Not Reviewed (Deleted)"
|
226 |
else:
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
"
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
for i, item_data in enumerate(items):
|
247 |
-
if
|
248 |
-
not item_data.get("is_deleted", False)):
|
249 |
initial_idx = i
|
250 |
found_unreviewed = True
|
251 |
break
|
252 |
-
|
253 |
-
# If no unreviewed non-deleted items, look for any unreviewed items
|
254 |
-
if not found_unreviewed:
|
255 |
-
for i, item_data in enumerate(items):
|
256 |
-
if item_data["validation_status"].startswith("Not Reviewed"):
|
257 |
-
initial_idx = i
|
258 |
-
found_unreviewed = True
|
259 |
-
break
|
260 |
-
|
261 |
-
# If no unreviewed items at all, use the last item
|
262 |
-
if not found_unreviewed:
|
263 |
-
initial_idx = len(items) - 1 if items else 0
|
264 |
|
265 |
-
#
|
266 |
-
if
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
)
|
296 |
-
|
297 |
-
#
|
298 |
-
|
|
|
|
|
|
|
|
|
299 |
|
300 |
-
except Exception as e:
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
|
|
305 |
|
306 |
def show_current_review_item_fn(items, idx, session):
|
307 |
if not items or idx >= len(items) or idx < 0:
|
@@ -360,43 +365,44 @@ class ReviewDashboardPage:
|
|
360 |
log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")
|
361 |
|
362 |
with get_db() as db:
|
363 |
-
try:
|
364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
annotation_id=annotation_id,
|
366 |
-
validator_id=user_id
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
)
|
383 |
-
db.add(new_validation)
|
384 |
-
|
385 |
-
db.commit()
|
386 |
-
log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
|
387 |
-
|
388 |
-
items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
|
389 |
-
|
390 |
-
# Show rejection reason input only if rejected, otherwise hide and clear
|
391 |
-
rejection_input_update = gr.update(visible=not approved, value="" if approved else rejection_reason)
|
392 |
-
|
393 |
-
return items, items[idx]["validation_status"], rejection_input_update
|
394 |
|
395 |
-
except Exception as e:
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
400 |
|
401 |
def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
|
402 |
"""Handle rejection button click - two-step process"""
|
@@ -442,14 +448,15 @@ class ReviewDashboardPage:
|
|
442 |
def jump_by_data_id_fn(items, target_data_id, current_idx):
|
443 |
if not target_data_id:
|
444 |
return current_idx
|
445 |
-
try:
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
except ValueError:
|
452 |
-
|
|
|
453 |
return current_idx
|
454 |
|
455 |
# Output definitions
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import datetime
|
5 |
+
import sentry_sdk
|
6 |
from sqlalchemy import orm
|
7 |
|
8 |
from components.header import Header
|
|
|
133 |
return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=True)
|
134 |
except TimeoutError as e:
|
135 |
log.error(f"Audio download timeout for {filename_to_load}: {e}")
|
136 |
+
sentry_sdk.capture_exception(e)
|
137 |
+
raise
|
138 |
except ConnectionError as e:
|
139 |
log.error(f"Audio download connection error for {filename_to_load}: {e}")
|
140 |
+
sentry_sdk.capture_exception(e)
|
141 |
gr.Error(f"🌐 Connection error loading audio: {filename_to_load}. Please check your internet connection.")
|
142 |
return None, None, gr.update(value=None, autoplay=False)
|
143 |
except FileNotFoundError as e:
|
144 |
log.error(f"Audio file not found for {filename_to_load}: {e}")
|
145 |
+
sentry_sdk.capture_exception(e)
|
146 |
gr.Error(f"📁 Audio file not found: {filename_to_load}")
|
147 |
return None, None, gr.update(value=None, autoplay=False)
|
148 |
except Exception as e:
|
149 |
log.error(f"Audio download failed for {filename_to_load}: {e}")
|
150 |
+
sentry_sdk.capture_exception(e)
|
151 |
gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
|
152 |
return None, None, gr.update(value=None, autoplay=False)
|
153 |
|
|
|
178 |
|
179 |
# Load annotations from target annotator
|
180 |
with get_db() as db:
|
181 |
+
# try:
|
182 |
+
# Get target annotator's ID
|
183 |
+
target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
|
184 |
+
if not target_annotator_obj:
|
185 |
+
log.error(f"Target annotator {target_annotator} not found in database")
|
186 |
+
return [], 0, f"Review Target Error: Annotator '{target_annotator}' not found.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
|
187 |
+
|
188 |
+
log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
|
189 |
+
|
190 |
+
# Get all annotations by target annotator (including deleted ones)
|
191 |
+
annotations = db.query(Annotation).join(TTSData).filter(
|
192 |
+
Annotation.annotator_id == target_annotator_obj.id
|
193 |
+
).options(
|
194 |
+
orm.joinedload(Annotation.tts_data),
|
195 |
+
orm.joinedload(Annotation.annotator)
|
196 |
+
).order_by(Annotation.id).all() # Added order_by for consistency
|
197 |
+
|
198 |
+
log.info(f"Fetched {len(annotations)} annotations for target annotator ID {target_annotator_obj.id}")
|
199 |
+
|
200 |
+
items = []
|
201 |
+
for annotation in annotations:
|
202 |
+
# Check if annotation is deleted (no annotated_sentence or empty)
|
203 |
+
is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
|
204 |
|
205 |
+
# Check if this annotation has been reviewed by current user
|
206 |
+
existing_validation = db.query(Validation).filter_by(
|
207 |
+
annotation_id=annotation.id,
|
208 |
+
validator_id=user_id
|
209 |
+
).first()
|
210 |
+
|
211 |
+
validation_status = "Not Reviewed"
|
212 |
+
rejection_reason_val = "" # For the input box
|
213 |
+
rejection_visible_val = False # For the input box
|
214 |
+
|
215 |
+
if existing_validation:
|
216 |
+
if existing_validation.validated:
|
217 |
+
validation_status = "Approved"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
else:
|
219 |
+
validation_status = f"Rejected"
|
220 |
+
if existing_validation.description:
|
221 |
+
validation_status += f" ({existing_validation.description})"
|
222 |
+
rejection_reason_val = existing_validation.description
|
223 |
+
rejection_visible_val = True
|
224 |
+
|
225 |
+
# For deleted annotations, show special status
|
226 |
+
if is_deleted:
|
227 |
+
annotated_sentence_display = "[DELETED ANNOTATION]"
|
228 |
+
if validation_status == "Not Reviewed":
|
229 |
+
validation_status = "Not Reviewed (Deleted)"
|
230 |
+
else:
|
231 |
+
annotated_sentence_display = annotation.annotated_sentence
|
232 |
+
|
233 |
+
items.append({
|
234 |
+
"annotation_id": annotation.id,
|
235 |
+
"tts_id": annotation.tts_data.id,
|
236 |
+
"filename": annotation.tts_data.filename,
|
237 |
+
"sentence": annotation.tts_data.sentence,
|
238 |
+
"annotated_sentence": annotated_sentence_display,
|
239 |
+
"is_deleted": is_deleted,
|
240 |
+
# "annotator_name": annotation.annotator.name, # Anonymized
|
241 |
+
"annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
|
242 |
+
"validation_status": validation_status
|
243 |
+
})
|
244 |
+
|
245 |
+
# Find the first item that is not reviewed (prioritize non-deleted annotations)
|
246 |
+
initial_idx = 0
|
247 |
+
if items:
|
248 |
+
found_unreviewed = False
|
249 |
+
# First, try to find unreviewed non-deleted annotations
|
250 |
+
for i, item_data in enumerate(items):
|
251 |
+
if (item_data["validation_status"] == "Not Reviewed" and
|
252 |
+
not item_data.get("is_deleted", False)):
|
253 |
+
initial_idx = i
|
254 |
+
found_unreviewed = True
|
255 |
+
break
|
256 |
+
|
257 |
+
# If no unreviewed non-deleted items, look for any unreviewed items
|
258 |
+
if not found_unreviewed:
|
259 |
for i, item_data in enumerate(items):
|
260 |
+
if item_data["validation_status"].startswith("Not Reviewed"):
|
|
|
261 |
initial_idx = i
|
262 |
found_unreviewed = True
|
263 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
|
265 |
+
# If no unreviewed items at all, use the last item
|
266 |
+
if not found_unreviewed:
|
267 |
+
initial_idx = len(items) - 1 if items else 0
|
268 |
+
|
269 |
+
# Set initial display
|
270 |
+
if items:
|
271 |
+
initial_item = items[initial_idx]
|
272 |
+
review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations."
|
273 |
+
# Ensure correct order of return values for 14 outputs
|
274 |
+
# items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update, rejection_mode, reject_button_update
|
275 |
+
rejection_reason_val = ""
|
276 |
+
rejection_visible_val = False
|
277 |
+
if initial_item["validation_status"].startswith("Rejected"):
|
278 |
+
start_paren = initial_item["validation_status"].find("(")
|
279 |
+
end_paren = initial_item["validation_status"].find(")")
|
280 |
+
if start_paren != -1 and end_paren != -1:
|
281 |
+
rejection_reason_val = initial_item["validation_status"][start_paren+1:end_paren]
|
282 |
+
rejection_visible_val = True
|
283 |
+
|
284 |
+
return (
|
285 |
+
items,
|
286 |
+
initial_idx,
|
287 |
+
review_info_text,
|
288 |
+
str(initial_item["tts_id"]),
|
289 |
+
initial_item["filename"],
|
290 |
+
initial_item["sentence"],
|
291 |
+
initial_item["annotated_sentence"],
|
292 |
+
initial_item["annotated_at"],
|
293 |
+
initial_item["validation_status"],
|
294 |
+
"", # Placeholder for the original annotator name (maps to header.welcome)
|
295 |
+
gr.update(value=None, autoplay=False), # audio_update
|
296 |
+
gr.update(visible=rejection_visible_val, value=rejection_reason_val), # rejection_reason_input update
|
297 |
+
False, # Reset rejection mode
|
298 |
+
gr.update(value="❌ Reject") # Reset reject button
|
299 |
+
)
|
300 |
+
else:
|
301 |
+
# Ensure correct order and number of return values for empty items (14 outputs)
|
302 |
+
return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
|
303 |
|
304 |
+
# except Exception as e:
|
305 |
+
# log.error(f"Error loading review items: {e}")
|
306 |
+
# sentry_sdk.capture_exception(e)
|
307 |
+
# gr.Error(f"Failed to load review data: {e}")
|
308 |
+
# # Ensure correct order and number of return values for error case (14 outputs)
|
309 |
+
# return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
|
310 |
|
311 |
def show_current_review_item_fn(items, idx, session):
|
312 |
if not items or idx >= len(items) or idx < 0:
|
|
|
365 |
log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")
|
366 |
|
367 |
with get_db() as db:
|
368 |
+
# try:
|
369 |
+
existing_validation = db.query(Validation).filter_by(
|
370 |
+
annotation_id=annotation_id,
|
371 |
+
validator_id=user_id
|
372 |
+
).first()
|
373 |
+
|
374 |
+
if existing_validation:
|
375 |
+
log.info(f"Updating existing validation for annotation_id: {annotation_id}")
|
376 |
+
existing_validation.validated = approved
|
377 |
+
existing_validation.description = rejection_reason if not approved else None
|
378 |
+
existing_validation.validated_at = datetime.datetime.utcnow()
|
379 |
+
else:
|
380 |
+
log.info(f"Creating new validation for annotation_id: {annotation_id}")
|
381 |
+
new_validation = Validation(
|
382 |
annotation_id=annotation_id,
|
383 |
+
validator_id=user_id,
|
384 |
+
validated=approved,
|
385 |
+
description=rejection_reason if not approved else None,
|
386 |
+
validated_at=datetime.datetime.utcnow(),
|
387 |
+
)
|
388 |
+
db.add(new_validation)
|
389 |
+
|
390 |
+
db.commit()
|
391 |
+
log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
|
392 |
+
|
393 |
+
items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
|
394 |
+
|
395 |
+
# Show rejection reason input only if rejected, otherwise hide and clear
|
396 |
+
rejection_input_update = gr.update(visible=not approved, value="" if approved else rejection_reason)
|
397 |
+
|
398 |
+
return items, items[idx]["validation_status"], rejection_input_update
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
+
# except Exception as e:
|
401 |
+
# db.rollback()
|
402 |
+
# log.error(f"Error saving validation: {e}")
|
403 |
+
# sentry_sdk.capture_exception(e)
|
404 |
+
# gr.Error(f"Failed to save validation: {e}")
|
405 |
+
# return items, current_item["validation_status"], gr.update(visible=False) # Return original status and hide input on error
|
406 |
|
407 |
def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
|
408 |
"""Handle rejection button click - two-step process"""
|
|
|
448 |
def jump_by_data_id_fn(items, target_data_id, current_idx):
|
449 |
if not target_data_id:
|
450 |
return current_idx
|
451 |
+
# try:
|
452 |
+
target_id = int(target_data_id)
|
453 |
+
for i, item in enumerate(items):
|
454 |
+
if item["tts_id"] == target_id:
|
455 |
+
return i
|
456 |
+
gr.Warning(f"Data ID {target_id} not found in review items")
|
457 |
+
# except ValueError:
|
458 |
+
# sentry_sdk.capture_exception()
|
459 |
+
# gr.Warning(f"Invalid Data ID format: {target_data_id}")
|
460 |
return current_idx
|
461 |
|
462 |
# Output definitions
|
config.py
CHANGED
@@ -15,6 +15,12 @@ class Config(BaseSettings):
|
|
15 |
FTP_URL: str = os.environ.get("FTP_URL")
|
16 |
|
17 |
APP_TITLE: str = "Gooya TTS Annotation Tools"
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Phase 2 Review Mapping: Defines who reviews whose work.
|
20 |
# Key: Original annotator's username, Value: Reviewer's username
|
|
|
15 |
FTP_URL: str = os.environ.get("FTP_URL")
|
16 |
|
17 |
APP_TITLE: str = "Gooya TTS Annotation Tools"
|
18 |
+
|
19 |
+
# Sentry Configuration
|
20 |
+
SENTRY_DSN: str = os.environ.get("SENTRY_DSN", "")
|
21 |
+
SENTRY_ENVIRONMENT: str = os.environ.get("SENTRY_ENVIRONMENT", "development")
|
22 |
+
SENTRY_TRACES_SAMPLE_RATE: float = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1"))
|
23 |
+
SENTRY_PROFILES_SAMPLE_RATE: float = float(os.environ.get("SENTRY_PROFILES_SAMPLE_RATE", "0.1"))
|
24 |
|
25 |
# Phase 2 Review Mapping: Defines who reviews whose work.
|
26 |
# Key: Original annotator's username, Value: Reviewer's username
|
data/repository/annotator_repo.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from typing import Optional
|
2 |
from sqlalchemy.orm import Session
|
|
|
3 |
|
4 |
from data.models import Annotator
|
5 |
from utils.logger import Logger
|
@@ -20,26 +21,28 @@ class AnnotatorRepo:
|
|
20 |
# READ METHODS
|
21 |
# ------------------------------------------------------------------ #
|
22 |
def get_annotator_by_name(self, name: str) -> Optional[Annotator]:
|
23 |
-
try:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
except Exception as exc:
|
30 |
-
|
31 |
-
|
|
|
32 |
|
33 |
def get_annotator_by_id(self, user_id: int) -> Optional[Annotator]:
|
34 |
-
try:
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
except Exception as exc:
|
41 |
-
|
42 |
-
|
|
|
43 |
|
44 |
# ------------------------------------------------------------------ #
|
45 |
# WRITE METHODS
|
@@ -56,25 +59,26 @@ class AnnotatorRepo:
|
|
56 |
Raises:
|
57 |
ValueError: if name already exists.
|
58 |
"""
|
59 |
-
try:
|
60 |
-
|
61 |
-
|
62 |
|
63 |
-
|
64 |
-
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
except Exception as exc:
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
1 |
from typing import Optional
|
2 |
from sqlalchemy.orm import Session
|
3 |
+
import sentry_sdk
|
4 |
|
5 |
from data.models import Annotator
|
6 |
from utils.logger import Logger
|
|
|
21 |
# READ METHODS
|
22 |
# ------------------------------------------------------------------ #
|
23 |
def get_annotator_by_name(self, name: str) -> Optional[Annotator]:
|
24 |
+
# try:
|
25 |
+
return (
|
26 |
+
self.db.query(Annotator)
|
27 |
+
.filter(Annotator.name == name)
|
28 |
+
.first()
|
29 |
+
)
|
30 |
+
# except Exception as exc:
|
31 |
+
# log.error(f"Unable to fetch annotator <name={name}> : {exc}")
|
32 |
+
# sentry_sdk.capture_exception(exc)
|
33 |
+
# raise
|
34 |
|
35 |
def get_annotator_by_id(self, user_id: int) -> Optional[Annotator]:
|
36 |
+
# try:
|
37 |
+
return (
|
38 |
+
self.db.query(Annotator)
|
39 |
+
.filter(Annotator.id == user_id)
|
40 |
+
.first()
|
41 |
+
)
|
42 |
+
# except Exception as exc:
|
43 |
+
# log.error(f"Unable to fetch annotator <id={user_id}> : {exc}")
|
44 |
+
# sentry_sdk.capture_exception(exc)
|
45 |
+
# raise
|
46 |
|
47 |
# ------------------------------------------------------------------ #
|
48 |
# WRITE METHODS
|
|
|
59 |
Raises:
|
60 |
ValueError: if name already exists.
|
61 |
"""
|
62 |
+
# try:
|
63 |
+
if self.get_annotator_by_name(name):
|
64 |
+
raise ValueError(f"name `{name}` already exists.")
|
65 |
|
66 |
+
# ------------------ HASH PASSWORD ------------------ #
|
67 |
+
hashed_pass = hash_password(password)
|
68 |
|
69 |
+
annotator = Annotator(
|
70 |
+
name=name,
|
71 |
+
password=hashed_pass,
|
72 |
+
is_active=is_active,
|
73 |
+
)
|
74 |
+
self.db.add(annotator)
|
75 |
+
self.db.flush() # Ensure PK generated
|
76 |
+
self.db.refresh(annotator)
|
77 |
|
78 |
+
log.info(f"New annotator created <id={annotator.id} name={name}>")
|
79 |
+
return annotator
|
80 |
+
# except Exception as exc:
|
81 |
+
# self.db.rollback()
|
82 |
+
# log.error(f"Unable to create annotator `{name}` : {exc}")
|
83 |
+
# sentry_sdk.capture_exception(exc)
|
84 |
+
# raise
|
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ bcrypt
|
|
10 |
google-api-python-client
|
11 |
pydub
|
12 |
numpy
|
13 |
-
requests
|
|
|
|
10 |
google-api-python-client
|
11 |
pydub
|
12 |
numpy
|
13 |
+
requests
|
14 |
+
sentry-sdk
|
utils/database.py
CHANGED
@@ -5,6 +5,7 @@ from sqlalchemy.orm import sessionmaker
|
|
5 |
from contextlib import contextmanager
|
6 |
import sys # Add sys import
|
7 |
import os # Add os import
|
|
|
8 |
|
9 |
# Add project root to Python path to ensure local modules are prioritized
|
10 |
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
@@ -20,18 +21,20 @@ log = Logger()
|
|
20 |
|
21 |
def get_db_engine():
|
22 |
"""Create DB engine with error handling for HF Spaces"""
|
23 |
-
try:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
except Exception as e:
|
34 |
-
|
|
|
|
|
35 |
|
36 |
|
37 |
engine = get_db_engine()
|
@@ -42,23 +45,23 @@ SessionLocal = sessionmaker(bind=engine)
|
|
42 |
def get_db():
|
43 |
"""Session manager for HF Spaces"""
|
44 |
db = SessionLocal()
|
45 |
-
try:
|
46 |
-
|
47 |
-
|
48 |
-
except Exception as e:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
finally:
|
53 |
-
|
54 |
|
55 |
|
56 |
def initialize_database():
|
57 |
"""Initialize tables with HF Spaces compatibility"""
|
58 |
-
try:
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
except Exception as e:
|
64 |
-
|
|
|
5 |
from contextlib import contextmanager
|
6 |
import sys # Add sys import
|
7 |
import os # Add os import
|
8 |
+
import sentry_sdk
|
9 |
|
10 |
# Add project root to Python path to ensure local modules are prioritized
|
11 |
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
21 |
|
22 |
def get_db_engine():
|
23 |
"""Create DB engine with error handling for HF Spaces"""
|
24 |
+
# try:
|
25 |
+
engine = create_engine(
|
26 |
+
conf.db_url,
|
27 |
+
pool_pre_ping=True,
|
28 |
+
pool_size=5,
|
29 |
+
max_overflow=10,
|
30 |
+
connect_args={"connect_timeout": 10},
|
31 |
+
)
|
32 |
+
log.info("Database engine created successfully")
|
33 |
+
return engine
|
34 |
+
# except Exception as e:
|
35 |
+
# log.error(f"Failed to create database engine: {e}")
|
36 |
+
# sentry_sdk.capture_exception(e)
|
37 |
+
# raise
|
38 |
|
39 |
|
40 |
engine = get_db_engine()
|
|
|
45 |
def get_db():
|
46 |
"""Session manager for HF Spaces"""
|
47 |
db = SessionLocal()
|
48 |
+
# try:
|
49 |
+
yield db
|
50 |
+
db.commit()
|
51 |
+
# except Exception as e:
|
52 |
+
# db.rollback()
|
53 |
+
# log.error(f"Database error: {e}")
|
54 |
+
# raise
|
55 |
+
# finally:
|
56 |
+
db.close()
|
57 |
|
58 |
|
59 |
def initialize_database():
|
60 |
"""Initialize tables with HF Spaces compatibility"""
|
61 |
+
# try:
|
62 |
+
from data.models import Base
|
63 |
|
64 |
+
Base.metadata.create_all(bind=engine)
|
65 |
+
log.info("Tables created successfully")
|
66 |
+
# except Exception as e:
|
67 |
+
# log.error(f"Table creation failed: {e}")
|
utils/logger.py
CHANGED
@@ -1,9 +1,17 @@
|
|
1 |
from datetime import datetime
|
|
|
2 |
|
3 |
class Logger:
|
4 |
def _log(self, level, message):
|
5 |
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
6 |
print(f"[{now}] [{level.upper()}] {message}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def info(self, message):
|
9 |
self._log('info', message)
|
@@ -12,4 +20,26 @@ class Logger:
|
|
12 |
self._log('warning', message)
|
13 |
|
14 |
def error(self, message):
|
15 |
-
self._log('error', message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from datetime import datetime
|
2 |
+
import sentry_sdk
|
3 |
|
4 |
class Logger:
|
5 |
def _log(self, level, message):
|
6 |
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
7 |
print(f"[{now}] [{level.upper()}] {message}")
|
8 |
+
|
9 |
+
# Send to Sentry based on level
|
10 |
+
if level == 'error':
|
11 |
+
sentry_sdk.capture_message(message, level='error')
|
12 |
+
elif level == 'warning':
|
13 |
+
sentry_sdk.capture_message(message, level='warning')
|
14 |
+
# Info messages are not sent to Sentry to avoid noise
|
15 |
|
16 |
def info(self, message):
    """Log *message* at the 'info' level via ``_log``."""
    self._log("info", message)
|
|
|
20 |
self._log('warning', message)
|
21 |
|
22 |
def error(self, message):
    """Log *message* at the 'error' level via ``_log``."""
    self._log("error", message)
|
24 |
+
|
25 |
+
def exception(self, message, exc_info=None):
    """Log an error message with its traceback and report the exception to Sentry.

    Args:
        message: Text to print at the ERROR level.
        exc_info: Optional exception instance or ``(type, value, traceback)``
            tuple. When omitted, or ``True`` as in the ``logging`` convention,
            the exception currently being handled is used.
    """
    import sys
    import traceback

    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{now}] [ERROR] {message}")

    # logging-style callers pass exc_info=True; treat it as "use the active exception".
    if exc_info is True:
        exc_info = None

    # Work out which exception to print a traceback for.
    if isinstance(exc_info, BaseException):
        details = (type(exc_info), exc_info, exc_info.__traceback__)
    elif isinstance(exc_info, tuple) and len(exc_info) == 3:
        details = exc_info
    else:
        details = sys.exc_info()
    if details[0] is not None:
        print("".join(traceback.format_exception(*details)), end="")

    # Capture exception in Sentry
    if exc_info:
        sentry_sdk.capture_exception(exc_info)
    else:
        sentry_sdk.capture_exception()
|
35 |
+
|
36 |
+
def capture_user_action(self, action, user_id=None, **kwargs):
    """Capture a user action for analytics without ever failing the caller.

    Args:
        action: Name of the action performed.
        user_id: Optional identifier of the acting user.
        **kwargs: Extra keyword arguments forwarded to
            ``capture_annotation_event``.

    Analytics are best-effort: if the Sentry helper cannot be imported the
    action is only logged locally, and any error raised while capturing is
    logged instead of propagated.
    """
    try:
        from utils.sentry_integration import capture_annotation_event
    except ImportError:
        # Fallback if sentry_integration is not available
        self.info(f"User action: {action} (user_id: {user_id})")
        return

    try:
        capture_annotation_event(action, user_id=user_id, **kwargs)
    except Exception as e:
        self.error(f"Failed to capture user action: {e}")
|
utils/security.py
CHANGED
@@ -8,22 +8,22 @@ def hash_password(plain_password: str) -> str:
|
|
8 |
"""
|
9 |
Hash a plaintext password using bcrypt.
|
10 |
"""
|
11 |
-
try:
|
12 |
-
|
13 |
-
|
14 |
-
except Exception as exc:
|
15 |
-
|
16 |
-
|
17 |
|
18 |
|
19 |
def verify_password(plain_password: str, hashed_password: str) -> bool:
|
20 |
"""
|
21 |
Verify a plaintext password against its bcrypt hash.
|
22 |
"""
|
23 |
-
try:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
except Exception as exc:
|
28 |
-
|
29 |
-
|
|
|
8 |
"""
|
9 |
Hash a plaintext password using bcrypt.
|
10 |
"""
|
11 |
+
# try:
|
12 |
+
hashed = bcrypt.hashpw(plain_password.encode("utf-8"), bcrypt.gensalt())
|
13 |
+
return hashed.decode("utf-8")
|
14 |
+
# except Exception as exc:
|
15 |
+
# log.error(f"Password hashing failed: {exc}")
|
16 |
+
# raise
|
17 |
|
18 |
|
19 |
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """
    Verify a plaintext password against its bcrypt hash.

    Returns:
        True when the password matches the hash. False when it does not
        match, or when either argument cannot be checked (for example a
        malformed stored hash), so a bad record is treated as a failed
        login rather than an unhandled error.
    """
    try:
        return bcrypt.checkpw(
            plain_password.encode("utf-8"), hashed_password.encode("utf-8")
        )
    except (ValueError, TypeError, AttributeError) as exc:
        # ValueError: malformed hash/salt; TypeError/AttributeError: non-string input.
        log.error(f"Password verification failed: {exc}")
        return False
|
utils/sentry_integration.py
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# utils/sentry_integration.py
|
2 |
+
|
3 |
+
import os
|
4 |
+
import sentry_sdk
|
5 |
+
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
|
6 |
+
from sentry_sdk.integrations.logging import LoggingIntegration
|
7 |
+
from sentry_sdk.integrations.threading import ThreadingIntegration
|
8 |
+
from utils.logger import Logger
|
9 |
+
|
10 |
+
log = Logger()
|
11 |
+
|
12 |
+
def _float_from_env(name, default):
    """Read a float from environment variable *name*; use *default* if unset or invalid."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError:
        log.warning(f"Invalid value for {name}: {raw!r}; using {default}")
        return default


def initialize_sentry():
    """
    Initialize Sentry for error tracking and performance monitoring.

    Configuration is read from the environment:
        SENTRY_DSN: required; initialization is skipped when it is unset.
        SENTRY_ENVIRONMENT: environment name (default "development").
        SENTRY_TRACES_SAMPLE_RATE / SENTRY_PROFILES_SAMPLE_RATE: floats
            (default 0.1 each); an invalid value falls back to the default.

    Returns:
        bool: True when the SDK was initialized, False when no DSN is set.
    """
    sentry_dsn = os.environ.get("SENTRY_DSN")

    if not sentry_dsn:
        log.info("Sentry DSN not configured, skipping Sentry initialization")
        return False

    # Environment configuration
    environment = os.environ.get("SENTRY_ENVIRONMENT", "development")
    traces_sample_rate = _float_from_env("SENTRY_TRACES_SAMPLE_RATE", 0.1)
    profiles_sample_rate = _float_from_env("SENTRY_PROFILES_SAMPLE_RATE", 0.1)

    # Logging integration: with both levels set to None, stdlib logging
    # records are neither kept as breadcrumbs nor sent as events.
    logging_integration = LoggingIntegration(
        level=None,
        event_level=None,
    )

    # SQLAlchemy integration for database monitoring
    sqlalchemy_integration = SqlalchemyIntegration()

    # Threading integration for multi-threaded apps
    threading_integration = ThreadingIntegration(propagate_hub=True)

    sentry_sdk.init(
        dsn=sentry_dsn,
        environment=environment,
        traces_sample_rate=traces_sample_rate,
        profiles_sample_rate=profiles_sample_rate,
        integrations=[
            logging_integration,
            sqlalchemy_integration,
            threading_integration,
        ],
        # Additional configuration
        send_default_pii=False,  # Don't send personally identifiable information
        attach_stacktrace=True,  # Attach stack traces to messages
        before_send=before_send_filter,  # Custom filter function
        release=get_app_version(),  # App version for release tracking
    )

    log.info(f"Sentry initialized successfully for environment: {environment}")
    return True
|
63 |
+
|
64 |
+
def before_send_filter(event, hint):
    """
    Filter function to modify or drop events before sending to Sentry.

    Args:
        event (dict): The event payload about to be sent.
        hint (dict): Extra information; may contain an 'exc_info' tuple of
            (type, value, traceback).

    Returns:
        The (mutated) event, or None to drop it. Events are dropped for
        KeyboardInterrupt/SystemExit, and for errors mentioning
        "connect_timeout" while the environment is "development".
    """
    exc_info = hint.get('exc_info') if hint else None
    if exc_info:
        exc_value = exc_info[1]

        # Skip common/expected errors
        if isinstance(exc_value, (KeyboardInterrupt, SystemExit)):
            return None

        # Skip database connection timeouts in development. The default matches
        # initialize_sentry(), which treats an unset variable as "development".
        environment = os.environ.get("SENTRY_ENVIRONMENT", "development")
        if "connect_timeout" in str(exc_value) and environment == "development":
            return None

    # Add custom tags (an explicit None is treated like a missing key)
    if event.get('tags') is None:
        event['tags'] = {}
    event['tags']['component'] = 'tts_labeling'

    # Add user context if available (without PII)
    if 'user' not in event:
        event['user'] = {
            'id': 'anonymous',  # Don't use real user IDs
        }

    return event
|
91 |
+
|
92 |
+
def get_app_version():
    """
    Get the application version for release tracking.

    Returns:
        str: The short git commit hash of the checkout two directories above
        this file, or "unknown" when git is unavailable, times out, fails,
        or prints nothing.
    """
    import subprocess

    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    try:
        # Try to get version from git
        result = subprocess.run(
            ['git', 'rev-parse', '--short', 'HEAD'],
            capture_output=True,
            text=True,
            cwd=repo_root,
            timeout=5,  # never let a hung git process block startup
        )
    except (OSError, subprocess.SubprocessError):
        # git not installed, directory missing, or the call timed out
        return "unknown"

    version = result.stdout.strip()
    if result.returncode == 0 and version:
        return version

    # Fallback to a default version
    return "unknown"
|
112 |
+
|
113 |
+
def capture_custom_event(message, level="info", extra=None, tags=None):
    """
    Capture custom events to Sentry with structured data

    The extras and tags are applied inside a temporary scope so they are
    attached to this message only and do not carry over to later events.

    Args:
        message (str): Event message
        level (str): Event level (debug, info, warning, error, fatal)
        extra (dict): Additional context data
        tags (dict): Tags for filtering/grouping
    """
    with sentry_sdk.push_scope() as scope:
        for key, value in (extra or {}).items():
            scope.set_extra(key, value)

        for key, value in (tags or {}).items():
            scope.set_tag(key, value)

        sentry_sdk.capture_message(message, level=level)
|
137 |
+
|
138 |
+
def capture_annotation_event(action, user_id=None, annotation_id=None, tts_id=None, success=True):
    """
    Capture annotation-specific events for analytics

    Args:
        action (str): Action performed (create, update, delete, review_approve, review_reject)
        user_id (int): User ID; only a digest of it is sent
        annotation_id (int): Annotation ID
        tts_id (int): TTS data ID
        success (bool): Whether the action was successful
    """
    import hashlib

    tags = {
        'action_type': action,
        'success': str(success),
        'component': 'annotation'
    }

    # Compare against None so that a legitimate ID of 0 is still recorded.
    extra = {}
    if annotation_id is not None:
        extra['annotation_id'] = annotation_id
    if tts_id is not None:
        extra['tts_id'] = tts_id

    if user_id is not None:
        # Built-in hash() of a str is randomized per process, so it cannot
        # correlate a user across restarts; use a stable digest instead.
        # NOTE: an unsalted digest is pseudonymous, not anonymous.
        digest = hashlib.sha256(str(user_id).encode("utf-8")).hexdigest()
        extra['user_hash'] = digest[:16]

    message = f"Annotation {action}: {'success' if success else 'failed'}"

    capture_custom_event(
        message=message,
        level="info" if success else "warning",
        extra=extra,
        tags=tags
    )
|
177 |
+
|
178 |
+
def capture_database_performance(operation, duration, table=None, success=True):
    """
    Report how long a database operation took.

    Args:
        operation (str): Database operation (select, insert, update, delete)
        duration (float): Operation duration in seconds
        table (str): Table name; added as a tag when truthy
        success (bool): Whether operation was successful

    Durations above 2.0s are categorized as 'slow'; above 5.0s the event is
    sent at the "warning" level instead of "info".
    """
    event_tags = {
        'db_operation': operation,
        'success': str(success),
        'component': 'database',
    }
    if table:
        event_tags['table'] = table

    if duration > 2.0:
        category = 'slow'
    else:
        category = 'normal'

    capture_custom_event(
        message=f"Database {operation} took {duration:.2f}s",
        level="warning" if duration > 5.0 else "info",
        extra={
            'duration_seconds': duration,
            'performance_category': category,
        },
        tags=event_tags,
    )
|
215 |
+
|
216 |
+
def capture_user_activity(activity, user_id=None, session_duration=None, items_processed=None):
    """
    Capture user activity metrics

    Args:
        activity (str): Activity type (login, logout, annotation_session)
        user_id (int): User ID; only a digest of it is sent
        session_duration (float): Session duration in seconds
        items_processed (int): Number of items processed
    """
    import hashlib

    tags = {
        'activity_type': activity,
        'component': 'user_activity'
    }

    # Compare against None so that legitimate zero values are still recorded.
    extra = {}
    if session_duration is not None:
        extra['session_duration_seconds'] = session_duration
    if items_processed is not None:
        extra['items_processed'] = items_processed

    if user_id is not None:
        # Built-in hash() of a str is randomized per process, so it cannot
        # correlate a user across restarts; use a stable digest instead.
        # NOTE: an unsalted digest is pseudonymous, not anonymous.
        digest = hashlib.sha256(str(user_id).encode("utf-8")).hexdigest()
        extra['user_hash'] = digest[:16]

    message = f"User {activity}"

    capture_custom_event(
        message=message,
        level="info",
        extra=extra,
        tags=tags
    )
|
253 |
+
|
254 |
+
# Context manager for capturing performance
|
255 |
+
class SentryPerformanceMonitor:
    """
    Context manager for monitoring operation performance

    On exit it reports the elapsed time of the wrapped block through
    ``capture_custom_event``: level "error" when the block raised, "warning"
    when it took longer than 5 seconds, otherwise "info". Exceptions raised
    inside the block are never suppressed.
    """

    def __init__(self, operation_name, tags=None):
        """
        Args:
            operation_name (str): Name used in the event message and tags.
            tags (dict): Optional extra tags; copied so that the caller's
                dict is never modified.
        """
        self.operation_name = operation_name
        self.tags = dict(tags) if tags else {}
        self.start_time = None  # wall-clock start, kept for external readers
        self._started = None  # monotonic start used to compute the duration

    def __enter__(self):
        import time
        self.start_time = time.time()
        self._started = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        import time
        # perf_counter() is monotonic, so the duration is not affected by
        # system clock adjustments.
        duration = time.perf_counter() - self._started

        success = exc_type is None

        # Update tags
        self.tags.update({
            'operation': self.operation_name,
            'success': str(success)
        })

        extra = {
            'duration_seconds': duration,
            'operation_name': self.operation_name
        }

        if not success:
            level = "error"
            message = f"Operation '{self.operation_name}' failed after {duration:.2f}s"
        else:
            level = "warning" if duration > 5.0 else "info"
            message = f"Operation '{self.operation_name}' completed in {duration:.2f}s"

        capture_custom_event(
            message=message,
            level=level,
            extra=extra,
            tags=self.tags
        )

        # Don't suppress exceptions
        return False
|