vargha committed on
Commit
f7ef7d3
·
1 Parent(s): 3c835a7

aligned interface and data import scripts

Browse files
components/dashboard_page.py CHANGED
@@ -1,16 +1,17 @@
1
  import gradio as gr
2
  import numpy as np
3
  import datetime
4
- from sqlalchemy import orm
5
 
6
  from components.header import Header
7
- from utils.logger import Logger
8
- from utils.gdrive_downloader import PublicFolderAudioLoader # Assuming LOADER uses this
9
  from config import conf
10
- from utils.database import get_db # For DB operations
11
- from data.models import Annotation, AudioTrim, TTSData # Import your models
 
12
 
13
- log = Logger()
14
  LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
15
  GDRIVE_FOLDER = conf.GDRIVE_FOLDER
16
 
@@ -18,70 +19,108 @@ GDRIVE_FOLDER = conf.GDRIVE_FOLDER
18
  class DashboardPage:
19
  def __init__(self) -> None:
20
  with gr.Column(visible=False) as self.container:
21
- self.header = Header()
22
 
23
  with gr.Row():
24
- # ستون چپ
25
  with gr.Column(scale=3):
26
  with gr.Row():
27
- self.tts_id = gr.Textbox(label="ID", interactive=False)
28
- self.filename = gr.Textbox(label="Filename", interactive=False)
29
- with gr.Row():
30
- self.sentence = gr.Textbox(
31
- label="Sentence", interactive=False, max_lines=5, rtl=True
32
- )
33
- self.btn_copy = gr.Button("📋 Copy", interactive=True)
34
- with gr.Row():
35
- self.ann_sentence = gr.Textbox(
36
- label="Annotated Sentence",
37
- interactive=True,
38
- max_lines=5,
39
- rtl=True,
40
- )
41
- self.btn_paste = gr.Button("📥 Paste", interactive=True)
42
- with gr.Row():
43
- self.validated = gr.Checkbox(
44
- label="Validated", interactive=True
45
- )
46
  with gr.Row():
47
- self.btn_prev = gr.Button("⬅️ Previous", interactive=True)
48
- self.btn_next = gr.Button("Next ➡️", interactive=True)
49
- self.btn_delete = gr.Button("🗑️ Delete", interactive=True)
 
 
 
 
 
 
 
 
 
 
50
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  self.jump_data_id_input = gr.Number(
52
- label="Jump to Data ID", value=0, precision=0, interactive=True
53
- )
54
- self.btn_jump = gr.Button("Go", interactive=True)
55
- with gr.Row():
56
- self.trim_start_sec = gr.Number(
57
- label="Trim Start (s)", value=0.0, precision=3, interactive=True
58
- )
59
- self.trim_end_sec = gr.Number(
60
- label="Trim End (s)", value=0.0, precision=3, interactive=True
61
  )
62
- self.btn_trim = gr.Button("✂️ Trim", interactive=True)
63
- self.btn_undo_trim = gr.Button("↩️ Undo Trim", interactive=True)
64
 
65
- # ستون راست
66
  with gr.Column(scale=2):
67
- self.btn_load_voice = gr.Button("Load Audio", interactive=True)
68
  self.audio = gr.Audio(
69
  label="🔊 Audio", interactive=False, autoplay=True
70
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # stateها
73
  self.items_state = gr.State([])
74
  self.idx_state = gr.State(0)
75
- self.clipboard_state = gr.State("")
76
  self.original_audio_state = gr.State(None)
77
- self.current_trim_params = gr.State(None)
78
 
79
  # List of all interactive UI elements for enabling/disabling
80
  self.interactive_ui_elements = [
81
- self.btn_prev, self.btn_next, self.btn_delete, self.btn_jump,
 
82
  self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
83
  self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
84
- self.ann_sentence, self.validated, self.btn_copy, self.btn_paste
85
  ]
86
 
87
  # ---------------- wiring ---------------- #
@@ -90,151 +129,184 @@ class DashboardPage:
90
  ):
91
  self.header.register_callbacks(login_page, self, session_state)
92
 
93
- # Helper function to update UI interactive state
94
  def update_ui_interactive_state(is_interactive: bool):
95
  updates = []
96
  for elem in self.interactive_ui_elements:
97
  if elem == self.btn_load_voice and not is_interactive:
98
- updates.append(gr.update(value="⏳ Loading...", interactive=False))
99
  elif elem == self.btn_load_voice and is_interactive:
100
- updates.append(gr.update(value="Load Audio", interactive=True))
 
 
 
 
 
101
  else:
102
  updates.append(gr.update(interactive=is_interactive))
103
  return updates
104
 
105
- # ---- All Helper Functions ----
106
- def apply_loaded_trim_fn(audio_data_as_loaded, trim_params_from_state, original_audio_for_state):
107
- """
108
- Applies trim if trim_params_from_state are available to the audio_data_as_loaded.
109
- This is used after loading an item and its original audio.
110
- original_audio_for_state is preserved as the true original.
111
- """
112
- if audio_data_as_loaded and trim_params_from_state:
113
- sr, wav = audio_data_as_loaded
114
- start = trim_params_from_state.get("start")
115
- end = trim_params_from_state.get("end")
116
- operation = trim_params_from_state.get("operation")
117
-
118
- if operation == "delete" and start is not None and end is not None and end > start and start >= 0:
119
- start_sample = int(sr * start / 1000.0)
120
- end_sample = int(sr * end / 1000.0)
121
-
122
- audio_duration_samples = len(wav)
123
- start_sample = max(0, min(start_sample, audio_duration_samples))
124
- end_sample = max(start_sample, min(end_sample, audio_duration_samples))
125
-
126
- if start_sample == 0 and end_sample == audio_duration_samples:
127
- log.info(f"Applying saved trim: delete entire audio from {start}ms to {end}ms. Resulting in empty audio.")
128
- return (sr, np.array([], dtype=wav.dtype)), original_audio_for_state
129
 
130
- part1 = wav[:start_sample]
131
- part2 = wav[end_sample:]
132
- deleted_segment_wav = np.concatenate((part1, part2))
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- log.info(f"Applied saved trim (delete operation): {start}ms to {end}ms. Original shape: {wav.shape}, New shape: {deleted_segment_wav.shape}")
135
- return (sr, deleted_segment_wav), original_audio_for_state
136
- else:
137
- if operation != "delete":
138
- log.warning("Saved trim parameters do not specify a 'delete' operation. Using original audio.")
139
- else:
140
- log.warning("Invalid saved trim parameters for delete operation. Using original audio.")
141
- return audio_data_as_loaded, original_audio_for_state
142
- return audio_data_as_loaded, original_audio_for_state
 
 
 
 
143
 
144
- def download_voice_fn(folder_link, filename_to_load):
145
  if not filename_to_load:
146
- return None, None
147
  try:
148
- log.info(f"Downloading voice: {filename_to_load}")
149
  sr, wav = LOADER.load_audio(folder_link, filename_to_load)
150
- return (sr, wav), (sr, wav.copy())
151
  except Exception as e:
152
- log.error(f"GDrive download failed for {filename_to_load}: {e}")
153
  gr.Error(f"Failed to load audio: {filename_to_load}. Error: {e}")
154
- return None, None
155
 
156
- def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, is_validated_ui, active_trim_params):
157
  annotator_id = session.get("user_id")
158
  if not current_tts_id or not annotator_id:
159
  gr.Error("Cannot save: Missing TTS ID or User ID.")
160
- return False
161
- validated_to_save = bool(is_validated_ui)
162
  with get_db() as db:
163
  try:
164
  annotation_obj = db.query(Annotation).filter_by(
165
  tts_data_id=current_tts_id, annotator_id=annotator_id
166
- ).first()
 
167
  if not annotation_obj:
168
  annotation_obj = Annotation(
169
  tts_data_id=current_tts_id, annotator_id=annotator_id
170
  )
171
  db.add(annotation_obj)
 
172
  annotation_obj.annotated_sentence = ann_text_to_save
173
- annotation_obj.validated = validated_to_save
174
  annotation_obj.annotated_at = datetime.datetime.utcnow()
175
- if active_trim_params and active_trim_params.get("operation") == "delete" and active_trim_params.get("start") is not None:
176
- start_to_save = active_trim_params["start"]
177
- end_to_save = active_trim_params["end"]
178
- if not annotation_obj.audio_trim:
 
 
 
 
 
 
 
 
179
  db.flush()
180
- if annotation_obj.id is None:
181
- gr.Error("Failed to get annotation ID for saving trim.")
182
- db.rollback()
183
- return False
184
- new_trim = AudioTrim(
 
 
 
 
 
185
  annotation_id=annotation_obj.id,
186
- original_tts_data_id=current_tts_id,
187
- start=start_to_save,
188
- end=end_to_save,
189
  )
190
- annotation_obj.audio_trim = new_trim
191
- else:
192
- annotation_obj.audio_trim.start = start_to_save
193
- annotation_obj.audio_trim.end = end_to_save
194
- elif annotation_obj.audio_trim:
195
- db.delete(annotation_obj.audio_trim)
196
- annotation_obj.audio_trim = None
197
  db.commit()
198
  gr.Info(f"Annotation for ID {current_tts_id} saved.")
199
- return validated_to_save
200
  except Exception as e:
201
  db.rollback()
202
- log.error(f"Failed to save annotation for {current_tts_id}: {e}")
203
  gr.Error(f"Save failed: {e}")
204
- return False
205
 
206
  def show_current_item_fn(items, idx, session):
207
- if not items or idx >= len(items):
208
- return "", "", "", "", False, None, 0.0, 0.0, None
 
 
 
 
 
 
 
 
209
  current_item = items[idx]
210
  tts_data_id = current_item.get("id")
211
  annotator_id = session.get("user_id")
212
- ann_text, is_validated, trim_params_for_ui = "", False, None
213
- start_sec_ui, end_sec_ui = 0.0, 0.0
214
  if tts_data_id and annotator_id:
215
  with get_db() as db:
216
  try:
217
  existing_annotation = db.query(Annotation).filter_by(
218
  tts_data_id=tts_data_id, annotator_id=annotator_id
219
- ).options(orm.joinedload(Annotation.audio_trim)).first() # Eager load audio_trim
220
  if existing_annotation:
221
  ann_text = existing_annotation.annotated_sentence or ""
222
- is_validated = existing_annotation.validated
223
- if existing_annotation.audio_trim:
224
- trim_params_for_ui = {
225
- "start": existing_annotation.audio_trim.start,
226
- "end": existing_annotation.audio_trim.end,
227
- "operation": "delete"
228
- }
229
- start_sec_ui = existing_annotation.audio_trim.start / 1000.0
230
- end_sec_ui = existing_annotation.audio_trim.end / 1000.0
231
  except Exception as e:
232
- log.error(f"Database error in show_current_item_fn for TTS ID {tts_data_id}: {e}")
233
  gr.Error(f"Error loading annotation details: {e}")
 
234
  return (
235
  current_item.get("id", ""), current_item.get("filename", ""),
236
- current_item.get("sentence", ""), ann_text, is_validated, None,
237
- start_sec_ui, end_sec_ui, trim_params_for_ui
 
 
 
 
238
  )
239
 
240
  def navigate_idx_fn(items, current_idx, direction):
@@ -243,9 +315,65 @@ class DashboardPage:
243
  return new_idx
244
 
245
  def load_all_items_fn(sess):
246
- items = sess.get("dashboard_items", [])
247
- initial_ui_values = show_current_item_fn(items, 0, sess)
248
- return items, 0, *initial_ui_values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  def jump_by_data_id_fn(items, target_data_id_str, current_idx):
251
  if not target_data_id_str: return current_idx
@@ -253,209 +381,348 @@ class DashboardPage:
253
  target_id = int(target_data_id_str)
254
  for i, item_dict in enumerate(items):
255
  if item_dict.get("id") == target_id: return i
256
- gr.Warning(f"Data ID {target_id} not found.")
257
  except ValueError:
258
  gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
259
  return current_idx
260
 
261
- def perform_trim_fn(original_audio_data, start_sec, end_sec, current_audio_for_fallback):
262
- log.info(f"perform_trim_fn called with start_sec: {start_sec}, end_sec: {end_sec}")
263
- if original_audio_data is None:
264
- gr.Warning("No original audio loaded. Cannot perform new trim.")
265
- return current_audio_for_fallback, None
266
- if start_sec is None or end_sec is None or start_sec < 0 or end_sec <= start_sec:
267
- gr.Warning("Invalid trim times. Start must be >= 0 and End > Start.")
268
- return original_audio_data, None
269
- try:
270
- sr, wav = original_audio_data
271
- start_sample, end_sample = int(sr * start_sec), int(sr * end_sec)
272
- audio_duration_samples = len(wav)
273
- start_sample = max(0, min(start_sample, audio_duration_samples))
274
- end_sample = max(start_sample, min(end_sample, audio_duration_samples))
275
- trimmed_wav = np.concatenate((wav[:start_sample], wav[end_sample:]))
276
- active_trim_params = {"start": start_sec * 1000.0, "end": end_sec * 1000.0, "operation": "delete"}
277
- log.info(f"Audio segment deleted. New shape: {trimmed_wav.shape}")
278
- if trimmed_wav.size == 0: gr.Warning("Trim resulted in empty audio.")
279
- return (sr, trimmed_wav), active_trim_params
280
- except Exception as e:
281
- log.error(f"Error during audio trimming: {e}")
282
- gr.Error(f"Failed to trim audio: {e}")
283
- return original_audio_data, None
284
-
285
- def delete_db_and_ui_fn(items, current_idx, session):
286
- item_info = items[current_idx]
287
- tts_data_id_to_delete = item_info.get("id")
288
- annotator_id_for_delete = session.get("user_id")
289
- if tts_data_id_to_delete and annotator_id_for_delete:
 
 
 
 
 
 
 
 
 
 
 
290
  with get_db() as db:
291
  try:
292
  annotation_obj = db.query(Annotation).filter_by(
293
- tts_data_id=tts_data_id_to_delete, annotator_id=annotator_id_for_delete
294
- ).first()
295
  if annotation_obj:
296
- db.delete(annotation_obj) # Cascade should handle AudioTrim
 
 
 
 
 
 
 
297
  db.commit()
298
- gr.Info(f"Annotation for ID {tts_data_id_to_delete} deleted.")
299
  else:
300
- gr.Warning(f"No annotation found to delete for ID {tts_data_id_to_delete}.")
301
  except Exception as e:
302
  db.rollback()
303
- log.error(f"Error deleting annotation {tts_data_id_to_delete}: {e}")
304
- gr.Error(f"Failed to delete annotation: {e}")
305
  else:
306
- gr.Error("Cannot delete: Missing TTS ID or User ID.")
307
- refreshed_ui_values = show_current_item_fn(items, current_idx, session)
308
- return items, current_idx, *refreshed_ui_values
309
 
310
- # ---- Callback Implementations ----
311
- outputs_for_show_current = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  self.tts_id, self.filename, self.sentence, self.ann_sentence,
313
- self.validated, self.audio, self.trim_start_sec,
314
- self.trim_end_sec, self.current_trim_params,
 
 
 
315
  ]
316
-
317
  # Initial Load
 
 
 
 
318
  root_blocks.load(
319
- fn=lambda: update_ui_interactive_state(False),
320
  outputs=self.interactive_ui_elements
321
  ).then(
322
- fn=load_all_items_fn,
323
  inputs=[session_state],
324
- outputs=[self.items_state, self.idx_state] + outputs_for_show_current,
 
 
 
325
  ).then(
326
- fn=download_voice_fn,
327
- inputs=[gr.State(GDRIVE_FOLDER), self.filename],
328
- outputs=[self.audio, self.original_audio_state],
 
 
329
  ).then(
330
- fn=apply_loaded_trim_fn,
331
- inputs=[self.audio, self.current_trim_params, self.original_audio_state],
332
- outputs=[self.audio, self.original_audio_state]
333
- ).then(
334
- fn=lambda: update_ui_interactive_state(True),
335
  outputs=self.interactive_ui_elements
336
  )
337
 
338
- # Navigation (Prev/Next)
339
- for btn_widget, direction_str in [
340
- (self.btn_prev, "prev"), (self.btn_next, "next"),
 
 
 
341
  ]:
342
  event_chain = btn_widget.click(
343
  fn=lambda: update_ui_interactive_state(False),
344
  outputs=self.interactive_ui_elements
345
  )
346
- if direction_str == "next":
347
  event_chain = event_chain.then(
348
  fn=save_annotation_db_fn,
349
  inputs=[
350
  self.tts_id, session_state, self.ann_sentence,
351
- self.validated, self.current_trim_params,
352
  ],
353
- outputs=[self.validated]
 
 
 
 
354
  )
355
- event_chain.then(
356
- fn=navigate_idx_fn,
 
357
  inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
358
  outputs=self.idx_state,
359
  ).then(
360
  fn=show_current_item_fn,
361
  inputs=[self.items_state, self.idx_state, session_state],
362
- outputs=outputs_for_show_current,
363
  ).then(
364
- fn=download_voice_fn,
365
- inputs=[gr.State(GDRIVE_FOLDER), self.filename],
366
- outputs=[self.audio, self.original_audio_state],
367
  ).then(
368
- fn=apply_loaded_trim_fn,
369
- inputs=[self.audio, self.current_trim_params, self.original_audio_state],
370
- outputs=[self.audio, self.original_audio_state]
371
  ).then(
372
  fn=lambda: update_ui_interactive_state(True),
373
  outputs=self.interactive_ui_elements
374
  )
375
-
376
- # Manual Load Audio Button
377
- self.btn_load_voice.click(
378
- fn=lambda: update_ui_interactive_state(False),
379
- outputs=self.interactive_ui_elements
380
- ).then(
381
- fn=download_voice_fn,
382
- inputs=[gr.State(GDRIVE_FOLDER), self.filename],
383
- outputs=[self.audio, self.original_audio_state],
384
- ).then(
385
- fn=apply_loaded_trim_fn,
386
- inputs=[self.audio, self.current_trim_params, self.original_audio_state],
387
- outputs=[self.audio, self.original_audio_state]
388
- ).then(
389
- fn=lambda: update_ui_interactive_state(True),
390
- outputs=self.interactive_ui_elements
391
- )
392
-
393
- # Copy/Paste (Quick operations, no UI disable needed)
394
- self.btn_copy.click(fn=lambda x: x, inputs=self.sentence, outputs=self.clipboard_state)
395
- self.btn_paste.click(fn=lambda x: x, inputs=self.clipboard_state, outputs=self.ann_sentence)
396
-
397
- # Jump to Data ID
398
- self.btn_jump.click(
399
  fn=lambda: update_ui_interactive_state(False),
400
  outputs=self.interactive_ui_elements
401
  ).then(
402
- fn=jump_by_data_id_fn,
403
  inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
404
- outputs=self.idx_state,
405
  ).then(
406
  fn=show_current_item_fn,
407
  inputs=[self.items_state, self.idx_state, session_state],
408
- outputs=outputs_for_show_current,
409
  ).then(
410
- fn=download_voice_fn,
411
- inputs=[gr.State(GDRIVE_FOLDER), self.filename],
412
- outputs=[self.audio, self.original_audio_state],
413
  ).then(
414
- fn=apply_loaded_trim_fn,
415
- inputs=[self.audio, self.current_trim_params, self.original_audio_state],
416
- outputs=[self.audio, self.original_audio_state]
417
  ).then(
418
  fn=lambda: update_ui_interactive_state(True),
419
  outputs=self.interactive_ui_elements
420
  )
421
 
422
- # Trim Audio
423
- self.btn_trim.click(
424
  fn=lambda: update_ui_interactive_state(False),
425
  outputs=self.interactive_ui_elements
426
  ).then(
427
- fn=perform_trim_fn,
428
- inputs=[self.original_audio_state, self.trim_start_sec, self.trim_end_sec, self.audio],
429
- outputs=[self.audio, self.current_trim_params],
 
 
 
 
430
  ).then(
431
  fn=lambda: update_ui_interactive_state(True),
432
  outputs=self.interactive_ui_elements
433
  )
 
 
 
 
 
434
 
435
- # Undo Trim
 
 
 
 
 
 
 
 
 
436
  self.btn_undo_trim.click(
437
- fn=lambda: update_ui_interactive_state(False),
438
- outputs=self.interactive_ui_elements
439
- ).then(
440
- fn=lambda orig_audio: (orig_audio, None, 0.0, 0.0) if orig_audio else (None, None, 0.0, 0.0),
441
- inputs=[self.original_audio_state],
442
- outputs=[self.audio, self.current_trim_params, self.trim_start_sec, self.trim_end_sec],
443
- ).then(
444
- fn=lambda: update_ui_interactive_state(True),
445
- outputs=self.interactive_ui_elements
446
  )
447
 
448
- # Delete Annotation
 
 
 
 
 
449
  self.btn_delete.click(
450
  fn=lambda: update_ui_interactive_state(False),
451
  outputs=self.interactive_ui_elements
452
  ).then(
453
  fn=delete_db_and_ui_fn,
454
- inputs=[self.items_state, self.idx_state, session_state],
455
- outputs=[self.items_state, self.idx_state] + outputs_for_show_current,
456
- ).then(
457
  fn=lambda: update_ui_interactive_state(True),
458
  outputs=self.interactive_ui_elements
459
  )
460
 
461
  return self.container
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import datetime
4
+ from sqlalchemy import orm, func # Added func for count
5
 
6
  from components.header import Header
7
+ from utils.logger import Logger # Changed from get_logger to Logger
8
+ from utils.gdrive_downloader import PublicFolderAudioLoader
9
  from config import conf
10
+ from utils.database import get_db
11
+ from data.models import Annotation, AudioTrim, TTSData, AnnotationInterval # Added AnnotationInterval
12
+ from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo # For progress
13
 
14
+ log = Logger() # Changed from get_logger() to Logger()
15
  LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
16
  GDRIVE_FOLDER = conf.GDRIVE_FOLDER
17
 
 
19
  class DashboardPage:
20
  def __init__(self) -> None:
21
  with gr.Column(visible=False) as self.container:
22
+ self.header = Header() # Header now includes progress_display
23
 
24
  with gr.Row():
25
+ # Left Column
26
  with gr.Column(scale=3):
27
  with gr.Row():
28
+ self.tts_id = gr.Textbox(label="ID", interactive=False, scale=1)
29
+ self.filename = gr.Textbox(label="Filename", interactive=False, scale=3)
30
+ self.sentence = gr.Textbox(
31
+ label="Original Sentence", interactive=False, max_lines=5, rtl=True
32
+ )
33
+
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  with gr.Row():
35
+ with gr.Column(scale=1, min_width=10): # Left spacer column
36
+ pass
37
+ self.btn_copy_sentence = gr.Button("📋 Copy to Annotated", min_width=150)
38
+ with gr.Column(scale=1, min_width=10): # Right spacer column
39
+ pass
40
+
41
+ self.ann_sentence = gr.Textbox(
42
+ label="Annotated Sentence",
43
+ interactive=True,
44
+ max_lines=5,
45
+ rtl=True,
46
+ )
47
+
48
  with gr.Row():
49
+ self.btn_prev = gr.Button("⬅️ Previous", min_width=120)
50
+ self.btn_next_no_save = gr.Button("Next ➡️ (No Save)", min_width=150)
51
+ self.btn_save_next = gr.Button("Save & Next ➡️", variant="primary", min_width=120)
52
+
53
+ # Combined row for Delete button and Jump controls
54
+ with gr.Row(): # Removed style argument to fix TypeError
55
+ # Delete button on the left
56
+ self.btn_delete = gr.Button("🗑️ Delete Annotation & Clear Fields", min_width=260)
57
+
58
+ # Spacer column to push jump controls to the right.
59
+ # # This column will expand to fill available space.
60
+ # with gr.Column(scale=1, min_width=10):
61
+ # pass
62
+
63
+ # Jump controls, grouped in a nested Row, appearing on the right.
64
+ # 'scale=0' for this nested Row and its children makes them take minimal/intrinsic space.
65
+ with gr.Row(scale=0, variant='compact'): # Added variant='compact'
66
  self.jump_data_id_input = gr.Number(
67
+ # show_label=False, # Remove label to reduce height
68
+ label="Jump to ID (e.g. 123)", # Use placeholder for instruction
69
+ value=None, # Ensure placeholder shows initially
70
+ precision=0,
71
+ interactive=True,
72
+ min_width=120, # Adjusted for longer placeholder
73
+ # scale=0
 
 
74
  )
75
+ self.btn_jump = gr.Button("Go to data ID", min_width=70) # Compact Go button
76
+ # Removed the old separate rows for delete and jump controls
77
 
78
+ # Right Column
79
  with gr.Column(scale=2):
80
+ self.btn_load_voice = gr.Button("Load Audio (Autoplay)", min_width=150)
81
  self.audio = gr.Audio(
82
  label="🔊 Audio", interactive=False, autoplay=True
83
  )
84
+ with gr.Group(): # Grouping trim controls
85
+ gr.Markdown("### Audio Trimming")
86
+ self.trim_start_sec = gr.Number(
87
+ label="Trim Start (s)",
88
+ value=None, # Ensure placeholder shows
89
+ precision=3,
90
+ interactive=True,
91
+ min_width=150
92
+ )
93
+ self.trim_end_sec = gr.Number(
94
+ label="Trim End (s)",
95
+ value=None, # Ensure placeholder shows
96
+ precision=3,
97
+ interactive=True,
98
+ min_width=150
99
+ )
100
+ with gr.Row():
101
+ self.btn_trim = gr.Button("➕ Add Trim (Delete Segment)", min_width=150)
102
+ self.btn_undo_trim = gr.Button("↩️ Undo Last Trim", min_width=150)
103
+ self.trims_display = gr.DataFrame(
104
+ headers=["Start (s)", "End (s)"],
105
+ col_count=(2, "fixed"),
106
+ interactive=False,
107
+ label="Applied Trims",
108
+ wrap=True
109
+ )
110
 
111
+ # State variables
112
  self.items_state = gr.State([])
113
  self.idx_state = gr.State(0)
 
114
  self.original_audio_state = gr.State(None)
115
+ self.applied_trims_list_state = gr.State([])
116
 
117
  # List of all interactive UI elements for enabling/disabling
118
  self.interactive_ui_elements = [
119
+ self.btn_prev, self.btn_save_next, self.btn_next_no_save,
120
+ self.btn_delete, self.btn_jump,
121
  self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
122
  self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
123
+ self.ann_sentence, self.btn_copy_sentence
124
  ]
125
 
126
  # ---------------- wiring ---------------- #
 
129
  ):
130
  self.header.register_callbacks(login_page, self, session_state)
131
 
 
132
  def update_ui_interactive_state(is_interactive: bool):
133
  updates = []
134
  for elem in self.interactive_ui_elements:
135
  if elem == self.btn_load_voice and not is_interactive:
136
+ updates.append(gr.update(value="⏳ Loading Audio...", interactive=False))
137
  elif elem == self.btn_load_voice and is_interactive:
138
+ updates.append(gr.update(value="Load Audio (Autoplay)", interactive=True))
139
+ elif elem == self.btn_save_next and not is_interactive:
140
+ updates.append(gr.update(value="�� Saving...", interactive=False))
141
+ elif elem == self.btn_save_next and is_interactive:
142
+ updates.append(gr.update(value="Save & Next ➡️", interactive=True))
143
+ # Add similar handling for btn_next_no_save if needed for text change during processing
144
  else:
145
  updates.append(gr.update(interactive=is_interactive))
146
  return updates
147
 
148
def get_user_progress_fn(session):
    """Return a one-line progress summary for the logged-in annotator.

    The total is the sum of ``end_index - start_index + 1`` over the
    user's ``AnnotationInterval`` rows (both bounds are counted). The
    completed count is the number of the user's annotations with a
    non-empty ``annotated_sentence`` whose TTS id falls inside one of
    those intervals.

    Args:
        session: mapping that holds the current ``"user_id"``.

    Returns:
        A display string: a 20-cell text bar with counts and a percentage,
        or a short fallback message when there is no user, nothing is
        assigned, or the query fails.
    """
    user_id = session.get("user_id")
    if not user_id:
        return "Annotation Progress: N/A"

    with get_db() as db:
        try:
            # Total items assigned to the user.
            interval_size = (
                AnnotationInterval.end_index - AnnotationInterval.start_index + 1
            )
            total_assigned_result = (
                db.query(func.sum(interval_size))
                .filter(AnnotationInterval.annotator_id == user_id)
                .scalar()
            )
            # SUM over zero rows is NULL; some backends return Decimal.
            total_assigned = (
                int(total_assigned_result)
                if total_assigned_result is not None
                else 0
            )

            # Non-empty annotations by this user inside their intervals.
            completed_count_result = (
                db.query(func.count(Annotation.id))
                .join(TTSData, Annotation.tts_data_id == TTSData.id)
                .join(
                    AnnotationInterval,
                    (AnnotationInterval.annotator_id == user_id)
                    & (TTSData.id >= AnnotationInterval.start_index)
                    & (TTSData.id <= AnnotationInterval.end_index),
                )
                .filter(
                    Annotation.annotator_id == user_id,
                    # SQLAlchemy renders `!= None` as IS NOT NULL.
                    Annotation.annotated_sentence != None,  # noqa: E711
                    Annotation.annotated_sentence != "",
                )
                .scalar()
            )
            completed_count = (
                completed_count_result
                if completed_count_result is not None
                else 0
            )

            if total_assigned > 0:
                percent = (completed_count / total_assigned) * 100
                bar_length = 20
                # Clamp so the bar keeps a fixed width even if the
                # completed count exceeds the assigned total.
                filled_length = min(
                    bar_length,
                    int(bar_length * completed_count // total_assigned),
                )
                # The unfilled part needs a visible glyph; an empty string
                # here made the bar shrink to only its filled cells.
                bar = "█" * filled_length + "░" * (bar_length - filled_length)
                return (
                    f"Progress: {bar} {completed_count}/{total_assigned} "
                    f"({percent:.1f}%)"
                )
            if total_assigned == 0 and completed_count == 0:
                return "Progress: No items assigned yet."
            # Inconsistent counts (e.g. completed without any assignment).
            return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
        except Exception as e:
            log.error(f"Error fetching progress for user {user_id}: {e}")
            return "Annotation Progress: Error"
188
 
189
def download_voice_fn(folder_link, filename_to_load, autoplay_on_load=False):
    """Fetch one audio file through ``LOADER`` and prepare the audio outputs.

    Args:
        folder_link: folder reference passed straight to ``LOADER.load_audio``.
        filename_to_load: name of the file to fetch; falsy means "nothing
            to load".
        autoplay_on_load: whether the returned audio update should autoplay.

    Returns:
        A 3-tuple ``(audio, original_audio, audio_update)``. On success
        ``audio`` is ``(sr, wav)``, ``original_audio`` is ``(sr, wav.copy())``
        so later edits cannot alias the pristine samples, and
        ``audio_update`` carries the same audio plus the autoplay flag.
        When there is no filename or the download fails, the result is
        ``(None, None, <update clearing the player>)``.
    """
    cleared = (None, None, gr.update(value=None, autoplay=False))
    if not filename_to_load:
        return cleared

    try:
        log.info(f"Downloading voice: {filename_to_load}, Autoplay: {autoplay_on_load}")
        sr, wav = LOADER.load_audio(folder_link, filename_to_load)
    except Exception as e:
        log.error(f"GDrive download failed for {filename_to_load}: {e}")
        # gr.Error is an exception class: constructing it without `raise`
        # shows nothing. gr.Warning displays the message and still lets
        # this handler return the cleared outputs.
        gr.Warning(f"Failed to load audio: {filename_to_load}. Error: {e}")
        return cleared

    return (
        (sr, wav),
        (sr, wav.copy()),
        gr.update(value=(sr, wav), autoplay=autoplay_on_load),
    )
200
 
201
def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
    """Create or update the user's annotation and replace its audio trims.

    Args:
        current_tts_id: id of the TTS item being annotated.
        session: mapping that holds the current ``"user_id"``.
        ann_text_to_save: text stored as ``annotated_sentence``.
        applied_trims_list: list of dicts with ``'start_sec'`` and
            ``'end_sec'`` (seconds); they are stored in milliseconds.

    Returns:
        None. Outcome is reported to the user through Gradio toasts; on any
        failure the transaction is rolled back.
    """
    annotator_id = session.get("user_id")
    if not current_tts_id or not annotator_id:
        # gr.Error only shows when raised; gr.Warning displays the message
        # and keeps this handler's "return nothing" contract.
        gr.Warning("Cannot save: Missing TTS ID or User ID.")
        return

    with get_db() as db:
        try:
            annotation_obj = (
                db.query(Annotation)
                .filter_by(tts_data_id=current_tts_id, annotator_id=annotator_id)
                .options(orm.joinedload(Annotation.audio_trims))
                .first()
            )
            if not annotation_obj:
                annotation_obj = Annotation(
                    tts_data_id=current_tts_id, annotator_id=annotator_id
                )
                db.add(annotation_obj)

            annotation_obj.annotated_sentence = ann_text_to_save
            # Naive UTC timestamp, kept as-is to match what is already stored.
            annotation_obj.annotated_at = datetime.datetime.utcnow()

            # Replace strategy: drop every stored trim, then re-insert the
            # current list, so the DB always mirrors the UI state.
            if annotation_obj.audio_trims:
                for old_trim in annotation_obj.audio_trims:
                    db.delete(old_trim)
                annotation_obj.audio_trims = []

            if applied_trims_list:
                if annotation_obj.id is None:
                    # A brand-new annotation has no primary key until flushed.
                    db.flush()
                    if annotation_obj.id is None:
                        gr.Warning("Failed to get annotation ID for saving new trims.")
                        db.rollback()
                        return

                for trim_info in applied_trims_list:
                    db.add(
                        AudioTrim(
                            annotation_id=annotation_obj.id,
                            original_tts_data_id=current_tts_id,
                            start=trim_info['start_sec'] * 1000.0,
                            end=trim_info['end_sec'] * 1000.0,
                        )
                    )
                log.info(f"Saved {len(applied_trims_list)} trims for annotation {annotation_obj.id} (TTS ID: {current_tts_id}).")
            else:
                log.info(f"No trims applied for {current_tts_id}, any existing DB trims were cleared.")

            db.commit()
            gr.Info(f"Annotation for ID {current_tts_id} saved.")
        except Exception as e:
            db.rollback()
            log.error(f"Failed to save annotation for {current_tts_id}: {e}")
            gr.Warning(f"Save failed: {e}")
264
 
265
  def show_current_item_fn(items, idx, session):
266
+ initial_trims_list_sec = []
267
+ initial_trims_df_data = self._convert_trims_to_df_data([]) # Empty by default
268
+ ui_trim_start_sec = None # Changed from 0.0 to None
269
+ ui_trim_end_sec = None # Changed from 0.0 to None
270
+
271
+ if not items or idx >= len(items) or idx < 0:
272
+ return ("", "", "", "", None, ui_trim_start_sec, ui_trim_end_sec,
273
+ initial_trims_list_sec, initial_trims_df_data,
274
+ gr.update(value=None, autoplay=False))
275
+
276
  current_item = items[idx]
277
  tts_data_id = current_item.get("id")
278
  annotator_id = session.get("user_id")
279
+ ann_text = ""
280
+
281
  if tts_data_id and annotator_id:
282
  with get_db() as db:
283
  try:
284
  existing_annotation = db.query(Annotation).filter_by(
285
  tts_data_id=tts_data_id, annotator_id=annotator_id
286
+ ).options(orm.joinedload(Annotation.audio_trims)).first() # Changed to audio_trims
287
  if existing_annotation:
288
  ann_text = existing_annotation.annotated_sentence or ""
289
+ if existing_annotation.audio_trims: # Check the collection
290
+ initial_trims_list_sec = [
291
+ {
292
+ 'start_sec': trim.start / 1000.0,
293
+ 'end_sec': trim.end / 1000.0
294
+ }
295
+ for trim in existing_annotation.audio_trims # Iterate over the collection
296
+ ]
297
+ initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
298
  except Exception as e:
299
+ log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
300
  gr.Error(f"Error loading annotation details: {e}")
301
+
302
  return (
303
  current_item.get("id", ""), current_item.get("filename", ""),
304
+ current_item.get("sentence", ""), ann_text,
305
+ None,
306
+ ui_trim_start_sec, ui_trim_end_sec,
307
+ initial_trims_list_sec,
308
+ initial_trims_df_data,
309
+ gr.update(value=None, autoplay=False) # Ensure audio does not autoplay on item change
310
  )
311
 
312
  def navigate_idx_fn(items, current_idx, direction):
 
315
  return new_idx
316
 
317
  def load_all_items_fn(sess):
318
+ user_id = sess.get("user_id") # Use user_id for consistency with other functions
319
+ user_name = sess.get("user_name") # Keep for logging if needed
320
+ items_to_load = []
321
+ initial_idx = 0 # Default to 0
322
+
323
+ if not user_id:
324
+ log.warning("load_all_items_fn: user_id not found in session. Dashboard will display default state until login completes and data is refreshed.")
325
+ # Prepare default/empty values for all outputs of show_current_item_fn
326
+ # (tts_id, filename, sentence, ann_text, audio_placeholder,
327
+ # trim_start_sec_ui, trim_end_sec_ui,
328
+ # applied_trims_list_state_val, trims_display_val, audio_update_obj)
329
+ empty_item_display_tuple = ("", "", "", "", None, None, None, [], self._convert_trims_to_df_data([]), gr.update(value=None, autoplay=False))
330
+
331
+ # load_all_items_fn returns: [items_to_load, initial_idx] + list(initial_ui_values_tuple) + [progress_str]
332
+ # Total 13 values.
333
+ return [[], 0] + list(empty_item_display_tuple) + ["Progress: Waiting for login..."]
334
+
335
+ if user_id:
336
+ with get_db() as db:
337
+ try:
338
+ repo = AnnotatorWorkloadRepo(db)
339
+ # Get all assigned items
340
+ raw_items = repo.get_tts_data_with_annotations_for_user_id(user_id)
341
+
342
+ items_to_load = [
343
+ {
344
+ "id": item["tts_data"].id,
345
+ "filename": item["tts_data"].filename,
346
+ "sentence": item["tts_data"].sentence,
347
+ "annotated": item["annotation"] is not None and (item["annotation"].annotated_sentence is not None and item["annotation"].annotated_sentence != "")
348
+ }
349
+ for item in raw_items
350
+ ]
351
+ log.info(f"Loaded {len(items_to_load)} items for user {user_name} (ID: {user_id})")
352
+
353
+ # --- Resume Logic: Find first unannotated or last item ---
354
+ first_unannotated_idx = -1
355
+ for i, item_data in enumerate(items_to_load):
356
+ if not item_data["annotated"]:
357
+ first_unannotated_idx = i
358
+ break
359
+
360
+ if first_unannotated_idx != -1:
361
+ initial_idx = first_unannotated_idx
362
+ log.info(f"Resuming at first unannotated item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
363
+ elif items_to_load: # All annotated, start at the last one or first if only one
364
+ initial_idx = len(items_to_load) - 1
365
+ log.info(f"All items annotated, starting at last item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
366
+ else: # No items assigned
367
+ initial_idx = 0
368
+ log.info("No items assigned to user.")
369
+
370
+ except Exception as e:
371
+ log.error(f"Failed to load items or determine resume index for user {user_name}: {e}") # Removed exc_info=True
372
+ gr.Error(f"Could not load your assigned data: {e}")
373
+
374
+ initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
375
+ progress_str = get_user_progress_fn(sess)
376
+ return [items_to_load, initial_idx] + list(initial_ui_values_tuple) + [progress_str]
377
 
378
  def jump_by_data_id_fn(items, target_data_id_str, current_idx):
379
  if not target_data_id_str: return current_idx
 
381
  target_id = int(target_data_id_str)
382
  for i, item_dict in enumerate(items):
383
  if item_dict.get("id") == target_id: return i
384
+ gr.Warning(f"Data ID {target_id} not found in your assigned items.")
385
  except ValueError:
386
  gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
387
  return current_idx
388
 
389
def delete_db_and_ui_fn(items, current_idx, session, original_audio_data_state):
    """Delete the current user's annotation for the current item and reset the form.

    Returns a 13-tuple in the order of ``outputs_for_delete``:
    (items, current_idx, tts_id, filename, sentence, annotated_text,
     audio_value, trim_start_ui, trim_end_ui, applied_trims_list,
     trims_table_rows, audio_update, progress_text).

    The annotated text and all trims are cleared in the UI. If original audio
    is already loaded it is shown again (untrimmed, no autoplay); otherwise
    the player is cleared.

    Args:
        items: list of item dicts (keys ``id``, ``filename``, ``sentence``).
        current_idx: index of the item whose annotation is deleted.
        session: session dict; ``user_id`` identifies the annotator.
        original_audio_data_state: loaded original audio, or a falsy value.
    """
    new_ann_sentence = ""
    new_trim_start_sec_ui = None
    new_trim_end_sec_ui = None
    new_applied_trims_list = []
    new_trims_df_data = self._convert_trims_to_df_data([])

    if original_audio_data_state:
        audio_to_display_after_delete = original_audio_data_state
        audio_update_obj_after_delete = gr.update(value=original_audio_data_state, autoplay=False)
    else:
        audio_to_display_after_delete = None
        audio_update_obj_after_delete = gr.update(value=None, autoplay=False)

    # No valid current item: return a cleared form without touching the DB.
    if not items or current_idx >= len(items) or current_idx < 0:
        progress_str_err = get_user_progress_fn(session)
        return (items, current_idx, "", "", "", new_ann_sentence, audio_to_display_after_delete,
                new_trim_start_sec_ui, new_trim_end_sec_ui, new_applied_trims_list, new_trims_df_data,
                audio_update_obj_after_delete, progress_str_err)

    current_item = items[current_idx]
    tts_id_val = current_item.get("id", "")
    filename_val = current_item.get("filename", "")
    sentence_val = current_item.get("sentence", "")

    tts_data_id_to_clear = tts_id_val
    annotator_id_for_clear = session.get("user_id")

    if tts_data_id_to_clear and annotator_id_for_clear:
        with get_db() as db:
            try:
                annotation_obj = (
                    db.query(Annotation)
                    .filter_by(tts_data_id=tts_data_id_to_clear, annotator_id=annotator_id_for_clear)
                    .options(orm.joinedload(Annotation.audio_trims))
                    .first()
                )
                if annotation_obj:
                    # Annotation.audio_trims is declared with
                    # cascade="all, delete-orphan", so deleting the annotation
                    # removes its AudioTrim rows as well.
                    db.delete(annotation_obj)
                    db.commit()
                    gr.Info(f"Annotation and associated trims for ID {tts_data_id_to_clear} deleted from DB.")
                else:
                    gr.Warning(f"No DB annotation found to delete for ID {tts_data_id_to_clear}.")
            except Exception as e:
                db.rollback()
                log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}")
                # gr.Error only shows when raised; a toast keeps the return
                # values intact so the UI can be re-enabled.
                gr.Warning(f"Failed to delete annotation from database: {e}")
    else:
        gr.Warning("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")

    progress_str = get_user_progress_fn(session)

    return (items, current_idx, tts_id_val, filename_val, sentence_val,
            new_ann_sentence, audio_to_display_after_delete, new_trim_start_sec_ui, new_trim_end_sec_ui,
            new_applied_trims_list, new_trims_df_data, audio_update_obj_after_delete, progress_str)
459
+
460
+ # ---- New Trim Callbacks ----
461
def add_trim_and_reprocess_ui_fn(start_s, end_s, current_trims_list, original_audio_data):
    """Append a trim interval and re-render the audio with all trims applied.

    Returns a 6-tuple:
    (trims_list, trims_table_rows, audio_value, audio_update,
     trim_start_ui, trim_end_ui).

    On invalid input (missing value, negative start, or end <= start) the
    trims list is left unchanged, the entered start/end are kept in the input
    fields, and the audio is re-rendered with the trims already in the list so
    the player stays consistent with the table. On success the input fields
    are reset to ``None``.

    Args:
        start_s: trim start in seconds, or ``None``.
        end_s: trim end in seconds, or ``None``.
        current_trims_list: list of ``{'start_sec', 'end_sec'}`` dicts (may be ``None``).
        original_audio_data: the untrimmed audio, or a falsy value if not loaded.
    """
    existing_trims = list(current_trims_list or [])

    if start_s is None or end_s is None or not (end_s > start_s and start_s >= 0):
        gr.Warning("Invalid trim times. Start must be >= 0 and End > Start.")
        # Re-apply the existing trims rather than showing the raw original,
        # which would contradict the trims still listed in the table.
        unchanged_audio, unchanged_update = self._apply_multiple_trims_fn(
            original_audio_data, existing_trims
        )
        return (existing_trims, self._convert_trims_to_df_data(existing_trims),
                unchanged_audio, unchanged_update,
                start_s, end_s)

    new_trim = {'start_sec': float(start_s), 'end_sec': float(end_s)}
    updated_trims_list = existing_trims + [new_trim]

    processed_audio_data, audio_update = self._apply_multiple_trims_fn(original_audio_data, updated_trims_list)

    # Clear the input fields once the trim has been added.
    ui_trim_start_sec_reset = None
    ui_trim_end_sec_reset = None

    return (updated_trims_list, self._convert_trims_to_df_data(updated_trims_list),
            processed_audio_data, audio_update,
            ui_trim_start_sec_reset, ui_trim_end_sec_reset)
481
+
482
def undo_last_trim_and_reprocess_ui_fn(current_trims_list, original_audio_data):
    """Drop the most recently added trim and re-render the audio.

    Returns a 4-tuple:
    (trims_list, trims_table_rows, audio_value, audio_update).
    With no trims to undo, an info toast is shown and the original audio is
    returned unchanged with autoplay disabled.
    """
    if current_trims_list:
        remaining_trims = current_trims_list[:-1]
        rendered_audio, rendered_update = self._apply_multiple_trims_fn(
            original_audio_data, remaining_trims
        )
        table_rows = self._convert_trims_to_df_data(remaining_trims)
        return (remaining_trims, table_rows, rendered_audio, rendered_update)

    gr.Info("No trims to undo.")
    table_rows = self._convert_trims_to_df_data(current_trims_list)
    player_update = gr.update(value=original_audio_data, autoplay=False)
    return (current_trims_list, table_rows, original_audio_data, player_update)
493
+
494
+ # ---- Callback Wiring ----
495
+ # outputs_for_display_item: Defines what `show_current_item_fn` and similar full display updates will populate.
496
+ # It expects 10 values from show_current_item_fn:
497
+ # (tts_id, filename, sentence, ann_text, audio_placeholder,
498
+ # trim_start_sec_ui, trim_end_sec_ui,
499
+ # applied_trims_list_state_val, trims_display_val, audio_update_obj)
500
+ outputs_for_display_item = [
501
  self.tts_id, self.filename, self.sentence, self.ann_sentence,
502
+ self.audio, # This will receive the audio data (sr, wav) or None
503
+ self.trim_start_sec, self.trim_end_sec, # UI fields for new trim
504
+ self.applied_trims_list_state,
505
+ self.trims_display,
506
+ self.audio # This will receive the gr.update object for autoplay etc.
507
  ]
508
+
509
  # Initial Load
510
+ # Chain: Disable UI -> Load Data (items, idx, initial UI values including trims list & df, progress) ->
511
+ # Update UI -> Enable UI
512
+ # Audio is NOT loaded here anymore.
513
+
514
  root_blocks.load(
515
+ fn=lambda: update_ui_interactive_state(False),
516
  outputs=self.interactive_ui_elements
517
  ).then(
518
+ fn=load_all_items_fn,
519
  inputs=[session_state],
520
+ # Outputs: items_state, idx_state, tts_id, filename, sentence, ann_sentence,
521
+ # audio (None), trim_start_sec, trim_end_sec, applied_trims_list_state,
522
+ # trims_display, audio (update obj), progress_display
523
+ outputs=[self.items_state, self.idx_state] + outputs_for_display_item + [self.header.progress_display],
524
  ).then(
525
+ # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
526
+ # show_current_item_fn already sets self.audio to (None, gr.update(value=None, autoplay=False))
527
+ # We also need to ensure original_audio_state is None if no audio is loaded.
528
+ lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
529
+ outputs=[self.original_audio_state, self.audio, self.audio]
530
  ).then(
531
+ fn=lambda: update_ui_interactive_state(True),
 
 
 
 
532
  outputs=self.interactive_ui_elements
533
  )
534
 
535
+ # Navigation (Prev/Save & Next/Next No Save)
536
+ # Audio is NOT loaded here anymore.
537
+ for btn_widget, direction_str, performs_save in [
538
+ (self.btn_prev, "prev", False),
539
+ (self.btn_save_next, "next", True),
540
+ (self.btn_next_no_save, "next", False)
541
  ]:
542
  event_chain = btn_widget.click(
543
  fn=lambda: update_ui_interactive_state(False),
544
  outputs=self.interactive_ui_elements
545
  )
546
+ if performs_save:
547
  event_chain = event_chain.then(
548
  fn=save_annotation_db_fn,
549
  inputs=[
550
  self.tts_id, session_state, self.ann_sentence,
551
+ self.applied_trims_list_state,
552
  ],
553
+ outputs=None
554
+ ).then(
555
+ fn=get_user_progress_fn,
556
+ inputs=[session_state],
557
+ outputs=self.header.progress_display
558
  )
559
+
560
+ event_chain = event_chain.then(
561
+ fn=navigate_idx_fn,
562
  inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
563
  outputs=self.idx_state,
564
  ).then(
565
  fn=show_current_item_fn,
566
  inputs=[self.items_state, self.idx_state, session_state],
567
+ outputs=outputs_for_display_item,
568
  ).then(
569
+ # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
570
+ lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
571
+ outputs=[self.original_audio_state, self.audio, self.audio]
572
  ).then(
573
+ lambda: gr.update(value=None), # Clear jump input
574
+ outputs=self.jump_data_id_input
 
575
  ).then(
576
  fn=lambda: update_ui_interactive_state(True),
577
  outputs=self.interactive_ui_elements
578
  )
579
+
580
+ # Audio is NOT loaded here anymore.
581
+ self.btn_jump.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  fn=lambda: update_ui_interactive_state(False),
583
  outputs=self.interactive_ui_elements
584
  ).then(
585
+ fn=jump_by_data_id_fn,
586
  inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
587
+ outputs=self.idx_state
588
  ).then(
589
  fn=show_current_item_fn,
590
  inputs=[self.items_state, self.idx_state, session_state],
591
+ outputs=outputs_for_display_item
592
  ).then(
593
+ # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
594
+ lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
595
+ outputs=[self.original_audio_state, self.audio, self.audio]
596
  ).then(
597
+ lambda: gr.update(value=None), # Clear jump input
598
+ outputs=self.jump_data_id_input
 
599
  ).then(
600
  fn=lambda: update_ui_interactive_state(True),
601
  outputs=self.interactive_ui_elements
602
  )
603
 
604
+ # Load Audio Button - This is now the ONLY place audio is downloaded and processed.
605
+ self.btn_load_voice.click(
606
  fn=lambda: update_ui_interactive_state(False),
607
  outputs=self.interactive_ui_elements
608
  ).then(
609
+ fn=download_voice_fn,
610
+ inputs=[gr.State(GDRIVE_FOLDER), self.filename, gr.State(True)], # Autoplay TRUE
611
+ outputs=[self.audio, self.original_audio_state, self.audio],
612
+ ).then(
613
+ fn=self._apply_multiple_trims_fn,
614
+ inputs=[self.original_audio_state, self.applied_trims_list_state],
615
+ outputs=[self.audio, self.audio]
616
  ).then(
617
  fn=lambda: update_ui_interactive_state(True),
618
  outputs=self.interactive_ui_elements
619
  )
620
+
621
+ # Copy Sentence Button
622
+ self.btn_copy_sentence.click(
623
+ fn=lambda s: s, inputs=self.sentence, outputs=self.ann_sentence
624
+ )
625
 
626
+ # Trim Button
627
+ self.btn_trim.click(
628
+ fn=add_trim_and_reprocess_ui_fn,
629
+ inputs=[self.trim_start_sec, self.trim_end_sec, self.applied_trims_list_state, self.original_audio_state],
630
+ outputs=[self.applied_trims_list_state, self.trims_display,
631
+ self.audio, self.audio,
632
+ self.trim_start_sec, self.trim_end_sec]
633
+ )
634
+
635
+ # Undo Trim Button
636
  self.btn_undo_trim.click(
637
+ fn=undo_last_trim_and_reprocess_ui_fn,
638
+ inputs=[self.applied_trims_list_state, self.original_audio_state],
639
+ outputs=[self.applied_trims_list_state, self.trims_display, self.audio, self.audio]
 
 
 
 
 
 
640
  )
641
 
642
+ # Delete Button
643
+ outputs_for_delete = [
644
+ self.items_state, self.idx_state, self.tts_id, self.filename, self.sentence,
645
+ self.ann_sentence, self.audio, self.trim_start_sec, self.trim_end_sec,
646
+ self.applied_trims_list_state, self.trims_display, self.audio, self.header.progress_display
647
+ ]
648
  self.btn_delete.click(
649
  fn=lambda: update_ui_interactive_state(False),
650
  outputs=self.interactive_ui_elements
651
  ).then(
652
  fn=delete_db_and_ui_fn,
653
+ inputs=[self.items_state, self.idx_state, session_state, self.original_audio_state],
654
+ outputs=outputs_for_delete
655
+ ).then(
656
  fn=lambda: update_ui_interactive_state(True),
657
  outputs=self.interactive_ui_elements
658
  )
659
 
660
  return self.container
661
+
662
def _apply_multiple_trims_fn(self, original_audio_data, trims_list_sec):
    """Cut every listed interval out of the original audio.

    Args:
        original_audio_data: ``(sample_rate, samples)`` pair, or a falsy value
            when no audio is loaded. ``samples`` is used as a NumPy array
            indexed along its first axis.
        trims_list_sec: list of ``{'start_sec', 'end_sec'}`` dicts giving the
            intervals (in seconds of the ORIGINAL audio) to delete.

    Returns:
        ``(audio_value, audio_update)`` where ``audio_value`` is
        ``(sample_rate, trimmed_samples)`` or ``None``, and ``audio_update``
        is a ``gr.update`` carrying the same value with autoplay disabled.
    """
    if not original_audio_data:
        log.warning("apply_multiple_trims_fn: No original audio data.")
        return None, gr.update(value=None, autoplay=False)

    sr, wav_orig = original_audio_data
    total_samples = len(wav_orig)

    if not trims_list_sec:
        log.info("apply_multiple_trims_fn: No trims in list, returning original audio.")
        return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)

    # Convert each valid trim from seconds to a clamped [start, end) sample range.
    delete_intervals_samples = []
    for trim_info in trims_list_sec:
        start_s = trim_info.get('start_sec')
        end_s = trim_info.get('end_sec')
        if start_s is not None and end_s is not None and end_s > start_s and start_s >= 0:
            start_sample = max(0, min(int(sr * start_s), total_samples))
            end_sample = max(start_sample, min(int(sr * end_s), total_samples))
            if start_sample < end_sample:
                delete_intervals_samples.append((start_sample, end_sample))
        else:
            log.warning(f"apply_multiple_trims_fn: Invalid trim skipped: {trim_info}")

    if not delete_intervals_samples:
        log.info("apply_multiple_trims_fn: No valid trims to apply, returning original audio.")
        return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)

    # Merge overlapping or touching intervals so each sample is cut at most once.
    delete_intervals_samples.sort(key=lambda x: x[0])
    merged_delete_intervals = []
    current_start, current_end = delete_intervals_samples[0]
    for next_start, next_end in delete_intervals_samples[1:]:
        if next_start <= current_end:
            current_end = max(current_end, next_end)
        else:
            merged_delete_intervals.append((current_start, current_end))
            current_start, current_end = next_start, next_end
    merged_delete_intervals.append((current_start, current_end))

    log.info(f"apply_multiple_trims_fn: Original wav shape: {wav_orig.shape}, Merged delete intervals (samples): {merged_delete_intervals}")

    # Collect the stretches of audio between the deleted intervals.
    kept_parts_wav = []
    current_pos_samples = 0
    for del_start, del_end in merged_delete_intervals:
        if del_start > current_pos_samples:
            kept_parts_wav.append(wav_orig[current_pos_samples:del_start])
        current_pos_samples = del_end

    if current_pos_samples < total_samples:
        kept_parts_wav.append(wav_orig[current_pos_samples:])

    if not kept_parts_wav:
        # An empty slice keeps the dtype and any trailing dimensions of the
        # original, unlike np.array([]) which is always 1-D.
        final_wav = wav_orig[:0].copy()
        log.info("apply_multiple_trims_fn: All audio trimmed, resulting in empty audio.")
    else:
        final_wav = np.concatenate(kept_parts_wav)
        log.info(f"apply_multiple_trims_fn: Final wav shape after trimming: {final_wav.shape}")

    return (sr, final_wav), gr.update(value=(sr, final_wav), autoplay=False)
724
+
725
+ def _convert_trims_to_df_data(self, trims_list_sec):
726
+ if not trims_list_sec:
727
+ return None # For gr.DataFrame, None clears it
728
+ return [[f"{t['start_sec']:.3f}", f"{t['end_sec']:.3f}"] for t in trims_list_sec]
components/header.py CHANGED
@@ -8,17 +8,25 @@ class Header:
8
  def __init__(self):
9
  with gr.Row(variant="panel", elem_classes="header-row") as self.container:
10
  self.welcome = gr.Markdown()
 
11
  self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
12
 
13
  # ---------------- wiring ----------------
14
  def register_callbacks(self, login_page, dashboard_page, session_state):
 
 
 
 
 
 
15
  self.logout_btn.click(
16
- fn=AuthService.logout,
17
- inputs=[session_state], # ← حتماً داخل لیست
18
  outputs=[
19
- login_page.container, # 1
20
- dashboard_page.container, # 2
21
- self.welcome, # 3
22
- login_page.message, # 4
 
23
  ],
24
  )
 
8
  def __init__(self):
9
  with gr.Row(variant="panel", elem_classes="header-row") as self.container:
10
  self.welcome = gr.Markdown()
11
+ self.progress_display = gr.Markdown("") # New element for progress display
12
  self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
13
 
14
  # ---------------- wiring ----------------
15
  def register_callbacks(self, login_page, dashboard_page, session_state):
16
+ def logout_and_clear_progress_fn(current_session_state):
17
+ # AuthService.logout is expected to return 4 values for the original outputs
18
+ logout_outputs = AuthService.logout(current_session_state)
19
+ # Add an empty string to clear the progress_display
20
+ return list(logout_outputs) + [""]
21
+
22
  self.logout_btn.click(
23
+ fn=logout_and_clear_progress_fn,
24
+ inputs=[session_state],
25
  outputs=[
26
+ login_page.container,
27
+ dashboard_page.container,
28
+ self.welcome,
29
+ login_page.message,
30
+ self.progress_display, # Cleared on logout
31
  ],
32
  )
components/login_page.py CHANGED
@@ -48,8 +48,6 @@ class LoginPage:
48
  dashboard_page.filename,
49
  dashboard_page.sentence,
50
  dashboard_page.ann_sentence,
51
- # dashboard_page.ann_at,
52
- dashboard_page.validated,
53
  ],
54
  )
55
  .then(
 
48
  dashboard_page.filename,
49
  dashboard_page.sentence,
50
  dashboard_page.ann_sentence,
 
 
51
  ],
52
  )
53
  .then(
data/models.py CHANGED
@@ -119,12 +119,12 @@ class Annotation(Base):
119
  tts_data = relationship("TTSData", back_populates="annotations")
120
  annotator = relationship("Annotator", back_populates="annotations")
121
 
122
- # Relationship to AudioTrim (one-to-one)
123
- audio_trim = relationship(
124
  "AudioTrim",
125
  back_populates="annotation",
126
- uselist=False, # Important for one-to-one
127
- cascade="all, delete-orphan" # If annotation is deleted, delete its trim too
128
  )
129
 
130
 
@@ -135,13 +135,13 @@ class AudioTrim(Base):
135
  __tablename__ = "audio_trims"
136
 
137
  id = Column(Integer, primary_key=True)
138
- annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False, unique=True) # Enforce one-to-one
139
  original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
140
  start = Column(Float, nullable=False)
141
  end = Column(Float, nullable=False)
142
 
143
  # Relationship back to Annotation
144
- annotation = relationship("Annotation", back_populates="audio_trim")
145
  original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
146
 
147
  # --------------------------------------------------------------------------- #
 
119
  tts_data = relationship("TTSData", back_populates="annotations")
120
  annotator = relationship("Annotator", back_populates="annotations")
121
 
122
+ # Relationship to AudioTrim (one-to-MANY)
123
+ audio_trims = relationship( # Renamed from audio_trim
124
  "AudioTrim",
125
  back_populates="annotation",
126
+ uselist=True, # Important for one-to-many
127
+ cascade="all, delete-orphan" # If annotation is deleted, delete its trims too
128
  )
129
 
130
 
 
135
  __tablename__ = "audio_trims"
136
 
137
  id = Column(Integer, primary_key=True)
138
+ annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False) # Removed unique=True
139
  original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
140
  start = Column(Float, nullable=False)
141
  end = Column(Float, nullable=False)
142
 
143
  # Relationship back to Annotation
144
+ annotation = relationship("Annotation", back_populates="audio_trims") # Renamed from audio_trim
145
  original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
146
 
147
  # --------------------------------------------------------------------------- #
data/repository/annotator_workload_repo.py CHANGED
@@ -17,6 +17,21 @@ class AnnotatorWorkloadRepo:
17
 
18
  def get_tts_data_with_annotations(
19
  self, annotator_name: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  ) -> List[Dict[str, Optional[Any]]]:
21
  """
22
  output: [
@@ -24,11 +39,6 @@ class AnnotatorWorkloadRepo:
24
  ...
25
  ]
26
  """
27
-
28
- annotator = self.annotator_repo.get_annotator_by_name(annotator_name)
29
- if annotator is None:
30
- raise ValueError(f"Annotator '{annotator_name}' not found")
31
-
32
  query = (
33
  self.db.query(
34
  TTSData,
@@ -37,7 +47,7 @@ class AnnotatorWorkloadRepo:
37
  .join(
38
  AnnotationInterval,
39
  and_(
40
- AnnotationInterval.annotator_id == annotator.id,
41
  TTSData.id >= AnnotationInterval.start_index,
42
  TTSData.id <= AnnotationInterval.end_index,
43
  ),
@@ -46,13 +56,15 @@ class AnnotatorWorkloadRepo:
46
  Annotation,
47
  and_(
48
  Annotation.tts_data_id == TTSData.id,
49
- Annotation.annotator_id == annotator.id,
50
  ),
51
  )
52
  .order_by(TTSData.id)
53
- ).distinct(TTSData.id)
54
 
55
  rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
56
 
57
- log.info(f"{len(rows)} TTS rows fetched for annotator '{annotator_name}'.")
 
 
58
  return rows
 
17
 
18
  def get_tts_data_with_annotations(
19
  self, annotator_name: str
20
+ ) -> List[Dict[str, Optional[Any]]]:
21
+ # This method is kept for compatibility if used elsewhere, but
22
+ # get_tts_data_with_annotations_for_user_id is preferred for new logic.
23
+ annotator = self.annotator_repo.get_annotator_by_name(annotator_name)
24
+ if annotator is None:
25
+ log.warning(
26
+ f"Annotator '{annotator_name}' not found in get_tts_data_with_annotations. Returning empty list."
27
+ )
28
+ return []
29
+ return self.get_tts_data_with_annotations_for_user_id(
30
+ annotator.id, annotator_name
31
+ )
32
+
33
+ def get_tts_data_with_annotations_for_user_id(
34
+ self, annotator_id: int, annotator_name_for_log: str = "Unknown"
35
  ) -> List[Dict[str, Optional[Any]]]:
36
  """
37
  output: [
 
39
  ...
40
  ]
41
  """
 
 
 
 
 
42
  query = (
43
  self.db.query(
44
  TTSData,
 
47
  .join(
48
  AnnotationInterval,
49
  and_(
50
+ AnnotationInterval.annotator_id == annotator_id,
51
  TTSData.id >= AnnotationInterval.start_index,
52
  TTSData.id <= AnnotationInterval.end_index,
53
  ),
 
56
  Annotation,
57
  and_(
58
  Annotation.tts_data_id == TTSData.id,
59
+ Annotation.annotator_id == annotator_id,
60
  ),
61
  )
62
  .order_by(TTSData.id)
63
+ ).distinct(TTSData.id) # Ensure distinct TTSData items
64
 
65
  rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
66
 
67
+ log.info(
68
+ f"{len(rows)} TTS rows fetched for annotator ID '{annotator_id}' (Name: {annotator_name_for_log})."
69
+ )
70
  return rows
scripts/apply_custom_intervals.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \
2
+ # scripts/apply_custom_intervals.py
3
+ import os
4
+ import sys
5
+
6
+ # Add project root to Python path
7
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
8
+ PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
9
+ if PROJECT_ROOT not in sys.path:
10
+ sys.path.insert(0, PROJECT_ROOT)
11
+
12
+ from utils.database import get_db
13
+ from data.models import AnnotationInterval # For direct query and deletion
14
+ from data.repository.annotator_repo import AnnotatorRepo
15
+ from data.repository.annotation_interval_repo import AnnotationIntervalRepo
16
+ from utils.logger import Logger
17
+
18
+ log = Logger()
19
+
20
+ # User-provided data splits
21
+ # Format: 'annotator_name': (start_id_from_json, end_id_from_json)
22
+ ANNOTATOR_SPLITS = {
23
+ 'shahab': (0, 1982),
24
+ 'amir': (1983, 3965),
25
+ 'mohsen': (3966, 5948),
26
+ 'mahya': (5949, 7931),
27
+ 'najmeh': (7932, 9914),
28
+ 'sepehr': (9915, 11897),
29
+ 'zahra': (11898, 13880),
30
+ 'moghim': (13881, 15862),
31
+ 'amin': (15863, 17845)
32
+ }
33
+
34
def apply_custom_intervals():
    """Replace every annotator's intervals with the ranges in ``ANNOTATOR_SPLITS``.

    For each ``name -> (start, end)`` entry: look up the annotator, delete
    their existing ``AnnotationInterval`` rows, then assign the new range with
    overlap disallowed. A start of 0 is shifted to 1. Unknown annotators and
    invalid ranges are logged and skipped; a failure to assign one annotator's
    interval is logged and does not stop the remaining ones.
    """
    log.info("Starting application of custom annotator intervals...")
    try:
        with get_db() as db:
            annot_repo = AnnotatorRepo(db)
            interval_repo = AnnotationIntervalRepo(db)

            for annotator_name, (start_idx_orig, end_idx_orig) in ANNOTATOR_SPLITS.items():
                log.info(f"Processing annotator: '{annotator_name}' with original range ({start_idx_orig}, {end_idx_orig})")

                annotator = annot_repo.get_annotator_by_name(annotator_name)
                if annotator is None:
                    # Without this guard the first unknown name raises
                    # AttributeError on annotator.id and aborts the whole run.
                    log.error(f"Annotator '{annotator_name}' not found. Skipping interval assignment.")
                    continue

                # A start of 0 is shifted to 1, assuming TTSData.id is 1-based.
                # If TTSData.id can legitimately be 0, remove this adjustment.
                start_idx = 1 if start_idx_orig == 0 else start_idx_orig
                end_idx = end_idx_orig

                if start_idx_orig == 0:
                    log.info(f"Adjusted start_index from 0 to 1 for '{annotator_name}' assuming 1-based TTSData IDs.")

                if start_idx > end_idx:
                    log.warning(f"Invalid range for '{annotator_name}': effective start_idx ({start_idx}) > end_idx ({end_idx}). Skipping.")
                    continue

                # Clear this annotator's existing intervals before assigning the new one.
                existing_intervals = db.query(AnnotationInterval).filter_by(annotator_id=annotator.id).all()
                if existing_intervals:
                    log.info(f"Deleting {len(existing_intervals)} existing intervals for annotator '{annotator.name}'.")
                    for interval in existing_intervals:
                        db.delete(interval)
                    db.flush()  # process deletes before adding new ones

                try:
                    new_interval = interval_repo.assign_interval_to_annotator(
                        annotator_id=annotator.id,
                        start_idx=start_idx,
                        end_idx=end_idx,
                        allow_overlap=False,
                    )
                    log.info(f"Successfully assigned interval [{new_interval.start_index}, {new_interval.end_index}] to '{annotator_name}'.")
                except ValueError as e:
                    log.error(f"Could not assign interval to '{annotator_name}': {e}")
                except Exception as e:
                    # NOTE(review): assumes the project Logger.error accepts
                    # exc_info — confirm against utils.logger.
                    log.error(f"An unexpected error occurred while assigning interval to '{annotator_name}': {e}", exc_info=True)

            # NOTE(review): commit is presumably performed by the get_db
            # context manager on clean exit — confirm in utils.database.
            log.info("Custom interval application process completed.")

    except Exception as e:
        log.error(f"A critical error occurred during the custom interval application: {e}", exc_info=True)
        # NOTE(review): rollback is presumably handled by get_db on exception — confirm.
88
+
89
+ if __name__ == "__main__":
90
+ apply_custom_intervals()
scripts/distribute_workload.py DELETED
@@ -1,170 +0,0 @@
1
- import sys
2
- import os
3
-
4
- # Add project root to Python path
5
- project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
6
- if project_root not in sys.path:
7
- sys.path.insert(0, project_root)
8
-
9
- import math
10
- import random # Added for password generation
11
- from sqlalchemy.sql import func
12
-
13
- from utils.database import get_db
14
- from data.models import TTSData
15
- from data.repository.annotator_repo import AnnotatorRepo
16
- from data.repository.annotation_interval_repo import AnnotationIntervalRepo
17
- from utils.logger import Logger
18
-
19
- log = Logger()
20
-
21
- # --- Configuration ---
22
- # List of annotator names to ensure exist and assign work to
23
- ANNOTATOR_NAMES = ["shahab", "amir", "mohsen", "mahya", "najmeh", "sepehr", "zahra", "moghim", "amin"]
24
- # DEFAULT_PASSWORD is no longer used for new users, random password will be generated.
25
-
26
def generate_random_password():
    """Return a random 4-digit numeric password as a string.

    The value is drawn uniformly from 1000..9999 inclusive, so the result
    always has exactly four digits and never starts with ``0``.

    ``secrets`` is used rather than ``random`` because the value is handed
    out as a login credential and must not come from a predictable PRNG.
    """
    # Local import: the module header only brings in ``random``.
    import secrets

    return str(1000 + secrets.randbelow(9000))
29
-
30
def _log_distribution_summary(details_list, unassigned_label):
    """Log one summary line per annotator (name, assigned range, password)."""
    log.info("\n--- Workload Distribution Summary ---")
    for details in details_list:
        range_str = unassigned_label
        if details['assigned_start'] is not None and details['assigned_end'] is not None:
            range_str = f"[{details['assigned_start']}-{details['assigned_end']}]"
        log.info(f"Annotator: {details['annotator_obj'].name}, Assigned Range: {range_str}, Password: {details['password_display']}")


def distribute_workload():
    """Ensure every configured annotator exists and split TTSData IDs among them.

    Steps:
      1. For each name in ``ANNOTATOR_NAMES`` fetch the annotator, creating it
         with a freshly generated password when missing.
      2. Count the TTSData rows.
      3. Split ``1..count`` into contiguous ranges, giving the first
         ``count % n`` annotators one extra item.
      4. Assign each range through ``AnnotationIntervalRepo``; annotators
         that already own intervals are skipped.
      5. Log a summary of ranges and passwords.

    All errors are logged; nothing is raised to the caller.
    """
    log.info("Starting workload distribution script...")
    # One dict per annotator:
    # {'annotator_obj', 'password_display', 'assigned_start', 'assigned_end'}
    processed_annotators_details = []

    try:
        with get_db() as db:
            annot_repo = AnnotatorRepo(db)
            interval_repo = AnnotationIntervalRepo(db)

            # 1. Ensure all annotators exist, creating the missing ones.
            log.info("Processing annotators...")
            for name in ANNOTATOR_NAMES:
                annotator = annot_repo.get_annotator_by_name(name)
                password_to_display = "(existing user)"

                if annotator:
                    log.info(f"Found existing annotator '{name}' (id={annotator.id}).")
                else:
                    try:
                        new_password = generate_random_password()
                        log.info(f"Annotator '{name}' not found, creating with new password...")
                        annotator = annot_repo.add_new_annotator(name, new_password)
                        # NOTE(review): the password is logged in clear text;
                        # this looks like the only way the operator learns it.
                        log.info(f"Annotator '{name}' (id={annotator.id}) created successfully with password '{new_password}'.")
                        password_to_display = new_password
                    except ValueError as e:
                        log.warning(f"Could not create annotator '{name}' (likely already exists or other DB issue): {e}. Attempting to fetch again.")
                        annotator = annot_repo.get_annotator_by_name(name)
                        if not annotator:
                            log.error(f"Failed to create or find annotator '{name}'. Skipping.")
                            continue
                        log.info(f"Found existing annotator '{name}' (id={annotator.id}) after creation attempt.")

                if annotator:
                    processed_annotators_details.append({
                        'annotator_obj': annotator,
                        'password_display': password_to_display,
                        'assigned_start': None,
                        'assigned_end': None,
                    })

            if not processed_annotators_details:
                log.error("No annotators processed or found. Exiting.")
                return

            # 2. Total number of TTSData items.
            total_tts_items = db.query(func.count(TTSData.id)).scalar()
            if not total_tts_items:
                log.info("No TTSData items found in the database. Nothing to assign.")
                # Still report the annotators (and any new passwords).
                _log_distribution_summary(processed_annotators_details, "N/A (No data items)")
                return
            log.info(f"Total TTSData items found: {total_tts_items}")

            # 3. Even split; the remainder is handed out one item at a time.
            num_annotators_for_assignment = len(processed_annotators_details)
            items_per_annotator_base, remainder_items = divmod(
                total_tts_items, num_annotators_for_assignment
            )
            log.info(f"Distributing {total_tts_items} items among {num_annotators_for_assignment} annotators.")
            log.info(f"Base items per annotator: {items_per_annotator_base}, Remainder: {remainder_items}")

            # 4. Assign intervals.
            current_start_idx = 1  # Assumes TTSData IDs start from 1.
            for details_dict in processed_annotators_details:
                annotator = details_dict['annotator_obj']
                num_items_for_this_annotator = items_per_annotator_base
                if remainder_items > 0:
                    num_items_for_this_annotator += 1
                    remainder_items -= 1

                if num_items_for_this_annotator == 0:
                    log.info(f"Annotator '{annotator.name}' assigned 0 items (total items might be less than annotators or workload already distributed).")
                    continue

                current_end_idx = min(
                    current_start_idx + num_items_for_this_annotator - 1,
                    total_tts_items,
                )
                if current_start_idx > current_end_idx:
                    log.info(f"No items to assign to '{annotator.name}' (start_idx {current_start_idx} > end_idx {current_end_idx}).")
                    continue

                log.info(f"Attempting to assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}' (id={annotator.id})")
                try:
                    if interval_repo.get_intervals_by_annotator(annotator.id):
                        log.warning(f"Annotator '{annotator.name}' already has existing intervals. Skipping assignment to avoid conflicts. Manual review/cleanup of old intervals might be needed.")
                        # The cursor is deliberately NOT advanced here, so the
                        # next annotator's range starts at the same index and
                        # the tail of the ID space may stay unassigned.
                        continue

                    assigned_interval = interval_repo.assign_interval_to_annotator(
                        annotator_id=annotator.id,
                        start_idx=current_start_idx,
                        end_idx=current_end_idx,
                        allow_overlap=False,
                    )
                    details_dict['assigned_start'] = assigned_interval.start_index
                    details_dict['assigned_end'] = assigned_interval.end_index
                    log.info(
                        f"Successfully assigned interval [{details_dict['assigned_start']}-{details_dict['assigned_end']}] "
                        f"to '{annotator.name}' (id={annotator.id})"
                    )
                except ValueError as e:
                    log.error(f"Could not assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}': {e}")
                except Exception as e:
                    # Keep the traceback: this branch means something unplanned happened.
                    log.error(f"An unexpected error occurred while assigning interval to '{annotator.name}': {e}", exc_info=True)

                # Advance even after a failed attempt so the range is not retried.
                current_start_idx = current_end_idx + 1
                if current_start_idx > total_tts_items:
                    break

            # 5. Summary.
            _log_distribution_summary(
                processed_annotators_details, "N/A (assignment skipped or failed)"
            )
            log.info("Workload distribution script finished.")

    except Exception as e:
        log.error(f"An critical error occurred during workload distribution: {e}", exc_info=True)
168
-
169
- if __name__ == "__main__":
170
- distribute_workload()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/import_annotations_from_json.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \
2
+ import json
3
+ import os
4
+ import sys
5
+ from datetime import datetime
6
+
7
+ # Adjust path to import project modules
8
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
+ PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) # e.g. /home/psyborg/Desktop/tts_labeling
10
+
11
+ # Ensure the project root is at the beginning of sys.path
12
+ if PROJECT_ROOT in sys.path and sys.path[0] != PROJECT_ROOT:
13
+ sys.path.remove(PROJECT_ROOT) # Remove if it exists but not at index 0
14
+ if PROJECT_ROOT not in sys.path: # Add if it doesn't exist at all (it will be added at index 0)
15
+ sys.path.insert(0, PROJECT_ROOT)
16
+
17
+ from utils.database import get_db, SessionLocal # Changed Session to SessionLocal
18
+ from sqlalchemy.orm import Session as SQLAlchemySession # Import Session for type hinting
19
+ from data.models import TTSData, Annotator, Annotation, AudioTrim, AnnotationInterval # Added AnnotationInterval
20
+ from utils.logger import Logger
21
+
22
+ log = Logger()
23
+
24
+ ANNOTATIONS_FILE_PATH = os.path.join(PROJECT_ROOT, "annotations.json")
25
+ BATCH_SIZE = 100 # Define batch size for commits
26
+
27
def _parse_annotation_timestamp(raw_value, sample_id, annotator_name):
    """Parse a timestamp string from the JSON export; return ``None`` on failure.

    ``datetime.fromisoformat`` is tried first (a ``Z`` suffix is mapped to
    ``+00:00``); the fallback drops everything after the first ``.`` and
    parses ``%Y-%m-%dT%H:%M:%S``. A value neither form accepts is logged.
    """
    if not raw_value:
        return None
    try:
        return datetime.fromisoformat(raw_value.replace('Z', '+00:00'))
    except ValueError:
        pass
    try:
        return datetime.strptime(raw_value.split('.')[0], "%Y-%m-%dT%H:%M:%S")
    except ValueError as e_parse:
        log.error(f"Could not parse timestamp '{raw_value}' for TTSData JSON ID {sample_id}, annotator {annotator_name}: {e_parse}")
        return None


def _resolve_annotated_sentence(sample_data, json_ann, samples_by_id):
    """Pick the sentence to store for one annotation.

    Order of preference:
      1. ``original_subtitle`` of the neighbouring sample,
      2. ``annotated_subtitle`` of this annotation,
      3. ``original_subtitle`` of this sample,
      4. the empty string.
    """
    # NOTE(review): the surrounding comments describe this as the "N+1"
    # (next) sample, but the lookup uses id - 1. Kept as written; confirm
    # which neighbour is intended.
    neighbour = samples_by_id.get(sample_data.get("id") - 1)
    if neighbour:
        neighbour_sentence = neighbour.get("original_subtitle")
        if neighbour_sentence is not None:
            return neighbour_sentence

    sentence = json_ann.get("annotated_subtitle")
    if sentence is None:
        sentence = sample_data.get("original_subtitle")
    return sentence if sentence is not None else ""


def _interval_contains(intervals, effective_id):
    """Return True if any ``(start, end)`` pair contains *effective_id*.

    Pairs with a ``None`` bound are ignored.
    """
    return any(
        start is not None and end is not None and start <= effective_id <= end
        for start, end in intervals
    )


def _build_audio_trims(json_ann, annotation_id, db_tts_data_id, sample_id):
    """Convert the ``audio_trims`` of one JSON annotation into ``AudioTrim`` rows.

    JSON values are seconds; stored values are integer milliseconds.
    Entries with missing, non-numeric, negative or inverted bounds are
    logged and skipped.
    """
    trims = []
    for trim_info in json_ann.get("audio_trims") or []:
        start_sec = trim_info.get("start")
        end_sec = trim_info.get("end")
        if start_sec is None or end_sec is None:
            log.warning(f"Skipping trim with missing start/end for Annotation ID {annotation_id}, TTSData JSON ID {sample_id}: {trim_info}")
            continue
        try:
            start_ms = int(float(start_sec) * 1000.0)
            end_ms = int(float(end_sec) * 1000.0)
        except (TypeError, ValueError):
            log.warning(f"Invalid start/end format in audio trim for annotation ID {annotation_id}, TTSData JSON ID {sample_id}. Skipping: {trim_info}")
            continue
        if start_ms < 0 or end_ms < 0 or end_ms < start_ms:
            log.warning(f"Invalid trim values (start_ms={start_ms}, end_ms={end_ms}) for annotation ID {annotation_id}, TTSData JSON ID {sample_id}. Skipping.")
            continue
        trims.append(AudioTrim(
            annotation_id=annotation_id,
            original_tts_data_id=db_tts_data_id,
            start=start_ms,
            end=end_ms,
        ))
    return trims


def import_annotations(db: SQLAlchemySession, data: dict):  # Changed SessionLocal to SQLAlchemySession for type hint
    """Import annotations (and their audio trims) from a parsed JSON export.

    For every entry in ``data["samples"]`` the matching ``TTSData`` row is
    looked up by the JSON ``id``. Each annotation is stored under the
    annotator named in the JSON when ``id + 1`` lies in one of that
    annotator's intervals; otherwise it is reassigned to the first annotator
    whose interval covers it, or skipped when nobody does. An existing
    ``Annotation`` for the (TTSData, annotator) pair is updated in place,
    otherwise a new one is created. Trims already stored for a touched
    annotation are replaced by the trims from the JSON.

    Work is committed every ``BATCH_SIZE`` imported samples, plus once for
    the trailing partial batch.

    Args:
        db: open SQLAlchemy session; this function commits and rolls back on it.
        data: parsed JSON document with a ``"samples"`` list.
    """
    samples = data.get("samples") or []
    stats = {"imported": 0, "updated": 0, "skipped": 0}

    # Caches to reduce DB lookups within one run.
    tts_data_cache = {}
    annotator_cache = {}      # by name
    annotators_by_id = {}     # by id (reassignment targets)

    # Annotation ids whose previously stored trims were already removed in
    # this run, and the subset of those not yet committed.
    trims_reset_ids = set()
    batch_reset_ids = []

    samples_by_id = {s.get("id"): s for s in samples if s.get("id") is not None}
    log.info(f"Created a map for {len(samples_by_id)} samples based on their JSON IDs.")

    # Keep every interval per annotator; a plain id -> interval mapping would
    # silently drop all but one when an annotator owns several.
    db_intervals = db.query(AnnotationInterval).all()
    intervals_by_annotator = {}
    for interval in db_intervals:
        intervals_by_annotator.setdefault(interval.annotator_id, []).append(
            (interval.start_index, interval.end_index)
        )
    log.info(f"Loaded {len(db_intervals)} annotator intervals from the database.")

    def forget_uncommitted_trim_resets():
        # A rollback also undoes the DELETEs of the open batch, so those
        # annotations must be eligible for a reset again.
        trims_reset_ids.difference_update(batch_reset_ids)
        batch_reset_ids.clear()

    def commit_batch(samples_done, last_idx, last_sample_id):
        try:
            db.commit()
        except Exception as e_commit:
            db.rollback()
            forget_uncommitted_trim_resets()
            log.error(f"Failed to commit batch after sample index {last_idx} (TTSData JSON ID {last_sample_id}): {e_commit}. Rolling back this batch.")
        else:
            batch_reset_ids.clear()
            log.info(f"Committed batch. Total samples processed so far: {samples_done} out of {len(samples)}")

    def import_annotation(sample_data, json_ann, sample_id, effective_id, db_tts_data_id):
        json_annotator_name = json_ann.get("annotator")
        if not json_annotator_name:
            log.warning(f"Annotation for TTSData JSON ID {sample_id} missing annotator name, skipping.")
            stats["skipped"] += 1
            return

        initial_entry = annotator_cache.get(json_annotator_name)
        if not initial_entry:
            initial_entry = db.query(Annotator).filter_by(name=json_annotator_name).first()
            if not initial_entry:
                log.warning(f"Annotator '{json_annotator_name}' (from JSON) not found in DB for TTSData JSON ID {sample_id}. Skipping this annotation.")
                stats["skipped"] += 1
                return
            annotator_cache[json_annotator_name] = initial_entry

        initial_annotator_id = initial_entry.id
        # The annotator the row is saved under; may be reassigned below.
        annotator_id = initial_annotator_id
        annotator_name = json_annotator_name

        own_intervals = intervals_by_annotator.get(initial_annotator_id, [])
        if not _interval_contains(own_intervals, effective_id):
            log_message_prefix = f"TTSData JSON ID {sample_id} (effective: {effective_id})"
            if own_intervals:
                shown = ", ".join(f"[{start}, {end}]" for start, end in own_intervals)
                log.warning(f"{log_message_prefix} is outside interval {shown} for annotator '{json_annotator_name}'. Attempting to reassign.")
            else:
                log.warning(f"{log_message_prefix}: Annotator '{json_annotator_name}' (ID: {initial_annotator_id}) has no defined interval. Attempting to reassign to an interval owner.")

            owner_id = next(
                (
                    candidate_id
                    for candidate_id, candidate_intervals in intervals_by_annotator.items()
                    if _interval_contains(candidate_intervals, effective_id)
                ),
                None,
            )
            if owner_id is None:
                log.error(f"No annotator found with an interval covering {log_message_prefix}. Skipping this annotation by '{json_annotator_name}'.")
                stats["skipped"] += 1
                return

            owner_entry = annotators_by_id.get(owner_id)
            if owner_entry is None:
                owner_entry = db.query(Annotator).filter_by(id=owner_id).first()
                if owner_entry:
                    annotators_by_id[owner_id] = owner_entry
                    annotator_cache.setdefault(owner_entry.name, owner_entry)

            annotator_id = owner_id
            if owner_entry:
                annotator_name = owner_entry.name
            else:
                annotator_name = f"ID:{owner_id}"
                log.error(f"Critical: Could not find Annotator DB entry for reassigned ID {owner_id}, though an interval exists. Check data integrity.")

            log.info(f"Reassigning annotation for {log_message_prefix} from '{json_annotator_name}' to '{annotator_name}' (ID: {annotator_id}) as they own the interval.")

        annotated_at = _parse_annotation_timestamp(
            json_ann.get("update_at") or json_ann.get("create_at"),
            sample_id,
            annotator_name,
        )
        annotated_sentence = _resolve_annotated_sentence(sample_data, json_ann, samples_by_id)

        annotation_obj = db.query(Annotation).filter_by(
            tts_data_id=db_tts_data_id,
            annotator_id=annotator_id,
        ).first()

        is_new = annotation_obj is None
        if is_new:
            annotation_obj = Annotation(
                tts_data_id=db_tts_data_id,
                annotator_id=annotator_id,
                annotated_sentence=annotated_sentence,
                annotated_at=annotated_at,
            )
            db.add(annotation_obj)
            try:
                db.flush()  # needed to obtain annotation_obj.id for the trims
            except Exception as e_flush:
                log.error(f"Error flushing new annotation for TTSData JSON ID {sample_id}, Annotator {annotator_name}: {e_flush}")
                # rollback() discards everything not yet committed, including
                # pending trims added to the session earlier in this batch.
                db.rollback()
                forget_uncommitted_trim_resets()
                stats["skipped"] += 1
                return
            stats["imported"] += 1
        else:
            annotation_obj.annotated_sentence = annotated_sentence
            annotation_obj.annotated_at = annotated_at
            stats["updated"] += 1

        if not annotation_obj.id:
            log.warning(f"Annotation ID not available for TTSData JSON ID {sample_id}, Annotator {annotator_name}. Cannot process audio trims.")
            return

        # Remove previously stored trims BEFORE adding the new ones, and only
        # once per annotation. Deleting later (e.g. at commit time) would
        # also wipe new trims that an intermediate flush already inserted.
        if annotation_obj.id not in trims_reset_ids:
            if not is_new:  # a freshly created annotation cannot have trims yet
                db.query(AudioTrim).filter(
                    AudioTrim.annotation_id == annotation_obj.id
                ).delete(synchronize_session=False)
            trims_reset_ids.add(annotation_obj.id)
            batch_reset_ids.append(annotation_obj.id)

        new_trims = _build_audio_trims(json_ann, annotation_obj.id, db_tts_data_id, sample_id)
        if new_trims:
            db.add_all(new_trims)

    def import_sample(sample_data):
        """Import one sample; return True if its annotations were processed."""
        sample_id = sample_data.get("id")
        if sample_id is None:
            log.warning("Sample missing ID, skipping.")
            stats["skipped"] += 1
            return False

        # TTSData is looked up by the raw JSON id, while interval membership
        # is checked with id + 1 (intervals are presumably 1-based).
        effective_id = sample_id + 1

        tts_data_entry = tts_data_cache.get(sample_id)
        if tts_data_entry is None:
            tts_data_entry = db.query(TTSData).filter_by(id=sample_id).first()
            if not tts_data_entry:
                log.warning(f"TTSData with JSON ID {sample_id} not found in database, skipping sample.")
                stats["skipped"] += 1
                return False
            tts_data_cache[sample_id] = tts_data_entry

        json_annotations = sample_data.get("annotations", [])
        if not json_annotations:
            return False

        db_tts_data_id = tts_data_entry.id
        for json_ann in json_annotations:
            import_annotation(sample_data, json_ann, sample_id, effective_id, db_tts_data_id)
        return True

    pending_samples = 0
    last_idx, last_sample_id = -1, None
    for sample_idx, sample_data in enumerate(samples):
        if not import_sample(sample_data):
            continue
        pending_samples += 1
        last_idx, last_sample_id = sample_idx, sample_data.get("id")
        if pending_samples >= BATCH_SIZE:
            commit_batch(sample_idx + 1, last_idx, last_sample_id)
            pending_samples = 0

    # Commit the trailing partial batch here rather than inside the loop, so
    # it still happens when the final samples of the file were skipped.
    if pending_samples:
        commit_batch(len(samples), last_idx, last_sample_id)

    log.info(f"Finished import attempt. Final counts - New: {stats['imported']}, Updated: {stats['updated']}, Skipped: {stats['skipped']}")
279
+
280
def main():
    """Load ``ANNOTATIONS_FILE_PATH`` and import its annotations into the DB.

    Every failure is logged rather than raised: missing file, unreadable or
    invalid JSON, a JSON root that is not an object, or any error raised
    while importing.
    """
    log.info("Starting annotation import script...")

    if not os.path.exists(ANNOTATIONS_FILE_PATH):
        log.error(f"Annotations file not found at: {ANNOTATIONS_FILE_PATH}")
        return

    try:
        with open(ANNOTATIONS_FILE_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        log.error(f"Error decoding JSON from {ANNOTATIONS_FILE_PATH}: {e}")
        return
    except Exception as e:
        log.error(f"Error reading file {ANNOTATIONS_FILE_PATH}: {e}")
        return

    # import_annotations() calls data.get("samples"); reject other JSON roots
    # here with a clear message instead of an AttributeError further down.
    if not isinstance(data, dict):
        log.error(f"Unexpected JSON structure in {ANNOTATIONS_FILE_PATH}: expected an object with a 'samples' list, got {type(data).__name__}.")
        return

    try:
        with get_db() as db_session:
            import_annotations(db_session, data)
    except Exception as e:
        # Top-level boundary: keep the traceback so the failure can be diagnosed.
        log.error(f"An error occurred during the import process: {e}", exc_info=True)
    finally:
        log.info("Annotation import script finished.")
304
+
305
+ if __name__ == "__main__":
306
+ main()
utils/auth.py CHANGED
@@ -35,38 +35,36 @@ class AuthService:
35
  annotator = repo.get_annotator_by_name(username)
36
 
37
  # ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
38
- def empty_dashboard_outputs():
39
  return (
40
  [], # items_state
41
  0, # idx_state
42
- "",
43
- "",
44
- "",
45
- "",
46
- "",
47
- False, # شش فیلد
48
  )
49
 
50
  # --- کاربر موجود نیست / غیر فعال
51
  if annotator is None or not annotator.is_active:
52
  log.warning("Failed login (not found / inactive)")
53
  return (
54
- "❌ Wrong username or password!",
55
- gr.update(),
56
- gr.update(visible=False),
57
- gr.update(value=""),
58
- *empty_dashboard_outputs(),
59
  )
60
 
61
  # --- رمز عبور اشتباه
62
  if not verify_password(password, annotator.password):
63
  log.warning("Failed login (bad password)")
64
  return (
65
- "❌ Wrong username or password!",
66
- gr.update(),
67
- gr.update(visible=False),
68
- gr.update(value=""),
69
- *empty_dashboard_outputs(),
70
  )
71
 
72
  # ---------- ورود موفق ---------- #
@@ -106,16 +104,16 @@ class AuthService:
106
  # مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
107
  if dashboard_items:
108
  first = dashboard_items[0]
109
- first_vals = (
 
 
110
  first["id"],
111
  first["filename"],
112
  first["sentence"],
113
  first["annotated_sentence"],
114
- first["annotated_at"],
115
- first["validated"],
116
  )
117
  else:
118
- first_vals = ("", "", "", "", "", False)
119
 
120
  log.info(f"User '{username}' logged in successfully.")
121
 
@@ -127,7 +125,7 @@ class AuthService:
127
  gr.update(value=f"👋 Welcome, {annotator.name}!"), # 3
128
  dashboard_items, # 4: items_state
129
  0, # 5: idx_state
130
- *first_vals, # 6-11: شش فیلد نخست
131
  )
132
 
133
  # ───────────── LOGOUT ───────────── #
 
35
  annotator = repo.get_annotator_by_name(username)
36
 
37
  # ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
38
+ def empty_dashboard_outputs_for_ui(): # Renamed and adjusted for UI outputs
39
  return (
40
  [], # items_state
41
  0, # idx_state
42
+ "", # tts_id
43
+ "", # filename
44
+ "", # sentence
45
+ "", # ann_sentence
 
 
46
  )
47
 
48
  # --- کاربر موجود نیست / غیر فعال
49
  if annotator is None or not annotator.is_active:
50
  log.warning("Failed login (not found / inactive)")
51
  return (
52
+ "❌ Wrong username or password!", # message
53
+ gr.update(), # login_container (no change)
54
+ gr.update(visible=False), # dashboard_container
55
+ gr.update(value=""), # header_welcome
56
+ *empty_dashboard_outputs_for_ui(), # items_state, idx_state, and 4 UI textboxes
57
  )
58
 
59
  # --- رمز عبور اشتباه
60
  if not verify_password(password, annotator.password):
61
  log.warning("Failed login (bad password)")
62
  return (
63
+ "❌ Wrong username or password!", # message
64
+ gr.update(), # login_container (no change)
65
+ gr.update(visible=False), # dashboard_container
66
+ gr.update(value=""), # header_welcome
67
+ *empty_dashboard_outputs_for_ui(), # items_state, idx_state, and 4 UI textboxes
68
  )
69
 
70
  # ---------- ورود موفق ---------- #
 
104
  # مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
105
  if dashboard_items:
106
  first = dashboard_items[0]
107
+ # Only take the first 4 values needed for the 4 textboxes
108
+ # tts_id, filename, sentence, ann_sentence
109
+ first_vals_for_ui = (
110
  first["id"],
111
  first["filename"],
112
  first["sentence"],
113
  first["annotated_sentence"],
 
 
114
  )
115
  else:
116
+ first_vals_for_ui = ("", "", "", "")
117
 
118
  log.info(f"User '{username}' logged in successfully.")
119
 
 
125
  gr.update(value=f"👋 Welcome, {annotator.name}!"), # 3
126
  dashboard_items, # 4: items_state
127
  0, # 5: idx_state
128
+ *first_vals_for_ui, # 6-9: چهار فیلد نخست برای UI
129
  )
130
 
131
  # ───────────── LOGOUT ───────────── #
utils/database.py CHANGED
@@ -3,6 +3,15 @@
3
  from sqlalchemy import create_engine
4
  from sqlalchemy.orm import sessionmaker
5
  from contextlib import contextmanager
 
 
 
 
 
 
 
 
 
6
  from config import conf
7
  from utils.logger import Logger
8
 
 
3
  from sqlalchemy import create_engine
4
  from sqlalchemy.orm import sessionmaker
5
  from contextlib import contextmanager
6
+ import sys # Add sys import
7
+ import os # Add os import
8
+
9
+ # Add project root to Python path to ensure local modules are prioritized
10
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
11
+ PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
12
+ if PROJECT_ROOT not in sys.path:
13
+ sys.path.insert(0, PROJECT_ROOT)
14
+
15
  from config import conf
16
  from utils.logger import Logger
17