Spaces:
Running
Running
aligned interface and data import scripts
Browse files- components/dashboard_page.py +530 -263
- components/header.py +14 -6
- components/login_page.py +0 -2
- data/models.py +6 -6
- data/repository/annotator_workload_repo.py +21 -9
- scripts/apply_custom_intervals.py +90 -0
- scripts/distribute_workload.py +0 -170
- scripts/import_annotations_from_json.py +306 -0
- utils/auth.py +20 -22
- utils/database.py +9 -0
components/dashboard_page.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import datetime
|
4 |
-
from sqlalchemy import orm
|
5 |
|
6 |
from components.header import Header
|
7 |
-
from utils.logger import Logger
|
8 |
-
from utils.gdrive_downloader import PublicFolderAudioLoader
|
9 |
from config import conf
|
10 |
-
from utils.database import get_db
|
11 |
-
from data.models import Annotation, AudioTrim, TTSData
|
|
|
12 |
|
13 |
-
log = Logger()
|
14 |
LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
|
15 |
GDRIVE_FOLDER = conf.GDRIVE_FOLDER
|
16 |
|
@@ -18,70 +19,108 @@ GDRIVE_FOLDER = conf.GDRIVE_FOLDER
|
|
18 |
class DashboardPage:
|
19 |
def __init__(self) -> None:
|
20 |
with gr.Column(visible=False) as self.container:
|
21 |
-
self.header = Header()
|
22 |
|
23 |
with gr.Row():
|
24 |
-
#
|
25 |
with gr.Column(scale=3):
|
26 |
with gr.Row():
|
27 |
-
self.tts_id = gr.Textbox(label="ID", interactive=False)
|
28 |
-
self.filename = gr.Textbox(label="Filename", interactive=False)
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
self.btn_copy = gr.Button("📋 Copy", interactive=True)
|
34 |
-
with gr.Row():
|
35 |
-
self.ann_sentence = gr.Textbox(
|
36 |
-
label="Annotated Sentence",
|
37 |
-
interactive=True,
|
38 |
-
max_lines=5,
|
39 |
-
rtl=True,
|
40 |
-
)
|
41 |
-
self.btn_paste = gr.Button("📥 Paste", interactive=True)
|
42 |
-
with gr.Row():
|
43 |
-
self.validated = gr.Checkbox(
|
44 |
-
label="Validated", interactive=True
|
45 |
-
)
|
46 |
with gr.Row():
|
47 |
-
|
48 |
-
|
49 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
self.jump_data_id_input = gr.Number(
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
self.trim_end_sec = gr.Number(
|
60 |
-
label="Trim End (s)", value=0.0, precision=3, interactive=True
|
61 |
)
|
62 |
-
self.
|
63 |
-
|
64 |
|
65 |
-
#
|
66 |
with gr.Column(scale=2):
|
67 |
-
self.btn_load_voice = gr.Button("Load Audio",
|
68 |
self.audio = gr.Audio(
|
69 |
label="🔊 Audio", interactive=False, autoplay=True
|
70 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
#
|
73 |
self.items_state = gr.State([])
|
74 |
self.idx_state = gr.State(0)
|
75 |
-
self.clipboard_state = gr.State("")
|
76 |
self.original_audio_state = gr.State(None)
|
77 |
-
self.
|
78 |
|
79 |
# List of all interactive UI elements for enabling/disabling
|
80 |
self.interactive_ui_elements = [
|
81 |
-
self.btn_prev, self.
|
|
|
82 |
self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
|
83 |
self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
|
84 |
-
self.ann_sentence, self.
|
85 |
]
|
86 |
|
87 |
# ---------------- wiring ---------------- #
|
@@ -90,151 +129,184 @@ class DashboardPage:
|
|
90 |
):
|
91 |
self.header.register_callbacks(login_page, self, session_state)
|
92 |
|
93 |
-
# Helper function to update UI interactive state
|
94 |
def update_ui_interactive_state(is_interactive: bool):
|
95 |
updates = []
|
96 |
for elem in self.interactive_ui_elements:
|
97 |
if elem == self.btn_load_voice and not is_interactive:
|
98 |
-
updates.append(gr.update(value="⏳ Loading...", interactive=False))
|
99 |
elif elem == self.btn_load_voice and is_interactive:
|
100 |
-
updates.append(gr.update(value="Load Audio", interactive=True))
|
|
|
|
|
|
|
|
|
|
|
101 |
else:
|
102 |
updates.append(gr.update(interactive=is_interactive))
|
103 |
return updates
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
end = trim_params_from_state.get("end")
|
116 |
-
operation = trim_params_from_state.get("operation")
|
117 |
-
|
118 |
-
if operation == "delete" and start is not None and end is not None and end > start and start >= 0:
|
119 |
-
start_sample = int(sr * start / 1000.0)
|
120 |
-
end_sample = int(sr * end / 1000.0)
|
121 |
-
|
122 |
-
audio_duration_samples = len(wav)
|
123 |
-
start_sample = max(0, min(start_sample, audio_duration_samples))
|
124 |
-
end_sample = max(start_sample, min(end_sample, audio_duration_samples))
|
125 |
-
|
126 |
-
if start_sample == 0 and end_sample == audio_duration_samples:
|
127 |
-
log.info(f"Applying saved trim: delete entire audio from {start}ms to {end}ms. Resulting in empty audio.")
|
128 |
-
return (sr, np.array([], dtype=wav.dtype)), original_audio_for_state
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
|
|
143 |
|
144 |
-
def download_voice_fn(folder_link, filename_to_load):
|
145 |
if not filename_to_load:
|
146 |
-
return None, None
|
147 |
try:
|
148 |
-
log.info(f"Downloading voice: {filename_to_load}")
|
149 |
sr, wav = LOADER.load_audio(folder_link, filename_to_load)
|
150 |
-
return (sr, wav), (sr, wav.copy())
|
151 |
except Exception as e:
|
152 |
-
log.error(f"GDrive download failed for {filename_to_load}: {e}")
|
153 |
gr.Error(f"Failed to load audio: {filename_to_load}. Error: {e}")
|
154 |
-
return None, None
|
155 |
|
156 |
-
def save_annotation_db_fn(current_tts_id, session, ann_text_to_save,
|
157 |
annotator_id = session.get("user_id")
|
158 |
if not current_tts_id or not annotator_id:
|
159 |
gr.Error("Cannot save: Missing TTS ID or User ID.")
|
160 |
-
return
|
161 |
-
|
162 |
with get_db() as db:
|
163 |
try:
|
164 |
annotation_obj = db.query(Annotation).filter_by(
|
165 |
tts_data_id=current_tts_id, annotator_id=annotator_id
|
166 |
-
).first()
|
|
|
167 |
if not annotation_obj:
|
168 |
annotation_obj = Annotation(
|
169 |
tts_data_id=current_tts_id, annotator_id=annotator_id
|
170 |
)
|
171 |
db.add(annotation_obj)
|
|
|
172 |
annotation_obj.annotated_sentence = ann_text_to_save
|
173 |
-
annotation_obj.validated = validated_to_save
|
174 |
annotation_obj.annotated_at = datetime.datetime.utcnow()
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
db.flush()
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
185 |
annotation_id=annotation_obj.id,
|
186 |
-
original_tts_data_id=
|
187 |
-
start=
|
188 |
-
end=
|
189 |
)
|
190 |
-
|
191 |
-
|
192 |
-
annotation_obj.
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
db.commit()
|
198 |
gr.Info(f"Annotation for ID {current_tts_id} saved.")
|
199 |
-
return
|
200 |
except Exception as e:
|
201 |
db.rollback()
|
202 |
-
log.error(f"Failed to save annotation for {current_tts_id}: {e}")
|
203 |
gr.Error(f"Save failed: {e}")
|
204 |
-
return False
|
205 |
|
206 |
def show_current_item_fn(items, idx, session):
|
207 |
-
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
current_item = items[idx]
|
210 |
tts_data_id = current_item.get("id")
|
211 |
annotator_id = session.get("user_id")
|
212 |
-
ann_text
|
213 |
-
|
214 |
if tts_data_id and annotator_id:
|
215 |
with get_db() as db:
|
216 |
try:
|
217 |
existing_annotation = db.query(Annotation).filter_by(
|
218 |
tts_data_id=tts_data_id, annotator_id=annotator_id
|
219 |
-
).options(orm.joinedload(Annotation.
|
220 |
if existing_annotation:
|
221 |
ann_text = existing_annotation.annotated_sentence or ""
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
except Exception as e:
|
232 |
-
log.error(f"
|
233 |
gr.Error(f"Error loading annotation details: {e}")
|
|
|
234 |
return (
|
235 |
current_item.get("id", ""), current_item.get("filename", ""),
|
236 |
-
current_item.get("sentence", ""), ann_text,
|
237 |
-
|
|
|
|
|
|
|
|
|
238 |
)
|
239 |
|
240 |
def navigate_idx_fn(items, current_idx, direction):
|
@@ -243,9 +315,65 @@ class DashboardPage:
|
|
243 |
return new_idx
|
244 |
|
245 |
def load_all_items_fn(sess):
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
def jump_by_data_id_fn(items, target_data_id_str, current_idx):
|
251 |
if not target_data_id_str: return current_idx
|
@@ -253,209 +381,348 @@ class DashboardPage:
|
|
253 |
target_id = int(target_data_id_str)
|
254 |
for i, item_dict in enumerate(items):
|
255 |
if item_dict.get("id") == target_id: return i
|
256 |
-
gr.Warning(f"Data ID {target_id} not found.")
|
257 |
except ValueError:
|
258 |
gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
|
259 |
return current_idx
|
260 |
|
261 |
-
def
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
with get_db() as db:
|
291 |
try:
|
292 |
annotation_obj = db.query(Annotation).filter_by(
|
293 |
-
tts_data_id=
|
294 |
-
).first()
|
295 |
if annotation_obj:
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
db.commit()
|
298 |
-
gr.Info(f"Annotation for ID {
|
299 |
else:
|
300 |
-
gr.Warning(f"No annotation found to delete for ID {
|
301 |
except Exception as e:
|
302 |
db.rollback()
|
303 |
-
log.error(f"Error deleting annotation {
|
304 |
-
gr.Error(f"Failed to delete annotation: {e}")
|
305 |
else:
|
306 |
-
|
307 |
-
refreshed_ui_values = show_current_item_fn(items, current_idx, session)
|
308 |
-
return items, current_idx, *refreshed_ui_values
|
309 |
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
self.tts_id, self.filename, self.sentence, self.ann_sentence,
|
313 |
-
self.
|
314 |
-
self.
|
|
|
|
|
|
|
315 |
]
|
316 |
-
|
317 |
# Initial Load
|
|
|
|
|
|
|
|
|
318 |
root_blocks.load(
|
319 |
-
fn=lambda: update_ui_interactive_state(False),
|
320 |
outputs=self.interactive_ui_elements
|
321 |
).then(
|
322 |
-
fn=load_all_items_fn,
|
323 |
inputs=[session_state],
|
324 |
-
|
|
|
|
|
|
|
325 |
).then(
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
329 |
).then(
|
330 |
-
fn=
|
331 |
-
inputs=[self.audio, self.current_trim_params, self.original_audio_state],
|
332 |
-
outputs=[self.audio, self.original_audio_state]
|
333 |
-
).then(
|
334 |
-
fn=lambda: update_ui_interactive_state(True),
|
335 |
outputs=self.interactive_ui_elements
|
336 |
)
|
337 |
|
338 |
-
# Navigation (Prev/Next)
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
341 |
]:
|
342 |
event_chain = btn_widget.click(
|
343 |
fn=lambda: update_ui_interactive_state(False),
|
344 |
outputs=self.interactive_ui_elements
|
345 |
)
|
346 |
-
if
|
347 |
event_chain = event_chain.then(
|
348 |
fn=save_annotation_db_fn,
|
349 |
inputs=[
|
350 |
self.tts_id, session_state, self.ann_sentence,
|
351 |
-
self.
|
352 |
],
|
353 |
-
outputs=
|
|
|
|
|
|
|
|
|
354 |
)
|
355 |
-
|
356 |
-
|
|
|
357 |
inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
|
358 |
outputs=self.idx_state,
|
359 |
).then(
|
360 |
fn=show_current_item_fn,
|
361 |
inputs=[self.items_state, self.idx_state, session_state],
|
362 |
-
outputs=
|
363 |
).then(
|
364 |
-
|
365 |
-
|
366 |
-
outputs=[self.audio, self.
|
367 |
).then(
|
368 |
-
|
369 |
-
|
370 |
-
outputs=[self.audio, self.original_audio_state]
|
371 |
).then(
|
372 |
fn=lambda: update_ui_interactive_state(True),
|
373 |
outputs=self.interactive_ui_elements
|
374 |
)
|
375 |
-
|
376 |
-
#
|
377 |
-
self.
|
378 |
-
fn=lambda: update_ui_interactive_state(False),
|
379 |
-
outputs=self.interactive_ui_elements
|
380 |
-
).then(
|
381 |
-
fn=download_voice_fn,
|
382 |
-
inputs=[gr.State(GDRIVE_FOLDER), self.filename],
|
383 |
-
outputs=[self.audio, self.original_audio_state],
|
384 |
-
).then(
|
385 |
-
fn=apply_loaded_trim_fn,
|
386 |
-
inputs=[self.audio, self.current_trim_params, self.original_audio_state],
|
387 |
-
outputs=[self.audio, self.original_audio_state]
|
388 |
-
).then(
|
389 |
-
fn=lambda: update_ui_interactive_state(True),
|
390 |
-
outputs=self.interactive_ui_elements
|
391 |
-
)
|
392 |
-
|
393 |
-
# Copy/Paste (Quick operations, no UI disable needed)
|
394 |
-
self.btn_copy.click(fn=lambda x: x, inputs=self.sentence, outputs=self.clipboard_state)
|
395 |
-
self.btn_paste.click(fn=lambda x: x, inputs=self.clipboard_state, outputs=self.ann_sentence)
|
396 |
-
|
397 |
-
# Jump to Data ID
|
398 |
-
self.btn_jump.click(
|
399 |
fn=lambda: update_ui_interactive_state(False),
|
400 |
outputs=self.interactive_ui_elements
|
401 |
).then(
|
402 |
-
fn=jump_by_data_id_fn,
|
403 |
inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
|
404 |
-
outputs=self.idx_state
|
405 |
).then(
|
406 |
fn=show_current_item_fn,
|
407 |
inputs=[self.items_state, self.idx_state, session_state],
|
408 |
-
outputs=
|
409 |
).then(
|
410 |
-
|
411 |
-
|
412 |
-
outputs=[self.audio, self.
|
413 |
).then(
|
414 |
-
|
415 |
-
|
416 |
-
outputs=[self.audio, self.original_audio_state]
|
417 |
).then(
|
418 |
fn=lambda: update_ui_interactive_state(True),
|
419 |
outputs=self.interactive_ui_elements
|
420 |
)
|
421 |
|
422 |
-
#
|
423 |
-
self.
|
424 |
fn=lambda: update_ui_interactive_state(False),
|
425 |
outputs=self.interactive_ui_elements
|
426 |
).then(
|
427 |
-
fn=
|
428 |
-
inputs=[
|
429 |
-
outputs=[self.audio, self.
|
|
|
|
|
|
|
|
|
430 |
).then(
|
431 |
fn=lambda: update_ui_interactive_state(True),
|
432 |
outputs=self.interactive_ui_elements
|
433 |
)
|
|
|
|
|
|
|
|
|
|
|
434 |
|
435 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
self.btn_undo_trim.click(
|
437 |
-
fn=
|
438 |
-
|
439 |
-
|
440 |
-
fn=lambda orig_audio: (orig_audio, None, 0.0, 0.0) if orig_audio else (None, None, 0.0, 0.0),
|
441 |
-
inputs=[self.original_audio_state],
|
442 |
-
outputs=[self.audio, self.current_trim_params, self.trim_start_sec, self.trim_end_sec],
|
443 |
-
).then(
|
444 |
-
fn=lambda: update_ui_interactive_state(True),
|
445 |
-
outputs=self.interactive_ui_elements
|
446 |
)
|
447 |
|
448 |
-
# Delete
|
|
|
|
|
|
|
|
|
|
|
449 |
self.btn_delete.click(
|
450 |
fn=lambda: update_ui_interactive_state(False),
|
451 |
outputs=self.interactive_ui_elements
|
452 |
).then(
|
453 |
fn=delete_db_and_ui_fn,
|
454 |
-
inputs=[self.items_state, self.idx_state, session_state],
|
455 |
-
outputs=
|
456 |
-
).then(
|
457 |
fn=lambda: update_ui_interactive_state(True),
|
458 |
outputs=self.interactive_ui_elements
|
459 |
)
|
460 |
|
461 |
return self.container
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import datetime
|
4 |
+
from sqlalchemy import orm, func # Added func for count
|
5 |
|
6 |
from components.header import Header
|
7 |
+
from utils.logger import Logger # Changed from get_logger to Logger
|
8 |
+
from utils.gdrive_downloader import PublicFolderAudioLoader
|
9 |
from config import conf
|
10 |
+
from utils.database import get_db
|
11 |
+
from data.models import Annotation, AudioTrim, TTSData, AnnotationInterval # Added AnnotationInterval
|
12 |
+
from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo # For progress
|
13 |
|
14 |
+
log = Logger() # Changed from get_logger() to Logger()
|
15 |
LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
|
16 |
GDRIVE_FOLDER = conf.GDRIVE_FOLDER
|
17 |
|
|
|
19 |
class DashboardPage:
|
20 |
def __init__(self) -> None:
|
21 |
with gr.Column(visible=False) as self.container:
|
22 |
+
self.header = Header() # Header now includes progress_display
|
23 |
|
24 |
with gr.Row():
|
25 |
+
# Left Column
|
26 |
with gr.Column(scale=3):
|
27 |
with gr.Row():
|
28 |
+
self.tts_id = gr.Textbox(label="ID", interactive=False, scale=1)
|
29 |
+
self.filename = gr.Textbox(label="Filename", interactive=False, scale=3)
|
30 |
+
self.sentence = gr.Textbox(
|
31 |
+
label="Original Sentence", interactive=False, max_lines=5, rtl=True
|
32 |
+
)
|
33 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
with gr.Row():
|
35 |
+
with gr.Column(scale=1, min_width=10): # Left spacer column
|
36 |
+
pass
|
37 |
+
self.btn_copy_sentence = gr.Button("📋 Copy to Annotated", min_width=150)
|
38 |
+
with gr.Column(scale=1, min_width=10): # Right spacer column
|
39 |
+
pass
|
40 |
+
|
41 |
+
self.ann_sentence = gr.Textbox(
|
42 |
+
label="Annotated Sentence",
|
43 |
+
interactive=True,
|
44 |
+
max_lines=5,
|
45 |
+
rtl=True,
|
46 |
+
)
|
47 |
+
|
48 |
with gr.Row():
|
49 |
+
self.btn_prev = gr.Button("⬅️ Previous", min_width=120)
|
50 |
+
self.btn_next_no_save = gr.Button("Next ➡️ (No Save)", min_width=150)
|
51 |
+
self.btn_save_next = gr.Button("Save & Next ➡️", variant="primary", min_width=120)
|
52 |
+
|
53 |
+
# Combined row for Delete button and Jump controls
|
54 |
+
with gr.Row(): # Removed style argument to fix TypeError
|
55 |
+
# Delete button on the left
|
56 |
+
self.btn_delete = gr.Button("🗑️ Delete Annotation & Clear Fields", min_width=260)
|
57 |
+
|
58 |
+
# Spacer column to push jump controls to the right.
|
59 |
+
# # This column will expand to fill available space.
|
60 |
+
# with gr.Column(scale=1, min_width=10):
|
61 |
+
# pass
|
62 |
+
|
63 |
+
# Jump controls, grouped in a nested Row, appearing on the right.
|
64 |
+
# 'scale=0' for this nested Row and its children makes them take minimal/intrinsic space.
|
65 |
+
with gr.Row(scale=0, variant='compact'): # Added variant='compact'
|
66 |
self.jump_data_id_input = gr.Number(
|
67 |
+
# show_label=False, # Remove label to reduce height
|
68 |
+
label="Jump to ID (e.g. 123)", # Use placeholder for instruction
|
69 |
+
value=None, # Ensure placeholder shows initially
|
70 |
+
precision=0,
|
71 |
+
interactive=True,
|
72 |
+
min_width=120, # Adjusted for longer placeholder
|
73 |
+
# scale=0
|
|
|
|
|
74 |
)
|
75 |
+
self.btn_jump = gr.Button("Go to data ID", min_width=70) # Compact Go button
|
76 |
+
# Removed the old separate rows for delete and jump controls
|
77 |
|
78 |
+
# Right Column
|
79 |
with gr.Column(scale=2):
|
80 |
+
self.btn_load_voice = gr.Button("Load Audio (Autoplay)", min_width=150)
|
81 |
self.audio = gr.Audio(
|
82 |
label="🔊 Audio", interactive=False, autoplay=True
|
83 |
)
|
84 |
+
with gr.Group(): # Grouping trim controls
|
85 |
+
gr.Markdown("### Audio Trimming")
|
86 |
+
self.trim_start_sec = gr.Number(
|
87 |
+
label="Trim Start (s)",
|
88 |
+
value=None, # Ensure placeholder shows
|
89 |
+
precision=3,
|
90 |
+
interactive=True,
|
91 |
+
min_width=150
|
92 |
+
)
|
93 |
+
self.trim_end_sec = gr.Number(
|
94 |
+
label="Trim End (s)",
|
95 |
+
value=None, # Ensure placeholder shows
|
96 |
+
precision=3,
|
97 |
+
interactive=True,
|
98 |
+
min_width=150
|
99 |
+
)
|
100 |
+
with gr.Row():
|
101 |
+
self.btn_trim = gr.Button("➕ Add Trim (Delete Segment)", min_width=150)
|
102 |
+
self.btn_undo_trim = gr.Button("↩️ Undo Last Trim", min_width=150)
|
103 |
+
self.trims_display = gr.DataFrame(
|
104 |
+
headers=["Start (s)", "End (s)"],
|
105 |
+
col_count=(2, "fixed"),
|
106 |
+
interactive=False,
|
107 |
+
label="Applied Trims",
|
108 |
+
wrap=True
|
109 |
+
)
|
110 |
|
111 |
+
# State variables
|
112 |
self.items_state = gr.State([])
|
113 |
self.idx_state = gr.State(0)
|
|
|
114 |
self.original_audio_state = gr.State(None)
|
115 |
+
self.applied_trims_list_state = gr.State([])
|
116 |
|
117 |
# List of all interactive UI elements for enabling/disabling
|
118 |
self.interactive_ui_elements = [
|
119 |
+
self.btn_prev, self.btn_save_next, self.btn_next_no_save,
|
120 |
+
self.btn_delete, self.btn_jump,
|
121 |
self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
|
122 |
self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
|
123 |
+
self.ann_sentence, self.btn_copy_sentence
|
124 |
]
|
125 |
|
126 |
# ---------------- wiring ---------------- #
|
|
|
129 |
):
|
130 |
self.header.register_callbacks(login_page, self, session_state)
|
131 |
|
|
|
132 |
def update_ui_interactive_state(is_interactive: bool):
    """Build one ``gr.update`` per entry of ``self.interactive_ui_elements``.

    Every element is enabled or disabled according to ``is_interactive``.
    The "Load Audio" and "Save & Next" buttons additionally swap their
    caption between a busy label (while disabled) and their normal label
    (when re-enabled).

    Returns:
        A list of updates, in the same order as
        ``self.interactive_ui_elements``, suitable for use as the outputs
        of an event handler.
    """
    # (button, caption while busy, caption while idle)
    # NOTE(review): the busy caption for "Save & Next" was mis-encoded in the
    # source ("\ufffd\ufffd Saving..."); the floppy-disk emoji is a best-guess repair.
    captioned_buttons = (
        (self.btn_load_voice, "⏳ Loading Audio...", "Load Audio (Autoplay)"),
        (self.btn_save_next, "💾 Saving...", "Save & Next ➡️"),
    )

    updates = []
    for elem in self.interactive_ui_elements:
        for button, busy_caption, idle_caption in captioned_buttons:
            if elem == button:
                caption = idle_caption if is_interactive else busy_caption
                updates.append(gr.update(value=caption, interactive=is_interactive))
                break
        else:
            # Plain element: only toggle interactivity.
            updates.append(gr.update(interactive=is_interactive))
    return updates
|
147 |
|
148 |
+
def get_user_progress_fn(session):
    """Return a one-line progress summary for the user stored in ``session``.

    The total is the sum of ``end_index - start_index + 1`` over the user's
    ``AnnotationInterval`` rows; the completed count is the number of the
    user's annotations with a non-empty ``annotated_sentence`` whose TTS data
    id falls inside one of those intervals.

    Args:
        session: Mapping that provides ``"user_id"`` via ``.get``.

    Returns:
        A display string. ``"Annotation Progress: N/A"`` when no user id is
        present, ``"Annotation Progress: Error"`` when the queries fail.
    """
    user_id = session.get("user_id")
    if not user_id:
        return "Annotation Progress: N/A"

    with get_db() as db:
        try:
            # Total items assigned to the user across all intervals.
            total_assigned = (
                db.query(
                    func.sum(
                        AnnotationInterval.end_index - AnnotationInterval.start_index + 1
                    )
                )
                .filter(AnnotationInterval.annotator_id == user_id)
                .scalar()
            )
            if total_assigned is None:
                total_assigned = 0

            # Non-empty annotations by this user inside their assigned intervals.
            completed_count = (
                db.query(func.count(Annotation.id))
                .join(TTSData, Annotation.tts_data_id == TTSData.id)
                .join(
                    AnnotationInterval,
                    (AnnotationInterval.annotator_id == user_id)
                    & (TTSData.id >= AnnotationInterval.start_index)
                    & (TTSData.id <= AnnotationInterval.end_index),
                )
                .filter(
                    Annotation.annotator_id == user_id,
                    # SQLAlchemy renders "!= None" as IS NOT NULL.
                    Annotation.annotated_sentence != None,  # noqa: E711
                    Annotation.annotated_sentence != "",
                )
                .scalar()
            )
            if completed_count is None:
                completed_count = 0

            if total_assigned > 0:
                percent = (completed_count / total_assigned) * 100
                bar_length = 20
                filled_length = int(bar_length * completed_count // total_assigned)
                # Clamp so the bar keeps a fixed width even when the completed
                # count exceeds the assigned total (e.g. overlapping intervals).
                filled_length = max(0, min(bar_length, filled_length))
                bar = '█' * filled_length + '░' * (bar_length - filled_length)
                return f"Progress: {bar} {completed_count}/{total_assigned} ({percent:.1f}%)"
            if total_assigned == 0 and completed_count == 0:
                return "Progress: No items assigned yet."
            # Reached only when the total is not positive but annotations exist.
            return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
        except Exception as e:
            log.error(f"Error fetching progress for user {user_id}: {e}")
            return "Annotation Progress: Error"
|
188 |
|
189 |
+
def download_voice_fn(folder_link, filename_to_load, autoplay_on_load=False):
    """Download one audio file and package it for the audio widget and state.

    Args:
        folder_link: Folder reference passed through to ``LOADER.load_audio``.
        filename_to_load: Name of the file to fetch; falsy means "nothing to load".
        autoplay_on_load: Whether the returned audio update should autoplay.

    Returns:
        A 3-tuple ``(audio, original_copy, audio_update)``. On success the
        first two are ``(sr, wav)`` pairs (the second holding a copy of the
        samples) and the third is a ``gr.update`` carrying the audio. When
        there is no filename or the download fails, it is
        ``(None, None, <update that clears the audio>)``.
    """
    if not filename_to_load:
        return None, None, gr.update(value=None, autoplay=False)

    try:
        log.info(f"Downloading voice: {filename_to_load}, Autoplay: {autoplay_on_load}")
        sr, wav = LOADER.load_audio(folder_link, filename_to_load)
        return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
    except Exception as e:
        log.error(f"GDrive download failed for {filename_to_load}: {e}")
        # gr.Error is an exception class and shows nothing unless raised.
        # gr.Warning displays the toast while still letting this handler
        # return the cleared-audio values.
        gr.Warning(f"Failed to load audio: {filename_to_load}. Error: {e}")
        return None, None, gr.update(value=None, autoplay=False)
|
200 |
|
201 |
+
def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
    """Create or update the user's annotation for one TTS item, with its trims.

    The annotation's stored trims are replaced wholesale: existing
    ``AudioTrim`` rows are deleted and one new row is added per entry of
    ``applied_trims_list``.

    Args:
        current_tts_id: Id of the TTS data row being annotated.
        session: Mapping that provides ``"user_id"`` via ``.get``.
        ann_text_to_save: Text stored as ``annotated_sentence``.
        applied_trims_list: Iterable of dicts with ``'start_sec'`` and
            ``'end_sec'`` (seconds); stored in milliseconds. May be empty.

    Returns:
        None. Outcome is reported to the user through Gradio toasts.
    """
    annotator_id = session.get("user_id")
    if not current_tts_id or not annotator_id:
        # gr.Error only shows when raised; gr.Warning displays immediately.
        gr.Warning("Cannot save: Missing TTS ID or User ID.")
        return

    with get_db() as db:
        try:
            annotation_obj = (
                db.query(Annotation)
                .filter_by(tts_data_id=current_tts_id, annotator_id=annotator_id)
                .options(orm.joinedload(Annotation.audio_trims))
                .first()
            )

            if not annotation_obj:
                annotation_obj = Annotation(
                    tts_data_id=current_tts_id, annotator_id=annotator_id
                )
                db.add(annotation_obj)

            annotation_obj.annotated_sentence = ann_text_to_save
            annotation_obj.annotated_at = datetime.datetime.utcnow()

            # 1. Drop whatever trims were stored before.
            if annotation_obj.audio_trims:
                for old_trim in annotation_obj.audio_trims:
                    db.delete(old_trim)
                annotation_obj.audio_trims = []

            # 2. Store the trims currently applied in the UI.
            if applied_trims_list:
                if annotation_obj.id is None:
                    # New annotation: flush so the primary key is assigned.
                    db.flush()
                    if annotation_obj.id is None:
                        gr.Warning("Failed to get annotation ID for saving new trims.")
                        db.rollback()
                        return

                for trim_info in applied_trims_list:
                    db.add(
                        AudioTrim(
                            annotation_id=annotation_obj.id,
                            original_tts_data_id=current_tts_id,
                            start=trim_info['start_sec'] * 1000.0,  # seconds -> ms
                            end=trim_info['end_sec'] * 1000.0,
                        )
                    )
                log.info(f"Saved {len(applied_trims_list)} trims for annotation {annotation_obj.id} (TTS ID: {current_tts_id}).")
            else:
                log.info(f"No trims applied for {current_tts_id}, any existing DB trims were cleared.")

            db.commit()
            gr.Info(f"Annotation for ID {current_tts_id} saved.")
        except Exception as e:
            db.rollback()
            log.error(f"Failed to save annotation for {current_tts_id}: {e}")
            # Surface the failure without raising, so the caller's event
            # chain receives this handler's normal (None) result.
            gr.Warning(f"Save failed: {e}")
|
264 |
|
265 |
def show_current_item_fn(items, idx, session):
    """Produce the UI values for the item at ``items[idx]``.

    Args:
        items: List of item dicts with ``"id"``, ``"filename"`` and ``"sentence"``.
        idx: Index of the item to show.
        session: Mapping that provides ``"user_id"`` via ``.get``.

    Returns:
        A 10-tuple: ``(id, filename, sentence, annotated_text, None,
        trim_start, trim_end, trims_list, trims_table_data, audio_update)``.
        The trim inputs are always ``None`` and the audio update always clears
        the player without autoplay. Stored trims are returned in seconds.
        An out-of-range ``idx`` or empty ``items`` yields empty values.
    """
    trims_list_sec = []
    trims_df_data = self._convert_trims_to_df_data([])
    ui_trim_start_sec = None
    ui_trim_end_sec = None

    if not items or idx >= len(items) or idx < 0:
        return ("", "", "", "", None, ui_trim_start_sec, ui_trim_end_sec,
                trims_list_sec, trims_df_data,
                gr.update(value=None, autoplay=False))

    current_item = items[idx]
    tts_data_id = current_item.get("id")
    annotator_id = session.get("user_id")
    ann_text = ""

    if tts_data_id and annotator_id:
        with get_db() as db:
            try:
                existing_annotation = (
                    db.query(Annotation)
                    .filter_by(tts_data_id=tts_data_id, annotator_id=annotator_id)
                    .options(orm.joinedload(Annotation.audio_trims))
                    .first()
                )
                if existing_annotation:
                    ann_text = existing_annotation.annotated_sentence or ""
                    if existing_annotation.audio_trims:
                        # Stored in milliseconds; the UI works in seconds.
                        trims_list_sec = [
                            {
                                'start_sec': trim.start / 1000.0,
                                'end_sec': trim.end / 1000.0,
                            }
                            for trim in existing_annotation.audio_trims
                        ]
                        trims_df_data = self._convert_trims_to_df_data(trims_list_sec)
            except Exception as e:
                log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}")
                # gr.Error shows nothing unless raised; gr.Warning displays the
                # message and still lets the item be rendered below.
                gr.Warning(f"Error loading annotation details: {e}")

    return (
        current_item.get("id", ""), current_item.get("filename", ""),
        current_item.get("sentence", ""), ann_text,
        None,
        ui_trim_start_sec, ui_trim_end_sec,
        trims_list_sec,
        trims_df_data,
        # Never autoplay when merely switching items.
        gr.update(value=None, autoplay=False),
    )
|
311 |
|
312 |
def navigate_idx_fn(items, current_idx, direction):
|
|
|
315 |
return new_idx
|
316 |
|
317 |
def load_all_items_fn(sess):
    """Load the user's assigned items and pick where to resume.

    Args:
        sess: Mapping that provides ``"user_id"`` and ``"user_name"`` via ``.get``.

    Returns:
        A 13-element list: ``[items, start_index]``, followed by the ten
        values of ``show_current_item_fn`` for the start item, followed by
        the progress string. The start index is the first item without a
        non-empty annotation, or the last item when all are annotated, or 0
        when nothing is assigned. Without a user id, empty defaults are
        returned.
    """
    user_id = sess.get("user_id")
    user_name = sess.get("user_name")  # used for log messages only
    items_to_load = []
    initial_idx = 0

    if not user_id:
        log.warning("load_all_items_fn: user_id not found in session. Dashboard will display default state until login completes and data is refreshed.")
        # Same shape as the show_current_item_fn result, all empty.
        empty_item_display = (
            "", "", "", "", None, None, None, [],
            self._convert_trims_to_df_data([]),
            gr.update(value=None, autoplay=False),
        )
        return [[], 0] + list(empty_item_display) + ["Progress: Waiting for login..."]

    with get_db() as db:
        try:
            repo = AnnotatorWorkloadRepo(db)
            raw_items = repo.get_tts_data_with_annotations_for_user_id(user_id)

            items_to_load = [
                {
                    "id": item["tts_data"].id,
                    "filename": item["tts_data"].filename,
                    "sentence": item["tts_data"].sentence,
                    "annotated": item["annotation"] is not None and (
                        item["annotation"].annotated_sentence is not None
                        and item["annotation"].annotated_sentence != ""
                    ),
                }
                for item in raw_items
            ]
            log.info(f"Loaded {len(items_to_load)} items for user {user_name} (ID: {user_id})")

            # Resume at the first item still lacking an annotation.
            first_unannotated_idx = next(
                (i for i, item_data in enumerate(items_to_load) if not item_data["annotated"]),
                -1,
            )

            if first_unannotated_idx != -1:
                initial_idx = first_unannotated_idx
                log.info(f"Resuming at first unannotated item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
            elif items_to_load:
                # Everything is annotated: start at the last item.
                initial_idx = len(items_to_load) - 1
                log.info(f"All items annotated, starting at last item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
            else:
                initial_idx = 0
                log.info("No items assigned to user.")

        except Exception as e:
            log.error(f"Failed to load items or determine resume index for user {user_name}: {e}")
            # gr.Error shows nothing unless raised; gr.Warning displays the
            # message while the dashboard still renders what was loaded.
            gr.Warning(f"Could not load your assigned data: {e}")

    initial_ui_values = show_current_item_fn(items_to_load, initial_idx, sess)
    progress_str = get_user_progress_fn(sess)
    return [items_to_load, initial_idx] + list(initial_ui_values) + [progress_str]
|
377 |
|
378 |
def jump_by_data_id_fn(items, target_data_id_str, current_idx):
    """Resolve a typed-in Data ID to its position in ``items``.

    Returns the index of the first item whose ``id`` equals the requested
    value. When the input is empty, is not an integer, or matches no item,
    ``current_idx`` is returned unchanged (with a warning toast in the last
    two cases).
    """
    if not target_data_id_str:
        return current_idx

    try:
        wanted_id = int(target_data_id_str)
        match_pos = next(
            (pos for pos, entry in enumerate(items) if entry.get("id") == wanted_id),
            None,
        )
        if match_pos is not None:
            return match_pos
        gr.Warning(f"Data ID {wanted_id} not found in your assigned items.")
    except ValueError:
        gr.Warning(f"Invalid Data ID format: {target_data_id_str}")

    return current_idx
|
388 |
|
389 |
+
def delete_db_and_ui_fn(items, current_idx, session, original_audio_data_state):
    """Delete the current user's annotation for the shown item and reset the editor.

    Args:
        items: Item dicts (keys ``id``, ``filename``, ``sentence``) from the items state.
        current_idx: Index of the item currently displayed.
        session: Session dict; ``user_id`` selects whose annotation is removed.
        original_audio_data_state: Untrimmed audio currently loaded, or a falsy
            value when no audio has been loaded.

    Returns:
        A 13-tuple in the order of ``outputs_for_delete``: items, index, TTS id,
        filename, sentence, annotated sentence, audio value, trim start, trim
        end, applied trims list, trims table data, audio update, progress text.
    """
    # Cleared editor state; returned on every path.
    new_ann_sentence = ""
    new_trim_start_sec_ui = None
    new_trim_end_sec_ui = None
    new_applied_trims_list = []
    new_trims_df_data = self._convert_trims_to_df_data([])

    # With all trims gone the preview falls back to the untrimmed audio, if any is loaded.
    if original_audio_data_state:
        audio_to_display_after_delete = original_audio_data_state
        audio_update_obj_after_delete = gr.update(value=original_audio_data_state, autoplay=False)
    else:
        audio_to_display_after_delete = None
        audio_update_obj_after_delete = gr.update(value=None, autoplay=False)

    if not items or current_idx >= len(items) or current_idx < 0:
        progress_str_err = get_user_progress_fn(session)
        return (items, current_idx, "", "", "", new_ann_sentence, audio_to_display_after_delete,
                new_trim_start_sec_ui, new_trim_end_sec_ui, new_applied_trims_list, new_trims_df_data,
                audio_update_obj_after_delete, progress_str_err)

    current_item = items[current_idx]
    tts_id_val = current_item.get("id", "")
    filename_val = current_item.get("filename", "")
    sentence_val = current_item.get("sentence", "")

    tts_data_id_to_clear = tts_id_val
    annotator_id_for_clear = session.get("user_id")

    if tts_data_id_to_clear and annotator_id_for_clear:
        with get_db() as db:
            try:
                annotation_obj = (
                    db.query(Annotation)
                    .filter_by(tts_data_id=tts_data_id_to_clear, annotator_id=annotator_id_for_clear)
                    .options(orm.joinedload(Annotation.audio_trims))
                    .first()
                )
                if annotation_obj:
                    # The AudioTrim children are removed with the annotation: the
                    # Annotation.audio_trims relationship uses cascade="all, delete-orphan".
                    db.delete(annotation_obj)
                    db.commit()
                    gr.Info(f"Annotation and associated trims for ID {tts_data_id_to_clear} deleted from DB.")
                else:
                    gr.Warning(f"No DB annotation found to delete for ID {tts_data_id_to_clear}.")
            except Exception as e:
                db.rollback()
                log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}")
                # gr.Error is an exception class: building it without `raise` shows
                # nothing. gr.Warning surfaces the message while still letting this
                # handler return its outputs.
                gr.Warning(f"Failed to delete annotation from database: {e}")
    else:
        # Same reason as above: an un-raised gr.Error would be silent.
        gr.Warning("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")

    progress_str = get_user_progress_fn(session)

    return (items, current_idx, tts_id_val, filename_val, sentence_val,
            new_ann_sentence, audio_to_display_after_delete, new_trim_start_sec_ui, new_trim_end_sec_ui,
            new_applied_trims_list, new_trims_df_data, audio_update_obj_after_delete, progress_str)
|
459 |
+
|
460 |
+
# ---- New Trim Callbacks ----
|
461 |
+
def add_trim_and_reprocess_ui_fn(start_s, end_s, current_trims_list, original_audio_data):
    """Append a trim interval and rebuild the audio preview.

    Args:
        start_s: Start of the span to cut, in seconds (``None`` when the field is empty).
        end_s: End of the span to cut, in seconds (``None`` when the field is empty).
        current_trims_list: Trims already applied, as dicts with ``start_sec`` / ``end_sec``.
        original_audio_data: Untrimmed audio the trims are applied to.

    Returns:
        A 6-tuple in the order of the Trim button's outputs: trims list, trims
        table data, audio value, audio update, start field value, end field value.
    """
    # Tolerate an unset state value instead of failing on `None + [...]`.
    existing_trims = list(current_trims_list or [])

    if start_s is None or end_s is None or not (end_s > start_s and start_s >= 0):
        gr.Warning("Invalid trim times. Start must be >= 0 and End > Start.")
        # Re-render with the trims that are still in effect. Returning the raw
        # original audio here would show untrimmed audio next to a non-empty
        # trims table.
        processed_audio_data, audio_update = self._apply_multiple_trims_fn(
            original_audio_data, existing_trims
        )
        # Leave the rejected values in the inputs so the user can correct them.
        return (existing_trims, self._convert_trims_to_df_data(existing_trims),
                processed_audio_data, audio_update,
                start_s, end_s)

    new_trim = {'start_sec': float(start_s), 'end_sec': float(end_s)}
    updated_trims_list = existing_trims + [new_trim]

    processed_audio_data, audio_update = self._apply_multiple_trims_fn(original_audio_data, updated_trims_list)

    # Empty both inputs so the next trim starts from blank fields.
    ui_trim_start_sec_reset = None
    ui_trim_end_sec_reset = None

    return (updated_trims_list, self._convert_trims_to_df_data(updated_trims_list),
            processed_audio_data, audio_update,
            ui_trim_start_sec_reset, ui_trim_end_sec_reset)
|
481 |
+
|
482 |
+
def undo_last_trim_and_reprocess_ui_fn(current_trims_list, original_audio_data):
    """Drop the most recently added trim and rebuild the audio preview.

    Returns a 4-tuple in the order of the Undo button's outputs: trims list,
    trims table data, audio value, audio update. With no trims to undo, the
    inputs are handed back unchanged and an info toast is shown.
    """
    if current_trims_list:
        remaining_trims = current_trims_list[:-1]
        preview_audio, preview_update = self._apply_multiple_trims_fn(
            original_audio_data, remaining_trims
        )
        remaining_table = self._convert_trims_to_df_data(remaining_trims)
        return (remaining_trims, remaining_table, preview_audio, preview_update)

    gr.Info("No trims to undo.")
    unchanged_table = self._convert_trims_to_df_data(current_trims_list)
    unchanged_update = gr.update(value=original_audio_data, autoplay=False)
    return (current_trims_list, unchanged_table, original_audio_data, unchanged_update)
|
493 |
+
|
494 |
+
# ---- Callback Wiring ----
|
495 |
+
# outputs_for_display_item: Defines what `show_current_item_fn` and similar full display updates will populate.
|
496 |
+
# It expects 10 values from show_current_item_fn:
|
497 |
+
# (tts_id, filename, sentence, ann_text, audio_placeholder,
|
498 |
+
# trim_start_sec_ui, trim_end_sec_ui,
|
499 |
+
# applied_trims_list_state_val, trims_display_val, audio_update_obj)
|
500 |
+
outputs_for_display_item = [
|
501 |
self.tts_id, self.filename, self.sentence, self.ann_sentence,
|
502 |
+
self.audio, # This will receive the audio data (sr, wav) or None
|
503 |
+
self.trim_start_sec, self.trim_end_sec, # UI fields for new trim
|
504 |
+
self.applied_trims_list_state,
|
505 |
+
self.trims_display,
|
506 |
+
self.audio # This will receive the gr.update object for autoplay etc.
|
507 |
]
|
508 |
+
|
509 |
# Initial Load
|
510 |
+
# Chain: Disable UI -> Load Data (items, idx, initial UI values including trims list & df, progress) ->
|
511 |
+
# Update UI -> Enable UI
|
512 |
+
# Audio is NOT loaded here anymore.
|
513 |
+
|
514 |
root_blocks.load(
|
515 |
+
fn=lambda: update_ui_interactive_state(False),
|
516 |
outputs=self.interactive_ui_elements
|
517 |
).then(
|
518 |
+
fn=load_all_items_fn,
|
519 |
inputs=[session_state],
|
520 |
+
# Outputs: items_state, idx_state, tts_id, filename, sentence, ann_sentence,
|
521 |
+
# audio (None), trim_start_sec, trim_end_sec, applied_trims_list_state,
|
522 |
+
# trims_display, audio (update obj), progress_display
|
523 |
+
outputs=[self.items_state, self.idx_state] + outputs_for_display_item + [self.header.progress_display],
|
524 |
).then(
|
525 |
+
# Explicitly set original_audio_state to None and clear audio display as it's not loaded.
|
526 |
+
# show_current_item_fn already sets self.audio to (None, gr.update(value=None, autoplay=False))
|
527 |
+
# We also need to ensure original_audio_state is None if no audio is loaded.
|
528 |
+
lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
|
529 |
+
outputs=[self.original_audio_state, self.audio, self.audio]
|
530 |
).then(
|
531 |
+
fn=lambda: update_ui_interactive_state(True),
|
|
|
|
|
|
|
|
|
532 |
outputs=self.interactive_ui_elements
|
533 |
)
|
534 |
|
535 |
+
# Navigation (Prev/Save & Next/Next No Save)
|
536 |
+
# Audio is NOT loaded here anymore.
|
537 |
+
for btn_widget, direction_str, performs_save in [
|
538 |
+
(self.btn_prev, "prev", False),
|
539 |
+
(self.btn_save_next, "next", True),
|
540 |
+
(self.btn_next_no_save, "next", False)
|
541 |
]:
|
542 |
event_chain = btn_widget.click(
|
543 |
fn=lambda: update_ui_interactive_state(False),
|
544 |
outputs=self.interactive_ui_elements
|
545 |
)
|
546 |
+
if performs_save:
|
547 |
event_chain = event_chain.then(
|
548 |
fn=save_annotation_db_fn,
|
549 |
inputs=[
|
550 |
self.tts_id, session_state, self.ann_sentence,
|
551 |
+
self.applied_trims_list_state,
|
552 |
],
|
553 |
+
outputs=None
|
554 |
+
).then(
|
555 |
+
fn=get_user_progress_fn,
|
556 |
+
inputs=[session_state],
|
557 |
+
outputs=self.header.progress_display
|
558 |
)
|
559 |
+
|
560 |
+
event_chain = event_chain.then(
|
561 |
+
fn=navigate_idx_fn,
|
562 |
inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
|
563 |
outputs=self.idx_state,
|
564 |
).then(
|
565 |
fn=show_current_item_fn,
|
566 |
inputs=[self.items_state, self.idx_state, session_state],
|
567 |
+
outputs=outputs_for_display_item,
|
568 |
).then(
|
569 |
+
# Explicitly set original_audio_state to None and clear audio display as it's not loaded.
|
570 |
+
lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
|
571 |
+
outputs=[self.original_audio_state, self.audio, self.audio]
|
572 |
).then(
|
573 |
+
lambda: gr.update(value=None), # Clear jump input
|
574 |
+
outputs=self.jump_data_id_input
|
|
|
575 |
).then(
|
576 |
fn=lambda: update_ui_interactive_state(True),
|
577 |
outputs=self.interactive_ui_elements
|
578 |
)
|
579 |
+
|
580 |
+
# Audio is NOT loaded here anymore.
|
581 |
+
self.btn_jump.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
fn=lambda: update_ui_interactive_state(False),
|
583 |
outputs=self.interactive_ui_elements
|
584 |
).then(
|
585 |
+
fn=jump_by_data_id_fn,
|
586 |
inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
|
587 |
+
outputs=self.idx_state
|
588 |
).then(
|
589 |
fn=show_current_item_fn,
|
590 |
inputs=[self.items_state, self.idx_state, session_state],
|
591 |
+
outputs=outputs_for_display_item
|
592 |
).then(
|
593 |
+
# Explicitly set original_audio_state to None and clear audio display as it's not loaded.
|
594 |
+
lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
|
595 |
+
outputs=[self.original_audio_state, self.audio, self.audio]
|
596 |
).then(
|
597 |
+
lambda: gr.update(value=None), # Clear jump input
|
598 |
+
outputs=self.jump_data_id_input
|
|
|
599 |
).then(
|
600 |
fn=lambda: update_ui_interactive_state(True),
|
601 |
outputs=self.interactive_ui_elements
|
602 |
)
|
603 |
|
604 |
+
# Load Audio Button - This is now the ONLY place audio is downloaded and processed.
|
605 |
+
self.btn_load_voice.click(
|
606 |
fn=lambda: update_ui_interactive_state(False),
|
607 |
outputs=self.interactive_ui_elements
|
608 |
).then(
|
609 |
+
fn=download_voice_fn,
|
610 |
+
inputs=[gr.State(GDRIVE_FOLDER), self.filename, gr.State(True)], # Autoplay TRUE
|
611 |
+
outputs=[self.audio, self.original_audio_state, self.audio],
|
612 |
+
).then(
|
613 |
+
fn=self._apply_multiple_trims_fn,
|
614 |
+
inputs=[self.original_audio_state, self.applied_trims_list_state],
|
615 |
+
outputs=[self.audio, self.audio]
|
616 |
).then(
|
617 |
fn=lambda: update_ui_interactive_state(True),
|
618 |
outputs=self.interactive_ui_elements
|
619 |
)
|
620 |
+
|
621 |
+
# Copy Sentence Button
|
622 |
+
self.btn_copy_sentence.click(
|
623 |
+
fn=lambda s: s, inputs=self.sentence, outputs=self.ann_sentence
|
624 |
+
)
|
625 |
|
626 |
+
# Trim Button
|
627 |
+
self.btn_trim.click(
|
628 |
+
fn=add_trim_and_reprocess_ui_fn,
|
629 |
+
inputs=[self.trim_start_sec, self.trim_end_sec, self.applied_trims_list_state, self.original_audio_state],
|
630 |
+
outputs=[self.applied_trims_list_state, self.trims_display,
|
631 |
+
self.audio, self.audio,
|
632 |
+
self.trim_start_sec, self.trim_end_sec]
|
633 |
+
)
|
634 |
+
|
635 |
+
# Undo Trim Button
|
636 |
self.btn_undo_trim.click(
|
637 |
+
fn=undo_last_trim_and_reprocess_ui_fn,
|
638 |
+
inputs=[self.applied_trims_list_state, self.original_audio_state],
|
639 |
+
outputs=[self.applied_trims_list_state, self.trims_display, self.audio, self.audio]
|
|
|
|
|
|
|
|
|
|
|
|
|
640 |
)
|
641 |
|
642 |
+
# Delete Button
|
643 |
+
outputs_for_delete = [
|
644 |
+
self.items_state, self.idx_state, self.tts_id, self.filename, self.sentence,
|
645 |
+
self.ann_sentence, self.audio, self.trim_start_sec, self.trim_end_sec,
|
646 |
+
self.applied_trims_list_state, self.trims_display, self.audio, self.header.progress_display
|
647 |
+
]
|
648 |
self.btn_delete.click(
|
649 |
fn=lambda: update_ui_interactive_state(False),
|
650 |
outputs=self.interactive_ui_elements
|
651 |
).then(
|
652 |
fn=delete_db_and_ui_fn,
|
653 |
+
inputs=[self.items_state, self.idx_state, session_state, self.original_audio_state],
|
654 |
+
outputs=outputs_for_delete
|
655 |
+
).then(
|
656 |
fn=lambda: update_ui_interactive_state(True),
|
657 |
outputs=self.interactive_ui_elements
|
658 |
)
|
659 |
|
660 |
return self.container
|
661 |
+
|
662 |
+
def _apply_multiple_trims_fn(self, original_audio_data, trims_list_sec):
    """Cut every listed interval out of the original audio.

    Args:
        original_audio_data: ``(sample_rate, samples)`` tuple, or a falsy value
            when no audio is loaded. ``samples`` is a NumPy array.
            NOTE(review): slicing assumes axis 0 is the sample axis (mono
            ``(n,)`` or multi-channel ``(n, channels)``) -- confirm against the loader.
        trims_list_sec: Dicts with ``start_sec`` / ``end_sec`` (seconds) naming
            the spans to delete. Overlapping spans are merged first.

    Returns:
        ``(audio, update)``: ``audio`` is ``(sample_rate, samples)`` or ``None``
        when there is no input audio; ``update`` is a ``gr.update`` carrying the
        same value with autoplay disabled.
    """
    if not original_audio_data:
        log.warning("apply_multiple_trims_fn: No original audio data.")
        return None, gr.update(value=None, autoplay=False)

    sr, wav_orig = original_audio_data
    total_samples = len(wav_orig)

    if not trims_list_sec:  # No trims to apply
        log.info("apply_multiple_trims_fn: No trims in list, returning original audio.")
        return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)

    # Convert each trim from seconds to a clamped [start, end) sample range.
    delete_intervals_samples = []
    for trim_info in trims_list_sec:
        start_s = trim_info.get('start_sec')
        end_s = trim_info.get('end_sec')
        if start_s is None or end_s is None or not (end_s > start_s and start_s >= 0):
            log.warning(f"apply_multiple_trims_fn: Invalid trim skipped: {trim_info}")
            continue
        start_sample = max(0, min(int(sr * start_s), total_samples))
        end_sample = max(start_sample, min(int(sr * end_s), total_samples))
        if start_sample >= end_sample:
            # Span lies entirely past the end of the audio (or rounds to nothing).
            log.warning(f"apply_multiple_trims_fn: Invalid trim skipped: {trim_info}")
            continue
        delete_intervals_samples.append((start_sample, end_sample))

    if not delete_intervals_samples:
        log.info("apply_multiple_trims_fn: No valid trims to apply, returning original audio.")
        return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)

    delete_intervals_samples.sort(key=lambda x: x[0])

    # Merge overlapping or touching spans so each sample is dropped at most once.
    merged_delete_intervals = []
    current_start, current_end = delete_intervals_samples[0]
    for next_start, next_end in delete_intervals_samples[1:]:
        if next_start <= current_end:
            current_end = max(current_end, next_end)
        else:
            merged_delete_intervals.append((current_start, current_end))
            current_start, current_end = next_start, next_end
    merged_delete_intervals.append((current_start, current_end))

    log.info(f"apply_multiple_trims_fn: Original wav shape: {wav_orig.shape}, Merged delete intervals (samples): {merged_delete_intervals}")

    # Collect the stretches between deleted spans, then the tail after the last one.
    kept_parts_wav = []
    current_pos_samples = 0
    for del_start, del_end in merged_delete_intervals:
        if del_start > current_pos_samples:
            kept_parts_wav.append(wav_orig[current_pos_samples:del_start])
        current_pos_samples = del_end

    if current_pos_samples < total_samples:
        kept_parts_wav.append(wav_orig[current_pos_samples:])

    if not kept_parts_wav:
        # A zero-length slice keeps dtype and any trailing channel axis;
        # np.array([]) would always be 1-D.
        final_wav = wav_orig[:0].copy()
        log.info("apply_multiple_trims_fn: All audio trimmed, resulting in empty audio.")
    else:
        final_wav = np.concatenate(kept_parts_wav)
    log.info(f"apply_multiple_trims_fn: Final wav shape after trimming: {final_wav.shape}")

    return (sr, final_wav), gr.update(value=(sr, final_wav), autoplay=False)
|
724 |
+
|
725 |
+
def _convert_trims_to_df_data(self, trims_list_sec):
|
726 |
+
if not trims_list_sec:
|
727 |
+
return None # For gr.DataFrame, None clears it
|
728 |
+
return [[f"{t['start_sec']:.3f}", f"{t['end_sec']:.3f}"] for t in trims_list_sec]
|
components/header.py
CHANGED
@@ -8,17 +8,25 @@ class Header:
|
|
8 |
def __init__(self):
|
9 |
with gr.Row(variant="panel", elem_classes="header-row") as self.container:
|
10 |
self.welcome = gr.Markdown()
|
|
|
11 |
self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
|
12 |
|
13 |
# ---------------- wiring ----------------
|
14 |
def register_callbacks(self, login_page, dashboard_page, session_state):
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
self.logout_btn.click(
|
16 |
-
fn=
|
17 |
-
inputs=[session_state],
|
18 |
outputs=[
|
19 |
-
login_page.container,
|
20 |
-
dashboard_page.container,
|
21 |
-
self.welcome,
|
22 |
-
login_page.message,
|
|
|
23 |
],
|
24 |
)
|
|
|
8 |
def __init__(self):
    """Build the header row: welcome text, progress text and the log-out button."""
    header_row = gr.Row(variant="panel", elem_classes="header-row")
    self.container = header_row
    with header_row:
        self.welcome = gr.Markdown()
        # Progress text shown next to the welcome message; starts empty.
        self.progress_display = gr.Markdown("")
        self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
|
13 |
|
14 |
# ---------------- wiring ----------------
|
15 |
def register_callbacks(self, login_page, dashboard_page, session_state):
    """Wire the log-out button.

    Clicking it runs ``AuthService.logout`` on the session state and writes
    the result to the login container, dashboard container, welcome text and
    login message, followed by an empty string that clears the progress text.
    """
    def _logout_then_reset_progress(current_session_state):
        # AuthService.logout is expected to return 4 values for the first four
        # outputs; the trailing "" blanks progress_display.
        return [*AuthService.logout(current_session_state), ""]

    logout_targets = [
        login_page.container,
        dashboard_page.container,
        self.welcome,
        login_page.message,
        self.progress_display,  # Cleared on logout
    ]
    self.logout_btn.click(
        fn=_logout_then_reset_progress,
        inputs=[session_state],
        outputs=logout_targets,
    )
|
components/login_page.py
CHANGED
@@ -48,8 +48,6 @@ class LoginPage:
|
|
48 |
dashboard_page.filename,
|
49 |
dashboard_page.sentence,
|
50 |
dashboard_page.ann_sentence,
|
51 |
-
# dashboard_page.ann_at,
|
52 |
-
dashboard_page.validated,
|
53 |
],
|
54 |
)
|
55 |
.then(
|
|
|
48 |
dashboard_page.filename,
|
49 |
dashboard_page.sentence,
|
50 |
dashboard_page.ann_sentence,
|
|
|
|
|
51 |
],
|
52 |
)
|
53 |
.then(
|
data/models.py
CHANGED
@@ -119,12 +119,12 @@ class Annotation(Base):
|
|
119 |
tts_data = relationship("TTSData", back_populates="annotations")
|
120 |
annotator = relationship("Annotator", back_populates="annotations")
|
121 |
|
122 |
-
# Relationship to AudioTrim (one-to-
|
123 |
-
|
124 |
"AudioTrim",
|
125 |
back_populates="annotation",
|
126 |
-
uselist=
|
127 |
-
cascade="all, delete-orphan" # If annotation is deleted, delete its
|
128 |
)
|
129 |
|
130 |
|
@@ -135,13 +135,13 @@ class AudioTrim(Base):
|
|
135 |
__tablename__ = "audio_trims"
|
136 |
|
137 |
id = Column(Integer, primary_key=True)
|
138 |
-
annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False
|
139 |
original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
|
140 |
start = Column(Float, nullable=False)
|
141 |
end = Column(Float, nullable=False)
|
142 |
|
143 |
# Relationship back to Annotation
|
144 |
-
annotation = relationship("Annotation", back_populates="
|
145 |
original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
|
146 |
|
147 |
# --------------------------------------------------------------------------- #
|
|
|
119 |
tts_data = relationship("TTSData", back_populates="annotations")
|
120 |
annotator = relationship("Annotator", back_populates="annotations")
|
121 |
|
122 |
+
# Relationship to AudioTrim (one-to-MANY)
|
123 |
+
audio_trims = relationship( # Renamed from audio_trim
|
124 |
"AudioTrim",
|
125 |
back_populates="annotation",
|
126 |
+
uselist=True, # Important for one-to-many
|
127 |
+
cascade="all, delete-orphan" # If annotation is deleted, delete its trims too
|
128 |
)
|
129 |
|
130 |
|
|
|
135 |
__tablename__ = "audio_trims"
|
136 |
|
137 |
id = Column(Integer, primary_key=True)
|
138 |
+
annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False) # Removed unique=True
|
139 |
original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
|
140 |
start = Column(Float, nullable=False)
|
141 |
end = Column(Float, nullable=False)
|
142 |
|
143 |
# Relationship back to Annotation
|
144 |
+
annotation = relationship("Annotation", back_populates="audio_trims") # Renamed from audio_trim
|
145 |
original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
|
146 |
|
147 |
# --------------------------------------------------------------------------- #
|
data/repository/annotator_workload_repo.py
CHANGED
@@ -17,6 +17,21 @@ class AnnotatorWorkloadRepo:
|
|
17 |
|
18 |
def get_tts_data_with_annotations(
|
19 |
self, annotator_name: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
) -> List[Dict[str, Optional[Any]]]:
|
21 |
"""
|
22 |
output: [
|
@@ -24,11 +39,6 @@ class AnnotatorWorkloadRepo:
|
|
24 |
...
|
25 |
]
|
26 |
"""
|
27 |
-
|
28 |
-
annotator = self.annotator_repo.get_annotator_by_name(annotator_name)
|
29 |
-
if annotator is None:
|
30 |
-
raise ValueError(f"Annotator '{annotator_name}' not found")
|
31 |
-
|
32 |
query = (
|
33 |
self.db.query(
|
34 |
TTSData,
|
@@ -37,7 +47,7 @@ class AnnotatorWorkloadRepo:
|
|
37 |
.join(
|
38 |
AnnotationInterval,
|
39 |
and_(
|
40 |
-
AnnotationInterval.annotator_id ==
|
41 |
TTSData.id >= AnnotationInterval.start_index,
|
42 |
TTSData.id <= AnnotationInterval.end_index,
|
43 |
),
|
@@ -46,13 +56,15 @@ class AnnotatorWorkloadRepo:
|
|
46 |
Annotation,
|
47 |
and_(
|
48 |
Annotation.tts_data_id == TTSData.id,
|
49 |
-
Annotation.annotator_id ==
|
50 |
),
|
51 |
)
|
52 |
.order_by(TTSData.id)
|
53 |
-
).distinct(TTSData.id)
|
54 |
|
55 |
rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
|
56 |
|
57 |
-
log.info(
|
|
|
|
|
58 |
return rows
|
|
|
17 |
|
18 |
def get_tts_data_with_annotations(
    self, annotator_name: str
) -> List[Dict[str, Optional[Any]]]:
    """Name-based wrapper around ``get_tts_data_with_annotations_for_user_id``.

    Kept for callers that still pass an annotator name; new code should use
    the ID-based method directly. Returns an empty list (and logs a warning)
    when no annotator with that name exists.
    """
    found = self.annotator_repo.get_annotator_by_name(annotator_name)
    if found is not None:
        return self.get_tts_data_with_annotations_for_user_id(
            found.id, annotator_name
        )

    log.warning(
        f"Annotator '{annotator_name}' not found in get_tts_data_with_annotations. Returning empty list."
    )
    return []
|
32 |
+
|
33 |
+
def get_tts_data_with_annotations_for_user_id(
|
34 |
+
self, annotator_id: int, annotator_name_for_log: str = "Unknown"
|
35 |
) -> List[Dict[str, Optional[Any]]]:
|
36 |
"""
|
37 |
output: [
|
|
|
39 |
...
|
40 |
]
|
41 |
"""
|
|
|
|
|
|
|
|
|
|
|
42 |
query = (
|
43 |
self.db.query(
|
44 |
TTSData,
|
|
|
47 |
.join(
|
48 |
AnnotationInterval,
|
49 |
and_(
|
50 |
+
AnnotationInterval.annotator_id == annotator_id,
|
51 |
TTSData.id >= AnnotationInterval.start_index,
|
52 |
TTSData.id <= AnnotationInterval.end_index,
|
53 |
),
|
|
|
56 |
Annotation,
|
57 |
and_(
|
58 |
Annotation.tts_data_id == TTSData.id,
|
59 |
+
Annotation.annotator_id == annotator_id,
|
60 |
),
|
61 |
)
|
62 |
.order_by(TTSData.id)
|
63 |
+
).distinct(TTSData.id) # Ensure distinct TTSData items
|
64 |
|
65 |
rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
|
66 |
|
67 |
+
log.info(
|
68 |
+
f"{len(rows)} TTS rows fetched for annotator ID '{annotator_id}' (Name: {annotator_name_for_log})."
|
69 |
+
)
|
70 |
return rows
|
scripts/apply_custom_intervals.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
\
|
2 |
+
# scripts/apply_custom_intervals.py
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
|
6 |
+
# Add project root to Python path
|
7 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
8 |
+
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
|
9 |
+
if PROJECT_ROOT not in sys.path:
|
10 |
+
sys.path.insert(0, PROJECT_ROOT)
|
11 |
+
|
12 |
+
from utils.database import get_db
|
13 |
+
from data.models import AnnotationInterval # For direct query and deletion
|
14 |
+
from data.repository.annotator_repo import AnnotatorRepo
|
15 |
+
from data.repository.annotation_interval_repo import AnnotationIntervalRepo
|
16 |
+
from utils.logger import Logger
|
17 |
+
|
18 |
+
log = Logger()
|
19 |
+
|
20 |
+
# User-provided data splits
|
21 |
+
# Format: 'annotator_name': (start_id_from_json, end_id_from_json)
|
22 |
+
ANNOTATOR_SPLITS = {
|
23 |
+
'shahab': (0, 1982),
|
24 |
+
'amir': (1983, 3965),
|
25 |
+
'mohsen': (3966, 5948),
|
26 |
+
'mahya': (5949, 7931),
|
27 |
+
'najmeh': (7932, 9914),
|
28 |
+
'sepehr': (9915, 11897),
|
29 |
+
'zahra': (11898, 13880),
|
30 |
+
'moghim': (13881, 15862),
|
31 |
+
'amin': (15863, 17845)
|
32 |
+
}
|
33 |
+
|
34 |
+
def apply_custom_intervals():
    """Replace each listed annotator's intervals with the range in ``ANNOTATOR_SPLITS``.

    For every ``name -> (start, end)`` entry the annotator is looked up, any
    intervals already assigned to them are deleted, and the new interval is
    assigned with ``allow_overlap=False``. Unknown annotators and invalid
    ranges are logged and skipped. Errors are logged; nothing is raised to
    the caller.
    """
    log.info("Starting application of custom annotator intervals...")
    try:
        with get_db() as db:
            annot_repo = AnnotatorRepo(db)
            interval_repo = AnnotationIntervalRepo(db)

            for annotator_name, (start_idx_orig, end_idx_orig) in ANNOTATOR_SPLITS.items():
                log.info(f"Processing annotator: '{annotator_name}' with original range ({start_idx_orig}, {end_idx_orig})")

                annotator = annot_repo.get_annotator_by_name(annotator_name)
                if annotator is None:
                    # Without this guard `annotator.id` below raises AttributeError
                    # and the outer handler abandons every remaining annotator.
                    log.warning(f"Annotator '{annotator_name}' not found. Skipping.")
                    continue

                # Adjust start_idx if it's 0, assuming 1-based indexing for TTSData.id in the database.
                # If TTSData.id can legitimately be 0, this adjustment should be removed.
                start_idx = 1 if start_idx_orig == 0 else start_idx_orig
                end_idx = end_idx_orig

                if start_idx_orig == 0:
                    log.info(f"Adjusted start_index from 0 to 1 for '{annotator_name}' assuming 1-based TTSData IDs.")

                if start_idx > end_idx:
                    log.warning(f"Invalid range for '{annotator_name}': effective start_idx ({start_idx}) > end_idx ({end_idx}). Skipping.")
                    continue

                # Clear this annotator's existing intervals so the new one replaces them.
                # NOTE(review): if the assignment below is rejected, these deletes have
                # already been flushed, which looks like it leaves the annotator with no
                # interval once the session commits -- confirm this is acceptable.
                existing_intervals = db.query(AnnotationInterval).filter_by(annotator_id=annotator.id).all()
                if existing_intervals:
                    log.info(f"Deleting {len(existing_intervals)} existing intervals for annotator '{annotator.name}'.")
                    for interval in existing_intervals:
                        db.delete(interval)
                    db.flush()  # Process deletes before adding new ones

                # Assign new interval
                try:
                    new_interval = interval_repo.assign_interval_to_annotator(
                        annotator_id=annotator.id,
                        start_idx=start_idx,
                        end_idx=end_idx,
                        allow_overlap=False  # This will prevent assignment if it overlaps with others (unless intended)
                    )
                    log.info(f"Successfully assigned interval [{new_interval.start_index}, {new_interval.end_index}] to '{annotator_name}'.")
                except ValueError as e:
                    log.error(f"Could not assign interval to '{annotator_name}': {e}")
                except Exception as e:
                    # NOTE(review): dashboard_page.py dropped exc_info=True from its
                    # log.error calls -- confirm Logger.error accepts this keyword.
                    log.error(f"An unexpected error occurred while assigning interval to '{annotator_name}': {e}", exc_info=True)

            # NOTE(review): relies on get_db() committing on a clean exit -- confirm in utils/database.py.
            log.info("Custom interval application process completed.")

    except Exception as e:
        log.error(f"A critical error occurred during the custom interval application: {e}", exc_info=True)
        # NOTE(review): rollback is presumably done by the get_db context manager on exception -- confirm.
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
apply_custom_intervals()
|
scripts/distribute_workload.py
DELETED
@@ -1,170 +0,0 @@
|
|
1 |
-
import sys
|
2 |
-
import os
|
3 |
-
|
4 |
-
# Add project root to Python path
|
5 |
-
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
6 |
-
if project_root not in sys.path:
|
7 |
-
sys.path.insert(0, project_root)
|
8 |
-
|
9 |
-
import math
|
10 |
-
import random # Added for password generation
|
11 |
-
from sqlalchemy.sql import func
|
12 |
-
|
13 |
-
from utils.database import get_db
|
14 |
-
from data.models import TTSData
|
15 |
-
from data.repository.annotator_repo import AnnotatorRepo
|
16 |
-
from data.repository.annotation_interval_repo import AnnotationIntervalRepo
|
17 |
-
from utils.logger import Logger
|
18 |
-
|
19 |
-
log = Logger()
|
20 |
-
|
21 |
-
# --- Configuration ---
|
22 |
-
# List of annotator names to ensure exist and assign work to
|
23 |
-
ANNOTATOR_NAMES = ["shahab", "amir", "mohsen", "mahya", "najmeh", "sepehr", "zahra", "moghim", "amin"]
|
24 |
-
# DEFAULT_PASSWORD is no longer used for new users, random password will be generated.
|
25 |
-
|
26 |
-
def generate_random_password():
    """Return a random 4-digit numerical password as a string (1000-9999)."""
    # Passwords are credentials, so draw from the OS CSPRNG rather than the
    # predictable Mersenne Twister behind `random`. Imported locally because the
    # module's import block only brings in `random`.
    import secrets

    return str(1000 + secrets.randbelow(9000))
|
29 |
-
|
30 |
-
def _log_distribution_summary(processed_annotators_details, no_data=False):
    """Log one summary line per annotator: name, assigned range and password.

    Args:
        processed_annotators_details: list of dicts with the keys
            'annotator_obj', 'password_display', 'assigned_start' and
            'assigned_end'.
        no_data: when True every range is reported as "N/A (No data items)".
    """
    # NOTE(review): passwords of newly created users are written to the log
    # in clear text here -- confirm the log destination is appropriate.
    log.info("\\n--- Workload Distribution Summary ---")
    for details in processed_annotators_details:
        if no_data:
            range_str = "N/A (No data items)"
        elif details['assigned_start'] is not None and details['assigned_end'] is not None:
            range_str = f"[{details['assigned_start']}-{details['assigned_end']}]"
        else:
            range_str = "N/A (assignment skipped or failed)"
        log.info(f"Annotator: {details['annotator_obj'].name}, Assigned Range: {range_str}, Password: {details['password_display']}")


def distribute_workload():
    """Ensure the configured annotators exist and split TTSData ids among them.

    Steps:
      1. Look up every name in ANNOTATOR_NAMES, creating missing annotators
         with a freshly generated password.
      2. Count the TTSData rows.
      3. Split the count evenly; the first ``remainder`` annotators get one
         extra item.
      4. Assign consecutive, non-overlapping index intervals (starting at 1).
         Annotators that already own an interval are skipped.
      5. Log a summary.

    All errors are logged; nothing is raised to the caller.
    """
    log.info("Starting workload distribution script...")
    # Each entry: {'annotator_obj', 'password_display', 'assigned_start', 'assigned_end'}
    processed_annotators_details = []

    try:
        with get_db() as db:
            annot_repo = AnnotatorRepo(db)
            interval_repo = AnnotationIntervalRepo(db)

            # 1. Ensure all annotators exist, create if not, and collect details
            log.info("Processing annotators...")
            for name in ANNOTATOR_NAMES:
                annotator = annot_repo.get_annotator_by_name(name)
                password_to_display = "(existing user)"

                if annotator:
                    log.info(f"Found existing annotator '{name}' (id={annotator.id}).")
                else:
                    try:
                        new_password = generate_random_password()
                        log.info(f"Annotator '{name}' not found, creating with new password...")
                        annotator = annot_repo.add_new_annotator(name, new_password)
                        log.info(f"Annotator '{name}' (id={annotator.id}) created successfully with password '{new_password}'.")
                        password_to_display = new_password
                    except ValueError as e:
                        log.warning(f"Could not create annotator '{name}' (likely already exists or other DB issue): {e}. Attempting to fetch again.")
                        annotator = annot_repo.get_annotator_by_name(name)  # Try fetching again
                        if not annotator:
                            log.error(f"Failed to create or find annotator '{name}'. Skipping.")
                            continue
                        log.info(f"Found existing annotator '{name}' (id={annotator.id}) after creation attempt.")

                processed_annotators_details.append({
                    'annotator_obj': annotator,
                    'password_display': password_to_display,
                    'assigned_start': None,
                    'assigned_end': None,
                })

            if not processed_annotators_details:
                log.error("No annotators processed or found. Exiting.")
                return

            # 2. Get total number of TTSData items
            total_tts_items = db.query(func.count(TTSData.id)).scalar()
            if not total_tts_items:  # None or 0
                log.info("No TTSData items found in the database. Nothing to assign.")
                # Still print annotator info even if no items to assign
                _log_distribution_summary(processed_annotators_details, no_data=True)
                return
            log.info(f"Total TTSData items found: {total_tts_items}")

            # 3. Calculate distribution (the list is known to be non-empty here)
            num_annotators_for_assignment = len(processed_annotators_details)
            items_per_annotator_base, remainder_items = divmod(
                total_tts_items, num_annotators_for_assignment
            )

            log.info(f"Distributing {total_tts_items} items among {num_annotators_for_assignment} annotators.")
            log.info(f"Base items per annotator: {items_per_annotator_base}, Remainder: {remainder_items}")

            # 4. Assign intervals
            current_start_idx = 1  # Assuming TTSData IDs start from 1
            for details_dict in processed_annotators_details:
                annotator = details_dict['annotator_obj']
                num_items_for_this_annotator = items_per_annotator_base
                if remainder_items > 0:
                    num_items_for_this_annotator += 1
                    remainder_items -= 1

                if num_items_for_this_annotator == 0:
                    log.info(f"Annotator '{annotator.name}' assigned 0 items (total items might be less than annotators or workload already distributed).")
                    continue

                current_end_idx = min(
                    current_start_idx + num_items_for_this_annotator - 1,
                    total_tts_items,
                )

                if current_start_idx > current_end_idx:
                    log.info(f"No items to assign to '{annotator.name}' (start_idx {current_start_idx} > end_idx {current_end_idx}).")
                    continue

                log.info(f"Attempting to assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}' (id={annotator.id})")
                try:
                    existing_intervals = interval_repo.get_intervals_by_annotator(annotator.id)
                    if existing_intervals:
                        log.warning(f"Annotator '{annotator.name}' already has existing intervals. Skipping assignment to avoid conflicts. Manual review/cleanup of old intervals might be needed.")
                        # The start index is deliberately NOT advanced for a
                        # skipped annotator, so the next annotator's range
                        # begins at the same index. The skipped annotator's
                        # share is not otherwise re-distributed, so the tail
                        # of the data may stay unassigned.
                        continue

                    assigned_interval = interval_repo.assign_interval_to_annotator(
                        annotator_id=annotator.id,
                        start_idx=current_start_idx,
                        end_idx=current_end_idx,
                        allow_overlap=False,
                    )
                    details_dict['assigned_start'] = assigned_interval.start_index
                    details_dict['assigned_end'] = assigned_interval.end_index
                    log.info(
                        f"Successfully assigned interval [{details_dict['assigned_start']}-{details_dict['assigned_end']}] "
                        f"to '{annotator.name}' (id={annotator.id})"
                    )
                except ValueError as e:
                    log.error(f"Could not assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}': {e}")
                except Exception as e:
                    # Include the traceback: this branch means something
                    # unanticipated happened inside the repository layer.
                    log.error(f"An unexpected error occurred while assigning interval to '{annotator.name}': {e}", exc_info=True)

                # Advance past the range that was (attempted to be) assigned.
                current_start_idx = current_end_idx + 1
                if current_start_idx > total_tts_items:
                    break

            # 5. Print summary
            _log_distribution_summary(processed_annotators_details)

            log.info("Workload distribution script finished.")

    except Exception as e:
        log.error(f"An critical error occurred during workload distribution: {e}", exc_info=True)
|
168 |
-
|
169 |
-
if __name__ == "__main__":
|
170 |
-
distribute_workload()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/import_annotations_from_json.py
ADDED
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
\
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
# Adjust path to import project modules
|
8 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
9 |
+
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) # e.g. /home/psyborg/Desktop/tts_labeling
|
10 |
+
|
11 |
+
# Ensure the project root is at the beginning of sys.path
|
12 |
+
if PROJECT_ROOT in sys.path and sys.path[0] != PROJECT_ROOT:
|
13 |
+
sys.path.remove(PROJECT_ROOT) # Remove if it exists but not at index 0
|
14 |
+
if PROJECT_ROOT not in sys.path: # Add if it doesn't exist at all (it will be added at index 0)
|
15 |
+
sys.path.insert(0, PROJECT_ROOT)
|
16 |
+
|
17 |
+
from utils.database import get_db, SessionLocal # Changed Session to SessionLocal
|
18 |
+
from sqlalchemy.orm import Session as SQLAlchemySession # Import Session for type hinting
|
19 |
+
from data.models import TTSData, Annotator, Annotation, AudioTrim, AnnotationInterval # Added AnnotationInterval
|
20 |
+
from utils.logger import Logger
|
21 |
+
|
22 |
+
log = Logger()
|
23 |
+
|
24 |
+
ANNOTATIONS_FILE_PATH = os.path.join(PROJECT_ROOT, "annotations.json")
|
25 |
+
BATCH_SIZE = 100 # Define batch size for commits
|
26 |
+
|
27 |
+
def _interval_contains(interval, value):
    """Return True when ``interval`` is a (start, end) pair that includes ``value``."""
    if not interval:
        return False
    start, end = interval
    return start is not None and end is not None and start <= value <= end


def _resolve_annotated_sentence(sample_json_id, sample_data, json_ann, samples_by_id):
    """Choose the sentence stored for one annotation of a sample.

    Priority: ``original_subtitle`` of the sample with JSON id
    ``sample_json_id - 1``; then the annotation's ``annotated_subtitle``;
    then the current sample's ``original_subtitle``; finally "".
    """
    # NOTE(review): the original comments describe this as the "N+1 rule"
    # (use the *next* sample), yet the lookup uses ``id - 1``. The behaviour
    # is kept as written -- confirm which offset is intended.
    neighbour = samples_by_id.get(sample_json_id - 1)
    if neighbour:
        neighbour_sentence = neighbour.get("original_subtitle")
        if neighbour_sentence is not None:
            return neighbour_sentence

    sentence = json_ann.get("annotated_subtitle")
    if sentence is None:
        sentence = sample_data.get("original_subtitle")
    return sentence if sentence is not None else ""


def _parse_annotation_timestamp(annotated_at_str, sample_json_id, annotator_name):
    """Parse an ISO-like timestamp string; return None if absent or unparseable."""
    if not annotated_at_str:
        return None
    try:
        # fromisoformat() on older Pythons rejects a trailing 'Z'.
        return datetime.fromisoformat(annotated_at_str.replace('Z', '+00:00'))
    except ValueError:
        pass
    try:
        # Fallback: drop fractional seconds and anything after them.
        return datetime.strptime(annotated_at_str.split('.')[0], "%Y-%m-%dT%H:%M:%S")
    except ValueError as e_parse:
        log.error(f"Could not parse timestamp '{annotated_at_str}' for TTSData JSON ID {sample_json_id}, annotator {annotator_name}: {e_parse}")
        return None


def _build_audio_trims(json_ann, annotation_id, db_tts_data_id, sample_json_id):
    """Build (but do not add to the session) AudioTrim rows for one annotation.

    JSON trim bounds are in seconds and are converted to integer
    milliseconds. Trims with missing, malformed, negative or inverted bounds
    are logged and skipped.
    """
    trims = []
    for trim_info in json_ann.get("audio_trims") or []:
        start_sec = trim_info.get("start")
        end_sec = trim_info.get("end")
        if start_sec is None or end_sec is None:
            log.warning(f"Skipping trim with missing start/end for Annotation ID {annotation_id}, TTSData JSON ID {sample_json_id}: {trim_info}")
            continue
        try:
            start_ms = int(float(start_sec) * 1000.0)
            end_ms = int(float(end_sec) * 1000.0)
        except (TypeError, ValueError):
            log.warning(f"Invalid start/end format in audio trim for annotation ID {annotation_id}, TTSData JSON ID {sample_json_id}. Skipping: {trim_info}")
            continue
        if start_ms < 0 or end_ms < 0 or end_ms < start_ms:
            log.warning(f"Invalid trim values (start_ms={start_ms}, end_ms={end_ms}) for annotation ID {annotation_id}, TTSData JSON ID {sample_json_id}. Skipping.")
            continue
        trims.append(AudioTrim(
            annotation_id=annotation_id,
            original_tts_data_id=db_tts_data_id,
            start=start_ms,
            end=end_ms,
        ))
    return trims


def _commit_batch(db, annotation_ids, new_trims, sample_idx, total_samples, sample_json_id):
    """Replace trims for the batch's annotations and commit.

    Old trims are deleted BEFORE the new ones are added to the session, so
    the bulk delete can never remove rows created by this import. On failure
    the batch is rolled back and the error is logged. Both tracking
    collections are emptied in every case.
    """
    try:
        if annotation_ids:
            log.info(f"Batch deleting trims for {len(annotation_ids)} annotations in current batch.")
            db.query(AudioTrim).filter(
                AudioTrim.annotation_id.in_(list(annotation_ids))
            ).delete(synchronize_session=False)
        if new_trims:
            db.add_all(new_trims)
        db.commit()
        log.info(f"Committed batch. Total samples processed so far: {sample_idx + 1} out of {total_samples}")
    except Exception as e_commit:
        db.rollback()
        log.error(f"Failed to commit batch after sample index {sample_idx} (TTSData JSON ID {sample_json_id}): {e_commit}. Rolling back this batch.")
    finally:
        annotation_ids.clear()
        new_trims.clear()


def import_annotations(db: SQLAlchemySession, data: dict):  # Changed SessionLocal to SQLAlchemySession for type hint
    """Import annotations and audio trims from a parsed JSON export.

    For every entry in ``data["samples"]`` whose ``id`` matches a TTSData
    row, each JSON annotation is upserted as an Annotation keyed by
    (tts_data_id, annotator_id) and its audio trims replace the trims
    currently stored for that annotation.

    If the sample (checked as JSON id + 1) lies outside the named
    annotator's interval, the annotation is reassigned to whichever
    annotator owns an interval covering it, or skipped when nobody does.

    Work is committed every BATCH_SIZE processed samples and once more after
    the loop, so a trailing partial batch is never left unfinalised.

    Args:
        db: open SQLAlchemy session.
        data: parsed JSON document; only the "samples" list is read.
    """
    samples = data.get("samples", [])
    total_samples = len(samples)
    imported_count = 0
    updated_count = 0
    skipped_count = 0
    samples_processed_in_batch = 0

    # Caches to potentially reduce DB lookups within the script run
    tts_data_cache = {}
    annotator_cache = {}

    # Per-batch state: annotations whose stored trims must be replaced, and
    # the replacement trims. The trims are kept OUT of the session until the
    # old ones have been deleted (see _commit_batch).
    annotation_ids_for_trim_deletion_in_batch = set()
    new_trims_in_batch = []

    # Create a mapping from JSON ID to sample data for efficient lookup
    samples_by_id = {s.get("id"): s for s in samples if s.get("id") is not None}
    log.info(f"Created a map for {len(samples_by_id)} samples based on their JSON IDs.")

    # Load all annotator intervals from the database.
    # NOTE(review): keyed by annotator_id, so if an annotator owns several
    # intervals only the last one loaded is kept -- confirm one-per-annotator.
    db_intervals = db.query(AnnotationInterval).all()
    annotator_intervals = {interval.annotator_id: (interval.start_index, interval.end_index) for interval in db_intervals}
    log.info(f"Loaded {len(annotator_intervals)} annotator intervals from the database.")

    last_sample_idx = -1
    last_sample_json_id = None

    for sample_idx, sample_data in enumerate(samples):
        last_sample_idx = sample_idx
        current_sample_json_id = sample_data.get("id")
        if current_sample_json_id is None:  # Check for None explicitly
            log.warning("Sample missing ID, skipping.")
            skipped_count += 1
            continue
        last_sample_json_id = current_sample_json_id

        # TTSData rows are looked up by the raw JSON id, while interval
        # membership is checked against id + 1.
        effective_id_for_interval_check = current_sample_json_id + 1

        tts_data_entry = tts_data_cache.get(current_sample_json_id)
        if tts_data_entry is None:
            tts_data_entry = db.query(TTSData).filter_by(id=current_sample_json_id).first()
            if tts_data_entry:
                tts_data_cache[current_sample_json_id] = tts_data_entry

        if not tts_data_entry:
            log.warning(f"TTSData with JSON ID {current_sample_json_id} not found in database, skipping sample.")
            skipped_count += 1
            continue

        db_tts_data_id = tts_data_entry.id

        json_annotations = sample_data.get("annotations", [])
        if not json_annotations:
            continue

        for json_ann in json_annotations:
            json_annotator_name = json_ann.get("annotator")
            if not json_annotator_name:
                log.warning(f"Annotation for TTSData JSON ID {current_sample_json_id} missing annotator name, skipping.")
                skipped_count += 1
                continue

            final_annotated_sentence = _resolve_annotated_sentence(
                current_sample_json_id, sample_data, json_ann, samples_by_id
            )

            # Resolve the annotator named in the JSON.
            initial_annotator_entry = annotator_cache.get(json_annotator_name)
            if not initial_annotator_entry:
                initial_annotator_entry = db.query(Annotator).filter_by(name=json_annotator_name).first()
                if not initial_annotator_entry:
                    log.warning(f"Annotator '{json_annotator_name}' (from JSON) not found in DB for TTSData JSON ID {current_sample_json_id}. Skipping this annotation.")
                    skipped_count += 1
                    continue
                annotator_cache[json_annotator_name] = initial_annotator_entry

            initial_annotator_id = initial_annotator_entry.id

            # Annotator actually used for saving; may be reassigned below.
            annotator_id = initial_annotator_id
            current_annotator_name_for_logs = json_annotator_name

            initial_annotator_interval = annotator_intervals.get(initial_annotator_id)

            if not _interval_contains(initial_annotator_interval, effective_id_for_interval_check):
                log_message_prefix = f"TTSData JSON ID {current_sample_json_id} (effective: {effective_id_for_interval_check})"
                if initial_annotator_interval:
                    log.warning(f"{log_message_prefix} is outside interval [{initial_annotator_interval[0]}, {initial_annotator_interval[1]}] for annotator '{json_annotator_name}'. Attempting to reassign.")
                else:
                    log.warning(f"{log_message_prefix}: Annotator '{json_annotator_name}' (ID: {initial_annotator_id}) has no defined interval. Attempting to reassign to an interval owner.")

                # First annotator whose interval covers this sample wins.
                save_annotator_id = next(
                    (
                        owner_id
                        for owner_id, owner_interval in annotator_intervals.items()
                        if _interval_contains(owner_interval, effective_id_for_interval_check)
                    ),
                    None,
                )
                if save_annotator_id is None:
                    log.error(f"No annotator found with an interval covering {log_message_prefix}. Skipping this annotation by '{json_annotator_name}'.")
                    skipped_count += 1
                    continue

                reassigned_annotator_db_entry = db.query(Annotator).filter_by(id=save_annotator_id).first()
                if reassigned_annotator_db_entry:
                    save_annotator_name = reassigned_annotator_db_entry.name
                    annotator_cache.setdefault(save_annotator_name, reassigned_annotator_db_entry)
                else:
                    save_annotator_name = f"ID:{save_annotator_id}"
                    log.error(f"Critical: Could not find Annotator DB entry for reassigned ID {save_annotator_id}, though an interval exists. Check data integrity.")

                log.info(f"Reassigning annotation for {log_message_prefix} from '{json_annotator_name}' to '{save_annotator_name}' (ID: {save_annotator_id}) as they own the interval.")
                annotator_id = save_annotator_id
                current_annotator_name_for_logs = save_annotator_name

            final_annotated_at = _parse_annotation_timestamp(
                json_ann.get("update_at") or json_ann.get("create_at"),
                current_sample_json_id,
                current_annotator_name_for_logs,
            )

            annotation_obj = db.query(Annotation).filter_by(
                tts_data_id=db_tts_data_id,
                annotator_id=annotator_id,
            ).first()

            if annotation_obj:
                annotation_obj.annotated_sentence = final_annotated_sentence
                annotation_obj.annotated_at = final_annotated_at
                updated_count += 1
            else:
                annotation_obj = Annotation(
                    tts_data_id=db_tts_data_id,
                    annotator_id=annotator_id,
                    annotated_sentence=final_annotated_sentence,
                    annotated_at=final_annotated_at,
                )
                try:
                    # SAVEPOINT: a failing insert only undoes this one
                    # annotation instead of rolling back every uncommitted
                    # change of the current batch.
                    with db.begin_nested():
                        db.add(annotation_obj)
                        db.flush()  # populate annotation_obj.id
                except Exception as e_flush:
                    log.error(f"Error flushing new annotation for TTSData JSON ID {current_sample_json_id}, Annotator {current_annotator_name_for_logs}: {e_flush}")
                    skipped_count += 1
                    continue
                imported_count += 1

            if not annotation_obj.id:
                log.warning(f"Annotation ID not available for TTSData JSON ID {current_sample_json_id}, Annotator {current_annotator_name_for_logs}. Cannot process audio trims.")
                continue

            # Existing trims of this annotation are replaced wholesale.
            annotation_ids_for_trim_deletion_in_batch.add(annotation_obj.id)
            new_trims_in_batch.extend(
                _build_audio_trims(json_ann, annotation_obj.id, db_tts_data_id, current_sample_json_id)
            )

        samples_processed_in_batch += 1

        if samples_processed_in_batch >= BATCH_SIZE:
            _commit_batch(
                db,
                annotation_ids_for_trim_deletion_in_batch,
                new_trims_in_batch,
                sample_idx,
                total_samples,
                current_sample_json_id,
            )
            samples_processed_in_batch = 0

    # Finalise the trailing partial batch. Doing this after the loop (rather
    # than on "last index" inside it) also covers the case where the last
    # sample was skipped by one of the ``continue`` guards above.
    if samples_processed_in_batch or annotation_ids_for_trim_deletion_in_batch or new_trims_in_batch:
        _commit_batch(
            db,
            annotation_ids_for_trim_deletion_in_batch,
            new_trims_in_batch,
            last_sample_idx,
            total_samples,
            last_sample_json_id,
        )

    log.info(f"Finished import attempt. Final counts - New: {imported_count}, Updated: {updated_count}, Skipped: {skipped_count}")
|
279 |
+
|
280 |
+
def main():
    """Load ANNOTATIONS_FILE_PATH and import its annotations into the database.

    A missing file, invalid JSON or any other read error is logged and ends
    the run early. Errors raised during the import itself are logged with a
    traceback; the "finished" message is logged whenever the import phase
    was entered.
    """
    log.info("Starting annotation import script...")

    try:
        with open(ANNOTATIONS_FILE_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Handled at open() time instead of a separate exists() pre-check,
        # which could race with the file being removed in between.
        log.error(f"Annotations file not found at: {ANNOTATIONS_FILE_PATH}")
        return
    except json.JSONDecodeError as e:
        log.error(f"Error decoding JSON from {ANNOTATIONS_FILE_PATH}: {e}")
        return
    except Exception as e:
        log.error(f"Error reading file {ANNOTATIONS_FILE_PATH}: {e}")
        return

    try:
        with get_db() as db_session:
            import_annotations(db_session, data)
    except Exception as e:
        # Top-level boundary: keep the traceback so failures are diagnosable.
        log.error(f"An error occurred during the import process: {e}", exc_info=True)
    finally:
        log.info("Annotation import script finished.")
|
304 |
+
|
305 |
+
if __name__ == "__main__":
|
306 |
+
main()
|
utils/auth.py
CHANGED
@@ -35,38 +35,36 @@ class AuthService:
|
|
35 |
annotator = repo.get_annotator_by_name(username)
|
36 |
|
37 |
# ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
|
38 |
-
def
|
39 |
return (
|
40 |
[], # items_state
|
41 |
0, # idx_state
|
42 |
-
"",
|
43 |
-
"",
|
44 |
-
"",
|
45 |
-
"",
|
46 |
-
"",
|
47 |
-
False, # شش فیلد
|
48 |
)
|
49 |
|
50 |
# --- کاربر موجود نیست / غیر فعال
|
51 |
if annotator is None or not annotator.is_active:
|
52 |
log.warning("Failed login (not found / inactive)")
|
53 |
return (
|
54 |
-
"❌ Wrong username or password!",
|
55 |
-
gr.update(),
|
56 |
-
gr.update(visible=False),
|
57 |
-
gr.update(value=""),
|
58 |
-
*
|
59 |
)
|
60 |
|
61 |
# --- رمز عبور اشتباه
|
62 |
if not verify_password(password, annotator.password):
|
63 |
log.warning("Failed login (bad password)")
|
64 |
return (
|
65 |
-
"❌ Wrong username or password!",
|
66 |
-
gr.update(),
|
67 |
-
gr.update(visible=False),
|
68 |
-
gr.update(value=""),
|
69 |
-
*
|
70 |
)
|
71 |
|
72 |
# ---------- ورود موفق ---------- #
|
@@ -106,16 +104,16 @@ class AuthService:
|
|
106 |
# مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
|
107 |
if dashboard_items:
|
108 |
first = dashboard_items[0]
|
109 |
-
|
|
|
|
|
110 |
first["id"],
|
111 |
first["filename"],
|
112 |
first["sentence"],
|
113 |
first["annotated_sentence"],
|
114 |
-
first["annotated_at"],
|
115 |
-
first["validated"],
|
116 |
)
|
117 |
else:
|
118 |
-
|
119 |
|
120 |
log.info(f"User '{username}' logged in successfully.")
|
121 |
|
@@ -127,7 +125,7 @@ class AuthService:
|
|
127 |
gr.update(value=f"👋 Welcome, {annotator.name}!"), # 3
|
128 |
dashboard_items, # 4: items_state
|
129 |
0, # 5: idx_state
|
130 |
-
*
|
131 |
)
|
132 |
|
133 |
# ───────────── LOGOUT ───────────── #
|
|
|
35 |
annotator = repo.get_annotator_by_name(username)
|
36 |
|
37 |
# ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
|
38 |
+
def empty_dashboard_outputs_for_ui(): # Renamed and adjusted for UI outputs
|
39 |
return (
|
40 |
[], # items_state
|
41 |
0, # idx_state
|
42 |
+
"", # tts_id
|
43 |
+
"", # filename
|
44 |
+
"", # sentence
|
45 |
+
"", # ann_sentence
|
|
|
|
|
46 |
)
|
47 |
|
48 |
# --- کاربر موجود نیست / غیر فعال
|
49 |
if annotator is None or not annotator.is_active:
|
50 |
log.warning("Failed login (not found / inactive)")
|
51 |
return (
|
52 |
+
"❌ Wrong username or password!", # message
|
53 |
+
gr.update(), # login_container (no change)
|
54 |
+
gr.update(visible=False), # dashboard_container
|
55 |
+
gr.update(value=""), # header_welcome
|
56 |
+
*empty_dashboard_outputs_for_ui(), # items_state, idx_state, and 4 UI textboxes
|
57 |
)
|
58 |
|
59 |
# --- رمز عبور اشتباه
|
60 |
if not verify_password(password, annotator.password):
|
61 |
log.warning("Failed login (bad password)")
|
62 |
return (
|
63 |
+
"❌ Wrong username or password!", # message
|
64 |
+
gr.update(), # login_container (no change)
|
65 |
+
gr.update(visible=False), # dashboard_container
|
66 |
+
gr.update(value=""), # header_welcome
|
67 |
+
*empty_dashboard_outputs_for_ui(), # items_state, idx_state, and 4 UI textboxes
|
68 |
)
|
69 |
|
70 |
# ---------- ورود موفق ---------- #
|
|
|
104 |
# مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
|
105 |
if dashboard_items:
|
106 |
first = dashboard_items[0]
|
107 |
+
# Only take the first 4 values needed for the 4 textboxes
|
108 |
+
# tts_id, filename, sentence, ann_sentence
|
109 |
+
first_vals_for_ui = (
|
110 |
first["id"],
|
111 |
first["filename"],
|
112 |
first["sentence"],
|
113 |
first["annotated_sentence"],
|
|
|
|
|
114 |
)
|
115 |
else:
|
116 |
+
first_vals_for_ui = ("", "", "", "")
|
117 |
|
118 |
log.info(f"User '{username}' logged in successfully.")
|
119 |
|
|
|
125 |
gr.update(value=f"👋 Welcome, {annotator.name}!"), # 3
|
126 |
dashboard_items, # 4: items_state
|
127 |
0, # 5: idx_state
|
128 |
+
*first_vals_for_ui, # 6-9: چهار فیلد نخست برای UI
|
129 |
)
|
130 |
|
131 |
# ───────────── LOGOUT ───────────── #
|
utils/database.py
CHANGED
@@ -3,6 +3,15 @@
|
|
3 |
from sqlalchemy import create_engine
|
4 |
from sqlalchemy.orm import sessionmaker
|
5 |
from contextlib import contextmanager
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from config import conf
|
7 |
from utils.logger import Logger
|
8 |
|
|
|
3 |
from sqlalchemy import create_engine
|
4 |
from sqlalchemy.orm import sessionmaker
|
5 |
from contextlib import contextmanager
|
6 |
+
import sys # Add sys import
|
7 |
+
import os # Add os import
|
8 |
+
|
9 |
+
# Add project root to Python path to ensure local modules are prioritized
|
10 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
11 |
+
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
|
12 |
+
if PROJECT_ROOT not in sys.path:
|
13 |
+
sys.path.insert(0, PROJECT_ROOT)
|
14 |
+
|
15 |
from config import conf
|
16 |
from utils.logger import Logger
|
17 |
|