Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed on May 26

Commit

f7ef7d3

1 Parent(s): 3c835a7

aligned interface and data import scripts

Browse files

Files changed (10) hide show

components/dashboard_page.py +530 -263
components/header.py +14 -6
components/login_page.py +0 -2
data/models.py +6 -6
data/repository/annotator_workload_repo.py +21 -9
scripts/apply_custom_intervals.py +90 -0
scripts/distribute_workload.py +0 -170
scripts/import_annotations_from_json.py +306 -0
utils/auth.py +20 -22
utils/database.py +9 -0

components/dashboard_page.py CHANGED Viewed

@@ -1,16 +1,17 @@
 import gradio as gr
 import numpy as np
 import datetime
-from sqlalchemy import orm
 from components.header import Header
-from utils.logger import Logger
-from utils.gdrive_downloader import PublicFolderAudioLoader  # Assuming LOADER uses this
 from config import conf
-from utils.database import get_db  # For DB operations
-from data.models import Annotation, AudioTrim, TTSData  # Import your models
-log = Logger()
 LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
 GDRIVE_FOLDER = conf.GDRIVE_FOLDER
@@ -18,70 +19,108 @@ GDRIVE_FOLDER = conf.GDRIVE_FOLDER
 class DashboardPage:
     def __init__(self) -> None:
         with gr.Column(visible=False) as self.container:
-            self.header = Header()
             with gr.Row():
-                # ستون چپ
                 with gr.Column(scale=3):
                     with gr.Row():
-                        self.tts_id = gr.Textbox(label="ID", interactive=False)
-                        self.filename = gr.Textbox(label="Filename", interactive=False)
-                    with gr.Row():
-                        self.sentence = gr.Textbox(
-                            label="Sentence", interactive=False, max_lines=5, rtl=True
-                        )
-                        self.btn_copy = gr.Button("📋 Copy", interactive=True)
-                    with gr.Row():
-                        self.ann_sentence = gr.Textbox(
-                            label="Annotated Sentence",
-                            interactive=True,
-                            max_lines=5,
-                            rtl=True,
-                        )
-                        self.btn_paste = gr.Button("📥 Paste", interactive=True)
-                    with gr.Row():
-                        self.validated = gr.Checkbox(
-                            label="Validated", interactive=True
-                        )
                     with gr.Row():
-                        self.btn_prev = gr.Button("⬅️ Previous", interactive=True)
-                        self.btn_next = gr.Button("Next ➡️", interactive=True)
-                        self.btn_delete = gr.Button("🗑️ Delete", interactive=True)
                     with gr.Row():
                         self.jump_data_id_input = gr.Number(
-                            label="Jump to Data ID", value=0, precision=0, interactive=True
-                        )
-                        self.btn_jump = gr.Button("Go", interactive=True)
-                    with gr.Row():
-                        self.trim_start_sec = gr.Number(
-                            label="Trim Start (s)", value=0.0, precision=3, interactive=True
-                        )
-                        self.trim_end_sec = gr.Number(
-                            label="Trim End (s)", value=0.0, precision=3, interactive=True
                         )
-                        self.btn_trim = gr.Button("✂️ Trim", interactive=True)
-                        self.btn_undo_trim = gr.Button("↩️ Undo Trim", interactive=True)
-                # ستون راست
                 with gr.Column(scale=2):
-                    self.btn_load_voice = gr.Button("Load Audio", interactive=True)
                     self.audio = gr.Audio(
                         label="🔊 Audio", interactive=False, autoplay=True
                     )
-            # stateها
             self.items_state = gr.State([])
             self.idx_state = gr.State(0)
-            self.clipboard_state = gr.State("")
             self.original_audio_state = gr.State(None)
-            self.current_trim_params = gr.State(None)
             # List of all interactive UI elements for enabling/disabling
             self.interactive_ui_elements = [
-                self.btn_prev, self.btn_next, self.btn_delete, self.btn_jump,
                 self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
                 self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
-                self.ann_sentence, self.validated, self.btn_copy, self.btn_paste
             ]
     # ---------------- wiring ---------------- #
@@ -90,151 +129,184 @@ class DashboardPage:
     ):
         self.header.register_callbacks(login_page, self, session_state)
-        # Helper function to update UI interactive state
         def update_ui_interactive_state(is_interactive: bool):
             updates = []
             for elem in self.interactive_ui_elements:
                 if elem == self.btn_load_voice and not is_interactive:
-                    updates.append(gr.update(value="⏳ Loading...", interactive=False))
                 elif elem == self.btn_load_voice and is_interactive:
-                    updates.append(gr.update(value="Load Audio", interactive=True))
                 else:
                     updates.append(gr.update(interactive=is_interactive))
             return updates
-        # ---- All Helper Functions ----
-        def apply_loaded_trim_fn(audio_data_as_loaded, trim_params_from_state, original_audio_for_state):
-            """
-            Applies trim if trim_params_from_state are available to the audio_data_as_loaded.
-            This is used after loading an item and its original audio.
-            original_audio_for_state is preserved as the true original.
-            """
-            if audio_data_as_loaded and trim_params_from_state:
-                sr, wav = audio_data_as_loaded
-                start = trim_params_from_state.get("start")
-                end = trim_params_from_state.get("end")
-                operation = trim_params_from_state.get("operation")
-                if operation == "delete" and start is not None and end is not None and end > start and start >= 0:
-                    start_sample = int(sr * start / 1000.0)
-                    end_sample = int(sr * end / 1000.0)
-                    audio_duration_samples = len(wav)
-                    start_sample = max(0, min(start_sample, audio_duration_samples))
-                    end_sample = max(start_sample, min(end_sample, audio_duration_samples))
-                    if start_sample == 0 and end_sample == audio_duration_samples:
-                        log.info(f"Applying saved trim: delete entire audio from {start}ms to {end}ms. Resulting in empty audio.")
-                        return (sr, np.array([], dtype=wav.dtype)), original_audio_for_state
-                    part1 = wav[:start_sample]
-                    part2 = wav[end_sample:]
-                    deleted_segment_wav = np.concatenate((part1, part2))
-                    log.info(f"Applied saved trim (delete operation): {start}ms to {end}ms. Original shape: {wav.shape}, New shape: {deleted_segment_wav.shape}")
-                    return (sr, deleted_segment_wav), original_audio_for_state
-                else:
-                    if operation != "delete":
-                        log.warning("Saved trim parameters do not specify a 'delete' operation. Using original audio.")
-                    else:
-                        log.warning("Invalid saved trim parameters for delete operation. Using original audio.")
-                    return audio_data_as_loaded, original_audio_for_state
-            return audio_data_as_loaded, original_audio_for_state
-        def download_voice_fn(folder_link, filename_to_load):
             if not filename_to_load:
-                return None, None
             try:
-                log.info(f"Downloading voice: {filename_to_load}")
                 sr, wav = LOADER.load_audio(folder_link, filename_to_load)
-                return (sr, wav), (sr, wav.copy())
             except Exception as e:
-                log.error(f"GDrive download failed for {filename_to_load}: {e}")
                 gr.Error(f"Failed to load audio: {filename_to_load}. Error: {e}")
-                return None, None
-        def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, is_validated_ui, active_trim_params):
             annotator_id = session.get("user_id")
             if not current_tts_id or not annotator_id:
                 gr.Error("Cannot save: Missing TTS ID or User ID.")
-                return False
-            validated_to_save = bool(is_validated_ui)
             with get_db() as db:
                 try:
                     annotation_obj = db.query(Annotation).filter_by(
                         tts_data_id=current_tts_id, annotator_id=annotator_id
-                    ).first()
                     if not annotation_obj:
                         annotation_obj = Annotation(
                             tts_data_id=current_tts_id, annotator_id=annotator_id
                         )
                         db.add(annotation_obj)
                     annotation_obj.annotated_sentence = ann_text_to_save
-                    annotation_obj.validated = validated_to_save
                     annotation_obj.annotated_at = datetime.datetime.utcnow()
-                    if active_trim_params and active_trim_params.get("operation") == "delete" and active_trim_params.get("start") is not None:
-                        start_to_save = active_trim_params["start"]
-                        end_to_save = active_trim_params["end"]
-                        if not annotation_obj.audio_trim:
                             db.flush()
-                            if annotation_obj.id is None:
-                                gr.Error("Failed to get annotation ID for saving trim.")
-                                db.rollback()
-                                return False
-                            new_trim = AudioTrim(
                                 annotation_id=annotation_obj.id,
-                                original_tts_data_id=current_tts_id,
-                                start=start_to_save,
-                                end=end_to_save,
                             )
-                            annotation_obj.audio_trim = new_trim
-                        else:
-                            annotation_obj.audio_trim.start = start_to_save
-                            annotation_obj.audio_trim.end = end_to_save
-                    elif annotation_obj.audio_trim:
-                        db.delete(annotation_obj.audio_trim)
-                        annotation_obj.audio_trim = None
                     db.commit()
                     gr.Info(f"Annotation for ID {current_tts_id} saved.")
-                    return validated_to_save
                 except Exception as e:
                     db.rollback()
-                    log.error(f"Failed to save annotation for {current_tts_id}: {e}")
                     gr.Error(f"Save failed: {e}")
-                    return False
         def show_current_item_fn(items, idx, session):
-            if not items or idx >= len(items):
-                return "", "", "", "", False, None, 0.0, 0.0, None
             current_item = items[idx]
             tts_data_id = current_item.get("id")
             annotator_id = session.get("user_id")
-            ann_text, is_validated, trim_params_for_ui = "", False, None
-            start_sec_ui, end_sec_ui = 0.0, 0.0
             if tts_data_id and annotator_id:
                 with get_db() as db:
                     try:
                         existing_annotation = db.query(Annotation).filter_by(
                             tts_data_id=tts_data_id, annotator_id=annotator_id
-                        ).options(orm.joinedload(Annotation.audio_trim)).first() # Eager load audio_trim
                         if existing_annotation:
                             ann_text = existing_annotation.annotated_sentence or ""
-                            is_validated = existing_annotation.validated
-                            if existing_annotation.audio_trim:
-                                trim_params_for_ui = {
-                                    "start": existing_annotation.audio_trim.start,
-                                    "end": existing_annotation.audio_trim.end,
-                                    "operation": "delete"
-                                }
-                                start_sec_ui = existing_annotation.audio_trim.start / 1000.0
-                                end_sec_ui = existing_annotation.audio_trim.end / 1000.0
                     except Exception as e:
-                        log.error(f"Database error in show_current_item_fn for TTS ID {tts_data_id}: {e}")
                         gr.Error(f"Error loading annotation details: {e}")
             return (
                 current_item.get("id", ""), current_item.get("filename", ""),
-                current_item.get("sentence", ""), ann_text, is_validated, None,
-                start_sec_ui, end_sec_ui, trim_params_for_ui
             )
         def navigate_idx_fn(items, current_idx, direction):
@@ -243,9 +315,65 @@ class DashboardPage:
             return new_idx
         def load_all_items_fn(sess):
-            items = sess.get("dashboard_items", [])
-            initial_ui_values = show_current_item_fn(items, 0, sess)
-            return items, 0, *initial_ui_values
         def jump_by_data_id_fn(items, target_data_id_str, current_idx):
             if not target_data_id_str: return current_idx
@@ -253,209 +381,348 @@ class DashboardPage:
                 target_id = int(target_data_id_str)
                 for i, item_dict in enumerate(items):
                     if item_dict.get("id") == target_id: return i
-                gr.Warning(f"Data ID {target_id} not found.")
             except ValueError:
                 gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
             return current_idx
-        def perform_trim_fn(original_audio_data, start_sec, end_sec, current_audio_for_fallback):
-            log.info(f"perform_trim_fn called with start_sec: {start_sec}, end_sec: {end_sec}")
-            if original_audio_data is None:
-                gr.Warning("No original audio loaded. Cannot perform new trim.")
-                return current_audio_for_fallback, None
-            if start_sec is None or end_sec is None or start_sec < 0 or end_sec <= start_sec:
-                gr.Warning("Invalid trim times. Start must be >= 0 and End > Start.")
-                return original_audio_data, None
-            try:
-                sr, wav = original_audio_data
-                start_sample, end_sample = int(sr * start_sec), int(sr * end_sec)
-                audio_duration_samples = len(wav)
-                start_sample = max(0, min(start_sample, audio_duration_samples))
-                end_sample = max(start_sample, min(end_sample, audio_duration_samples))
-                trimmed_wav = np.concatenate((wav[:start_sample], wav[end_sample:]))
-                active_trim_params = {"start": start_sec * 1000.0, "end": end_sec * 1000.0, "operation": "delete"}
-                log.info(f"Audio segment deleted. New shape: {trimmed_wav.shape}")
-                if trimmed_wav.size == 0: gr.Warning("Trim resulted in empty audio.")
-                return (sr, trimmed_wav), active_trim_params
-            except Exception as e:
-                log.error(f"Error during audio trimming: {e}")
-                gr.Error(f"Failed to trim audio: {e}")
-                return original_audio_data, None
-        def delete_db_and_ui_fn(items, current_idx, session):
-            item_info = items[current_idx]
-            tts_data_id_to_delete = item_info.get("id")
-            annotator_id_for_delete = session.get("user_id")
-            if tts_data_id_to_delete and annotator_id_for_delete:
                 with get_db() as db:
                     try:
                         annotation_obj = db.query(Annotation).filter_by(
-                            tts_data_id=tts_data_id_to_delete, annotator_id=annotator_id_for_delete
-                        ).first()
                         if annotation_obj:
-                            db.delete(annotation_obj) # Cascade should handle AudioTrim
                             db.commit()
-                            gr.Info(f"Annotation for ID {tts_data_id_to_delete} deleted.")
                         else:
-                            gr.Warning(f"No annotation found to delete for ID {tts_data_id_to_delete}.")
                     except Exception as e:
                         db.rollback()
-                        log.error(f"Error deleting annotation {tts_data_id_to_delete}: {e}")
-                        gr.Error(f"Failed to delete annotation: {e}")
             else:
-                gr.Error("Cannot delete: Missing TTS ID or User ID.")
-            refreshed_ui_values = show_current_item_fn(items, current_idx, session)
-            return items, current_idx, *refreshed_ui_values
-        # ---- Callback Implementations ----
-        outputs_for_show_current = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,
-            self.validated, self.audio, self.trim_start_sec,
-            self.trim_end_sec, self.current_trim_params,
         ]
         # Initial Load
         root_blocks.load(
-            fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
-            fn=load_all_items_fn,
             inputs=[session_state],
-            outputs=[self.items_state, self.idx_state] + outputs_for_show_current,
         ).then(
-            fn=download_voice_fn,
-            inputs=[gr.State(GDRIVE_FOLDER), self.filename],
-            outputs=[self.audio, self.original_audio_state],
         ).then(
-            fn=apply_loaded_trim_fn,
-            inputs=[self.audio, self.current_trim_params, self.original_audio_state],
-            outputs=[self.audio, self.original_audio_state]
-        ).then(
-            fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
-        # Navigation (Prev/Next)
-        for btn_widget, direction_str in [
-            (self.btn_prev, "prev"), (self.btn_next, "next"),
         ]:
             event_chain = btn_widget.click(
                 fn=lambda: update_ui_interactive_state(False),
                 outputs=self.interactive_ui_elements
             )
-            if direction_str == "next":
                 event_chain = event_chain.then(
                     fn=save_annotation_db_fn,
                     inputs=[
                         self.tts_id, session_state, self.ann_sentence,
-                        self.validated, self.current_trim_params,
                     ],
-                    outputs=[self.validated]
                 )
-            event_chain.then(
-                fn=navigate_idx_fn,
                 inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
                 outputs=self.idx_state,
             ).then(
                 fn=show_current_item_fn,
                 inputs=[self.items_state, self.idx_state, session_state],
-                outputs=outputs_for_show_current,
             ).then(
-                fn=download_voice_fn,
-                inputs=[gr.State(GDRIVE_FOLDER), self.filename],
-                outputs=[self.audio, self.original_audio_state],
             ).then(
-                fn=apply_loaded_trim_fn,
-                inputs=[self.audio, self.current_trim_params, self.original_audio_state],
-                outputs=[self.audio, self.original_audio_state]
             ).then(
                 fn=lambda: update_ui_interactive_state(True),
                 outputs=self.interactive_ui_elements
             )
-        # Manual Load Audio Button
-        self.btn_load_voice.click(
-            fn=lambda: update_ui_interactive_state(False),
-            outputs=self.interactive_ui_elements
-        ).then(
-            fn=download_voice_fn,
-            inputs=[gr.State(GDRIVE_FOLDER), self.filename],
-            outputs=[self.audio, self.original_audio_state],
-        ).then(
-            fn=apply_loaded_trim_fn,
-            inputs=[self.audio, self.current_trim_params, self.original_audio_state],
-            outputs=[self.audio, self.original_audio_state]
-        ).then(
-            fn=lambda: update_ui_interactive_state(True),
-            outputs=self.interactive_ui_elements
-        )
-        # Copy/Paste (Quick operations, no UI disable needed)
-        self.btn_copy.click(fn=lambda x: x, inputs=self.sentence, outputs=self.clipboard_state)
-        self.btn_paste.click(fn=lambda x: x, inputs=self.clipboard_state, outputs=self.ann_sentence)
-        # Jump to Data ID
-        self.btn_jump.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
-            fn=jump_by_data_id_fn,
             inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
-            outputs=self.idx_state,
         ).then(
             fn=show_current_item_fn,
             inputs=[self.items_state, self.idx_state, session_state],
-            outputs=outputs_for_show_current,
         ).then(
-            fn=download_voice_fn,
-            inputs=[gr.State(GDRIVE_FOLDER), self.filename],
-            outputs=[self.audio, self.original_audio_state],
         ).then(
-            fn=apply_loaded_trim_fn,
-            inputs=[self.audio, self.current_trim_params, self.original_audio_state],
-            outputs=[self.audio, self.original_audio_state]
         ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
-        # Trim Audio
-        self.btn_trim.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
-            fn=perform_trim_fn,
-            inputs=[self.original_audio_state, self.trim_start_sec, self.trim_end_sec, self.audio],
-            outputs=[self.audio, self.current_trim_params],
         ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
-        # Undo Trim
         self.btn_undo_trim.click(
-            fn=lambda: update_ui_interactive_state(False),
-            outputs=self.interactive_ui_elements
-        ).then(
-            fn=lambda orig_audio: (orig_audio, None, 0.0, 0.0) if orig_audio else (None, None, 0.0, 0.0),
-            inputs=[self.original_audio_state],
-            outputs=[self.audio, self.current_trim_params, self.trim_start_sec, self.trim_end_sec],
-        ).then(
-            fn=lambda: update_ui_interactive_state(True),
-            outputs=self.interactive_ui_elements
         )
-        # Delete Annotation
         self.btn_delete.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
             fn=delete_db_and_ui_fn,
-            inputs=[self.items_state, self.idx_state, session_state],
-            outputs=[self.items_state, self.idx_state] + outputs_for_show_current,
-        ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
         return self.container

 import gradio as gr
 import numpy as np
 import datetime
+from sqlalchemy import orm, func # Added func for count
 from components.header import Header
+from utils.logger import Logger # Changed from get_logger to Logger
+from utils.gdrive_downloader import PublicFolderAudioLoader
 from config import conf
+from utils.database import get_db
+from data.models import Annotation, AudioTrim, TTSData, AnnotationInterval # Added AnnotationInterval
+from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo # For progress
+log = Logger() # Changed from get_logger() to Logger()
 LOADER = PublicFolderAudioLoader(conf.GDRIVE_API_KEY)
 GDRIVE_FOLDER = conf.GDRIVE_FOLDER
 class DashboardPage:
     def __init__(self) -> None:
         with gr.Column(visible=False) as self.container:
+            self.header = Header()  # Header now includes progress_display
             with gr.Row():
+                # Left Column
                 with gr.Column(scale=3):
                     with gr.Row():
+                        self.tts_id = gr.Textbox(label="ID", interactive=False, scale=1)
+                        self.filename = gr.Textbox(label="Filename", interactive=False, scale=3)
+                    self.sentence = gr.Textbox(
+                        label="Original Sentence", interactive=False, max_lines=5, rtl=True
+                    )
                     with gr.Row():
+                        with gr.Column(scale=1, min_width=10):  # Left spacer column
+                            pass
+                        self.btn_copy_sentence = gr.Button("📋 Copy to Annotated", min_width=150)
+                        with gr.Column(scale=1, min_width=10):  # Right spacer column
+                            pass
+                    self.ann_sentence = gr.Textbox(
+                        label="Annotated Sentence",
+                        interactive=True,
+                        max_lines=5,
+                        rtl=True,
+                    )
                     with gr.Row():
+                        self.btn_prev = gr.Button("⬅️ Previous", min_width=120)
+                        self.btn_next_no_save = gr.Button("Next ➡️ (No Save)", min_width=150)
+                        self.btn_save_next = gr.Button("Save & Next ➡️", variant="primary", min_width=120)
+                    # Combined row for Delete button and Jump controls
+                    with gr.Row(): # Removed style argument to fix TypeError
+                        # Delete button on the left
+                        self.btn_delete = gr.Button("🗑️ Delete Annotation & Clear Fields", min_width=260)
+                        # Spacer column to push jump controls to the right.
+                        # # This column will expand to fill available space.
+                        # with gr.Column(scale=1, min_width=10):
+                        #     pass
+                        # Jump controls, grouped in a nested Row, appearing on the right.
+                        # 'scale=0' for this nested Row and its children makes them take minimal/intrinsic space.
+                    with gr.Row(scale=0, variant='compact'): # Added variant='compact'
                         self.jump_data_id_input = gr.Number(
+                            # show_label=False,  # Remove label to reduce height
+                            label="Jump to ID (e.g. 123)", # Use placeholder for instruction
+                            value=None, # Ensure placeholder shows initially
+                            precision=0,
+                            interactive=True,
+                            min_width=120, # Adjusted for longer placeholder
+                            # scale=0
                         )
+                        self.btn_jump = gr.Button("Go to data ID", min_width=70) # Compact Go button
+                    # Removed the old separate rows for delete and jump controls
+                # Right Column
                 with gr.Column(scale=2):
+                    self.btn_load_voice = gr.Button("Load Audio (Autoplay)", min_width=150)
                     self.audio = gr.Audio(
                         label="🔊 Audio", interactive=False, autoplay=True
                     )
+                    with gr.Group():  # Grouping trim controls
+                        gr.Markdown("### Audio Trimming")
+                        self.trim_start_sec = gr.Number(
+                            label="Trim Start (s)",
+                            value=None, # Ensure placeholder shows
+                            precision=3,
+                            interactive=True,
+                            min_width=150
+                        )
+                        self.trim_end_sec = gr.Number(
+                            label="Trim End (s)",
+                            value=None, # Ensure placeholder shows
+                            precision=3,
+                            interactive=True,
+                            min_width=150
+                        )
+                        with gr.Row():
+                            self.btn_trim = gr.Button("➕ Add Trim (Delete Segment)", min_width=150)
+                            self.btn_undo_trim = gr.Button("↩️ Undo Last Trim", min_width=150)
+                        self.trims_display = gr.DataFrame(
+                            headers=["Start (s)", "End (s)"],
+                            col_count=(2, "fixed"),
+                            interactive=False,
+                            label="Applied Trims",
+                            wrap=True
+                        )
+            # State variables
             self.items_state = gr.State([])
             self.idx_state = gr.State(0)
             self.original_audio_state = gr.State(None)
+            self.applied_trims_list_state = gr.State([])
             # List of all interactive UI elements for enabling/disabling
             self.interactive_ui_elements = [
+                self.btn_prev, self.btn_save_next, self.btn_next_no_save,
+                self.btn_delete, self.btn_jump,
                 self.jump_data_id_input, self.trim_start_sec, self.trim_end_sec,
                 self.btn_trim, self.btn_undo_trim, self.btn_load_voice,
+                self.ann_sentence, self.btn_copy_sentence
             ]
     # ---------------- wiring ---------------- #
     ):
         self.header.register_callbacks(login_page, self, session_state)
         def update_ui_interactive_state(is_interactive: bool):
             updates = []
             for elem in self.interactive_ui_elements:
                 if elem == self.btn_load_voice and not is_interactive:
+                    updates.append(gr.update(value="⏳ Loading Audio...", interactive=False))
                 elif elem == self.btn_load_voice and is_interactive:
+                    updates.append(gr.update(value="Load Audio (Autoplay)", interactive=True))
+                elif elem == self.btn_save_next and not is_interactive:
+                    updates.append(gr.update(value="💾 Saving...", interactive=False))
+                elif elem == self.btn_save_next and is_interactive:
+                     updates.append(gr.update(value="Save & Next ➡️", interactive=True))
+                # Add similar handling for btn_next_no_save if needed for text change during processing
                 else:
                     updates.append(gr.update(interactive=is_interactive))
             return updates
+        def get_user_progress_fn(session):
+            user_id = session.get("user_id")
+            if not user_id:
+                return "Annotation Progress: N/A" # Added label
+            with get_db() as db:
+                try:
+                    # Total items assigned to the user
+                    total_assigned_query = db.query(func.sum(AnnotationInterval.end_index - AnnotationInterval.start_index + 1)).filter(AnnotationInterval.annotator_id == user_id)
+                    total_assigned_result = total_assigned_query.scalar()
+                    total_assigned = total_assigned_result if total_assigned_result is not None else 0
+                    # Count of non-empty annotations by this user within their assigned intervals
+                    completed_count_query = db.query(func.count(Annotation.id)).join(
+                        TTSData, Annotation.tts_data_id == TTSData.id
+                    ).join(
+                        AnnotationInterval,
+                        (AnnotationInterval.annotator_id == user_id) &
+                        (TTSData.id >= AnnotationInterval.start_index) &
+                        (TTSData.id <= AnnotationInterval.end_index)
+                    ).filter(
+                        Annotation.annotator_id == user_id,
+                        Annotation.annotated_sentence != None,
+                        Annotation.annotated_sentence != ""
+                    )
+                    completed_count_result = completed_count_query.scalar()
+                    completed_count = completed_count_result if completed_count_result is not None else 0
+                    if total_assigned > 0:
+                        percent = (completed_count / total_assigned) * 100
+                        bar_length = 20 # Length of the progress bar
+                        filled_length = int(bar_length * completed_count // total_assigned)
+                        bar = '█' * filled_length + '░' * (bar_length - filled_length)
+                        return f"Progress: {bar} {completed_count}/{total_assigned} ({percent:.1f}%)"
+                    elif total_assigned == 0 and completed_count == 0: # Handles case where user has 0 assigned items initially
+                        return "Progress: No items assigned yet."
+                    else: # Should ideally not happen if logic is correct (e.g. completed > total_assigned)
+                        return f"Annotation Progress: {completed_count}/{total_assigned} labeled"
+                except Exception as e:
+                    log.error(f"Error fetching progress for user {user_id}: {e}")
+                    return "Annotation Progress: Error" # Added label
+        def download_voice_fn(folder_link, filename_to_load, autoplay_on_load=False): # Autoplay here is for the btn_load_voice click
             if not filename_to_load:
+                return None, None, gr.update(value=None, autoplay=False)
             try:
+                log.info(f"Downloading voice: {filename_to_load}, Autoplay: {autoplay_on_load}")
                 sr, wav = LOADER.load_audio(folder_link, filename_to_load)
+                return (sr, wav), (sr, wav.copy()), gr.update(value=(sr, wav), autoplay=autoplay_on_load)
             except Exception as e:
+                log.error(f"GDrive download failed for {filename_to_load}: {e}")
                 gr.Error(f"Failed to load audio: {filename_to_load}. Error: {e}")
+                return None, None, gr.update(value=None, autoplay=False)
+        def save_annotation_db_fn(current_tts_id, session, ann_text_to_save, applied_trims_list):
             annotator_id = session.get("user_id")
             if not current_tts_id or not annotator_id:
                 gr.Error("Cannot save: Missing TTS ID or User ID.")
+                return # Modified: No return value
             with get_db() as db:
                 try:
                     annotation_obj = db.query(Annotation).filter_by(
                         tts_data_id=current_tts_id, annotator_id=annotator_id
+                    ).options(orm.joinedload(Annotation.audio_trims)).first()
                     if not annotation_obj:
                         annotation_obj = Annotation(
                             tts_data_id=current_tts_id, annotator_id=annotator_id
                         )
                         db.add(annotation_obj)
                     annotation_obj.annotated_sentence = ann_text_to_save
                     annotation_obj.annotated_at = datetime.datetime.utcnow()
+                    # --- Multi-trim handling ---
+                    # 1. Delete existing trims for this annotation
+                    if annotation_obj.audio_trims:
+                        for old_trim in annotation_obj.audio_trims:
+                            db.delete(old_trim)
+                        annotation_obj.audio_trims = [] # Clear the collection
+                        # db.flush() # Ensure deletes are processed before adds if issues arise
+                    # 2. Add new trims from applied_trims_list
+                    if applied_trims_list:
+                        if annotation_obj.id is None: # If new annotation, flush to get ID
                             db.flush()
+                        if annotation_obj.id is None:
+                            gr.Error("Failed to get annotation ID for saving new trims.")
+                            db.rollback(); return # Modified: No return value
+                        for trim_info in applied_trims_list:
+                            start_to_save_ms = trim_info['start_sec'] * 1000.0
+                            end_to_save_ms = trim_info['end_sec'] * 1000.0
+                            original_data_id_for_trim = current_tts_id
+                            new_trim_db_obj = AudioTrim(
                                 annotation_id=annotation_obj.id,
+                                original_tts_data_id=original_data_id_for_trim,
+                                start=start_to_save_ms,
+                                end=end_to_save_ms,
                             )
+                            db.add(new_trim_db_obj)
+                            # No need to append to annotation_obj.audio_trims if cascade is working correctly
+                            # but can be done explicitly: annotation_obj.audio_trims.append(new_trim_db_obj)
+                        log.info(f"Saved {len(applied_trims_list)} trims for annotation {annotation_obj.id} (TTS ID: {current_tts_id}).")
+                    else:
+                        log.info(f"No trims applied for {current_tts_id}, any existing DB trims were cleared.")
                     db.commit()
                     gr.Info(f"Annotation for ID {current_tts_id} saved.")
+                    # Removed 'return True'
                 except Exception as e:
                     db.rollback()
+                    log.error(f"Failed to save annotation for {current_tts_id}: {e}") # Removed exc_info=True
                     gr.Error(f"Save failed: {e}")
+                    # Removed 'return False'
         def show_current_item_fn(items, idx, session):
+            initial_trims_list_sec = []
+            initial_trims_df_data = self._convert_trims_to_df_data([]) # Empty by default
+            ui_trim_start_sec = None # Changed from 0.0 to None
+            ui_trim_end_sec = None # Changed from 0.0 to None
+            if not items or idx >= len(items) or idx < 0:
+                return ("", "", "", "", None, ui_trim_start_sec, ui_trim_end_sec,
+                        initial_trims_list_sec, initial_trims_df_data,
+                        gr.update(value=None, autoplay=False))
             current_item = items[idx]
             tts_data_id = current_item.get("id")
             annotator_id = session.get("user_id")
+            ann_text = ""
             if tts_data_id and annotator_id:
                 with get_db() as db:
                     try:
                         existing_annotation = db.query(Annotation).filter_by(
                             tts_data_id=tts_data_id, annotator_id=annotator_id
+                        ).options(orm.joinedload(Annotation.audio_trims)).first() # Changed to audio_trims
                         if existing_annotation:
                             ann_text = existing_annotation.annotated_sentence or ""
+                            if existing_annotation.audio_trims: # Check the collection
+                                initial_trims_list_sec = [
+                                    {
+                                        'start_sec': trim.start / 1000.0,
+                                        'end_sec': trim.end / 1000.0
+                                    }
+                                    for trim in existing_annotation.audio_trims # Iterate over the collection
+                                ]
+                                initial_trims_df_data = self._convert_trims_to_df_data(initial_trims_list_sec)
                     except Exception as e:
+                        log.error(f"DB error in show_current_item_fn for TTS ID {tts_data_id}: {e}") # Removed exc_info=True
                         gr.Error(f"Error loading annotation details: {e}")
             return (
                 current_item.get("id", ""), current_item.get("filename", ""),
+                current_item.get("sentence", ""), ann_text,
+                None,
+                ui_trim_start_sec, ui_trim_end_sec,
+                initial_trims_list_sec,
+                initial_trims_df_data,
+                gr.update(value=None, autoplay=False) # Ensure audio does not autoplay on item change
             )
         def navigate_idx_fn(items, current_idx, direction):
             return new_idx
         def load_all_items_fn(sess):
+            user_id = sess.get("user_id") # Use user_id for consistency with other functions
+            user_name = sess.get("user_name") # Keep for logging if needed
+            items_to_load = []
+            initial_idx = 0 # Default to 0
+            if not user_id:
+                log.warning("load_all_items_fn: user_id not found in session. Dashboard will display default state until login completes and data is refreshed.")
+                # Prepare default/empty values for all outputs of show_current_item_fn
+                # (tts_id, filename, sentence, ann_text, audio_placeholder,
+                #  trim_start_sec_ui, trim_end_sec_ui,
+                #  applied_trims_list_state_val, trims_display_val, audio_update_obj)
+                empty_item_display_tuple = ("", "", "", "", None, None, None, [], self._convert_trims_to_df_data([]), gr.update(value=None, autoplay=False))
+                # load_all_items_fn returns: [items_to_load, initial_idx] + list(initial_ui_values_tuple) + [progress_str]
+                # Total 13 values.
+                return [[], 0] + list(empty_item_display_tuple) + ["Progress: Waiting for login..."]
+            if user_id:
+                with get_db() as db:
+                    try:
+                        repo = AnnotatorWorkloadRepo(db)
+                        # Get all assigned items
+                        raw_items = repo.get_tts_data_with_annotations_for_user_id(user_id)
+                        items_to_load = [
+                            {
+                                "id": item["tts_data"].id,
+                                "filename": item["tts_data"].filename,
+                                "sentence": item["tts_data"].sentence,
+                                "annotated": item["annotation"] is not None and (item["annotation"].annotated_sentence is not None and item["annotation"].annotated_sentence != "")
+                            }
+                            for item in raw_items
+                        ]
+                        log.info(f"Loaded {len(items_to_load)} items for user {user_name} (ID: {user_id})")
+                        # --- Resume Logic: Find first unannotated or last item ---
+                        first_unannotated_idx = -1
+                        for i, item_data in enumerate(items_to_load):
+                            if not item_data["annotated"]:
+                                first_unannotated_idx = i
+                                break
+                        if first_unannotated_idx != -1:
+                            initial_idx = first_unannotated_idx
+                            log.info(f"Resuming at first unannotated item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
+                        elif items_to_load: # All annotated, start at the last one or first if only one
+                            initial_idx = len(items_to_load) - 1
+                            log.info(f"All items annotated, starting at last item, index: {initial_idx} (ID: {items_to_load[initial_idx]['id']})")
+                        else: # No items assigned
+                            initial_idx = 0
+                            log.info("No items assigned to user.")
+                    except Exception as e:
+                        log.error(f"Failed to load items or determine resume index for user {user_name}: {e}") # Removed exc_info=True
+                        gr.Error(f"Could not load your assigned data: {e}")
+            initial_ui_values_tuple = show_current_item_fn(items_to_load, initial_idx, sess)
+            progress_str = get_user_progress_fn(sess)
+            return [items_to_load, initial_idx] + list(initial_ui_values_tuple) + [progress_str]
         def jump_by_data_id_fn(items, target_data_id_str, current_idx):
             if not target_data_id_str: return current_idx
                 target_id = int(target_data_id_str)
                 for i, item_dict in enumerate(items):
                     if item_dict.get("id") == target_id: return i
+                gr.Warning(f"Data ID {target_id} not found in your assigned items.")
             except ValueError:
                 gr.Warning(f"Invalid Data ID format: {target_data_id_str}")
             return current_idx
+        def delete_db_and_ui_fn(items, current_idx, session, original_audio_data_state):
+            # ... (ensure Annotation.audio_trims is used if deleting associated trims) ...
+            # This function already deletes annotation_obj.audio_trim, which will now be annotation_obj.audio_trims
+            # The cascade delete on the relationship should handle deleting all AudioTrim children.
+            # However, explicit deletion loop might be safer if cascade behavior is not fully trusted or for clarity.
+            # For now, relying on cascade from previous model update.
+            # If issues, add explicit loop:
+            # if annotation_obj.audio_trims:
+            #     for trim_to_del in annotation_obj.audio_trims:
+            #         db.delete(trim_to_del)
+            #     annotation_obj.audio_trims = []
+            # ... rest of the function ...
+            new_ann_sentence = ""
+            new_trim_start_sec_ui = None # Changed from 0.0
+            new_trim_end_sec_ui = None # Changed from 0.0
+            new_applied_trims_list = []
+            new_trims_df_data = self._convert_trims_to_df_data([])
+            audio_to_display_after_delete = None
+            audio_update_obj_after_delete = gr.update(value=None, autoplay=False)
+            if original_audio_data_state:
+                audio_to_display_after_delete = original_audio_data_state
+                audio_update_obj_after_delete = gr.update(value=original_audio_data_state, autoplay=False)
+            if not items or current_idx >= len(items) or current_idx < 0:
+                progress_str_err = get_user_progress_fn(session)
+                return (items, current_idx, "", "", "", new_ann_sentence, audio_to_display_after_delete,
+                       new_trim_start_sec_ui, new_trim_end_sec_ui, new_applied_trims_list, new_trims_df_data,
+                       audio_update_obj_after_delete, progress_str_err)
+            current_item = items[current_idx]
+            tts_id_val = current_item.get("id", "")
+            filename_val = current_item.get("filename", "")
+            sentence_val = current_item.get("sentence", "")
+            tts_data_id_to_clear = tts_id_val
+            annotator_id_for_clear = session.get("user_id")
+            if tts_data_id_to_clear and annotator_id_for_clear:
                 with get_db() as db:
                     try:
                         annotation_obj = db.query(Annotation).filter_by(
+                            tts_data_id=tts_data_id_to_clear, annotator_id=annotator_id_for_clear
+                        ).options(orm.joinedload(Annotation.audio_trims)).first() # Ensure audio_trims are loaded
                         if annotation_obj:
+                            # Cascade delete should handle deleting AudioTrim objects associated with this annotation
+                            # If not, uncomment and adapt the loop below:
+                            # if annotation_obj.audio_trims:
+                            #     log.info(f"Deleting {len(annotation_obj.audio_trims)} trims for annotation ID {annotation_obj.id}")
+                            #     for trim_to_delete in list(annotation_obj.audio_trims): # Iterate over a copy
+                            #         db.delete(trim_to_delete)
+                            #     annotation_obj.audio_trims = [] # Clear the collection
+                            db.delete(annotation_obj)
                             db.commit()
+                            gr.Info(f"Annotation and associated trims for ID {tts_data_id_to_clear} deleted from DB.")
                         else:
+                            gr.Warning(f"No DB annotation found to delete for ID {tts_data_id_to_clear}.")
                     except Exception as e:
                         db.rollback()
+                        log.error(f"Error deleting annotation from DB for {tts_data_id_to_clear}: {e}") # Removed exc_info=True
+                        gr.Error(f"Failed to delete annotation from database: {e}")
             else:
+                 gr.Error("Cannot clear/delete annotation from DB: Missing TTS ID or User ID.")
+            progress_str = get_user_progress_fn(session)
+            return (items, current_idx, tts_id_val, filename_val, sentence_val,
+                   new_ann_sentence, audio_to_display_after_delete, new_trim_start_sec_ui, new_trim_end_sec_ui,
+                   new_applied_trims_list, new_trims_df_data, audio_update_obj_after_delete, progress_str)
+        # ---- New Trim Callbacks ----
+        def add_trim_and_reprocess_ui_fn(start_s, end_s, current_trims_list, original_audio_data):
+            if start_s is None or end_s is None or not (end_s > start_s and start_s >= 0):
+                gr.Warning("Invalid trim times. Start must be >= 0 and End > Start.")
+                # Return current states without change if trim is invalid, also return original start/end for UI
+                return (current_trims_list, self._convert_trims_to_df_data(current_trims_list),
+                        original_audio_data, gr.update(value=original_audio_data, autoplay=False),
+                        start_s, end_s)
+            new_trim = {'start_sec': float(start_s), 'end_sec': float(end_s)}
+            updated_trims_list = current_trims_list + [new_trim]
+            processed_audio_data, audio_update = self._apply_multiple_trims_fn(original_audio_data, updated_trims_list)
+            # Reset input fields after adding trim
+            ui_trim_start_sec_reset = None # Changed from 0.0
+            ui_trim_end_sec_reset = None # Changed from 0.0
+            return (updated_trims_list, self._convert_trims_to_df_data(updated_trims_list),
+                   processed_audio_data, audio_update,
+                   ui_trim_start_sec_reset, ui_trim_end_sec_reset)
+        def undo_last_trim_and_reprocess_ui_fn(current_trims_list, original_audio_data):
+            if not current_trims_list:
+                gr.Info("No trims to undo.")
+                return (current_trims_list, self._convert_trims_to_df_data(current_trims_list),
+                        original_audio_data, gr.update(value=original_audio_data, autoplay=False))
+            updated_trims_list = current_trims_list[:-1]
+            processed_audio_data, audio_update = self._apply_multiple_trims_fn(original_audio_data, updated_trims_list)
+            return (updated_trims_list, self._convert_trims_to_df_data(updated_trims_list),
+                   processed_audio_data, audio_update)
+        # ---- Callback Wiring ----
+        # outputs_for_display_item: Defines what `show_current_item_fn` and similar full display updates will populate.
+        # It expects 10 values from show_current_item_fn:
+        # (tts_id, filename, sentence, ann_text, audio_placeholder,
+        #  trim_start_sec_ui, trim_end_sec_ui,
+        #  applied_trims_list_state_val, trims_display_val, audio_update_obj)
+        outputs_for_display_item = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,
+            self.audio, # This will receive the audio data (sr, wav) or None
+            self.trim_start_sec, self.trim_end_sec, # UI fields for new trim
+            self.applied_trims_list_state,
+            self.trims_display,
+            self.audio  # This will receive the gr.update object for autoplay etc.
         ]
         # Initial Load
+        # Chain: Disable UI -> Load Data (items, idx, initial UI values including trims list & df, progress) ->
+        #        Update UI -> Enable UI
+        # Audio is NOT loaded here anymore.
         root_blocks.load(
+            fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
+            fn=load_all_items_fn,
             inputs=[session_state],
+            # Outputs: items_state, idx_state, tts_id, filename, sentence, ann_sentence,
+            # audio (None), trim_start_sec, trim_end_sec, applied_trims_list_state,
+            # trims_display, audio (update obj), progress_display
+            outputs=[self.items_state, self.idx_state] + outputs_for_display_item + [self.header.progress_display],
         ).then(
+            # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
+            # show_current_item_fn already sets self.audio to (None, gr.update(value=None, autoplay=False))
+            # We also need to ensure original_audio_state is None if no audio is loaded.
+            lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
+            outputs=[self.original_audio_state, self.audio, self.audio]
         ).then(
+            fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
+        # Navigation (Prev/Save & Next/Next No Save)
+        # Audio is NOT loaded here anymore.
+        for btn_widget, direction_str, performs_save in [
+            (self.btn_prev, "prev", False),
+            (self.btn_save_next, "next", True),
+            (self.btn_next_no_save, "next", False)
         ]:
             event_chain = btn_widget.click(
                 fn=lambda: update_ui_interactive_state(False),
                 outputs=self.interactive_ui_elements
             )
+            if performs_save:
                 event_chain = event_chain.then(
                     fn=save_annotation_db_fn,
                     inputs=[
                         self.tts_id, session_state, self.ann_sentence,
+                        self.applied_trims_list_state,
                     ],
+                    outputs=None
+                ).then(
+                    fn=get_user_progress_fn,
+                    inputs=[session_state],
+                    outputs=self.header.progress_display
                 )
+            event_chain = event_chain.then(
+                fn=navigate_idx_fn,
                 inputs=[self.items_state, self.idx_state, gr.State(direction_str)],
                 outputs=self.idx_state,
             ).then(
                 fn=show_current_item_fn,
                 inputs=[self.items_state, self.idx_state, session_state],
+                outputs=outputs_for_display_item,
             ).then(
+                # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
+                lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
+                outputs=[self.original_audio_state, self.audio, self.audio]
             ).then(
+                lambda: gr.update(value=None), # Clear jump input
+                outputs=self.jump_data_id_input
             ).then(
                 fn=lambda: update_ui_interactive_state(True),
                 outputs=self.interactive_ui_elements
             )
+        # Audio is NOT loaded here anymore.
+        self.btn_jump.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
+            fn=jump_by_data_id_fn,
             inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
+            outputs=self.idx_state
         ).then(
             fn=show_current_item_fn,
             inputs=[self.items_state, self.idx_state, session_state],
+            outputs=outputs_for_display_item
         ).then(
+            # Explicitly set original_audio_state to None and clear audio display as it's not loaded.
+            lambda: (None, gr.update(value=None), gr.update(value=None)), # original_audio_state, audio data, audio component
+            outputs=[self.original_audio_state, self.audio, self.audio]
         ).then(
+            lambda: gr.update(value=None), # Clear jump input
+            outputs=self.jump_data_id_input
         ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
+        # Load Audio Button - This is now the ONLY place audio is downloaded and processed.
+        self.btn_load_voice.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
+            fn=download_voice_fn,
+            inputs=[gr.State(GDRIVE_FOLDER), self.filename, gr.State(True)], # Autoplay TRUE
+            outputs=[self.audio, self.original_audio_state, self.audio],
+        ).then(
+            fn=self._apply_multiple_trims_fn,
+            inputs=[self.original_audio_state, self.applied_trims_list_state],
+            outputs=[self.audio, self.audio]
         ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
+        # Copy Sentence Button
+        self.btn_copy_sentence.click(
+            fn=lambda s: s, inputs=self.sentence, outputs=self.ann_sentence
+        )
+        # Trim Button
+        self.btn_trim.click(
+            fn=add_trim_and_reprocess_ui_fn,
+            inputs=[self.trim_start_sec, self.trim_end_sec, self.applied_trims_list_state, self.original_audio_state],
+            outputs=[self.applied_trims_list_state, self.trims_display,
+                     self.audio, self.audio,
+                     self.trim_start_sec, self.trim_end_sec]
+        )
+        # Undo Trim Button
         self.btn_undo_trim.click(
+            fn=undo_last_trim_and_reprocess_ui_fn,
+            inputs=[self.applied_trims_list_state, self.original_audio_state],
+            outputs=[self.applied_trims_list_state, self.trims_display, self.audio, self.audio]
         )
+        # Delete Button
+        outputs_for_delete = [
+            self.items_state, self.idx_state, self.tts_id, self.filename, self.sentence,
+            self.ann_sentence, self.audio, self.trim_start_sec, self.trim_end_sec,
+            self.applied_trims_list_state, self.trims_display, self.audio, self.header.progress_display
+        ]
         self.btn_delete.click(
             fn=lambda: update_ui_interactive_state(False),
             outputs=self.interactive_ui_elements
         ).then(
             fn=delete_db_and_ui_fn,
+            inputs=[self.items_state, self.idx_state, session_state, self.original_audio_state],
+            outputs=outputs_for_delete
+        ).then(
             fn=lambda: update_ui_interactive_state(True),
             outputs=self.interactive_ui_elements
         )
         return self.container
+    def _apply_multiple_trims_fn(self, original_audio_data, trims_list_sec):
+        if not original_audio_data:
+            log.warning("apply_multiple_trims_fn: No original audio data.")
+            return None, gr.update(value=None, autoplay=False)
+        sr, wav_orig = original_audio_data
+        if not trims_list_sec: # No trims to apply
+            log.info("apply_multiple_trims_fn: No trims in list, returning original audio.")
+            return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)
+        delete_intervals_samples = []
+        for trim_info in trims_list_sec:
+            start_s = trim_info.get('start_sec')
+            end_s = trim_info.get('end_sec')
+            if start_s is not None and end_s is not None and end_s > start_s and start_s >= 0:
+                start_sample = int(sr * start_s)
+                end_sample = int(sr * end_s)
+                start_sample = max(0, min(start_sample, len(wav_orig)))
+                end_sample = max(start_sample, min(end_sample, len(wav_orig)))
+                if start_sample < end_sample:
+                     delete_intervals_samples.append((start_sample, end_sample))
+            else:
+                log.warning(f"apply_multiple_trims_fn: Invalid trim skipped: {trim_info}")
+        if not delete_intervals_samples:
+            log.info("apply_multiple_trims_fn: No valid trims to apply, returning original audio.")
+            return (sr, wav_orig.copy()), gr.update(value=(sr, wav_orig.copy()), autoplay=False)
+        delete_intervals_samples.sort(key=lambda x: x[0])
+        merged_delete_intervals = []
+        if delete_intervals_samples:
+            current_start, current_end = delete_intervals_samples[0]
+            for next_start, next_end in delete_intervals_samples[1:]:
+                if next_start < current_end:
+                    current_end = max(current_end, next_end)
+                else:
+                    merged_delete_intervals.append((current_start, current_end))
+                    current_start, current_end = next_start, next_end
+            merged_delete_intervals.append((current_start, current_end))
+        log.info(f"apply_multiple_trims_fn: Original wav shape: {wav_orig.shape}, Merged delete intervals (samples): {merged_delete_intervals}")
+        kept_parts_wav = []
+        current_pos_samples = 0
+        for del_start, del_end in merged_delete_intervals:
+            if del_start > current_pos_samples:
+                kept_parts_wav.append(wav_orig[current_pos_samples:del_start])
+            current_pos_samples = del_end
+        if current_pos_samples < len(wav_orig):
+            kept_parts_wav.append(wav_orig[current_pos_samples:])
+        if not kept_parts_wav:
+            final_wav = np.array([], dtype=wav_orig.dtype)
+            log.info("apply_multiple_trims_fn: All audio trimmed, resulting in empty audio.")
+        else:
+            final_wav = np.concatenate(kept_parts_wav)
+            log.info(f"apply_multiple_trims_fn: Final wav shape after trimming: {final_wav.shape}")
+        return (sr, final_wav), gr.update(value=(sr, final_wav), autoplay=False)
+    def _convert_trims_to_df_data(self, trims_list_sec):
+        if not trims_list_sec:
+            return None # For gr.DataFrame, None clears it
+        return [[f"{t['start_sec']:.3f}", f"{t['end_sec']:.3f}"] for t in trims_list_sec]

components/header.py CHANGED Viewed

@@ -8,17 +8,25 @@ class Header:
     def __init__(self):
         with gr.Row(variant="panel", elem_classes="header-row") as self.container:
             self.welcome = gr.Markdown()
             self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
     # ---------------- wiring ----------------
     def register_callbacks(self, login_page, dashboard_page, session_state):
         self.logout_btn.click(
-            fn=AuthService.logout,
-            inputs=[session_state],            # ←  حتماً داخل لیست
             outputs=[
-                login_page.container,          # 1
-                dashboard_page.container,      # 2
-                self.welcome,                  # 3
-                login_page.message,            # 4
             ],
         )

     def __init__(self):
+        """Build the header row: welcome text, progress text and the logout button."""
         with gr.Row(variant="panel", elem_classes="header-row") as self.container:
             self.welcome = gr.Markdown()
+            self.progress_display = gr.Markdown("")  # New element for progress display
             self.logout_btn = gr.Button("Log out", scale=0, min_width=90)
     # ---------------- wiring ----------------
     def register_callbacks(self, login_page, dashboard_page, session_state):
+        """Wire the logout button.
+
+        A click passes ``session_state`` to ``AuthService.logout`` and routes the
+        results to the login container, the dashboard container, the welcome
+        text and the login message; a trailing empty string blanks the
+        progress display.
+        """
+        def logout_and_clear_progress_fn(current_session_state):
+            # AuthService.logout is expected to return 4 values for the original outputs
+            logout_outputs = AuthService.logout(current_session_state)
+            # Add an empty string to clear the progress_display
+            return list(logout_outputs) + [""]
         self.logout_btn.click(
+            fn=logout_and_clear_progress_fn,
+            inputs=[session_state],
             outputs=[
+                login_page.container,
+                dashboard_page.container,
+                self.welcome,
+                login_page.message,
+                self.progress_display,  # Cleared on logout
             ],
         )

components/login_page.py CHANGED Viewed

@@ -48,8 +48,6 @@ class LoginPage:
                     dashboard_page.filename,
                     dashboard_page.sentence,
                     dashboard_page.ann_sentence,
-                    # dashboard_page.ann_at,
-                    dashboard_page.validated,
                 ],
             )
             .then(

                     dashboard_page.filename,
                     dashboard_page.sentence,
                     dashboard_page.ann_sentence,
                 ],
             )
             .then(

data/models.py CHANGED Viewed

@@ -119,12 +119,12 @@ class Annotation(Base):
     tts_data = relationship("TTSData", back_populates="annotations")
     annotator = relationship("Annotator", back_populates="annotations")
-    # Relationship to AudioTrim (one-to-one)
-    audio_trim = relationship(
         "AudioTrim",
         back_populates="annotation",
-        uselist=False, # Important for one-to-one
-        cascade="all, delete-orphan" # If annotation is deleted, delete its trim too
     )
@@ -135,13 +135,13 @@ class AudioTrim(Base):
     __tablename__ = "audio_trims"
     id = Column(Integer, primary_key=True)
-    annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False, unique=True) # Enforce one-to-one
     original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
     start = Column(Float, nullable=False)
     end = Column(Float, nullable=False)
     # Relationship back to Annotation
-    annotation = relationship("Annotation", back_populates="audio_trim")
     original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
 # --------------------------------------------------------------------------- #

     tts_data = relationship("TTSData", back_populates="annotations")
     annotator = relationship("Annotator", back_populates="annotations")
+    # Relationship to AudioTrim (one-to-MANY)
+    audio_trims = relationship(  # Renamed from audio_trim
         "AudioTrim",
         back_populates="annotation",
+        uselist=True, # Important for one-to-many
+        cascade="all, delete-orphan" # If annotation is deleted, delete its trims too
     )
     __tablename__ = "audio_trims"
     id = Column(Integer, primary_key=True)
+    annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=False) # Removed unique=True
     original_tts_data_id = Column(Integer, ForeignKey("tts_data.id"), nullable=False) # Link to original audio
     start = Column(Float, nullable=False)
     end = Column(Float, nullable=False)
     # Relationship back to Annotation
+    annotation = relationship("Annotation", back_populates="audio_trims") # Renamed from audio_trim
     original_tts_data = relationship("TTSData") # Optional: if you want to navigate from trim to original TTSData directly
 # --------------------------------------------------------------------------- #

data/repository/annotator_workload_repo.py CHANGED Viewed

@@ -17,6 +17,21 @@ class AnnotatorWorkloadRepo:
     def get_tts_data_with_annotations(
         self, annotator_name: str
     ) -> List[Dict[str, Optional[Any]]]:
         """
         output:  [
@@ -24,11 +39,6 @@ class AnnotatorWorkloadRepo:
                   ...
                 ]
         """
-        annotator = self.annotator_repo.get_annotator_by_name(annotator_name)
-        if annotator is None:
-            raise ValueError(f"Annotator '{annotator_name}' not found")
         query = (
             self.db.query(
                 TTSData,
@@ -37,7 +47,7 @@ class AnnotatorWorkloadRepo:
             .join(
                 AnnotationInterval,
                 and_(
-                    AnnotationInterval.annotator_id == annotator.id,
                     TTSData.id >= AnnotationInterval.start_index,
                     TTSData.id <= AnnotationInterval.end_index,
                 ),
@@ -46,13 +56,15 @@ class AnnotatorWorkloadRepo:
                 Annotation,
                 and_(
                     Annotation.tts_data_id == TTSData.id,
-                    Annotation.annotator_id == annotator.id,
                 ),
             )
             .order_by(TTSData.id)
-        ).distinct(TTSData.id)
         rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
-        log.info(f"{len(rows)} TTS rows fetched for annotator '{annotator_name}'.")
         return rows

     def get_tts_data_with_annotations(
         self, annotator_name: str
+    ) -> List[Dict[str, Optional[Any]]]:
+        """Resolve an annotator by name and return their TTS rows with annotations.
+
+        When no annotator has that name a warning is logged and an empty list
+        is returned; otherwise the work is delegated to
+        ``get_tts_data_with_annotations_for_user_id``.
+        """
+        # This method is kept for compatibility if used elsewhere, but
+        # get_tts_data_with_annotations_for_user_id is preferred for new logic.
+        annotator = self.annotator_repo.get_annotator_by_name(annotator_name)
+        if annotator is None:
+            log.warning(
+                f"Annotator '{annotator_name}' not found in get_tts_data_with_annotations. Returning empty list."
+            )
+            return []
+        return self.get_tts_data_with_annotations_for_user_id(
+            annotator.id, annotator_name
+        )
+    def get_tts_data_with_annotations_for_user_id(
+        self, annotator_id: int, annotator_name_for_log: str = "Unknown"
     ) -> List[Dict[str, Optional[Any]]]:
         """
         output:  [
                   ...
                 ]
         """
         query = (
             self.db.query(
                 TTSData,
             .join(
                 AnnotationInterval,
                 and_(
+                    AnnotationInterval.annotator_id == annotator_id,
                     TTSData.id >= AnnotationInterval.start_index,
                     TTSData.id <= AnnotationInterval.end_index,
                 ),
                 Annotation,
                 and_(
                     Annotation.tts_data_id == TTSData.id,
+                    Annotation.annotator_id == annotator_id,
                 ),
             )
             .order_by(TTSData.id)
+        ).distinct(TTSData.id)  # Ensure distinct TTSData items
         rows = [{"tts_data": tts, "annotation": ann} for tts, ann in query.all()]
+        log.info(
+            f"{len(rows)} TTS rows fetched for annotator ID '{annotator_id}' (Name: {annotator_name_for_log})."
+        )
         return rows

scripts/apply_custom_intervals.py ADDED Viewed

	@@ -0,0 +1,90 @@

+\
+# scripts/apply_custom_intervals.py
+import os
+import sys
+# Add project root to Python path
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
+if PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, PROJECT_ROOT)
+from utils.database import get_db
+from data.models import AnnotationInterval # For direct query and deletion
+from data.repository.annotator_repo import AnnotatorRepo
+from data.repository.annotation_interval_repo import AnnotationIntervalRepo
+from utils.logger import Logger
+log = Logger()
+# User-provided data splits
+# Format: 'annotator_name': (start_id_from_json, end_id_from_json)
+ANNOTATOR_SPLITS = {
+    'shahab': (0, 1982),
+    'amir': (1983, 3965),
+    'mohsen': (3966, 5948),
+    'mahya': (5949, 7931),
+    'najmeh': (7932, 9914),
+    'sepehr': (9915, 11897),
+    'zahra': (11898, 13880),
+    'moghim': (13881, 15862),
+    'amin': (15863, 17845)
+}
+def apply_custom_intervals():
+    """Replace each annotator's intervals with the range listed in ANNOTATOR_SPLITS.
+
+    For every ``name: (start, end)`` entry the annotator is looked up by name,
+    a start of 0 is mapped to 1 (TTSData ids are assumed to be 1-based), the
+    annotator's existing AnnotationInterval rows are deleted, and the new
+    range is assigned with overlap checking enabled.
+
+    An entry whose annotator does not exist, or whose effective range is
+    empty, is logged and skipped so the remaining entries are still processed.
+    Assignment failures are logged per annotator.  Nothing is returned and no
+    exception propagates to the caller.
+    """
+    log.info("Starting application of custom annotator intervals...")
+    try:
+        with get_db() as db:
+            annot_repo = AnnotatorRepo(db)
+            interval_repo = AnnotationIntervalRepo(db)
+            for annotator_name, (start_idx_orig, end_idx_orig) in ANNOTATOR_SPLITS.items():
+                log.info(f"Processing annotator: '{annotator_name}' with original range ({start_idx_orig}, {end_idx_orig})")
+                annotator = annot_repo.get_annotator_by_name(annotator_name)
+                if annotator is None:
+                    # Guard before any annotator.id access: an unknown name would
+                    # otherwise raise AttributeError, land in the outer handler
+                    # and abandon every annotator still waiting in the loop.
+                    log.error(f"Annotator '{annotator_name}' not found in the database. Skipping.")
+                    continue
+                # Adjust start_idx if it's 0, assuming 1-based indexing for TTSData.id in the database.
+                # If TTSData.id can legitimately be 0, this adjustment should be removed.
+                start_idx = 1 if start_idx_orig == 0 else start_idx_orig
+                end_idx = end_idx_orig
+                if start_idx_orig == 0:
+                    log.info(f"Adjusted start_index from 0 to 1 for '{annotator_name}' assuming 1-based TTSData IDs.")
+                if start_idx > end_idx:
+                    log.warning(f"Invalid range for '{annotator_name}': effective start_idx ({start_idx}) > end_idx ({end_idx}). Skipping.")
+                    continue
+                # Clear the annotator's existing intervals so the new range replaces them.
+                # NOTE(review): if the assignment below fails, these deletes are still
+                # pending in the session - confirm that leaving the annotator without
+                # an interval is acceptable.
+                existing_intervals = db.query(AnnotationInterval).filter_by(annotator_id=annotator.id).all()
+                if existing_intervals:
+                    log.info(f"Deleting {len(existing_intervals)} existing intervals for annotator '{annotator.name}'.")
+                    for interval in existing_intervals:
+                        db.delete(interval)
+                    db.flush() # Process deletes before adding new ones
+                # Assign new interval
+                try:
+                    new_interval = interval_repo.assign_interval_to_annotator(
+                        annotator_id=annotator.id,
+                        start_idx=start_idx,
+                        end_idx=end_idx,
+                        allow_overlap=False # This will prevent assignment if it overlaps with others (unless intended)
+                    )
+                    log.info(f"Successfully assigned interval [{new_interval.start_index}, {new_interval.end_index}] to '{annotator_name}'.")
+                except ValueError as e:
+                    log.error(f"Could not assign interval to '{annotator_name}': {e}")
+                except Exception as e:
+                    log.error(f"An unexpected error occurred while assigning interval to '{annotator_name}': {e}", exc_info=True)
+            # db.commit() is handled by the get_db context manager if no exceptions caused a rollback within it.
+            log.info("Custom interval application process completed.")
+    except Exception as e:
+        log.error(f"A critical error occurred during the custom interval application: {e}", exc_info=True)
+        # db.rollback() is handled by get_db context manager on exception
+if __name__ == "__main__":
+    apply_custom_intervals()

scripts/distribute_workload.py DELETED Viewed

@@ -1,170 +0,0 @@
-import sys
-import os
-# Add project root to Python path
-project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-if project_root not in sys.path:
-    sys.path.insert(0, project_root)
-import math
-import random # Added for password generation
-from sqlalchemy.sql import func
-from utils.database import get_db
-from data.models import TTSData
-from data.repository.annotator_repo import AnnotatorRepo
-from data.repository.annotation_interval_repo import AnnotationIntervalRepo
-from utils.logger import Logger
-log = Logger()
-# --- Configuration ---
-# List of annotator names to ensure exist and assign work to
-ANNOTATOR_NAMES = ["shahab", "amir", "mohsen", "mahya", "najmeh", "sepehr", "zahra", "moghim", "amin"]
-# DEFAULT_PASSWORD is no longer used for new users, random password will be generated.
-def generate_random_password():
-    """Generates a random 4-digit numerical password."""
-    return str(random.randint(1000, 9999))
-def distribute_workload():
-    log.info("Starting workload distribution script...")
-    processed_annotators_details = [] # Stores dicts: {'annotator_obj': obj, 'password_display': str, 'assigned_start': int, 'assigned_end': int}
-    try:
-        with get_db() as db:
-            annot_repo = AnnotatorRepo(db)
-            interval_repo = AnnotationIntervalRepo(db)
-            # 1. Ensure all annotators exist, create if not, and collect details
-            log.info("Processing annotators...")
-            for name in ANNOTATOR_NAMES:
-                annotator = annot_repo.get_annotator_by_name(name)
-                password_to_display = "(existing user)"
-                if not annotator:
-                    try:
-                        new_password = generate_random_password()
-                        log.info(f"Annotator '{name}' not found, creating with new password...")
-                        annotator = annot_repo.add_new_annotator(name, new_password)
-                        log.info(f"Annotator '{name}' (id={annotator.id}) created successfully with password '{new_password}'.")
-                        password_to_display = new_password
-                    except ValueError as e:
-                        log.warning(f"Could not create annotator '{name}' (likely already exists or other DB issue): {e}. Attempting to fetch again.")
-                        annotator = annot_repo.get_annotator_by_name(name) # Try fetching again
-                        if annotator:
-                            log.info(f"Found existing annotator '{name}' (id={annotator.id}) after creation attempt.")
-                        else:
-                            log.error(f"Failed to create or find annotator '{name}'. Skipping.")
-                            continue
-                else:
-                    log.info(f"Found existing annotator '{name}' (id={annotator.id}).")
-                if annotator:
-                    processed_annotators_details.append({
-                        'annotator_obj': annotator,
-                        'password_display': password_to_display,
-                        'assigned_start': None,
-                        'assigned_end': None
-                    })
-            if not processed_annotators_details:
-                log.error("No annotators processed or found. Exiting.")
-                return
-            # 2. Get total number of TTSData items
-            total_tts_items = db.query(func.count(TTSData.id)).scalar()
-            if total_tts_items is None or total_tts_items == 0:
-                log.info("No TTSData items found in the database. Nothing to assign.")
-                # Still print annotator info even if no items to assign
-                log.info("\\n--- Workload Distribution Summary ---")
-                for details in processed_annotators_details:
-                    log.info(f"Annotator: {details['annotator_obj'].name}, Assigned Range: N/A (No data items), Password: {details['password_display']}")
-                return
-            log.info(f"Total TTSData items found: {total_tts_items}")
-            # 3. Calculate distribution
-            num_annotators_for_assignment = len(processed_annotators_details)
-            if num_annotators_for_assignment == 0: # Should be caught by earlier check, but as a safeguard
-                log.error("No annotators available for assignment. Exiting.")
-                return
-            items_per_annotator_base = total_tts_items // num_annotators_for_assignment
-            remainder_items = total_tts_items % num_annotators_for_assignment
-            log.info(f"Distributing {total_tts_items} items among {num_annotators_for_assignment} annotators.")
-            log.info(f"Base items per annotator: {items_per_annotator_base}, Remainder: {remainder_items}")
-            # 4. Assign intervals
-            current_start_idx = 1 # Assuming TTSData IDs start from 1
-            for details_dict in processed_annotators_details:
-                annotator = details_dict['annotator_obj']
-                num_items_for_this_annotator = items_per_annotator_base
-                if remainder_items > 0:
-                    num_items_for_this_annotator += 1
-                    remainder_items -= 1
-                if num_items_for_this_annotator == 0:
-                    log.info(f"Annotator '{annotator.name}' assigned 0 items (total items might be less than annotators or workload already distributed).")
-                    continue
-                current_end_idx = current_start_idx + num_items_for_this_annotator - 1
-                if current_end_idx > total_tts_items:
-                    current_end_idx = total_tts_items
-                if current_start_idx > current_end_idx:
-                    log.info(f"No items to assign to '{annotator.name}' (start_idx {current_start_idx} > end_idx {current_end_idx}).")
-                    continue
-                log.info(f"Attempting to assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}' (id={annotator.id})")
-                try:
-                    existing_intervals = interval_repo.get_intervals_by_annotator(annotator.id)
-                    if existing_intervals:
-                        log.warning(f"Annotator '{annotator.name}' already has existing intervals. Skipping assignment to avoid conflicts. Manual review/cleanup of old intervals might be needed.")
-                        # current_start_idx = current_end_idx + 1 # This line should not be here if we skip the user for this round of assignment.
-                                                            # The items for this user won't be assigned and won't be passed to the next.
-                                                            # This means the total items might not be fully distributed if users are skipped.
-                                                            # For a full distribution even with skips, a more complex item re-allocation would be needed.
-                                                            # For now, skipped users mean their share is not re-distributed.
-                        continue # Skip this annotator for assignment
-                    assigned_interval = interval_repo.assign_interval_to_annotator(
-                        annotator_id=annotator.id,
-                        start_idx=current_start_idx,
-                        end_idx=current_end_idx,
-                        allow_overlap=False
-                    )
-                    details_dict['assigned_start'] = assigned_interval.start_index
-                    details_dict['assigned_end'] = assigned_interval.end_index
-                    log.info(
-                        f"Successfully assigned interval [{details_dict['assigned_start']}-{details_dict['assigned_end']}] "
-                        f"to '{annotator.name}' (id={annotator.id})"
-                    )
-                except ValueError as e:
-                    log.error(f"Could not assign interval [{current_start_idx}-{current_end_idx}] to '{annotator.name}': {e}")
-                except Exception as e:
-                    log.error(f"An unexpected error occurred while assigning interval to '{annotator.name}': {e}")
-                # Only advance current_start_idx if items were potentially assignable to *this* annotator
-                # If an annotator was skipped due to existing intervals, their share of items is not processed further in this loop.
-                current_start_idx = current_end_idx + 1
-                if current_start_idx > total_tts_items:
-                    break
-            # 5. Print summary
-            log.info("\\n--- Workload Distribution Summary ---")
-            for details in processed_annotators_details:
-                range_str = "N/A (assignment skipped or failed)"
-                if details['assigned_start'] is not None and details['assigned_end'] is not None:
-                    range_str = f"[{details['assigned_start']}-{details['assigned_end']}]"
-                log.info(f"Annotator: {details['annotator_obj'].name}, Assigned Range: {range_str}, Password: {details['password_display']}")
-            log.info("Workload distribution script finished.")
-    except Exception as e:
-        log.error(f"An critical error occurred during workload distribution: {e}", exc_info=True)
-if __name__ == "__main__":
-    distribute_workload()

scripts/import_annotations_from_json.py ADDED Viewed

	@@ -0,0 +1,306 @@

+\
+import json
+import os
+import sys
+from datetime import datetime
+# Adjust path to import project modules
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) # e.g. /home/psyborg/Desktop/tts_labeling
+# Ensure the project root is at the beginning of sys.path
+if PROJECT_ROOT in sys.path and sys.path[0] != PROJECT_ROOT:
+    sys.path.remove(PROJECT_ROOT) # Remove if it exists but not at index 0
+if PROJECT_ROOT not in sys.path: # Add if it doesn't exist at all (it will be added at index 0)
+    sys.path.insert(0, PROJECT_ROOT)
+from utils.database import get_db, SessionLocal # Changed Session to SessionLocal
+from sqlalchemy.orm import Session as SQLAlchemySession # Import Session for type hinting
+from data.models import TTSData, Annotator, Annotation, AudioTrim, AnnotationInterval # Added AnnotationInterval
+from utils.logger import Logger
+log = Logger()
+ANNOTATIONS_FILE_PATH = os.path.join(PROJECT_ROOT, "annotations.json")
+BATCH_SIZE = 100  # Define batch size for commits
+def import_annotations(db: SQLAlchemySession, data: dict): # Changed SessionLocal to SQLAlchemySession for type hint
+    samples = data.get("samples", [])
+    imported_count = 0
+    updated_count = 0
+    skipped_count = 0
+    samples_processed_in_batch = 0
+    # Caches to potentially reduce DB lookups within the script run
+    tts_data_cache = {}
+    annotator_cache = {}
+    annotation_ids_for_trim_deletion_in_batch = [] # For batch deletion of trims
+    # Create a mapping from JSON ID to sample data for efficient lookup
+    samples_by_id = {s.get("id"): s for s in samples if s.get("id") is not None}
+    log.info(f"Created a map for {len(samples_by_id)} samples based on their JSON IDs.")
+    # Load all annotator intervals from the database
+    db_intervals = db.query(AnnotationInterval).all()
+    annotator_intervals = {interval.annotator_id: (interval.start_index, interval.end_index) for interval in db_intervals}
+    log.info(f"Loaded {len(annotator_intervals)} annotator intervals from the database.")
+    for sample_idx, sample_data in enumerate(samples): # Renamed sample to sample_data for clarity
+        current_sample_json_id = sample_data.get("id")
+        if current_sample_json_id is None: # Check for None explicitly
+            log.warning("Sample missing ID, skipping.")
+            skipped_count += 1
+            continue
+        # Assuming TTSData.id in DB matches JSON 'id' for lookup,
+        # but interval checks use an adjusted ID.
+        # The effective ID for checking against DB intervals (which are potentially 1-based for JSON's 0).
+        effective_id_for_interval_check = current_sample_json_id + 1
+        # Check if TTSData entry exists
+        if current_sample_json_id in tts_data_cache:
+            tts_data_entry = tts_data_cache[current_sample_json_id]
+        else:
+            # Query TTSData using the direct ID from JSON
+            tts_data_entry = db.query(TTSData).filter_by(id=current_sample_json_id).first()
+            if tts_data_entry:
+                tts_data_cache[current_sample_json_id] = tts_data_entry
+        if not tts_data_entry:
+            log.warning(f"TTSData with JSON ID {current_sample_json_id} not found in database, skipping sample.")
+            skipped_count += 1
+            continue
+        # Use the tts_data_entry.id for foreign keys, which should be the same as current_sample_json_id
+        db_tts_data_id = tts_data_entry.id
+        json_annotations = sample_data.get("annotations", [])
+        if not json_annotations:
+            continue
+        objects_to_add_this_sample = []
+        for json_ann in json_annotations:
+            json_annotator_name = json_ann.get("annotator")
+            # Determine the final_annotated_sentence based on the N+1 rule.
+            # Rule: Use original_subtitle from the (logical) next sample (N+1).
+            # Fallback 1: If N+1 doesn't exist, or its original_subtitle is None,
+            #             use annotated_subtitle from the current sample's current annotation (json_ann).
+            # Fallback 2: If that's also None, use original_subtitle from the current sample (sample_data, top-level).
+            # Fallback 3: If all else fails, use an empty string.
+            sentence_to_use = None
+            used_n_plus_1 = False
+            logical_next_sample_json_id = current_sample_json_id - 1
+            next_sample_data_for_sentence = samples_by_id.get(logical_next_sample_json_id)
+            if next_sample_data_for_sentence:
+                sentence_from_n_plus_1 = next_sample_data_for_sentence.get("original_subtitle")
+                if sentence_from_n_plus_1 is not None:
+                    sentence_to_use = sentence_from_n_plus_1
+                    used_n_plus_1 = True
+                    # log.debug(f"For sample {current_sample_json_id}, using original_subtitle from next sample {logical_next_sample_json_id}.")
+                # else: N+1 exists but its original_subtitle is None. Fall through.
+            # else: N+1 does not exist. Fall through.
+            if not used_n_plus_1:
+                # log.debug(f"For sample {current_sample_json_id}, N+1 rule not applied. Using current sample's subtitles.")
+                sentence_to_use = json_ann.get("annotated_subtitle") # Primary fallback from current annotation
+                if sentence_to_use is None:
+                    # Secondary fallback to the top-level original_subtitle of the current sample
+                    sentence_to_use = sample_data.get("original_subtitle")
+                    # log.debug(f"For sample {current_sample_json_id}, json_ann.annotated_subtitle is None, falling back to sample_data.original_subtitle.")
+            final_annotated_sentence = sentence_to_use if sentence_to_use is not None else ""
+            if not json_annotator_name:
+                log.warning(f"Annotation for TTSData JSON ID {current_sample_json_id} missing annotator name, skipping.")
+                skipped_count +=1
+                continue
+            # Get initial annotator details from JSON
+            initial_annotator_entry = annotator_cache.get(json_annotator_name)
+            if not initial_annotator_entry:
+                initial_annotator_entry = db.query(Annotator).filter_by(name=json_annotator_name).first()
+                if not initial_annotator_entry:
+                    log.warning(f"Annotator '{json_annotator_name}' (from JSON) not found in DB for TTSData JSON ID {current_sample_json_id}. Skipping this annotation.")
+                    skipped_count += 1
+                    continue
+                annotator_cache[json_annotator_name] = initial_annotator_entry
+            initial_annotator_id = initial_annotator_entry.id
+            # These will be the annotator details used for saving the annotation.
+            # They start as the initial annotator and may be reassigned.
+            save_annotator_id = initial_annotator_id
+            save_annotator_name = json_annotator_name # For logging
+            initial_annotator_interval = annotator_intervals.get(initial_annotator_id)
+            is_within_initial_interval = False
+            if initial_annotator_interval:
+                db_start_index, db_end_index = initial_annotator_interval
+                if db_start_index is not None and db_end_index is not None and \
+                   db_start_index <= effective_id_for_interval_check <= db_end_index:
+                    is_within_initial_interval = True
+            if not is_within_initial_interval:
+                log_message_prefix = f"TTSData JSON ID {current_sample_json_id} (effective: {effective_id_for_interval_check})"
+                if initial_annotator_interval:
+                    log.warning(f"{log_message_prefix} is outside interval [{initial_annotator_interval[0]}, {initial_annotator_interval[1]}] for annotator '{json_annotator_name}'. Attempting to reassign.")
+                else:
+                    log.warning(f"{log_message_prefix}: Annotator '{json_annotator_name}' (ID: {initial_annotator_id}) has no defined interval. Attempting to reassign to an interval owner.")
+                reassigned_successfully = False
+                for potential_owner_id, (owner_start, owner_end) in annotator_intervals.items():
+                    if owner_start is not None and owner_end is not None and \
+                       owner_start <= effective_id_for_interval_check <= owner_end:
+                        save_annotator_id = potential_owner_id
+                        reassigned_annotator_db_entry = db.query(Annotator).filter_by(id=save_annotator_id).first()
+                        if reassigned_annotator_db_entry:
+                            save_annotator_name = reassigned_annotator_db_entry.name
+                            if save_annotator_name not in annotator_cache:
+                                annotator_cache[save_annotator_name] = reassigned_annotator_db_entry
+                        else:
+                            save_annotator_name = f"ID:{save_annotator_id}"
+                            log.error(f"Critical: Could not find Annotator DB entry for reassigned ID {save_annotator_id}, though an interval exists. Check data integrity.")
+                        log.info(f"Reassigning annotation for {log_message_prefix} from '{json_annotator_name}' to '{save_annotator_name}' (ID: {save_annotator_id}) as they own the interval.")
+                        reassigned_successfully = True
+                        break
+                if not reassigned_successfully:
+                    log.error(f"No annotator found with an interval covering {log_message_prefix}. Skipping this annotation by '{json_annotator_name}'.")
+                    skipped_count += 1
+                    continue
+            annotator_id = save_annotator_id
+            current_annotator_name_for_logs = save_annotator_name
+            annotated_at_str = json_ann.get("update_at") or json_ann.get("create_at")
+            annotated_at_dt = None
+            if annotated_at_str:
+                try:
+                    annotated_at_dt = datetime.fromisoformat(annotated_at_str.replace('Z', '+00:00'))
+                except ValueError:
+                    try:
+                        annotated_at_dt = datetime.strptime(annotated_at_str.split('.')[0], "%Y-%m-%dT%H:%M:%S")
+                    except ValueError as e_parse:
+                        log.error(f"Could not parse timestamp '{annotated_at_str}' for TTSData JSON ID {current_sample_json_id}, annotator {current_annotator_name_for_logs}: {e_parse}")
+            final_annotated_at = annotated_at_dt
+            # Previous N+1 logic and interval checks that led to skipping are removed/replaced by the above.
+            annotation_obj = db.query(Annotation).filter_by(
+                tts_data_id=db_tts_data_id,
+                annotator_id=annotator_id
+            ).first()
+            if annotation_obj:
+                annotation_obj.annotated_sentence = final_annotated_sentence
+                annotation_obj.annotated_at = final_annotated_at
+                updated_count +=1
+            else:
+                annotation_obj = Annotation(
+                    tts_data_id=db_tts_data_id,
+                    annotator_id=annotator_id,
+                    annotated_sentence=final_annotated_sentence,
+                    annotated_at=final_annotated_at
+                )
+                db.add(annotation_obj)
+                try:
+                    db.flush()
+                    imported_count +=1
+                except Exception as e_flush:
+                    log.error(f"Error flushing new annotation for TTSData JSON ID {current_sample_json_id}, Annotator {current_annotator_name_for_logs}: {e_flush}")
+                    db.rollback()
+                    skipped_count +=1
+                    continue
+            if annotation_obj.id:
+                if annotation_obj.id not in annotation_ids_for_trim_deletion_in_batch:
+                    annotation_ids_for_trim_deletion_in_batch.append(annotation_obj.id)
+                json_audio_trims = json_ann.get("audio_trims", [])
+                if json_audio_trims:
+                    # log.info(f"Preparing to add {len(json_audio_trims)} new trims for Annotation ID {annotation_obj.id}.")
+                    for trim_info in json_audio_trims:
+                        start_sec = trim_info.get("start")
+                        end_sec = trim_info.get("end")
+                        if start_sec is not None and end_sec is not None:
+                            try:
+                                start_ms = int(float(start_sec) * 1000.0)
+                                end_ms = int(float(end_sec) * 1000.0)
+                                if start_ms < 0 or end_ms < 0 or end_ms < start_ms:
+                                    log.warning(f"Invalid trim values (start_ms={start_ms}, end_ms={end_ms}) for annotation ID {annotation_obj.id}, TTSData JSON ID {current_sample_json_id}. Skipping.")
+                                    continue
+                                new_trim_db_obj = AudioTrim(
+                                    annotation_id=annotation_obj.id,
+                                    original_tts_data_id=db_tts_data_id,
+                                    start=start_ms,
+                                    end=end_ms
+                                )
+                                objects_to_add_this_sample.append(new_trim_db_obj)
+                            except ValueError:
+                                log.warning(f"Invalid start/end format in audio trim for annotation ID {annotation_obj.id}, TTSData JSON ID {current_sample_json_id}. Skipping: {trim_info}")
+                                continue
+                        else:
+                            log.warning(f"Skipping trim with missing start/end for Annotation ID {annotation_obj.id}, TTSData JSON ID {current_sample_json_id}: {trim_info}")
+            else:
+                log.warning(f"Annotation ID not available for TTSData JSON ID {current_sample_json_id}, Annotator {current_annotator_name_for_logs}. Cannot process audio trims.")
+        if objects_to_add_this_sample:
+            db.add_all(objects_to_add_this_sample)
+        samples_processed_in_batch += 1
+        if samples_processed_in_batch >= BATCH_SIZE or (sample_idx == len(samples) - 1):
+            if annotation_ids_for_trim_deletion_in_batch:
+                log.info(f"Batch deleting trims for {len(annotation_ids_for_trim_deletion_in_batch)} annotations in current batch.")
+                db.query(AudioTrim).filter(AudioTrim.annotation_id.in_(annotation_ids_for_trim_deletion_in_batch)).delete(synchronize_session=False)
+                annotation_ids_for_trim_deletion_in_batch.clear()
+            try:
+                db.commit()
+                log.info(f"Committed batch. Total samples processed so far: {sample_idx + 1} out of {len(samples)}")
+            except Exception as e_commit:
+                db.rollback()
+                log.error(f"Failed to commit batch after sample index {sample_idx} (TTSData JSON ID {current_sample_json_id}): {e_commit}. Rolling back this batch.")
+                annotation_ids_for_trim_deletion_in_batch.clear()
+            finally:
+                 samples_processed_in_batch = 0 # Reset for next batch or end
+    log.info(f"Finished import attempt. Final counts - New: {imported_count}, Updated: {updated_count}, Skipped: {skipped_count}")
+def main():
+    # Script entry point: read the annotations JSON file and pass its parsed
+    # contents, together with a database session, to import_annotations().
+    # Has no return value; the failures handled below are reported through
+    # log.error instead of being raised to the caller.
+    log.info("Starting annotation import script...")
+    # Stop early when the input file does not exist.
+    if not os.path.exists(ANNOTATIONS_FILE_PATH):
+        log.error(f"Annotations file not found at: {ANNOTATIONS_FILE_PATH}")
+        return
+    # Read and parse the file as UTF-8 JSON. Malformed JSON and any other
+    # read error are logged separately, and both abort the run.
+    try:
+        with open(ANNOTATIONS_FILE_PATH, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except json.JSONDecodeError as e:
+        log.error(f"Error decoding JSON from {ANNOTATIONS_FILE_PATH}: {e}")
+        return
+    except Exception as e:
+        log.error(f"Error reading file {ANNOTATIONS_FILE_PATH}: {e}")
+        return
+    # Run the import with get_db() used as a context manager. Any exception
+    # escaping the import is logged here and not re-raised.
+    try:
+        with get_db() as db_session:
+            import_annotations(db_session, data)
+    except Exception as e:
+        log.error(f"An error occurred during the import process: {e}")
+    finally:
+        # Logged only once the import stage was reached; the early returns
+        # above exit without this message.
+        log.info("Annotation import script finished.")
+# Run the import only when this file is executed as a script, not when it is
+# imported as a module.
+if __name__ == "__main__":
+    main()

utils/auth.py CHANGED Viewed

@@ -35,38 +35,36 @@ class AuthService:
             annotator = repo.get_annotator_by_name(username)
             # ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
-            def empty_dashboard_outputs():
                 return (
                     [],  # items_state
                     0,  # idx_state
-                    "",
-                    "",
-                    "",
-                    "",
-                    "",
-                    False,  # شش فیلد
                 )
             # --- کاربر موجود نیست / غیر فعال
             if annotator is None or not annotator.is_active:
                 log.warning("Failed login (not found / inactive)")
                 return (
-                    "❌ Wrong username or password!",
-                    gr.update(),
-                    gr.update(visible=False),
-                    gr.update(value=""),
-                    *empty_dashboard_outputs(),
                 )
             # --- رمز عبور اشتباه
             if not verify_password(password, annotator.password):
                 log.warning("Failed login (bad password)")
                 return (
-                    "❌ Wrong username or password!",
-                    gr.update(),
-                    gr.update(visible=False),
-                    gr.update(value=""),
-                    *empty_dashboard_outputs(),
                 )
             # ---------- ورود موفق ---------- #
@@ -106,16 +104,16 @@ class AuthService:
             # مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
             if dashboard_items:
                 first = dashboard_items[0]
-                first_vals = (
                     first["id"],
                     first["filename"],
                     first["sentence"],
                     first["annotated_sentence"],
-                    first["annotated_at"],
-                    first["validated"],
                 )
             else:
-                first_vals = ("", "", "", "", "", False)
             log.info(f"User '{username}' logged in successfully.")
@@ -127,7 +125,7 @@ class AuthService:
                 gr.update(value=f"👋 Welcome, {annotator.name}!"),  # 3
                 dashboard_items,  # 4: items_state
                 0,  # 5: idx_state
-                *first_vals,  # 6-11: شش فیلد نخست
             )
     # ───────────── LOGOUT ───────────── #

             annotator = repo.get_annotator_by_name(username)
             # ⬇️ توابع کمکی برای تولید خروجی خالی (درصورت خطا)
+            def empty_dashboard_outputs_for_ui():  # Renamed and adjusted for UI outputs
                 return (
                     [],  # items_state
                     0,  # idx_state
+                    "",  # tts_id
+                    "",  # filename
+                    "",  # sentence
+                    "",  # ann_sentence
                 )
             # --- کاربر موجود نیست / غیر فعال
             if annotator is None or not annotator.is_active:
                 log.warning("Failed login (not found / inactive)")
                 return (
+                    "❌ Wrong username or password!",  # message
+                    gr.update(),  # login_container (no change)
+                    gr.update(visible=False),  # dashboard_container
+                    gr.update(value=""),  # header_welcome
+                    *empty_dashboard_outputs_for_ui(),  # items_state, idx_state, and 4 UI textboxes
                 )
             # --- رمز عبور اشتباه
             if not verify_password(password, annotator.password):
                 log.warning("Failed login (bad password)")
                 return (
+                    "❌ Wrong username or password!",  # message
+                    gr.update(),  # login_container (no change)
+                    gr.update(visible=False),  # dashboard_container
+                    gr.update(value=""),  # header_welcome
+                    *empty_dashboard_outputs_for_ui(),  # items_state, idx_state, and 4 UI textboxes
                 )
             # ---------- ورود موفق ---------- #
             # مقداردهی فیلدهای رکورد اول (یا مقادیر تهی)
             if dashboard_items:
                 first = dashboard_items[0]
+                # Only take the first 4 values needed for the 4 textboxes
+                # tts_id, filename, sentence, ann_sentence
+                first_vals_for_ui = (
                     first["id"],
                     first["filename"],
                     first["sentence"],
                     first["annotated_sentence"],
                 )
             else:
+                first_vals_for_ui = ("", "", "", "")
             log.info(f"User '{username}' logged in successfully.")
                 gr.update(value=f"👋 Welcome, {annotator.name}!"),  # 3
                 dashboard_items,  # 4: items_state
                 0,  # 5: idx_state
+                *first_vals_for_ui,  # 6-9: چهار فیلد نخست برای UI
             )
     # ───────────── LOGOUT ───────────── #

utils/database.py CHANGED Viewed

@@ -3,6 +3,15 @@
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 from contextlib import contextmanager
 from config import conf
 from utils.logger import Logger

 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 from contextlib import contextmanager
+import sys  # Add sys import
+import os  # Add os import
+# Put the project root at the front of sys.path so the project-local imports
+# that follow (config, utils.logger) are looked up there first.
+# SCRIPT_DIR is the directory containing this file; PROJECT_ROOT is its parent.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
+# Insert at index 0 (searched first), and only if it is not already present,
+# to avoid duplicate entries.
+if PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, PROJECT_ROOT)
 from config import conf
 from utils.logger import Logger