Spaces:

rshakked
/

safe-talk

Running

rshakked committed on May 13

Commit

ac6514b

1 Parent(s): c71da37

feat: add live training log streaming using TrainerCallback and background thread

- Added GradioLoggerCallback to capture trainer log events
- Ran trainer.train() in a background thread to avoid UI blocking
- Streamed logs from queue during training to Gradio UI using yield
- Replaced simulated progress loop with real-time progress and log updates
- Fixes issue where UI showed only progress bar and froze at 99%

Files changed (1) hide show

train_abuse_model.py +33 -19

train_abuse_model.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # # Install core packages
 # !pip install -U transformers datasets accelerate
 import logging
 import io
 import os
@@ -256,28 +256,42 @@ def run_training(progress=gr.Progress(track_tqdm=True)):
         )
         logger.info("Training started with %d samples", len(train_dataset))
-        yield "🔄 Training in progress...\n"
-        total_steps = len(train_dataset) * training_args.num_train_epochs // training_args.per_device_train_batch_size
-        intervals = max(total_steps // 20, 1)
-        for i in range(0, total_steps, intervals):
-            time.sleep(0.5)
-            percent = int(100 * i / total_steps)
-            progress(percent / 100)
-            yield f"⏳ Progress: {percent}%\n"
-        # # This checks if any tensor is on GPU too early.
-        # logger.info("🧪 Sample device check from train_dataset:")
-        # sample = train_dataset[0]
-        # for k, v in sample.items():
-        #     logger.info(f"{k}: {v.device}")
         # Start training!
         trainer.train()
-        # Drain queue to UI
-        while not log_queue.empty():
-            yield log_queue.get()
         progress(1.0)
         yield "✅ Progress: 100%\n"

 # # Install core packages
 # !pip install -U transformers datasets accelerate
+import threading
 import logging
 import io
 import os
         )
         logger.info("Training started with %d samples", len(train_dataset))
+        yield "🔄 Training started...\n"
+        progress(0.01)
+        # Run training in background thread
+        trainer_training = [True]
+        def background_train():
+            trainer.train()
+            trainer_training[0] = False  # Mark as done
+        train_thread = threading.Thread(target=background_train)
+        train_thread.start()
+        # Drain log queue live while training runs
+        percent = 0
+        while train_thread.is_alive() or not log_queue.empty():
+            while not log_queue.empty():
+                log_msg = log_queue.get()
+                yield log_msg
+            # Optional: update progress bar slowly toward 1.0
+            if percent < 98:
+                percent += 1
+                progress(percent / 100)
+            time.sleep(1)
+        progress(1.0)
+        yield "✅ Progress: 100%\n"
         # Start training!
         trainer.train()
+        # # Drain queue to UI
+        # while not log_queue.empty():
+        #     yield log_queue.get()
         progress(1.0)
         yield "✅ Progress: 100%\n"