feat(app): run training on button click and display logs after completion
- moved training into a callable function
- added logging to both file and in-memory buffer (see the sketch after this list)
- updated Gradio interface to safely trigger training and show logs

Files changed:
- app.py: +10 -25
- train_abuse_model.py: +107 -80
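The second bullet is the heart of the change: one logger feeds two sinks, so each record lands both in training.log on disk and in an in-memory StringIO buffer that the UI drains when training ends. A minimal standalone sketch of the pattern (the message text is illustrative):

    import io
    import logging

    # one logger, two sinks: a file for persistence and an in-memory
    # buffer whose contents can be handed back to the UI afterwards
    log_buffer = io.StringIO()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler("training.log"),  # to disk
            logging.StreamHandler(log_buffer),    # to memory
        ],
    )
    logger = logging.getLogger(__name__)

    logger.info("hello")      # reaches both handlers
    log_buffer.seek(0)        # rewind before reading
    print(log_buffer.read())  # the text a UI would display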
app.py
CHANGED
@@ -1,29 +1,14 @@
 import gradio as gr
-import subprocess
+from train_abuse_model import run_training

-def run_training():
-    try:
-        process = subprocess.Popen(
-            ["python", "train_abuse_model.py"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True
-        )

-        # [4 removed lines not recoverable from the page]
+with gr.Blocks() as demo:
+    gr.Markdown("## Fine-tune DeBERTa on abuse dataset")
+    with gr.Row():
+        start_btn = gr.Button("Start Training")
+        output_box = gr.Textbox(label="Training Logs", lines=25)
+
+    start_btn.click(fn=run_training, outputs=output_box)

-    # [3 removed lines not recoverable from the page]
-demo = gr.Interface(
-    fn=run_training,
-    inputs=[],
-    outputs=gr.Textbox(lines=25, label="Training Logs"),
-    title="Run Model Training",
-    description="Click the button to start training and see live logs below."
-)
-
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
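Note the behavioral trade-off named in the commit title: run_training returns the drained log buffer only once training and evaluation finish, so the Textbox fills after completion; the old interface description promised live logs, which this wiring does not do. If streaming were wanted later, Gradio also accepts generator handlers; a hypothetical sketch, not part of this commit:

    # hypothetical streaming variant (not in this commit): Gradio treats
    # a generator handler as a stream and refreshes the output component
    # on every yield; older Gradio versions may also need demo.queue()
    def run_training_streaming():
        logs = ""
        for step in range(3):              # stand-in for real training steps
            logs += f"step {step} done\n"
            yield logs                     # each yield updates the Textbox

    # start_btn.click(fn=run_training_streaming, outputs=output_box)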
train_abuse_model.py
CHANGED
@@ -1,5 +1,8 @@
 # # Install core packages
 # !pip install -U transformers datasets accelerate
+
+import logging
+import io
 import os

 # Python standard + ML packages
@@ -25,14 +28,28 @@ from transformers import (
     Trainer,
     TrainingArguments
 )
+
+# configure logging
+log_buffer = io.StringIO()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("training.log"),   # to file
+        logging.StreamHandler(log_buffer)      # to in-memory buffer
+    ]
+)
+logger = logging.getLogger(__name__)
+
 # Check versions
-print("Transformers version:", transformers.__version__)
+logger.info(f"Transformers version: {transformers.__version__}")

 # Check for GPU availability
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("torch.cuda.is_available():", torch.cuda.is_available())
-print("Using device:", device)
-print("PyTorch version:", torch.__version__)
+logger.info(f"torch.cuda.is_available(): {torch.cuda.is_available()}")
+logger.info(f"Using device: {device}")
+logger.info(f"PyTorch version: {torch.__version__}")

 # Custom Dataset class

@@ -101,7 +118,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
             zero_division=0
         )
         if verbose:
-            print(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
+            logger.info(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
         if f1 > best_macro_f1:
             best_macro_f1 = f1
             best_low, best_high = low, high
@@ -110,22 +127,22 @@

 def evaluate_model_with_thresholds(trainer, test_dataset):
     """Run full evaluation with automatic threshold tuning."""
-    print("\nRunning model predictions...")
+    logger.info("\nRunning model predictions...")
     predictions = trainer.predict(test_dataset)
     probs = torch.sigmoid(torch.tensor(predictions.predictions)).numpy()
     true_soft = np.array(predictions.label_ids)

-    print("\nTuning thresholds...")
+    logger.info("\nTuning thresholds...")
     best_low, best_high, best_f1 = tune_thresholds(probs, true_soft)

-    print(f"\nBest thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")
+    logger.info(f"\nBest thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")

     final_pred_soft = map_to_3_classes(probs, best_low, best_high)
     final_pred_str = convert_to_label_strings(final_pred_soft)
     true_str = convert_to_label_strings(true_soft)

-    print("\nFinal Evaluation Report (multi-class per label):\n")
-    print(classification_report(
+    logger.info("\nFinal Evaluation Report (multi-class per label):\n")
+    logger.info(classification_report(
         true_str,
         final_pred_str,
         labels=["no", "plausibly", "yes"],
@@ -163,37 +180,16 @@ label_columns = [
     'access_to_weapons', 'gaslighting'
 ]

-print(np.shape(df))
+logger.info(np.shape(df))
 # Clean data
 df = df[[text_column] + label_columns]
-print(np.shape(df))
+logger.info(np.shape(df))
 df = df.dropna(subset=[text_column])
-print(np.shape(df))
+logger.info(np.shape(df))

 df["label_vector"] = df.apply(label_row_soft, axis=1)
 label_matrix = df["label_vector"].tolist()

-
-#model_name = "onlplab/alephbert-base"
-model_name = "microsoft/deberta-v3-base"
-
-# Load pretrained model for fine-tuning
-tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(
-    model_name,
-    num_labels=len(label_columns),
-    problem_type="multi_label_classification"
-).to(device)  # Move model to GPU
-
-# gradient checkpointing helps cut memory use:
-model.gradient_checkpointing_enable()
-
-# Freeze bottom 6 layers of DeBERTa encoder
-for name, param in model.named_parameters():
-    if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
-        param.requires_grad = False
-
-
 # Proper 3-way split: train / val / test
 train_val_texts, test_texts, train_val_labels, test_labels = train_test_split(
     df[text_column].tolist(), label_matrix, test_size=0.2, random_state=42
@@ -203,51 +199,82 @@ train_texts, val_texts, train_labels, val_labels = train_test_split(
     train_val_texts, train_val_labels, test_size=0.1, random_state=42
 )

-train_dataset = AbuseDataset(train_texts, train_labels)
-val_dataset = AbuseDataset(val_texts, val_labels)
-test_dataset = AbuseDataset(test_texts, test_labels)
-
-
-# TrainingArguments for HuggingFace Trainer (logging, saving)
-training_args = TrainingArguments(
-    output_dir="./results",
-    num_train_epochs=3,
-    per_device_train_batch_size=4,
-    per_device_eval_batch_size=4,
-    evaluation_strategy="epoch",
-    save_strategy="epoch",
-    logging_dir="./logs",
-    logging_steps=100,
-)
-
-# Train using HuggingFace Trainer
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=train_dataset,
-    eval_dataset=val_dataset
-)
-
-# This checks if any tensor is on GPU too early.
-print("🧪 Sample device check from train_dataset:")
-sample = train_dataset[0]
-for k, v in sample.items():
-    print(f"{k}: {v.device}")
-
-# Start training!
-trainer.train()
-
-# Save the model and tokenizer
-if not os.path.exists("saved_model/"):
-    os.makedirs("saved_model/")
-model.save_pretrained("saved_model/")
-tokenizer.save_pretrained("saved_model/")
-
-# Evaluation
-try:
-    label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
-    evaluate_model_with_thresholds(trainer, test_dataset)
-except Exception as e:
-    print(f"Evaluation failed: {e}")
+#model_name = "onlplab/alephbert-base"
+model_name = "microsoft/deberta-v3-base"

+def run_training():
+    try:
+        logger.info("Starting training run...")
+
+        # Load pretrained model for fine-tuning
+        tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            num_labels=len(label_columns),
+            problem_type="multi_label_classification"
+        ).to(device)  # Move model to GPU
+
+        # gradient checkpointing helps cut memory use:
+        model.gradient_checkpointing_enable()
+
+        # Freeze bottom 6 layers of DeBERTa encoder
+        for name, param in model.named_parameters():
+            if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
+                param.requires_grad = False
+
+
+        train_dataset = AbuseDataset(train_texts, train_labels)
+        val_dataset = AbuseDataset(val_texts, val_labels)
+        test_dataset = AbuseDataset(test_texts, test_labels)
+
+
+        # TrainingArguments for HuggingFace Trainer (logging, saving)
+        training_args = TrainingArguments(
+            output_dir="./results",
+            num_train_epochs=3,
+            per_device_train_batch_size=4,
+            per_device_eval_batch_size=4,
+            evaluation_strategy="epoch",
+            save_strategy="epoch",
+            logging_dir="./logs",
+            logging_steps=500,
+            disable_tqdm=True
+        )
+
+        # Train using HuggingFace Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset,
+            eval_dataset=val_dataset
+        )
+
+        # This checks if any tensor is on GPU too early.
+        logger.info("🧪 Sample device check from train_dataset:")
+        sample = train_dataset[0]
+        for k, v in sample.items():
+            logger.info(f"{k}: {v.device}")
+
+        # Start training!
+        trainer.train()
+
+        # Save the model and tokenizer
+        if not os.path.exists("saved_model/"):
+            os.makedirs("saved_model/")
+        model.save_pretrained("saved_model/")
+        tokenizer.save_pretrained("saved_model/")
+
+        logger.info("Training completed and model saved.")
+    except Exception as e:
+        logger.exception(f"Training failed: {e}")
+
+    # Evaluation
+    try:
+        label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
+        evaluate_model_with_thresholds(trainer, test_dataset)
+        logger.info("Evaluation completed")
+    except Exception as e:
+        logger.exception(f"Evaluation failed: {e}")
+    log_buffer.seek(0)
+    return log_buffer.read()
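The evaluation path calls map_to_3_classes and convert_to_label_strings, which are defined outside the changed hunks. A hedged reconstruction of what they plausibly do, assuming only what the diff shows (the tuned low/high thresholds and the label_map of {0.0: "no", 0.5: "plausibly", 1.0: "yes"}):

    import numpy as np

    # hypothetical reconstruction: the real definitions live outside the
    # changed hunks; per-label probabilities are bucketed into the three
    # soft classes using the tuned thresholds
    def map_to_3_classes(probs, low, high):
        out = np.zeros_like(probs)   # below low stays 0.0 ("no")
        out[probs >= low] = 0.5      # between thresholds: "plausibly"
        out[probs >= high] = 1.0     # at or above high: "yes"
        return out

    def convert_to_label_strings(soft):
        label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
        return [label_map[v] for v in np.asarray(soft).ravel()]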
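For a quick smoke test outside the Space, run_training can also be called directly; the module loads and splits the dataset at import time, so this assumes the data file it reads is present:

    # direct call outside Gradio: blocks until training and evaluation
    # complete, then returns everything the logger captured
    from train_abuse_model import run_training

    logs = run_training()
    print(logs)  # same text the Gradio Textbox displays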