rshakked committed on
Commit 2032430 Β· 1 Parent(s): fedc8f2

feat(app): run training on button click and display logs after completion


- moved training into a callable function
- added logging to both file and in-memory buffer (see the sketch below)
- updated Gradio interface to safely trigger training and show logs
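
The in-memory half of the logging change works by attaching two handlers to the root logger: a FileHandler that writes training.log to disk, and a StreamHandler pointed at an io.StringIO buffer that run_training() drains and returns when the run ends. A minimal standalone sketch of that pattern (same names as in the diff below):

    import io
    import logging

    log_buffer = io.StringIO()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler("training.log"),  # persistent copy on disk
            logging.StreamHandler(log_buffer),    # captured for the UI textbox
        ],
    )

    logging.getLogger(__name__).info("hello")

    log_buffer.seek(0)
    print(log_buffer.read())  # prints the formatted record above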

Files changed (2):
  1. app.py  +10 -25
  2. train_abuse_model.py  +107 -80
app.py CHANGED
@@ -1,29 +1,14 @@
 import gradio as gr
-import subprocess
-
-def run_training():
-    try:
-        process = subprocess.Popen(
-            ["python", "train_abuse_model.py"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True
-        )
-
-        output_lines = []
-        for line in process.stdout:
-            output_lines.append(line)
-            yield "".join(output_lines)
-
-    except Exception as e:
-        yield f"Exception occurred:\n{str(e)}"
-
-demo = gr.Interface(
-    fn=run_training,
-    inputs=[],
-    outputs=gr.Textbox(lines=25, label="Training Logs"),
-    title="Run Model Training",
-    description="Click the button to start training and see live logs below."
-)
-
-demo.launch()
+from train_abuse_model import run_training
+
+with gr.Blocks() as demo:
+    gr.Markdown("## πŸš€ Fine-tune DeBERTa on abuse dataset")
+    with gr.Row():
+        start_btn = gr.Button("πŸš€ Start Training")
+        output_box = gr.Textbox(label="Training Logs", lines=25)
+
+    start_btn.click(fn=run_training, outputs=output_box)
+
+if __name__ == "__main__":
+    demo.launch()
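
Note the behavioral trade-off against the deleted version: the subprocess handler yielded accumulated stdout, so the Textbox updated live, while run_training() returns one string, so logs appear only after the run finishes (hence the commit title). If live logs were ever wanted back, a generator callback could poll the shared buffer from a worker thread; a hypothetical sketch (run_training_live is not part of this commit, and it leans on the same generator-streaming behavior the removed gr.Interface handler relied on):

    import threading
    import time

    from train_abuse_model import run_training, log_buffer

    def run_training_live():
        # run the blocking training call in a worker thread...
        worker = threading.Thread(target=run_training)
        worker.start()
        # ...and periodically yield whatever the buffer holds so far
        while worker.is_alive():
            yield log_buffer.getvalue()
            time.sleep(2)
        yield log_buffer.getvalue()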
train_abuse_model.py CHANGED
@@ -1,5 +1,8 @@
 # # Install core packages
 # !pip install -U transformers datasets accelerate
+
+import logging
+import io
 import os
 
 # Python standard + ML packages
@@ -25,14 +28,28 @@ from transformers import (
     Trainer,
     TrainingArguments
 )
+
+# configure logging to both a file and an in-memory buffer
+log_buffer = io.StringIO()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("training.log"),  # to file
+        logging.StreamHandler(log_buffer)     # to in-memory buffer
+    ]
+)
+logger = logging.getLogger(__name__)
+
 # Check versions
-print("Transformers version:", transformers.__version__)
+logger.info(f"Transformers version: {transformers.__version__}")
 
 # Check for GPU availability
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("torch.cuda.is_available():", torch.cuda.is_available())
-print("Using device:", device)
-print("PyTorch version:", torch.__version__)
+logger.info(f"torch.cuda.is_available(): {torch.cuda.is_available()}")
+logger.info(f"Using device: {device}")
+logger.info(f"PyTorch version: {torch.__version__}")
 
 # Custom Dataset class
@@ -101,7 +118,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
             zero_division=0
         )
         if verbose:
-            print(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
+            logger.info(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
         if f1 > best_macro_f1:
             best_macro_f1 = f1
             best_low, best_high = low, high
@@ -110,22 +127,22 @@
 
 def evaluate_model_with_thresholds(trainer, test_dataset):
     """Run full evaluation with automatic threshold tuning."""
-    print("\nπŸ” Running model predictions...")
+    logger.info("\nπŸ” Running model predictions...")
     predictions = trainer.predict(test_dataset)
     probs = torch.sigmoid(torch.tensor(predictions.predictions)).numpy()
     true_soft = np.array(predictions.label_ids)
 
-    print("\nπŸ”Ž Tuning thresholds...")
+    logger.info("\nπŸ”Ž Tuning thresholds...")
     best_low, best_high, best_f1 = tune_thresholds(probs, true_soft)
 
-    print(f"\nβœ… Best thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")
+    logger.info(f"\nβœ… Best thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")
 
     final_pred_soft = map_to_3_classes(probs, best_low, best_high)
     final_pred_str = convert_to_label_strings(final_pred_soft)
     true_str = convert_to_label_strings(true_soft)
 
-    print("\nπŸ“Š Final Evaluation Report (multi-class per label):\n")
-    print(classification_report(
+    logger.info("\nπŸ“Š Final Evaluation Report (multi-class per label):\n")
+    logger.info(classification_report(
         true_str,
         final_pred_str,
         labels=["no", "plausibly", "yes"],
@@ -163,37 +180,16 @@ label_columns = [
     'access_to_weapons', 'gaslighting'
 ]
 
-print(np.shape(df))
+logger.info(np.shape(df))
 # Clean data
 df = df[[text_column] + label_columns]
-print(np.shape(df))
+logger.info(np.shape(df))
 df = df.dropna(subset=[text_column])
-print(np.shape(df))
+logger.info(np.shape(df))
 
 df["label_vector"] = df.apply(label_row_soft, axis=1)
 label_matrix = df["label_vector"].tolist()
 
-
-#model_name = "onlplab/alephbert-base"
-model_name = "microsoft/deberta-v3-base"
-
-# Load pretrained model for fine-tuning
-tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(
-    model_name,
-    num_labels=len(label_columns),
-    problem_type="multi_label_classification"
-).to(device)  # Move model to GPU
-
-# gradient checkpointing helps cut memory use:
-model.gradient_checkpointing_enable()
-
-# Freeze bottom 6 layers of DeBERTa encoder
-for name, param in model.named_parameters():
-    if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
-        param.requires_grad = False
-
-
 # Proper 3-way split: train / val / test
 train_val_texts, test_texts, train_val_labels, test_labels = train_test_split(
     df[text_column].tolist(), label_matrix, test_size=0.2, random_state=42
@@ -203,51 +199,82 @@ train_texts, val_texts, train_labels, val_labels = train_test_split(
     train_val_texts, train_val_labels, test_size=0.1, random_state=42
 )
 
-train_dataset = AbuseDataset(train_texts, train_labels)
-val_dataset = AbuseDataset(val_texts, val_labels)
-test_dataset = AbuseDataset(test_texts, test_labels)
-
-
-# TrainingArguments for HuggingFace Trainer (logging, saving)
-training_args = TrainingArguments(
-    output_dir="./results",
-    num_train_epochs=3,
-    per_device_train_batch_size=4,
-    per_device_eval_batch_size=4,
-    evaluation_strategy="epoch",
-    save_strategy="epoch",
-    logging_dir="./logs",
-    logging_steps=100,
-)
-
-# Train using HuggingFace Trainer
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=train_dataset,
-    eval_dataset=val_dataset
-)
-
-# This checks if any tensor is on GPU too early.
-print("πŸ§ͺ Sample device check from train_dataset:")
-sample = train_dataset[0]
-for k, v in sample.items():
-    print(f"{k}: {v.device}")
-
-# Start training!
-trainer.train()
-
-# Save the model and tokenizer
-if not os.path.exists("saved_model/"):
-    os.makedirs("saved_model/")
-model.save_pretrained("saved_model/")
-tokenizer.save_pretrained("saved_model/")
-
-# Evaluation
-try:
-    label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
-    evaluate_model_with_thresholds(trainer, test_dataset)
-except Exception as e:
-    print(f"Evaluation failed: {e}")
+#model_name = "onlplab/alephbert-base"
+model_name = "microsoft/deberta-v3-base"
+
+def run_training():
+    try:
+        logger.info("Starting training run...")
+
+        # Load pretrained model for fine-tuning
+        tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            num_labels=len(label_columns),
+            problem_type="multi_label_classification"
+        ).to(device)  # Move model to GPU
+
+        # gradient checkpointing helps cut memory use:
+        model.gradient_checkpointing_enable()
+
+        # Freeze bottom 6 layers of DeBERTa encoder
+        for name, param in model.named_parameters():
+            if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
+                param.requires_grad = False
+
+        train_dataset = AbuseDataset(train_texts, train_labels)
+        val_dataset = AbuseDataset(val_texts, val_labels)
+        test_dataset = AbuseDataset(test_texts, test_labels)
+
+        # TrainingArguments for HuggingFace Trainer (logging, saving)
+        training_args = TrainingArguments(
+            output_dir="./results",
+            num_train_epochs=3,
+            per_device_train_batch_size=4,
+            per_device_eval_batch_size=4,
+            evaluation_strategy="epoch",
+            save_strategy="epoch",
+            logging_dir="./logs",
+            logging_steps=500,
+            disable_tqdm=True
+        )
+
+        # Train using HuggingFace Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset,
+            eval_dataset=val_dataset
+        )
+
+        # This checks if any tensor is on GPU too early.
+        logger.info("πŸ§ͺ Sample device check from train_dataset:")
+        sample = train_dataset[0]
+        for k, v in sample.items():
+            logger.info(f"{k}: {v.device}")
+
+        # Start training!
+        trainer.train()
+
+        # Save the model and tokenizer
+        if not os.path.exists("saved_model/"):
+            os.makedirs("saved_model/")
+        model.save_pretrained("saved_model/")
+        tokenizer.save_pretrained("saved_model/")
+
+        logger.info("βœ… Training completed and model saved.")
+    except Exception as e:
+        logger.exception(f"❌ Training failed: {e}")
+
+    # Evaluation
+    try:
+        label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
+        evaluate_model_with_thresholds(trainer, test_dataset)
+        logger.info("Evaluation completed")
+    except Exception as e:
+        logger.exception(f"Evaluation failed: {e}")
+
+    log_buffer.seek(0)
+    return log_buffer.read()
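
For a quick smoke test outside the Gradio UI, the new entry point can also be called directly. A hypothetical check, assuming the dataset file and environment the module expects (data loading and splitting still run at import time):

    from train_abuse_model import run_training

    logs = run_training()  # blocks until training and evaluation finish
    print(logs[-2000:])    # tail of the in-memory capture; training.log holds the same records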