feat(app): run training on button click and display logs after completion
- moved training into a callable function
- added logging to both file and in-memory buffer (see the sketch after this list)
- updated Gradio interface to safely trigger training and show logs

Files changed:
- app.py: +10 -25
- train_abuse_model.py: +107 -80
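The second bullet is the heart of the change: one logger feeds two sinks, so each record lands both in training.log on disk and in an in-memory StringIO buffer that the UI drains when training ends. A minimal standalone sketch of the pattern (the message text is illustrative):

    import io
    import logging

    # one logger, two sinks: a file for persistence and an in-memory
    # buffer whose contents can be handed back to the UI afterwards
    log_buffer = io.StringIO()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler("training.log"),  # to disk
            logging.StreamHandler(log_buffer),    # to memory
        ],
    )
    logger = logging.getLogger(__name__)

    logger.info("hello")      # reaches both handlers
    log_buffer.seek(0)        # rewind before reading
    print(log_buffer.read())  # the text a UI would display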
app.py
CHANGED
@@ -1,29 +1,14 @@
 import gradio as gr
-import subprocess
+from train_abuse_model import run_training

-def run_training():
-    try:
-        process = subprocess.Popen(
-            ["python", "train_abuse_model.py"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True
-        )

-        # [4 removed lines not recoverable from the page]
+with gr.Blocks() as demo:
+    gr.Markdown("## Fine-tune DeBERTa on abuse dataset")
+    with gr.Row():
+        start_btn = gr.Button("Start Training")
+        output_box = gr.Textbox(label="Training Logs", lines=25)
+
+    start_btn.click(fn=run_training, outputs=output_box)

-    # [3 removed lines not recoverable from the page]
-demo = gr.Interface(
-    fn=run_training,
-    inputs=[],
-    outputs=gr.Textbox(lines=25, label="Training Logs"),
-    title="Run Model Training",
-    description="Click the button to start training and see live logs below."
-)
-
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
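Note the behavioral trade-off named in the commit title: run_training returns the drained log buffer only once training and evaluation finish, so the Textbox fills after completion; the old interface description promised live logs, which this wiring does not do. If streaming were wanted later, Gradio also accepts generator handlers; a hypothetical sketch, not part of this commit:

    # hypothetical streaming variant (not in this commit): Gradio treats
    # a generator handler as a stream and refreshes the output component
    # on every yield; older Gradio versions may also need demo.queue()
    def run_training_streaming():
        logs = ""
        for step in range(3):              # stand-in for real training steps
            logs += f"step {step} done\n"
            yield logs                     # each yield updates the Textbox

    # start_btn.click(fn=run_training_streaming, outputs=output_box)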
train_abuse_model.py
CHANGED
@@ -1,5 +1,8 @@
 # # Install core packages
 # !pip install -U transformers datasets accelerate
+
+import logging
+import io
 import os

 # Python standard + ML packages
@@ -25,14 +28,28 @@ from transformers import (
     Trainer,
     TrainingArguments
 )
+
+# configure logging
+log_buffer = io.StringIO()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("training.log"),   # to file
+        logging.StreamHandler(log_buffer)      # to in-memory buffer
+    ]
+)
+logger = logging.getLogger(__name__)
+
 # Check versions
-print("Transformers version:", transformers.__version__)
+logger.info(f"Transformers version: {transformers.__version__}")

 # Check for GPU availability
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("torch.cuda.is_available():", torch.cuda.is_available())
-print("Using device:", device)
-print("PyTorch version:", torch.__version__)
+logger.info(f"torch.cuda.is_available(): {torch.cuda.is_available()}")
+logger.info(f"Using device: {device}")
+logger.info(f"PyTorch version: {torch.__version__}")

 # Custom Dataset class

@@ -101,7 +118,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
             zero_division=0
         )
         if verbose:
-            print(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
+            logger.info(f"low={low:.2f}, high={high:.2f} -> macro F1={f1:.3f}")
         if f1 > best_macro_f1:
             best_macro_f1 = f1
             best_low, best_high = low, high
@@ -110,22 +127,22 @@

 def evaluate_model_with_thresholds(trainer, test_dataset):
     """Run full evaluation with automatic threshold tuning."""
-    print("\nRunning model predictions...")
+    logger.info("\nRunning model predictions...")
     predictions = trainer.predict(test_dataset)
     probs = torch.sigmoid(torch.tensor(predictions.predictions)).numpy()
     true_soft = np.array(predictions.label_ids)

-    print("\nTuning thresholds...")
+    logger.info("\nTuning thresholds...")
     best_low, best_high, best_f1 = tune_thresholds(probs, true_soft)

-    print(f"\nBest thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")
+    logger.info(f"\nBest thresholds: low={best_low:.2f}, high={best_high:.2f} (macro F1={best_f1:.3f})")

     final_pred_soft = map_to_3_classes(probs, best_low, best_high)
     final_pred_str = convert_to_label_strings(final_pred_soft)
     true_str = convert_to_label_strings(true_soft)

-    print("\nFinal Evaluation Report (multi-class per label):\n")
-    print(classification_report(
+    logger.info("\nFinal Evaluation Report (multi-class per label):\n")
+    logger.info(classification_report(
         true_str,
         final_pred_str,
         labels=["no", "plausibly", "yes"],
@@ -163,37 +180,16 @@ label_columns = [
     'access_to_weapons', 'gaslighting'
 ]

-print(np.shape(df))
+logger.info(np.shape(df))
 # Clean data
 df = df[[text_column] + label_columns]
-print(np.shape(df))
+logger.info(np.shape(df))
 df = df.dropna(subset=[text_column])
-print(np.shape(df))
+logger.info(np.shape(df))

 df["label_vector"] = df.apply(label_row_soft, axis=1)
 label_matrix = df["label_vector"].tolist()

-
-#model_name = "onlplab/alephbert-base"
-model_name = "microsoft/deberta-v3-base"
-
-# Load pretrained model for fine-tuning
-tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(
-    model_name,
-    num_labels=len(label_columns),
-    problem_type="multi_label_classification"
-).to(device)  # Move model to GPU
-
-# gradient checkpointing helps cut memory use:
-model.gradient_checkpointing_enable()
-
-# Freeze bottom 6 layers of DeBERTa encoder
-for name, param in model.named_parameters():
-    if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
-        param.requires_grad = False
-
-
 # Proper 3-way split: train / val / test
 train_val_texts, test_texts, train_val_labels, test_labels = train_test_split(
     df[text_column].tolist(), label_matrix, test_size=0.2, random_state=42
@@ -203,51 +199,82 @@ train_texts, val_texts, train_labels, val_labels = train_test_split(
     train_val_texts, train_val_labels, test_size=0.1, random_state=42
 )

-train_dataset = AbuseDataset(train_texts, train_labels)
-val_dataset = AbuseDataset(val_texts, val_labels)
-test_dataset = AbuseDataset(test_texts, test_labels)
-
-
-# TrainingArguments for HuggingFace Trainer (logging, saving)
-training_args = TrainingArguments(
-    output_dir="./results",
-    num_train_epochs=3,
-    per_device_train_batch_size=4,
-    per_device_eval_batch_size=4,
-    evaluation_strategy="epoch",
-    save_strategy="epoch",
-    logging_dir="./logs",
-    logging_steps=100,
-)
-
-# Train using HuggingFace Trainer
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=train_dataset,
-    eval_dataset=val_dataset
-)
-
-# This checks if any tensor is on GPU too early.
-print("🧪 Sample device check from train_dataset:")
-sample = train_dataset[0]
-for k, v in sample.items():
-    print(f"{k}: {v.device}")
-
-# Start training!
-trainer.train()
-
-# Save the model and tokenizer
-if not os.path.exists("saved_model/"):
-    os.makedirs("saved_model/")
-model.save_pretrained("saved_model/")
-tokenizer.save_pretrained("saved_model/")
-
-# Evaluation
-try:
-    label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
-    evaluate_model_with_thresholds(trainer, test_dataset)
-except Exception as e:
-    print(f"Evaluation failed: {e}")
+#model_name = "onlplab/alephbert-base"
+model_name = "microsoft/deberta-v3-base"

+def run_training():
+    try:
+        logger.info("Starting training run...")
+
+        # Load pretrained model for fine-tuning
+        tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            num_labels=len(label_columns),
+            problem_type="multi_label_classification"
+        ).to(device)  # Move model to GPU
+
+        # gradient checkpointing helps cut memory use:
+        model.gradient_checkpointing_enable()
+
+        # Freeze bottom 6 layers of DeBERTa encoder
+        for name, param in model.named_parameters():
+            if any(f"encoder.layer.{i}." in name for i in range(0, 6)):
+                param.requires_grad = False
+
+
+        train_dataset = AbuseDataset(train_texts, train_labels)
+        val_dataset = AbuseDataset(val_texts, val_labels)
+        test_dataset = AbuseDataset(test_texts, test_labels)
+
+
+        # TrainingArguments for HuggingFace Trainer (logging, saving)
+        training_args = TrainingArguments(
+            output_dir="./results",
+            num_train_epochs=3,
+            per_device_train_batch_size=4,
+            per_device_eval_batch_size=4,
+            evaluation_strategy="epoch",
+            save_strategy="epoch",
+            logging_dir="./logs",
+            logging_steps=500,
+            disable_tqdm=True
+        )
+
+        # Train using HuggingFace Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset,
+            eval_dataset=val_dataset
+        )
+
+        # This checks if any tensor is on GPU too early.
+        logger.info("🧪 Sample device check from train_dataset:")
+        sample = train_dataset[0]
+        for k, v in sample.items():
+            logger.info(f"{k}: {v.device}")
+
+        # Start training!
+        trainer.train()
+
+        # Save the model and tokenizer
+        if not os.path.exists("saved_model/"):
+            os.makedirs("saved_model/")
+        model.save_pretrained("saved_model/")
+        tokenizer.save_pretrained("saved_model/")
+
+        logger.info("Training completed and model saved.")
+    except Exception as e:
+        logger.exception(f"Training failed: {e}")
+
+    # Evaluation
+    try:
+        label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
+        evaluate_model_with_thresholds(trainer, test_dataset)
+        logger.info("Evaluation completed")
+    except Exception as e:
+        logger.exception(f"Evaluation failed: {e}")
+    log_buffer.seek(0)
+    return log_buffer.read()
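The evaluation path calls map_to_3_classes and convert_to_label_strings, which are defined outside the changed hunks. A hedged reconstruction of what they plausibly do, assuming only what the diff shows (the tuned low/high thresholds and the label_map of {0.0: "no", 0.5: "plausibly", 1.0: "yes"}):

    import numpy as np

    # hypothetical reconstruction: the real definitions live outside the
    # changed hunks; per-label probabilities are bucketed into the three
    # soft classes using the tuned thresholds
    def map_to_3_classes(probs, low, high):
        out = np.zeros_like(probs)   # below low stays 0.0 ("no")
        out[probs >= low] = 0.5      # between thresholds: "plausibly"
        out[probs >= high] = 1.0     # at or above high: "yes"
        return out

    def convert_to_label_strings(soft):
        label_map = {0.0: "no", 0.5: "plausibly", 1.0: "yes"}
        return [label_map[v] for v in np.asarray(soft).ravel()]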
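For a quick smoke test outside the Space, run_training can also be called directly; the module loads and splits the dataset at import time, so this assumes the data file it reads is present:

    # direct call outside Gradio: blocks until training and evaluation
    # complete, then returns everything the logger captured
    from train_abuse_model import run_training

    logs = run_training()
    print(logs)  # same text the Gradio Textbox displays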