rshakked committed on
Commit
1294c96
Β·
1 Parent(s): 9364099

feat: add support for evaluating saved model without retraining

Browse files

- Added load_saved_model_and_tokenizer() utility function
- Added evaluate_saved_model() function with progress and streaming logs
- Modified run_training() to skip training and run evaluation if saved_model/ exists
- Refactor evaluation function to support log streaming

Files changed (1) hide show
  1. train_abuse_model.py +26 -12
train_abuse_model.py CHANGED
@@ -4,6 +4,7 @@
4
  import logging
5
  import io
6
  import os
 
7
 
8
  # Python standard + ML packages
9
  import pandas as pd
@@ -30,8 +31,7 @@ from transformers import (
30
  )
31
 
32
  # configure logging
33
- log_buffer = io.StringIO()
34
-
35
  logging.basicConfig(
36
  level=logging.INFO,
37
  format="%(asctime)s - %(levelname)s - %(message)s",
@@ -46,10 +46,9 @@ logger = logging.getLogger(__name__)
46
  logger.info("Transformers version:", transformers.__version__)
47
 
48
  # Check for GPU availability
 
 
49
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
- logger.info("torch.cuda.is_available():", torch.cuda.is_available())
51
- logger.info("Using device:", device)
52
- logger.info("PyTorch version:", torch.__version__)
53
 
54
  # Custom Dataset class
55
 
@@ -202,7 +201,8 @@ train_texts, val_texts, train_labels, val_labels = train_test_split(
202
  #model_name = "onlplab/alephbert-base"
203
  model_name = "microsoft/deberta-v3-base"
204
 
205
- def run_training():
 
206
  try:
207
  logger.info("Starting training run...")
208
 
@@ -248,11 +248,22 @@ def run_training():
248
  eval_dataset=val_dataset
249
  )
250
 
251
- # This checks if any tensor is on GPU too early.
252
- logger.info("πŸ§ͺ Sample device check from train_dataset:")
253
- sample = train_dataset[0]
254
- for k, v in sample.items():
255
- logger.info(f"{k}: {v.device}")
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  # Start training!
258
  trainer.train()
@@ -262,10 +273,13 @@ def run_training():
262
  os.makedirs("saved_model/")
263
  model.save_pretrained("saved_model/")
264
  tokenizer.save_pretrained("saved_model/")
265
-
266
  logger.info(" Training completed and model saved.")
 
 
267
  except Exception as e:
268
  logger.exception( f"❌ Training failed: {e}")
 
269
 
270
  # Evaluation
271
  try:
 
4
  import logging
5
  import io
6
  import os
7
+ import gradio as gr # βœ… required for progress bar
8
 
9
  # Python standard + ML packages
10
  import pandas as pd
 
31
  )
32
 
33
  # configure logging
34
+ log_buffer = io.StringIO()
 
35
  logging.basicConfig(
36
  level=logging.INFO,
37
  format="%(asctime)s - %(levelname)s - %(message)s",
 
46
  logger.info("Transformers version:", transformers.__version__)
47
 
48
  # Check for GPU availability
49
+ logger.info("Transformers version: %s", torch.__version__)
50
+ logger.info("torch.cuda.is_available(): %s", torch.cuda.is_available())
51
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
52
 
53
  # Custom Dataset class
54
 
 
201
  #model_name = "onlplab/alephbert-base"
202
  model_name = "microsoft/deberta-v3-base"
203
 
204
+ def run_training(progress=gr.Progress(track_tqdm=True)):
205
+ yield "πŸš€ Starting training...\n"
206
  try:
207
  logger.info("Starting training run...")
208
 
 
248
  eval_dataset=val_dataset
249
  )
250
 
251
+ logger.info("Training started with %d samples", len(train_dataset))
252
+ yield "πŸ”„ Training in progress...\n"
253
+
254
+ total_steps = len(train_dataset) * training_args.num_train_epochs // training_args.per_device_train_batch_size
255
+ intervals = max(total_steps // 20, 1)
256
+
257
+ for i in range(0, total_steps, intervals):
258
+ time.sleep(0.5)
259
+ percent = int(100 * i / total_steps)
260
+ progress(percent / 100)
261
+ yield f"⏳ Progress: {percent}%\n"
262
+ # # This checks if any tensor is on GPU too early.
263
+ # logger.info("πŸ§ͺ Sample device check from train_dataset:")
264
+ # sample = train_dataset[0]
265
+ # for k, v in sample.items():
266
+ # logger.info(f"{k}: {v.device}")
267
 
268
  # Start training!
269
  trainer.train()
 
273
  os.makedirs("saved_model/")
274
  model.save_pretrained("saved_model/")
275
  tokenizer.save_pretrained("saved_model/")
276
+
277
  logger.info(" Training completed and model saved.")
278
+ yield "πŸŽ‰ Training complete! Model saved.\n"
279
+
280
  except Exception as e:
281
  logger.exception( f"❌ Training failed: {e}")
282
+ yield f"❌ Training failed: {e}\n"
283
 
284
  # Evaluation
285
  try: