ciyidogan committed on
Commit
17584c6
·
verified ·
1 Parent(s): 01fdd15

Update train_lora_mistral.py

Browse files
Files changed (1) hide show
  1. train_lora_mistral.py +7 -1
train_lora_mistral.py CHANGED
@@ -36,6 +36,7 @@ def run_health_server():
36
  threading.Thread(target=run_health_server, daemon=True).start()
37
 
38
  # === Log
 
39
  def log(message):
40
  timestamp = datetime.now().strftime("%H:%M:%S")
41
  print(f"[{timestamp}] {message}")
@@ -43,7 +44,7 @@ def log(message):
43
 
44
  # === Eğitim Başlıyor
45
  log("🛠️ Ortam hazırlanıyor...")
46
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
47
  if tokenizer.pad_token is None:
48
  tokenizer.pad_token = tokenizer.eos_token
49
 
@@ -95,6 +96,11 @@ for file in selected_files:
95
  log(f"🔍 {len(dataset)} örnek")
96
  if len(dataset) == 0:
97
  continue
 
 
 
 
 
98
  trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
99
  log("🚀 Eğitim başlıyor...")
100
  trainer.train()
 
36
  threading.Thread(target=run_health_server, daemon=True).start()
37
 
38
  # === Log
39
+
40
  def log(message):
41
  timestamp = datetime.now().strftime("%H:%M:%S")
42
  print(f"[{timestamp}] {message}")
 
44
 
45
  # === Eğitim Başlıyor
46
  log("🛠️ Ortam hazırlanıyor...")
47
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
48
  if tokenizer.pad_token is None:
49
  tokenizer.pad_token = tokenizer.eos_token
50
 
 
96
  log(f"🔍 {len(dataset)} örnek")
97
  if len(dataset) == 0:
98
  continue
99
+
100
+ # Eğitim öncesi örnek prompt kontrolü
101
+ first_row = dataset[0]
102
+ log(f"📌 Örnek prompt: {tokenizer.decode(first_row['input_ids'], skip_special_tokens=True)}")
103
+
104
  trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
105
  log("🚀 Eğitim başlıyor...")
106
  trainer.train()