ciyidogan committed
Commit 2149bc7 · verified · 1 Parent(s): 174b70a

Update fine_tune_inference_test.py

Files changed (1): fine_tune_inference_test.py (+44 −51)
fine_tune_inference_test.py CHANGED
@@ -5,7 +5,6 @@ from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from datasets import load_dataset
 from peft import PeftModel
 import torch
 from huggingface_hub import hf_hub_download
@@ -13,14 +12,21 @@ import zipfile
 from datetime import datetime
 import random
 
+# 🕒 Timestamped log function (emoji-free and safe)
+def log(message):
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    try:
+        print(f"[{timestamp}] {message}")
+    except UnicodeEncodeError:
+        safe_message = message.encode("utf-8", errors="replace").decode("utf-8", errors="ignore")
+        print(f"[{timestamp}] {safe_message}")
+    os.sys.stdout.flush()
+
 # ✅ Constants
 HF_TOKEN = os.environ.get("HF_TOKEN")
 MODEL_BASE = "UcsTurkey/kanarya-750m-fixed"
 FINE_TUNE_ZIP = "trained_model_002_005.zip"
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
-RAG_DATA_FILE = "merged_dataset_000_100.parquet"
-RAG_DATA_REPO = "UcsTurkey/turkish-general-culture-tokenized"
-USE_RAG = False
 CONFIDENCE_THRESHOLD = -1.5
 FALLBACK_ANSWERS = [
     "Bu konuda maalesef bilgim yok.",
@@ -28,20 +34,14 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]
 
-class Message(BaseModel):
-    user_input: str
-
-# ✅ Timestamped log function (flush-supported)
-def log(message):
-    timestamp = datetime.now().strftime("%H:%M:%S")
-    print(f"[{timestamp}] {message}")
-    os.sys.stdout.flush()
-
 app = FastAPI()
 chat_history = []
 model = None
 tokenizer = None
 
+class Message(BaseModel):
+    user_input: str
+
 def detect_environment():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     supports_bfloat16 = False
@@ -87,7 +87,7 @@ def root():
     <html>
     <head><title>Fine-Tune Chat</title></head>
     <body>
-    <h2>📘 Fine-tune Chat Test</h2>
+    <h2>Fine-tune Chat Test</h2>
     <textarea id=\"input\" rows=\"4\" cols=\"60\" placeholder=\"Bir şeyler yaz...\"></textarea><br><br>
     <button onclick=\"send()\">Gönder</button>
     <pre id=\"output\"></pre>
@@ -110,10 +110,10 @@ def root():
 @app.post("/chat")
 def chat(msg: Message):
     try:
-        log(f"📦 Kullanıcı mesajı alındı: {msg}")
+        log(f"Kullanıcı mesajı alındı: {msg}")
         global model, tokenizer
         if model is None or tokenizer is None:
-            log("❌ Hata: Model henüz yüklenmedi.")
+            log("Hata: Model henüz yüklenmedi.")
             return {"error": "Model yüklenmedi. Lütfen birkaç saniye sonra tekrar deneyin."}
 
         user_input = msg.user_input.strip()
@@ -121,13 +121,13 @@ def chat(msg: Message):
             return {"error": "Boş giriş"}
 
         full_prompt = f"SORU: {user_input}\nCEVAP:"
-        log(f"📨 Prompt: {full_prompt}")
+        log(f"Prompt: {full_prompt}")
 
         inputs = tokenizer(full_prompt, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
-        log(f"🔢 Tokenizer input_ids: {inputs['input_ids']}")
-        log(f"📏 input shape: {inputs['input_ids'].shape}")
+        log(f"Tokenizer input_ids: {inputs['input_ids']}")
+        log(f"input shape: {inputs['input_ids'].shape}")
 
         try:
             with torch.no_grad():
@@ -143,10 +143,10 @@ def chat(msg: Message):
                     suppress_tokens=[tokenizer.pad_token_id] if tokenizer.pad_token_id is not None else None
                 )
         except Exception as e:
-            log("❌ generate() sırasında istisna oluştu, input dump ediliyor...")
-            log(f"❌ input_ids: {inputs['input_ids']}")
-            log(f"❌ attention_mask: {inputs.get('attention_mask', 'yok')}")
-            log(f"❌ Hata tipi: {type(e).__name__} → {e}")
+            log("generate() sırasında istisna oluştu, input dump ediliyor...")
+            log(f"input_ids: {inputs['input_ids']}")
+            log(f"attention_mask: {inputs.get('attention_mask', 'yok')}")
+            log(f"Hata tipi: {type(e).__name__} → {e}")
             fallback = random.choice(FALLBACK_ANSWERS)
             return {"answer": fallback, "chat_history": chat_history}
 
@@ -157,30 +157,29 @@ def chat(msg: Message):
         if output.scores and len(output.scores) > 0:
             first_token_logit = output.scores[0][0]
             if torch.isnan(first_token_logit).any() or torch.isinf(first_token_logit).any():
-                log("⚠️ Geçersiz logit (NaN/Inf) tespit edildi, fallback cevabı gönderiliyor.")
+                log("Geçersiz logit (NaN/Inf) tespit edildi, fallback cevabı gönderiliyor.")
                 fallback = random.choice(FALLBACK_ANSWERS)
-                answer = fallback
-                return {"answer": answer, "chat_history": chat_history}
+                return {"answer": fallback, "chat_history": chat_history}
             top_logit_score = torch.max(first_token_logit).item()
-            log(f"🔎 İlk token logit skoru: {top_logit_score:.4f}")
+            log(f"İlk token logit skoru: {top_logit_score:.4f}")
 
             if top_logit_score < CONFIDENCE_THRESHOLD:
                 fallback = random.choice(FALLBACK_ANSWERS)
-                log(f"⚠️ Düşük güven: fallback cevabı gönderiliyor: {fallback}")
+                log(f"Düşük güven: fallback cevabı gönderiliyor: {fallback}")
                 answer = fallback
 
         chat_history.append({"user": user_input, "bot": answer})
-        log(f"🗨️ Soru: {user_input} → Yanıt: {answer[:60]}...")
+        log(f"Soru: {user_input} → Yanıt: {answer[:60]}...")
         return {"answer": answer, "chat_history": chat_history}
     except Exception as e:
-        log(f"❌ /chat sırasında hata oluştu: {e}")
+        log(f"/chat sırasında hata oluştu: {e}")
         return {"error": str(e)}
 
 def setup_model():
     try:
         global model, tokenizer
 
-        log("📦 Fine-tune zip indiriliyor...")
+        log("Fine-tune zip indiriliyor...")
         zip_path = hf_hub_download(
             repo_id=FINE_TUNE_REPO,
             filename=FINE_TUNE_ZIP,
@@ -193,9 +192,9 @@ def setup_model():
 
         with zipfile.ZipFile(zip_path, "r") as zip_ref:
             zip_ref.extractall(extract_dir)
-        log("📂 Zip başarıyla açıldı.")
+        log("Zip başarıyla açıldı.")
 
-        log("🔁 Tokenizer yükleniyor...")
+        log("Tokenizer yükleniyor...")
         tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_dir, "output"))
 
         if tokenizer.pad_token is None:
@@ -205,28 +204,21 @@ def setup_model():
         device = env["device"]
         dtype = torch.bfloat16 if env["supports_bfloat16"] else (torch.float16 if device == "cuda" else torch.float32)
 
-        log(f"🧪 Ortam: GPU = {env['gpu_name']}, Device = {device}, bfloat16 destekleniyor mu: {env['supports_bfloat16']}")
-        log(f"📀 Model {device.upper()} üzerinde {dtype} precision ile yüklenecek.")
+        log(f"Ortam: GPU = {env['gpu_name']}, Device = {device}, bfloat16 destekleniyor mu: {env['supports_bfloat16']}")
+        log(f"Model {device.upper()} üzerinde {dtype} precision ile yüklenecek.")
 
-        if device != "cuda":
-            log("⚠️ CUDA bulunamadı → CPU + float32 ile düşük performans modu")
-        elif not env["supports_bfloat16"]:
-            log("⚠️ CUDA mevcut ama bfloat16 desteklenmiyor → float16 ile çalışılıyor, hassasiyet kaybı yaşanabilir")
-        else:
-            log("🚀 CUDA + bfloat16 destekleniyor → yüksek performans modu")
-
-        log("ℹ️ Beklenen minimum sistem konfigürasyonu:")
+        log("Beklenen minimum sistem konfigürasyonu:")
         log(f"- GPU: {env['expected_config']['gpu']}")
         log(f"- GPU Bellek: {env['expected_config']['min_vram']}")
         log(f"- CPU: {env['expected_config']['cpu']}")
 
-        log("🧠 Base model indiriliyor...")
+        log("Base model indiriliyor...")
         base_model = AutoModelForCausalLM.from_pretrained(
             MODEL_BASE,
             torch_dtype=dtype
         ).to(device)
 
-        log("LoRA adapter uygulanıyor...")
+        log("LoRA adapter uygulanıyor...")
         peft_model = PeftModel.from_pretrained(
             base_model,
             os.path.join(extract_dir, "output")
@@ -235,21 +227,22 @@ def setup_model():
         model = peft_model.model.to(device)
         model.eval()
 
-        log(f"Model başarıyla yüklendi. dtype={next(model.parameters()).dtype}, device={next(model.parameters()).device}")
+        log(f"Model başarıyla yüklendi. dtype={next(model.parameters()).dtype}, device={next(model.parameters()).device}")
     except Exception as e:
-        log(f"setup_model() sırasında hata oluştu: {e}")
+        log(f"setup_model() sırasında hata oluştu: {e}")
 
 def run_server():
-    log("🚀 Uvicorn sunucusu başlatılıyor...")
+    log("Uvicorn sunucusu başlatılıyor...")
     uvicorn.run(app, host="0.0.0.0", port=7860)
 
+# Startup
+log("===== Application Startup =====")
 threading.Thread(target=setup_model, daemon=True).start()
 threading.Thread(target=run_server, daemon=True).start()
-
-log("⌛ Model yükleniyor, istekler ve API sunucusu hazırlanıyor...")
+log("Model yükleniyor, istekler ve API sunucusu hazırlanıyor...")
 while True:
     try:
         import time
         time.sleep(60)
     except Exception as e:
-        log(f"Ana bekleme döngüsünde hata: {e}")
+        log(f"Ana bekleme döngüsünde hata: {e}")
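Note on the new log() helper: it catches UnicodeEncodeError, but the fallback round-trips the message through UTF-8, which preserves emoji, so the second print can raise the same error again on a console that cannot encode them (e.g. cp1252 on Windows). A minimal sketch of a variant that re-encodes against the actual stdout encoding; safe_log and its details are illustrative, not part of this commit:

    import sys
    from datetime import datetime

    def safe_log(message):
        # Timestamped log that degrades gracefully on narrow console encodings.
        timestamp = datetime.now().strftime("%H:%M:%S")
        try:
            print(f"[{timestamp}] {message}")
        except UnicodeEncodeError:
            # Re-encode with the real stream encoding so unencodable characters
            # (e.g. emoji on a cp1252 console) become "?" instead of raising again.
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            fallback = message.encode(encoding, errors="replace").decode(encoding)
            print(f"[{timestamp}] {fallback}")
        sys.stdout.flush()  # plain sys.stdout; the committed os.sys.stdout.flush() works but is non-idiomatic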
 
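Once the Space is up, the /chat route accepts a JSON body matching the Message model and returns the answer plus the running chat_history. A quick smoke-test client, assuming a local run on port 7860 (the URL and the sample question are illustrative):

    import requests

    # POST a Message-shaped body; the server falls back to a canned answer
    # when the first-token logit score is below CONFIDENCE_THRESHOLD.
    resp = requests.post(
        "http://localhost:7860/chat",
        json={"user_input": "Türkiye'nin başkenti neresidir?"},
        timeout=120,
    )
    data = resp.json()
    print(data.get("answer") or data.get("error"))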