Update fine_tune_inference_test.py
fine_tune_inference_test.py (CHANGED, +44 -51)
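In short: the timestamped log() helper moves above the constants and now catches UnicodeEncodeError, re-encoding the message instead of failing on consoles that cannot print some characters; emoji are removed from the log strings themselves; the unused `from datasets import load_dataset` import and the RAG constants (RAG_DATA_FILE, RAG_DATA_REPO, USE_RAG) are dropped; the Message model is declared after the FastAPI app objects; the NaN/Inf guard in /chat now returns the fallback answer directly; and startup logs a plain "===== Application Startup =====" banner.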
@@ -5,7 +5,6 @@ from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from datasets import load_dataset
 from peft import PeftModel
 import torch
 from huggingface_hub import hf_hub_download
@@ -13,14 +12,21 @@ import zipfile
 from datetime import datetime
 import random
 
+# 🕒 Zamanlı log fonksiyonu (emoji'siz ve güvenli)
+def log(message):
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    try:
+        print(f"[{timestamp}] {message}")
+    except UnicodeEncodeError:
+        safe_message = message.encode("utf-8", errors="replace").decode("utf-8", errors="ignore")
+        print(f"[{timestamp}] {safe_message}")
+    os.sys.stdout.flush()
+
 # ✅ Sabitler
 HF_TOKEN = os.environ.get("HF_TOKEN")
 MODEL_BASE = "UcsTurkey/kanarya-750m-fixed"
 FINE_TUNE_ZIP = "trained_model_002_005.zip"
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
-RAG_DATA_FILE = "merged_dataset_000_100.parquet"
-RAG_DATA_REPO = "UcsTurkey/turkish-general-culture-tokenized"
-USE_RAG = False
 CONFIDENCE_THRESHOLD = -1.5
 FALLBACK_ANSWERS = [
     "Bu konuda maalesef bilgim yok.",
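Note on the new helper above: print() raises UnicodeEncodeError when the active console encoding cannot represent a character, which is why the fallback path exists. A functionally similar helper can be written with print(..., flush=True) instead of reaching through os.sys; this is a minimal sketch for comparison, not the committed code, and its ASCII fallback is an illustrative choice:

    from datetime import datetime

    def log(message):
        # Timestamped logger that flushes on every call and degrades to an
        # ASCII-safe rendering if the console encoding rejects the message.
        timestamp = datetime.now().strftime("%H:%M:%S")
        try:
            print(f"[{timestamp}] {message}", flush=True)
        except UnicodeEncodeError:
            safe_message = message.encode("ascii", errors="replace").decode("ascii")
            print(f"[{timestamp}] {safe_message}", flush=True)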
@@ -28,20 +34,14 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]
 
-class Message(BaseModel):
-    user_input: str
-
-# ✅ Zamanlı log fonksiyonu (flush destekli)
-def log(message):
-    timestamp = datetime.now().strftime("%H:%M:%S")
-    print(f"[{timestamp}] {message}")
-    os.sys.stdout.flush()
-
 app = FastAPI()
 chat_history = []
 model = None
 tokenizer = None
 
+class Message(BaseModel):
+    user_input: str
+
 def detect_environment():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     supports_bfloat16 = False
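The rest of detect_environment() is outside this diff. For reference only, a common way to fill in the bfloat16 flag on CUDA builds of PyTorch is torch.cuda.is_bf16_supported(); the sketch below is an assumption about what such a check can look like, not the function's actual body:

    import torch

    # Illustrative probe: CUDA availability plus bfloat16 support.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    supports_bfloat16 = device == "cuda" and torch.cuda.is_bf16_supported()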
@@ -87,7 +87,7 @@ def root():
     <html>
         <head><title>Fine-Tune Chat</title></head>
         <body>
-            <h2
+            <h2>Fine-tune Chat Test</h2>
             <textarea id=\"input\" rows=\"4\" cols=\"60\" placeholder=\"Bir şeyler yaz...\"></textarea><br><br>
             <button onclick=\"send()\">Gönder</button>
             <pre id=\"output\"></pre>
@@ -110,10 +110,10 @@ def root():
 @app.post("/chat")
 def chat(msg: Message):
     try:
-        log(f"
+        log(f"Kullanıcı mesajı alındı: {msg}")
         global model, tokenizer
         if model is None or tokenizer is None:
-            log("
+            log("Hata: Model henüz yüklenmedi.")
             return {"error": "Model yüklenmedi. Lütfen birkaç saniye sonra tekrar deneyin."}
 
         user_input = msg.user_input.strip()
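For context, /chat accepts a JSON body matching the Message model (a single user_input field). A minimal client call against a locally running instance (port 7860, as configured at the bottom of the file) could look like the sketch below; the question text and the use of the requests package are illustrative, not part of the commit:

    import requests

    # Hypothetical local test call against the /chat endpoint.
    resp = requests.post(
        "http://localhost:7860/chat",
        json={"user_input": "Merhaba, nasılsın?"},
        timeout=120,
    )
    # Expected shape: {"answer": ..., "chat_history": [...]} or {"error": ...}
    print(resp.json())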
@@ -121,13 +121,13 @@ def chat(msg: Message):
             return {"error": "Boş giriş"}
 
         full_prompt = f"SORU: {user_input}\nCEVAP:"
-        log(f"
+        log(f"Prompt: {full_prompt}")
 
         inputs = tokenizer(full_prompt, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
-        log(f"
-        log(f"
+        log(f"Tokenizer input_ids: {inputs['input_ids']}")
+        log(f"input shape: {inputs['input_ids'].shape}")
 
         try:
             with torch.no_grad():
@@ -143,10 +143,10 @@ def chat(msg: Message):
                     suppress_tokens=[tokenizer.pad_token_id] if tokenizer.pad_token_id is not None else None
                 )
         except Exception as e:
-            log("
-            log(f"
-            log(f"
-            log(f"
+            log("generate() sırasında istisna oluştu, input dump ediliyor...")
+            log(f"input_ids: {inputs['input_ids']}")
+            log(f"attention_mask: {inputs.get('attention_mask', 'yok')}")
+            log(f"Hata tipi: {type(e).__name__} → {e}")
             fallback = random.choice(FALLBACK_ANSWERS)
             return {"answer": fallback, "chat_history": chat_history}
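The generate() call itself sits between these hunks and is not shown. For output.scores (used in the next hunk) to be populated, generate() must be invoked with score output enabled; the sketch below shows that shape, reusing model, inputs and tokenizer from the surrounding code, with every value except suppress_tokens being an assumption rather than what the file actually passes:

    # Sketch of a generate() call that yields output.scores (per-step logits).
    output = model.generate(
        **inputs,
        max_new_tokens=128,            # illustrative value
        return_dict_in_generate=True,  # return a structured result instead of a bare tensor
        output_scores=True,            # populate output.scores
        suppress_tokens=[tokenizer.pad_token_id] if tokenizer.pad_token_id is not None else None,
    )
    first_step_logits = output.scores[0]  # tensor of shape (batch_size, vocab_size)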
@@ -157,30 +157,29 @@ def chat(msg: Message):
         if output.scores and len(output.scores) > 0:
             first_token_logit = output.scores[0][0]
             if torch.isnan(first_token_logit).any() or torch.isinf(first_token_logit).any():
-                log("
+                log("Geçersiz logit (NaN/Inf) tespit edildi, fallback cevabı gönderiliyor.")
                 fallback = random.choice(FALLBACK_ANSWERS)
-                answer
-                return {"answer": answer, "chat_history": chat_history}
+                return {"answer": fallback, "chat_history": chat_history}
             top_logit_score = torch.max(first_token_logit).item()
-            log(f"
+            log(f"İlk token logit skoru: {top_logit_score:.4f}")
 
         if top_logit_score < CONFIDENCE_THRESHOLD:
             fallback = random.choice(FALLBACK_ANSWERS)
-            log(f"
+            log(f"Düşük güven: fallback cevabı gönderiliyor: {fallback}")
             answer = fallback
 
         chat_history.append({"user": user_input, "bot": answer})
-        log(f"
+        log(f"Soru: {user_input} → Yanıt: {answer[:60]}...")
         return {"answer": answer, "chat_history": chat_history}
     except Exception as e:
-        log(f"
+        log(f"/chat sırasında hata oluştu: {e}")
         return {"error": str(e)}
 
 def setup_model():
     try:
         global model, tokenizer
 
-        log("
+        log("Fine-tune zip indiriliyor...")
         zip_path = hf_hub_download(
             repo_id=FINE_TUNE_REPO,
             filename=FINE_TUNE_ZIP,
@@ -193,9 +192,9 @@ def setup_model():
 
         with zipfile.ZipFile(zip_path, "r") as zip_ref:
             zip_ref.extractall(extract_dir)
-        log("
+        log("Zip başarıyla açıldı.")
 
-        log("
+        log("Tokenizer yükleniyor...")
         tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_dir, "output"))
 
         if tokenizer.pad_token is None:
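The body of the `if tokenizer.pad_token is None:` branch is elided from this diff. A common pattern for causal LMs, shown purely as an assumption about what it might do, is to reuse the EOS token so that padding and the suppress_tokens list have a valid id:

    # Assumed fallback, not confirmed by the diff:
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token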
@@ -205,28 +204,21 @@ def setup_model():
         device = env["device"]
         dtype = torch.bfloat16 if env["supports_bfloat16"] else (torch.float16 if device == "cuda" else torch.float32)
 
-        log(f"
-        log(f"
+        log(f"Ortam: GPU = {env['gpu_name']}, Device = {device}, bfloat16 destekleniyor mu: {env['supports_bfloat16']}")
+        log(f"Model {device.upper()} üzerinde {dtype} precision ile yüklenecek.")
 
-
-            log("⚠️ CUDA bulunamadı → CPU + float32 ile düşük performans modu")
-        elif not env["supports_bfloat16"]:
-            log("⚠️ CUDA mevcut ama bfloat16 desteklenmiyor → float16 ile çalışılıyor, hassasiyet kaybı yaşanabilir")
-        else:
-            log("🚀 CUDA + bfloat16 destekleniyor → yüksek performans modu")
-
-        log("ℹ️ Beklenen minimum sistem konfigürasyonu:")
+        log("Beklenen minimum sistem konfigürasyonu:")
         log(f"- GPU: {env['expected_config']['gpu']}")
         log(f"- GPU Bellek: {env['expected_config']['min_vram']}")
         log(f"- CPU: {env['expected_config']['cpu']}")
 
-        log("
+        log("Base model indiriliyor...")
         base_model = AutoModelForCausalLM.from_pretrained(
             MODEL_BASE,
             torch_dtype=dtype
         ).to(device)
 
-        log("
+        log("LoRA adapter uygulanıyor...")
         peft_model = PeftModel.from_pretrained(
             base_model,
             os.path.join(extract_dir, "output")
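Just after this hunk the code keeps the adapter-wrapped model via peft_model.model. Depending on the peft version, an alternative for inference only is to fold the LoRA weights into the base model; this sketch shows that option and is not what the commit does:

    # Illustrative alternative: merge the LoRA deltas into the base weights and
    # drop the PEFT wrapper entirely (inference only).
    merged_model = peft_model.merge_and_unload()
    merged_model.eval()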
@@ -235,21 +227,22 @@ def setup_model():
         model = peft_model.model.to(device)
         model.eval()
 
-        log(f"
+        log(f"Model başarıyla yüklendi. dtype={next(model.parameters()).dtype}, device={next(model.parameters()).device}")
     except Exception as e:
-        log(f"
+        log(f"setup_model() sırasında hata oluştu: {e}")
 
 def run_server():
-    log("
+    log("Uvicorn sunucusu başlatılıyor...")
     uvicorn.run(app, host="0.0.0.0", port=7860)
 
+# Başlangıç
+log("===== Application Startup =====")
 threading.Thread(target=setup_model, daemon=True).start()
 threading.Thread(target=run_server, daemon=True).start()
-
-log("⌛ Model yükleniyor, istekler ve API sunucusu hazırlanıyor...")
+log("Model yükleniyor, istekler ve API sunucusu hazırlanıyor...")
 while True:
     try:
         import time
         time.sleep(60)
     except Exception as e:
-        log(f"
+        log(f"Ana bekleme döngüsünde hata: {e}")
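One small observation on the startup block above: `import time` lives inside the keep-alive loop, which works because imports are cached after the first iteration, but hoisting it reads a bit cleaner. An equivalent loop, sketched with the import at the top and otherwise unchanged:

    import time

    # Keep the main thread alive while the daemon threads (model setup and
    # the uvicorn server) do the actual work.
    while True:
        try:
            time.sleep(60)
        except Exception as e:
            log(f"Ana bekleme döngüsünde hata: {e}")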