import os
import sys
import time
import threading
import traceback
from datetime import datetime
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
# === Environment variables
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")

# === Timestamped log helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)
# === Initialize FastAPI
app = FastAPI()
tokenizer = None
model = None
@app.on_event("startup")
def load_model():
    global tokenizer, model
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
        log(f"⬇️ Loading model: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",  # bf16 on an A100
            device_map="auto"
        )
        log("✅ Model and tokenizer ready.")
    except Exception as e:
        log(f"❌ Model loading failed: {e}")
        traceback.print_exc()
        raise
class UserInputRequest(BaseModel):
    user_input: str
    system_prompt: str
@app.post("/generate")
def generate(req: UserInputRequest):
    try:
        start_time = time.time()
        log(f"💬 User request received: {req.user_input}")
        messages = [
            {"role": "system", "content": req.system_prompt},
            {"role": "user", "content": req.user_input}
        ]
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)
        # <|eot_id|> marks end-of-turn in the Llama 3 chat template
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]
        outputs = model.generate(
            input_ids,
            max_new_tokens=200,
            eos_token_id=terminators,
            pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
            do_sample=False  # greedy decoding; temperature/top_p are ignored (and warned about) when sampling is off
        )
        # Strip the prompt tokens and decode only the newly generated part
        response = outputs[0][input_ids.shape[-1]:]
        answer = tokenizer.decode(response, skip_special_tokens=True)
        elapsed = time.time() - start_time
        log(f"✅ Response time: {elapsed:.2f} s")
        return {"response": answer}
    except Exception as e:
        log(f"❌ /generate error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
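
# Example call (a sketch; assumes the server is reachable on localhost:7860,
# the port configured below):
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"system_prompt": "You are a helpful assistant.", "user_input": "Merhaba!"}'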
@app.get("/")
def health():
    return {"status": "ok"}
def run_health_server():
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Serve the API from a daemon thread and keep the main thread alive,
# so the process does not exit when run as a plain script.
threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ Application in standby mode...")
while True:
    time.sleep(60)