# test-oncu / app.py
# Hugging Face Space: serves the ytu-ce-cosmos Turkish-Llama-8b-DPO model
# behind a FastAPI /generate endpoint, with a / health check on port 7860.
import time
import sys
from datetime import datetime
from fastapi import FastAPI, Request
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import threading
# Timestamped logging helper.
def log(message):
    """Write *message* to stdout prefixed with an HH:MM:SS timestamp and flush.

    Flushing immediately makes the output visible in the Space's streamed
    container logs instead of being held in the stdio buffer.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    sys.stdout.write(f"[{stamp}] {message}\n")
    sys.stdout.flush()
# Minimal FastAPI app providing a liveness probe; uvicorn serves it from a
# daemon thread so it never blocks the model-loading main script.
app = FastAPI()


@app.get("/")
def health():
    """Liveness probe: always reports the service as up."""
    return {"status": "ok"}


def run_health_server():
    """Serve *app* forever on 0.0.0.0:7860 (the default Spaces port)."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


_health_thread = threading.Thread(target=run_health_server, daemon=True)
_health_thread.start()
# --- Model loading -------------------------------------------------------
# Pull tokenizer + weights from the Hub; bfloat16 with device_map="auto"
# lets accelerate place the 8B model on whatever hardware is available.
MODEL_ID = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

log("⬇️ Model ve tokenizer yükleme başlatılıyor...")
load_start = time.time()
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
except Exception as e:
    # Without the model the service is useless — log and terminate.
    log(f"❌ Model yükleme hatası: {e}")
    sys.exit(1)
else:
    log(f"✅ Model yüklendi. Süre: {time.time() - load_start:.2f} sn")
@app.post("/generate")
async def generate(request: Request):
    """Chat-completion endpoint.

    Expects a JSON body with ``user_input`` and ``system_prompt`` (both
    required), runs sampled generation on the loaded model, and returns
    ``{"response": ...}`` on success or ``{"error": ...}`` on failure.
    """
    payload = await request.json()
    user_input = payload.get("user_input", "")
    system_prompt = payload.get("system_prompt", "")

    # Reject early unless both fields are present and non-empty.
    if not (user_input and system_prompt):
        return {"error": "user_input ve system_prompt zorunludur."}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    try:
        log("🧩 Input preparation başlatılıyor...")
        prep_start = time.time()
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)
        # Single un-padded sequence, so the attention mask is all ones.
        attention_mask = torch.ones(
            input_ids.shape, dtype=torch.long, device=model.device
        )
        log(f"✅ Input ve attention mask hazırlandı. Süre: {time.time() - prep_start:.2f} sn")

        # Stop on either the model's EOS or the Llama-3 end-of-turn token.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        log("🧠 Generate çağrısı başlatılıyor...")
        gen_start = time.time()
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=256,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )
        log(f"✅ Generate tamamlandı. Süre: {time.time() - gen_start:.2f} sn")

        # Drop the prompt tokens and decode only the newly generated tail.
        new_tokens = outputs[0][input_ids.shape[-1]:]
        decoded_output = tokenizer.decode(new_tokens, skip_special_tokens=True)
        log("✅ Cevap başarıyla decode edildi.")
        return {"response": decoded_output}
    except Exception as e:
        log(f"❌ Generate hatası: {e}")
        return {"error": str(e)}
# Keep the main thread alive indefinitely so the Space container is not
# restarted after startup; the health server keeps running on its daemon thread.
log("⏸️ Uygulama hazır, bekleme modunda...")
while True:
    time.sleep(60)