test-oncu / app.py
ciyidogan's picture
Update app.py
515404c verified
raw
history blame
3.13 kB
import os
import sys
import time
import threading
import traceback
from datetime import datetime
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# === Environment variables: point the Hugging Face cache directories at a
# writable location inside the container (Spaces' default HOME is read-only).
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
# === Timestamped logging helper
def log(message):
    """Print *message* to stdout prefixed with an HH:MM:SS timestamp.

    Flushes immediately so log lines appear in real time in the Space's
    container logs.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    print("[" + stamp + "] " + message, flush=True)
# === FastAPI application and the lazily-initialised generation pipeline
app = FastAPI()
pipe = None  # set by load_model() at startup; stays None until the model is ready
@app.on_event("startup")  # NOTE(review): on_event is deprecated in newer FastAPI — consider a lifespan handler
def load_model():
    """Download and initialise the text-generation pipeline at app startup.

    Populates the module-level ``pipe`` global with a transformers
    ``text-generation`` pipeline for the Turkish-Llama-8b instruct model.
    Any failure is logged with a traceback and re-raised so the app does
    not come up with a half-initialised model.
    """
    global pipe
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1"
        log(f"⬇️ Model yükleme başlatılıyor: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # device_map="auto" / torch_dtype="auto" let accelerate pick placement
        # and precision for the available hardware.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype="auto"
        )
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
        log("✅ Model ve pipeline başarıyla hazır.")
    except Exception as e:
        log(f"❌ Model yükleme hatası: {e}")
        traceback.print_exc()
        raise
class UserInputRequest(BaseModel):
    """Request body schema for the ``/generate`` endpoint."""

    user_input: str  # the end-user's message/question
    system_prompt: str  # caller-supplied system instructions prepended to the prompt
@app.post("/generate")
def generate(req: UserInputRequest):
    """Run the model on the user's input and return the tagged answer.

    Builds a System/User/Assistant prompt that forces the model into the
    ``#ANSWER/#INTENT/#PARAMS/#MISSING/#ACTION_JSON`` output format and
    returns ``{"response": <generated text>}``.

    Raises:
        HTTPException 503: the startup model load has not finished yet.
        HTTPException 500: any error during generation.
    """
    # Fix: the original called pipe unconditionally; a request arriving before
    # the startup hook finished crashed with a TypeError (surfacing as a 500).
    if pipe is None:
        raise HTTPException(status_code=503, detail="Model is not loaded yet")
    try:
        start_time = time.time()
        log(f"💬 Kullanıcı isteği alındı: {req.user_input}")

        # Short, strict system prompt forcing the tagged output format.
        concise_system_prompt = (
            f"{req.system_prompt}\n"
            "❗ Cevaplarını sadece aşağıdaki formatta döndür, fazladan açıklama yazma, örnek ekleme:\n"
            "#ANSWER: <cevap>\n"
            "#INTENT: <intent>\n"
            "#PARAMS: {...}\n"
            "#MISSING: [...]\n"
            "#ACTION_JSON: {...}\n"
            "Şimdi sadece kullanıcının sorusunu bekliyorsun ve formatlı cevap veriyorsun."
        )

        # Role separation: System / User / Assistant blocks.
        full_prompt = (
            f"### System:\n{concise_system_prompt}\n\n"
            f"### User:\n{req.user_input}\n\n"
            f"### Assistant:"
        )

        result = pipe(
            full_prompt,
            max_new_tokens=200,
            # Greedy decoding; temperature/top_p/repetition_penalty were no-ops
            # here, and temperature=0.0 trips transformers' validation warnings.
            do_sample=False,
            # Fix: without this the pipeline echoes the entire prompt (including
            # the template's own "#ANSWER:" line) back in generated_text, which
            # breaks clients that parse the tags out of the response.
            return_full_text=False,
        )
        answer = result[0]["generated_text"]

        elapsed = time.time() - start_time
        log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")
        return {"response": answer}
    except Exception as e:
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/")
def health():
    """Liveness probe: always reports the service as up."""
    payload = {"status": "ok"}
    return payload
def run_health_server():
    """Serve the FastAPI app with uvicorn on all interfaces, port 7860.

    Blocks until the server shuts down; intended to be run in a background
    thread.
    """
    # Local import: uvicorn is only needed when the server is actually started.
    import uvicorn

    bind_host = "0.0.0.0"
    bind_port = 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
# Serve the API from a daemon thread; the main thread just stays alive so the
# container keeps running (a daemon thread dies if the main thread exits).
threading.Thread(target=run_health_server, daemon=True).start()
log("⏸️ Uygulama bekleme modunda...")
while True:
    time.sleep(60)