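"""FastAPI inference service for ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1.

Loads the model once at startup, exposes POST /generate for text generation
and GET / as a health check, and serves the app on port 7860 from a daemon
thread while the main thread idles.
"""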
import os
import sys
import time
import threading
import traceback
from datetime import datetime
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# === Environment variables (Hugging Face cache location)
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")

# === Timestamped log helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)

# === Create the FastAPI app; the model pipeline is loaded at startup
app = FastAPI()
pipe = None

@app.on_event("startup")  # deprecated in newer FastAPI in favor of lifespan handlers, but still supported
def load_model():
    global pipe
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1"
        log(f"⬇️ Starting model load: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype="auto",  # selects bf16 automatically on an A100
            # load_in_8bit=True could be added here to cut memory usage
        )
        # device_map="auto" has already placed the model, so it is not passed
        # to pipeline() again for the instantiated model.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        log("✅ Model and pipeline ready.")
    except Exception as e:
        log(f"❌ Model load error: {e}")
        traceback.print_exc()
        raise
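
# A minimal sketch of the 8-bit option mentioned above (assumes the
# bitsandbytes package is available; not enabled by default):
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       device_map="auto",
#       quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#   )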

class UserInputRequest(BaseModel):
    user_input: str
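
# The checkpoint is instruct-tuned; an alternative to passing raw text to the
# pipeline would be building the prompt via the tokenizer's chat template
# (a sketch, assuming the checkpoint ships a chat template and the tokenizer
# is kept in scope; not used below):
#
#   messages = [{"role": "user", "content": req.user_input}]
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )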

@app.post("/generate")
def generate(req: UserInputRequest):
    # Guard against requests that arrive before the startup hook has finished
    if pipe is None:
        raise HTTPException(status_code=503, detail="Model is not loaded yet")
    try:
        log(f"💬 Received user request: {req.user_input}")
        result = pipe(
            req.user_input,
            max_new_tokens=200,
            temperature=0.2,
            top_p=0.95,
            repetition_penalty=1.1,
            do_sample=True,
        )
        # generated_text contains the prompt followed by the completion
        answer = result[0]["generated_text"]
        return {"response": answer}
    except Exception as e:
        log(f"❌ /generate error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
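
# Example request against /generate (assumes the service is reachable on
# localhost:7860; a usage sketch, not part of the app):
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"user_input": "Merhaba!"}'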

@app.get("/")
def health():
    return {"status": "ok"}

def run_health_server():
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Serve the API from a daemon thread; the main thread keeps the process alive.
threading.Thread(target=run_health_server, daemon=True).start()
log("⏸️ Application in standby mode...")
while True:
    time.sleep(60)