import os
import time
import threading
import traceback
from datetime import datetime
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# === Environment variables: point the Hugging Face cache at a writable path
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")

# === Timestamped log helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)

# === Create the FastAPI app; the pipeline is populated by the startup hook
app = FastAPI()
pipe = None

# Note: @app.on_event("startup") is deprecated in recent FastAPI releases in
# favor of lifespan handlers; it is kept here for simplicity.
@app.on_event("startup")
def load_model():
    global pipe
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1"
        log(f"⬇️ Starting model load: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype="auto"
        )

        # The model is already placed on devices via device_map above, so
        # passing device_map to the pipeline again is redundant (and, in some
        # transformers versions, an error).
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        log("✅ Model and pipeline ready.")

    except Exception as e:
        log(f"❌ Model load error: {e}")
        traceback.print_exc()
        raise

class UserInputRequest(BaseModel):
    user_input: str
    system_prompt: str

@app.post("/generate")
def generate(req: UserInputRequest):
    # Reject requests that arrive before the startup hook has finished;
    # raised outside the try block so it is not rewrapped as a 500 below.
    if pipe is None:
        raise HTTPException(status_code=503, detail="Model is still loading")

    try:
        start_time = time.time()
        log(f"💬 User request received: {req.user_input}")

        # Short, explicit system prompt, split for readability
        concise_system_prompt = (
            f"{req.system_prompt}\n"
            "❗ Return your answer only in the format below; do not write extra explanations or add examples:\n"
            "#ANSWER: <answer>\n"
            "#INTENT: <intent>\n"
            "#PARAMS: {...}\n"
            "#MISSING: [...]\n"
            "#ACTION_JSON: {...}\n"
            "Now you only wait for the user's question and give a formatted answer."
        )

        # Role separation: System, User, and Assistant blocks
        full_prompt = (
            f"### System:\n{concise_system_prompt}\n\n"
            f"### User:\n{req.user_input}\n\n"
            f"### Assistant:"
        )

        # Greedy decoding: with do_sample=False the sampling parameters
        # (temperature=0.0, top_p, the default repetition_penalty) would be
        # ignored and only trigger transformers warnings, so they are omitted.
        # return_full_text=False strips the echoed prompt, returning only the
        # model's completion.
        result = pipe(
            full_prompt,
            max_new_tokens=200,
            do_sample=False,
            return_full_text=False
        )
        answer = result[0]["generated_text"]

        end_time = time.time()
        elapsed = end_time - start_time
        log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")

        return {"response": answer}

    except Exception as e:
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
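
# A minimal sketch of how a caller might split the tagged response format
# requested in the system prompt above (#ANSWER / #INTENT / #PARAMS /
# #MISSING / #ACTION_JSON). The tag names come from this file; the parsing
# approach itself is an assumption, not part of the service contract, and it
# handles single-line values only.
def parse_tagged_response(text: str) -> dict:
    """Collect '#TAG: value' lines into a dict keyed by tag name."""
    parsed = {}
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("#") and ":" in line:
            tag, _, value = line.partition(":")
            parsed[tag.lstrip("#").strip()] = value.strip()
    return parsed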

@app.get("/")
def health():
    return {"status": "ok"}

# Run the API via uvicorn in a daemon thread and keep the main thread alive in
# a sleep loop (a pattern for containers that run this file as a long-lived
# script).
def run_health_server():
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ Application idling...")
while True:
    time.sleep(60)
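
# --- Example client call (illustrative sketch, not part of the service) ---
# Assumes the server above is reachable on localhost:7860; the endpoint path
# and JSON fields match the UserInputRequest model defined in this file. The
# placeholder strings are hypothetical. Shown commented out so that running
# this module does not trigger a request against itself.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/generate",
#       json={
#           "user_input": "your question here",
#           "system_prompt": "your system prompt here",
#       },
#       timeout=300,
#   )
#   print(resp.json()["response"])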