import os
import time
import threading
import traceback
from datetime import datetime

# Unsloth recommends importing it before transformers so its patches apply.
from unsloth import FastLanguageModel

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline

# === Environment variables
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")

# === Simple logging helper
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)

# === Initialize FastAPI
app = FastAPI()
# These are populated once by load_model() at app startup.
pipe = None
model = None
tokenizer = None

@app.on_event("startup")
def load_model():
    global pipe, model, tokenizer
    try:
        model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
        log(f"⬇️ [1] Model yükleme başlatılıyor: {model_name}")

        # load_in_4bit quantizes the weights via bitsandbytes so the 8B model
        # fits in modest GPU memory; device_map="auto" lets accelerate place it.
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            load_in_4bit=True,
            device_map="auto"
        )
        log("✅ [2] Model and tokenizer fetched.")

        FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference path
        log("✅ [3] Model switched to inference mode.")

        # The pipeline is kept as a convenience handle; /test below calls
        # model.generate directly. The model is already placed on its device,
        # so device_map is not passed to the pipeline again.
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer
        )
        log("✅ [4] Pipeline set up successfully, ready for testing.")

    except Exception as e:
        log(f"❌ [ERROR] Error while loading the model: {e}")
        traceback.print_exc()
        # Re-raise so startup fails loudly instead of serving without a model.
        raise

class TestRequest(BaseModel):
    user_input: str

@app.post("/test")
def test(req: TestRequest):
    try:
        prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
        log(f"💬 [5] Prompt alındı: {req.user_input}")

        # Move the tokenized inputs to the model's device; they default to CPU.
        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        log("🧠 [6] Tokenizer outputs ready, starting generate...")

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.2,         # low temperature keeps answers focused
            top_p=0.95,
            repetition_penalty=1.1,  # discourages looping output
            do_sample=True
        )
        log("✅ [7] Generate finished, returning the answer.")

        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # The decoded text echoes the prompt, so keep only what follows "Asistan:".
        answer_clean = answer.split("Asistan:")[-1].strip()
        return {"response": answer_clean}

    except Exception as e:
        log(f"❌ [ERROR] /test sırasında hata: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
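
# Example request against /test (a sketch; assumes the server is reachable
# on port 7860, as configured below):
#
#   curl -X POST http://localhost:7860/test \
#        -H "Content-Type: application/json" \
#        -d '{"user_input": "Merhaba, bugün hava nasıl?"}'
#
# The response body has the shape {"response": "<generated answer>"}.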

@app.get("/")
def health():
    return {"status": "ok"}

# Run the API server in a background daemon thread and keep the main thread
# alive with an idle loop, so the container process does not exit.
def run_health_server():
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ [0] Application idle, waiting for startup...")
while True:
    time.sleep(60)