# File size: 1,904 Bytes
# 444adae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import sys
import time
import threading
from datetime import datetime

# === Environment variables
# These defaults must be in place BEFORE the Hugging Face / bitsandbytes
# stack is imported: those libraries read the cache-location and banner
# variables while they are being imported, so setting them afterwards is
# too late to take effect. `setdefault` keeps any value already provided by
# the container/runtime environment.
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")

# Third-party imports intentionally come after the environment setup above.
# NOTE(review): unsloth reportedly prefers being imported before transformers;
# the original import order is kept here — confirm before changing it.
from fastapi import FastAPI  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline  # noqa: E402
from unsloth import FastLanguageModel  # noqa: E402

# === Simple logger
def log(message):
    """Print *message* to stdout prefixed with the local time as ``[HH:MM:SS]``.

    The output is flushed immediately so lines show up in container logs
    without buffering delay.
    """
    line = f"[{datetime.now():%H:%M:%S}] {message}"
    print(line, flush=True)

# === Start FastAPI
# Module-level application object; the route and startup decorators below
# register their handlers on it.
app = FastAPI()
# Text-generation pipeline shared by the request handlers. It stays None
# until the startup handler (`load_model`) assigns the loaded pipeline.
pipe = None

@app.on_event("startup")
def load_model():
    """Load the language model once at application startup.

    Loads the checkpoint in 4-bit through ``FastLanguageModel``, switches it
    to inference mode, wraps model and tokenizer in a ``text-generation``
    pipeline, and stores that pipeline in the module-level ``pipe``.
    """
    global pipe

    checkpoint = "atasoglu/Turkish-Llama-3-8B-function-calling"
    log(f"⬇️ Model yükleniyor: {checkpoint}")

    loaded = FastLanguageModel.from_pretrained(
        model_name=checkpoint,
        load_in_4bit=True,
        device_map="auto",
    )
    llm, tok = loaded

    # Put the model into unsloth's inference mode before building the pipeline.
    FastLanguageModel.for_inference(llm)

    pipeline_kwargs = {"model": llm, "tokenizer": tok, "device_map": "auto"}
    pipe = pipeline("text-generation", **pipeline_kwargs)

    log("✅ Model yüklendi, test etmeye hazır.")

# Request body for the POST /test endpoint.
# (Documented with comments rather than a class docstring so the generated
# schema description is left unchanged.)
class TestRequest(BaseModel):
    # Raw user message; the handler inserts it into the prompt template.
    user_input: str

@app.post("/test")
def test(req: TestRequest):
    # Generate a reply for `req.user_input` with the shared `pipe` and return
    # it as {"response": <text>}.
    # (Comments are used instead of a docstring so the endpoint's published
    # OpenAPI description stays as it was.)
    prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
    log(f"💬 Prompt alındı: {req.user_input}")
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        temperature=0.2,
        top_p=0.95,
        repetition_penalty=1.1
    )
    generated = outputs[0]["generated_text"]
    # The pipeline output begins with the prompt itself. Strip only that
    # leading copy: a blanket `.replace(prompt, "")` would also delete any
    # later occurrence of the prompt text inside the model's answer.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    answer = generated.strip()
    log("✅ Cevap üretildi.")
    return {"response": answer}

@app.get("/")
def health():
    # Health-check endpoint: always answers with a fixed "ok" status payload.
    # (Comment instead of docstring to keep the OpenAPI description unchanged.)
    payload = {"status": "ok"}
    return payload

def run_health_server():
    """Serve ``app`` with uvicorn on 0.0.0.0:7860.

    uvicorn is imported lazily, only when the server is actually started.
    """
    from uvicorn import run as serve

    bind = {"host": "0.0.0.0", "port": 7860}
    serve(app, **bind)

# Start the HTTP server on a background daemon thread so the main thread
# stays free for the keep-alive loop below.
_server_thread = threading.Thread(target=run_health_server, daemon=True)
_server_thread.start()

log("⏸️ Uygulama bekleme modunda...")

# Keep the process alive: the server thread is a daemon and would be torn
# down as soon as the main thread exits.
while True:
    time.sleep(60)