# test-oncu / app.py
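"""Minimal FastAPI test harness for atasoglu/Turkish-Llama-3-8B-function-calling.

The model is loaded in 4-bit via Unsloth at startup; a POST /test endpoint
runs a single-turn completion and GET / serves as a health check.
"""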

import os
import time
import threading
from datetime import datetime

# === Environment variables
# Set these before importing the Hugging Face libraries, otherwise the cache
# locations are already resolved from the defaults at import time.
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")

from fastapi import FastAPI
from pydantic import BaseModel
from unsloth import FastLanguageModel  # imported before transformers so Unsloth can patch it
from transformers import pipeline

# === Simple logger
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)

# === Create the FastAPI app
app = FastAPI()
pipe = None  # set by the startup hook once the model is loaded

@app.on_event("startup")
def load_model():
    global pipe
    model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
    log(f"⬇️ Loading model: {model_name}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        load_in_4bit=True,
        device_map="auto"
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode
    # The model is already placed on devices above, so device_map is not
    # passed to the pipeline a second time.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
    )
    log("✅ Model loaded, ready for testing.")

class TestRequest(BaseModel):
    user_input: str

@app.post("/test")
def test(req: TestRequest):
    if pipe is None:  # guard against requests that arrive before startup finishes
        return {"error": "model is still loading"}
    # The Turkish prompt format ("Kullanıcı" = user, "Asistan" = assistant) is
    # kept as-is, since it is the format this Turkish model expects.
    prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
    log(f"💬 Prompt received: {req.user_input}")
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,  # sampling must be on for temperature/top_p to take effect
        temperature=0.2,
        top_p=0.95,
        repetition_penalty=1.1
    )
    # Strip the echoed prompt so only the completion is returned.
    answer = outputs[0]["generated_text"].replace(prompt, "").strip()
    log("✅ Answer generated.")
    return {"response": answer}

@app.get("/")
def health():
    return {"status": "ok"}

def run_health_server():
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

if __name__ == "__main__":
    # Serve from a daemon thread and keep the main thread alive so the
    # container does not exit.
    threading.Thread(target=run_health_server, daemon=True).start()
    log("⏸️ App in standby mode...")
    while True:
        time.sleep(60)
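
# A minimal sketch of exercising the service from a shell, assuming the
# default port 7860 used above (the request text is illustrative, not from
# the source):
#
#   curl http://localhost:7860/
#   # -> {"status": "ok"}
#
#   curl -X POST http://localhost:7860/test \
#        -H "Content-Type: application/json" \
#        -d '{"user_input": "Merhaba"}'
#   # -> {"response": "..."}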