"""FastAPI service that serves a Turkish Llama-3 function-calling model.

The model is loaded once in the FastAPI startup hook and exposed through a
``POST /test`` endpoint; ``GET /`` is a lightweight health check. Importing
or running this module starts uvicorn on port 7860 in a daemon thread and
then blocks the main thread in a keep-alive loop.
"""

import os
import sys
import time
import threading
import traceback
from datetime import datetime

# === Environment variables
# These must be set BEFORE importing transformers / unsloth / bitsandbytes:
# huggingface_hub resolves its cache directories when it is first imported,
# so defaults applied afterwards would be ignored.
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")

# Unsloth asks to be imported before transformers so its patches are applied.
from unsloth import FastLanguageModel  # noqa: E402

from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline  # noqa: E402


# === Simple logging
def log(message: str) -> None:
    """Print ``message`` to stdout prefixed with the current HH:MM:SS time."""
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)


# === FastAPI application and lazily populated model state
app = FastAPI()
pipe = None
model = None
tokenizer = None


@app.on_event("startup")
def load_model() -> None:
    """Load the model, tokenizer and text-generation pipeline into globals.

    Runs as the FastAPI startup hook. Any failure is logged with a traceback
    and re-raised, so the application does not finish starting without a
    model.
    """
    global pipe, model, tokenizer
    try:
        model_name = "atasoglu/Turkish-Llama-3-8B-function-calling"
        log(f"⬇️ [1] Model yükleme başlatılıyor: {model_name}")

        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            load_in_4bit=True,
            device_map="auto",
        )
        log("✅ [2] Model ve tokenizer çekildi.")

        FastLanguageModel.for_inference(model)
        log("✅ [3] Model inference moduna alındı.")

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device_map="auto",
        )
        log("✅ [4] Pipeline başarıyla kuruldu, test etmeye hazır.")
    except Exception as e:
        log(f"❌ [ERROR] Model yükleme sırasında hata: {e}")
        traceback.print_exc()
        raise


class TestRequest(BaseModel):
    """Request body for ``POST /test``."""

    # Raw user message that is inserted into the prompt template.
    user_input: str


@app.post("/test")
def test(req: TestRequest):
    """Generate an assistant reply for ``req.user_input``.

    Returns:
        ``{"response": <generated text>}`` containing only the newly
        generated continuation, stripped of surrounding whitespace.

    Raises:
        HTTPException: status 500 with the error message as ``detail`` if
            tokenization, generation or decoding fails.
    """
    try:
        prompt = f"Kullanıcı: {req.user_input}\nAsistan:"
        log(f"💬 [5] Prompt alındı: {req.user_input}")

        # The model is placed with device_map="auto"; the input tensors have
        # to live on the same device or generate() fails on GPU hosts.
        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]
        log("🧠 [6] Tokenizer çıktılarını hazırladı, generate başlıyor...")

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.2,
            top_p=0.95,
            repetition_penalty=1.1,
            do_sample=True,
        )
        log("✅ [7] Generate tamamlandı, cevap dönülüyor.")

        # Decode only the tokens produced after the prompt. Splitting the
        # full text on "Asistan:" picks the wrong segment whenever the model
        # itself emits that marker again.
        completion_ids = outputs[0][prompt_length:]
        answer_clean = tokenizer.decode(
            completion_ids, skip_special_tokens=True
        ).strip()
        return {"response": answer_clean}
    except Exception as e:
        log(f"❌ [ERROR] /test sırasında hata: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/")
def health():
    """Health-check endpoint; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


def run_health_server() -> None:
    """Serve ``app`` with uvicorn on 0.0.0.0:7860 (blocks the calling thread)."""
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)


# Run the HTTP server in a daemon thread and keep the main thread alive.
# NOTE(review): the process keeps running even if the server thread exits
# (e.g. after a failed startup) — presumably intentional so the container
# stays up for log inspection; confirm.
threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ [0] Uygulama bekleme modunda, startup bekleniyor...")
while True:
    time.sleep(60)