import os, time, torch, zipfile, threading, uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import hf_hub_download
from datetime import datetime
import random

# === Constants ===
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_BASE = "mistralai/Mistral-7B-Instruct-v0.2"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
USE_SAMPLING = False
CONFIDENCE_THRESHOLD = -1.5
# Fallback answers are kept in Turkish because the model serves Turkish chat.
# They mean: "Unfortunately I have no knowledge of this." / "I could not quite
# understand what you mean." / "I cannot answer this question right now."
FALLBACK_ANSWERS = [
    "Bu konuda maalesef bilgim yok.",
    "Ne demek istediğinizi tam anlayamadım.",
    "Bu soruya şu an yanıt veremiyorum."
]

# === Logging ===
def log(message):
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)

# === FastAPI ===
app = FastAPI()
chat_history = []
model = None
tokenizer = None

class Message(BaseModel):
    user_input: str

@app.get("/")
def health():
    return {"status": "ok"}

@app.get("/start", response_class=HTMLResponse)
def root():
    return """
<!doctype html>
<!-- Minimal reconstruction: only the page title survived in the source;
     the original markup was lost. This stub posts to the /chat endpoint. -->
<html>
<head><meta charset="utf-8"><title>Mistral 7B Chat</title></head>
<body>
  <h1>Mistral 7B Chat</h1>
  <input id="q"> <button onclick="send()">Send</button>
  <pre id="out"></pre>
  <script>
    async function send() {
      const res = await fetch("/chat", {method: "POST",
        headers: {"Content-Type": "application/json"},
        body: JSON.stringify({user_input: document.getElementById("q").value})});
      document.getElementById("out").textContent = JSON.stringify(await res.json(), null, 2);
    }
  </script>
</body>
</html>
"""

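# Example request against a running instance (hypothetical payload):
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"user_input": "Merhaba"}'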
@app.post("/chat")
def chat(msg: Message):
    global model, tokenizer
    try:
        if model is None:
            return {"error": "Model yüklenmedi"}
        user_input = msg.user_input.strip()
        if not user_input:
            return {"error": "Boş giriş"}
        prompt = f"SORU: {user_input}\nCEVAP:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Guard: when the tokenizer has no dedicated pad token, pad was
        # aliased to eos in setup_model(), and suppressing it would also
        # suppress EOS, forcing every generation to run to max_new_tokens.
        suppress = (
            [tokenizer.pad_token_id]
            if tokenizer.pad_token_id != tokenizer.eos_token_id
            else None
        )
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=128,
                do_sample=USE_SAMPLING,
                # Sampling knobs only apply when do_sample=True; passing None
                # falls back to the generation-config defaults.
                temperature=0.7 if USE_SAMPLING else None,
                top_p=0.9 if USE_SAMPLING else None,
                top_k=50 if USE_SAMPLING else None,
                return_dict_in_generate=True,
                output_scores=True,
                suppress_tokens=suppress
            )
        # Slice off the prompt at the token level; slicing the decoded string
        # by len(prompt) can misalign when decode() does not reproduce the
        # prompt text verbatim.
        new_tokens = output.sequences[0][inputs["input_ids"].shape[1]:]
        answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

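        # Confidence heuristic: the maximum raw logit of the first generated
        # token serves as a rough confidence proxy. Logits are unnormalized,
        # so the CONFIDENCE_THRESHOLD of -1.5 is model-specific and should be
        # tuned empirically rather than treated as a universal constant.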
        if output.scores and len(output.scores) > 0:
            first_token_score = output.scores[0][0]
            if torch.isnan(first_token_score).any() or torch.isinf(first_token_score).any():
                log("⚠️ Geçersiz logit (NaN/Inf) tespit edildi.")
                return {"answer": random.choice(FALLBACK_ANSWERS)}
            max_score = torch.max(first_token_score).item()
            log(f"🔍 İlk token skoru: {max_score:.4f}")
            if max_score < CONFIDENCE_THRESHOLD:
                answer = random.choice(FALLBACK_ANSWERS)

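        # NOTE: chat_history is a module-level list shared by all clients and
        # grows without bound; acceptable for a demo, but production use would
        # need per-session storage with a size cap.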
        chat_history.append({"user": user_input, "bot": answer})
        log(f"Soru: {user_input} → Cevap: {answer[:60]}...")
        return {"answer": answer, "chat_history": chat_history}
    except Exception as e:
        log(f"❌ /chat hatası: {e}")
        return {"error": str(e)}

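# bfloat16 requires CUDA compute capability >= 8.0 (Ampere or newer,
# e.g. A100 / RTX 30xx); anything older, and plain CPU, falls back to float32.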
def detect_env():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    supports_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
    return device, supports_bf16

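# Assumption inferred from the paths below: the downloaded zip contains an
# "output/" directory holding both the tokenizer files and the PEFT (LoRA)
# adapter weights.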
def setup_model():
    global model, tokenizer
    try:
        log("📦 Zip indiriliyor...")
        zip_path = hf_hub_download(
            repo_id=FINE_TUNE_REPO,
            filename=FINE_TUNE_ZIP,
            repo_type="model",
            token=HF_TOKEN
        )
        extract_path = "/app/extracted"
        os.makedirs(extract_path, exist_ok=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_path)
        tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_path, "output"))
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        device, supports_bf16 = detect_env()
        dtype = torch.bfloat16 if supports_bf16 else torch.float32
        log(f"🧠 Ortam: {device.upper()}, dtype: {dtype}")
        base = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
        peft = PeftModel.from_pretrained(base, os.path.join(extract_path, "output"))
        # Use the PeftModel wrapper directly; it is the documented interface
        # and the injected LoRA layers travel with it.
        model = peft.to(device)
        model.eval()
        log("✅ Model yüklendi.")
    except Exception as e:
        log(f"❌ Model setup hatası: {e}")

def run_server():
    log("🌐 Uvicorn başlatılıyor...")
    uvicorn.run(app, host="0.0.0.0", port=7860)

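# The 7B model takes a while to download and load, so it is set up in a
# background thread while the server binds port 7860 immediately; this keeps
# platform health checks happy during startup (port 7860 is the Hugging Face
# Spaces convention). /chat answers "Model not loaded yet" until setup ends.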
log("🚀 Başlatılıyor...")
threading.Thread(target=setup_model, daemon=True).start()
threading.Thread(target=run_server, daemon=True).start()
while True:
    time.sleep(60)