"""FastAPI service exposing a chat-style text generation endpoint.

The module loads a causal language model once at application startup and
serves it through ``POST /generate``.  ``GET /`` is a lightweight health check.
"""

import os

# === Environment variables
# These defaults must be in place BEFORE transformers / huggingface_hub are
# imported: the hub library resolves its cache locations at import time, so
# setting them afterwards has no effect on where models are downloaded.
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")

import sys  # noqa: E402
import time  # noqa: E402
import threading  # noqa: E402
import traceback  # noqa: E402
from datetime import datetime  # noqa: E402

from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: E402


# === Timestamped log helper
def log(message):
    """Print *message* to stdout prefixed with the current ``HH:MM:SS`` time.

    Output is flushed immediately so lines show up in container logs without
    buffering delays.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)


# === FastAPI application and lazily-populated model handles
app = FastAPI()
tokenizer = None
model = None


# NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
# lifespan handlers; kept here to avoid changing the startup wiring.
@app.on_event("startup")
def load_model():
    """Load the tokenizer and model into the module-level globals.

    Raises:
        Exception: any error raised while loading is logged, its traceback is
            printed, and it is re-raised so that startup fails visibly.
    """
    global tokenizer, model
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
        log(f"⬇️ Model yükleme başlatılıyor: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",  # original note: bf16 on A100
            device_map="auto",
        )

        log("✅ Model ve tokenizer başarıyla hazır.")
    except Exception as e:
        log(f"❌ Model yükleme hatası: {e}")
        traceback.print_exc()
        raise


class UserInputRequest(BaseModel):
    """Request body for ``POST /generate``."""

    # The end user's message.
    user_input: str
    # The system instruction placed ahead of the user message.
    system_prompt: str


@app.post("/generate")
def generate(req: UserInputRequest):
    """Generate a model reply for one system prompt + user message pair.

    Args:
        req: the system prompt and user input to feed to the chat template.

    Returns:
        A dict ``{"response": <decoded text>}`` containing only the newly
        generated tokens (the prompt is stripped).

    Raises:
        HTTPException: 503 if the model has not been loaded; 500 with the
            error text if anything fails during generation.
    """
    # Fail with a clear status instead of an AttributeError on ``None`` when
    # the endpoint is reached before (or without) a successful model load.
    if tokenizer is None or model is None:
        raise HTTPException(status_code=503, detail="Model is not loaded")

    try:
        start_time = time.time()
        log(f"💬 Kullanıcı isteği alındı: {req.user_input}")

        messages = [
            {"role": "system", "content": req.system_prompt},
            {"role": "user", "content": req.user_input},
        ]

        # Ask explicitly for a mapping so the result shape does not depend on
        # the library's default, and so the attention mask is available to
        # pass along with the input ids.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        # Stop on either the tokenizer's EOS token or the end-of-turn token.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        # NOTE(review): with do_sample=False the temperature/top_p values are
        # presumably ignored by the library — confirm, and consider dropping
        # them to avoid configuration warnings.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=200,
            eos_token_id=terminators,
            do_sample=False,
            temperature=0.0,
            top_p=1.0,
            repetition_penalty=1.0,
        )

        # Keep only the tokens produced after the prompt.
        response = outputs[0][prompt_length:]
        answer = tokenizer.decode(response, skip_special_tokens=True)

        end_time = time.time()
        elapsed = end_time - start_time
        log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")

        return {"response": answer}

    except Exception as e:
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
def health():
    """Return a static payload indicating the HTTP server is up."""
    return {"status": "ok"}


def run_health_server():
    """Run the FastAPI app with uvicorn on 0.0.0.0:7860 (blocking call)."""
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)


# Serve the app from a daemon thread and keep the main thread alive.
# NOTE(review): if the server thread exits (e.g. startup fails), this loop
# still keeps the process running — confirm that this is intended.
threading.Thread(target=run_health_server, daemon=True).start()

log("⏸️ Uygulama bekleme modunda...")
while True:
    time.sleep(60)