"""FastAPI service exposing a Hugging Face text-generation pipeline.

Endpoints:
    GET  /          -- liveness probe, returns ``{"status": "ok"}``.
    POST /generate  -- builds a prompt from ``system_prompt`` and
                       ``user_input`` and returns the pipeline output.

When executed as a script, the HTTP server runs in a daemon thread on
port 7860 while the main thread stays alive in a sleep loop.
"""

import os
import sys
import time
import threading
import traceback
from datetime import datetime

# === Environment variables
# These must be set BEFORE transformers / huggingface_hub are imported:
# the hub library resolves its cache directories at import time, so setting
# them afterwards has no effect on where the model is downloaded.
os.environ.setdefault("HF_HOME", "/app/.cache")
os.environ.setdefault("HF_HUB_CACHE", "/app/.cache")

from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # noqa: E402


# === Timestamped log function
def log(message):
    """Print *message* to stdout prefixed with the current HH:MM:SS time.

    Output is flushed immediately so lines show up in container logs
    without buffering delays.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}", flush=True)


# === Start FastAPI
app = FastAPI()

# Text-generation pipeline; populated by the startup hook below.
pipe = None


@app.on_event("startup")
def load_model():
    """Load the tokenizer and model, then build the global pipeline.

    Runs once on application startup. Any failure is logged with a full
    traceback and re-raised so the application does not start without a
    model.
    """
    global pipe
    try:
        model_name = "ytu-ce-cosmos/Turkish-Llama-8b-Instruct-v0.1"
        log(f"⬇️ Model yükleme başlatılıyor: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype="auto",
        )

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device_map="auto",
        )
        log("✅ Model ve pipeline başarıyla hazır.")
    except Exception as e:
        log(f"❌ Model yükleme hatası: {e}")
        traceback.print_exc()
        raise


class UserInputRequest(BaseModel):
    """Request body for ``POST /generate``."""

    # The end user's message.
    user_input: str
    # Caller-supplied system instructions, placed ahead of the fixed
    # output-format rules in the prompt.
    system_prompt: str


@app.post("/generate")
def generate(req: UserInputRequest):
    """Generate a completion for the given system prompt and user input.

    Args:
        req: Request body carrying ``user_input`` and ``system_prompt``.

    Returns:
        ``{"response": <generated text>}``.

    Raises:
        HTTPException: 503 if the pipeline has not been loaded,
            500 if prompt construction or generation fails.
    """
    # Checked outside the try block: the broad handler below would
    # otherwise catch this exception and turn the 503 into a 500.
    if pipe is None:
        raise HTTPException(status_code=503, detail="Model is not loaded yet.")

    try:
        start_time = time.perf_counter()
        log(f"💬 Kullanıcı isteği alındı: {req.user_input}")

        # Short, clear system prompt, split across lines for readability.
        concise_system_prompt = (
            f"{req.system_prompt}\n"
            "❗ Cevaplarını sadece aşağıdaki formatta döndür, fazladan açıklama yazma, örnek ekleme:\n"
            "#ANSWER: \n"
            "#INTENT: \n"
            "#PARAMS: {...}\n"
            "#MISSING: [...]\n"
            "#ACTION_JSON: {...}\n"
            "Şimdi sadece kullanıcının sorusunu bekliyorsun ve formatlı cevap veriyorsun."
        )

        # Role separation: System, User, Assistant blocks.
        # NOTE(review): this is a hand-rolled prompt format; the model may
        # respond better to the tokenizer's own chat template -- confirm.
        full_prompt = (
            f"### System:\n{concise_system_prompt}\n\n"
            f"### User:\n{req.user_input}\n\n"
            f"### Assistant:"
        )

        # do_sample=False selects greedy decoding, so output is deterministic.
        result = pipe(
            full_prompt,
            max_new_tokens=200,
            temperature=0.0,
            top_p=1.0,
            repetition_penalty=1.0,
            do_sample=False,
        )

        # NOTE(review): by default the text-generation pipeline returns the
        # prompt followed by the completion, so the response includes
        # full_prompt. Left unchanged so existing clients keep working;
        # pass return_full_text=False if only the completion is wanted.
        answer = result[0]["generated_text"]

        elapsed = time.perf_counter() - start_time
        log(f"✅ Yanıt süresi: {elapsed:.2f} saniye")

        return {"response": answer}

    except Exception as e:
        log(f"❌ /generate hatası: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/")
def health():
    """Liveness probe; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


def run_health_server():
    """Serve ``app`` with uvicorn on 0.0.0.0:7860 (blocks the calling thread)."""
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)


# Only start the server and block when run as a script, so that importing
# this module (e.g. from tests or another ASGI runner) does not hang.
if __name__ == "__main__":
    threading.Thread(target=run_health_server, daemon=True).start()

    log("⏸️ Uygulama bekleme modunda...")
    # Keep the main thread alive; the server lives in a daemon thread.
    while True:
        time.sleep(60)