import os
import threading
import uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
from datasets import load_dataset
from peft import PeftModel
import torch
from huggingface_hub import hf_hub_download
import zipfile
from datetime import datetime
# Timestamped log helper (flushes stdout after every message).
def log(message):
    """Print *message* to stdout prefixed with the current local time.

    The emitted line has the form ``[HH:MM:SS] message`` and stdout is
    flushed right away so the line is not held back in a buffer.

    Args:
        message: Value to log; it is interpolated into an f-string, so any
            object with a string representation is accepted.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    # flush=True replaces the former os.sys.stdout.flush(): `os.sys` only
    # exists because the os module happens to import sys internally, it is
    # not a documented attribute.
    print(f"[{timestamp}] {message}", flush=True)
# Constants
# Hugging Face access token read from the environment (None when the variable
# is unset); passed as `token=` to hf_hub_download() and load_dataset().
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base model identifier handed to AutoModelForCausalLM.from_pretrained().
MODEL_BASE = "UcsTurkey/kanarya-750m-fixed"
# Zip file and repository that setup_model() downloads and extracts; the
# tokenizer and LoRA adapter are loaded from its "output" sub-directory.
FINE_TUNE_ZIP = "trained_model_002_005.zip"
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
# Data file and repository passed to load_dataset() when USE_RAG is enabled.
RAG_DATA_FILE = "merged_dataset_000_100.parquet"
RAG_DATA_REPO = "UcsTurkey/turkish-general-culture-tokenized"
USE_RAG = False # Constant that makes RAG usage optional
app = FastAPI()
# Completed turns as {"user": ..., "bot": ...} dicts; /chat appends to it and
# replays every entry into the next prompt.
chat_history = []
pipe = None # global text-generation pipeline; stays None until setup_model() assigns it
# Request body accepted by the POST /chat endpoint.
class Message(BaseModel):
    # Text sent by the user; chat() strips it and rejects it when empty.
    user_input: str
# Health-check endpoint: unconditionally reports {"status": "ok"}; it does not
# look at whether the model pipeline has been loaded.
@app.get("/health")
def health():
    return dict(status="ok")
# Landing page served at /start as an HTML response.
@app.get("/start", response_class=HTMLResponse)
def root():
    # The body starts and ends with a newline, hence the empty first and last
    # entries around the two text lines.
    page_lines = ("", "Fine-Tune Chat", "📘 Fine-tune Chat Test", "")
    return "\n".join(page_lines)
# Chat endpoint: generate one assistant reply for the posted user message.
#
# The prompt is every stored turn rendered as "Kullanıcı: ...\nAsistan: ...\n"
# followed by the new user line and an open "Asistan:" cue. The reply is the
# generated text with the prompt prefix cut off and surrounding whitespace
# stripped.
#
# Returns {"answer": ..., "chat_history": [...]} on success. Every failure
# (model not loaded yet, empty input, any exception) is reported as a plain
# {"error": ...} dict returned from the handler, not raised.
#
# NOTE(review): chat_history is one module-level list that this handler only
# ever appends to, so the prompt grows with each successful turn and the
# history is shared by every caller of this process. Consider bounding it or
# keying it per session.
@app.post("/chat")
def chat(msg: Message):
    try:
        # `pipe` is only read here, so no `global` declaration is needed.
        if pipe is None:
            log("🚫 Hata: Model henüz yüklenmedi.")
            return {"error": "Model yüklenmedi. Lütfen birkaç saniye sonra tekrar deneyin."}

        user_input = msg.user_input.strip()
        if not user_input:
            return {"error": "Boş giriş"}

        # Assemble the prompt in one join instead of repeated `+=` in a loop.
        prompt_parts = [
            f"Kullanıcı: {turn['user']}\nAsistan: {turn['bot']}\n"
            for turn in chat_history
        ]
        prompt_parts.append(f"Kullanıcı: {user_input}\nAsistan:")
        full_prompt = "".join(prompt_parts)

        result = pipe(full_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
        # Drop the echoed prompt so only the newly generated text remains.
        answer = result[0]["generated_text"][len(full_prompt):].strip()

        chat_history.append({"user": user_input, "bot": answer})
        log(f"🗨️ Soru: {user_input} → Yanıt: {answer[:60]}...")
        return {"answer": answer, "chat_history": chat_history}
    except Exception as e:
        # Endpoint boundary: log the failure and hand the message to the client.
        log(f"❌ /chat sırasında hata oluştu: {e}")
        return {"error": str(e)}
def setup_model():
    """Download the fine-tuned adapter, load the model and publish the pipeline.

    Steps, in order: fetch FINE_TUNE_ZIP from FINE_TUNE_REPO, unpack it under
    /app/extracted, load the tokenizer and the LoRA adapter from the archive's
    "output" sub-directory on top of MODEL_BASE, load the RAG dataset when
    USE_RAG is set, then build a TextGenerationPipeline and store it in the
    module-level ``pipe``.

    Any Exception raised along the way is logged and swallowed; ``pipe`` is
    only assigned when every step succeeded.
    """
    global pipe
    try:
        log("📦 Fine-tune zip indiriliyor...")
        archive_path = hf_hub_download(
            repo_id=FINE_TUNE_REPO,
            filename=FINE_TUNE_ZIP,
            repo_type="model",
            token=HF_TOKEN,
        )

        target_dir = "/app/extracted"
        os.makedirs(target_dir, exist_ok=True)
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(target_dir)
        log("📂 Zip başarıyla açıldı.")

        # Tokenizer and adapter weights both live in the same sub-directory.
        adapter_dir = os.path.join(target_dir, "output")

        log("🔁 Tokenizer yükleniyor...")
        tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

        log("🧠 Base model indiriliyor...")
        # Half precision when CUDA is available, full precision otherwise.
        has_cuda = torch.cuda.is_available()
        dtype = torch.float16 if has_cuda else torch.float32
        base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype)

        log("➕ LoRA adapter uygulanıyor...")
        peft_model = PeftModel.from_pretrained(base_model, adapter_dir)

        if USE_RAG:
            log("📚 RAG dataseti yükleniyor...")
            rag_dataset = load_dataset(
                RAG_DATA_REPO,
                data_files=RAG_DATA_FILE,
                split="train",
                token=HF_TOKEN,
            )
            # The dataset is only measured here; nothing else in this function uses it.
            log(f"🔍 RAG boyutu: {len(rag_dataset)}")

        log("🚀 Pipeline oluşturuluyor...")
        pipe = TextGenerationPipeline(
            model=peft_model.model,
            tokenizer=tokenizer,
            torch_dtype=dtype,
            device=0 if has_cuda else -1,
        )
        log("✅ Model ve pipeline başarıyla yüklendi.")
    except Exception as e:
        log(f"❌ setup_model() sırasında hata oluştu: {e}")
# Application startup
# Load the model on a daemon thread so start-up is not blocked by the download;
# until setup_model() assigns `pipe`, /chat answers with a "model not loaded"
# error.
threading.Thread(target=setup_model, daemon=True).start()
log("⌛ Model yükleniyor, istekler için hazır olunacak...")

if __name__ == "__main__":
    # Serve the FastAPI app from the main thread. The previous idle
    # `while True: time.sleep(60)` loop kept the process alive but never
    # started uvicorn, so no endpoint was reachable (and importing this module
    # from an ASGI runner would never return). uvicorn.run() blocks, so the
    # process still stays alive for the daemon loader thread.
    # NOTE(review): host and port are assumptions (bind on all interfaces,
    # PORT env override, 7860 default) — confirm against the deployment.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))