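"""FastAPI chat service for a Turkish Mistral-7B instruct model.

Exposes a browser test page, a /chat endpoint backed by the LLM, and
endpoints to train and load a BERT-based intent classifier. The LLM is
loaded in a background thread so the HTTP server can come up immediately.
"""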
import os
import json
import re
import torch
import asyncio
import shutil
import zipfile
import threading
import uvicorn
import time
import traceback
import random

from fastapi import FastAPI
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel
from datetime import datetime
from datasets import Dataset
from huggingface_hub import hf_hub_download
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
from peft import PeftModel
|
|
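# --- Configuration ----------------------------------------------------------
# USE_FINE_TUNE switches between the plain base model and a LoRA adapter
# downloaded as a zip from FINE_TUNE_REPO. CONFIDENCE_THRESHOLD is compared
# against the max logit of the first generated token in generate_response().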
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_BASE = "malhajar/Mistral-7B-Instruct-v0.2-turkish"
USE_FINE_TUNE = False
FINE_TUNE_REPO = "UcsTurkey/trained-zips"
FINE_TUNE_ZIP = "trained_model_000_009.zip"
USE_SAMPLING = False  # greedy decoding by default; sampling params apply only when True
CONFIDENCE_THRESHOLD = -1.5
# Canned Turkish fallback replies, used when generation confidence is too low.
FALLBACK_ANSWERS = [
    "Bu konuda maalesef bilgim yok.",          # "Unfortunately I have no knowledge of this topic."
    "Ne demek istediğinizi tam anlayamadım.",  # "I could not quite understand what you meant."
    "Bu soruya şu an yanıt veremiyorum."       # "I cannot answer this question right now."
]
|
|
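# --- Global state -----------------------------------------------------------
# Intent classifier artifacts are populated by /train_intents and
# /load_intent_model; `model` and `tokenizer` are set by setup_model().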
INTENT_MODEL_PATH = "intent_model"
INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
USE_CUDA = torch.cuda.is_available()  # currently unused; setup_model() checks CUDA itself
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
model = None
tokenizer = None
chat_history = []  # currently unused
|
|
app = FastAPI()
|
|
def log(msg):
    """Print a timestamped message, flushed immediately."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)
|
|
def pattern_to_regex(pattern):
    """Turn a "{placeholder}" template into a regex with named groups (currently unused)."""
    return re.sub(r"\{(\w+?)\}", r"(?P<\1>.+?)", pattern)
|
|
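# Request bodies for /chat and /train_intents.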
class ChatInput(BaseModel):
    user_input: str
|
|
class TrainInput(BaseModel):
    intents: list
|
|
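# Liveness probe; also usable as a hosting-platform health check.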
@app.get("/")
def health():
    return {"status": "ok"}
|
|
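# Minimal in-browser test page that POSTs the textarea content to /chat.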
@app.get("/start", response_class=HTMLResponse)
def root():
    return """
    <html>
      <body>
        <h2>Mistral 7B Instruct Chat</h2>
        <textarea id="input" rows="4" cols="60" placeholder="Write your instruction..."></textarea><br>
        <button onclick="send()">Send</button><br><br>
        <label>Model answer:</label><br>
        <textarea id="output" rows="10" cols="80" readonly style="white-space: pre-wrap;"></textarea>
        <script>
          async function send() {
            const input = document.getElementById("input").value;
            const res = await fetch('/chat', {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({ user_input: input })
            });
            const data = await res.json();
            document.getElementById('output').value = data.answer || data.response || data.error || 'An error occurred.';
          }
        </script>
      </body>
    </html>
    """
|
|
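# Fine-tune a Turkish BERT classifier on the posted intent examples.
# Examples containing "{placeholders}" are excluded from training; the
# unused pattern_to_regex() helper suggests they are intended for
# regex-based slot matching instead.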
@app.post("/train_intents")
def train_intents(train_input: TrainInput):
    try:
        intents = train_input.intents
        log(f"🎯 Intent training started. Intent count: {len(intents)}")

        # Build a flat text/label dataset, skipping templated examples.
        texts, labels = [], []
        label2id = {}
        for idx, intent in enumerate(intents):
            label2id[intent["name"]] = idx
            for ex in intent["examples"]:
                if "{" not in ex:
                    texts.append(ex)
                    labels.append(idx)

        dataset = Dataset.from_dict({"text": texts, "label": labels})

        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, num_labels=len(label2id))

        def tokenize(batch):
            return tokenizer(batch["text"], truncation=True, padding=True)

        tokenized = dataset.map(tokenize, batched=True)
        args = TrainingArguments(
            "./intent_train_output",
            per_device_train_batch_size=4,
            num_train_epochs=3,
            logging_steps=10,
            save_strategy="no",
            report_to=[],
        )
        trainer = Trainer(model=model, args=args, train_dataset=tokenized)
        trainer.train()

        # Persist the fine-tuned classifier together with its label mapping.
        if os.path.exists(INTENT_MODEL_PATH):
            shutil.rmtree(INTENT_MODEL_PATH)
        model.save_pretrained(INTENT_MODEL_PATH)
        tokenizer.save_pretrained(INTENT_MODEL_PATH)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
            json.dump(label2id, f)

        log("✅ Intent model saved.")
        return {"status": "ok", "message": "Intent model trained and saved."}

    except Exception as e:
        log(f"❌ Intent training error: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
@app.post("/load_intent_model")
def load_intent_model():
    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
    try:
        if not os.path.exists(INTENT_MODEL_PATH):
            return JSONResponse(content={"error": "intent_model directory not found."}, status_code=400)

        INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
        INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
        INTENT_MODEL.eval()  # inference mode (disables dropout)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f:
            LABEL2ID = json.load(f)
        log("✅ Intent model loaded into memory.")
        return {"status": "ok", "message": "Intent model loaded."}

    except Exception as e:
        log(f"❌ Intent model load error: {e}")
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
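# Classify user text with the in-memory intent classifier (CPU inference).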
async def detect_intent(text):
    inputs = INTENT_TOKENIZER(text, return_tensors="pt")
    with torch.no_grad():
        outputs = INTENT_MODEL(**inputs)
    pred_id = outputs.logits.argmax().item()
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id]
|
|
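# Generate an answer with the LLM. As a crude confidence gate, the max logit
# of the first generated token is checked: if it is non-finite or below
# CONFIDENCE_THRESHOLD, a canned fallback answer is returned instead.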
async def generate_response(text):
    messages = [
        # Turkish system prompt: "You are a helpful Turkish AI assistant.
        # Answer questions clearly and accurately."
        {"role": "system", "content": "Sen yardımcı bir Türkçe yapay zeka asistanısın. Soruları açık ve doğru şekilde yanıtla."},
        {"role": "user", "content": text}
    ]
    # return_dict=True is required so apply_chat_template returns a dict with
    # input_ids/attention_mask; by default it returns a bare tensor, which
    # would break the .items() call below.
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True, return_dict=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    generate_args = {
        "max_new_tokens": 512,
        "return_dict_in_generate": True,
        "output_scores": True,
        "do_sample": USE_SAMPLING
    }
    if USE_SAMPLING:
        generate_args.update({"temperature": 0.7, "top_p": 0.9, "top_k": 50})

    with torch.no_grad():
        output = model.generate(**inputs, **generate_args)

    # Strip the echoed prompt so only the newly generated answer remains.
    prompt_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
    decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True)
    answer = decoded.replace(prompt_text, "").strip()

    if output.scores and len(output.scores) > 0:
        first_token_score = output.scores[0][0]
        if torch.isnan(first_token_score).any() or torch.isinf(first_token_score).any():
            log("⚠️ Invalid logits (NaN/Inf) detected.")
            return random.choice(FALLBACK_ANSWERS)
        max_score = torch.max(first_token_score).item()
        log(f"🔍 First-token score: {max_score:.4f}")
        if max_score < CONFIDENCE_THRESHOLD:
            return random.choice(FALLBACK_ANSWERS)

    return answer
|
|
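# Main chat endpoint: runs intent detection (when a classifier is loaded)
# alongside LLM generation.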
@app.post("/chat")
async def chat(input: ChatInput):
    user_input = input.user_input.strip()
    try:
        if model is None or tokenizer is None:
            return {"error": "Model or tokenizer not loaded yet."}

        if INTENT_MODEL:
            # Schedule both coroutines together. Note that each wraps blocking
            # model calls, so they effectively run back to back on the event
            # loop rather than in parallel.
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent = await intent_task
            response = await response_task
            log(f"✅ Intent: {intent}")
            return {"intent": intent, "response": response}
        else:
            response = await generate_response(user_input)
            log("💬 No intent model loaded; returned the LLM answer only.")
            return {"response": response}

    except Exception as e:
        log(f"❌ /chat error: {e}")
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
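# Download and initialize the LLM (optionally applying a LoRA fine-tune).
# Runs in a daemon thread started by run(), so request handlers must check
# that `model` and `tokenizer` are ready before using them.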
def setup_model():
    global model, tokenizer
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.float32

        if USE_FINE_TUNE:
            log("📦 Downloading fine-tune zip...")
            zip_path = hf_hub_download(repo_id=FINE_TUNE_REPO, filename=FINE_TUNE_ZIP, repo_type="model", token=HF_TOKEN)
            extract_dir = "/app/extracted"
            os.makedirs(extract_dir, exist_ok=True)
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # Load the tokenizer from the extracted checkpoint and apply the
            # LoRA adapter on top of the base model.
            tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_dir, "output"), use_fast=False)
            base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
            model = PeftModel.from_pretrained(base_model, os.path.join(extract_dir, "output")).to(device)
        else:
            log("🧠 Downloading base model...")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
            model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)

        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
        model.eval()
        log("✅ LLM loaded successfully.")
    except Exception as e:
        log(f"❌ LLM load error: {e}")
        traceback.print_exc()
|
|
def run():
    log("===== Application Startup =====")
    # Start model loading and the HTTP server in background daemon threads,
    # then keep the main thread alive so the daemons are not torn down.
    threading.Thread(target=setup_model, daemon=True).start()
    threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start()
    while True:
        time.sleep(60)


if __name__ == "__main__":
    run()