# Fine-tune + Intent + LLM + System Prompt import os import json import re import torch import asyncio import shutil import zipfile import threading import uvicorn import time import traceback import random from fastapi import FastAPI, Request from fastapi.responses import JSONResponse, HTMLResponse from pydantic import BaseModel from datetime import datetime from datasets import Dataset from huggingface_hub import hf_hub_download from transformers import ( AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline ) from peft import PeftModel HF_TOKEN = os.getenv("HF_TOKEN") MODEL_BASE = "malhajar/Mistral-7B-Instruct-v0.2-turkish" USE_FINE_TUNE = False FINE_TUNE_REPO = "UcsTurkey/trained-zips" FINE_TUNE_ZIP = "trained_model_000_009.zip" USE_SAMPLING = False CONFIDENCE_THRESHOLD = -1.5 FALLBACK_ANSWERS = [ "Bu konuda maalesef bilgim yok.", "Ne demek istediğinizi tam anlayamadım.", "Bu soruya şu an yanıt veremiyorum." ] INTENT_MODEL_PATH = "intent_model" INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased" USE_CUDA = torch.cuda.is_available() INTENT_MODEL = None INTENT_TOKENIZER = None LABEL2ID = {} model = None tokenizer = None chat_history = [] app = FastAPI() def log(msg): print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True) def pattern_to_regex(pattern): return re.sub(r"\{(\w+?)\}", r"(?P<\1>.+?)", pattern) class ChatInput(BaseModel): user_input: str class TrainInput(BaseModel): intents: list @app.get("/") def health(): return {"status": "ok"} @app.get("/start", response_class=HTMLResponse) def root(): return """

Mistral 7B Instruct Chat





""" @app.post("/train_intents") def train_intents(train_input: TrainInput): try: intents = train_input.intents log(f"🎯 Intent eğitimi başlatıldı. Intent sayısı: {len(intents)}") texts, labels = [], [] label2id = {} for idx, intent in enumerate(intents): label2id[intent["name"]] = idx for ex in intent["examples"]: if "{" not in ex: texts.append(ex) labels.append(idx) dataset = Dataset.from_dict({"text": texts, "label": labels}) tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID) model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, num_labels=len(label2id)) def tokenize(batch): return tokenizer(batch["text"], truncation=True, padding=True) tokenized = dataset.map(tokenize, batched=True) args = TrainingArguments("./intent_train_output", per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]) trainer = Trainer(model=model, args=args, train_dataset=tokenized) trainer.train() if os.path.exists(INTENT_MODEL_PATH): shutil.rmtree(INTENT_MODEL_PATH) model.save_pretrained(INTENT_MODEL_PATH) tokenizer.save_pretrained(INTENT_MODEL_PATH) with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f: json.dump(label2id, f) log("✅ Intent modeli kaydedildi.") return {"status": "ok", "message": "Intent modeli eğitildi ve kaydedildi."} except Exception as e: log(f"❌ Intent eğitimi hatası: {e}") return JSONResponse(content={"error": str(e)}, status_code=500) @app.post("/load_intent_model") def load_intent_model(): global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID try: if not os.path.exists(INTENT_MODEL_PATH): return JSONResponse(content={"error": "intent_model klasörü bulunamadı."}, status_code=400) INTENT_TOKENIZER = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH) INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH) with open(os.path.join(INTENT_MODEL_PATH, "label2id.json")) as f: LABEL2ID = json.load(f) log("✅ Intent modeli belleğe yüklendi.") return {"status": "ok", "message": "Intent modeli yüklendi."} except Exception as e: log(f"❌ Intent modeli yükleme hatası: {e}") return JSONResponse(content={"error": str(e)}, status_code=500) async def detect_intent(text): inputs = INTENT_TOKENIZER(text, return_tensors="pt") outputs = INTENT_MODEL(**inputs) pred_id = outputs.logits.argmax().item() id2label = {v: k for k, v in LABEL2ID.items()} return id2label[pred_id] async def generate_response(text): messages = [ {"role": "system", "content": "Sen yardımcı bir Türkçe yapay zeka asistanısın. Soruları açık ve doğru şekilde yanıtla."}, {"role": "user", "content": text} ] inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True) inputs = {k: v.to(model.device) for k, v in inputs.items()} generate_args = { "max_new_tokens": 512, "return_dict_in_generate": True, "output_scores": True, "do_sample": USE_SAMPLING } if USE_SAMPLING: generate_args.update({"temperature": 0.7, "top_p": 0.9, "top_k": 50}) with torch.no_grad(): output = model.generate(**inputs, **generate_args) prompt_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True) decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True) answer = decoded.replace(prompt_text, "").strip() if output.scores and len(output.scores) > 0: first_token_score = output.scores[0][0] if torch.isnan(first_token_score).any() or torch.isinf(first_token_score).any(): log("⚠️ Geçersiz logit (NaN/Inf) tespit edildi.") return random.choice(FALLBACK_ANSWERS) max_score = torch.max(first_token_score).item() log(f"🔍 İlk token skoru: {max_score:.4f}") if max_score < CONFIDENCE_THRESHOLD: return random.choice(FALLBACK_ANSWERS) return answer @app.post("/chat") async def chat(input: ChatInput): user_input = input.user_input.strip() try: if model is None or tokenizer is None: return {"error": "Model veya tokenizer henüz yüklenmedi."} if INTENT_MODEL: intent_task = asyncio.create_task(detect_intent(user_input)) response_task = asyncio.create_task(generate_response(user_input)) intent = await intent_task response = await response_task log(f"✅ Intent: {intent}") return {"intent": intent, "response": response} else: response = await generate_response(user_input) log("💬 Intent modeli yok, yalnızca LLM cevabı verildi.") return {"response": response} except Exception as e: log(f"❌ /chat hatası: {e}") traceback.print_exc() return JSONResponse(content={"error": str(e)}, status_code=500) def setup_model(): global model, tokenizer try: device = "cuda" if torch.cuda.is_available() else "cpu" dtype = torch.float32 if USE_FINE_TUNE: log("📦 Fine-tune zip indiriliyor...") zip_path = hf_hub_download(repo_id=FINE_TUNE_REPO, filename=FINE_TUNE_ZIP, repo_type="model", token=HF_TOKEN) extract_dir = "/app/extracted" os.makedirs(extract_dir, exist_ok=True) with zipfile.ZipFile(zip_path, "r") as zip_ref: zip_ref.extractall(extract_dir) tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_dir, "output"), use_fast=False) base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device) model = PeftModel.from_pretrained(base_model, os.path.join(extract_dir, "output")).to(device) else: log("🧠 Ana model indiriliyor...") tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False) model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device) tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token model.eval() log("✅ LLM model başarıyla yüklendi.") except Exception as e: log(f"❌ LLM model yükleme hatası: {e}") traceback.print_exc() def run(): log("===== Application Startup =====") threading.Thread(target=setup_model, daemon=True).start() threading.Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860), daemon=True).start() while True: time.sleep(60) # Uygulamayı çalıştır run()