Spaces:

UcsTurkey
/

mistral7b

Paused

App Files Files Community

mistral7b / interence_test_with_intent_detection.py

ciyidogan

Update interence_test_with_intent_detection.py

ea55d0f verified 3 months ago

raw

history blame

9.57 kB

	# Fine-tune + Intent + LLM + System Prompt
	import os
	import json
	import re
	import torch
	import asyncio
	import shutil
	import zipfile
	import threading
	import uvicorn
	import time
	import traceback
	import random
	from fastapi import FastAPI, Request
	from fastapi.responses import JSONResponse, HTMLResponse
	from pydantic import BaseModel
	from datetime import datetime
	from datasets import Dataset
	from huggingface_hub import hf_hub_download
	from transformers import (
	AutoTokenizer,
	AutoModelForSequenceClassification,
	AutoModelForCausalLM,
	Trainer,
	TrainingArguments,
	pipeline
	)
	from peft import PeftModel

	# Hugging Face access token read from the environment (None when unset);
	# passed to hf_hub_download() in setup_model().
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Base causal-LM checkpoint loaded by setup_model().
	MODEL_BASE = "malhajar/Mistral-7B-Instruct-v0.2-turkish"
	# When True, setup_model() downloads FINE_TUNE_ZIP from FINE_TUNE_REPO and
	# loads its "output" folder on top of MODEL_BASE via PeftModel.
	USE_FINE_TUNE = False
	FINE_TUNE_REPO = "UcsTurkey/trained-zips"
	FINE_TUNE_ZIP = "trained_model_000_009.zip"
	# Toggles do_sample (and the temperature/top_p/top_k settings) in generate_response().
	USE_SAMPLING = False
	# generate_response() returns a fallback answer when the highest score of the
	# first generated token is below this value.
	CONFIDENCE_THRESHOLD = -1.5
	# Canned replies; generate_response() picks one at random as the fallback.
	FALLBACK_ANSWERS = [
	"Bu konuda maalesef bilgim yok.",
	"Ne demek istediğinizi tam anlayamadım.",
	"Bu soruya şu an yanıt veremiyorum."
	]

	# Directory where train_intents() saves the classifier and load_intent_model() reads it.
	INTENT_MODEL_PATH = "intent_model"
	# Pretrained checkpoint that train_intents() fine-tunes for intent classification.
	INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
	# NOTE(review): not referenced elsewhere in the visible code; setup_model() queries torch directly.
	USE_CUDA = torch.cuda.is_available()
	# Intent classifier state, populated by load_intent_model(); None/empty until then.
	INTENT_MODEL = None
	INTENT_TOKENIZER = None
	LABEL2ID = {}
	# LLM and its tokenizer, populated by setup_model(); None until loading finishes.
	model = None
	tokenizer = None
	# NOTE(review): not referenced elsewhere in the visible code.
	chat_history = []

	# FastAPI application that all route decorators below attach to.
	app = FastAPI()

	def log(msg):
	    """Print *msg* to stdout prefixed with the current local time as [HH:MM:SS].

	    Output is flushed immediately so lines show up in the console without delay.
	    """
	    stamp = datetime.now().strftime('%H:%M:%S')
	    print(f"[{stamp}] {msg}", flush=True)

	def pattern_to_regex(pattern):
	    """Turn ``{name}`` placeholders in *pattern* into lazy named regex groups.

	    Each ``{name}`` (one or more word characters between braces) becomes
	    ``(?P<name>.+?)``; all other text is returned unchanged and is NOT
	    regex-escaped.
	    """
	    placeholder = re.compile(r"\{(\w+?)\}")
	    return placeholder.sub(r"(?P<\1>.+?)", pattern)

	class ChatInput(BaseModel):
	    """Request body accepted by the POST /chat endpoint."""

	    # The user's message; the /chat handler strips surrounding whitespace before use.
	    user_input: str

	class TrainInput(BaseModel):
	    """Request body accepted by the POST /train_intents endpoint."""

	    # One entry per intent; train_intents() reads item["name"] and item["examples"].
	    intents: list

	@app.get("/")
	def health():
	    """Answer GET / with a constant ``{"status": "ok"}`` payload."""
	    payload = dict(status="ok")
	    return payload

	@app.get("/start", response_class=HTMLResponse)
	def root():
	    """Serve the minimal HTML chat page for GET /start.

	    The page POSTs ``{"user_input": ...}`` as JSON to ``/chat`` and shows the
	    first present field among ``answer``, ``response`` and ``error``.

	    Returns:
	        The HTML document as a string (rendered through HTMLResponse).
	    """
	    # The fallback chain in send() must use the JavaScript `||` operator; a
	    # backslash-escaped form is both an invalid Python escape sequence and a
	    # JavaScript syntax error that stops the whole script from running.
	    return """
	    <html>
	    <body>
	    <h2>Mistral 7B Instruct Chat</h2>
	    <textarea id="input" rows="4" cols="60" placeholder="Write your instruction..."></textarea><br>
	    <button onclick="send()">Gönder</button><br><br>
	    <label>Model Cevabı:</label><br>
	    <textarea id="output" rows="10" cols="80" readonly style="white-space: pre-wrap;"></textarea>
	    <script>
	    async function send() {
	        const input = document.getElementById("input").value;
	        const res = await fetch('/chat', {
	            method: 'POST',
	            headers: { 'Content-Type': 'application/json' },
	            body: JSON.stringify({ user_input: input })
	        });
	        const data = await res.json();
	        document.getElementById('output').value = data.answer || data.response || data.error || 'Hata oluştu.';
	    }
	    </script>
	    </body>
	    </html>
	    """

	@app.post("/train_intents")
	def train_intents(train_input: TrainInput):
	    """Fine-tune the intent classifier on the posted examples and save it to disk.

	    Every example that does not contain ``{`` (i.e. is not a placeholder
	    pattern) becomes one training sample labelled with its intent. The trained
	    model, its tokenizer and a ``label2id.json`` mapping are written to
	    INTENT_MODEL_PATH, replacing any previous contents. The in-memory model is
	    not touched; call /load_intent_model afterwards to use the new weights.

	    Args:
	        train_input: Payload whose ``intents`` items provide ``"name"`` and
	            ``"examples"``.

	    Returns:
	        A status dict on success; a JSONResponse with status 400 when no usable
	        example was supplied, or 500 with the error text on any other failure.
	    """
	    try:
	        intents = train_input.intents
	        log(f"🎯 Intent eğitimi başlatıldı. Intent sayısı: {len(intents)}")

	        texts, labels = [], []
	        label2id = {}
	        for intent in intents:
	            # Hand out ids per *unique* name so they stay contiguous and match
	            # num_labels even when the same intent name is posted twice.
	            label_id = label2id.setdefault(intent["name"], len(label2id))
	            for ex in intent["examples"]:
	                if "{" not in ex:
	                    texts.append(ex)
	                    labels.append(label_id)

	        # Training on an empty dataset fails deep inside the Trainer with an
	        # unhelpful message; reject the request up front instead.
	        if not texts:
	            return JSONResponse(content={"error": "Eğitim için kullanılabilir örnek bulunamadı."}, status_code=400)

	        dataset = Dataset.from_dict({"text": texts, "label": labels})

	        intent_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
	        intent_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID, num_labels=len(label2id))

	        def tokenize(batch):
	            # NOTE(review): padding=True pads to the longest text of each map()
	            # batch; confirm this stays consistent for datasets larger than one
	            # map batch, or switch to dynamic padding in the collator.
	            return intent_tokenizer(batch["text"], truncation=True, padding=True)

	        tokenized = dataset.map(tokenize, batched=True)
	        args = TrainingArguments("./intent_train_output", per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[])
	        trainer = Trainer(model=intent_model, args=args, train_dataset=tokenized)
	        trainer.train()

	        # Start from a clean directory so files of an older model cannot linger.
	        if os.path.exists(INTENT_MODEL_PATH):
	            shutil.rmtree(INTENT_MODEL_PATH)
	        intent_model.save_pretrained(INTENT_MODEL_PATH)
	        intent_tokenizer.save_pretrained(INTENT_MODEL_PATH)
	        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w", encoding="utf-8") as f:
	            json.dump(label2id, f)

	        log("✅ Intent modeli kaydedildi.")
	        return {"status": "ok", "message": "Intent modeli eğitildi ve kaydedildi."}

	    except Exception as e:
	        log(f"❌ Intent eğitimi hatası: {e}")
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	@app.post("/load_intent_model")
	def load_intent_model():
	    """Load the saved intent classifier from INTENT_MODEL_PATH into memory.

	    Reads the tokenizer, the model and ``label2id.json`` and stores them in the
	    module globals INTENT_TOKENIZER, INTENT_MODEL and LABEL2ID.

	    Returns:
	        A status dict on success; a JSONResponse with status 400 when the
	        model directory does not exist, or 500 with the error text when
	        loading fails.
	    """
	    global INTENT_MODEL, INTENT_TOKENIZER, LABEL2ID
	    try:
	        if not os.path.exists(INTENT_MODEL_PATH):
	            return JSONResponse(content={"error": "intent_model klasörü bulunamadı."}, status_code=400)

	        # Load everything into locals first: if any step raises, the globals
	        # keep their previous, mutually consistent values.
	        loaded_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)
	        loaded_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
	        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), encoding="utf-8") as f:
	            loaded_labels = json.load(f)

	        # Publish INTENT_MODEL last because /chat uses it as the "intent
	        # detection is available" switch.
	        INTENT_TOKENIZER, LABEL2ID, INTENT_MODEL = loaded_tokenizer, loaded_labels, loaded_model
	        log("✅ Intent modeli belleğe yüklendi.")
	        return {"status": "ok", "message": "Intent modeli yüklendi."}

	    except Exception as e:
	        log(f"❌ Intent modeli yükleme hatası: {e}")
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	async def detect_intent(text):
	    """Classify *text* with the loaded intent model and return the intent name.

	    Relies on the module globals INTENT_TOKENIZER, INTENT_MODEL and LABEL2ID
	    set by load_intent_model().

	    Args:
	        text: The user message to classify.

	    Returns:
	        The intent name whose id has the highest logit.

	    Raises:
	        KeyError: If the predicted id has no entry in LABEL2ID.
	    """
	    # Truncate so an over-long message cannot exceed the encoder's input limit.
	    inputs = INTENT_TOKENIZER(text, return_tensors="pt", truncation=True)
	    # Pure inference: skip autograd bookkeeping.
	    with torch.no_grad():
	        outputs = INTENT_MODEL(**inputs)
	    pred_id = outputs.logits.argmax(dim=-1).item()
	    id2label = {v: k for k, v in LABEL2ID.items()}
	    return id2label[pred_id]

	async def generate_response(text):
	    """Generate the LLM answer for *text*, or a canned fallback when unsure.

	    Builds a system + user chat prompt, runs generation on the module-level
	    ``model``/``tokenizer`` and returns only the newly generated text. If the
	    first generated token's scores contain NaN/Inf, or their maximum is below
	    CONFIDENCE_THRESHOLD, a random entry of FALLBACK_ANSWERS is returned
	    instead.

	    Args:
	        text: The user message.

	    Returns:
	        The answer string (whitespace-stripped) or a fallback sentence.
	    """
	    # NOTE(review): some Mistral-Instruct chat templates reject a "system"
	    # role — confirm the template shipped with MODEL_BASE accepts it.
	    messages = [
	        {"role": "system", "content": "Sen yardımcı bir Türkçe yapay zeka asistanısın. Soruları açık ve doğru şekilde yanıtla."},
	        {"role": "user", "content": text}
	    ]
	    # return_dict=True is required to get a mapping (input_ids, attention_mask);
	    # by default apply_chat_template returns a bare tensor, which has no .items().
	    inputs = tokenizer.apply_chat_template(
	        messages, return_tensors="pt", add_generation_prompt=True, return_dict=True
	    )
	    inputs = {k: v.to(model.device) for k, v in inputs.items()}
	    generate_args = {
	        "max_new_tokens": 512,
	        "return_dict_in_generate": True,
	        "output_scores": True,
	        "do_sample": USE_SAMPLING
	    }
	    if USE_SAMPLING:
	        generate_args.update({"temperature": 0.7, "top_p": 0.9, "top_k": 50})

	    with torch.no_grad():
	        # Both dicts must be unpacked into keyword arguments; passed
	        # positionally they are not interpreted as inputs/options.
	        output = model.generate(**inputs, **generate_args)

	    # Confidence gate on the first generated token.
	    if output.scores:
	        first_token_score = output.scores[0][0]
	        if torch.isnan(first_token_score).any() or torch.isinf(first_token_score).any():
	            log("⚠️ Geçersiz logit (NaN/Inf) tespit edildi.")
	            return random.choice(FALLBACK_ANSWERS)
	        max_score = torch.max(first_token_score).item()
	        log(f"🔍 İlk token skoru: {max_score:.4f}")
	        if max_score < CONFIDENCE_THRESHOLD:
	            return random.choice(FALLBACK_ANSWERS)

	    # Drop the prompt by token position rather than by string replacement:
	    # decoded prompt text is not guaranteed to reappear verbatim in the output.
	    prompt_len = inputs["input_ids"].shape[1]
	    answer = tokenizer.decode(output.sequences[0][prompt_len:], skip_special_tokens=True).strip()
	    return answer

	@app.post("/chat")
	async def chat(input: ChatInput):
	    """Handle POST /chat: answer the user's message, with the intent if available.

	    Returns ``{"error": ...}`` while the LLM is not loaded yet,
	    ``{"response": ...}`` when no intent model is loaded, and
	    ``{"intent": ..., "response": ...}`` otherwise. Any exception is logged
	    with its traceback and reported as a JSON error with status 500.
	    """
	    user_input = input.user_input.strip()
	    try:
	        if model is None or tokenizer is None:
	            return {"error": "Model veya tokenizer henüz yüklenmedi."}

	        # Without an intent classifier only the LLM answer is produced.
	        if not INTENT_MODEL:
	            reply = await generate_response(user_input)
	            log("💬 Intent modeli yok, yalnızca LLM cevabı verildi.")
	            return {"response": reply}

	        # Schedule both coroutines as tasks, then collect intent first, reply second.
	        intent_task = asyncio.create_task(detect_intent(user_input))
	        response_task = asyncio.create_task(generate_response(user_input))
	        detected = await intent_task
	        reply = await response_task
	        log(f"✅ Intent: {detected}")
	        return {"intent": detected, "response": reply}

	    except Exception as e:
	        log(f"❌ /chat hatası: {e}")
	        traceback.print_exc()
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	def setup_model():
	    """Load the LLM and its tokenizer into the module globals ``model``/``tokenizer``.

	    With USE_FINE_TUNE off, MODEL_BASE is loaded directly. With it on, the
	    fine-tune zip is downloaded from the Hub, extracted under /app/extracted,
	    and its "output" folder is used for the tokenizer and as the PEFT adapter
	    on top of MODEL_BASE. Weights are loaded as float32 and moved to CUDA when
	    available, otherwise CPU. Failures are logged with a traceback and not
	    re-raised.
	    """
	    global model, tokenizer
	    try:
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        dtype = torch.float32

	        if not USE_FINE_TUNE:
	            log("🧠 Ana model indiriliyor...")
	            tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
	            model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
	        else:
	            log("📦 Fine-tune zip indiriliyor...")
	            zip_path = hf_hub_download(repo_id=FINE_TUNE_REPO, filename=FINE_TUNE_ZIP, repo_type="model", token=HF_TOKEN)
	            extract_dir = "/app/extracted"
	            os.makedirs(extract_dir, exist_ok=True)
	            with zipfile.ZipFile(zip_path, "r") as zip_ref:
	                zip_ref.extractall(extract_dir)

	            # Tokenizer files and adapter weights both live in the archive's "output" folder.
	            adapter_dir = os.path.join(extract_dir, "output")
	            tokenizer = AutoTokenizer.from_pretrained(adapter_dir, use_fast=False)
	            base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
	            model = PeftModel.from_pretrained(base_model, adapter_dir).to(device)

	        # Fall back to the EOS token when the tokenizer defines no pad token.
	        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
	        model.eval()
	        log("✅ LLM model başarıyla yüklendi.")
	    except Exception as e:
	        log(f"❌ LLM model yükleme hatası: {e}")
	        traceback.print_exc()

	def run():
	    """Start background model loading, then serve the API on 0.0.0.0:7860.

	    The LLM is loaded in a daemon thread so the HTTP server can come up
	    immediately; /chat reports that the model is not loaded until it finishes.
	    The server itself runs in the calling thread and this function blocks for
	    as long as it is serving.
	    """
	    log("===== Application Startup =====")
	    threading.Thread(target=setup_model, daemon=True).start()
	    # Serve in the calling thread instead of a daemon thread plus an endless
	    # sleep loop: if the server cannot start or stops, the process now ends
	    # instead of idling forever with nothing listening.
	    uvicorn.run(app, host="0.0.0.0", port=7860)

	# Run the application. This call executes at import time, so importing this
	# module also starts model loading and the web server.
	run()