ciyidogan committed · Commit 55daf79 · verified · 1 Parent(s): acfdca9

Update inference_test_turkcell_with_intents.py

inference_test_turkcell_with_intents.py CHANGED
@@ -1,4 +1,3 @@
-# fine_tune_inference_with_intent.py
 import os, torch, threading, uvicorn, time, traceback, zipfile, random, json, shutil, asyncio, re
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, JSONResponse
@@ -7,6 +6,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
 from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
+import faiss
+import numpy as np
+import pandas as pd
+from sentence_transformers import SentenceTransformer

 # === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -20,6 +23,10 @@ USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
+USE_RAG = True
+RAG_INDEX_PATH = "/app/faiss/faiss_index_000_100.index"
+RAG_METADATA_PATH = "/app/faiss/faiss_index_000_100_metadata.parquet"
+RAG_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 INTENT_CONFIDENCE_THRESHOLD = 0.5
 LLM_CONFIDENCE_THRESHOLD = 0.2
 TRAIN_CONFIDENCE_THRESHOLD = 0.7
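The four new `RAG_*` settings point to a prebuilt FAISS index and a metadata table that the new `search_rag()` helper (added further down) reads by row position. A minimal sanity check for such a pair, assuming vector i of the index corresponds to row i of the parquet and answers live in an `output` column:

```python
import faiss
import pandas as pd

# Hypothetical check, not part of the commit: search_rag() maps FAISS hits to
# parquet rows by position, so the two files must stay aligned.
index = faiss.read_index("/app/faiss/faiss_index_000_100.index")
meta = pd.read_parquet("/app/faiss/faiss_index_000_100_metadata.parquet")
assert index.ntotal == len(meta), "vector count must equal metadata row count"
assert "output" in meta.columns, "search_rag() reads the 'output' column"
```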
@@ -29,19 +36,22 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]

+# === Global Değişkenler
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 INTENT_MODEL = None
 INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
-
-# === FastAPI
-app = FastAPI()
-chat_history = []
 model = None
 tokenizer = None
 eos_token_id = None
+faiss_index = None
+rag_metadata = None
+rag_embedder = None
+
+# === FastAPI
+app = FastAPI()

 class Message(BaseModel):
     user_input: str
@@ -95,7 +105,6 @@ def background_training(intents):
            for ex in intent["examples"]:
                texts.append(ex)
                labels.append(idx)
-
        dataset = Dataset.from_dict({"text": texts, "label": labels})
        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
@@ -109,7 +118,6 @@ def background_training(intents):
            tokenized_data["input_ids"].append(out["input_ids"])
            tokenized_data["attention_mask"].append(out["attention_mask"])
            tokenized_data["label"].append(row["label"])
-
        tokenized = Dataset.from_dict(tokenized_data)
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

@@ -123,7 +131,7 @@ def background_training(intents):
        )
        trainer.train()

-        # Başarı raporu üret
+        log("🔧 Başarı raporu üretiliyor...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        input_ids_tensor = tokenized["input_ids"].to(device)
@@ -134,8 +142,7 @@ def background_training(intents):
        predictions = outputs.logits.argmax(dim=-1).tolist()

        actuals = tokenized["label"]
-        counts = {}
-        correct = {}
+        counts, correct = {}, {}
        for pred, actual in zip(predictions, actuals):
            intent = list(label2id.keys())[list(label2id.values()).index(actual)]
            counts[intent] = counts.get(intent, 0) + 1
@@ -153,9 +160,7 @@ def background_training(intents):
        tokenizer.save_pretrained(INTENT_MODEL_PATH)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
            json.dump(label2id, f)
-
        log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
-
    except Exception as e:
        log(f"❌ Intent eğitimi hatası: {e}")
        traceback.print_exc()
@@ -186,7 +191,6 @@ async def generate_response(text):
    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
    input_ids = encodeds.to(model.device)
    attention_mask = (input_ids != tokenizer.pad_token_id).long()
-
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
@@ -198,14 +202,11 @@ async def generate_response(text):
            return_dict_in_generate=True,
            output_scores=True
        )
-
+    top_conf = None
    if not USE_SAMPLING:
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max().item()
-    else:
-        top_conf = None
-
    decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
    for tag in ["assistant", "<|im_start|>assistant"]:
        start = decoded.find(tag)
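A side note on the reshuffled confidence logic: `top_conf` now starts as `None` and is only set on the greedy path, but `probs.max().item()` is still the single largest probability over every step and the whole vocabulary, so one confidently predicted token can mask an otherwise uncertain generation. A hedged sketch of a steadier per-step aggregate (an alternative, not what the commit does):

```python
import torch

def mean_top_prob(scores):
    """scores: tuple of [batch, vocab] logit tensors from generate(..., output_scores=True)."""
    stacked = torch.stack(scores, dim=1)       # [batch, steps, vocab]
    probs = torch.softmax(stacked[0], dim=-1)  # [steps, vocab]
    # average each step's top-1 probability instead of taking the global max
    return probs.max(dim=-1).values.mean().item()
```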
@@ -214,45 +215,40 @@ async def generate_response(text):
            break
    return decoded, top_conf

-def extract_parameters(variables_list, user_input):
-    for pattern in variables_list:
-        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
-        match = re.match(regex, user_input)
-        if match:
-            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
-    return []
-
-def execute_intent(intent_name, user_input):
-    if intent_name in INTENT_DEFINITIONS:
-        definition = INTENT_DEFINITIONS[intent_name]
-        variables = extract_parameters(definition.get("variables", []), user_input)
-        log(f"🚀 execute_intent('{intent_name}', {variables})")
-        return {"intent": intent_name, "parameters": variables}
-    return {"intent": intent_name, "parameters": []}
+def search_rag(query, top_k=1):
+    if faiss_index is None or rag_metadata is None:
+        return None
+    emb = rag_embedder.encode([query], convert_to_numpy=True)
+    D, I = faiss_index.search(emb, top_k)
+    results = rag_metadata.iloc[I[0]]
+    return results.iloc[0]["output"] if not results.empty else None

 @app.post("/chat")
 async def chat(msg: Message):
    user_input = msg.user_input.strip()
    try:
-        if model is None or tokenizer is None:
-            return {"error": "Model yüklenmedi."}
-
        if INTENT_MODEL:
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent, intent_conf = await intent_task
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
-                result = execute_intent(intent, user_input)
-                return result
-            else:
-                response, response_conf = await response_task
-                if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                    return {"response": random.choice(FALLBACK_ANSWERS)}
-                return {"response": response}
+                return execute_intent(intent, user_input)
+            response, response_conf = await response_task
+            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                if USE_RAG:
+                    rag_result = search_rag(user_input)
+                    if rag_result:
+                        return {"response": rag_result}
+                return {"response": random.choice(FALLBACK_ANSWERS)}
+            return {"response": response}
        else:
            response, response_conf = await generate_response(user_input)
            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                if USE_RAG:
+                    rag_result = search_rag(user_input)
+                    if rag_result:
+                        return {"response": rag_result}
                return {"response": random.choice(FALLBACK_ANSWERS)}
            return {"response": response}
    except Exception as e:
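`search_rag()` assumes the index was built from question embeddings produced by the same sentence-transformers model named in the config. The indexing step itself is not in this commit; a plausible offline builder, with the exact metric (`IndexFlatL2`) and the `input`/`output` column names as assumptions, could look like:

```python
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

def build_rag_index(pairs, index_path, metadata_path):
    """pairs: list of (question, answer) tuples; writes a search_rag()-compatible file pair."""
    embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    questions = [q for q, _ in pairs]
    emb = embedder.encode(questions, convert_to_numpy=True).astype(np.float32)
    index = faiss.IndexFlatL2(emb.shape[1])  # exact L2 search; the commit does not say which metric was used
    index.add(emb)
    faiss.write_index(index, index_path)
    answers = [a for _, a in pairs]
    pd.DataFrame({"input": questions, "output": answers}).to_parquet(metadata_path)
```

Note also that `search_rag()` returns the nearest neighbour unconditionally: the distances in `D` are computed but never checked, so there is no "too far away" cutoff before the RAG answer is trusted.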
@@ -264,25 +260,27 @@ def log(message):
    print(f"[{timestamp}] {message}", flush=True)

 def setup_model():
-    global model, tokenizer, eos_token_id
+    global model, tokenizer, eos_token_id, faiss_index, rag_metadata, rag_embedder
    try:
        log("🧠 setup_model() başladı")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        log(f"📡 Kullanılan cihaz: {device}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
-        log("📦 Tokenizer yüklendi.")
        model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
-        log("📦 Model indirildi ve yüklendi.")
        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
        model.config.pad_token_id = tokenizer.pad_token_id
        eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
        model.eval()
-        log("✅ Ana model eval() çağrıldı")
-        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
+        log("✅ Ana model yüklendi ve hazır.")
        _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
-        log("✅ Intent modeli indirildi (önbelleğe alındı).")
-        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
+        log("✅ Intent modeli önbelleğe alındı.")
+        if USE_RAG:
+            log("📥 FAISS index yükleniyor...")
+            faiss_index = faiss.read_index(RAG_INDEX_PATH)
+            rag_metadata = pd.read_parquet(RAG_METADATA_PATH)
+            rag_embedder = SentenceTransformer(RAG_EMBEDDING_MODEL_NAME)
+            log("✅ FAISS index ve metadata yüklendi.")
    except Exception as e:
        log(f"❌ setup_model() hatası: {e}")
        traceback.print_exc()
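With `setup_model()` extended to load the index, metadata, and embedder, the endpoint can be smoke-tested end to end. A sketch assuming the service listens on port 7860 (the usual Hugging Face Spaces port; the actual host and port are deployment-specific):

```python
import requests

# High-confidence intents return {"intent": ..., "parameters": [...]};
# everything else returns {"response": ...}, trying RAG before falling back
# to FALLBACK_ANSWERS when LLM confidence is below LLM_CONFIDENCE_THRESHOLD.
r = requests.post("http://localhost:7860/chat", json={"user_input": "Merhaba"})
print(r.json())
```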
 
 