ciyidogan committed · Commit 55daf79 · verified · 1 Parent(s): acfdca9

Update inference_test_turkcell_with_intents.py

inference_test_turkcell_with_intents.py CHANGED
@@ -1,4 +1,3 @@
-# fine_tune_inference_with_intent.py
 import os, torch, threading, uvicorn, time, traceback, zipfile, random, json, shutil, asyncio, re
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, JSONResponse
@@ -7,6 +6,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
 from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
+import faiss
+import numpy as np
+import pandas as pd
+from sentence_transformers import SentenceTransformer

 # === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -20,6 +23,10 @@ USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
+USE_RAG = True
+RAG_INDEX_PATH = "/app/faiss/faiss_index_000_100.index"
+RAG_METADATA_PATH = "/app/faiss/faiss_index_000_100_metadata.parquet"
+RAG_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 INTENT_CONFIDENCE_THRESHOLD = 0.5
 LLM_CONFIDENCE_THRESHOLD = 0.2
 TRAIN_CONFIDENCE_THRESHOLD = 0.7
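The four new `RAG_*` settings point to a prebuilt FAISS index and a metadata table that the new `search_rag()` helper (added further down) reads by row position. A minimal sanity check for such a pair, assuming vector i of the index corresponds to row i of the parquet and answers live in an `output` column:

```python
import faiss
import pandas as pd

# Hypothetical check, not part of the commit: search_rag() maps FAISS hits to
# parquet rows by position, so the two files must stay aligned.
index = faiss.read_index("/app/faiss/faiss_index_000_100.index")
meta = pd.read_parquet("/app/faiss/faiss_index_000_100_metadata.parquet")
assert index.ntotal == len(meta), "vector count must equal metadata row count"
assert "output" in meta.columns, "search_rag() reads the 'output' column"
```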
@@ -29,19 +36,22 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]

+# === Global Değişkenler
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 INTENT_MODEL = None
 INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
-
-# === FastAPI
-app = FastAPI()
-chat_history = []
 model = None
 tokenizer = None
 eos_token_id = None
+faiss_index = None
+rag_metadata = None
+rag_embedder = None
+
+# === FastAPI
+app = FastAPI()

 class Message(BaseModel):
     user_input: str
@@ -95,7 +105,6 @@ def background_training(intents):
            for ex in intent["examples"]:
                texts.append(ex)
                labels.append(idx)
-
        dataset = Dataset.from_dict({"text": texts, "label": labels})
        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
@@ -109,7 +118,6 @@ def background_training(intents):
            tokenized_data["input_ids"].append(out["input_ids"])
            tokenized_data["attention_mask"].append(out["attention_mask"])
            tokenized_data["label"].append(row["label"])
-
        tokenized = Dataset.from_dict(tokenized_data)
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

@@ -123,7 +131,7 @@ def background_training(intents):
        )
        trainer.train()

-        # Başarı raporu üret
+        log("🔧 Başarı raporu üretiliyor...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        input_ids_tensor = tokenized["input_ids"].to(device)
@@ -134,8 +142,7 @@ def background_training(intents):
        predictions = outputs.logits.argmax(dim=-1).tolist()

        actuals = tokenized["label"]
-        counts = {}
-        correct = {}
+        counts, correct = {}, {}
        for pred, actual in zip(predictions, actuals):
            intent = list(label2id.keys())[list(label2id.values()).index(actual)]
            counts[intent] = counts.get(intent, 0) + 1
@@ -153,9 +160,7 @@ def background_training(intents):
        tokenizer.save_pretrained(INTENT_MODEL_PATH)
        with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
            json.dump(label2id, f)
-
        log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
-
    except Exception as e:
        log(f"❌ Intent eğitimi hatası: {e}")
        traceback.print_exc()
@@ -186,7 +191,6 @@ async def generate_response(text):
    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
    input_ids = encodeds.to(model.device)
    attention_mask = (input_ids != tokenizer.pad_token_id).long()
-
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
@@ -198,14 +202,11 @@ async def generate_response(text):
            return_dict_in_generate=True,
            output_scores=True
        )
-
+    top_conf = None
    if not USE_SAMPLING:
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max().item()
-    else:
-        top_conf = None
-
    decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
    for tag in ["assistant", "<|im_start|>assistant"]:
        start = decoded.find(tag)
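A side note on the reshuffled confidence logic: `top_conf` now starts as `None` and is only set on the greedy path, but `probs.max().item()` is still the single largest probability over every step and the whole vocabulary, so one confidently predicted token can mask an otherwise uncertain generation. A hedged sketch of a steadier per-step aggregate (an alternative, not what the commit does):

```python
import torch

def mean_top_prob(scores):
    """scores: tuple of [batch, vocab] logit tensors from generate(..., output_scores=True)."""
    stacked = torch.stack(scores, dim=1)       # [batch, steps, vocab]
    probs = torch.softmax(stacked[0], dim=-1)  # [steps, vocab]
    # average each step's top-1 probability instead of taking the global max
    return probs.max(dim=-1).values.mean().item()
```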
@@ -214,45 +215,40 @@ async def generate_response(text):
            break
    return decoded, top_conf

-def extract_parameters(variables_list, user_input):
-    for pattern in variables_list:
-        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
-        match = re.match(regex, user_input)
-        if match:
-            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
-    return []
-
-def execute_intent(intent_name, user_input):
-    if intent_name in INTENT_DEFINITIONS:
-        definition = INTENT_DEFINITIONS[intent_name]
-        variables = extract_parameters(definition.get("variables", []), user_input)
-        log(f"🚀 execute_intent('{intent_name}', {variables})")
-        return {"intent": intent_name, "parameters": variables}
-    return {"intent": intent_name, "parameters": []}
+def search_rag(query, top_k=1):
+    if faiss_index is None or rag_metadata is None:
+        return None
+    emb = rag_embedder.encode([query], convert_to_numpy=True)
+    D, I = faiss_index.search(emb, top_k)
+    results = rag_metadata.iloc[I[0]]
+    return results.iloc[0]["output"] if not results.empty else None

 @app.post("/chat")
 async def chat(msg: Message):
    user_input = msg.user_input.strip()
    try:
-        if model is None or tokenizer is None:
-            return {"error": "Model yüklenmedi."}
-
        if INTENT_MODEL:
            intent_task = asyncio.create_task(detect_intent(user_input))
            response_task = asyncio.create_task(generate_response(user_input))
            intent, intent_conf = await intent_task
            log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
            if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
-                result = execute_intent(intent, user_input)
-                return result
-            else:
-                response, response_conf = await response_task
-                if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                    return {"response": random.choice(FALLBACK_ANSWERS)}
-                return {"response": response}
+                return execute_intent(intent, user_input)
+            response, response_conf = await response_task
+            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                if USE_RAG:
+                    rag_result = search_rag(user_input)
+                    if rag_result:
+                        return {"response": rag_result}
+                return {"response": random.choice(FALLBACK_ANSWERS)}
+            return {"response": response}
        else:
            response, response_conf = await generate_response(user_input)
            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                if USE_RAG:
+                    rag_result = search_rag(user_input)
+                    if rag_result:
+                        return {"response": rag_result}
                return {"response": random.choice(FALLBACK_ANSWERS)}
            return {"response": response}
    except Exception as e:
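`search_rag()` assumes the index was built from question embeddings produced by the same sentence-transformers model named in the config. The indexing step itself is not in this commit; a plausible offline builder, with the exact metric (`IndexFlatL2`) and the `input`/`output` column names as assumptions, could look like:

```python
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

def build_rag_index(pairs, index_path, metadata_path):
    """pairs: list of (question, answer) tuples; writes a search_rag()-compatible file pair."""
    embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    questions = [q for q, _ in pairs]
    emb = embedder.encode(questions, convert_to_numpy=True).astype(np.float32)
    index = faiss.IndexFlatL2(emb.shape[1])  # exact L2 search; the commit does not say which metric was used
    index.add(emb)
    faiss.write_index(index, index_path)
    answers = [a for _, a in pairs]
    pd.DataFrame({"input": questions, "output": answers}).to_parquet(metadata_path)
```

Note also that `search_rag()` returns the nearest neighbour unconditionally: the distances in `D` are computed but never checked, so there is no "too far away" cutoff before the RAG answer is trusted.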
@@ -264,25 +260,27 @@ def log(message):
    print(f"[{timestamp}] {message}", flush=True)

 def setup_model():
-    global model, tokenizer, eos_token_id
+    global model, tokenizer, eos_token_id, faiss_index, rag_metadata, rag_embedder
    try:
        log("🧠 setup_model() başladı")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        log(f"📡 Kullanılan cihaz: {device}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
-        log("📦 Tokenizer yüklendi.")
        model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
-        log("📦 Model indirildi ve yüklendi.")
        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
        model.config.pad_token_id = tokenizer.pad_token_id
        eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
        model.eval()
-        log("✅ Ana model eval() çağrıldı")
-        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
+        log("✅ Ana model yüklendi ve hazır.")
        _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
        _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
-        log("✅ Intent modeli indirildi (önbelleğe alındı).")
-        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
+        log("✅ Intent modeli önbelleğe alındı.")
+        if USE_RAG:
+            log("📥 FAISS index yükleniyor...")
+            faiss_index = faiss.read_index(RAG_INDEX_PATH)
+            rag_metadata = pd.read_parquet(RAG_METADATA_PATH)
+            rag_embedder = SentenceTransformer(RAG_EMBEDDING_MODEL_NAME)
+            log("✅ FAISS index ve metadata yüklendi.")
    except Exception as e:
        log(f"❌ setup_model() hatası: {e}")
        traceback.print_exc()
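With `setup_model()` extended to load the index, metadata, and embedder, the endpoint can be smoke-tested end to end. A sketch assuming the service listens on port 7860 (the usual Hugging Face Spaces port; the actual host and port are deployment-specific):

```python
import requests

# High-confidence intents return {"intent": ..., "parameters": [...]};
# everything else returns {"response": ...}, trying RAG before falling back
# to FALLBACK_ANSWERS when LLM confidence is below LLM_CONFIDENCE_THRESHOLD.
r = requests.post("http://localhost:7860/chat", json={"user_input": "Merhaba"})
print(r.json())
```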
 
 