ciyidogan committed
Commit 0b8c8b5 · verified · 1 Parent(s): d5ed5d0

Update inference_test_turkcell_with_intents.py

inference_test_turkcell_with_intents.py CHANGED
@@ -6,10 +6,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
 from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
-import faiss
-import numpy as np
-import pandas as pd
-from sentence_transformers import SentenceTransformer
 
 # === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -23,10 +19,6 @@ USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
-USE_RAG = True
-RAG_INDEX_PATH = "/app/faiss/faiss_index_000_100.index"
-RAG_METADATA_PATH = "/app/faiss/faiss_index_000_100_metadata.parquet"
-RAG_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 INTENT_CONFIDENCE_THRESHOLD = 0.5
 LLM_CONFIDENCE_THRESHOLD = 0.2
 TRAIN_CONFIDENCE_THRESHOLD = 0.7
@@ -36,22 +28,19 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]
 
-# === Global Değişkenler
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 INTENT_MODEL = None
 INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
-model = None
-tokenizer = None
-eos_token_id = None
-faiss_index = None
-rag_metadata = None
-rag_embedder = None
 
 # === FastAPI
 app = FastAPI()
+chat_history = []
+model = None
+tokenizer = None
+eos_token_id = None
 
 class Message(BaseModel):
     user_input: str
@@ -105,6 +94,7 @@ def background_training(intents):
             for ex in intent["examples"]:
                 texts.append(ex)
                 labels.append(idx)
+
         dataset = Dataset.from_dict({"text": texts, "label": labels})
         tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
@@ -118,6 +108,7 @@ def background_training(intents):
             tokenized_data["input_ids"].append(out["input_ids"])
             tokenized_data["attention_mask"].append(out["attention_mask"])
            tokenized_data["label"].append(row["label"])
+
         tokenized = Dataset.from_dict(tokenized_data)
         tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
 
@@ -131,6 +122,7 @@ def background_training(intents):
         )
         trainer.train()
 
+        # ✅ Başarı raporu üret
         log("🔧 Başarı raporu üretiliyor...")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
@@ -142,7 +134,8 @@ def background_training(intents):
         predictions = outputs.logits.argmax(dim=-1).tolist()
 
         actuals = tokenized["label"]
-        counts, correct = {}, {}
+        counts = {}
+        correct = {}
         for pred, actual in zip(predictions, actuals):
             intent = list(label2id.keys())[list(label2id.values()).index(actual)]
             counts[intent] = counts.get(intent, 0) + 1
@@ -154,13 +147,16 @@ def background_training(intents):
             if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                 log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
 
+        log("📦 Intent modeli eğitimi kaydediliyor...")
         if os.path.exists(INTENT_MODEL_PATH):
             shutil.rmtree(INTENT_MODEL_PATH)
         model.save_pretrained(INTENT_MODEL_PATH)
         tokenizer.save_pretrained(INTENT_MODEL_PATH)
         with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
             json.dump(label2id, f)
+
         log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
+
     except Exception as e:
         log(f"❌ Intent eğitimi hatası: {e}")
         traceback.print_exc()
@@ -191,6 +187,7 @@ async def generate_response(text):
     eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
     input_ids = encodeds.to(model.device)
     attention_mask = (input_ids != tokenizer.pad_token_id).long()
+
    with torch.no_grad():
         output = model.generate(
             input_ids=input_ids,
@@ -202,11 +199,14 @@ async def generate_response(text):
             return_dict_in_generate=True,
             output_scores=True
         )
-    top_conf = None
+
     if not USE_SAMPLING:
         scores = torch.stack(output.scores, dim=1)
         probs = torch.nn.functional.softmax(scores[0], dim=-1)
         top_conf = probs.max().item()
+    else:
+        top_conf = None
+
     decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
     for tag in ["assistant", "<|im_start|>assistant"]:
         start = decoded.find(tag)
@@ -215,40 +215,45 @@ async def generate_response(text):
             break
     return decoded, top_conf
 
-def search_rag(query, top_k=1):
-    if faiss_index is None or rag_metadata is None:
-        return None
-    emb = rag_embedder.encode([query], convert_to_numpy=True)
-    D, I = faiss_index.search(emb, top_k)
-    results = rag_metadata.iloc[I[0]]
-    return results.iloc[0]["output"] if not results.empty else None
+def extract_parameters(variables_list, user_input):
+    for pattern in variables_list:
+        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
+        match = re.match(regex, user_input)
+        if match:
+            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
+    return []
+
+def execute_intent(intent_name, user_input):
+    if intent_name in INTENT_DEFINITIONS:
+        definition = INTENT_DEFINITIONS[intent_name]
+        variables = extract_parameters(definition.get("variables", []), user_input)
+        log(f"🚀 execute_intent('{intent_name}', {variables})")
+        return {"intent": intent_name, "parameters": variables}
+    return {"intent": intent_name, "parameters": []}
 
 @app.post("/chat")
 async def chat(msg: Message):
     user_input = msg.user_input.strip()
     try:
+        if model is None or tokenizer is None:
+            return {"error": "Model yüklenmedi."}
+
         if INTENT_MODEL:
             intent_task = asyncio.create_task(detect_intent(user_input))
             response_task = asyncio.create_task(generate_response(user_input))
             intent, intent_conf = await intent_task
             log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
             if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
-                return execute_intent(intent, user_input)
-            response, response_conf = await response_task
-            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                if USE_RAG:
-                    rag_result = search_rag(user_input)
-                    if rag_result:
-                        return {"response": rag_result}
-                return {"response": random.choice(FALLBACK_ANSWERS)}
-            return {"response": response}
+                result = execute_intent(intent, user_input)
+                return result
+            else:
+                response, response_conf = await response_task
+                if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                    return {"response": random.choice(FALLBACK_ANSWERS)}
+                return {"response": response}
         else:
             response, response_conf = await generate_response(user_input)
             if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                if USE_RAG:
-                    rag_result = search_rag(user_input)
-                    if rag_result:
-                        return {"response": rag_result}
                 return {"response": random.choice(FALLBACK_ANSWERS)}
             return {"response": response}
     except Exception as e:
@@ -260,27 +265,25 @@ def log(message):
     print(f"[{timestamp}] {message}", flush=True)
 
 def setup_model():
-    global model, tokenizer, eos_token_id, faiss_index, rag_metadata, rag_embedder
+    global model, tokenizer, eos_token_id
     try:
         log("🧠 setup_model() başladı")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         log(f"📡 Kullanılan cihaz: {device}")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
+        log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
         model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
+        log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
         tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
         model.config.pad_token_id = tokenizer.pad_token_id
         eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
         model.eval()
-        log("✅ Ana model yüklendi ve hazır.")
+        log("✅ Ana model eval() çağrıldı")
+        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
         _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
         log("✅ Intent modeli önbelleğe alındı.")
-        if USE_RAG:
-            log("📥 FAISS index yükleniyor...")
-            faiss_index = faiss.read_index(RAG_INDEX_PATH)
-            rag_metadata = pd.read_parquet(RAG_METADATA_PATH)
-            rag_embedder = SentenceTransformer(RAG_EMBEDDING_MODEL_NAME)
-            log("✅ FAISS index ve metadata yüklendi.")
+        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
     except Exception as e:
         log(f"❌ setup_model() hatası: {e}")
         traceback.print_exc()
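Editor's note: the removed search_rag fallback is replaced above by extract_parameters and execute_intent. extract_parameters rewrites each "name:{example}" placeholder in an intent's variables patterns into a non-greedy named regex group, then matches the whole pattern against the raw user input. A minimal, self-contained sketch of that behavior follows; the ticket-booking template and sentence are hypothetical, not from this repo:

import re

def extract_parameters(variables_list, user_input):
    # As in the commit: each "name:{example}" placeholder becomes a
    # non-greedy named group (?P<name>.+?); the first matching pattern wins.
    for pattern in variables_list:
        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        match = re.match(regex, user_input)
        if match:
            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
    return []

# Hypothetical intent template and user sentence, for illustration only.
templates = ["from:{Ankara} to:{İstanbul} bileti istiyorum"]
print(extract_parameters(templates, "İzmir Antalya bileti istiyorum"))
# [{'key': 'from', 'value': 'İzmir'}, {'key': 'to', 'value': 'Antalya'}]

Note that re.match anchors only at the start of the input, so text trailing the template still matches; whether that is desirable depends on how the intent templates are written.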
 
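The confidence gate this commit reorders in generate_response (top_conf is now set only on the greedy path, else None) reduces to a few tensor operations. Below is a standalone sketch: the dummy logits stand in for output.scores from model.generate(..., output_scores=True, return_dict_in_generate=True), and the threshold mirrors LLM_CONFIDENCE_THRESHOLD = 0.2 from the config block above.

import torch

# Hypothetical per-step logits: one [batch_size, vocab_size] tensor per
# generated token, as generate() returns them in output.scores.
step_scores = (
    torch.tensor([[4.0, 0.5, 0.1]]),
    torch.tensor([[0.2, 0.3, 0.4]]),
)

scores = torch.stack(step_scores, dim=1)                # [batch, steps, vocab]
probs = torch.nn.functional.softmax(scores[0], dim=-1)  # per-step distributions, batch item 0
top_conf = probs.max().item()                           # highest single-token probability overall

LLM_CONFIDENCE_THRESHOLD = 0.2
if top_conf < LLM_CONFIDENCE_THRESHOLD:
    print("low confidence: would return a FALLBACK_ANSWERS entry")
else:
    print(f"accept generation (top_conf={top_conf:.2f})")

Because the max is taken across every decoding step, a single confident token is enough to clear the gate; a per-step mean or minimum would be a stricter variant.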
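Finally, a sketch of calling the updated endpoint. The host and port are assumptions (7860 is the usual Hugging Face Spaces default), and the three response shapes correspond to the branches of chat() plus the new model-loaded guard:

import requests

# Host/port are assumptions; adjust to your deployment.
resp = requests.post(
    "http://localhost:7860/chat",
    json={"user_input": "Merhaba, bakiye sorgulamak istiyorum"},  # hypothetical message
)
print(resp.json())
# {"response": "..."}                                            -> LLM answer or fallback
# {"intent": "...", "parameters": [{"key": ..., "value": ...}]}  -> intent path
# {"error": "Model yüklenmedi."}                                 -> setup_model() not finished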