Spaces:
Running
Running
proje dosyaları yüklendi
Browse files- Dockerfile +26 -0
- app.py +1 -0
- app_config.py +29 -0
- inference_test.py +166 -0
- intent.py +150 -0
- intent_api.py +185 -0
- intent_system_design.md +137 -0
- intent_test_runner.py +130 -0
- llm_model.py +70 -0
- log.py +5 -0
- requirements.txt +27 -0
Dockerfile
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# === Base image for Hugging Face Spaces
FROM python:3.10

# === Environment variables: cache locations for Hugging Face and PyTorch
# (TORCH_HOME is declared once here; the original repeated it in a second ENV.)
ENV HF_HOME=/app/.cache \
    HF_DATASETS_CACHE=/app/.cache \
    HF_HUB_CACHE=/app/.cache \
    TORCH_HOME=/app/.torch_cache

# === Working/cache directories, made world-writable
# NOTE(review): presumably needed because the container does not run as root — confirm.
# /app/.torch_cache is created and chmod-ed here, so no second RUN layer is needed.
RUN mkdir -p /data/chunks /data/tokenized_chunks /data/zip_temp /data/output /app/.cache /app/.torch_cache && chmod -R 777 /data /app

# Set the working directory first so requirements.txt lands in /app, not in /
WORKDIR /app

# === Install the required libraries
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# === Copy the application files
COPY . /app

# === Start command
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from inference_test import app
|
app_config.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
class AppConfig:
    """Runtime configuration for the chatbot service.

    Holds model identifiers, confidence thresholds, canned fallback answers
    and empty slots for the intent-model artefacts.
    """

    def __init__(self):
        # Hugging Face token taken from the environment (None when unset).
        self.HF_TOKEN = os.getenv("HF_TOKEN")
        self.MODEL_BASE = "TURKCELL/Turkcell-LLM-7b-v1"
        self.USE_FINE_TUNE = False
        self.FINE_TUNE_REPO = "UcsTurkey/trained-zips"
        self.FINE_TUNE_ZIP = "trained_model_000_009.zip"
        self.USE_SAMPLING = False
        self.INTENT_CONFIDENCE_THRESHOLD = 0.5
        self.LLM_CONFIDENCE_THRESHOLD = 0.2
        self.TRAIN_CONFIDENCE_THRESHOLD = 0.7
        # User-facing fallback replies (runtime strings, kept verbatim).
        self.FALLBACK_ANSWERS = [
            "Bu konuda maalesef bilgim yok.",
            "Ne demek istediğinizi tam anlayamadım.",
            "Bu soruya şu an yanıt veremiyorum.",
        ]
        self.INTENT_MODEL_PATH = "intent_model"
        self.INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
        self.INTENT_MODEL = None
        self.INTENT_TOKENIZER = None
        self.LABEL2ID = {}
        self.INTENT_DEFINITIONS = {}

    @staticmethod
    def setup_environment():
        """Set the process environment variables and create the torch cache directory.

        Declared as a static method because the function takes no ``self``:
        without the decorator, calling it on an instance
        (``AppConfig().setup_environment()``) raises ``TypeError``.
        """
        os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
        os.environ["TORCH_HOME"] = "/app/.torch_cache"
        os.makedirs("/app/.torch_cache", exist_ok=True)


# Module-level alias so ``app_config.setup_environment()`` keeps working for
# callers that treat it as a plain function of this module.
setup_environment = AppConfig.setup_environment
|
inference_test.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, threading, uvicorn, time, traceback, random, json, asyncio, uuid
|
2 |
+
from fastapi import FastAPI, Request
|
3 |
+
from fastapi.responses import HTMLResponse, JSONResponse
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
+
import intent_test_runner
|
6 |
+
from app_config import AppConfig
|
7 |
+
import intent, log, intent, llm_model
|
8 |
+
|
9 |
+
app_config = AppConfig()
|
10 |
+
app_config.setup_environment()
|
11 |
+
|
12 |
+
# === FastAPI
|
13 |
+
app = FastAPI()
|
14 |
+
chat_history = []
|
15 |
+
|
16 |
+
@app.get("/")
def health():
    """Liveness probe: report that the service is reachable."""
    payload = {"status": "ok"}
    return payload
|
19 |
+
|
20 |
+
import uuid # yukarıda zaten eklendiğini varsayıyoruz
|
21 |
+
|
22 |
+
@app.post("/run_tests", status_code=202)
def run_tests():
    """Start the intent test suite on a background daemon thread.

    Returns:
        A small status dict; the response is sent with HTTP 202 while the
        tests keep running in the background.
    """
    # ``log`` is bound to the imported *module* in this file, so calling it
    # directly raises TypeError. Resolve the callable instead.
    # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
    emit = getattr(log, "log", log)
    emit("🚦 /run_tests çağrıldı. Testler başlatılıyor...")
    threading.Thread(target=intent_test_runner.run_all_tests, daemon=True).start()
    return {"status": "running", "message": "Test süreci başlatıldı."}
|
27 |
+
|
28 |
+
@app.get("/start", response_class=HTMLResponse)
def root():
    """Create a new chat session and return a minimal HTML chat page.

    The generated session id is stored in ``app.state.session_store`` and
    embedded in the page, which sends it back as the ``X-Session-ID`` header
    on every ``POST /chat`` request.
    """
    # ``log`` is bound to the imported *module* in this file, so calling it
    # directly raises TypeError. Resolve the callable instead.
    # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
    emit = getattr(log, "log", log)

    # Generate a new session id
    session_id = str(uuid.uuid4())
    session_info = {
        "session_id": session_id,
        "variables": {},
        "auth_tokens": {},
        "last_intent": None,
        "awaiting_variable": None,
    }

    # Create the session store on first use, then register the session
    if not hasattr(app.state, "session_store"):
        app.state.session_store = {}
    app.state.session_store[session_id] = session_info

    emit(f"🌐 /start ile yeni session başlatıldı: {session_id}")

    # HTML page with the session id embedded
    return f"""
<html><body>
<h2>Turkcell LLM Chat</h2>
<textarea id='input' rows='4' cols='60'></textarea><br>
<button onclick='send()'>Gönder</button><br><br>
<label>Model Cevabı:</label><br>
<textarea id='output' rows='10' cols='80' readonly style='white-space: pre-wrap;'></textarea>
<script>
const sessionId = "{session_id}";
localStorage.setItem("session_id", sessionId);

async function send() {{
const input = document.getElementById("input").value;
const res = await fetch('/chat', {{
method: 'POST',
headers: {{
'Content-Type': 'application/json',
'X-Session-ID': sessionId
}},
body: JSON.stringify({{ user_input: input }})
}});
const data = await res.json();
document.getElementById('output').value = data.reply || data.response || data.error || 'Hata oluştu.';
}}
</script>
</body></html>
"""
|
75 |
+
|
76 |
+
@app.post("/start_chat")
def start_chat():
    """Create a new chat session and return its id as JSON.

    Returns:
        ``{"session_id": <uuid4 string>}``; the session record is stored in
        ``app.state.session_store`` under that id.
    """
    # ``log`` is bound to the imported *module* in this file, so calling it
    # directly raises TypeError. Resolve the callable instead.
    # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
    emit = getattr(log, "log", log)

    if not hasattr(app.state, "session_store"):
        app.state.session_store = {}

    session_id = str(uuid.uuid4())
    app.state.session_store[session_id] = {
        "session_id": session_id,
        "variables": {},
        "auth_tokens": {},
        "last_intent": None,
        "awaiting_variable": None,
    }
    emit(f"🆕 Yeni session başlatıldı: {session_id}")
    return {"session_id": session_id}
|
92 |
+
|
93 |
+
@app.post("/train_intents", status_code=202)
def train_intents(train_input: intent.TrainInput):
    """Register the posted intent definitions and start training in the background.

    Args:
        train_input: request body whose ``intents`` list holds the intent
            definitions (each is indexed by its ``"name"`` key).

    Returns:
        An acknowledgement dict; the response is sent with HTTP 202 while
        training continues on a daemon thread.
    """
    # ``log`` is bound to the imported *module* in this file, so calling it
    # directly raises TypeError. Resolve the callable instead.
    # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
    emit = getattr(log, "log", log)
    emit("📥 POST /train_intents çağrıldı.")

    intents = train_input.intents
    # The loop variable is deliberately not called ``intent`` so it cannot be
    # confused with the ``intent`` module assigned to on the same line.
    intent.INTENT_DEFINITIONS = {definition["name"]: definition for definition in intents}
    threading.Thread(
        target=intent.background_training,
        args=(intents, app_config),
        daemon=True,
    ).start()
    return {"status": "accepted", "message": "Intent eğitimi arka planda başlatıldı."}
|
100 |
+
|
101 |
+
@app.post("/load_intent_model")
def load_intent_model():
    """Load the saved intent tokenizer, model and label map into the ``intent`` module.

    Returns:
        A success dict, or a 500 ``JSONResponse`` carrying the error text
        when any of the artefacts cannot be loaded.
    """
    model_dir = app_config.INTENT_MODEL_PATH
    try:
        intent.INTENT_TOKENIZER = AutoTokenizer.from_pretrained(model_dir)
        intent.INTENT_MODEL = AutoModelForSequenceClassification.from_pretrained(model_dir)
        with open(os.path.join(model_dir, "label2id.json"), encoding="utf-8") as f:
            intent.LABEL2ID = json.load(f)
        return {"status": "ok", "message": "Intent modeli yüklendi."}
    except Exception as e:
        # Keep the stack trace in the server output, as the /chat handler does;
        # the client only receives the message.
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
111 |
+
|
112 |
+
@app.post("/chat")
async def chat(msg: llm_model.Message, request: Request):
    """Answer one user message, via an intent action when possible, else via the LLM.

    Args:
        msg: request body; ``msg.user_input`` is the user's text.
        request: used to read the ``X-Session-ID`` header
            (defaults to ``"demo-session"``).

    Returns:
        ``{"reply": ...}`` for a successful intent action,
        ``{"response": ...}`` for an LLM/fallback/validation answer,
        ``{"error": ...}`` when the LLM is not loaded, or a 500
        ``JSONResponse`` on unexpected failures.
    """
    def emit(message):
        # ``log`` is bound to the imported *module* in this file; calling it
        # directly raises TypeError.
        # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
        getattr(log, "log", log)(message)

    def llm_answer(response, response_conf):
        # Replace low-confidence LLM output with a canned fallback answer.
        if response_conf is not None and response_conf < app_config.LLM_CONFIDENCE_THRESHOLD:
            return {"response": random.choice(app_config.FALLBACK_ANSWERS)}
        return {"response": response}

    user_input = msg.user_input.strip()
    session_id = request.headers.get("X-Session-ID", "demo-session")

    if not hasattr(app.state, "session_store"):
        app.state.session_store = {}
    session_store = app.state.session_store

    session = session_store.get(session_id)
    if session is None:
        session = {
            "session_id": session_id,
            "variables": {},
            "auth_tokens": {},
            "last_intent": None,
            "awaiting_variable": None,
        }

    try:
        if llm_model.model is None or llm_model.tokenizer is None:
            return {"error": "Model yüklenmedi."}

        # /load_intent_model and /train_intents publish their results on the
        # ``intent`` module, while the config object has slots of the same
        # name; accept either location.
        intent_model = app_config.INTENT_MODEL
        if intent_model is None:
            intent_model = getattr(intent, "INTENT_MODEL", None)
        definitions = app_config.INTENT_DEFINITIONS or getattr(intent, "INTENT_DEFINITIONS", None) or {}

        if intent_model is None:
            response, response_conf = await llm_model.generate_response(user_input, app_config)
            return llm_answer(response, response_conf)

        # Start intent detection and LLM generation together.
        intent_task = asyncio.create_task(intent.detect_intent(user_input))
        response_task = asyncio.create_task(llm_model.generate_response(user_input, app_config))

        # The result must NOT be bound to the name ``intent``: that would make
        # ``intent`` a local of this function and every ``intent.<attr>``
        # access above would raise UnboundLocalError.
        intent_name, intent_conf = await intent_task
        emit(f"🎯 Intent: {intent_name} (conf={intent_conf:.2f})")

        if intent_conf > app_config.INTENT_CONFIDENCE_THRESHOLD and intent_name in definitions:
            # execute_intent() is defined in intent_api.py, not in intent.py.
            import intent_api

            # NOTE(review): no data_formats source is visible in this file;
            # an optional config attribute is used, defaulting to none.
            data_formats = getattr(app_config, "DATA_FORMATS", [])
            result = intent_api.execute_intent(intent_name, user_input, session, definitions, data_formats)
            if "reply" in result:
                session_store[session_id] = result["session"]
                return {"reply": result["reply"]}
            if "errors" in result:
                session_store[session_id] = result["session"]
                return {"response": list(result["errors"].values())[0]}
            return {"response": random.choice(app_config.FALLBACK_ANSWERS)}

        response, response_conf = await response_task
        return llm_answer(response, response_conf)
    except Exception as e:
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
162 |
+
|
163 |
+
# Load the LLM on a background daemon thread.
model_loader = threading.Thread(
    target=llm_model.setup_model,
    kwargs={"app_config": app_config},
    daemon=True,
)
model_loader.start()

# Run the HTTP server on its own daemon thread.
server_thread = threading.Thread(
    target=lambda: uvicorn.run(app, host="0.0.0.0", port=7860),
    daemon=True,
)
server_thread.start()

# Both workers are daemon threads, so the main thread is kept alive here.
while True:
    time.sleep(60)
|
intent.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, torch, traceback, json, shutil, re
|
2 |
+
from datasets import Dataset
|
3 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, default_data_collator, AutoConfig
|
4 |
+
import log
|
5 |
+
from pydantic import BaseModel
|
6 |
+
|
7 |
+
# Module-level state, assigned from outside via ``intent.<NAME> = ...``.
# A ``global`` statement at module scope has no effect and defines nothing,
# so the names are given real initial values here.
INTENT_DEFINITIONS = {}
INTENT_MODEL = None
INTENT_TOKENIZER = None
LABEL2ID = {}
|
9 |
+
|
10 |
+
class TrainInput(BaseModel):
    """Request body for intent training: a list of intent definitions."""

    intents: list
|
12 |
+
|
13 |
+
def background_training(intents, app_config):
    """Fine-tune the intent classifier on the given examples and save it to disk.

    Args:
        intents: iterable of dicts with a ``"name"`` and a list of
            ``"examples"`` (training sentences).
        app_config: configuration object providing ``INTENT_MODEL_ID``,
            ``INTENT_MODEL_PATH`` and ``TRAIN_CONFIDENCE_THRESHOLD``.

    Any exception is logged and swallowed (the function is run on a
    background thread); nothing is returned.
    """
    def emit(message):
        # ``log`` is bound to the imported *module* in this file; calling it
        # directly raises TypeError.
        # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
        getattr(log, "log", log)(message)

    try:
        emit("🔧 Intent eğitimi başlatıldı...")
        texts, labels, label2id = [], [], {}
        for idx, definition in enumerate(intents):
            label2id[definition["name"]] = idx
            for ex in definition["examples"]:
                texts.append(ex)
                labels.append(idx)

        dataset = Dataset.from_dict({"text": texts, "label": labels})
        tokenizer = AutoTokenizer.from_pretrained(app_config.INTENT_MODEL_ID)
        config = AutoConfig.from_pretrained(app_config.INTENT_MODEL_ID)
        config.problem_type = "single_label_classification"
        config.num_labels = len(label2id)
        model = AutoModelForSequenceClassification.from_pretrained(app_config.INTENT_MODEL_ID, config=config)

        tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
        for row in dataset:
            out = tokenizer(row["text"], truncation=True, padding="max_length", max_length=128)
            tokenized_data["input_ids"].append(out["input_ids"])
            tokenized_data["attention_mask"].append(out["attention_mask"])
            tokenized_data["label"].append(row["label"])

        tokenized = Dataset.from_dict(tokenized_data)
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

        output_dir = "/app/intent_train_output"
        os.makedirs(output_dir, exist_ok=True)
        trainer = Trainer(
            model=model,
            args=TrainingArguments(
                output_dir,
                per_device_train_batch_size=4,
                num_train_epochs=3,
                logging_steps=10,
                save_strategy="no",
                report_to=[],
            ),
            train_dataset=tokenized,
            data_collator=default_data_collator,
        )
        trainer.train()

        # Produce the per-intent accuracy report on the training data
        emit("🔧 Başarı raporu üretiliyor...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        # Training leaves the model in train mode; switch to eval so dropout
        # does not randomise the predictions used for the report.
        model.eval()
        input_ids_tensor = tokenized["input_ids"].to(device)
        attention_mask_tensor = tokenized["attention_mask"].to(device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
            predictions = outputs.logits.argmax(dim=-1).tolist()

        actuals = tokenized["label"]
        # Reverse map built once, instead of a linear search per row.
        id2label = {label_id: name for name, label_id in label2id.items()}
        counts = {}
        correct = {}
        for pred, actual in zip(predictions, actuals):
            actual_id = int(actual)  # labels come back as torch scalars
            intent_name = id2label[actual_id]
            counts[intent_name] = counts.get(intent_name, 0) + 1
            if pred == actual_id:
                correct[intent_name] = correct.get(intent_name, 0) + 1
        for intent_name, total in counts.items():
            accuracy = correct.get(intent_name, 0) / total
            emit(f"📊 Intent '{intent_name}' doğruluk: {accuracy:.2f} — {total} örnek")
            if accuracy < app_config.TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                emit(f"⚠️ Yetersiz performanslı intent: '{intent_name}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")

        emit("📦 Intent modeli eğitimi kaydediliyor...")
        # Replace any previously saved model directory wholesale
        if os.path.exists(app_config.INTENT_MODEL_PATH):
            shutil.rmtree(app_config.INTENT_MODEL_PATH)
        model.save_pretrained(app_config.INTENT_MODEL_PATH)
        tokenizer.save_pretrained(app_config.INTENT_MODEL_PATH)
        with open(os.path.join(app_config.INTENT_MODEL_PATH, "label2id.json"), "w", encoding="utf-8") as f:
            json.dump(label2id, f)

        emit("✅ Intent eğitimi tamamlandı ve model kaydedildi.")

    except Exception as e:
        emit(f"❌ Intent eğitimi hatası: {e}")
        traceback.print_exc()
|
88 |
+
|
89 |
+
async def detect_intent(text):
    """Classify ``text`` with the loaded intent model.

    Reads the module-level ``INTENT_TOKENIZER``, ``INTENT_MODEL`` and
    ``LABEL2ID``, which must have been assigned before the call.

    Returns:
        ``(intent_name, confidence)`` — the label with the highest softmax
        probability and that probability as a float.
    """
    # truncation=True keeps over-long inputs within the tokenizer's maximum
    # length instead of failing inside the model.
    inputs = INTENT_TOKENIZER(text, return_tensors="pt", truncation=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = INTENT_MODEL(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    confidence, pred_id = torch.max(probs, dim=-1)
    id2label = {v: k for k, v in LABEL2ID.items()}
    return id2label[pred_id.item()], confidence.item()
|
96 |
+
|
97 |
+
def extract_parameters(variables_list, user_input):
    """Extract named parameters from ``user_input`` using the first matching template.

    Each template contains ``name:{example}`` markers; every marker is turned
    into a lazy named regex group and the result is matched against the start
    of ``user_input``.

    Returns:
        A list of ``{"key": name, "value": text}`` dicts for the first
        template that matches, or ``[]`` when none does.
    """
    marker = r"(\w+):\{(.+?)\}"
    for template in variables_list:
        expression = re.sub(marker, r"(?P<\1>.+?)", template)
        found = re.match(expression, user_input)
        if not found:
            continue
        return [
            {"key": name, "value": captured}
            for name, captured in found.groupdict().items()
        ]
    return []
|
104 |
+
|
105 |
+
def resolve_placeholders(text: str, session: dict, variables: dict) -> str:
    """Substitute ``{...}`` placeholders in ``text``.

    Supported forms:
      * ``{variables.<key>}``  — looked up in ``variables``
      * ``{session.<key>}``    — looked up in ``session["variables"]``
      * ``{auth_tokens.<intent>.<field>}`` — looked up in
        ``session["auth_tokens"][<intent>]``

    Unknown forms, missing keys and any lookup error leave the placeholder
    in the text unchanged.
    """
    def substitute(found):
        name = found.group(1)
        untouched = "{" + name + "}"
        try:
            head, dot, tail = name.partition(".")
            if not dot:
                return untouched  # no namespace prefix: unknown form
            if head == "variables":
                return str(variables.get(tail, untouched))
            if head == "session":
                # session placeholders read from the session's variables
                return str(session.get("variables", {}).get(tail, untouched))
            if head == "auth_tokens":
                # expected shape: auth_tokens.<intent>.<token|refresh_token>
                pieces = name.split(".")
                if len(pieces) != 3:
                    return untouched
                _, owner, token_kind = pieces
                return str(session.get("auth_tokens", {}).get(owner, {}).get(token_kind, untouched))
            return untouched
        except Exception:
            return untouched

    return re.sub(r"\{([^{}]+)\}", substitute, text)
|
129 |
+
|
130 |
+
def validate_variable_formats(variables, variable_format_map, data_formats):
    """Check extracted variables against their declared data formats.

    Args:
        variables: mapping of variable name to extracted value.
        variable_format_map: mapping of variable name to format name.
        data_formats: list of format dicts, each with a ``"name"`` and either
            ``"valid_options"`` or ``"pattern"`` (and optionally
            ``"error_message"``).

    Returns:
        ``(is_valid, errors)`` where ``errors`` maps each failing variable
        name to its error message. Variables that are absent, or whose format
        is not defined, are skipped.
    """
    problems = {}
    for var_name, format_name in variable_format_map.items():
        candidate = variables.get(var_name)
        if candidate is None:
            # missing parameters are reported elsewhere
            continue

        # first format definition with a matching name wins
        spec = None
        for fmt in data_formats:
            if fmt["name"] == format_name:
                spec = fmt
                break
        if not spec:
            continue

        # valid_options takes precedence over pattern
        if "valid_options" in spec:
            rejected = candidate not in spec["valid_options"]
            default_message = f"{var_name} değeri geçersiz."
        elif "pattern" in spec:
            rejected = not re.fullmatch(spec["pattern"], candidate)
            default_message = f"{var_name} formatı geçersiz."
        else:
            continue

        if rejected:
            problems[var_name] = spec.get("error_message", default_message)

    return not problems, problems
|
intent_api.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import intent, requests, log, traceback
|
2 |
+
|
3 |
+
def auth_token_handler(intent_name, auth_config, session):
    """Return an auth token for ``intent_name``, fetching one if none is cached.

    Args:
        intent_name: key under ``session["auth_tokens"]``.
        auth_config: dict with ``auth_endpoint``, ``auth_token_path`` and an
            optional ``auth_body`` (its values may contain placeholders).
        session: session dict; updated in place with the fetched tokens.

    Returns:
        ``(token, session)``.

    Raises:
        Exception: when the configuration is incomplete, the HTTP call fails
            or the token path cannot be resolved. The error is logged and
            re-raised.
    """
    try:
        token_info = session.get("auth_tokens", {}).get(intent_name)
        if token_info and "token" in token_info:
            return token_info["token"], session  # token already cached

        # No token yet: request one
        auth_endpoint = auth_config.get("auth_endpoint")
        auth_body = auth_config.get("auth_body", {})
        token_path = auth_config.get("auth_token_path")

        if not auth_endpoint or not token_path:
            raise Exception("auth_endpoint veya token_path tanımsız")

        # Resolve placeholders such as {session.xxx} inside the auth body
        resolved_body = {
            k: intent.resolve_placeholders(str(v), session, {}) for k, v in auth_body.items()
        }

        response = requests.post(auth_endpoint, json=resolved_body, timeout=5)
        response.raise_for_status()
        json_resp = response.json()

        # Walk the dotted token path, e.g. "token" -> json_resp["token"],
        # "data.access_token" -> json_resp["data"]["access_token"]
        token = json_resp
        for part in token_path.split("."):
            # A non-dict intermediate value means the path cannot be resolved.
            token = token.get(part) if isinstance(token, dict) else None
            if token is None:
                raise Exception(f"Token path çözülemedi: {token_path}")

        # Keep the refresh token too, if the response carries one
        refresh_token = json_resp.get("refresh_token")

        # Store in the session
        session.setdefault("auth_tokens", {})[intent_name] = {
            "token": token,
            "refresh_token": refresh_token,
        }

        return token, session

    except Exception as e:
        # ``log`` is bound to the imported *module* in this file; calling it
        # directly would raise TypeError here and hide the real error.
        # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
        getattr(log, "log", log)(f"❌ Auth token alınamadı: {e}")
        raise
|
48 |
+
|
49 |
+
def refresh_auth_token(intent_name, auth_config, session):
    """Obtain a new auth token for ``intent_name`` from the refresh endpoint.

    Args:
        intent_name: key under ``session["auth_tokens"]``.
        auth_config: dict with ``auth_refresh_endpoint``, ``auth_token_path``
            and an optional ``refresh_body`` (values may contain placeholders).
        session: session dict; its token entry is replaced in place.

    Returns:
        ``(token, session)``.

    Raises:
        Exception: when the configuration is incomplete, no refresh token is
            stored, the HTTP call fails or the token path cannot be resolved.
            The error is logged and re-raised.
    """
    def emit(message):
        # ``log`` is bound to the imported *module* in this file; calling it
        # directly raises TypeError.
        # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
        getattr(log, "log", log)(message)

    try:
        refresh_endpoint = auth_config.get("auth_refresh_endpoint")
        refresh_body = auth_config.get("refresh_body", {})
        token_path = auth_config.get("auth_token_path")

        if not refresh_endpoint or not token_path:
            raise Exception("Refresh yapılandırması eksik")

        # A stored refresh token is required
        refresh_token = session.get("auth_tokens", {}).get(intent_name, {}).get("refresh_token")
        if not refresh_token:
            raise Exception("Mevcut refresh token bulunamadı")

        # Resolve placeholders inside the refresh body.
        # NOTE(review): the stored refresh token is only sent if the body
        # template references it through a placeholder — confirm the config.
        resolved_body = {
            k: intent.resolve_placeholders(str(v), session, {}) for k, v in refresh_body.items()
        }

        response = requests.post(refresh_endpoint, json=resolved_body, timeout=5)
        response.raise_for_status()
        json_resp = response.json()

        # Walk the dotted token path to the new token
        token = json_resp
        for part in token_path.split("."):
            # A non-dict intermediate value means the path cannot be resolved.
            token = token.get(part) if isinstance(token, dict) else None
            if token is None:
                raise Exception(f"Token path çözülemedi: {token_path}")

        # Use the new refresh token if one is returned, else keep the old one
        new_refresh_token = json_resp.get("refresh_token", refresh_token)

        # Update the session
        session["auth_tokens"][intent_name] = {
            "token": token,
            "refresh_token": new_refresh_token,
        }

        emit(f"🔁 Token başarıyla yenilendi: {intent_name}")
        return token, session

    except Exception as e:
        emit(f"❌ Token yenileme başarısız: {e}")
        raise
|
95 |
+
|
96 |
+
def execute_intent(intent_name, user_input, session, intent_definitions, data_formats):
    """Run the REST action configured for an intent and build the reply.

    Steps: extract parameters from ``user_input``, validate them, obtain an
    auth token if the action declares ``auth``, call the API (refreshing the
    token and retrying on HTTP 401 while retries remain), then render the
    reply template.

    Args:
        intent_name: key into ``intent_definitions``.
        user_input: raw user text.
        session: session dict (updated in place).
        intent_definitions: mapping of intent name to definition dict.
        data_formats: list of format definitions used for validation.

    Returns:
        ``{"reply", "session"}`` on success,
        ``{"errors", "awaiting_variable", "session"}`` on validation failure,
        ``{"error", "session"}`` on any exception (which is logged).
    """
    def emit(message):
        # ``log`` is bound to the imported *module* in this file; calling it
        # directly raises TypeError.
        # NOTE(review): assumes log.py exposes a ``log`` function — confirm.
        getattr(log, "log", log)(message)

    try:
        definition = intent_definitions[intent_name]
        variables_raw = intent.extract_parameters(definition.get("variables", []), user_input)
        variables = {item["key"]: item["value"] for item in variables_raw}

        emit(f"🚀 execute_intent('{intent_name}')")
        emit(f"🔍 Çıkarılan parametreler: {variables}")

        # Validation
        variable_format_map = definition.get("variable_formats", {})
        is_valid, validation_errors = intent.validate_variable_formats(variables, variable_format_map, data_formats)
        if not is_valid:
            emit(f"⚠️ Validasyon hatası: {validation_errors}")
            return {
                "errors": validation_errors,
                "awaiting_variable": list(validation_errors.keys())[0],
                "session": session,
            }

        # Action configuration
        action = definition["action"]
        headers = action.get("headers", [])
        body = action.get("body", {})
        method = action.get("method", "POST")
        url = action["url"]
        timeout = action.get("timeout", 5)
        retry_count = action.get("retry_count", 0)
        auth_config = action.get("auth", None)
        tls = action.get("tls", {})
        verify = tls.get("verify", True)
        # A CA bundle path replaces the boolean only when verification is on
        verify_path = tls.get("ca_bundle") if verify and tls.get("ca_bundle") else verify

        # Auth: make sure a token is stored in the session before resolving
        # placeholders that reference it
        if auth_config:
            _, session = auth_token_handler(intent_name, auth_config, session)

        # API call
        for attempt in range(retry_count + 1):
            # Resolve header/body placeholders on every attempt so that a
            # token refreshed after a 401 is actually sent on the retry.
            resolved_headers = {
                h["key"]: intent.resolve_placeholders(h["value"], session, variables)
                for h in headers
            }
            resolved_body = {
                k: intent.resolve_placeholders(str(v), session, variables)
                for k, v in body.items()
            }
            try:
                response = requests.request(
                    method=method,
                    url=url,
                    headers=resolved_headers,
                    json=resolved_body,
                    timeout=timeout,
                    verify=verify_path,
                )
                if response.status_code == 401 and auth_config and attempt < retry_count:
                    emit("🔁 Token expired. Yenileniyor...")
                    _, session = refresh_auth_token(intent_name, auth_config, session)
                    continue
                response.raise_for_status()
                break
            except requests.HTTPError:
                # Only a 401 with retries left is retried; everything else propagates
                if response.status_code != 401 or attempt == retry_count:
                    raise

        emit("✅ API çağrısı başarılı")
        json_resp = response.json()

        # Parse the response
        field = action.get("response_parser", {}).get("field")
        rate = json_resp.get(field) if field else json_resp
        template = action.get("reply_template", str(rate))
        # Expose the parsed field to the template as {variables.<field>}
        template_variables = dict(variables)
        if field:
            template_variables[field] = str(rate)
        reply = intent.resolve_placeholders(template, session, template_variables)

        # Update the session
        session.setdefault("variables", {}).update(variables)
        session["last_intent"] = intent_name
        return {
            "reply": reply,
            "session": session,
        }

    except Exception as e:
        emit(f"❌ execute_intent() hatası: {e}")
        traceback.print_exc()
        return {
            "error": str(e),
            "session": session,
        }
|
intent_system_design.md
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Intent Tabanlı Chatbot Sistem Tasarımı (v1)
|
2 |
+
|
3 |
+
Bu doküman, niyet (intent) tespiti, parametre çıkarımı, REST API entegrasyonu, session yönetimi, veri doğrulama ve LLM destekli hibrit karar mekanizmasını içeren chatbot altyapısını detaylı olarak tanımlar.
|
4 |
+
|
5 |
+
---
|
6 |
+
|
7 |
+
## 🧠 Amaç
|
8 |
+
|
9 |
+
- Kullanıcı ifadelerinden "intent" algılanması
|
10 |
+
- Parametrelerin (variables) otomatik çıkarılması
|
11 |
+
- Eksik ya da hatalı parametrelerin sorgulanması
|
12 |
+
- Format kontrolleri ve validasyon
|
13 |
+
- Gerekirse REST API çağrıları
|
14 |
+
- Session bazlı token yönetimi
|
15 |
+
- Gelen cevabın insani bir cevaba dönüştürülmesi
|
16 |
+
- Gelişmiş kararlar için hibrit sistem (LLM + logic)
|
17 |
+
|
18 |
+
---
|
19 |
+
|
20 |
+
## 📂 JSON Şema (Intent + Data Format)
|
21 |
+
|
22 |
+
```json
|
23 |
+
{
|
24 |
+
"data_formats": [
|
25 |
+
{
|
26 |
+
"name": "currency_format",
|
27 |
+
"valid_options": ["dolar", "euro", "TL"],
|
28 |
+
"error_message": "Geçerli bir döviz cinsi belirtmelisiniz."
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"name": "client_no_format",
|
32 |
+
"pattern": "^[0-9]{6}$",
|
33 |
+
"error_message": "Müşteri numaranız 6 haneli olmalıdır."
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"intents": [
|
37 |
+
{
|
38 |
+
"name": "doviz-kuru-intent",
|
39 |
+
"examples": ["dolar kuru nedir?"],
|
40 |
+
"variables": ["currency:{dolar} kuru nedir?"],
|
41 |
+
"variable_formats": {
|
42 |
+
"currency": "currency_format"
|
43 |
+
},
|
44 |
+
"action": {
|
45 |
+
"url": "https://api.ex.com/doviz",
|
46 |
+
"method": "POST",
|
47 |
+
"headers": [
|
48 |
+
{ "key": "Authorization", "value": "Bearer {auth_tokens.doviz-kuru-intent.token}" }
|
49 |
+
],
|
50 |
+
"body": {
|
51 |
+
"currency": "{variables.currency}"
|
52 |
+
},
|
53 |
+
"timeout": 5,
|
54 |
+
"retry_count": 1,
|
55 |
+
"tls": {
|
56 |
+
"verify": true,
|
57 |
+
"ca_bundle": "/app/certs/my-ca.pem"
|
58 |
+
},
|
59 |
+
"auth": {
|
60 |
+
"auth_endpoint": "https://api.ex.com/auth",
|
61 |
+
"auth_body": { "username": "user", "password": "pass" },
|
62 |
+
"auth_token_path": "token",
|
63 |
+
"auth_refresh_endpoint": "https://api.ex.com/refresh",
|
64 |
+
"refresh_body": { "refresh_token": "{session.refresh_token}" }
|
65 |
+
},
|
66 |
+
"response_parser": {
|
67 |
+
"field": "rate",
|
68 |
+
"format": "{variables.currency} kuru: {rate} TL"
|
69 |
+
},
|
70 |
+
"reply_template": "{variables.currency} kuru şu an {rate} TL."
|
71 |
+
}
|
72 |
+
}
|
73 |
+
]
|
74 |
+
}
|
75 |
+
```
|
76 |
+
|
77 |
+
---
|
78 |
+
|
79 |
+
## 🧭 Chat Akışı (Hibrit)
|
80 |
+
|
81 |
+
1. Kullanıcıdan mesaj alınır (`POST /chat`)
|
82 |
+
2. Session bulunur (`X-Session-ID` header)
|
83 |
+
3. Intent tespiti (bert model)
|
84 |
+
4. Parametreler `extract_parameters()` ile alınır
|
85 |
+
5. `variable_formats` varsa validasyon yapılır
|
86 |
+
6. Eksik/hatalı varsa `awaiting_variable` kaydedilir, soru sorulur
|
87 |
+
7. Tüm parametreler tamamsa:
|
88 |
+
- Gerekirse auth token üretilir / yenilenir
|
89 |
+
- API çağrısı yapılır
|
90 |
+
- `response_parser` uygulanır
|
91 |
+
- `reply_template` ile mesaj hazır
|
92 |
+
8. Session güncellenir, cevap döndürülür
|
93 |
+
|
94 |
+
---
|
95 |
+
|
96 |
+
## 📦 Session Yapısı
|
97 |
+
|
98 |
+
```json
|
99 |
+
{
|
100 |
+
"session_id": "abc-123",
|
101 |
+
"variables": {
|
102 |
+
"tckn": "12345678900"
|
103 |
+
},
|
104 |
+
"auth_tokens": {
|
105 |
+
"doviz-kuru-intent": {
|
106 |
+
"token": "...",
|
107 |
+
"refresh_token": "..."
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"awaiting_variable": "currency",
|
111 |
+
"last_intent": "doviz-kuru-intent"
|
112 |
+
}
|
113 |
+
```
|
114 |
+
|
115 |
+
---
|
116 |
+
|
117 |
+
## 🧩 Placeholder Kullanımı
|
118 |
+
|
119 |
+
| Amaç | Yazım |
|
120 |
+
|---------------|-------------------------------|
|
121 |
+
| Parametre | `{variables.currency}` |
|
122 |
+
| Session verisi| `{session.tckn}` |
|
123 |
+
| Token | `{auth_tokens.intent.token}` |
|
124 |
+
|
125 |
+
---
|
126 |
+
|
127 |
+
## ✅ Sonraki Adımlar
|
128 |
+
|
129 |
+
- `resolve_placeholders()` fonksiyonu
|
130 |
+
- `validate_variable_formats()`
|
131 |
+
- `auth_token_handler()` → create + refresh
|
132 |
+
- `execute_intent()` tam mantık
|
133 |
+
- `log()` yapısı
|
134 |
+
|
135 |
+
---
|
136 |
+
|
137 |
+
Bu tasarım, modüler ve üretim ortamı için ölçeklenebilir sağlam bir altyapı sunar.
|
intent_test_runner.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, requests, time
|
2 |
+
from log import log
|
3 |
+
|
4 |
+
BASE_URL = "http://localhost:7860"
|
5 |
+
MOCK_BASE = os.getenv("MOCK_BASE_URL") # Örnek: https://abc123.ngrok.io
|
6 |
+
|
7 |
+
test_results = []
|
8 |
+
|
9 |
+
def assert_test(name, actual, expected_substring):
    """Check one test expectation, log the outcome and record it.

    The check passes when ``expected_substring`` occurs anywhere in
    ``str(actual)``.

    Args:
        name: Human-readable test name shown in the log line.
        actual: Value produced by the system under test (stringified for
            the comparison).
        expected_substring: Text that must appear in the stringified value.
    """
    passed = expected_substring in str(actual)
    if not passed:
        log(f"[TEST] {name:<45} ❌ — Beklenen: {expected_substring}, Gelen: {actual}")
    else:
        log(f"[TEST] {name:<45} ✅")
    # Recorded after logging, as a (name, bool) pair consumed by summarize_tests().
    test_results.append((name, passed))
|
16 |
+
|
17 |
+
def summarize_tests():
    """Log the total, passed and failed counts of all recorded tests."""
    total = len(test_results)
    success = len([name for name, ok in test_results if ok])
    fail = total - success
    summary_lines = (
        "🧾 TEST SONUCU ÖZETİ",
        f"🔢 Toplam Test : {total}",
        f"✅ Başarılı : {success}",
        f"❌ Başarısız : {fail}",
    )
    for line in summary_lines:
        log(line)
|
25 |
+
|
26 |
+
def wait_for_intent_training(timeout_sec=60, log_path="/tmp/logs.txt", poll_interval=3):
    """Poll a log file until the intent-training completion marker appears.

    Args:
        timeout_sec: Approximate upper bound, in seconds, on the total wait.
        log_path: File that is scanned for the completion marker.
        poll_interval: Seconds to sleep between two consecutive checks.

    Returns:
        True as soon as the marker is found in the file, False if it was not
        found in any of the attempts.

    Raises:
        ValueError: If ``poll_interval`` is not positive.
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    log("⏳ Intent eğitimi tamamlanıyor mu kontrol ediliyor...")
    marker = "✅ Intent eğitimi tamamlandı"
    # Always look at least once, even when timeout_sec < poll_interval.
    attempts = max(1, int(timeout_sec // poll_interval))

    for attempt in range(attempts):
        try:
            # Explicit UTF-8: the marker is non-ASCII. The context manager
            # closes the handle on every poll instead of leaking it.
            with open(log_path, encoding="utf-8", errors="replace") as log_file:
                if marker in log_file.read():
                    return True
        except FileNotFoundError:
            # The log file may simply not have been created yet; keep polling.
            pass
        # No point sleeping after the final check.
        if attempt < attempts - 1:
            time.sleep(poll_interval)
    return False
|
34 |
+
|
35 |
+
def _auth_header(intent_name):
    """Return the Authorization header entry for ``intent_name``."""
    # The braces are literal placeholder syntax sent to the service as-is.
    return {"key": "Authorization", "value": "Bearer {auth_tokens." + intent_name + ".token}"}


def _auth_config(intent_name):
    """Return the auth section (login + refresh endpoints) for ``intent_name``."""
    return {
        "auth_endpoint": f"{MOCK_BASE}/auth",
        "auth_body": {"username": "user", "password": "pass"},
        "auth_token_path": "token",
        "auth_refresh_endpoint": f"{MOCK_BASE}/refresh",
        "refresh_body": {"refresh_token": "{auth_tokens." + intent_name + ".refresh_token}"},
    }


def _build_intents_payload():
    """Build the /train_intents request body (doviz-kuru and yol-durumu intents)."""
    return {
        "intents": [
            {
                "name": "doviz-kuru",
                "examples": ["dolar kuru nedir", "euro kuru nedir"],
                "variables": ["currency:{dolar} kuru nedir", "currency:{euro} kuru nedir"],
                "variable_formats": {"currency": "currency_format"},
                "action": {
                    "url": f"{MOCK_BASE}/doviz",
                    "method": "POST",
                    "headers": [_auth_header("doviz-kuru")],
                    "body": {"currency": "{variables.currency}"},
                    "auth": _auth_config("doviz-kuru"),
                    "response_parser": {"field": "rate"},
                    "reply_template": "{variables.currency} kuru şu an {rate} TL.",
                },
            },
            {
                "name": "yol-durumu",
                "examples": ["Ankara'dan İstanbul'a yol durumu"],
                "variables": ["from_location:{Ankara} to_location:{İstanbul} yol durumu"],
                "action": {
                    "url": f"{MOCK_BASE}/yol",
                    "method": "POST",
                    "headers": [_auth_header("yol-durumu")],
                    "body": {
                        "from_location": "{variables.from_location}",
                        "to_location": "{variables.to_location}",
                    },
                    "auth": _auth_config("yol-durumu"),
                    "response_parser": {"field": "status"},
                    "reply_template": "{status}",
                },
            },
        ]
    }


def _chat(user_input, headers):
    """POST one user utterance to /chat and return the decoded JSON reply."""
    response = requests.post(f"{BASE_URL}/chat", json={"user_input": user_input}, headers=headers)
    return response.json()


def run_all_tests():
    """Run the end-to-end chat/intent scenario against the service at BASE_URL.

    Steps: open a chat session, check the fallback answer, train and load the
    two test intents, then exercise missing-parameter, parameter-completion,
    topic-change and invalid-parameter turns. Each expectation is recorded
    through assert_test().

    Any exception is logged rather than propagated, and the summary is always
    emitted so results gathered before a failure are not lost.
    """
    try:
        log("🚀 Test süreci başlatıldı.")
        if not MOCK_BASE:
            # Without it every action/auth URL below becomes "None/...".
            log("⚠️ MOCK_BASE_URL ortam değişkeni tanımlı değil; intent action URL'leri geçersiz olacak.")

        session_id = requests.post(f"{BASE_URL}/start_chat").json().get("session_id")
        headers = {"X-Session-ID": session_id}

        # 1. LLM fallback test
        assert_test("LLM fallback", _chat("lorem ipsum", headers), "Bu konuda maalesef bilgim yok")

        # 2. Intent training (currency + road-status intents)
        requests.post(f"{BASE_URL}/train_intents", json=_build_intents_payload())
        if not wait_for_intent_training():
            # Deliberately failing check so the timeout shows up in the summary.
            assert_test("Intent eğitimi zamanında tamamlandı", "False", "True")
            return

        r = requests.post(f"{BASE_URL}/load_intent_model")
        assert_test("Intent modeli yüklendi", r.json(), "ok")

        # 3. Missing parameter - doviz-kuru
        assert_test("Eksik parametre — currency", _chat("döviz kuru nedir", headers), "bir döviz cinsi")

        # 4. Parameter completion
        assert_test("Parametre tamamlandı — euro", _chat("euro", headers), "euro kuru şu an")

        # 5. Missing parameter - yol-durumu
        assert_test("Eksik parametre — to_location", _chat("Ankara'dan yol durumu", headers), "hangi şehir")

        # 6. Parameter completion - road status
        assert_test("Parametre tamamlandı — yol durumu", _chat("İstanbul", headers), "trafik açık")

        # 7. Topic change test
        assert_test("Konu değişikliği sonrası fallback", _chat("hava nasıl", headers), "Bu konuda maalesef")

        # 8. Invalid parameter - currency
        assert_test("Geçersiz parametre — currency", _chat("yenidolar kuru nedir", headers), "geçerli bir döviz")

    except Exception as e:
        # Top-level boundary of the test run: report and fall through to the summary.
        log(f"❌ run_all_tests sırasında hata oluştu: {e}")
    finally:
        # Runs on success, on the early return above and after an exception.
        summarize_tests()
|
llm_model.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch, traceback
|
2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
|
3 |
+
import log
|
4 |
+
from pydantic import BaseModel
|
5 |
+
|
6 |
+
# Shared inference state, filled in by setup_model() and read by
# generate_response(). A `global` statement at module level has no effect, so
# none is used here; functions that rebind these names must declare them
# `global` themselves.
model = None
tokenizer = None
eos_token_id = None
|
11 |
+
|
12 |
+
class Message(BaseModel):
    """Pydantic model with a single required string field, ``user_input``."""

    user_input: str
|
14 |
+
|
15 |
+
def setup_model(app_config):
    """Load the chat model and tokenizer and publish them as module globals.

    Also downloads the intent classifier named by ``app_config.INTENT_MODEL_ID``
    so that it is present in the local Hugging Face cache; the loaded intent
    objects themselves are discarded.

    Args:
        app_config: Configuration object; ``MODEL_BASE`` and
            ``INTENT_MODEL_ID`` are read.

    Failures are logged with a traceback and not re-raised; in that case the
    globals keep whatever values they had before the call.
    """
    # Without this declaration the assignments below would only create locals
    # and the module-level model/tokenizer would stay None forever.
    global model, tokenizer, eos_token_id

    # The file-level `import log` binds the *module*, which is not callable;
    # bind the function locally so the log(...) calls below actually work.
    from log import log

    try:
        log("🧠 setup_model() başladı")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        log(f"📡 Kullanılan cihaz: {device}")

        loaded_tokenizer = AutoTokenizer.from_pretrained(app_config.MODEL_BASE, use_fast=False)
        log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
        loaded_model = AutoModelForCausalLM.from_pretrained(
            app_config.MODEL_BASE, torch_dtype=torch.float32
        ).to(device)
        log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")

        # Fall back to the EOS token when the tokenizer defines no pad token.
        loaded_tokenizer.pad_token = loaded_tokenizer.pad_token or loaded_tokenizer.eos_token
        loaded_model.config.pad_token_id = loaded_tokenizer.pad_token_id
        loaded_eos_token_id = loaded_tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
        loaded_model.eval()
        log("✅ Ana model eval() çağrıldı")

        # Publish only fully configured objects so readers never observe a
        # tokenizer without its model (or vice versa).
        model = loaded_model
        tokenizer = loaded_tokenizer
        eos_token_id = loaded_eos_token_id

        log(f"📦 Intent modeli indiriliyor: {app_config.INTENT_MODEL_ID}")
        # Loaded only for the download side effect; results are not kept.
        AutoTokenizer.from_pretrained(app_config.INTENT_MODEL_ID)
        AutoModelForSequenceClassification.from_pretrained(app_config.INTENT_MODEL_ID)
        log("✅ Intent modeli önbelleğe alındı.")
        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
    except Exception as e:
        log(f"❌ setup_model() hatası: {e}")
        traceback.print_exc()
|
37 |
+
|
38 |
+
async def generate_response(text, app_config):
    """Generate a single-turn chat reply for ``text`` with the loaded model.

    Args:
        text: The user's message; wrapped as one ``user`` chat turn.
        app_config: Configuration object; only ``USE_SAMPLING`` is read.

    Returns:
        A ``(reply, top_conf)`` tuple. ``reply`` is the decoded text of the
        newly generated tokens. ``top_conf`` is the largest softmax
        probability found across all generation steps when sampling is
        disabled, and ``None`` when sampling is enabled.

    Raises:
        RuntimeError: If the module-level model/tokenizer are not loaded.
    """
    if model is None or tokenizer is None:
        raise RuntimeError("Model is not loaded; call setup_model() first.")

    messages = [{"role": "user", "content": text}]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
    input_ids = encodeds.to(model.device)
    # A single, unpadded prompt: every position is a real token. Deriving the
    # mask from `!= pad_token_id` would hide genuine prompt tokens whenever
    # the pad token falls back to a token that also occurs in the template.
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=app_config.USE_SAMPLING,
            eos_token_id=eos_token,
            pad_token_id=tokenizer.pad_token_id,
            return_dict_in_generate=True,
            output_scores=True,
        )

    if not app_config.USE_SAMPLING:
        # One score row per generated step; take the single highest probability.
        scores = torch.stack(output.scores, dim=1)
        probs = torch.nn.functional.softmax(scores[0], dim=-1)
        top_conf = probs.max().item()
    else:
        top_conf = None

    # For a causal LM the returned sequence starts with the prompt. Slice it
    # off by length instead of searching the decoded text for "assistant",
    # which breaks when the user's own message contains that word.
    prompt_length = input_ids.shape[-1]
    generated_ids = output.sequences[0][prompt_length:]
    decoded = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    return decoded, top_conf
|
log.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
|
3 |
+
def log(message):
    """Print ``message`` to stdout prefixed with the current time as [HH:MM:SS].

    The output is flushed immediately.
    """
    stamp = f"{datetime.now():%H:%M:%S}"
    line = f"[{stamp}] {message}"
    print(line, flush=True)
|
requirements.txt
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# === FastAPI ve sunucu ===
|
2 |
+
fastapi==0.110.1
|
3 |
+
uvicorn==0.29.0
|
4 |
+
|
5 |
+
# === Transformers ve NLP araçları ===
|
6 |
+
transformers==4.40.1
|
7 |
+
datasets==2.19.1
|
8 |
+
peft==0.10.0
|
9 |
+
accelerate==0.29.3
|
10 |
+
sentence-transformers==2.6.1
|
11 |
+
sentencepiece==0.1.99
|
12 |
+
|
13 |
+
# === FAISS ve numpy/pandas ===
|
14 |
+
faiss-cpu==1.7.4
|
15 |
+
numpy==1.26.4
|
16 |
+
pandas==2.2.2
|
17 |
+
|
18 |
+
# === Diğer yardımcılar ===
|
19 |
+
scikit-learn==1.4.2
|
20 |
+
torch==2.2.2
|
21 |
+
tokenizers==0.19.1
|
22 |
+
|
23 |
+
# === Hugging Face Hub entegrasyonu ===
|
24 |
+
huggingface_hub==0.23.0
|
25 |
+
|
26 |
+
# === Parquet desteği için
|
27 |
+
pyarrow==15.0.2
|