Spaces:
Running
Running
Upload 20 files
Browse files- Dockerfile +1 -0
- app.py +31 -80
- chat_handler_debug.py +135 -135
- controllers/admin_controller.py +22 -0
- controllers/chat_controller.py +23 -0
- controllers/health_controller.py +7 -0
- controllers/test_controller.py +12 -0
- intent_system_design.md +137 -137
- intent_test_runner.py +69 -75
- llm_model.py +83 -83
Dockerfile
CHANGED
@@ -16,6 +16,7 @@ RUN mkdir -p /app/.torch_cache && chmod -R 777 /app/.torch_cache
|
|
16 |
|
17 |
# === Gereken kütüphaneleri yükle
|
18 |
COPY requirements.txt ./
|
|
|
19 |
RUN pip install --no-cache-dir -r requirements.txt
|
20 |
|
21 |
# === Uygulama dosyalarını kopyala
|
|
|
16 |
|
17 |
# === Gereken kütüphaneleri yükle
|
18 |
COPY requirements.txt ./
|
19 |
+
COPY controllers/ ./controllers/
|
20 |
RUN pip install --no-cache-dir -r requirements.txt
|
21 |
|
22 |
# === Uygulama dosyalarını kopyala
|
app.py
CHANGED
@@ -1,80 +1,31 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
import
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
session = session_store.create_session(project_name)
|
34 |
-
log(f"🆕 Yeni session başlatıldı: {session.session_id} (proje: {project_name})")
|
35 |
-
return {"session_id": session.session_id}
|
36 |
-
|
37 |
-
@app.post("/reload_config")
|
38 |
-
def reload_config():
|
39 |
-
try:
|
40 |
-
service_config.load(is_reload=True)
|
41 |
-
log("🔁 Config reload sonrası tüm projeler için modeller yeniden yükleniyor...")
|
42 |
-
for project_name in service_config.projects:
|
43 |
-
llm_config = service_config.get_project_llm_config(project_name)
|
44 |
-
model_base = llm_config["model_base"]
|
45 |
-
model_instance = LLMModel()
|
46 |
-
model_instance.setup(service_config, llm_config)
|
47 |
-
llm_models[project_name] = model_instance
|
48 |
-
log(f"✅ '{project_name}' için model yüklendi.")
|
49 |
-
return {"status": "ok", "message": "Konfigürasyon ve modeller yeniden yüklendi."}
|
50 |
-
except Exception as e:
|
51 |
-
return JSONResponse(content={"error": str(e)}, status_code=500)
|
52 |
-
|
53 |
-
@app.post("/chat")
|
54 |
-
async def chat(msg: Message, request: Request):
|
55 |
-
session_id = request.headers.get("X-Session-ID")
|
56 |
-
if not session_id:
|
57 |
-
return JSONResponse(content={"error": "Session ID eksik."}, status_code=400)
|
58 |
-
|
59 |
-
session = session_store.get_session(session_id)
|
60 |
-
if not session:
|
61 |
-
return JSONResponse(content={"error": "Geçersiz veya süresi dolmuş session."}, status_code=400)
|
62 |
-
|
63 |
-
project_name = session.project_name
|
64 |
-
llm_model = llm_models.get(project_name)
|
65 |
-
if llm_model is None:
|
66 |
-
return JSONResponse(content={"error": f"{project_name} için model yüklenmemiş."}, status_code=500)
|
67 |
-
|
68 |
-
return await handle_chat(msg, request, app, service_config, session, llm_model)
|
69 |
-
|
70 |
-
if __name__ == "__main__":
|
71 |
-
log("🌐 Servis başlatılıyor...")
|
72 |
-
service_config.load(is_reload=False)
|
73 |
-
for project_name in service_config.projects:
|
74 |
-
llm_config = service_config.get_project_llm_config(project_name)
|
75 |
-
model_base = llm_config["model_base"]
|
76 |
-
model_instance = LLMModel()
|
77 |
-
model_instance.setup(service_config, llm_config)
|
78 |
-
llm_models[project_name] = model_instance
|
79 |
-
log(f"✅ '{project_name}' için model yüklendi.")
|
80 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from controllers import chat_controller, test_controller, admin_controller, health_controller
|
3 |
+
from service_config import ServiceConfig
|
4 |
+
from session import SessionStore
|
5 |
+
from llm_model import LLMModel
|
6 |
+
from log import log
|
7 |
+
|
8 |
+
service_config = ServiceConfig()
|
9 |
+
session_store = SessionStore()
|
10 |
+
llm_models = {}
|
11 |
+
|
12 |
+
app = FastAPI()
|
13 |
+
|
14 |
+
app.include_router(health_controller.router)
|
15 |
+
app.include_router(chat_controller.router)
|
16 |
+
app.include_router(test_controller.router)
|
17 |
+
app.include_router(admin_controller.router)
|
18 |
+
|
19 |
+
if __name__ == "__main__":
|
20 |
+
log("🌐 Servis başlatılıyor...")
|
21 |
+
service_config.load(is_reload=False)
|
22 |
+
for project_name in service_config.projects:
|
23 |
+
llm_config = service_config.get_project_llm_config(project_name)
|
24 |
+
model_instance = LLMModel()
|
25 |
+
model_instance.setup(service_config, llm_config)
|
26 |
+
llm_models[project_name] = model_instance
|
27 |
+
log(f"✅ '{project_name}' için model yüklendi.")
|
28 |
+
|
29 |
+
import uvicorn
|
30 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
31 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chat_handler_debug.py
CHANGED
@@ -1,135 +1,135 @@
|
|
1 |
-
from fastapi import Request
|
2 |
-
from fastapi.responses import JSONResponse
|
3 |
-
import traceback, random
|
4 |
-
from intent import extract_parameters, validate_variable_formats, detect_intent
|
5 |
-
from intent_api import execute_intent
|
6 |
-
from llm_model import generate_response, get_model, get_tokenizer, Message
|
7 |
-
from log import log
|
8 |
-
|
9 |
-
DEBUG = True
|
10 |
-
|
11 |
-
async def handle_chat(msg: Message, request: Request, app, s_config):
|
12 |
-
user_input = msg.user_input.strip()
|
13 |
-
session_id = request.headers.get("X-Session-ID", "demo-session")
|
14 |
-
|
15 |
-
if not hasattr(app.state, "session_store"):
|
16 |
-
app.state.session_store = {}
|
17 |
-
|
18 |
-
session_store = getattr(app.state, "session_store", {})
|
19 |
-
session = session_store.get(session_id, {
|
20 |
-
"session_id": session_id,
|
21 |
-
"variables": {},
|
22 |
-
"auth_tokens": {},
|
23 |
-
"last_intent": None,
|
24 |
-
"awaiting_variable": None
|
25 |
-
})
|
26 |
-
|
27 |
-
try:
|
28 |
-
if get_model() is None or get_tokenizer() is None:
|
29 |
-
return {"error": "Model yüklenmedi."}
|
30 |
-
|
31 |
-
detected_intent, intent_conf = await detect_intent(user_input)
|
32 |
-
if DEBUG:
|
33 |
-
log(f"🎯 Intent tespiti: {detected_intent}, Confidence: {intent_conf:.2f}")
|
34 |
-
|
35 |
-
current_intent = session.get("last_intent")
|
36 |
-
awaiting_variable = session.get("awaiting_variable")
|
37 |
-
|
38 |
-
if DEBUG:
|
39 |
-
log(f"📦 Session: {session}")
|
40 |
-
log(f"🧩 Awaiting: {awaiting_variable}, Last Intent: {current_intent}")
|
41 |
-
|
42 |
-
if (
|
43 |
-
awaiting_variable and
|
44 |
-
detected_intent and
|
45 |
-
detected_intent != current_intent and
|
46 |
-
intent_conf > s_config.INTENT_CONFIDENCE_THRESHOLD
|
47 |
-
):
|
48 |
-
log("🧹 Konu değişikliği algılandı → context sıfırlanıyor")
|
49 |
-
session["awaiting_variable"] = None
|
50 |
-
session["variables"] = {}
|
51 |
-
session["last_intent"] = detected_intent
|
52 |
-
current_intent = detected_intent
|
53 |
-
|
54 |
-
intent_is_valid = (
|
55 |
-
detected_intent and
|
56 |
-
intent_conf > s_config.INTENT_CONFIDENCE_THRESHOLD and
|
57 |
-
detected_intent in s_config.INTENT_DEFINITIONS
|
58 |
-
)
|
59 |
-
if DEBUG:
|
60 |
-
log(f"✅ Intent geçerli mi?: {intent_is_valid}")
|
61 |
-
|
62 |
-
if intent_is_valid:
|
63 |
-
session["last_intent"] = detected_intent
|
64 |
-
definition = s_config.INTENT_DEFINITIONS[detected_intent]
|
65 |
-
pattern_list = definition.get("variables", [])
|
66 |
-
data_formats = s_config.DATA_FORMATS
|
67 |
-
variable_format_map = definition.get("variable_formats", {})
|
68 |
-
|
69 |
-
if awaiting_variable:
|
70 |
-
extracted = extract_parameters(pattern_list, user_input)
|
71 |
-
for p in extracted:
|
72 |
-
if p["key"] == awaiting_variable:
|
73 |
-
session["variables"][awaiting_variable] = p["value"]
|
74 |
-
session["awaiting_variable"] = None
|
75 |
-
log(f"✅ Awaiting parametre tamamlandı: {awaiting_variable} = {p['value']}")
|
76 |
-
break
|
77 |
-
|
78 |
-
extracted = extract_parameters(pattern_list, user_input)
|
79 |
-
variables = {p["key"]: p["value"] for p in extracted}
|
80 |
-
session.setdefault("variables", {}).update(variables)
|
81 |
-
|
82 |
-
if DEBUG:
|
83 |
-
log(f"🧪 Tespit edilen parametreler: {variables}")
|
84 |
-
|
85 |
-
is_valid, validation_errors = validate_variable_formats(session["variables"], variable_format_map, data_formats)
|
86 |
-
if DEBUG:
|
87 |
-
log(f"📛 Validasyon hataları: {validation_errors}")
|
88 |
-
|
89 |
-
if not is_valid:
|
90 |
-
session["awaiting_variable"] = list(validation_errors.keys())[0]
|
91 |
-
session_store[session_id] = session
|
92 |
-
app.state.session_store = session_store
|
93 |
-
return {"response": list(validation_errors.values())[0]}
|
94 |
-
|
95 |
-
expected_vars = list(variable_format_map.keys())
|
96 |
-
missing_vars = [v for v in expected_vars if v not in session["variables"]]
|
97 |
-
if DEBUG:
|
98 |
-
log(f"📌 Beklenen parametreler: {expected_vars}, Eksik: {missing_vars}")
|
99 |
-
|
100 |
-
if missing_vars:
|
101 |
-
session["awaiting_variable"] = missing_vars[0]
|
102 |
-
session_store[session_id] = session
|
103 |
-
app.state.session_store = session_store
|
104 |
-
return {"response": f"Lütfen {missing_vars[0]} bilgisini belirtir misiniz?"}
|
105 |
-
|
106 |
-
log("🚀 execute_intent() çağrılıyor...")
|
107 |
-
result = execute_intent(
|
108 |
-
detected_intent,
|
109 |
-
user_input,
|
110 |
-
session,
|
111 |
-
s_config.INTENT_DEFINITIONS,
|
112 |
-
s_config.DATA_FORMATS
|
113 |
-
)
|
114 |
-
if "reply" in result:
|
115 |
-
session_store[session_id] = result["session"]
|
116 |
-
app.state.session_store = session_store
|
117 |
-
return {"reply": result["reply"]}
|
118 |
-
elif "errors" in result:
|
119 |
-
session_store[session_id] = result["session"]
|
120 |
-
app.state.session_store = session_store
|
121 |
-
return {"response": list(result["errors"].values())[0]}
|
122 |
-
else:
|
123 |
-
return {"response": random.choice(s_config.FALLBACK_ANSWERS)}
|
124 |
-
|
125 |
-
log("🤖 execute_intent çağrılmadı → LLM fallback devrede")
|
126 |
-
session["awaiting_variable"] = None
|
127 |
-
session["variables"] = {}
|
128 |
-
response, response_conf = await generate_response(user_input, s_config)
|
129 |
-
if response_conf is not None and response_conf < s_config.LLM_CONFIDENCE_THRESHOLD:
|
130 |
-
return {"response": random.choice(s_config.FALLBACK_ANSWERS)}
|
131 |
-
return {"response": response}
|
132 |
-
|
133 |
-
except Exception as e:
|
134 |
-
traceback.print_exc()
|
135 |
-
return JSONResponse(content={"error": str(e)}, status_code=500)
|
|
|
1 |
+
from fastapi import Request
|
2 |
+
from fastapi.responses import JSONResponse
|
3 |
+
import traceback, random
|
4 |
+
from intent import extract_parameters, validate_variable_formats, detect_intent
|
5 |
+
from intent_api import execute_intent
|
6 |
+
from llm_model import generate_response, get_model, get_tokenizer, Message
|
7 |
+
from log import log
|
8 |
+
|
9 |
+
DEBUG = True
|
10 |
+
|
11 |
+
async def handle_chat(msg: Message, request: Request, app, s_config):
|
12 |
+
user_input = msg.user_input.strip()
|
13 |
+
session_id = request.headers.get("X-Session-ID", "demo-session")
|
14 |
+
|
15 |
+
if not hasattr(app.state, "session_store"):
|
16 |
+
app.state.session_store = {}
|
17 |
+
|
18 |
+
session_store = getattr(app.state, "session_store", {})
|
19 |
+
session = session_store.get(session_id, {
|
20 |
+
"session_id": session_id,
|
21 |
+
"variables": {},
|
22 |
+
"auth_tokens": {},
|
23 |
+
"last_intent": None,
|
24 |
+
"awaiting_variable": None
|
25 |
+
})
|
26 |
+
|
27 |
+
try:
|
28 |
+
if get_model() is None or get_tokenizer() is None:
|
29 |
+
return {"error": "Model yüklenmedi."}
|
30 |
+
|
31 |
+
detected_intent, intent_conf = await detect_intent(user_input)
|
32 |
+
if DEBUG:
|
33 |
+
log(f"🎯 Intent tespiti: {detected_intent}, Confidence: {intent_conf:.2f}")
|
34 |
+
|
35 |
+
current_intent = session.get("last_intent")
|
36 |
+
awaiting_variable = session.get("awaiting_variable")
|
37 |
+
|
38 |
+
if DEBUG:
|
39 |
+
log(f"📦 Session: {session}")
|
40 |
+
log(f"🧩 Awaiting: {awaiting_variable}, Last Intent: {current_intent}")
|
41 |
+
|
42 |
+
if (
|
43 |
+
awaiting_variable and
|
44 |
+
detected_intent and
|
45 |
+
detected_intent != current_intent and
|
46 |
+
intent_conf > s_config.INTENT_CONFIDENCE_THRESHOLD
|
47 |
+
):
|
48 |
+
log("🧹 Konu değişikliği algılandı → context sıfırlanıyor")
|
49 |
+
session["awaiting_variable"] = None
|
50 |
+
session["variables"] = {}
|
51 |
+
session["last_intent"] = detected_intent
|
52 |
+
current_intent = detected_intent
|
53 |
+
|
54 |
+
intent_is_valid = (
|
55 |
+
detected_intent and
|
56 |
+
intent_conf > s_config.INTENT_CONFIDENCE_THRESHOLD and
|
57 |
+
detected_intent in s_config.INTENT_DEFINITIONS
|
58 |
+
)
|
59 |
+
if DEBUG:
|
60 |
+
log(f"✅ Intent geçerli mi?: {intent_is_valid}")
|
61 |
+
|
62 |
+
if intent_is_valid:
|
63 |
+
session["last_intent"] = detected_intent
|
64 |
+
definition = s_config.INTENT_DEFINITIONS[detected_intent]
|
65 |
+
pattern_list = definition.get("variables", [])
|
66 |
+
data_formats = s_config.DATA_FORMATS
|
67 |
+
variable_format_map = definition.get("variable_formats", {})
|
68 |
+
|
69 |
+
if awaiting_variable:
|
70 |
+
extracted = extract_parameters(pattern_list, user_input)
|
71 |
+
for p in extracted:
|
72 |
+
if p["key"] == awaiting_variable:
|
73 |
+
session["variables"][awaiting_variable] = p["value"]
|
74 |
+
session["awaiting_variable"] = None
|
75 |
+
log(f"✅ Awaiting parametre tamamlandı: {awaiting_variable} = {p['value']}")
|
76 |
+
break
|
77 |
+
|
78 |
+
extracted = extract_parameters(pattern_list, user_input)
|
79 |
+
variables = {p["key"]: p["value"] for p in extracted}
|
80 |
+
session.setdefault("variables", {}).update(variables)
|
81 |
+
|
82 |
+
if DEBUG:
|
83 |
+
log(f"🧪 Tespit edilen parametreler: {variables}")
|
84 |
+
|
85 |
+
is_valid, validation_errors = validate_variable_formats(session["variables"], variable_format_map, data_formats)
|
86 |
+
if DEBUG:
|
87 |
+
log(f"📛 Validasyon hataları: {validation_errors}")
|
88 |
+
|
89 |
+
if not is_valid:
|
90 |
+
session["awaiting_variable"] = list(validation_errors.keys())[0]
|
91 |
+
session_store[session_id] = session
|
92 |
+
app.state.session_store = session_store
|
93 |
+
return {"response": list(validation_errors.values())[0]}
|
94 |
+
|
95 |
+
expected_vars = list(variable_format_map.keys())
|
96 |
+
missing_vars = [v for v in expected_vars if v not in session["variables"]]
|
97 |
+
if DEBUG:
|
98 |
+
log(f"📌 Beklenen parametreler: {expected_vars}, Eksik: {missing_vars}")
|
99 |
+
|
100 |
+
if missing_vars:
|
101 |
+
session["awaiting_variable"] = missing_vars[0]
|
102 |
+
session_store[session_id] = session
|
103 |
+
app.state.session_store = session_store
|
104 |
+
return {"response": f"Lütfen {missing_vars[0]} bilgisini belirtir misiniz?"}
|
105 |
+
|
106 |
+
log("🚀 execute_intent() çağrılıyor...")
|
107 |
+
result = execute_intent(
|
108 |
+
detected_intent,
|
109 |
+
user_input,
|
110 |
+
session,
|
111 |
+
s_config.INTENT_DEFINITIONS,
|
112 |
+
s_config.DATA_FORMATS
|
113 |
+
)
|
114 |
+
if "reply" in result:
|
115 |
+
session_store[session_id] = result["session"]
|
116 |
+
app.state.session_store = session_store
|
117 |
+
return {"reply": result["reply"]}
|
118 |
+
elif "errors" in result:
|
119 |
+
session_store[session_id] = result["session"]
|
120 |
+
app.state.session_store = session_store
|
121 |
+
return {"response": list(result["errors"].values())[0]}
|
122 |
+
else:
|
123 |
+
return {"response": random.choice(s_config.FALLBACK_ANSWERS)}
|
124 |
+
|
125 |
+
log("🤖 execute_intent çağrılmadı → LLM fallback devrede")
|
126 |
+
session["awaiting_variable"] = None
|
127 |
+
session["variables"] = {}
|
128 |
+
response, response_conf = await generate_response(user_input, s_config)
|
129 |
+
if response_conf is not None and response_conf < s_config.LLM_CONFIDENCE_THRESHOLD:
|
130 |
+
return {"response": random.choice(s_config.FALLBACK_ANSWERS)}
|
131 |
+
return {"response": response}
|
132 |
+
|
133 |
+
except Exception as e:
|
134 |
+
traceback.print_exc()
|
135 |
+
return JSONResponse(content={"error": str(e)}, status_code=500)
|
controllers/admin_controller.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
from app import service_config, llm_models
|
3 |
+
from llm_model import LLMModel
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
+
from log import log
|
6 |
+
|
7 |
+
router = APIRouter()
|
8 |
+
|
9 |
+
@router.post("/reload_config")
|
10 |
+
def reload_config():
|
11 |
+
try:
|
12 |
+
service_config.load(is_reload=True)
|
13 |
+
log("🔁 Config reload sonrası tüm projeler için modeller yeniden yükleniyor...")
|
14 |
+
for project_name in service_config.projects:
|
15 |
+
llm_config = service_config.get_project_llm_config(project_name)
|
16 |
+
model_instance = LLMModel()
|
17 |
+
model_instance.setup(service_config, llm_config)
|
18 |
+
llm_models[project_name] = model_instance
|
19 |
+
log(f"✅ '{project_name}' için model yüklendi.")
|
20 |
+
return {"status": "ok", "message": "Konfigürasyon ve modeller yeniden yüklendi."}
|
21 |
+
except Exception as e:
|
22 |
+
return JSONResponse(content={"error": str(e)}, status_code=500)
|
controllers/chat_controller.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, Request
|
2 |
+
from llm_model import Message
|
3 |
+
from chat_handler import handle_chat
|
4 |
+
from app import service_config, session_store, llm_models
|
5 |
+
|
6 |
+
router = APIRouter()
|
7 |
+
|
8 |
+
@router.post("/chat")
|
9 |
+
async def chat_endpoint(msg: Message, request: Request):
|
10 |
+
session_id = request.headers.get("X-Session-ID")
|
11 |
+
if not session_id:
|
12 |
+
return {"error": "Session ID eksik."}
|
13 |
+
|
14 |
+
session = session_store.get_session(session_id)
|
15 |
+
if not session:
|
16 |
+
return {"error": "Geçersiz veya süresi dolmuş session."}
|
17 |
+
|
18 |
+
project_name = session.project_name
|
19 |
+
llm_model = llm_models.get(project_name)
|
20 |
+
if llm_model is None:
|
21 |
+
return {"error": f"{project_name} için model yüklenmemiş."}
|
22 |
+
|
23 |
+
return await handle_chat(msg, request, None, service_config, session, llm_model)
|
controllers/health_controller.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
|
3 |
+
router = APIRouter()
|
4 |
+
|
5 |
+
@router.get("/")
|
6 |
+
def health():
|
7 |
+
return {"status": "ok"}
|
controllers/test_controller.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
import threading
|
3 |
+
import intent_test_runner
|
4 |
+
from log import log
|
5 |
+
|
6 |
+
router = APIRouter()
|
7 |
+
|
8 |
+
@router.post("/run_tests", status_code=202)
|
9 |
+
def run_tests():
|
10 |
+
log("🚦 /run_tests çağrıldı. Testler başlatılıyor...")
|
11 |
+
threading.Thread(target=intent_test_runner.run_all_tests, daemon=True).start()
|
12 |
+
return {"status": "running", "message": "Test süreci başlatıldı."}
|
intent_system_design.md
CHANGED
@@ -1,137 +1,137 @@
|
|
1 |
-
# Intent Tabanlı Chatbot Sistem Tasarımı (v1)
|
2 |
-
|
3 |
-
Bu doküman, niyet (intent) tespiti, parametre çıkarımı, REST API entegrasyonu, session yönetimi, veri doğrulama ve LLM destekli hibrit karar mekanizmasını içeren chatbot altyapısını detaylı olarak tanımlar.
|
4 |
-
|
5 |
-
---
|
6 |
-
|
7 |
-
## 🧠 Amaç
|
8 |
-
|
9 |
-
- Kullanıcı ifadelerinden "intent" algılanması
|
10 |
-
- Parametrelerin (variables) otomatik çıkarılması
|
11 |
-
- Eksik ya da hatalı parametrelerin sorgulanması
|
12 |
-
- Format kontrolleri ve validasyon
|
13 |
-
- Gerekirse REST API çağrıları
|
14 |
-
- Session bazlı token yönetimi
|
15 |
-
- Gelen cevabın insani cevaplaşa dönüşü
|
16 |
-
- Gelişmiş kararlar için hibrit sistem (LLM + logic)
|
17 |
-
|
18 |
-
---
|
19 |
-
|
20 |
-
## 📂 JSON Şema (Intent + Data Format)
|
21 |
-
|
22 |
-
```json
|
23 |
-
{
|
24 |
-
"data_formats": [
|
25 |
-
{
|
26 |
-
"name": "currency_format",
|
27 |
-
"valid_options": ["dolar", "euro", "TL"],
|
28 |
-
"error_message": "Geçerli bir döviz cinsi belirtmelisiniz."
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"name": "client_no_format",
|
32 |
-
"pattern": "^[0-9]{6}$",
|
33 |
-
"error_message": "Müşteri numaranız 6 haneli olmalıdır."
|
34 |
-
}
|
35 |
-
],
|
36 |
-
"intents": [
|
37 |
-
{
|
38 |
-
"name": "doviz-kuru-intent",
|
39 |
-
"examples": ["dolar kuru nedir?"],
|
40 |
-
"variables": ["currency:{dolar} kuru nedir?"],
|
41 |
-
"variable_formats": {
|
42 |
-
"currency": "currency_format"
|
43 |
-
},
|
44 |
-
"action": {
|
45 |
-
"url": "https://api.ex.com/doviz",
|
46 |
-
"method": "POST",
|
47 |
-
"headers": [
|
48 |
-
{ "key": "Authorization", "value": "Bearer {auth_tokens.doviz-kuru-intent.token}" }
|
49 |
-
],
|
50 |
-
"body": {
|
51 |
-
"currency": "{variables.currency}"
|
52 |
-
},
|
53 |
-
"timeout": 5,
|
54 |
-
"retry_count": 1,
|
55 |
-
"tls": {
|
56 |
-
"verify": true,
|
57 |
-
"ca_bundle": "/app/certs/my-ca.pem"
|
58 |
-
},
|
59 |
-
"auth": {
|
60 |
-
"auth_endpoint": "https://api.ex.com/auth",
|
61 |
-
"auth_body": { "username": "user", "password": "pass" },
|
62 |
-
"auth_token_path": "token",
|
63 |
-
"auth_refresh_endpoint": "https://api.ex.com/refresh",
|
64 |
-
"refresh_body": { "refresh_token": "{session.refresh_token}" }
|
65 |
-
},
|
66 |
-
"response_parser": {
|
67 |
-
"field": "rate",
|
68 |
-
"format": "{variables.currency} kuru: {rate} TL"
|
69 |
-
},
|
70 |
-
"reply_template": "{variables.currency} kuru şu an {rate} TL."
|
71 |
-
}
|
72 |
-
}
|
73 |
-
]
|
74 |
-
}
|
75 |
-
```
|
76 |
-
|
77 |
-
---
|
78 |
-
|
79 |
-
## 🧭 Chat Akışı (Hibrit)
|
80 |
-
|
81 |
-
1. Kullanıcıdan mesaj alınır (`POST /chat`)
|
82 |
-
2. Session bulunur (`X-Session-ID` header)
|
83 |
-
3. Intent tespiti (bert model)
|
84 |
-
4. Parametreler `extract_parameters()` ile alınır
|
85 |
-
5. `variable_formats` varsa validasyon yapılır
|
86 |
-
6. Eksik/hatalı varsa `awaiting_variable` kaydedilir, soru sorulur
|
87 |
-
7. Tüm parametreler tamamsa:
|
88 |
-
- Gerekirse auth token üretilir / yenilenir
|
89 |
-
- API çağrısı yapılır
|
90 |
-
- `response_parser` uygulanır
|
91 |
-
- `reply_template` ile mesaj hazır
|
92 |
-
8. Session güncellenir, cevap döndürülür
|
93 |
-
|
94 |
-
---
|
95 |
-
|
96 |
-
## 📦 Session Yapısı
|
97 |
-
|
98 |
-
```json
|
99 |
-
{
|
100 |
-
"session_id": "abc-123",
|
101 |
-
"variables": {
|
102 |
-
"tckn": "12345678900"
|
103 |
-
},
|
104 |
-
"auth_tokens": {
|
105 |
-
"doviz-kuru-intent": {
|
106 |
-
"token": "...",
|
107 |
-
"refresh_token": "..."
|
108 |
-
}
|
109 |
-
},
|
110 |
-
"awaiting_variable": "currency",
|
111 |
-
"last_intent": "doviz-kuru-intent"
|
112 |
-
}
|
113 |
-
```
|
114 |
-
|
115 |
-
---
|
116 |
-
|
117 |
-
## 🧩 Placeholder Kullanımı
|
118 |
-
|
119 |
-
| Amaç | Yazım |
|
120 |
-
|---------------|-------------------------------|
|
121 |
-
| Parametre | `{variables.currency}` |
|
122 |
-
| Session verisi| `{session.tckn}` |
|
123 |
-
| Token | `{auth_tokens.intent.token}` |
|
124 |
-
|
125 |
-
---
|
126 |
-
|
127 |
-
## ✅ Sonraki Adımlar
|
128 |
-
|
129 |
-
- `resolve_placeholders()` fonksiyonu
|
130 |
-
- `validate_variable_formats()`
|
131 |
-
- `auth_token_handler()` → create + refresh
|
132 |
-
- `execute_intent()` tam mantık
|
133 |
-
- `log()` yapısı
|
134 |
-
|
135 |
-
---
|
136 |
-
|
137 |
-
Bu tasarım, modüler ve üretim ortamı için ölçeklenebilir sağlam bir altyapı sunar.
|
|
|
1 |
+
# Intent Tabanlı Chatbot Sistem Tasarımı (v1)
|
2 |
+
|
3 |
+
Bu doküman, niyet (intent) tespiti, parametre çıkarımı, REST API entegrasyonu, session yönetimi, veri doğrulama ve LLM destekli hibrit karar mekanizmasını içeren chatbot altyapısını detaylı olarak tanımlar.
|
4 |
+
|
5 |
+
---
|
6 |
+
|
7 |
+
## 🧠 Amaç
|
8 |
+
|
9 |
+
- Kullanıcı ifadelerinden "intent" algılanması
|
10 |
+
- Parametrelerin (variables) otomatik çıkarılması
|
11 |
+
- Eksik ya da hatalı parametrelerin sorgulanması
|
12 |
+
- Format kontrolleri ve validasyon
|
13 |
+
- Gerekirse REST API çağrıları
|
14 |
+
- Session bazlı token yönetimi
|
15 |
+
- Gelen cevabın insani cevaplaşa dönüşü
|
16 |
+
- Gelişmiş kararlar için hibrit sistem (LLM + logic)
|
17 |
+
|
18 |
+
---
|
19 |
+
|
20 |
+
## 📂 JSON Şema (Intent + Data Format)
|
21 |
+
|
22 |
+
```json
|
23 |
+
{
|
24 |
+
"data_formats": [
|
25 |
+
{
|
26 |
+
"name": "currency_format",
|
27 |
+
"valid_options": ["dolar", "euro", "TL"],
|
28 |
+
"error_message": "Geçerli bir döviz cinsi belirtmelisiniz."
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"name": "client_no_format",
|
32 |
+
"pattern": "^[0-9]{6}$",
|
33 |
+
"error_message": "Müşteri numaranız 6 haneli olmalıdır."
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"intents": [
|
37 |
+
{
|
38 |
+
"name": "doviz-kuru-intent",
|
39 |
+
"examples": ["dolar kuru nedir?"],
|
40 |
+
"variables": ["currency:{dolar} kuru nedir?"],
|
41 |
+
"variable_formats": {
|
42 |
+
"currency": "currency_format"
|
43 |
+
},
|
44 |
+
"action": {
|
45 |
+
"url": "https://api.ex.com/doviz",
|
46 |
+
"method": "POST",
|
47 |
+
"headers": [
|
48 |
+
{ "key": "Authorization", "value": "Bearer {auth_tokens.doviz-kuru-intent.token}" }
|
49 |
+
],
|
50 |
+
"body": {
|
51 |
+
"currency": "{variables.currency}"
|
52 |
+
},
|
53 |
+
"timeout": 5,
|
54 |
+
"retry_count": 1,
|
55 |
+
"tls": {
|
56 |
+
"verify": true,
|
57 |
+
"ca_bundle": "/app/certs/my-ca.pem"
|
58 |
+
},
|
59 |
+
"auth": {
|
60 |
+
"auth_endpoint": "https://api.ex.com/auth",
|
61 |
+
"auth_body": { "username": "user", "password": "pass" },
|
62 |
+
"auth_token_path": "token",
|
63 |
+
"auth_refresh_endpoint": "https://api.ex.com/refresh",
|
64 |
+
"refresh_body": { "refresh_token": "{session.refresh_token}" }
|
65 |
+
},
|
66 |
+
"response_parser": {
|
67 |
+
"field": "rate",
|
68 |
+
"format": "{variables.currency} kuru: {rate} TL"
|
69 |
+
},
|
70 |
+
"reply_template": "{variables.currency} kuru şu an {rate} TL."
|
71 |
+
}
|
72 |
+
}
|
73 |
+
]
|
74 |
+
}
|
75 |
+
```
|
76 |
+
|
77 |
+
---
|
78 |
+
|
79 |
+
## 🧭 Chat Akışı (Hibrit)
|
80 |
+
|
81 |
+
1. Kullanıcıdan mesaj alınır (`POST /chat`)
|
82 |
+
2. Session bulunur (`X-Session-ID` header)
|
83 |
+
3. Intent tespiti (bert model)
|
84 |
+
4. Parametreler `extract_parameters()` ile alınır
|
85 |
+
5. `variable_formats` varsa validasyon yapılır
|
86 |
+
6. Eksik/hatalı varsa `awaiting_variable` kaydedilir, soru sorulur
|
87 |
+
7. Tüm parametreler tamamsa:
|
88 |
+
- Gerekirse auth token üretilir / yenilenir
|
89 |
+
- API çağrısı yapılır
|
90 |
+
- `response_parser` uygulanır
|
91 |
+
- `reply_template` ile mesaj hazır
|
92 |
+
8. Session güncellenir, cevap döndürülür
|
93 |
+
|
94 |
+
---
|
95 |
+
|
96 |
+
## 📦 Session Yapısı
|
97 |
+
|
98 |
+
```json
|
99 |
+
{
|
100 |
+
"session_id": "abc-123",
|
101 |
+
"variables": {
|
102 |
+
"tckn": "12345678900"
|
103 |
+
},
|
104 |
+
"auth_tokens": {
|
105 |
+
"doviz-kuru-intent": {
|
106 |
+
"token": "...",
|
107 |
+
"refresh_token": "..."
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"awaiting_variable": "currency",
|
111 |
+
"last_intent": "doviz-kuru-intent"
|
112 |
+
}
|
113 |
+
```
|
114 |
+
|
115 |
+
---
|
116 |
+
|
117 |
+
## 🧩 Placeholder Kullanımı
|
118 |
+
|
119 |
+
| Amaç | Yazım |
|
120 |
+
|---------------|-------------------------------|
|
121 |
+
| Parametre | `{variables.currency}` |
|
122 |
+
| Session verisi| `{session.tckn}` |
|
123 |
+
| Token | `{auth_tokens.intent.token}` |
|
124 |
+
|
125 |
+
---
|
126 |
+
|
127 |
+
## ✅ Sonraki Adımlar
|
128 |
+
|
129 |
+
- `resolve_placeholders()` fonksiyonu
|
130 |
+
- `validate_variable_formats()`
|
131 |
+
- `auth_token_handler()` → create + refresh
|
132 |
+
- `execute_intent()` tam mantık
|
133 |
+
- `log()` yapısı
|
134 |
+
|
135 |
+
---
|
136 |
+
|
137 |
+
Bu tasarım, modüler ve üretim ortamı için ölçeklenebilir sağlam bir altyapı sunar.
|
intent_test_runner.py
CHANGED
@@ -1,76 +1,70 @@
|
|
1 |
-
import os
|
2 |
-
import requests
|
3 |
-
from log import log
|
4 |
-
|
5 |
-
BASE_URL = "http://localhost:7860"
|
6 |
-
|
7 |
-
|
8 |
-
test_results = []
|
9 |
-
|
10 |
-
def assert_test(name, actual, expected_substring, explanation=None):
|
11 |
-
if explanation:
|
12 |
-
log(f"🧪 TEST: {name} → {explanation}")
|
13 |
-
actual_str = str(actual)
|
14 |
-
if expected_substring in actual_str:
|
15 |
-
log(f"[TEST] {name:<45} ✅")
|
16 |
-
test_results.append((name, True))
|
17 |
-
else:
|
18 |
-
log(f"[TEST] {name:<45} ❌ — Beklenen: {expected_substring}, Gelen: {actual_str[:100]}...")
|
19 |
-
test_results.append((name, False))
|
20 |
-
|
21 |
-
def summarize_tests():
|
22 |
-
total = len(test_results)
|
23 |
-
success = sum(1 for _, ok in test_results if ok)
|
24 |
-
fail = total - success
|
25 |
-
log("🧾 TEST SONUCU ÖZETİ")
|
26 |
-
log(f"🔢 Toplam Test : {total}")
|
27 |
-
log(f"✅ Başarılı : {success}")
|
28 |
-
log(f"❌ Başarısız : {fail}")
|
29 |
-
|
30 |
-
def run_all_tests():
|
31 |
-
try:
|
32 |
-
log("🚀 Test süreci başlatıldı.")
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
r = requests.post(f"{BASE_URL}/chat", json={"user_input": "
|
65 |
-
assert_test("
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
r = requests.post(f"{BASE_URL}/chat", json={"user_input": "İstanbul"}, headers=headers)
|
71 |
-
assert_test("Parametre tamamlandı — yol durumu", r.json(), "trafik açık")
|
72 |
-
|
73 |
-
summarize_tests()
|
74 |
-
|
75 |
-
except Exception as e:
|
76 |
log(f"❌ run_all_tests sırasında hata oluştu: {e}")
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from log import log
|
4 |
+
|
5 |
+
BASE_URL = "http://localhost:7860"
|
6 |
+
MOCK_BASE = os.getenv("MOCK_BASE_URL") # Örnek: https://abc123.ngrok.io
|
7 |
+
|
8 |
+
test_results = []
|
9 |
+
|
10 |
+
def assert_test(name, actual, expected_substring, explanation=None):
|
11 |
+
if explanation:
|
12 |
+
log(f"🧪 TEST: {name} → {explanation}")
|
13 |
+
actual_str = str(actual)
|
14 |
+
if expected_substring in actual_str:
|
15 |
+
log(f"[TEST] {name:<45} ✅")
|
16 |
+
test_results.append((name, True))
|
17 |
+
else:
|
18 |
+
log(f"[TEST] {name:<45} ❌ — Beklenen: {expected_substring}, Gelen: {actual_str[:100]}...")
|
19 |
+
test_results.append((name, False))
|
20 |
+
|
21 |
+
def summarize_tests():
|
22 |
+
total = len(test_results)
|
23 |
+
success = sum(1 for _, ok in test_results if ok)
|
24 |
+
fail = total - success
|
25 |
+
log("🧾 TEST SONUCU ÖZETİ")
|
26 |
+
log(f"🔢 Toplam Test : {total}")
|
27 |
+
log(f"✅ Başarılı : {success}")
|
28 |
+
log(f"❌ Başarısız : {fail}")
|
29 |
+
|
30 |
+
def run_all_tests():
    """Run the end-to-end chat regression suite against the local service.

    Starts a session via /start_chat, then exercises LLM fallback,
    missing/invalid parameter handling and multi-parameter completion
    through /chat. Results are recorded by assert_test() and summarized
    at the end. Any exception aborts the run and is logged.
    """
    # A request that never completes would hang the whole suite, so every
    # HTTP call gets an explicit timeout (seconds).
    timeout = 10
    try:
        log("🚀 Test süreci başlatıldı.")
        response = requests.post(f"{BASE_URL}/start_chat?project_name=project1", timeout=timeout)
        session_id = response.json().get("session_id")
        if not session_id:
            # Fail fast with a clear message instead of sending
            # "X-Session-ID: None" on every subsequent request.
            raise RuntimeError("start_chat did not return a session_id")
        headers = {"X-Session-ID": session_id}

        def _chat(user_input):
            # One round-trip to the /chat endpoint for the current session.
            r = requests.post(
                f"{BASE_URL}/chat",
                json={"user_input": user_input},
                headers=headers,
                timeout=timeout,
            )
            return r.json()

        # 1. LLM fallback (no intent matched)
        assert_test("LLM fallback", _chat("bilinmeyen bir soru"), "maalesef")

        # 2. Missing parameter (exchange rate)
        assert_test("Eksik parametre — currency", _chat("döviz kuru nedir"), "Lütfen currency")

        # 3. Retry once the missing parameter is supplied
        assert_test("Parametre tamamlandı — dolar", _chat("dolar"), "dolar kuru şu an")

        # 4. Invalid parameter validation
        assert_test("Geçersiz parametre — currency", _chat("yenidolar kuru nedir"), "geçerli bir döviz")

        # 5. Topic change → awaiting state reset
        assert_test("Konu değişikliği sonrası fallback", _chat("hava nasıl"), "maalesef")

        # 6. Road status (both parameters missing, supplied one by one)
        assert_test("Eksik parametre — from_location", _chat("yol durumu"), "Lütfen from_location")
        assert_test("Eksik parametre — to_location", _chat("Ankara"), "Lütfen to_location")
        assert_test("Parametre tamamlandı — yol durumu", _chat("İstanbul"), "trafik açık")

        summarize_tests()

    except Exception as e:
        log(f"❌ run_all_tests sırasında hata oluştu: {e}")
|
llm_model.py
CHANGED
@@ -1,83 +1,83 @@
|
|
1 |
-
import torch
|
2 |
-
import traceback
|
3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
-
from log import log
|
5 |
-
from pydantic import BaseModel
|
6 |
-
|
7 |
-
class Message(BaseModel):
|
8 |
-
user_input: str
|
9 |
-
|
10 |
-
class LLMModel:
|
11 |
-
def __init__(self):
|
12 |
-
self.model = None
|
13 |
-
self.tokenizer = None
|
14 |
-
self.eos_token_id = None
|
15 |
-
|
16 |
-
def setup(self, s_config, project_config):
|
17 |
-
try:
|
18 |
-
log("🧠 LLMModel setup() başladı")
|
19 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
-
log(f"📡 Kullanılan cihaz: {device}")
|
21 |
-
|
22 |
-
model_base = project_config["model_base"]
|
23 |
-
|
24 |
-
if s_config.work_mode == "hfcloud":
|
25 |
-
token = s_config.get_auth_token()
|
26 |
-
log(f"📦 Hugging Face cloud modeli yükleniyor: {model_base}")
|
27 |
-
self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
|
28 |
-
self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
|
29 |
-
elif s_config.work_mode == "cloud":
|
30 |
-
log(f"📦 Diğer cloud ortamından model indiriliyor: {model_base}")
|
31 |
-
self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
|
32 |
-
self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
|
33 |
-
|
34 |
-
elif s_config.work_mode == "on-prem":
|
35 |
-
log(f"📦 On-prem model path: {model_base}")
|
36 |
-
self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
|
37 |
-
self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)
|
38 |
-
|
39 |
-
else:
|
40 |
-
raise Exception(f"Bilinmeyen work_mode: {s_config.work_mode}")
|
41 |
-
|
42 |
-
self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
|
43 |
-
self.model.config.pad_token_id = self.tokenizer.pad_token_id
|
44 |
-
self.eos_token_id = self.tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
|
45 |
-
self.model.eval()
|
46 |
-
|
47 |
-
log("✅ LLMModel setup() başarıyla tamamlandı.")
|
48 |
-
except Exception as e:
|
49 |
-
log(f"❌ LLMModel setup() hatası: {e}")
|
50 |
-
traceback.print_exc()
|
51 |
-
|
52 |
-
async def generate_response(self, text, project_config):
|
53 |
-
messages = [{"role": "user", "content": text}]
|
54 |
-
encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
|
55 |
-
input_ids = encodeds.to(self.model.device)
|
56 |
-
attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
|
57 |
-
|
58 |
-
with torch.no_grad():
|
59 |
-
output = self.model.generate(
|
60 |
-
input_ids=input_ids,
|
61 |
-
attention_mask=attention_mask,
|
62 |
-
max_new_tokens=128,
|
63 |
-
do_sample=project_config["use_sampling"],
|
64 |
-
eos_token_id=self.eos_token_id,
|
65 |
-
pad_token_id=self.tokenizer.pad_token_id,
|
66 |
-
return_dict_in_generate=True,
|
67 |
-
output_scores=True
|
68 |
-
)
|
69 |
-
|
70 |
-
if not project_config["use_sampling"]:
|
71 |
-
scores = torch.stack(output.scores, dim=1)
|
72 |
-
probs = torch.nn.functional.softmax(scores[0], dim=-1)
|
73 |
-
top_conf = probs.max().item()
|
74 |
-
else:
|
75 |
-
top_conf = None
|
76 |
-
|
77 |
-
decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
|
78 |
-
for tag in ["assistant", "<|im_start|>assistant"]:
|
79 |
-
start = decoded.find(tag)
|
80 |
-
if start != -1:
|
81 |
-
decoded = decoded[start + len(tag):].strip()
|
82 |
-
break
|
83 |
-
return decoded, top_conf
|
|
|
1 |
+
import torch
|
2 |
+
import traceback
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
+
from log import log
|
5 |
+
from pydantic import BaseModel
|
6 |
+
|
7 |
+
class Message(BaseModel):
    """Request payload for the chat endpoint."""

    # Raw user utterance to be processed.
    user_input: str
|
9 |
+
|
10 |
+
class LLMModel:
    """Wrapper around a Hugging Face causal LM + tokenizer for chat generation."""

    def __init__(self):
        # Populated by setup(); all three stay None until it succeeds.
        self.model = None
        self.tokenizer = None
        self.eos_token_id = None

    def setup(self, s_config, project_config):
        """Load the tokenizer and model selected by the configured work mode.

        s_config: service-level config providing work_mode and (for
            "hfcloud") an auth token.
        project_config: per-project config; "model_base" names the HF model
            id or (in "on-prem" mode) a local path.

        NOTE(review): any failure is logged and swallowed — the instance is
        left with model=None, so a later generate_response() would raise;
        callers should verify readiness.
        """
        try:
            log("🧠 LLMModel setup() başladı")
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            log(f"📡 Kullanılan cihaz: {device}")

            model_base = project_config["model_base"]

            if s_config.work_mode == "hfcloud":
                # Private HF Hub model: requires an auth token.
                token = s_config.get_auth_token()
                log(f"📦 Hugging Face cloud modeli yükleniyor: {model_base}")
                self.tokenizer = AutoTokenizer.from_pretrained(model_base, token=token, use_fast=False)
                self.model = AutoModelForCausalLM.from_pretrained(model_base, token=token, torch_dtype=torch.float32).to(device)
            elif s_config.work_mode == "cloud":
                log(f"📦 Diğer cloud ortamından model indiriliyor: {model_base}")
                self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
                self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)

            elif s_config.work_mode == "on-prem":
                # model_base is treated as a local filesystem path here.
                log(f"📦 On-prem model path: {model_base}")
                self.tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
                self.model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float32).to(device)

            else:
                raise Exception(f"Bilinmeyen work_mode: {s_config.work_mode}")

            # Fall back to the EOS token as pad token when none is defined.
            self.tokenizer.pad_token = self.tokenizer.pad_token or self.tokenizer.eos_token
            self.model.config.pad_token_id = self.tokenizer.pad_token_id
            # "<|im_end|>" (ChatML end-of-turn) terminates generation.
            self.eos_token_id = self.tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
            self.model.eval()

            log("✅ LLMModel setup() başarıyla tamamlandı.")
        except Exception as e:
            log(f"❌ LLMModel setup() hatası: {e}")
            traceback.print_exc()

    async def generate_response(self, text, project_config):
        """Generate a reply for *text*; return (decoded_text, top_confidence).

        top_confidence is the max softmax probability over the generated
        steps when sampling is disabled, otherwise None.
        NOTE(review): model.generate() is blocking despite the async
        signature — it will stall the event loop while generating.
        """
        messages = [{"role": "user", "content": text}]
        encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
        input_ids = encodeds.to(self.model.device)
        # Mask pad positions (a single un-padded prompt has none).
        attention_mask = (input_ids != self.tokenizer.pad_token_id).long()

        with torch.no_grad():
            output = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=128,
                do_sample=project_config["use_sampling"],
                eos_token_id=self.eos_token_id,
                pad_token_id=self.tokenizer.pad_token_id,
                return_dict_in_generate=True,
                output_scores=True
            )

        if not project_config["use_sampling"]:
            # Greedy decoding: take the highest per-step probability seen
            # as a crude confidence estimate.
            scores = torch.stack(output.scores, dim=1)
            probs = torch.nn.functional.softmax(scores[0], dim=-1)
            top_conf = probs.max().item()
        else:
            top_conf = None

        decoded = self.tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
        # Drop everything up to (and including) the assistant marker so only
        # the model's reply remains.
        for tag in ["assistant", "<|im_start|>assistant"]:
            start = decoded.find(tag)
            if start != -1:
                decoded = decoded[start + len(tag):].strip()
                break
        return decoded, top_conf
|