Spaces:
No application file
No application file
fix
Browse files- app/app.py +12 -3
- app/app_routes.py +275 -180
app/app.py
CHANGED
@@ -46,12 +46,20 @@ if not ADMIN_PASSWORD:
|
|
46 |
# --- ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ ---
|
47 |
# MockComponent ์ ์ (์ํฌํธ ์คํจ ์ ๋์ฒด)
|
48 |
class MockComponent:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def __getattr__(self, name):
|
50 |
# Mock ๊ฐ์ฒด์ ์ด๋ค ์์ฑ์ด๋ ๋ฉ์๋ ํธ์ถ ์ ๊ฒฝ๊ณ ๋ก๊ทธ ์ถ๋ ฅ ๋ฐ ๊ธฐ๋ณธ๊ฐ ๋ฐํ
|
51 |
logger.warning(f"MockComponent์์ '{name}' ์ ๊ทผ ์๋๋จ (์ค์ ๋ชจ๋ ๋ก๋ ์คํจ)")
|
52 |
# ๋ฉ์๋ ํธ์ถ ์์๋ ์๋ฌด๊ฒ๋ ์ ํ๋ ํจ์ ๋ฐํ
|
53 |
-
if name in ['
|
54 |
-
return lambda *args, **kwargs: logger.warning(f"Mocked method '{name}' called") or
|
55 |
# ์์ฑ ์ ๊ทผ ์์๋ None ๋ฐํ
|
56 |
return None
|
57 |
|
@@ -188,7 +196,8 @@ def init_retriever():
|
|
188 |
logger.error(f"๊ธฐ๋ณธ ๊ฒ์๊ธฐ ์ด๊ธฐํ/๋ก๋ ์คํจ: {e}", exc_info=True)
|
189 |
base_retriever = MockComponent() # ์คํจ ์ Mock ์ฌ์ฉ
|
190 |
retriever = MockComponent()
|
191 |
-
|
|
|
192 |
|
193 |
# 2. ๋ฐ์ดํฐ ํด๋ ๋ฌธ์ ๋ก๋ (๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ๋น์ด์์ ๋)
|
194 |
needs_loading = not hasattr(base_retriever, 'documents') or not getattr(base_retriever, 'documents', [])
|
|
|
46 |
# --- ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ ---
|
47 |
# MockComponent ์ ์ (์ํฌํธ ์คํจ ์ ๋์ฒด)
|
48 |
class MockComponent:
|
49 |
+
def __init__(self):
|
50 |
+
self.is_mock = True
|
51 |
+
|
52 |
+
def search(self, query, top_k=5, first_stage_k=None):
|
53 |
+
"""๋น ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค."""
|
54 |
+
logger.warning(f"MockComponent.search ํธ์ถ๋จ (์ฟผ๋ฆฌ: {query[:30]}...)")
|
55 |
+
return []
|
56 |
+
|
57 |
def __getattr__(self, name):
|
58 |
# Mock ๊ฐ์ฒด์ ์ด๋ค ์์ฑ์ด๋ ๋ฉ์๋ ํธ์ถ ์ ๊ฒฝ๊ณ ๋ก๊ทธ ์ถ๋ ฅ ๋ฐ ๊ธฐ๋ณธ๊ฐ ๋ฐํ
|
59 |
logger.warning(f"MockComponent์์ '{name}' ์ ๊ทผ ์๋๋จ (์ค์ ๋ชจ๋ ๋ก๋ ์คํจ)")
|
60 |
# ๋ฉ์๋ ํธ์ถ ์์๋ ์๋ฌด๊ฒ๋ ์ ํ๋ ํจ์ ๋ฐํ
|
61 |
+
if name in ['add_documents', 'save', 'transcribe_audio', 'rag_generate', 'set_llm', 'get_current_llm_details', 'prepare_rag_context', 'csv_to_documents', 'text_to_documents', 'load_documents_from_directory']:
|
62 |
+
return lambda *args, **kwargs: logger.warning(f"Mocked method '{name}' called") or None
|
63 |
# ์์ฑ ์ ๊ทผ ์์๋ None ๋ฐํ
|
64 |
return None
|
65 |
|
|
|
196 |
logger.error(f"๊ธฐ๋ณธ ๊ฒ์๊ธฐ ์ด๊ธฐํ/๋ก๋ ์คํจ: {e}", exc_info=True)
|
197 |
base_retriever = MockComponent() # ์คํจ ์ Mock ์ฌ์ฉ
|
198 |
retriever = MockComponent()
|
199 |
+
logger.info("Mock ๊ฒ์๊ธฐ๋ฅผ ๋์ฒด๋ก ์ฌ์ฉํฉ๋๋ค.")
|
200 |
+
return retriever # ์ด๊ธฐํ ์คํจํด๋ Mock ๊ฒ์๊ธฐ ๋ฐํ (None ๋์ )
|
201 |
|
202 |
# 2. ๋ฐ์ดํฐ ํด๋ ๋ฌธ์ ๋ก๋ (๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ๋น์ด์์ ๋)
|
203 |
needs_loading = not hasattr(base_retriever, 'documents') or not getattr(base_retriever, 'documents', [])
|
app/app_routes.py
CHANGED
@@ -191,205 +191,300 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
191 |
@login_required
|
192 |
def chat():
|
193 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
194 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
195 |
-
if not is_ready:
|
196 |
-
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค...", "answer": "์ฃ์กํฉ๋๋ค. ์์คํ
์ด ์์ง ์ค๋น ์ค์
๋๋ค.", "sources": []}), 503
|
197 |
-
|
198 |
-
# retriever ๊ฐ์ฒด ๋ฐ ํ์ ๋ฉ์๋ ํ์ธ
|
199 |
-
if retriever is None or not hasattr(retriever, 'search'):
|
200 |
-
logger.warning("์ฑํ
API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
201 |
-
return jsonify({
|
202 |
-
"answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
|
203 |
-
"sources": [],
|
204 |
-
"error": "Retriever not ready"
|
205 |
-
}), 503 # ์๋น์ค ๋ถ๊ฐ ์ํ
|
206 |
-
|
207 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
data = request.get_json()
|
209 |
if not data or 'query' not in data:
|
210 |
return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
211 |
|
212 |
query = data['query']
|
213 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
214 |
-
|
215 |
-
#
|
216 |
-
search_results =
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
#
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
sources.append(source_info)
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
except Exception as e:
|
279 |
-
logger.error(f"์ฑํ
|
280 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
281 |
|
282 |
# --- Voice Chat API ---
|
283 |
@app.route('/api/voice', methods=['POST'])
|
284 |
@login_required
|
285 |
def voice_chat():
|
286 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
287 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
288 |
-
if not is_ready:
|
289 |
-
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
|
290 |
-
|
291 |
-
# ํ์ ์ปดํฌ๋ํธ ํ์ธ
|
292 |
-
if retriever is None or not hasattr(retriever, 'search'):
|
293 |
-
logger.error("์์ฑ API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์")
|
294 |
-
return jsonify({"error": "๊ฒ์ ์์ง ์ค๋น ์๋จ"}), 503
|
295 |
-
if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
|
296 |
-
logger.error("์์ฑ API ์์ฒญ ์ STT ํด๋ผ์ด์ธํธ๊ฐ ์ค๋น๋์ง ์์")
|
297 |
-
return jsonify({"error": "์์ฑ ์ธ์ ์๋น์ค ์ค๋น ์๋จ"}), 503
|
298 |
-
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
299 |
-
logger.error("์์ฑ API ์์ฒญ ์ LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์")
|
300 |
-
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
301 |
-
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
302 |
-
logger.error("์์ฑ API ์์ฒญ ์ DocumentProcessor๊ฐ ์ค๋น๋์ง ์์")
|
303 |
-
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
304 |
-
|
305 |
-
logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
|
306 |
-
|
307 |
-
if 'audio' not in request.files:
|
308 |
-
logger.error("์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์")
|
309 |
-
return jsonify({"error": "์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
310 |
-
|
311 |
-
audio_file = request.files['audio']
|
312 |
-
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
313 |
-
|
314 |
try:
|
315 |
-
#
|
316 |
-
|
317 |
-
|
318 |
-
logger.
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
return jsonify({
|
334 |
-
"
|
335 |
-
"
|
336 |
-
"
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
341 |
-
|
342 |
-
# --- RAG ๋ฐ LLM ํธ์ถ (Chat API์ ๋์ผ ๋ก์ง) ---
|
343 |
-
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
344 |
-
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
345 |
-
|
346 |
-
llm_id = request.form.get('llm_id', None) # form ๋ฐ์ดํฐ์์ llm_id ๊ฐ์ ธ์ค๊ธฐ
|
347 |
-
if not context:
|
348 |
-
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
349 |
-
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
350 |
-
else:
|
351 |
-
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
352 |
-
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
353 |
-
|
354 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (Chat API์ ๋์ผ ๋ก์ง)
|
355 |
-
sources = []
|
356 |
-
if search_results:
|
357 |
-
for result in search_results:
|
358 |
-
if not isinstance(result, dict): continue
|
359 |
-
source_info = {}
|
360 |
-
source_key = result.get("source")
|
361 |
-
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
362 |
-
source_key = result["metadata"].get("source")
|
363 |
-
if source_key:
|
364 |
-
source_info["source"] = source_key
|
365 |
-
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
366 |
-
filetype = result.get("filetype")
|
367 |
-
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
368 |
-
filetype = result["metadata"].get("filetype")
|
369 |
-
if "text" in result and filetype == "csv":
|
370 |
-
try:
|
371 |
-
text_lines = result["text"].strip().split('\n')
|
372 |
-
if text_lines:
|
373 |
-
first_line = text_lines[0].strip()
|
374 |
-
if ',' in first_line:
|
375 |
-
first_column = first_line.split(',')[0].strip()
|
376 |
-
source_info["id"] = first_column
|
377 |
-
except Exception as e:
|
378 |
-
logger.warning(f"[์์ฑ์ฑ] CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
379 |
-
sources.append(source_info)
|
380 |
-
|
381 |
-
# ์ต์ข
์๋ต
|
382 |
-
response_data = {
|
383 |
-
"transcription": transcription,
|
384 |
-
"answer": answer,
|
385 |
-
"sources": sources,
|
386 |
-
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
387 |
-
}
|
388 |
-
return jsonify(response_data)
|
389 |
-
|
390 |
except Exception as e:
|
391 |
-
logger.error(f"์์ฑ
|
392 |
-
return jsonify({
|
|
|
|
|
|
|
393 |
|
394 |
# --- Document Upload API ---
|
395 |
@app.route('/api/upload', methods=['POST'])
|
|
|
191 |
@login_required
|
192 |
def chat():
|
193 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
try:
|
195 |
+
# ์ฑ์ด ์ค๋น๋์๋์ง ํ์ธ
|
196 |
+
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
197 |
+
if not is_ready:
|
198 |
+
logger.warning("์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์
๋๋ค.")
|
199 |
+
return jsonify({
|
200 |
+
"error": "์ฑ ์ด๊ธฐํ ์ค...",
|
201 |
+
"answer": "์ฃ์กํฉ๋๋ค. ์์คํ
์ด ์์ง ์ค๋น ์ค์
๋๋ค.",
|
202 |
+
"sources": []
|
203 |
+
}), 503
|
204 |
+
|
205 |
data = request.get_json()
|
206 |
if not data or 'query' not in data:
|
207 |
return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
208 |
|
209 |
query = data['query']
|
210 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
211 |
+
|
212 |
+
# ๊ฒ์ ์์ง ์ฒ๋ฆฌ ๋ถ๋ถ ์์
|
213 |
+
search_results = []
|
214 |
+
search_warning = None
|
215 |
+
try:
|
216 |
+
# retriever ์ํ ๊ฒ์ฆ
|
217 |
+
if retriever is None:
|
218 |
+
logger.warning("Retriever๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
219 |
+
search_warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค."
|
220 |
+
elif hasattr(retriever, 'is_mock') and retriever.is_mock:
|
221 |
+
logger.info("Mock Retriever ์ฌ์ฉ ์ค - ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
222 |
+
search_warning = "๊ฒ์ ์ธ๋ฑ์ค๊ฐ ์์ง ๊ตฌ์ถ ์ค์
๋๋ค. ๊ธฐ๋ณธ ์๋ต๋ง ์ ๊ณต๋ฉ๋๋ค."
|
223 |
+
elif not hasattr(retriever, 'search'):
|
224 |
+
logger.warning("Retriever์ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
225 |
+
search_warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ํ์ฌ ์ ํ๋์ด ์์ต๋๋ค."
|
226 |
+
else:
|
227 |
+
logger.info(f"๊ฒ์ ์ํ: {query[:50]}...")
|
228 |
+
search_results = retriever.search(query, top_k=5, first_stage_k=6)
|
229 |
+
if not search_results:
|
230 |
+
logger.info("๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.")
|
231 |
+
else:
|
232 |
+
logger.info(f"๊ฒ์ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐ ํญ๋ชฉ")
|
233 |
+
except Exception as e:
|
234 |
+
logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
235 |
+
search_results = []
|
236 |
+
search_warning = f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
237 |
+
|
238 |
+
# LLM ์๋ต ์์ฑ
|
239 |
+
try:
|
240 |
+
# DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ
|
241 |
+
context = ""
|
242 |
+
if search_results:
|
243 |
+
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
244 |
+
logger.warning("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
245 |
+
else:
|
246 |
+
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
247 |
+
logger.info(f"์ปจํ
์คํธ ์ค๋น ์๋ฃ (๊ธธ์ด: {len(context) if context else 0}์)")
|
248 |
+
|
249 |
+
# LLM ์ธํฐํ์ด์ค ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ
|
250 |
+
llm_id = data.get('llm_id', None)
|
251 |
+
|
252 |
+
if not context:
|
253 |
+
if search_warning:
|
254 |
+
logger.info(f"์ปจํ
์คํธ ์์, ๊ฒ์ ๊ฒฝ๊ณ : {search_warning}")
|
255 |
+
answer = f"์ฃ์กํฉ๋๋ค. ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ฐพ์ ์ ์์ต๋๋ค. ({search_warning})"
|
256 |
+
else:
|
257 |
+
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
258 |
+
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
259 |
+
else:
|
260 |
+
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
261 |
+
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
262 |
+
answer = "์ฃ์กํฉ๋๋ค. ํ์ฌ LLM ์๋น์ค๋ฅผ ์ฌ์ฉํ ์ ์์ต๋๋ค."
|
263 |
+
else:
|
264 |
+
# LLM ํธ์ถ ์ ์ ๊ฒฝ๊ณ ๋ฉ์์ง ์ถ๊ฐ
|
265 |
+
if search_warning:
|
266 |
+
modified_query = f"{query}\n\n์ฐธ๊ณ : {search_warning}"
|
267 |
+
logger.info(f"๊ฒฝ๊ณ ๋ฉ์์ง์ ํจ๊ป ์ฟผ๋ฆฌ ์์ฑ: {modified_query[:100]}...")
|
268 |
+
else:
|
269 |
+
modified_query = query
|
270 |
+
|
271 |
+
answer = llm_interface.rag_generate(modified_query, context, llm_id=llm_id)
|
272 |
+
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
273 |
+
|
274 |
+
# ์์ค ์ ๋ณด ์ถ์ถ
|
275 |
+
sources = []
|
276 |
+
if search_results:
|
277 |
+
for result in search_results:
|
278 |
+
if not isinstance(result, dict):
|
279 |
+
logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
|
280 |
+
continue
|
281 |
+
source_info = {}
|
282 |
+
source_key = result.get("source")
|
283 |
+
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
284 |
+
source_key = result["metadata"].get("source")
|
285 |
+
|
286 |
+
if source_key:
|
287 |
+
source_info["name"] = os.path.basename(source_key)
|
288 |
+
source_info["path"] = source_key
|
289 |
+
else:
|
290 |
+
source_info["name"] = "์ ์ ์๋ ์์ค"
|
291 |
+
|
292 |
+
if "score" in result:
|
293 |
+
source_info["score"] = result["score"]
|
294 |
+
if "rerank_score" in result:
|
295 |
+
source_info["rerank_score"] = result["rerank_score"]
|
296 |
+
|
297 |
sources.append(source_info)
|
298 |
|
299 |
+
return jsonify({
|
300 |
+
"answer": answer,
|
301 |
+
"sources": sources,
|
302 |
+
"search_warning": search_warning
|
303 |
+
})
|
304 |
+
|
305 |
+
except Exception as e:
|
306 |
+
logger.error(f"LLM ์๋ต ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
307 |
+
return jsonify({
|
308 |
+
"answer": f"์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
|
309 |
+
"sources": [],
|
310 |
+
"error": str(e)
|
311 |
+
})
|
312 |
+
|
313 |
except Exception as e:
|
314 |
+
logger.error(f"์ฑํ
API์์ ์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
315 |
+
return jsonify({
|
316 |
+
"error": f"์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}",
|
317 |
+
"answer": "์ฃ์กํฉ๋๋ค. ์๋ฒ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.",
|
318 |
+
"sources": []
|
319 |
+
}), 500
|
320 |
|
321 |
# --- Voice Chat API ---
|
322 |
@app.route('/api/voice', methods=['POST'])
|
323 |
@login_required
|
324 |
def voice_chat():
|
325 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
try:
|
327 |
+
# ์ฑ์ด ์ค๋น๋์๋์ง ํ์ธ
|
328 |
+
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
329 |
+
if not is_ready:
|
330 |
+
logger.warning("์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์
๋๋ค.")
|
331 |
+
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค...", "answer": "์ฃ์กํฉ๋๋ค. ์์คํ
์ด ์์ง ์ค๋น ์ค์
๋๋ค."}), 503
|
332 |
+
|
333 |
+
# STT ํด๋ผ์ด์ธํธ ํ์ธ
|
334 |
+
if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
|
335 |
+
logger.error("์์ฑ API ์์ฒญ ์ STT ํด๋ผ์ด์ธํธ๊ฐ ์ค๋น๋์ง ์์")
|
336 |
+
return jsonify({"error": "์์ฑ ์ธ์ ์๋น์ค ์ค๋น ์๋จ"}), 503
|
337 |
+
|
338 |
+
logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
|
339 |
+
|
340 |
+
if 'audio' not in request.files:
|
341 |
+
logger.error("์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์")
|
342 |
+
return jsonify({"error": "์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
343 |
+
|
344 |
+
audio_file = request.files['audio']
|
345 |
+
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
346 |
+
|
347 |
+
try:
|
348 |
+
# ์ค๋์ค ํ์ผ ์์ ์ ์ฅ ๋ฐ ์ฒ๋ฆฌ
|
349 |
+
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
350 |
+
audio_file.save(temp_audio.name)
|
351 |
+
logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
|
352 |
+
# STT ์ํ (๋ฐ์ดํธ ์ ๋ฌ ๊ฐ์ )
|
353 |
+
with open(temp_audio.name, 'rb') as f_bytes:
|
354 |
+
audio_bytes = f_bytes.read()
|
355 |
+
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
356 |
+
|
357 |
+
# STT ๊ฒฐ๊ณผ ์ฒ๋ฆฌ
|
358 |
+
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
359 |
+
error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
|
360 |
+
logger.error(f"์์ฑ์ธ์ ์คํจ: {error_msg}")
|
361 |
+
return jsonify({"error": "์์ฑ์ธ์ ์คํจ", "details": error_msg}), 500
|
362 |
+
|
363 |
+
transcription = stt_result.get("text", "")
|
364 |
+
if not transcription:
|
365 |
+
logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
|
366 |
+
return jsonify({
|
367 |
+
"transcription": "",
|
368 |
+
"answer": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
|
369 |
+
"sources": []
|
370 |
+
}), 200 # 200 OK์ ๋ฉ์์ง
|
371 |
+
|
372 |
+
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
373 |
+
|
374 |
+
# --- RAG ๋ฐ LLM ํธ์ถ (Chat API์ ๋์ผ ๋ก์ง) ---
|
375 |
+
# ๊ฒ์ ์์ง ์ฒ๋ฆฌ ๋ถ๋ถ
|
376 |
+
search_results = []
|
377 |
+
search_warning = None
|
378 |
+
try:
|
379 |
+
# retriever ์ํ ๊ฒ์ฆ
|
380 |
+
if retriever is None:
|
381 |
+
logger.warning("Retriever๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
382 |
+
search_warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค."
|
383 |
+
elif hasattr(retriever, 'is_mock') and retriever.is_mock:
|
384 |
+
logger.info("Mock Retriever ์ฌ์ฉ ์ค - ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
385 |
+
search_warning = "๊ฒ์ ์ธ๋ฑ์ค๊ฐ ์์ง ๊ตฌ์ถ ์ค์
๋๋ค. ๊ธฐ๋ณธ ์๋ต๋ง ์ ๊ณต๋ฉ๋๋ค."
|
386 |
+
elif not hasattr(retriever, 'search'):
|
387 |
+
logger.warning("Retriever์ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
388 |
+
search_warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ํ์ฌ ์ ํ๋์ด ์์ต๋๋ค."
|
389 |
+
else:
|
390 |
+
logger.info(f"๊ฒ์ ์ํ: {transcription[:50]}...")
|
391 |
+
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
392 |
+
if not search_results:
|
393 |
+
logger.info("๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.")
|
394 |
+
else:
|
395 |
+
logger.info(f"๊ฒ์ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐ ํญ๋ชฉ")
|
396 |
+
except Exception as e:
|
397 |
+
logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
398 |
+
search_results = []
|
399 |
+
search_warning = f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
400 |
+
|
401 |
+
# LLM ์๋ต ์์ฑ
|
402 |
+
context = ""
|
403 |
+
if search_results:
|
404 |
+
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
405 |
+
logger.warning("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
406 |
+
else:
|
407 |
+
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
408 |
+
logger.info(f"์ปจํ
์คํธ ์ค๋น ์๋ฃ (๊ธธ์ด: {len(context) if context else 0}์)")
|
409 |
+
|
410 |
+
# LLM ์ธํฐํ์ด์ค ํธ์ถ
|
411 |
+
llm_id = request.form.get('llm_id', None) # form ๋ฐ์ดํฐ์์ llm_id ๊ฐ์ ธ์ค๊ธฐ
|
412 |
+
|
413 |
+
if not context:
|
414 |
+
if search_warning:
|
415 |
+
logger.info(f"์ปจํ
์คํธ ์์, ๊ฒ์ ๊ฒฝ๊ณ : {search_warning}")
|
416 |
+
answer = f"์ฃ์กํฉ๋๋ค. ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ฐพ์ ์ ์์ต๋๋ค. ({search_warning})"
|
417 |
+
else:
|
418 |
+
logger.info("์ปจํ
์คํธ ์๏ฟฝ๏ฟฝ๏ฟฝ ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
419 |
+
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
420 |
+
else:
|
421 |
+
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
422 |
+
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
423 |
+
answer = "์ฃ์กํฉ๋๋ค. ํ์ฌ LLM ์๋น์ค๋ฅผ ์ฌ์ฉํ ์ ์์ต๋๋ค."
|
424 |
+
else:
|
425 |
+
# LLM ํธ์ถ ์ ์ ๊ฒฝ๊ณ ๋ฉ์์ง ์ถ๊ฐ
|
426 |
+
if search_warning:
|
427 |
+
modified_query = f"{transcription}\n\n์ฐธ๊ณ : {search_warning}"
|
428 |
+
logger.info(f"๊ฒฝ๊ณ ๋ฉ์์ง์ ํจ๊ป ์ฟผ๋ฆฌ ์์ฑ: {modified_query[:100]}...")
|
429 |
+
else:
|
430 |
+
modified_query = transcription
|
431 |
+
|
432 |
+
answer = llm_interface.rag_generate(modified_query, context, llm_id=llm_id)
|
433 |
+
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
434 |
+
|
435 |
+
# ์์ค ์ ๋ณด ์ถ์ถ
|
436 |
+
sources = []
|
437 |
+
if search_results:
|
438 |
+
for result in search_results:
|
439 |
+
if not isinstance(result, dict):
|
440 |
+
logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
|
441 |
+
continue
|
442 |
+
source_info = {}
|
443 |
+
source_key = result.get("source")
|
444 |
+
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
445 |
+
source_key = result["metadata"].get("source")
|
446 |
+
|
447 |
+
if source_key:
|
448 |
+
source_info["name"] = os.path.basename(source_key)
|
449 |
+
source_info["path"] = source_key
|
450 |
+
else:
|
451 |
+
source_info["name"] = "์ ์ ์๋ ์์ค"
|
452 |
+
|
453 |
+
if "score" in result:
|
454 |
+
source_info["score"] = result["score"]
|
455 |
+
if "rerank_score" in result:
|
456 |
+
source_info["rerank_score"] = result["rerank_score"]
|
457 |
+
|
458 |
+
sources.append(source_info)
|
459 |
+
|
460 |
+
# ์ต์ข
์๋ต
|
461 |
+
response_data = {
|
462 |
+
"transcription": transcription,
|
463 |
+
"answer": answer,
|
464 |
+
"sources": sources,
|
465 |
+
"search_warning": search_warning
|
466 |
+
}
|
467 |
+
|
468 |
+
# LLM ์ ๋ณด ์ถ๊ฐ (์ต์
)
|
469 |
+
if hasattr(llm_interface, 'get_current_llm_details'):
|
470 |
+
response_data["llm"] = llm_interface.get_current_llm_details()
|
471 |
+
|
472 |
+
return jsonify(response_data)
|
473 |
+
|
474 |
+
except Exception as e:
|
475 |
+
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
476 |
return jsonify({
|
477 |
+
"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์",
|
478 |
+
"details": str(e),
|
479 |
+
"answer": "์ฃ์กํฉ๋๋ค. ์ค๋์ค ์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
|
480 |
+
}), 500
|
481 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
except Exception as e:
|
483 |
+
logger.error(f"์์ฑ API์์ ์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
484 |
+
return jsonify({
|
485 |
+
"error": f"์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}",
|
486 |
+
"answer": "์ฃ์กํฉ๋๋ค. ์๋ฒ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
|
487 |
+
}), 500
|
488 |
|
489 |
# --- Document Upload API ---
|
490 |
@app.route('/api/upload', methods=['POST'])
|