Spaces:
No application file
No application file
fix
Browse files- app/app_routes.py +354 -257
app/app_routes.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""
|
2 |
-
RAG κ²μ μ±λ΄ μΉ μ ν리μΌμ΄μ
- API λΌμ°νΈ μ μ
|
3 |
"""
|
4 |
|
5 |
import os
|
@@ -7,6 +7,7 @@ import json
|
|
7 |
import logging
|
8 |
import tempfile
|
9 |
import requests
|
|
|
10 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
11 |
from datetime import datetime
|
12 |
from werkzeug.utils import secure_filename
|
@@ -14,10 +15,13 @@ from werkzeug.utils import secure_filename
|
|
14 |
# λ‘κ±° κ°μ Έμ€κΈ°
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
17 |
-
|
|
|
|
|
|
|
18 |
"""Flask μ ν리μΌμ΄μ
μ κΈ°λ³Έ λΌμ°νΈ λ±λ‘"""
|
19 |
-
|
20 |
-
# ν¬νΌ ν¨μ
|
21 |
def allowed_audio_file(filename):
|
22 |
"""νμΌμ΄ νμ©λ μ€λμ€ νμ₯μλ₯Ό κ°μ§λμ§ νμΈ"""
|
23 |
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
|
@@ -28,6 +32,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
28 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
29 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
30 |
|
|
|
31 |
@app.route('/login', methods=['GET', 'POST'])
|
32 |
def login():
|
33 |
error = None
|
@@ -40,41 +45,37 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
40 |
username = request.form.get('username', '')
|
41 |
password = request.form.get('password', '')
|
42 |
logger.info(f"μ
λ ₯λ μ¬μ©μλͺ
: {username}")
|
43 |
-
logger.info(f"λΉλ°λ²νΈ μ
λ ₯ μ¬λΆ: {len(password) > 0}")
|
44 |
|
45 |
-
# νκ²½ λ³μ λλ κΈ°λ³Έκ°κ³Ό λΉκ΅
|
46 |
valid_username = ADMIN_USERNAME
|
47 |
valid_password = ADMIN_PASSWORD
|
48 |
logger.info(f"κ²μ¦μ© μ¬μ©μλͺ
: {valid_username}")
|
49 |
-
logger.info(f"κ²μ¦μ© λΉλ°λ²νΈ μ‘΄μ¬ μ¬λΆ: {valid_password is not None and len(valid_password) > 0}")
|
50 |
|
51 |
if username == valid_username and password == valid_password:
|
52 |
logger.info(f"λ‘κ·ΈμΈ μ±κ³΅: {username}")
|
53 |
-
# μΈμ
μ€μ
|
54 |
-
logger.debug(f"μΈμ
μ€μ μ : {session}")
|
55 |
|
56 |
-
# μΈμ
μ λ‘κ·ΈμΈ μ 보 μ μ₯
|
57 |
session.permanent = True
|
58 |
session['logged_in'] = True
|
59 |
session['username'] = username
|
60 |
-
session.modified = True
|
61 |
|
62 |
-
logger.info(f"μΈμ
μ€μ
|
63 |
-
logger.info("μΈμ
μ€μ μλ£, 리λλ μ
μλ")
|
64 |
|
65 |
-
# λ‘κ·ΈμΈ μ±κ³΅ ν 리λλ μ
|
66 |
redirect_to = next_url or url_for('index')
|
67 |
logger.info(f"리λλ μ
λμ: {redirect_to}")
|
68 |
response = redirect(redirect_to)
|
|
|
|
|
69 |
return response
|
70 |
else:
|
71 |
logger.warning("λ‘κ·ΈμΈ μ€ν¨: μμ΄λ λλ λΉλ°λ²νΈ λΆμΌμΉ")
|
72 |
-
|
73 |
-
if password != valid_password: logger.warning("λΉλ°λ²νΈ λΆμΌμΉ")
|
74 |
error = 'μμ΄λ λλ λΉλ°λ²νΈκ° μ¬λ°λ₯΄μ§ μμ΅λλ€.'
|
75 |
-
else:
|
76 |
logger.info("λ‘κ·ΈμΈ νμ΄μ§ GET μμ²")
|
77 |
-
if 'logged_in'
|
78 |
logger.info("μ΄λ―Έ λ‘κ·ΈμΈλ μ¬μ©μ, λ©μΈ νμ΄μ§λ‘ 리λλ μ
")
|
79 |
return redirect(url_for('index'))
|
80 |
|
@@ -85,40 +86,49 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
85 |
@app.route('/logout')
|
86 |
def logout():
|
87 |
"""λ‘κ·Έμμ μ²λ¦¬"""
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
session.modified = True
|
94 |
-
logger.info(f"μΈμ
μ 보 μμ μλ£. νμ¬ μΈμ
: {session}")
|
95 |
else:
|
96 |
-
|
97 |
|
98 |
logger.info("λ‘κ·ΈμΈ νμ΄μ§λ‘ 리λλ μ
")
|
99 |
response = redirect(url_for('login'))
|
|
|
|
|
100 |
return response
|
101 |
|
102 |
-
|
103 |
@app.route('/')
|
104 |
@login_required
|
105 |
def index():
|
106 |
"""λ©μΈ νμ΄μ§"""
|
107 |
-
|
108 |
-
|
109 |
-
#
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
logger.info("λ©μΈ νμ΄μ§ μμ²")
|
123 |
return render_template('index.html')
|
124 |
|
@@ -127,22 +137,31 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
127 |
@login_required
|
128 |
def app_status():
|
129 |
"""μ± μ΄κΈ°ν μν νμΈ API"""
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
|
|
|
134 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
135 |
@login_required
|
136 |
def llm_api():
|
137 |
"""μ¬μ© κ°λ₯ν LLM λͺ©λ‘ λ° μ ν API"""
|
138 |
-
if
|
139 |
-
|
|
|
|
|
|
|
140 |
|
141 |
if request.method == 'GET':
|
142 |
logger.info("LLM λͺ©λ‘ μμ²")
|
143 |
try:
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
146 |
supported_list = [{
|
147 |
"name": name, "id": id, "current": id == current_details.get("id")
|
148 |
} for name, id in supported_llms_dict.items()]
|
@@ -152,7 +171,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
152 |
"current_llm": current_details
|
153 |
})
|
154 |
except Exception as e:
|
155 |
-
logger.error(f"LLM μ 보 μ‘°ν μ€λ₯: {e}")
|
156 |
return jsonify({"error": "LLM μ 보 μ‘°ν μ€ μ€λ₯ λ°μ"}), 500
|
157 |
|
158 |
elif request.method == 'POST':
|
@@ -164,8 +183,10 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
164 |
logger.info(f"LLM λ³κ²½ μμ²: {llm_id}")
|
165 |
|
166 |
try:
|
167 |
-
|
168 |
-
|
|
|
|
|
169 |
|
170 |
if llm_id not in llm_interface.llm_clients:
|
171 |
return jsonify({"error": f"μ§μλμ§ μλ LLM ID: {llm_id}"}), 400
|
@@ -186,18 +207,20 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
186 |
logger.error(f"LLM λ³κ²½ μ²λ¦¬ μ€ μ€λ₯: {e}", exc_info=True)
|
187 |
return jsonify({"error": f"LLM λ³κ²½ μ€ μ€λ₯ λ°μ: {str(e)}"}), 500
|
188 |
|
189 |
-
|
190 |
@app.route('/api/chat', methods=['POST'])
|
191 |
@login_required
|
192 |
def chat():
|
193 |
"""ν
μ€νΈ κΈ°λ° μ±λ΄ API"""
|
194 |
-
#
|
195 |
-
if retriever is None:
|
196 |
-
logger.warning("μ±ν
API
|
|
|
197 |
return jsonify({
|
198 |
-
"answer": "μ£μ‘ν©λλ€.
|
199 |
-
"sources": []
|
200 |
-
|
|
|
201 |
|
202 |
try:
|
203 |
data = request.get_json()
|
@@ -208,22 +231,24 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
208 |
logger.info(f"ν
μ€νΈ 쿼리 μμ : {query[:100]}...")
|
209 |
|
210 |
# RAG κ²μ μν
|
211 |
-
|
212 |
-
raise NotImplementedError("Retrieverμ search λ©μλκ° μμ΅λλ€.")
|
213 |
-
search_results = retriever.search(query, top_k=5, first_stage_k=6)
|
214 |
|
215 |
# 컨ν
μ€νΈ μ€λΉ
|
216 |
-
if not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
217 |
-
|
218 |
-
|
|
|
219 |
|
220 |
if not context:
|
221 |
-
logger.warning("
|
|
|
|
|
222 |
|
223 |
# LLMμ μ§μ
|
224 |
-
llm_id = data.get('llm_id', None)
|
225 |
-
if not hasattr(llm_interface, 'rag_generate'):
|
226 |
-
|
|
|
227 |
|
228 |
if not context:
|
229 |
answer = "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
|
@@ -232,7 +257,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
232 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
233 |
logger.info(f"LLM μλ΅ μμ± μλ£ (κΈΈμ΄: {len(answer)})")
|
234 |
|
235 |
-
# μμ€ μ 보 μΆμΆ (
|
236 |
sources = []
|
237 |
if search_results:
|
238 |
for result in search_results:
|
@@ -240,14 +265,21 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
240 |
logger.warning(f"μμμΉ λͺ»ν κ²μ κ²°κ³Ό νμ: {type(result)}")
|
241 |
continue
|
242 |
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
|
|
|
|
|
|
248 |
|
249 |
-
# CSV
|
250 |
-
|
|
|
|
|
|
|
|
|
251 |
try:
|
252 |
text_lines = result["text"].strip().split('\n')
|
253 |
if text_lines:
|
@@ -255,9 +287,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
255 |
if ',' in first_line:
|
256 |
first_column = first_line.split(',')[0].strip()
|
257 |
source_info["id"] = first_column
|
258 |
-
logger.debug(f"CSV μμ€ ID μΆμΆ: {first_column} from {source_info['source']}")
|
259 |
except Exception as e:
|
260 |
-
logger.warning(f"CSV μμ€ ID μΆμΆ μ€ν¨ ({
|
261 |
|
262 |
sources.append(source_info)
|
263 |
|
@@ -273,33 +305,27 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
273 |
logger.error(f"μ±ν
μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
274 |
return jsonify({"error": f"μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"}), 500
|
275 |
|
276 |
-
|
277 |
@app.route('/api/voice', methods=['POST'])
|
278 |
@login_required
|
279 |
def voice_chat():
|
280 |
"""μμ± μ± API μλν¬μΈνΈ"""
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
if stt_client is None:
|
295 |
-
return jsonify({
|
296 |
-
"transcription": "(μμ± μΈμ κΈ°λ₯μ΄ μ€λΉ μ€μ
λλ€)",
|
297 |
-
"answer": "μ£μ‘ν©λλ€. νμ¬ μμ± μΈμ μλΉμ€κ° μ΄κΈ°ν μ€μ
λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ.",
|
298 |
-
"sources": []
|
299 |
-
})
|
300 |
|
301 |
logger.info("μμ± μ± μμ² μμ ")
|
302 |
-
|
303 |
if 'audio' not in request.files:
|
304 |
logger.error("μ€λμ€ νμΌμ΄ μ 곡λμ§ μμ")
|
305 |
return jsonify({"error": "μ€λμ€ νμΌμ΄ μ 곡λμ§ μμμ΅λλ€."}), 400
|
@@ -308,57 +334,40 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
308 |
logger.info(f"μμ λ μ€λμ€ νμΌ: {audio_file.filename} ({audio_file.content_type})")
|
309 |
|
310 |
try:
|
311 |
-
# μ€λμ€ νμΌ μ²λ¦¬
|
312 |
-
|
313 |
-
with tempfile.NamedTemporaryFile(delete=True) as temp_audio:
|
314 |
audio_file.save(temp_audio.name)
|
315 |
logger.info(f"μ€λμ€ νμΌμ μμ μ μ₯: {temp_audio.name}")
|
316 |
-
|
317 |
-
#
|
318 |
-
|
319 |
-
raise NotImplementedError("STT ν΄λΌμ΄μΈνΈμ transcribe_audio λ©μλκ° μμ΅λλ€.")
|
320 |
-
|
321 |
-
# νμΌ κ²½λ‘λ‘ μ λ¬ μ
|
322 |
-
# stt_result = stt_client.transcribe_audio(temp_audio.name, language="ko")
|
323 |
-
# λ°μ΄νΈλ‘ μ λ¬ μ
|
324 |
with open(temp_audio.name, 'rb') as f_bytes:
|
325 |
audio_bytes = f_bytes.read()
|
326 |
-
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
327 |
-
|
328 |
|
329 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
330 |
error_msg = stt_result.get("error", "μ μ μλ STT μ€λ₯") if isinstance(stt_result, dict) else "STT κ²°κ³Ό νμ μ€λ₯"
|
331 |
logger.error(f"μμ±μΈμ μ€ν¨: {error_msg}")
|
332 |
-
return jsonify({
|
333 |
-
"error": "μμ±μΈμ μ€ν¨",
|
334 |
-
"details": error_msg
|
335 |
-
}), 500
|
336 |
|
337 |
transcription = stt_result.get("text", "")
|
338 |
if not transcription:
|
339 |
logger.warning("μμ±μΈμ κ²°κ³Όκ° λΉμ΄μμ΅λλ€.")
|
340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
logger.info(f"μμ±μΈμ μ±κ³΅: {transcription[:50]}...")
|
343 |
-
|
344 |
-
|
345 |
-
return jsonify({
|
346 |
-
"transcription": transcription,
|
347 |
-
"answer": "μμ±μ μΈμνμ§λ§, νμ¬ κ²μ μμ€ν
μ΄ μ€λΉλμ§ μμμ΅λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ.",
|
348 |
-
"sources": []
|
349 |
-
})
|
350 |
-
# --- μ΄ν λ‘μ§μ /api/chatκ³Ό κ±°μ λμΌ ---
|
351 |
-
# RAG κ²μ μν
|
352 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
353 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
354 |
|
355 |
-
|
356 |
-
logger.warning("μμ± μΏΌλ¦¬μ λν κ²μ κ²°κ³Ό μμ.")
|
357 |
-
# answer = "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€." (μλ LLM νΈμΆ λ‘μ§μμ μ²λ¦¬)
|
358 |
-
pass
|
359 |
-
|
360 |
-
# LLM νΈμΆ
|
361 |
-
llm_id = request.form.get('llm_id', None) # μμ± μμ²μ form λ°μ΄ν°λ‘ LLM ID λ°μ μ μμ
|
362 |
if not context:
|
363 |
answer = "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
|
364 |
logger.info("컨ν
μ€νΈ μμ΄ κΈ°λ³Έ μλ΅ μμ±")
|
@@ -366,201 +375,243 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
366 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
367 |
logger.info(f"LLM μλ΅ μμ± μλ£ (κΈΈμ΄: {len(answer)})")
|
368 |
|
369 |
-
|
370 |
-
|
371 |
-
enhanced_sources = []
|
372 |
if search_results:
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
|
|
|
|
|
|
|
|
|
|
391 |
|
392 |
# μ΅μ’
μλ΅
|
393 |
response_data = {
|
394 |
"transcription": transcription,
|
395 |
"answer": answer,
|
396 |
-
"sources":
|
397 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
398 |
}
|
399 |
return jsonify(response_data)
|
400 |
|
401 |
except Exception as e:
|
402 |
logger.error(f"μμ± μ± μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
403 |
-
return jsonify({
|
404 |
-
"error": "μμ± μ²λ¦¬ μ€ λ΄λΆ μ€λ₯ λ°μ",
|
405 |
-
"details": str(e)
|
406 |
-
}), 500
|
407 |
-
|
408 |
|
|
|
409 |
@app.route('/api/upload', methods=['POST'])
|
410 |
@login_required
|
411 |
def upload_document():
|
412 |
"""μ§μλ² μ΄μ€ λ¬Έμ μ
λ‘λ API"""
|
413 |
-
|
414 |
-
|
|
|
|
|
415 |
|
416 |
if 'document' not in request.files:
|
417 |
return jsonify({"error": "λ¬Έμ νμΌμ΄ μ 곡λμ§ μμμ΅λλ€."}), 400
|
418 |
|
419 |
doc_file = request.files['document']
|
420 |
-
if doc_file.filename
|
421 |
return jsonify({"error": "μ νλ νμΌμ΄ μμ΅λλ€."}), 400
|
422 |
|
423 |
if not allowed_doc_file(doc_file.filename):
|
424 |
-
|
|
|
425 |
return jsonify({"error": f"νμ©λμ§ μλ νμΌ νμμ
λλ€. νμ©: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
426 |
|
427 |
try:
|
428 |
filename = secure_filename(doc_file.filename)
|
429 |
-
|
|
|
|
|
|
|
|
|
430 |
doc_file.save(filepath)
|
431 |
logger.info(f"λ¬Έμ μ μ₯ μλ£: {filepath}")
|
432 |
|
433 |
-
# λ¬Έμ μ²λ¦¬ (
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
return jsonify({"error": "νμΌ μΈμ½λ©μ μ½μ μ μμ΅λλ€ (UTF-8, CP949 μλ μ€ν¨)."}), 400
|
445 |
-
except Exception as e_read:
|
446 |
-
logger.error(f"νμΌ μ½κΈ° μ€λ₯ ({filename}): {e_read}")
|
447 |
-
return jsonify({"error": f"νμΌ μ½κΈ° μ€ μ€λ₯ λ°μ: {str(e_read)}"}), 500
|
448 |
-
|
449 |
-
|
450 |
-
# λ©νλ°μ΄ν° λ° λ¬Έμ λΆν /μ²λ¦¬
|
451 |
-
metadata = {
|
452 |
-
"source": filename, "filename": filename,
|
453 |
-
"filetype": filename.rsplit('.', 1)[1].lower(),
|
454 |
-
"filepath": filepath
|
455 |
-
}
|
456 |
-
file_ext = metadata["filetype"]
|
457 |
docs = []
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
docs = DocumentProcessor.text_to_documents(
|
478 |
content, metadata=metadata,
|
479 |
-
chunk_size=512, chunk_overlap=50
|
480 |
)
|
|
|
481 |
|
482 |
-
# κ²μκΈ°μ
|
483 |
if docs:
|
484 |
-
if not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
485 |
-
raise NotImplementedError("κΈ°λ³Έ κ²μκΈ°μ add_documents λλ save λ©μλ μμ")
|
486 |
-
|
487 |
logger.info(f"{len(docs)}κ° λ¬Έμ μ²ν¬λ₯Ό κ²μκΈ°μ μΆκ°ν©λλ€...")
|
488 |
base_retriever.add_documents(docs)
|
489 |
|
490 |
-
# μΈλ±μ€ μ μ₯ (μ
λ‘λλ§λ€ μ μ₯ - λΉν¨μ¨μ μΌ μ μμ)
|
491 |
logger.info(f"κ²μκΈ° μνλ₯Ό μ μ₯ν©λλ€...")
|
492 |
-
index_path = app.config
|
|
|
493 |
try:
|
494 |
base_retriever.save(index_path)
|
495 |
logger.info("μΈλ±μ€ μ μ₯ μλ£")
|
496 |
-
# μ¬μμν
|
497 |
-
# μ: retriever.update_base_retriever(base_retriever)
|
498 |
return jsonify({
|
499 |
"success": True,
|
500 |
"message": f"νμΌ '{filename}' μ
λ‘λ λ° μ²λ¦¬ μλ£ ({len(docs)}κ° μ²ν¬ μΆκ°)."
|
501 |
})
|
502 |
except Exception as e_save:
|
503 |
-
logger.error(f"μΈλ±μ€ μ μ₯ μ€ μ€λ₯ λ°μ: {e_save}")
|
|
|
504 |
return jsonify({"error": f"μΈλ±μ€ μ μ₯ μ€ μ€λ₯: {str(e_save)}"}), 500
|
505 |
else:
|
506 |
logger.warning(f"νμΌ '{filename}'μμ μ²λ¦¬ν λ΄μ©μ΄ μκ±°λ μ§μλμ§ μλ νμμ
λλ€.")
|
507 |
-
# νμΌμ μ μ₯λμμΌλ―λ‘
|
508 |
return jsonify({
|
509 |
-
"warning": True,
|
510 |
-
"message": f"νμΌ '{filename}'μ΄ μ μ₯λμμ§λ§ μ²λ¦¬ν λ΄μ©μ΄
|
511 |
})
|
512 |
|
513 |
except Exception as e:
|
514 |
logger.error(f"νμΌ μ
λ‘λ λλ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
515 |
return jsonify({"error": f"νμΌ μ
λ‘λ μ€ μ€λ₯: {str(e)}"}), 500
|
516 |
|
517 |
-
|
518 |
@app.route('/api/documents', methods=['GET'])
|
519 |
@login_required
|
520 |
def list_documents():
|
521 |
"""μ§μλ² μ΄μ€ λ¬Έμ λͺ©λ‘ API"""
|
522 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
523 |
if base_retriever is None:
|
524 |
-
logger.warning("λ¬Έμ API
|
|
|
525 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
|
527 |
try:
|
528 |
sources = {}
|
529 |
total_chunks = 0
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
total_chunks += 1
|
558 |
-
|
559 |
-
|
|
|
560 |
|
561 |
-
# λͺ©λ‘ νμ λ³ν λ° μ λ ¬
|
562 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
563 |
-
documents.sort(key=lambda x: x
|
564 |
|
565 |
logger.info(f"λ¬Έμ λͺ©λ‘ μ‘°ν μλ£: {len(documents)}κ° μμ€ νμΌ, {total_chunks}κ° μ²ν¬")
|
566 |
return jsonify({
|
@@ -570,5 +621,51 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
570 |
})
|
571 |
|
572 |
except Exception as e:
|
573 |
-
|
574 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""
|
2 |
+
RAG κ²μ μ±λ΄ μΉ μ ν리μΌμ΄μ
- API λΌμ°νΈ μ μ (μμ μ μ ν¬ν¨)
|
3 |
"""
|
4 |
|
5 |
import os
|
|
|
7 |
import logging
|
8 |
import tempfile
|
9 |
import requests
|
10 |
+
import time # μ± μμ μκ° κΈ°λ‘ μν΄ μΆκ°
|
11 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
12 |
from datetime import datetime
|
13 |
from werkzeug.utils import secure_filename
|
|
|
15 |
# λ‘κ±° κ°μ Έμ€κΈ°
|
16 |
logger = logging.getLogger(__name__)
|
17 |
|
18 |
+
# μ± μμ μκ° κΈ°λ‘ (λͺ¨λ λ‘λ μμ )
|
19 |
+
APP_START_TIME = time.time()
|
20 |
+
|
21 |
+
def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_flag, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
|
22 |
"""Flask μ ν리μΌμ΄μ
μ κΈ°λ³Έ λΌμ°νΈ λ±λ‘"""
|
23 |
+
|
24 |
+
# ν¬νΌ ν¨μ (λ³κ²½ μμ)
|
25 |
def allowed_audio_file(filename):
|
26 |
"""νμΌμ΄ νμ©λ μ€λμ€ νμ₯μλ₯Ό κ°μ§λμ§ νμΈ"""
|
27 |
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
|
|
|
32 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
33 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
34 |
|
35 |
+
# --- λ‘κ·ΈμΈ/λ‘κ·Έμμ λΌμ°νΈ (ν° λ¬Έμ μμ, λ‘κΉ
κ°ν) ---
|
36 |
@app.route('/login', methods=['GET', 'POST'])
|
37 |
def login():
|
38 |
error = None
|
|
|
45 |
username = request.form.get('username', '')
|
46 |
password = request.form.get('password', '')
|
47 |
logger.info(f"μ
λ ₯λ μ¬μ©μλͺ
: {username}")
|
48 |
+
# logger.info(f"λΉλ°λ²νΈ μ
λ ₯ μ¬λΆ: {len(password) > 0}") # μ€μ λΉλ°λ²νΈ λ‘κΉ
μ 보μμ μ’μ§ μμ
|
49 |
|
|
|
50 |
valid_username = ADMIN_USERNAME
|
51 |
valid_password = ADMIN_PASSWORD
|
52 |
logger.info(f"κ²μ¦μ© μ¬μ©μλͺ
: {valid_username}")
|
53 |
+
# logger.info(f"κ²μ¦μ© λΉλ°λ²νΈ μ‘΄μ¬ μ¬λΆ: {valid_password is not None and len(valid_password) > 0}")
|
54 |
|
55 |
if username == valid_username and password == valid_password:
|
56 |
logger.info(f"λ‘κ·ΈμΈ μ±κ³΅: {username}")
|
57 |
+
# logger.debug(f"μΈμ
μ€μ μ : {session}") # λλ²κ·Έ λ λ²¨λ‘ λ³κ²½
|
|
|
58 |
|
|
|
59 |
session.permanent = True
|
60 |
session['logged_in'] = True
|
61 |
session['username'] = username
|
62 |
+
# session.modified = True # Flaskλ μΈμ
λ³κ²½ μ μλμΌλ‘ modified νλκ·Έλ₯Ό μ€μ νλ―λ‘ λͺ
μμ νΈμΆ λΆνμ
|
63 |
|
64 |
+
logger.info(f"μΈμ
μ€μ μλ£: {session}")
|
|
|
65 |
|
|
|
66 |
redirect_to = next_url or url_for('index')
|
67 |
logger.info(f"리λλ μ
λμ: {redirect_to}")
|
68 |
response = redirect(redirect_to)
|
69 |
+
# μΈμ
μΏ ν€κ° μ λλ‘ μ€μ λλλ‘ μλ΅ λ°ν μ νμΈ (λλ²κΉ
μ©)
|
70 |
+
logger.debug(f"λ‘κ·ΈμΈ μλ΅ ν€λ (Set-Cookie νμΈ): {response.headers.getlist('Set-Cookie')}")
|
71 |
return response
|
72 |
else:
|
73 |
logger.warning("λ‘κ·ΈμΈ μ€ν¨: μμ΄λ λλ λΉλ°λ²νΈ λΆμΌμΉ")
|
74 |
+
# μ€ν¨ μμΈ μμΈ λ‘κΉ
μ 보μ μν μμ§κ° μμΌλ―λ‘ μ£Όμ
|
|
|
75 |
error = 'μμ΄λ λλ λΉλ°λ²νΈκ° μ¬λ°λ₯΄μ§ μμ΅λλ€.'
|
76 |
+
else: # GET μμ²
|
77 |
logger.info("λ‘κ·ΈμΈ νμ΄μ§ GET μμ²")
|
78 |
+
if session.get('logged_in'): # .get() μ¬μ©μ΄ λ μμ
|
79 |
logger.info("μ΄λ―Έ λ‘κ·ΈμΈλ μ¬μ©μ, λ©μΈ νμ΄μ§λ‘ 리λλ μ
")
|
80 |
return redirect(url_for('index'))
|
81 |
|
|
|
86 |
@app.route('/logout')
|
87 |
def logout():
|
88 |
"""λ‘κ·Έμμ μ²λ¦¬"""
|
89 |
+
username = session.get('username', 'unknown') # λ¨Όμ μ¬μ©μ μ΄λ¦ κ°μ Έμ€κΈ°
|
90 |
+
if session.pop('logged_in', None): # popμΌλ‘ μ κ±° μλ λ° μ±κ³΅ μ¬λΆ νμΈ
|
91 |
+
session.pop('username', None)
|
92 |
+
# session.modified = True # pop μ¬μ© μ μλ μ²λ¦¬λ¨
|
93 |
+
logger.info(f"μ¬μ©μ {username} λ‘κ·Έμμ μ²λ¦¬ μλ£. νμ¬ μΈμ
: {session}")
|
|
|
|
|
94 |
else:
|
95 |
+
logger.warning("λ‘κ·ΈμΈλμ§ μμ μνμμ λ‘κ·Έμμ μλ")
|
96 |
|
97 |
logger.info("λ‘κ·ΈμΈ νμ΄μ§λ‘ 리λλ μ
")
|
98 |
response = redirect(url_for('login'))
|
99 |
+
# λ‘κ·Έμμ μ μΏ ν€ μμ νμΈ (λλ²κΉ
μ©)
|
100 |
+
logger.debug(f"λ‘κ·Έμμ μλ΅ ν€λ (Set-Cookie νμΈ): {response.headers.getlist('Set-Cookie')}")
|
101 |
return response
|
102 |
|
103 |
+
# --- λ©μΈ νμ΄μ§ λ° μν νμΈ ---
|
104 |
@app.route('/')
|
105 |
@login_required
|
106 |
def index():
|
107 |
"""λ©μΈ νμ΄μ§"""
|
108 |
+
# app_ready_flagλ register_routes νΈμΆ μμ μ κ°μΌλ‘ κ³ μ λ¨.
|
109 |
+
# μ€μκ° μνλ₯Ό λ°μνλ €λ©΄ app.pyμ μ μ λ³μλ₯Ό μ§μ μ°Έμ‘°νκ±°λ λ€λ₯Έ λ°©λ² νμ.
|
110 |
+
# μ¬κΈ°μλ μ λ¬λ°μ νλκ·Έλ₯Ό μ¬μ©νλ€κ³ κ°μ .
|
111 |
+
|
112 |
+
# !! μ€μ: app_ready_flagλ register_routes μμ μ κ°μ
λλ€.
|
113 |
+
# μ€μκ° μνλ₯Ό λ³΄λ €λ©΄ app.pyμ app_ready λ³μλ₯Ό μ§μ μ°Έμ‘°ν΄μΌ ν©λλ€.
|
114 |
+
# μ: from app import app_ready (μν μ°Έμ‘° λ¬Έμ μμ κ²½μ°)
|
115 |
+
# μ¬κΈ°μλ μΌλ¨ μ λ¬λ κ° μ¬μ©
|
116 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag # Event κ°μ²΄ λλ bool κ°μ
|
117 |
+
|
118 |
+
# μ± μμ ν κ²½κ³Ό μκ° κ³μ° (νμΌ μμ μκ° λμ μ€μ μμ μκ° μ¬μ©)
|
119 |
+
time_elapsed = time.time() - APP_START_TIME
|
120 |
+
|
121 |
+
# 30μ΄ κ°μ Ready λ‘μ§ μ κ±° λλ μμ κΆμ₯
|
122 |
+
# if not is_ready and time_elapsed > 30:
|
123 |
+
# logger.warning(f"μ±μ΄ {time_elapsed:.1f}μ΄ μ΄μ μ΄κΈ°ν μ€ μνμ
λλ€. (κ°μ Ready λ‘μ§ λΉνμ±νλ¨)")
|
124 |
+
# app_ready = True # μ μ λ³μλ₯Ό μ§μ μμ ν΄μΌ ν¨
|
125 |
+
|
126 |
+
if not is_ready:
|
127 |
+
logger.info(f"μ±μ΄ μμ§ μ€λΉλμ§ μμ λ‘λ© νμ΄μ§ νμ (κ²½κ³Ό μκ°: {time_elapsed:.1f}μ΄)")
|
128 |
+
# 503 λμ λ‘λ© νμ΄μ§λ₯Ό μ μμ μΌλ‘ 보μ¬μ£Όλ κ²μ΄ μ¬μ©μ κ²½νμ λ μ’μ μ μμ
|
129 |
+
return render_template('loading.html') # 503 λμ 200 OKμ λ‘λ© νμ΄μ§
|
130 |
+
# return render_template('loading.html'), 503 # κΈ°μ‘΄ λ‘μ§
|
131 |
+
|
132 |
logger.info("λ©μΈ νμ΄μ§ μμ²")
|
133 |
return render_template('index.html')
|
134 |
|
|
|
137 |
@login_required
|
138 |
def app_status():
|
139 |
"""μ± μ΄κΈ°ν μν νμΈ API"""
|
140 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
141 |
+
logger.info(f"μ± μν νμΈ μμ²: {'Ready' if is_ready else 'Not Ready'}")
|
142 |
+
return jsonify({"ready": is_ready})
|
143 |
|
144 |
+
# --- LLM API (ν° λ¬Έμ μμ΄ λ³΄μ, λ°©μ΄ μ½λ μΆκ°) ---
|
145 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
146 |
@login_required
|
147 |
def llm_api():
|
148 |
"""μ¬μ© κ°λ₯ν LLM λͺ©λ‘ λ° μ ν API"""
|
149 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
150 |
+
if not is_ready:
|
151 |
+
# LLM APIλ μ΄κΈ°ν μ€μ΄μ΄λ λͺ©λ‘ μ‘°νλ κ°λ₯νκ² ν μ μμ (μ νμ )
|
152 |
+
# return jsonify({"error": "μ±μ΄ μμ§ μ΄κΈ°ν μ€μ
λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ."}), 503
|
153 |
+
pass # μΌλ¨ μ§ν νμ©
|
154 |
|
155 |
if request.method == 'GET':
|
156 |
logger.info("LLM λͺ©λ‘ μμ²")
|
157 |
try:
|
158 |
+
# llm_interface κ°μ²΄ μ‘΄μ¬ λ° μμ± νμΈ κ°ν
|
159 |
+
if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
|
160 |
+
logger.error("LLM μΈν°νμ΄μ€κ° μ€λΉλμ§ μμκ±°λ νμν μμ±μ΄ μμ΅λλ€.")
|
161 |
+
return jsonify({"error": "LLM μΈν°νμ΄μ€ μ€λ₯"}), 500
|
162 |
+
|
163 |
+
current_details = llm_interface.get_current_llm_details()
|
164 |
+
supported_llms_dict = llm_interface.SUPPORTED_LLMS
|
165 |
supported_list = [{
|
166 |
"name": name, "id": id, "current": id == current_details.get("id")
|
167 |
} for name, id in supported_llms_dict.items()]
|
|
|
171 |
"current_llm": current_details
|
172 |
})
|
173 |
except Exception as e:
|
174 |
+
logger.error(f"LLM μ 보 μ‘°ν μ€λ₯: {e}", exc_info=True) # exc_info μΆκ°
|
175 |
return jsonify({"error": "LLM μ 보 μ‘°ν μ€ μ€λ₯ λ°μ"}), 500
|
176 |
|
177 |
elif request.method == 'POST':
|
|
|
183 |
logger.info(f"LLM λ³κ²½ μμ²: {llm_id}")
|
184 |
|
185 |
try:
|
186 |
+
# llm_interface κ°μ²΄ μ‘΄μ¬ λ° μμ± νμΈ κ°ν
|
187 |
+
if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
|
188 |
+
logger.error("LLM μΈν°νμ΄μ€κ° μ€λΉλμ§ μμκ±°λ νμν μμ±/λ©μλκ° μμ΅λλ€.")
|
189 |
+
return jsonify({"error": "LLM μΈν°νμ΄μ€ μ€λ₯"}), 500
|
190 |
|
191 |
if llm_id not in llm_interface.llm_clients:
|
192 |
return jsonify({"error": f"μ§μλμ§ μλ LLM ID: {llm_id}"}), 400
|
|
|
207 |
logger.error(f"LLM λ³κ²½ μ²λ¦¬ μ€ μ€λ₯: {e}", exc_info=True)
|
208 |
return jsonify({"error": f"LLM λ³κ²½ μ€ μ€λ₯ λ°μ: {str(e)}"}), 500
|
209 |
|
210 |
+
# --- Chat API (retriever None μ²΄ν¬ μμ ) ---
|
211 |
@app.route('/api/chat', methods=['POST'])
|
212 |
@login_required
|
213 |
def chat():
|
214 |
"""ν
μ€νΈ κΈ°λ° μ±λ΄ API"""
|
215 |
+
# retriever κ°μ²΄κ° NoneμΈμ§, κ·Έλ¦¬κ³ search λ©μλκ° μλμ§ νμΈ
|
216 |
+
if retriever is None or not hasattr(retriever, 'search'):
|
217 |
+
logger.warning("μ±ν
API μμ² μ retrieverκ° μ€λΉλμ§ μμκ±°λ search λ©μλκ° μμ΅λλ€.")
|
218 |
+
# 503 λμ μ¬μ©μ μΉνμ μΈ λ©μμ§ λ°ν
|
219 |
return jsonify({
|
220 |
+
"answer": "μ£μ‘ν©λλ€. κ²μ μμ§μ΄ μμ§ μ€λΉλμ§ μμμ΅λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ.",
|
221 |
+
"sources": [],
|
222 |
+
"error": "Retriever not ready" # ν΄λΌμ΄μΈνΈμμ ꡬλΆν μ μλλ‘ μΆκ°
|
223 |
+
}), 200 # λλ 503
|
224 |
|
225 |
try:
|
226 |
data = request.get_json()
|
|
|
231 |
logger.info(f"ν
μ€νΈ 쿼리 μμ : {query[:100]}...")
|
232 |
|
233 |
# RAG κ²μ μν
|
234 |
+
search_results = retriever.search(query, top_k=5, first_stage_k=6) # first_stage_kοΏ½οΏ½οΏ½ base_retrieverμ μ λ¬λ μ μμ
|
|
|
|
|
235 |
|
236 |
# 컨ν
μ€νΈ μ€λΉ
|
237 |
+
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
238 |
+
logger.error("DocumentProcessorκ° μ€λΉλμ§ μμκ±°λ prepare_rag_context λ©μλκ° μμ΅λλ€.")
|
239 |
+
return jsonify({"error": "λ¬Έμ μ²λ¦¬κΈ° μ€λ₯"}), 500
|
240 |
+
context = DocumentProcessor.prepare_rag_context(search_results, field="text") # 'text' νλκ° μλ€κ³ κ°μ
|
241 |
|
242 |
if not context:
|
243 |
+
logger.warning(f"쿼리 '{query[:50]}...'μ λν κ²μ κ²°κ³Ό μμ.")
|
244 |
+
# 컨ν
μ€νΈ μμ΄ LLM νΈμΆ μλ λλ κΈ°λ³Έ μλ΅ λ°ν κ²°μ νμ
|
245 |
+
# μ¬κΈ°μλ LLM νΈμΆ λ‘μ§μμ μ²λ¦¬νλλ‘ ν¨
|
246 |
|
247 |
# LLMμ μ§μ
|
248 |
+
llm_id = data.get('llm_id', None) # μμ²μμ llm_id κ°μ Έμ€κΈ°
|
249 |
+
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
250 |
+
logger.error("LLM μΈν°νμ΄μ€κ° μ€λΉλμ§ μμκ±°λ rag_generate λ©μλκ° μμ΅λλ€.")
|
251 |
+
return jsonify({"error": "LLM μΈν°νμ΄μ€ μ€λ₯"}), 500
|
252 |
|
253 |
if not context:
|
254 |
answer = "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
|
|
|
257 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
258 |
logger.info(f"LLM μλ΅ μμ± μλ£ (κΈΈμ΄: {len(answer)})")
|
259 |
|
260 |
+
# μμ€ μ 보 μΆμΆ (κΈ°μ‘΄ λ‘μ§ μ μ§, λ°©μ΄ μ½λ κ°ν)
|
261 |
sources = []
|
262 |
if search_results:
|
263 |
for result in search_results:
|
|
|
265 |
logger.warning(f"μμμΉ λͺ»ν κ²μ κ²°κ³Ό νμ: {type(result)}")
|
266 |
continue
|
267 |
|
268 |
+
source_info = {}
|
269 |
+
source_key = result.get("source") # Langchain Document νΈνμ± μν΄ metadataλ νμΈ
|
270 |
+
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
271 |
+
source_key = result["metadata"].get("source")
|
272 |
+
|
273 |
+
if source_key:
|
274 |
+
source_info["source"] = source_key
|
275 |
+
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
276 |
|
277 |
+
# CSV ID μΆμΆ λ‘μ§
|
278 |
+
filetype = result.get("filetype")
|
279 |
+
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
280 |
+
filetype = result["metadata"].get("filetype")
|
281 |
+
|
282 |
+
if "text" in result and filetype == "csv":
|
283 |
try:
|
284 |
text_lines = result["text"].strip().split('\n')
|
285 |
if text_lines:
|
|
|
287 |
if ',' in first_line:
|
288 |
first_column = first_line.split(',')[0].strip()
|
289 |
source_info["id"] = first_column
|
290 |
+
# logger.debug(f"CSV μμ€ ID μΆμΆ: {first_column} from {source_info['source']}")
|
291 |
except Exception as e:
|
292 |
+
logger.warning(f"CSV μμ€ ID μΆμΆ μ€ν¨ ({source_info.get('source')}): {e}")
|
293 |
|
294 |
sources.append(source_info)
|
295 |
|
|
|
305 |
logger.error(f"μ±ν
μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
306 |
return jsonify({"error": f"μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"}), 500
|
307 |
|
308 |
+
# --- Voice Chat API (retriever, stt_client None μ²΄ν¬ κ°ν) ---
|
309 |
@app.route('/api/voice', methods=['POST'])
|
310 |
@login_required
|
311 |
def voice_chat():
|
312 |
"""μμ± μ± API μλν¬μΈνΈ"""
|
313 |
+
# νμ μ»΄ν¬λνΈ νμΈ
|
314 |
+
if retriever is None or not hasattr(retriever, 'search'):
|
315 |
+
logger.error("μμ± API μμ² μ retrieverκ° μ€λΉλμ§ μμ")
|
316 |
+
return jsonify({"error": "κ²μ μμ§ μ€λΉ μλ¨"}), 503
|
317 |
+
if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
|
318 |
+
logger.error("μμ± API μμ² μ STT ν΄λΌμ΄μΈνΈκ° μ€λΉλμ§ μμ")
|
319 |
+
return jsonify({"error": "μμ± μΈμ μλΉμ€ μ€λΉ μλ¨"}), 503
|
320 |
+
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
321 |
+
logger.error("μμ± API μμ² μ LLM μΈν°νμ΄μ€κ° μ€λΉλμ§ μμ")
|
322 |
+
return jsonify({"error": "LLM μΈν°νμ΄μ€ μ€λ₯"}), 500
|
323 |
+
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
324 |
+
logger.error("μμ± API μμ² μ DocumentProcessorκ° μ€λΉλμ§ μμ")
|
325 |
+
return jsonify({"error": "λ¬Έμ μ²λ¦¬κΈ° μ€λ₯"}), 500
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
|
327 |
logger.info("μμ± μ± μμ² μμ ")
|
328 |
+
|
329 |
if 'audio' not in request.files:
|
330 |
logger.error("μ€λμ€ νμΌμ΄ μ 곡λμ§ μμ")
|
331 |
return jsonify({"error": "μ€λμ€ νμΌμ΄ μ 곡λμ§ μμμ΅λλ€."}), 400
|
|
|
334 |
logger.info(f"μμ λ μ€λμ€ νμΌ: {audio_file.filename} ({audio_file.content_type})")
|
335 |
|
336 |
try:
|
337 |
+
# μ€λμ€ νμΌ μμ μ μ₯ λ° μ²λ¦¬
|
338 |
+
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
|
|
339 |
audio_file.save(temp_audio.name)
|
340 |
logger.info(f"μ€λμ€ νμΌμ μμ μ μ₯: {temp_audio.name}")
|
341 |
+
|
342 |
+
# STT μν (λ°μ΄νΈ λλ κ²½λ‘ μ λ¬)
|
343 |
+
# μ: λ°μ΄νΈ μ λ¬
|
|
|
|
|
|
|
|
|
|
|
344 |
with open(temp_audio.name, 'rb') as f_bytes:
|
345 |
audio_bytes = f_bytes.read()
|
346 |
+
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko") # VitoSTTκ° λ°μ΄νΈλ₯Ό λ°λλ€κ³ κ°μ
|
|
|
347 |
|
348 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
349 |
error_msg = stt_result.get("error", "μ μ μλ STT μ€λ₯") if isinstance(stt_result, dict) else "STT κ²°κ³Ό νμ μ€λ₯"
|
350 |
logger.error(f"μμ±μΈμ μ€ν¨: {error_msg}")
|
351 |
+
return jsonify({"error": "μμ±μΈμ μ€ν¨", "details": error_msg}), 500
|
|
|
|
|
|
|
352 |
|
353 |
transcription = stt_result.get("text", "")
|
354 |
if not transcription:
|
355 |
logger.warning("μμ±μΈμ κ²°κ³Όκ° λΉμ΄μμ΅λλ€.")
|
356 |
+
# λΉ ν
μ€νΈλΌλ μλ΅ κ΅¬μ‘°λ μ μ§
|
357 |
+
return jsonify({
|
358 |
+
"transcription": "",
|
359 |
+
"answer": "μμ±μμ ν
μ€νΈλ₯Ό μΈμνμ§ λͺ»νμ΅λλ€.",
|
360 |
+
"sources": [],
|
361 |
+
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
362 |
+
}), 200 # 400 λμ 200 OKμ λ©μμ§
|
363 |
|
364 |
logger.info(f"μμ±μΈμ μ±κ³΅: {transcription[:50]}...")
|
365 |
+
|
366 |
+
# --- μ΄ν λ‘μ§μ /api/chatκ³Ό λμΌ ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
368 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
369 |
|
370 |
+
llm_id = request.form.get('llm_id', None) # form λ°μ΄ν°μμ llm_id κ°μ Έμ€κΈ°
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
if not context:
|
372 |
answer = "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
|
373 |
logger.info("컨ν
μ€νΈ μμ΄ κΈ°λ³Έ μλ΅ μμ±")
|
|
|
375 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
376 |
logger.info(f"LLM μλ΅ μμ± μλ£ (κΈΈμ΄: {len(answer)})")
|
377 |
|
378 |
+
# μμ€ μ 보 μΆμΆ (chat APIμ λμΌ λ‘μ§ μ¬μ©)
|
379 |
+
sources = []
|
|
|
380 |
if search_results:
|
381 |
+
for result in search_results:
|
382 |
+
if not isinstance(result, dict): continue
|
383 |
+
source_info = {}
|
384 |
+
source_key = result.get("source")
|
385 |
+
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
386 |
+
source_key = result["metadata"].get("source")
|
387 |
+
if source_key:
|
388 |
+
source_info["source"] = source_key
|
389 |
+
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
390 |
+
filetype = result.get("filetype")
|
391 |
+
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
392 |
+
filetype = result["metadata"].get("filetype")
|
393 |
+
if "text" in result and filetype == "csv":
|
394 |
+
try:
|
395 |
+
text_lines = result["text"].strip().split('\n')
|
396 |
+
if text_lines:
|
397 |
+
first_line = text_lines[0].strip()
|
398 |
+
if ',' in first_line:
|
399 |
+
first_column = first_line.split(',')[0].strip()
|
400 |
+
source_info["id"] = first_column
|
401 |
+
except Exception as e:
|
402 |
+
logger.warning(f"[μμ±μ±] CSV μμ€ ID μΆμΆ μ€ν¨ ({source_info.get('source')}): {e}")
|
403 |
+
sources.append(source_info)
|
404 |
|
405 |
# μ΅μ’
μλ΅
|
406 |
response_data = {
|
407 |
"transcription": transcription,
|
408 |
"answer": answer,
|
409 |
+
"sources": sources,
|
410 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
411 |
}
|
412 |
return jsonify(response_data)
|
413 |
|
414 |
except Exception as e:
|
415 |
logger.error(f"μμ± μ± μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
416 |
+
return jsonify({"error": "μμ± μ²λ¦¬ μ€ λ΄λΆ μ€λ₯ λ°μ", "details": str(e)}), 500
|
|
|
|
|
|
|
|
|
417 |
|
418 |
+
# --- Document Upload API (base_retriever None μ²΄ν¬ κ°ν) ---
|
419 |
@app.route('/api/upload', methods=['POST'])
|
420 |
@login_required
|
421 |
def upload_document():
|
422 |
"""μ§μλ² μ΄μ€ λ¬Έμ μ
λ‘λ API"""
|
423 |
+
# base_retriever κ°μ²΄ λ° νμ λ©μλ νμΈ
|
424 |
+
if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
425 |
+
logger.error("λ¬Έμ μ
λ‘λ API μμ² μ base_retrieverκ° μ€λΉλμ§ μμκ±°λ νμ λ©μλκ° μμ΅λλ€.")
|
426 |
+
return jsonify({"error": "κΈ°λ³Έ κ²μκΈ°κ° μ€λΉλμ§ μμμ΅λλ€."}), 503
|
427 |
|
428 |
if 'document' not in request.files:
|
429 |
return jsonify({"error": "λ¬Έμ νμΌμ΄ μ 곡λμ§ μμμ΅λλ€."}), 400
|
430 |
|
431 |
doc_file = request.files['document']
|
432 |
+
if not doc_file or not doc_file.filename: # νμΌ μ‘΄μ¬ λ° νμΌλͺ
νμΈ
|
433 |
return jsonify({"error": "μ νλ νμΌμ΄ μμ΅λλ€."}), 400
|
434 |
|
435 |
if not allowed_doc_file(doc_file.filename):
|
436 |
+
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'} # μ¬κΈ°μ λ€μ μ μ νμ
|
437 |
+
logger.warning(f"νμ©λμ§ μλ νμΌ νμ: {doc_file.filename}")
|
438 |
return jsonify({"error": f"νμ©λμ§ μλ νμΌ νμμ
λλ€. νμ©: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
439 |
|
440 |
try:
|
441 |
filename = secure_filename(doc_file.filename)
|
442 |
+
# DATA_FOLDERκ° app.configμ μ€μ λμ΄ μλ€κ³ κ°μ
|
443 |
+
data_folder = app.config.get('DATA_FOLDER', os.path.join(os.path.dirname(__file__), '..', 'data')) # κΈ°λ³Έκ° μ€μ
|
444 |
+
os.makedirs(data_folder, exist_ok=True) # ν΄λ μμΌλ©΄ μμ±
|
445 |
+
filepath = os.path.join(data_folder, filename)
|
446 |
+
|
447 |
doc_file.save(filepath)
|
448 |
logger.info(f"λ¬Έμ μ μ₯ μλ£: {filepath}")
|
449 |
|
450 |
+
# λ¬Έμ μ²λ¦¬ (DocumentProcessor κ°μ²΄ λ° λ©μλ νμΈ)
|
451 |
+
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
|
452 |
+
logger.error("DocumentProcessorκ° μ€λΉλμ§ μμκ±°λ νμν λ©μλκ° μμ΅λλ€.")
|
453 |
+
# μ΄λ―Έ μ μ₯λ νμΌ μμ κ³ λ €
|
454 |
+
try: os.remove(filepath)
|
455 |
+
except OSError: pass
|
456 |
+
return jsonify({"error": "λ¬Έμ μ²λ¦¬κΈ° μ€λ₯"}), 500
|
457 |
+
|
458 |
+
content = None
|
459 |
+
file_ext = filename.rsplit('.', 1)[1].lower()
|
460 |
+
metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
docs = []
|
462 |
|
463 |
+
# ν
μ€νΈ κΈ°λ° νμΌ μ½κΈ° (μΈμ½λ© μ²λ¦¬ ν¬ν¨)
|
464 |
+
if file_ext in ['txt', 'md', 'csv']:
|
465 |
+
try:
|
466 |
+
with open(filepath, 'r', encoding='utf-8') as f:
|
467 |
+
content = f.read()
|
468 |
+
except UnicodeDecodeError:
|
469 |
+
logger.info(f"UTF-8 λμ½λ© μ€ν¨, CP949λ‘ μλ: {filename}")
|
470 |
+
try:
|
471 |
+
with open(filepath, 'r', encoding='cp949') as f:
|
472 |
+
content = f.read()
|
473 |
+
except Exception as e_cp949:
|
474 |
+
logger.error(f"CP949 λμ½λ© μ€ν¨ ({filename}): {e_cp949}")
|
475 |
+
return jsonify({"error": "νμΌ μΈμ½λ©μ μ½μ μ μμ΅λλ€ (UTF-8, CP949 μλ μ€ν¨)."}), 400
|
476 |
+
except Exception as e_read:
|
477 |
+
logger.error(f"νμΌ μ½κΈ° μ€λ₯ ({filename}): {e_read}")
|
478 |
+
return jsonify({"error": f"νμΌ μ½κΈ° μ€ μ€λ₯ λ°μ: {str(e_read)}"}), 500
|
479 |
+
# PDF/DOCX μ²λ¦¬ λ‘μ§ (λ³λ λΌμ΄λΈλ¬λ¦¬ νμ)
|
480 |
+
elif file_ext == 'pdf':
|
481 |
+
logger.warning("PDF μ²λ¦¬λ ꡬνλμ§ μμμ΅λλ€.")
|
482 |
+
# content = extract_text_from_pdf(filepath) # μμ
|
483 |
+
elif file_ext == 'docx':
|
484 |
+
logger.warning("DOCX μ²λ¦¬λ ꡬνλμ§ μμμ΅λλ€.")
|
485 |
+
# content = extract_text_from_docx(filepath) # μμ
|
486 |
+
|
487 |
+
# λ¬Έμ λΆν /μ²λ¦¬
|
488 |
+
if content is not None: # λ΄μ©μ΄ μ±κ³΅μ μΌλ‘ μ½νμ λλ§
|
489 |
+
if file_ext == 'csv':
|
490 |
+
logger.info(f"CSV νμΌ μ²λ¦¬ μμ: {filename}")
|
491 |
+
docs = DocumentProcessor.csv_to_documents(content, metadata)
|
492 |
+
elif file_ext in ['txt', 'md']: # κΈ°ν ν
μ€νΈ
|
493 |
+
logger.info(f"ν
μ€νΈ λ¬Έμ μ²λ¦¬ μμ: {filename}")
|
494 |
docs = DocumentProcessor.text_to_documents(
|
495 |
content, metadata=metadata,
|
496 |
+
chunk_size=512, chunk_overlap=50 # μ€μ κ° μ¬μ©
|
497 |
)
|
498 |
+
# PDF/DOCXμμ μΆμΆλ content μ²λ¦¬ λ‘μ§ μΆκ° κ°λ₯
|
499 |
|
500 |
+
# κ²μκΈ°μ μΆκ° λ° μ μ₯
|
501 |
if docs:
|
|
|
|
|
|
|
502 |
logger.info(f"{len(docs)}κ° λ¬Έμ μ²ν¬λ₯Ό κ²μκΈ°μ μΆκ°ν©λλ€...")
|
503 |
base_retriever.add_documents(docs)
|
504 |
|
|
|
505 |
logger.info(f"κ²μκΈ° μνλ₯Ό μ μ₯ν©λλ€...")
|
506 |
+
index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index')) # κΈ°λ³Έκ° μ€μ
|
507 |
+
os.makedirs(os.path.dirname(index_path), exist_ok=True) # μΈλ±μ€ ν΄λ μμΌλ©΄ μμ±
|
508 |
try:
|
509 |
base_retriever.save(index_path)
|
510 |
logger.info("μΈλ±μ€ μ μ₯ μλ£")
|
511 |
+
# TODO: μ¬μμν κ²μκΈ°(retriever) μ
λ°μ΄νΈ λ‘μ§ νμ μ μΆκ°
|
|
|
512 |
return jsonify({
|
513 |
"success": True,
|
514 |
"message": f"νμΌ '{filename}' μ
λ‘λ λ° μ²λ¦¬ μλ£ ({len(docs)}κ° μ²ν¬ μΆκ°)."
|
515 |
})
|
516 |
except Exception as e_save:
|
517 |
+
logger.error(f"μΈλ±μ€ μ μ₯ μ€ μ€λ₯ λ°μ: {e_save}", exc_info=True)
|
518 |
+
# μ μ₯ μ€ν¨ μ μΆκ°λ λ¬Έμ λ‘€λ°± κ³ λ €?
|
519 |
return jsonify({"error": f"μΈλ±μ€ μ μ₯ μ€ μ€λ₯: {str(e_save)}"}), 500
|
520 |
else:
|
521 |
logger.warning(f"νμΌ '{filename}'μμ μ²λ¦¬ν λ΄μ©μ΄ μκ±°λ μ§μλμ§ μλ νμμ
λλ€.")
|
522 |
+
# νμΌμ μ μ₯λμμΌλ―λ‘ warning λ°ν
|
523 |
return jsonify({
|
524 |
+
"warning": True, # 'success' λμ 'warning' μ¬μ©
|
525 |
+
"message": f"νμΌ '{filename}'μ΄ μ μ₯λμμ§λ§ μ²λ¦¬ν λ΄μ©μ΄ μκ±°λ μ§μλμ§ μλ νμμ
λλ€."
|
526 |
})
|
527 |
|
528 |
except Exception as e:
|
529 |
logger.error(f"νμΌ μ
λ‘λ λλ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
530 |
+
# μ€λ₯ λ°μ μ μ μ₯λ νμΌ μμ κ³ λ €
|
531 |
+
if 'filepath' in locals() and os.path.exists(filepath):
|
532 |
+
try: os.remove(filepath)
|
533 |
+
except OSError: pass
|
534 |
return jsonify({"error": f"νμΌ μ
λ‘λ μ€ μ€λ₯: {str(e)}"}), 500
|
535 |
|
536 |
+
# --- Document List API (μ€λ₯ μμΈ λΆμ νμ) ---
|
537 |
@app.route('/api/documents', methods=['GET'])
|
538 |
@login_required
|
539 |
def list_documents():
|
540 |
"""μ§μλ² μ΄μ€ λ¬Έμ λͺ©λ‘ API"""
|
541 |
+
# !! μ€μ: μ΄ APIκ° 503μ λ°ννλ μμΈμ μ°ΎμμΌ ν¨ !!
|
542 |
+
# νμ¬ μ½λ μμΌλ‘λ base_retrieverκ° NoneμΌ λ 503μ΄ μλ λΉ λͺ©λ‘μ λ°νν¨.
|
543 |
+
# 503 μ€λ₯λ μ΄ ν¨μ μ€ν *μ * λ¨κ³(μ: λ€λ₯Έ λ°μ½λ μ΄ν°, λ―Έλ€μ¨μ΄, Flask λ΄λΆ μ€λ₯)
|
544 |
+
# λλ base_retriever μ κ·Ό μ λ°μνλ μμΈ μ²λ¦¬ κ³Όμ μμ λμ¬ κ°λ₯μ± μμ.
|
545 |
+
|
546 |
+
logger.info("λ¬Έμ λͺ©λ‘ API μμ² μμ") # λ‘κ·Έ μΆκ°
|
547 |
+
|
548 |
+
# base_retriever μν μμΈ λ‘κΉ
|
549 |
if base_retriever is None:
|
550 |
+
logger.warning("λ¬Έμ API μμ² μ base_retrieverκ° Noneμ
λλ€.")
|
551 |
+
# 503 λμ λΉ λͺ©λ‘ λ°ν (μλλ λμ)
|
552 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
553 |
+
elif not hasattr(base_retriever, 'documents'):
|
554 |
+
logger.warning("λ¬Έμ API μμ² μ base_retrieverμ 'documents' μμ±μ΄ μμ΅λλ€.")
|
555 |
+
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
556 |
+
|
557 |
+
logger.info(f"base_retriever κ°μ²΄ νμ
: {type(base_retriever)}")
|
558 |
+
logger.info(f"base_retriever.documents μ‘΄μ¬ μ¬λΆ: {hasattr(base_retriever, 'documents')}")
|
559 |
+
if hasattr(base_retriever, 'documents'):
|
560 |
+
logger.info(f"base_retriever.documents νμ
: {type(base_retriever.documents)}")
|
561 |
+
logger.info(f"base_retriever.documents κΈΈμ΄: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
|
562 |
+
|
563 |
|
564 |
try:
|
565 |
sources = {}
|
566 |
total_chunks = 0
|
567 |
+
doc_list = base_retriever.documents # μμ±μ΄ μλ€κ³ κ°μ νκ³ μ κ·Ό
|
568 |
+
|
569 |
+
if not isinstance(doc_list, list):
|
570 |
+
logger.error(f"base_retriever.documentsκ° λ¦¬μ€νΈκ° μλ: {type(doc_list)}")
|
571 |
+
# μ΄ κ²½μ° 500 μ€λ₯λ₯Ό λ°ννκ±°λ λΉ λͺ©λ‘ λ°ν
|
572 |
+
return jsonify({"error": "λ΄λΆ λ°μ΄ν° ꡬ쑰 μ€λ₯"}), 500
|
573 |
+
|
574 |
+
logger.info(f"μ΄ {len(doc_list)}κ° λ¬Έμ μ²ν¬μμ μμ€ λͺ©λ‘ μμ± μ€...")
|
575 |
+
for i, doc in enumerate(doc_list):
|
576 |
+
# κ° λ¬Έμ μ²ν¬ μ²λ¦¬ λ‘κΉ
μΆκ°
|
577 |
+
# logger.debug(f"μ²λ¦¬ μ€μΈ μ²ν¬ {i}: {doc}") # λ무 μμΈνλ©΄ μ£Όμ μ²λ¦¬
|
578 |
+
|
579 |
+
if not isinstance(doc, dict):
|
580 |
+
logger.warning(f"μ²ν¬ {i}κ° λμ
λ리 νμ
μ΄ μλ: {type(doc)}")
|
581 |
+
continue # λ€μ μ²ν¬λ‘ λμ΄κ°
|
582 |
+
|
583 |
+
# μμ€ μ 보 μΆμΆ (κΈ°μ‘΄ λ‘μ§ κ°μ )
|
584 |
+
source = "unknown"
|
585 |
+
metadata = doc.get("metadata") # metadata λ¨Όμ νμΈ (Langchain Document ꡬ쑰)
|
586 |
+
if isinstance(metadata, dict):
|
587 |
+
source = metadata.get("source", "unknown")
|
588 |
+
if source == "unknown": # metadataμ μμΌλ©΄ doc μ체μμ μ°ΎκΈ°
|
589 |
+
source = doc.get("source", "unknown")
|
590 |
+
|
591 |
+
if source != "unknown":
|
592 |
+
if source in sources:
|
593 |
+
sources[source]["chunks"] += 1
|
594 |
+
else:
|
595 |
+
# λ©νλ°μ΄ν° μ°μ μ¬μ©
|
596 |
+
filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
|
597 |
+
filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
|
598 |
+
# λ©νλ°μ΄ν° μμΌλ©΄ doc μ체μμ μ°ΎκΈ°
|
599 |
+
if filename == source and "filename" in doc: filename = doc["filename"]
|
600 |
+
if filetype == "unknown" and "filetype" in doc: filetype = doc["filetype"]
|
601 |
+
|
602 |
+
sources[source] = {
|
603 |
+
"filename": filename,
|
604 |
+
"chunks": 1,
|
605 |
+
"filetype": filetype
|
606 |
+
}
|
607 |
total_chunks += 1
|
608 |
+
else:
|
609 |
+
logger.warning(f"μ²ν¬ {i}μμ μμ€ μ 보λ₯Ό μ°Ύμ μ μμ: {doc}")
|
610 |
+
|
611 |
|
612 |
+
# λͺ©λ‘ νμ λ³ν λ° μ λ ¬ (λ³κ²½ μμ)
|
613 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
614 |
+
documents.sort(key=lambda x: x.get("filename", ""), reverse=False) # νμΌλͺ
κΈ°μ€ μ λ ¬
|
615 |
|
616 |
logger.info(f"λ¬Έμ λͺ©λ‘ μ‘°ν μλ£: {len(documents)}κ° μμ€ νμΌ, {total_chunks}κ° μ²ν¬")
|
617 |
return jsonify({
|
|
|
621 |
})
|
622 |
|
623 |
except Exception as e:
|
624 |
+
# !! μ€μ: μ¬κΈ°μ λ°μνλ μμΈκ° 503μΌλ‘ μ΄μ΄μ§ μ μλμ§ νμΈ !!
|
625 |
+
logger.error(f"λ¬Έμ λͺ©λ‘ μ‘°ν μ€ μ¬κ°ν μ€λ₯ λ°μ: {e}", exc_info=True)
|
626 |
+
# μΌλ°μ μΈ λ΄λΆ μ€λ₯λ 500 λ°ν
|
627 |
+
return jsonify({"error": f"λ¬Έμ λͺ©λ‘ μ‘°ν μ€ μ€λ₯: {str(e)}"}), 500
|
628 |
+
|
629 |
+
```
|
630 |
+
|
631 |
+
**μ£Όμ λ¬Έμ μ λ° μμ μ μ:**
|
632 |
+
|
633 |
+
1. **`/api/documents` 503 μ€λ₯μ λ―Έμ€ν°λ¦¬:**
|
634 |
+
* μ 곡λ `list_documents` ν¨μ μ½λ μ체μλ `app_ready` μνλ `base_retriever`κ° `None`μΈ μνλ₯Ό νμΈνμ¬ 503 μ€λ₯λ₯Ό λ°ννλ λ‘μ§μ΄ **μμ΅λλ€.** λ‘κ·Έμμ 503μ΄ λ°μνλ€λ©΄, μμΈμ λ€μ μ€ νλμΌ κ°λ₯μ±μ΄ λμ΅λλ€:
|
635 |
+
* **μ€μ μ€ν μ€μΈ μ½λ λΆμΌμΉ:** νμ¬ μλ²μμ μ€ν μ€μΈ μ½λκ° μ 곡ν΄μ£Όμ μ½λμ λ€λ₯Ό μ μμ΅λλ€. (μ: μ΄μ λ²μ μ `if not app_ready: return ..., 503` μ½λκ° λ¨μμμ)
|
636 |
+
* **`base_retriever` μ κ·Ό μ€λ₯:** `base_retriever.documents` μμ±μ μ κ·Όνλ κ³Όμ μμ μκΈ°μΉ μμ μ€λ₯κ° λ°μνκ³ , Flaskμ μ μ μ€λ₯ νΈλ€λ¬λ νΉμ λ―Έλ€μ¨μ΄κ° μ΄λ₯Ό 503μΌλ‘ μ²λ¦¬ν μ μμ΅λλ€. (μΌλ°μ μΌλ‘λ 500 Internal Server Errorκ° λ°νλ©λλ€.)
|
637 |
+
* **μΈλΆ μμΈ:** μΉ μλ²(Nginx λ±) μ€μ μ΄λ λ‘λ λ°Έλ°μ λ± Flask μ ν리μΌμ΄μ
μλ¨μ λ€λ₯Έ μμ€ν
μμ 503 μ€λ₯λ₯Ό λ°νν μλ μμ΅λλ€.
|
638 |
+
* **μμ μ μ:**
|
639 |
+
* `list_documents` ν¨μ μμ λΆλΆκ³Ό `try...except` λΈλ‘ λ΄λΆμ **λ μμΈν λ‘κ·Έ**λ₯Ό μΆκ°νμ¬ ν¨μ μ€ν νλ¦κ³Ό `base_retriever` κ°μ²΄ μνλ₯Ό λͺ
νν νμ
ν©λλ€. (μ μ½λμ λ‘κΉ
μΆκ°λ¨)
|
640 |
+
* μ€ν μ€μΈ μ½λκ° μ΅μ λ²μ μΈμ§ λ€μ νμΈν©λλ€.
|
641 |
+
* `base_retriever` κ°μ²΄ μ체 (`VectorRetriever` ν΄λμ€)μ `documents` μμ± κ΅¬νμ νμΈν©λλ€.
|
642 |
+
|
643 |
+
2. **`app_ready` μν κ΄λ¦¬ λ° μ¬μ©:**
|
644 |
+
* `register_routes` ν¨μλ μ± μμ μ ν λ²λ§ νΈμΆλλ―λ‘, μΈμλ‘ μ λ¬λ `app_ready` κ°μ **νΈμΆ μμ μ μ€λ
μ·**μ
λλ€. λ°±κ·ΈλΌμ΄λ μ€λ λκ° λμ€μ `app.py`μ μ μ `app_ready` κ°μ λ³κ²½ν΄λ `register_routes` λ΄λΆμ μ§μ λ³μ `app_ready` (μ½λμμλ `app_ready_flag`λ‘ λͺ
μΉ λ³κ²½ μ μ)λ μ
λ°μ΄νΈλμ§ μμ΅λλ€.
|
645 |
+
* `index` ν¨μ λ΄μμ `nonlocal app_ready` μ¬μ©μ μλͺ»λμμ΅λλ€. `app_ready`λ μ μ λ³μμ΄λ―λ‘ `global app_ready`λ₯Ό μ¬μ©νκ±°λ, λ μ’μ λ°©λ²μ Flaskμ `app.before_request` λ°μ½λ μ΄ν°λ `g` κ°μ²΄λ₯Ό μ¬μ©νμ¬ μμ² μ»¨ν
μ€νΈ λ΄μμ μνλ₯Ό νμΈνλ κ²μ
λλ€. νΉμ `threading.Event` κ°μ²΄λ₯Ό μ¬μ©νμ¬ μ€λ λ κ° μνλ₯Ό μμ νκ² κ³΅μ ν μ μμ΅λλ€.
|
646 |
+
* `index` ν¨μμ 30μ΄ κ°μ Ready λ‘μ§μ `os.path.getmtime(__file__)`μ μ¬μ©νλλ°, μ΄λ νμΌ μμ μκ°μ κΈ°μ€μΌλ‘ νλ―λ‘ μ±μ μ€μ μμ μκ°κ³Ό λ¬λΌ λΆμ νν©λλ€.
|
647 |
+
* **μμ μ μ:**
|
648 |
+
* `app.py`μμ `app_ready`λ₯Ό `threading.Event` κ°μ²΄λ‘ κ΄λ¦¬νκ³ , μ΄λ₯Ό `register_routes`μ μ λ¬ν©λλ€. κ° λΌμ°νΈ νΈλ€λ¬μμλ `app_ready_event.is_set()`μΌλ‘ μνλ₯Ό νμΈν©λλ€. (μ μ½λμ λ°μλ¨)
|
649 |
+
* μ± μμ μκ°μ λͺ¨λ λ‘λ μμ μ `time.time()`μΌλ‘ κΈ°λ‘νκ³ , `index` ν¨μμμ μ΄λ₯Ό μ¬μ©νμ¬ κ²½κ³Ό μκ°μ κ³μ°ν©λλ€. (μ μ½λμ λ°μλ¨)
|
650 |
+
* 30μ΄ κ°μ Ready λ‘μ§μ μ£Όμ μ²λ¦¬νκ±°λ μ κ±°νλ κ²μ κΆμ₯ν©λλ€. μ΄κΈ°νκ° μ€λ 걸리λ κ·Όλ³Έ μμΈμ ν΄κ²°νλ κ²μ΄ μ’μ΅λλ€.
|
651 |
+
|
652 |
+
3. **κ°μ²΄ λ° μμ± μ‘΄μ¬ μ¬λΆ νμΈ (λ°©μ΄ μ½λ):**
|
653 |
+
* `llm_interface`, `retriever`, `stt_client`, `DocumentProcessor`, `base_retriever` λ±μ κ°μ²΄κ° Noneμ΄κ±°λ νμν λ©μλ/μμ±(`search`, `transcribe_audio`, `add_documents`, `documents` λ±)μ΄ μμ κ²½μ° `AttributeError`λ `TypeError`κ° λ°μν μ μμ΅λλ€.
|
654 |
+
* **μμ μ μ:** κ° API νΈλ€λ¬ μμ λΆλΆμ΄λ κ°μ²΄ μ¬μ© μ§μ μ ν΄λΉ κ°μ²΄μ νμν μμ±/λ©μλκ° μ‘΄μ¬νλμ§ νμΈνλ λ°©μ΄ μ½λλ₯Ό μΆκ°ν©λλ€. (μ μ½λμ μΌλΆ λ°μλ¨)
|
655 |
+
|
656 |
+
4. **μ€λ₯ λ‘κΉ
:**
|
657 |
+
* `except Exception as e:` λΈλ‘μμ `logger.error(f"...", exc_info=True)`λ₯Ό μ¬μ©νμ¬ μ€ν νΈλ μ΄μ€ μ 체λ₯Ό λ‘κΉ
νλ©΄ λλ²κΉ
μ λ μ μ©ν©λλ€.
|
658 |
+
* **μμ μ μ:** μ£Όμ `except` λΈλ‘μ `exc_info=True`λ₯Ό μΆκ°ν©λλ€. (μ μ½λμ λ°μλ¨)
|
659 |
+
|
660 |
+
5. **`/api/documents` λ‘μ§ κ°μ :**
|
661 |
+
* `base_retriever.documents`κ° Langchainμ `Document` κ°μ²΄ 리μ€νΈμΌ κ²½μ°, `source` λ±μ μ 보λ `doc.metadata['source']` μ κ°μ΄ μ κ·Όν΄μΌ ν μ μμ΅λλ€. νμ¬ μ½λλ λμ
λ리μ Langchain `Document` ꡬ쑰λ₯Ό νΌμ©νμ¬ μ²λ¦¬νλ €κ³ μλνκ³ μμ΅λλ€. `base_retriever.documents`μ μ€μ λ°μ΄ν° ꡬ쑰λ₯Ό λͺ
νν νκ³ κ·Έμ λ§κ² μ½λλ₯Ό μμ ν΄μΌ ν©λλ€.
|
662 |
+
* λ¬Έμ λͺ©λ‘ μ λ ¬ κΈ°μ€μ νμΌλͺ
(`filename`)μΌλ‘ λ³κ²½νλ κ²μ΄ λ μ§κ΄μ μΌ μ μμ΅λλ€.
|
663 |
+
* **μμ μ μ:** `base_retriever.documents`μ ꡬ쑰λ₯Ό νμΈνκ³ `source`, `filename`, `filetype` μΆμΆ λ‘μ§μ λͺ
νν ν©λλ€. μ λ ¬ κΈ°μ€μ `filename`μΌλ‘ λ³κ²½νμ΅λλ€. (μ μ½λ μ°Έμ‘°)
|
664 |
+
|
665 |
+
**μμ½ λ° λ€μ λ¨κ³:**
|
666 |
+
|
667 |
+
* `/api/documents`μ 503 μ€λ₯λ νμ¬ μ½λλ§μΌλ‘λ μ€λͺ
νκΈ° μ΄λ ΅μ΅λλ€. **μ€ν νκ²½μ μ½λ λ²μ νμΈ** λ° **μμΈ λ‘κΉ
μΆκ°**λ₯Ό ν΅ν΄ μμΈμ μΆμ ν΄μΌ ν©λλ€.
|
668 |
+
* `app_ready` μν κ΄λ¦¬ λ°©μμ `threading.Event` λ±μΌλ‘ κ°μ νκ³ , `index` ν¨μμ μκ° κ³μ° λ‘μ§μ μμ νλ κ²μ΄ μ’μ΅λλ€.
|
669 |
+
* μ½λ μ λ°μ κ±Έμ³ κ°μ²΄ λ° μμ± μ‘΄μ¬ μ¬λΆλ₯Ό νμΈνλ λ°©μ΄ μ½λλ₯Ό μΆκ°νκ³ , μ€λ₯ λ‘κΉ
μ κ°νν©λλ€.
|
670 |
+
|
671 |
+
**κ°μ₯ λ¨Όμ λΈλΌμ°μ κ°λ°μ λꡬμ 'Network' νμμ `/api/documents` μμ²μ μλ΅(Response) λ³Έλ¬Έμ νΉμ λ μμΈν μ€λ₯ λ©μμ§κ° μλμ§ νμΈν΄ 보μΈμ
|