Spaces:
No application file
No application file
fix
Browse files- app/app_routes.py +104 -164
app/app_routes.py
CHANGED
@@ -7,8 +7,8 @@ import json
|
|
7 |
import logging
|
8 |
import tempfile
|
9 |
import requests
|
10 |
-
import time
|
11 |
-
import threading
|
12 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
13 |
from datetime import datetime
|
14 |
from werkzeug.utils import secure_filename
|
@@ -19,8 +19,7 @@ logger = logging.getLogger(__name__)
|
|
19 |
# ์ฑ ์์ ์๊ฐ ๊ธฐ๋ก (๋ชจ๋ ๋ก๋ ์์ )
|
20 |
APP_START_TIME = time.time()
|
21 |
|
22 |
-
|
23 |
-
def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_event, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
|
24 |
"""Flask ์ ํ๋ฆฌ์ผ์ด์
์ ๊ธฐ๋ณธ ๋ผ์ฐํธ ๋ฑ๋ก"""
|
25 |
|
26 |
# ํฌํผ ํจ์ (๋ณ๊ฒฝ ์์)
|
@@ -34,7 +33,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
34 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
35 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
36 |
|
37 |
-
# --- ๋ก๊ทธ์ธ/๋ก๊ทธ์์ ๋ผ์ฐํธ ---
|
38 |
@app.route('/login', methods=['GET', 'POST'])
|
39 |
def login():
|
40 |
error = None
|
@@ -66,7 +65,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
66 |
else:
|
67 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
68 |
error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
|
69 |
-
else:
|
70 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
|
71 |
if session.get('logged_in'):
|
72 |
logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
@@ -75,65 +74,62 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
75 |
logger.info("---------- ๋ก๊ทธ์ธ ํ์ด์ง ๋ ๋๋ง ----------")
|
76 |
return render_template('login.html', error=error, next=next_url)
|
77 |
|
78 |
-
|
79 |
@app.route('/logout')
|
80 |
def logout():
|
81 |
"""๋ก๊ทธ์์ ์ฒ๋ฆฌ"""
|
82 |
username = session.get('username', 'unknown')
|
83 |
if session.pop('logged_in', None):
|
84 |
-
|
85 |
-
|
86 |
else:
|
87 |
-
|
88 |
|
89 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
90 |
response = redirect(url_for('login'))
|
91 |
logger.debug(f"๋ก๊ทธ์์ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
92 |
return response
|
93 |
|
94 |
-
# --- ๋ฉ์ธ ํ์ด์ง ๋ฐ ์ํ ํ์ธ (
|
95 |
@app.route('/')
|
96 |
@login_required
|
97 |
def index():
|
98 |
"""๋ฉ์ธ ํ์ด์ง"""
|
99 |
-
#
|
100 |
-
is_ready =
|
101 |
|
102 |
time_elapsed = time.time() - APP_START_TIME
|
103 |
|
104 |
if not is_ready:
|
105 |
logger.info(f"์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์ (๊ฒฝ๊ณผ ์๊ฐ: {time_elapsed:.1f}์ด)")
|
106 |
-
|
107 |
-
return render_template('loading.html') # 200 OK์ ๋ก๋ฉ ํ์ด์ง
|
108 |
|
109 |
logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
|
110 |
-
# index.html ํ
ํ๋ฆฟ์ด ์๋ค๊ณ ๊ฐ์
|
111 |
return render_template('index.html')
|
112 |
|
113 |
-
|
114 |
@app.route('/api/status')
|
115 |
@login_required
|
116 |
def app_status():
|
117 |
"""์ฑ ์ด๊ธฐํ ์ํ ํ์ธ API"""
|
118 |
-
is_ready =
|
119 |
logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
|
120 |
return jsonify({"ready": is_ready})
|
121 |
|
122 |
-
# --- LLM API ---
|
123 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
124 |
@login_required
|
125 |
def llm_api():
|
126 |
"""์ฌ์ฉ ๊ฐ๋ฅํ LLM ๋ชฉ๋ก ๋ฐ ์ ํ API"""
|
127 |
-
|
128 |
-
# LLM ๋ชฉ๋ก ์กฐํ๋
|
|
|
|
|
129 |
|
130 |
if request.method == 'GET':
|
131 |
logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
|
132 |
try:
|
133 |
-
# ๊ฐ์ฒด ๋ฐ ์์ฑ ํ์ธ ๊ฐํ
|
134 |
if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
|
135 |
-
|
136 |
-
|
137 |
|
138 |
current_details = llm_interface.get_current_llm_details()
|
139 |
supported_llms_dict = llm_interface.SUPPORTED_LLMS
|
@@ -150,10 +146,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
150 |
return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500
|
151 |
|
152 |
elif request.method == 'POST':
|
153 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
154 |
-
if not is_ready: # LLM ๋ณ๊ฒฝ์ ์ฑ ์ค๋น ์๋ฃ ํ ๊ฐ๋ฅ
|
155 |
-
return jsonify({"error": "์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์
๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์."}), 503
|
156 |
-
|
157 |
data = request.get_json()
|
158 |
if not data or 'llm_id' not in data:
|
159 |
return jsonify({"error": "LLM ID๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
@@ -162,10 +154,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
162 |
logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
|
163 |
|
164 |
try:
|
165 |
-
# ๊ฐ์ฒด ๋ฐ ์์ฑ/๋ฉ์๋ ํ์ธ ๊ฐํ
|
166 |
if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
|
167 |
-
|
168 |
-
|
169 |
|
170 |
if llm_id not in llm_interface.llm_clients:
|
171 |
return jsonify({"error": f"์ง์๋์ง ์๋ LLM ID: {llm_id}"}), 400
|
@@ -186,23 +177,18 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
186 |
logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
|
187 |
return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500
|
188 |
|
189 |
-
# --- Chat API ---
|
190 |
@app.route('/api/chat', methods=['POST'])
|
191 |
@login_required
|
192 |
def chat():
|
193 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
194 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
195 |
-
if not is_ready:
|
196 |
-
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค...", "answer": "์ฃ์กํฉ๋๋ค. ์์คํ
์ด ์์ง ์ค๋น ์ค์
๋๋ค.", "sources": []}), 503
|
197 |
-
|
198 |
-
# retriever ๊ฐ์ฒด ๋ฐ ํ์ ๋ฉ์๋ ํ์ธ
|
199 |
if retriever is None or not hasattr(retriever, 'search'):
|
200 |
logger.warning("์ฑํ
API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
201 |
return jsonify({
|
202 |
"answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
|
203 |
"sources": [],
|
204 |
"error": "Retriever not ready"
|
205 |
-
}), 503
|
206 |
|
207 |
try:
|
208 |
data = request.get_json()
|
@@ -212,25 +198,21 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
212 |
query = data['query']
|
213 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
214 |
|
215 |
-
# RAG ๊ฒ์ ์ํ
|
216 |
search_results = retriever.search(query, top_k=5, first_stage_k=6)
|
217 |
|
218 |
-
# DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ
|
219 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
220 |
-
|
221 |
-
|
222 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
223 |
|
224 |
if not context:
|
225 |
logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
226 |
|
227 |
-
# LLM ์ธํฐํ์ด์ค ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ
|
228 |
llm_id = data.get('llm_id', None)
|
229 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
230 |
-
|
231 |
-
|
232 |
|
233 |
-
# LLM ํธ์ถ
|
234 |
if not context:
|
235 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
236 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
@@ -238,7 +220,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
238 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
239 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
240 |
|
241 |
-
# ์์ค ์ ๋ณด ์ถ์ถ
|
242 |
sources = []
|
243 |
if search_results:
|
244 |
for result in search_results:
|
@@ -267,7 +248,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
267 |
logger.warning(f"CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
268 |
sources.append(source_info)
|
269 |
|
270 |
-
# ์ต์ข
์๋ต
|
271 |
response_data = {
|
272 |
"answer": answer,
|
273 |
"sources": sources,
|
@@ -279,16 +259,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
279 |
logger.error(f"์ฑํ
์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
280 |
return jsonify({"error": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}), 500
|
281 |
|
282 |
-
# --- Voice Chat API ---
|
283 |
@app.route('/api/voice', methods=['POST'])
|
284 |
@login_required
|
285 |
def voice_chat():
|
286 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
287 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
288 |
-
if not is_ready:
|
289 |
-
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
|
290 |
-
|
291 |
-
# ํ์ ์ปดํฌ๋ํธ ํ์ธ
|
292 |
if retriever is None or not hasattr(retriever, 'search'):
|
293 |
logger.error("์์ฑ API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์")
|
294 |
return jsonify({"error": "๊ฒ์ ์์ง ์ค๋น ์๋จ"}), 503
|
@@ -296,11 +271,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
296 |
logger.error("์์ฑ API ์์ฒญ ์ STT ํด๋ผ์ด์ธํธ๊ฐ ์ค๋น๋์ง ์์")
|
297 |
return jsonify({"error": "์์ฑ ์ธ์ ์๋น์ค ์ค๋น ์๋จ"}), 503
|
298 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
299 |
-
|
300 |
-
|
301 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
302 |
-
|
303 |
-
|
304 |
|
305 |
logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
|
306 |
|
@@ -312,16 +287,13 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
312 |
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
313 |
|
314 |
try:
|
315 |
-
# ์ค๋์ค ํ์ผ ์์ ์ ์ฅ ๋ฐ ์ฒ๋ฆฌ
|
316 |
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
317 |
audio_file.save(temp_audio.name)
|
318 |
logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
|
319 |
-
# STT ์ํ (๋ฐ์ดํธ ์ ๋ฌ ๊ฐ์ )
|
320 |
with open(temp_audio.name, 'rb') as f_bytes:
|
321 |
audio_bytes = f_bytes.read()
|
322 |
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
323 |
|
324 |
-
# STT ๊ฒฐ๊ณผ ์ฒ๋ฆฌ
|
325 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
326 |
error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
|
327 |
logger.error(f"์์ฑ์ธ์ ์คํจ: {error_msg}")
|
@@ -335,15 +307,14 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
335 |
"answer": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
|
336 |
"sources": [],
|
337 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
338 |
-
}), 200
|
339 |
|
340 |
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
341 |
|
342 |
-
# --- RAG ๋ฐ LLM ํธ์ถ (Chat API์ ๋์ผ ๋ก์ง) ---
|
343 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
344 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
345 |
|
346 |
-
llm_id = request.form.get('llm_id', None)
|
347 |
if not context:
|
348 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
349 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
@@ -351,34 +322,33 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
351 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
352 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
353 |
|
354 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (Chat API์ ๋์ผ ๋ก์ง)
|
355 |
sources = []
|
356 |
if search_results:
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
response_data = {
|
383 |
"transcription": transcription,
|
384 |
"answer": answer,
|
@@ -391,16 +361,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
391 |
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
392 |
return jsonify({"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์", "details": str(e)}), 500
|
393 |
|
394 |
-
# --- Document Upload API ---
|
395 |
@app.route('/api/upload', methods=['POST'])
|
396 |
@login_required
|
397 |
def upload_document():
|
398 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ์
๋ก๋ API"""
|
399 |
-
is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
|
400 |
-
if not is_ready:
|
401 |
-
return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
|
402 |
-
|
403 |
-
# base_retriever ๊ฐ์ฒด ๋ฐ ํ์ ๋ฉ์๋ ํ์ธ
|
404 |
if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
405 |
logger.error("๋ฌธ์ ์
๋ก๋ API ์์ฒญ ์ base_retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
406 |
return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์ค๋น๋์ง ์์์ต๋๋ค."}), 503
|
@@ -412,38 +377,33 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
412 |
if not doc_file or not doc_file.filename:
|
413 |
return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
|
414 |
|
415 |
-
# ALLOWED_DOC_EXTENSIONS๋ฅผ ํจ์ ๋ด์์ ๋ค์ ์ ์ํ๊ฑฐ๋ ์ ์ญ ์์๋ก ์ฌ์ฉ
|
416 |
-
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
417 |
if not allowed_doc_file(doc_file.filename):
|
|
|
418 |
logger.warning(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
|
419 |
return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์
๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
420 |
|
421 |
try:
|
422 |
filename = secure_filename(doc_file.filename)
|
423 |
-
|
424 |
-
if 'DATA_FOLDER' not in app.config:
|
425 |
-
logger.error("Flask app.config์ DATA_FOLDER๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
|
426 |
-
return jsonify({"error": "์๋ฒ ์ค์ ์ค๋ฅ (DATA_FOLDER)"}), 500
|
427 |
-
data_folder = app.config['DATA_FOLDER']
|
428 |
os.makedirs(data_folder, exist_ok=True)
|
429 |
filepath = os.path.join(data_folder, filename)
|
430 |
|
431 |
doc_file.save(filepath)
|
432 |
logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")
|
433 |
|
434 |
-
# DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ
|
435 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
|
|
|
|
440 |
|
441 |
content = None
|
442 |
file_ext = filename.rsplit('.', 1)[1].lower()
|
443 |
metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
|
444 |
docs = []
|
445 |
|
446 |
-
# ํ์ผ ์ฝ๊ธฐ ๋ฐ ๋ด์ฉ ์ถ์ถ
|
447 |
if file_ext in ['txt', 'md', 'csv']:
|
448 |
try:
|
449 |
with open(filepath, 'r', encoding='utf-8') as f:
|
@@ -460,115 +420,94 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
460 |
logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
|
461 |
return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500
|
462 |
elif file_ext == 'pdf':
|
463 |
-
|
464 |
-
# ์ฌ๊ธฐ์ PDF ํ
์คํธ ์ถ์ถ ๋ก์ง ์ถ๊ฐ (์: PyPDF2 ์ฌ์ฉ)
|
465 |
-
# content = extract_text_from_pdf(filepath)
|
466 |
elif file_ext == 'docx':
|
467 |
-
|
468 |
-
# ์ฌ๊ธฐ์ DOCX ํ
์คํธ ์ถ์ถ ๋ก์ง ์ถ๊ฐ (์: python-docx ์ฌ์ฉ)
|
469 |
-
# content = extract_text_from_docx(filepath)
|
470 |
|
471 |
-
|
472 |
-
if content is not None: # ๋ด์ฉ์ด ์ฑ๊ณต์ ์ผ๋ก ์ฝํ๊ฑฐ๋ ์ถ์ถ๋์์ ๋๋ง
|
473 |
if file_ext == 'csv':
|
474 |
logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
|
475 |
docs = DocumentProcessor.csv_to_documents(content, metadata)
|
476 |
-
elif file_ext in ['txt', 'md']
|
477 |
-
logger.info(f"ํ
์คํธ
|
478 |
-
# text_to_documents ํจ์๊ฐ ์ฒญํฌ ๋ถํ ๋ฑ์ ์ํํ๋ค๊ณ ๊ฐ์
|
479 |
docs = DocumentProcessor.text_to_documents(
|
480 |
content, metadata=metadata,
|
481 |
-
chunk_size=512, chunk_overlap=50
|
482 |
)
|
483 |
|
484 |
-
# ๊ฒ์๊ธฐ์ ์ถ๊ฐ ๋ฐ ์ ์ฅ
|
485 |
if docs:
|
486 |
logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
|
487 |
base_retriever.add_documents(docs)
|
488 |
|
489 |
logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
|
490 |
-
|
491 |
-
if 'INDEX_PATH' not in app.config:
|
492 |
-
logger.error("Flask app.config์ INDEX_PATH๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
|
493 |
-
return jsonify({"error": "์๋ฒ ์ค์ ์ค๋ฅ (INDEX_PATH)"}), 500
|
494 |
-
index_path = app.config['INDEX_PATH']
|
495 |
-
# ์ธ๋ฑ์ค ์ ์ฅ ๊ฒฝ๋ก๊ฐ ํด๋์ธ์ง ํ์ผ์ธ์ง ํ์ธ ํ์ (VectorRetriever.save ๊ตฌํ์ ๋ฐ๋ผ ๋ค๋ฆ)
|
496 |
-
# ์ฌ๊ธฐ์๋ index_path๊ฐ ๋๋ ํ ๋ฆฌ๋ผ๊ณ ๊ฐ์ ํ๊ณ ๋ถ๋ชจ ๋๋ ํ ๋ฆฌ ์์ฑ
|
497 |
os.makedirs(os.path.dirname(index_path), exist_ok=True)
|
498 |
try:
|
499 |
base_retriever.save(index_path)
|
500 |
logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
|
501 |
-
# TODO: ์ฌ์์ํ ๊ฒ์๊ธฐ(retriever) ์
๋ฐ์ดํธ ๋ก์ง ํ์ ์ ์ถ๊ฐ
|
502 |
-
# ์: if retriever and hasattr(retriever, 'update_base_retriever'): retriever.update_base_retriever(base_retriever)
|
503 |
return jsonify({
|
504 |
"success": True,
|
505 |
"message": f"ํ์ผ '{filename}' ์
๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
|
506 |
})
|
507 |
except Exception as e_save:
|
508 |
logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}", exc_info=True)
|
509 |
-
# ์ ์ฅ ์คํจ ์ ์ถ๊ฐ๋ ๋ฌธ์ ๋กค๋ฐฑ ๊ณ ๋ ค?
|
510 |
return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
|
511 |
else:
|
512 |
logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค.")
|
513 |
-
# ํ์ผ์ ์ ์ฅ๋์์ผ๋ฏ๋ก warning ๋ฐํ
|
514 |
return jsonify({
|
515 |
-
"warning": True,
|
516 |
"message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค."
|
517 |
})
|
518 |
|
519 |
except Exception as e:
|
520 |
logger.error(f"ํ์ผ ์
๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
521 |
-
# ์ค๋ฅ ๋ฐ์ ์ ์ ์ฅ๋ ํ์ผ ์ญ์
|
522 |
if 'filepath' in locals() and os.path.exists(filepath):
|
523 |
-
try:
|
524 |
-
|
|
|
|
|
525 |
return jsonify({"error": f"ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
526 |
|
527 |
-
# --- Document List API ---
|
528 |
@app.route('/api/documents', methods=['GET'])
|
529 |
@login_required
|
530 |
def list_documents():
|
531 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ๋ชฉ๋ก API"""
|
|
|
532 |
logger.info("๋ฌธ์ ๋ชฉ๋ก API ์์ฒญ ์์")
|
533 |
|
534 |
-
# base_retriever ์ํ ํ์ธ
|
535 |
if base_retriever is None:
|
536 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever๊ฐ None์
๋๋ค.")
|
537 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
538 |
elif not hasattr(base_retriever, 'documents'):
|
539 |
-
|
540 |
-
|
541 |
|
542 |
-
# ๋ก๊น
์ถ๊ฐ
|
543 |
logger.info(f"base_retriever ๊ฐ์ฒด ํ์
: {type(base_retriever)}")
|
544 |
logger.info(f"base_retriever.documents ์กด์ฌ ์ฌ๋ถ: {hasattr(base_retriever, 'documents')}")
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
|
549 |
try:
|
550 |
sources = {}
|
551 |
total_chunks = 0
|
552 |
-
doc_list =
|
553 |
|
554 |
-
# doc_list๊ฐ ๋ฆฌ์คํธ์ธ์ง ํ์ธ
|
555 |
if not isinstance(doc_list, list):
|
556 |
-
|
557 |
-
|
558 |
|
559 |
logger.info(f"์ด {len(doc_list)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
|
560 |
for i, doc in enumerate(doc_list):
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
continue
|
565 |
|
566 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (metadata ์ฐ์ )
|
567 |
source = "unknown"
|
568 |
metadata = doc.get("metadata")
|
569 |
if isinstance(metadata, dict):
|
570 |
source = metadata.get("source", "unknown")
|
571 |
-
# metadata์ ์์ผ๋ฉด doc ์์ฒด์์ ์ฐพ๊ธฐ (ํ์ ํธํ์ฑ)
|
572 |
if source == "unknown":
|
573 |
source = doc.get("source", "unknown")
|
574 |
|
@@ -576,12 +515,12 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
576 |
if source in sources:
|
577 |
sources[source]["chunks"] += 1
|
578 |
else:
|
579 |
-
# filename, filetype ์ถ์ถ (metadata ์ฐ์ )
|
580 |
filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
|
581 |
filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
|
582 |
-
|
583 |
-
|
584 |
-
if filetype == "unknown" and
|
|
|
585 |
|
586 |
sources[source] = {
|
587 |
"filename": filename,
|
@@ -590,12 +529,10 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
590 |
}
|
591 |
total_chunks += 1
|
592 |
else:
|
593 |
-
|
594 |
-
logger.warning(f"์ฒญํฌ {i}์์ ์์ค ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์: {str(doc)[:200]}...") # ๋ด์ฉ ์ผ๋ถ ๋ก๊น
|
595 |
|
596 |
-
# ์ต์ข
๋ชฉ๋ก ์์ฑ ๋ฐ ์ ๋ ฌ
|
597 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
598 |
-
documents.sort(key=lambda x: x.get("filename", ""), reverse=False)
|
599 |
|
600 |
logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {total_chunks}๊ฐ ์ฒญํฌ")
|
601 |
return jsonify({
|
@@ -605,6 +542,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
605 |
})
|
606 |
|
607 |
except Exception as e:
|
|
|
608 |
logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
609 |
-
# 503 ๋์ 500 ๋ฐํ
|
610 |
return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
|
|
|
|
|
|
|
7 |
import logging
|
8 |
import tempfile
|
9 |
import requests
|
10 |
+
import time # ์ฑ ์์ ์๊ฐ ๊ธฐ๋ก ์ํด ์ถ๊ฐ
|
11 |
+
import threading # threading.Event ์ฌ์ฉ ์ํด ์ถ๊ฐ
|
12 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
13 |
from datetime import datetime
|
14 |
from werkzeug.utils import secure_filename
|
|
|
19 |
# ์ฑ ์์ ์๊ฐ ๊ธฐ๋ก (๋ชจ๋ ๋ก๋ ์์ )
|
20 |
APP_START_TIME = time.time()
|
21 |
|
22 |
+
def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_flag, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
|
|
|
23 |
"""Flask ์ ํ๋ฆฌ์ผ์ด์
์ ๊ธฐ๋ณธ ๋ผ์ฐํธ ๋ฑ๋ก"""
|
24 |
|
25 |
# ํฌํผ ํจ์ (๋ณ๊ฒฝ ์์)
|
|
|
33 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
34 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
35 |
|
36 |
+
# --- ๋ก๊ทธ์ธ/๋ก๊ทธ์์ ๋ผ์ฐํธ (๋ณ๊ฒฝ ์์) ---
|
37 |
@app.route('/login', methods=['GET', 'POST'])
|
38 |
def login():
|
39 |
error = None
|
|
|
65 |
else:
|
66 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
67 |
error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
|
68 |
+
else: # GET ์์ฒญ
|
69 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
|
70 |
if session.get('logged_in'):
|
71 |
logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
|
|
74 |
logger.info("---------- ๋ก๊ทธ์ธ ํ์ด์ง ๋ ๋๋ง ----------")
|
75 |
return render_template('login.html', error=error, next=next_url)
|
76 |
|
|
|
77 |
@app.route('/logout')
|
78 |
def logout():
|
79 |
"""๋ก๊ทธ์์ ์ฒ๋ฆฌ"""
|
80 |
username = session.get('username', 'unknown')
|
81 |
if session.pop('logged_in', None):
|
82 |
+
session.pop('username', None)
|
83 |
+
logger.info(f"์ฌ์ฉ์ {username} ๋ก๊ทธ์์ ์ฒ๋ฆฌ ์๋ฃ. ํ์ฌ ์ธ์
: {session}")
|
84 |
else:
|
85 |
+
logger.warning("๋ก๊ทธ์ธ๋์ง ์์ ์ํ์์ ๋ก๊ทธ์์ ์๋")
|
86 |
|
87 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
88 |
response = redirect(url_for('login'))
|
89 |
logger.debug(f"๋ก๊ทธ์์ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
90 |
return response
|
91 |
|
92 |
+
# --- ๋ฉ์ธ ํ์ด์ง ๋ฐ ์ํ ํ์ธ (app_ready_flag ์ฌ์ฉ) ---
|
93 |
@app.route('/')
|
94 |
@login_required
|
95 |
def index():
|
96 |
"""๋ฉ์ธ ํ์ด์ง"""
|
97 |
+
# app_ready_flag๊ฐ Event ๊ฐ์ฒด์ธ์ง bool์ธ์ง ํ์ธํ๊ณ ์ํ ๊ฐ์ ธ์ค๊ธฐ
|
98 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
99 |
|
100 |
time_elapsed = time.time() - APP_START_TIME
|
101 |
|
102 |
if not is_ready:
|
103 |
logger.info(f"์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์ (๊ฒฝ๊ณผ ์๊ฐ: {time_elapsed:.1f}์ด)")
|
104 |
+
return render_template('loading.html') # 200 OK์ ๋ก๋ฉ ํ์ด์ง
|
|
|
105 |
|
106 |
logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
|
|
|
107 |
return render_template('index.html')
|
108 |
|
|
|
109 |
@app.route('/api/status')
|
110 |
@login_required
|
111 |
def app_status():
|
112 |
"""์ฑ ์ด๊ธฐํ ์ํ ํ์ธ API"""
|
113 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
114 |
logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
|
115 |
return jsonify({"ready": is_ready})
|
116 |
|
117 |
+
# --- LLM API (๋ณ๊ฒฝ ์์) ---
|
118 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
119 |
@login_required
|
120 |
def llm_api():
|
121 |
"""์ฌ์ฉ ๊ฐ๋ฅํ LLM ๋ชฉ๋ก ๋ฐ ์ ํ API"""
|
122 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
123 |
+
# ์ด๊ธฐํ ์ค์๋ LLM ๋ชฉ๋ก ์กฐํ๋ ๊ฐ๋ฅํ๋๋ก ํ์ฉ
|
124 |
+
# if not is_ready:
|
125 |
+
# return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
|
126 |
|
127 |
if request.method == 'GET':
|
128 |
logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
|
129 |
try:
|
|
|
130 |
if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
|
131 |
+
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ์ด ์์ต๋๋ค.")
|
132 |
+
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
133 |
|
134 |
current_details = llm_interface.get_current_llm_details()
|
135 |
supported_llms_dict = llm_interface.SUPPORTED_LLMS
|
|
|
146 |
return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500
|
147 |
|
148 |
elif request.method == 'POST':
|
|
|
|
|
|
|
|
|
149 |
data = request.get_json()
|
150 |
if not data or 'llm_id' not in data:
|
151 |
return jsonify({"error": "LLM ID๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
|
|
154 |
logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
|
155 |
|
156 |
try:
|
|
|
157 |
if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
|
158 |
+
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ/๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
159 |
+
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
160 |
|
161 |
if llm_id not in llm_interface.llm_clients:
|
162 |
return jsonify({"error": f"์ง์๋์ง ์๋ LLM ID: {llm_id}"}), 400
|
|
|
177 |
logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
|
178 |
return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500
|
179 |
|
180 |
+
# --- Chat API (๋ณ๊ฒฝ ์์) ---
|
181 |
@app.route('/api/chat', methods=['POST'])
|
182 |
@login_required
|
183 |
def chat():
|
184 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
|
|
|
|
|
|
|
|
|
|
185 |
if retriever is None or not hasattr(retriever, 'search'):
|
186 |
logger.warning("์ฑํ
API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
187 |
return jsonify({
|
188 |
"answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
|
189 |
"sources": [],
|
190 |
"error": "Retriever not ready"
|
191 |
+
}), 200 # 503 ๋์ 200 OK
|
192 |
|
193 |
try:
|
194 |
data = request.get_json()
|
|
|
198 |
query = data['query']
|
199 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
200 |
|
|
|
201 |
search_results = retriever.search(query, top_k=5, first_stage_k=6)
|
202 |
|
|
|
203 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
204 |
+
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
205 |
+
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
206 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
207 |
|
208 |
if not context:
|
209 |
logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
210 |
|
|
|
211 |
llm_id = data.get('llm_id', None)
|
212 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
213 |
+
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
214 |
+
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
215 |
|
|
|
216 |
if not context:
|
217 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
218 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
|
|
220 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
221 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
222 |
|
|
|
223 |
sources = []
|
224 |
if search_results:
|
225 |
for result in search_results:
|
|
|
248 |
logger.warning(f"CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
249 |
sources.append(source_info)
|
250 |
|
|
|
251 |
response_data = {
|
252 |
"answer": answer,
|
253 |
"sources": sources,
|
|
|
259 |
logger.error(f"์ฑํ
์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
260 |
return jsonify({"error": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}), 500
|
261 |
|
262 |
+
# --- Voice Chat API (๋ณ๊ฒฝ ์์) ---
|
263 |
@app.route('/api/voice', methods=['POST'])
|
264 |
@login_required
|
265 |
def voice_chat():
|
266 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
|
|
|
|
|
|
|
|
|
|
267 |
if retriever is None or not hasattr(retriever, 'search'):
|
268 |
logger.error("์์ฑ API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์")
|
269 |
return jsonify({"error": "๊ฒ์ ์์ง ์ค๋น ์๋จ"}), 503
|
|
|
271 |
logger.error("์์ฑ API ์์ฒญ ์ STT ํด๋ผ์ด์ธํธ๊ฐ ์ค๋น๋์ง ์์")
|
272 |
return jsonify({"error": "์์ฑ ์ธ์ ์๋น์ค ์ค๋น ์๋จ"}), 503
|
273 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
274 |
+
logger.error("์์ฑ API ์์ฒญ ์ LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์")
|
275 |
+
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
276 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
277 |
+
logger.error("์์ฑ API ์์ฒญ ์ DocumentProcessor๊ฐ ์ค๋น๋์ง ์์")
|
278 |
+
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
279 |
|
280 |
logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
|
281 |
|
|
|
287 |
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
288 |
|
289 |
try:
|
|
|
290 |
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
291 |
audio_file.save(temp_audio.name)
|
292 |
logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
|
|
|
293 |
with open(temp_audio.name, 'rb') as f_bytes:
|
294 |
audio_bytes = f_bytes.read()
|
295 |
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
296 |
|
|
|
297 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
298 |
error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
|
299 |
logger.error(f"์์ฑ์ธ์ ์คํจ: {error_msg}")
|
|
|
307 |
"answer": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
|
308 |
"sources": [],
|
309 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
310 |
+
}), 200
|
311 |
|
312 |
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
313 |
|
|
|
314 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
315 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
316 |
|
317 |
+
llm_id = request.form.get('llm_id', None)
|
318 |
if not context:
|
319 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
320 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
|
|
322 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
323 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
324 |
|
|
|
325 |
sources = []
|
326 |
if search_results:
|
327 |
+
for result in search_results:
|
328 |
+
if not isinstance(result, dict):
|
329 |
+
continue
|
330 |
+
source_info = {}
|
331 |
+
source_key = result.get("source")
|
332 |
+
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
333 |
+
source_key = result["metadata"].get("source")
|
334 |
+
if source_key:
|
335 |
+
source_info["source"] = source_key
|
336 |
+
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
337 |
+
filetype = result.get("filetype")
|
338 |
+
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
339 |
+
filetype = result["metadata"].get("filetype")
|
340 |
+
if "text" in result and filetype == "csv":
|
341 |
+
try:
|
342 |
+
text_lines = result["text"].strip().split('\n')
|
343 |
+
if text_lines:
|
344 |
+
first_line = text_lines[0].strip()
|
345 |
+
if ',' in first_line:
|
346 |
+
first_column = first_line.split(',')[0].strip()
|
347 |
+
source_info["id"] = first_column
|
348 |
+
except Exception as e:
|
349 |
+
logger.warning(f"[์์ฑ์ฑ] CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
350 |
+
sources.append(source_info)
|
351 |
+
|
352 |
response_data = {
|
353 |
"transcription": transcription,
|
354 |
"answer": answer,
|
|
|
361 |
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
362 |
return jsonify({"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์", "details": str(e)}), 500
|
363 |
|
364 |
+
# --- Document Upload API (๋ณ๊ฒฝ ์์) ---
|
365 |
@app.route('/api/upload', methods=['POST'])
|
366 |
@login_required
|
367 |
def upload_document():
|
368 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ์
๋ก๋ API"""
|
|
|
|
|
|
|
|
|
|
|
369 |
if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
370 |
logger.error("๋ฌธ์ ์
๋ก๋ API ์์ฒญ ์ base_retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
371 |
return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์ค๋น๋์ง ์์์ต๋๋ค."}), 503
|
|
|
377 |
if not doc_file or not doc_file.filename:
|
378 |
return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
|
379 |
|
|
|
|
|
380 |
if not allowed_doc_file(doc_file.filename):
|
381 |
+
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
382 |
logger.warning(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
|
383 |
return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์
๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
384 |
|
385 |
try:
|
386 |
filename = secure_filename(doc_file.filename)
|
387 |
+
data_folder = app.config.get('DATA_FOLDER', os.path.join(os.path.dirname(__file__), '..', 'data'))
|
|
|
|
|
|
|
|
|
388 |
os.makedirs(data_folder, exist_ok=True)
|
389 |
filepath = os.path.join(data_folder, filename)
|
390 |
|
391 |
doc_file.save(filepath)
|
392 |
logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")
|
393 |
|
|
|
394 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
|
395 |
+
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
396 |
+
try:
|
397 |
+
os.remove(filepath)
|
398 |
+
except OSError:
|
399 |
+
pass
|
400 |
+
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
401 |
|
402 |
content = None
|
403 |
file_ext = filename.rsplit('.', 1)[1].lower()
|
404 |
metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
|
405 |
docs = []
|
406 |
|
|
|
407 |
if file_ext in ['txt', 'md', 'csv']:
|
408 |
try:
|
409 |
with open(filepath, 'r', encoding='utf-8') as f:
|
|
|
420 |
logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
|
421 |
return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500
|
422 |
elif file_ext == 'pdf':
|
423 |
+
logger.warning("PDF ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
|
|
|
|
424 |
elif file_ext == 'docx':
|
425 |
+
logger.warning("DOCX ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
|
|
|
|
426 |
|
427 |
+
if content is not None:
|
|
|
428 |
if file_ext == 'csv':
|
429 |
logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
|
430 |
docs = DocumentProcessor.csv_to_documents(content, metadata)
|
431 |
+
elif file_ext in ['txt', 'md']:
|
432 |
+
logger.info(f"ํ
์คํธ ๋ฌธ์ ์ฒ๋ฆฌ ์์: {filename}")
|
|
|
433 |
docs = DocumentProcessor.text_to_documents(
|
434 |
content, metadata=metadata,
|
435 |
+
chunk_size=512, chunk_overlap=50
|
436 |
)
|
437 |
|
|
|
438 |
if docs:
|
439 |
logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
|
440 |
base_retriever.add_documents(docs)
|
441 |
|
442 |
logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
|
443 |
+
index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index'))
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
os.makedirs(os.path.dirname(index_path), exist_ok=True)
|
445 |
try:
|
446 |
base_retriever.save(index_path)
|
447 |
logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
|
|
|
|
|
448 |
return jsonify({
|
449 |
"success": True,
|
450 |
"message": f"ํ์ผ '{filename}' ์
๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
|
451 |
})
|
452 |
except Exception as e_save:
|
453 |
logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}", exc_info=True)
|
|
|
454 |
return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
|
455 |
else:
|
456 |
logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค.")
|
|
|
457 |
return jsonify({
|
458 |
+
"warning": True,
|
459 |
"message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค."
|
460 |
})
|
461 |
|
462 |
except Exception as e:
|
463 |
logger.error(f"ํ์ผ ์
๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
|
|
464 |
if 'filepath' in locals() and os.path.exists(filepath):
|
465 |
+
try:
|
466 |
+
os.remove(filepath)
|
467 |
+
except OSError:
|
468 |
+
pass
|
469 |
return jsonify({"error": f"ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
470 |
|
471 |
+
# --- Document List API (SyntaxError 유발 주석 제거) ---
|
472 |
@app.route('/api/documents', methods=['GET'])
|
473 |
@login_required
|
474 |
def list_documents():
|
475 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ๋ชฉ๋ก API"""
|
476 |
+
# SyntaxError๋ฅผ ์ ๋ฐํ๋ ์ค๋ช
์ฃผ์๋ค์ ์ ๊ฑฐํ์ต๋๋ค.
|
477 |
logger.info("๋ฌธ์ ๋ชฉ๋ก API ์์ฒญ ์์")
|
478 |
|
|
|
479 |
if base_retriever is None:
|
480 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever๊ฐ None์
๋๋ค.")
|
481 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
482 |
elif not hasattr(base_retriever, 'documents'):
|
483 |
+
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever์ 'documents' ์์ฑ์ด ์์ต๋๋ค.")
|
484 |
+
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
485 |
|
|
|
486 |
logger.info(f"base_retriever ๊ฐ์ฒด ํ์
: {type(base_retriever)}")
|
487 |
logger.info(f"base_retriever.documents ์กด์ฌ ์ฌ๋ถ: {hasattr(base_retriever, 'documents')}")
|
488 |
+
if hasattr(base_retriever, 'documents'):
|
489 |
+
logger.info(f"base_retriever.documents ํ์
: {type(base_retriever.documents)}")
|
490 |
+
logger.info(f"base_retriever.documents ๊ธธ์ด: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
|
491 |
|
492 |
try:
|
493 |
sources = {}
|
494 |
total_chunks = 0
|
495 |
+
doc_list = base_retriever.documents
|
496 |
|
|
|
497 |
if not isinstance(doc_list, list):
|
498 |
+
logger.error(f"base_retriever.documents๊ฐ ๋ฆฌ์คํธ๊ฐ ์๋: {type(doc_list)}")
|
499 |
+
return jsonify({"error": "๋ด๋ถ ๋ฐ์ดํฐ ๊ตฌ์กฐ ์ค๋ฅ"}), 500
|
500 |
|
501 |
logger.info(f"์ด {len(doc_list)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
|
502 |
for i, doc in enumerate(doc_list):
|
503 |
+
if not isinstance(doc, dict):
|
504 |
+
logger.warning(f"์ฒญํฌ {i}๊ฐ ๋์
๋๋ฆฌ ํ์
์ด ์๋: {type(doc)}")
|
505 |
+
continue
|
|
|
506 |
|
|
|
507 |
source = "unknown"
|
508 |
metadata = doc.get("metadata")
|
509 |
if isinstance(metadata, dict):
|
510 |
source = metadata.get("source", "unknown")
|
|
|
511 |
if source == "unknown":
|
512 |
source = doc.get("source", "unknown")
|
513 |
|
|
|
515 |
if source in sources:
|
516 |
sources[source]["chunks"] += 1
|
517 |
else:
|
|
|
518 |
filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
|
519 |
filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
|
520 |
+
if filename == source and "filename" in doc:
|
521 |
+
filename = doc["filename"]
|
522 |
+
if filetype == "unknown" and "filetype" in doc:
|
523 |
+
filetype = doc["filetype"]
|
524 |
|
525 |
sources[source] = {
|
526 |
"filename": filename,
|
|
|
529 |
}
|
530 |
total_chunks += 1
|
531 |
else:
|
532 |
+
logger.warning(f"์ฒญํฌ {i}์์ ์์ค ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์: {doc}")
|
|
|
533 |
|
|
|
534 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
535 |
+
documents.sort(key=lambda x: x.get("filename", ""), reverse=False)
|
536 |
|
537 |
logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {total_chunks}๊ฐ ์ฒญํฌ")
|
538 |
return jsonify({
|
|
|
542 |
})
|
543 |
|
544 |
except Exception as e:
|
545 |
+
# 여기서 발생하는 예외가 503으로 이어질 수 있는지 확인
|
546 |
logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
|
|
547 |
return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
548 |
+
|
549 |
+
# SyntaxError 수정: 누락된 '#' 추가
|
550 |
+
# * 제공된 `list_documents` 함수 코드 자체에는 `app_ready` 상태나 `base_retriever`가 `None`인 상태를 확인하여 503 오류를 반환하는 로직이 **없습니다.** 로그에서 503이 발생한다면, 원인은 다음 중 하나일 가능성이 높습니다:
|