Spaces:
No application file
No application file
fix
Browse files- app/app_routes.py +49 -173
app/app_routes.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""
|
2 |
-
RAG ๊ฒ์ ์ฑ๋ด ์น ์ ํ๋ฆฌ์ผ์ด์
- API ๋ผ์ฐํธ ์ ์ (์์
|
3 |
"""
|
4 |
|
5 |
import os
|
@@ -8,6 +8,7 @@ import logging
|
|
8 |
import tempfile
|
9 |
import requests
|
10 |
import time # ์ฑ ์์ ์๊ฐ ๊ธฐ๋ก ์ํด ์ถ๊ฐ
|
|
|
11 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
12 |
from datetime import datetime
|
13 |
from werkzeug.utils import secure_filename
|
@@ -32,7 +33,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
32 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
33 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
34 |
|
35 |
-
# --- ๋ก๊ทธ์ธ/๋ก๊ทธ์์ ๋ผ์ฐํธ (
|
36 |
@app.route('/login', methods=['GET', 'POST'])
|
37 |
def login():
|
38 |
error = None
|
@@ -45,37 +46,28 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
45 |
username = request.form.get('username', '')
|
46 |
password = request.form.get('password', '')
|
47 |
logger.info(f"์
๋ ฅ๋ ์ฌ์ฉ์๋ช
: {username}")
|
48 |
-
# logger.info(f"๋น๋ฐ๋ฒํธ ์
๋ ฅ ์ฌ๋ถ: {len(password) > 0}") # ์ค์ ๋น๋ฐ๋ฒํธ ๋ก๊น
์ ๋ณด์์ ์ข์ง ์์
|
49 |
|
50 |
valid_username = ADMIN_USERNAME
|
51 |
valid_password = ADMIN_PASSWORD
|
52 |
logger.info(f"๊ฒ์ฆ์ฉ ์ฌ์ฉ์๋ช
: {valid_username}")
|
53 |
-
# logger.info(f"๊ฒ์ฆ์ฉ ๋น๋ฐ๋ฒํธ ์กด์ฌ ์ฌ๋ถ: {valid_password is not None and len(valid_password) > 0}")
|
54 |
|
55 |
if username == valid_username and password == valid_password:
|
56 |
logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
|
57 |
-
# logger.debug(f"์ธ์
์ค์ ์ : {session}") # ๋๋ฒ๊ทธ ๋ ๋ฒจ๋ก ๋ณ๊ฒฝ
|
58 |
-
|
59 |
session.permanent = True
|
60 |
session['logged_in'] = True
|
61 |
session['username'] = username
|
62 |
-
# session.modified = True # Flask๋ ์ธ์
๋ณ๊ฒฝ ์ ์๋์ผ๋ก modified ํ๋๊ทธ๋ฅผ ์ค์ ํ๋ฏ๋ก ๋ช
์์ ํธ์ถ ๋ถํ์
|
63 |
-
|
64 |
logger.info(f"์ธ์
์ค์ ์๋ฃ: {session}")
|
65 |
-
|
66 |
redirect_to = next_url or url_for('index')
|
67 |
logger.info(f"๋ฆฌ๋๋ ์
๋์: {redirect_to}")
|
68 |
response = redirect(redirect_to)
|
69 |
-
# ์ธ์
์ฟ ํค๊ฐ ์ ๋๋ก ์ค์ ๋๋๋ก ์๋ต ๋ฐํ ์ ํ์ธ (๋๋ฒ๊น
์ฉ)
|
70 |
logger.debug(f"๋ก๊ทธ์ธ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
71 |
return response
|
72 |
else:
|
73 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
74 |
-
# ์คํจ ์์ธ ์์ธ ๋ก๊น
์ ๋ณด์ ์ํ ์์ง๊ฐ ์์ผ๋ฏ๋ก ์ฃผ์
|
75 |
error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
|
76 |
else: # GET ์์ฒญ
|
77 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
|
78 |
-
if session.get('logged_in'):
|
79 |
logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
80 |
return redirect(url_for('index'))
|
81 |
|
@@ -86,48 +78,31 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
86 |
@app.route('/logout')
|
87 |
def logout():
|
88 |
"""๋ก๊ทธ์์ ์ฒ๋ฆฌ"""
|
89 |
-
username = session.get('username', 'unknown')
|
90 |
-
if session.pop('logged_in', None):
|
91 |
session.pop('username', None)
|
92 |
-
# session.modified = True # pop ์ฌ์ฉ ์ ์๋ ์ฒ๋ฆฌ๋จ
|
93 |
logger.info(f"์ฌ์ฉ์ {username} ๋ก๊ทธ์์ ์ฒ๋ฆฌ ์๋ฃ. ํ์ฌ ์ธ์
: {session}")
|
94 |
else:
|
95 |
logger.warning("๋ก๊ทธ์ธ๋์ง ์์ ์ํ์์ ๋ก๊ทธ์์ ์๋")
|
96 |
|
97 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
98 |
response = redirect(url_for('login'))
|
99 |
-
# ๋ก๊ทธ์์ ์ ์ฟ ํค ์ญ์ ํ์ธ (๋๋ฒ๊น
์ฉ)
|
100 |
logger.debug(f"๋ก๊ทธ์์ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
101 |
return response
|
102 |
|
103 |
-
# --- ๋ฉ์ธ ํ์ด์ง ๋ฐ ์ํ ํ์ธ ---
|
104 |
@app.route('/')
|
105 |
@login_required
|
106 |
def index():
|
107 |
"""๋ฉ์ธ ํ์ด์ง"""
|
108 |
-
# app_ready_flag
|
109 |
-
|
110 |
-
# ์ฌ๊ธฐ์๋ ์ ๋ฌ๋ฐ์ ํ๋๊ทธ๋ฅผ ์ฌ์ฉํ๋ค๊ณ ๊ฐ์ .
|
111 |
-
|
112 |
-
# !! ์ค์: app_ready_flag๋ register_routes ์์ ์ ๊ฐ์
๋๏ฟฝ๏ฟฝ.
|
113 |
-
# ์ค์๊ฐ ์ํ๋ฅผ ๋ณด๋ ค๋ฉด app.py์ app_ready ๋ณ์๋ฅผ ์ง์ ์ฐธ์กฐํด์ผ ํฉ๋๋ค.
|
114 |
-
# ์: from app import app_ready (์ํ ์ฐธ์กฐ ๋ฌธ์ ์์ ๊ฒฝ์ฐ)
|
115 |
-
# ์ฌ๊ธฐ์๋ ์ผ๋จ ์ ๋ฌ๋ ๊ฐ ์ฌ์ฉ
|
116 |
-
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag # Event ๊ฐ์ฒด ๋๋ bool ๊ฐ์
|
117 |
|
118 |
-
# ์ฑ ์์ ํ ๊ฒฝ๊ณผ ์๊ฐ ๊ณ์ฐ (ํ์ผ ์์ ์๊ฐ ๋์ ์ค์ ์์ ์๊ฐ ์ฌ์ฉ)
|
119 |
time_elapsed = time.time() - APP_START_TIME
|
120 |
|
121 |
-
# 30์ด ๊ฐ์ Ready ๋ก์ง ์ ๊ฑฐ ๋๋ ์์ ๊ถ์ฅ
|
122 |
-
# if not is_ready and time_elapsed > 30:
|
123 |
-
# logger.warning(f"์ฑ์ด {time_elapsed:.1f}์ด ์ด์ ์ด๊ธฐํ ์ค ์ํ์
๋๋ค. (๊ฐ์ Ready ๋ก์ง ๋นํ์ฑํ๋จ)")
|
124 |
-
# app_ready = True # ์ ์ญ ๋ณ์๋ฅผ ์ง์ ์์ ํด์ผ ํจ
|
125 |
-
|
126 |
if not is_ready:
|
127 |
logger.info(f"์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์ (๊ฒฝ๊ณผ ์๊ฐ: {time_elapsed:.1f}์ด)")
|
128 |
-
#
|
129 |
-
return render_template('loading.html') # 503 ๋์ 200 OK์ ๋ก๋ฉ ํ์ด์ง
|
130 |
-
# return render_template('loading.html'), 503 # ๊ธฐ์กด ๋ก์ง
|
131 |
|
132 |
logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
|
133 |
return render_template('index.html')
|
@@ -141,21 +116,19 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
141 |
logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
|
142 |
return jsonify({"ready": is_ready})
|
143 |
|
144 |
-
# --- LLM API (
|
145 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
146 |
@login_required
|
147 |
def llm_api():
|
148 |
"""์ฌ์ฉ ๊ฐ๋ฅํ LLM ๋ชฉ๋ก ๋ฐ ์ ํ API"""
|
149 |
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
pass # ์ผ๋จ ์งํ ํ์ฉ
|
154 |
|
155 |
if request.method == 'GET':
|
156 |
logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
|
157 |
try:
|
158 |
-
# llm_interface ๊ฐ์ฒด ์กด์ฌ ๋ฐ ์์ฑ ํ์ธ ๊ฐํ
|
159 |
if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
|
160 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ์ด ์์ต๋๋ค.")
|
161 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
@@ -171,7 +144,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
171 |
"current_llm": current_details
|
172 |
})
|
173 |
except Exception as e:
|
174 |
-
logger.error(f"LLM ์ ๋ณด ์กฐํ ์ค๋ฅ: {e}", exc_info=True)
|
175 |
return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500
|
176 |
|
177 |
elif request.method == 'POST':
|
@@ -183,7 +156,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
183 |
logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
|
184 |
|
185 |
try:
|
186 |
-
# llm_interface ๊ฐ์ฒด ์กด์ฌ ๋ฐ ์์ฑ ํ์ธ ๊ฐํ
|
187 |
if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
|
188 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ/๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
189 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
@@ -207,20 +179,18 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
207 |
logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
|
208 |
return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500
|
209 |
|
210 |
-
# --- Chat API (
|
211 |
@app.route('/api/chat', methods=['POST'])
|
212 |
@login_required
|
213 |
def chat():
|
214 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
215 |
-
# retriever ๊ฐ์ฒด๊ฐ None์ธ์ง, ๊ทธ๋ฆฌ๊ณ search ๋ฉ์๋๊ฐ ์๋์ง ํ์ธ
|
216 |
if retriever is None or not hasattr(retriever, 'search'):
|
217 |
logger.warning("์ฑํ
API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
218 |
-
# 503 ๋์ ์ฌ์ฉ์ ์นํ์ ์ธ ๋ฉ์์ง ๋ฐํ
|
219 |
return jsonify({
|
220 |
"answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
|
221 |
"sources": [],
|
222 |
-
"error": "Retriever not ready"
|
223 |
-
}), 200 #
|
224 |
|
225 |
try:
|
226 |
data = request.get_json()
|
@@ -230,22 +200,17 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
230 |
query = data['query']
|
231 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
232 |
|
233 |
-
|
234 |
-
search_results = retriever.search(query, top_k=5, first_stage_k=6) # first_stage_k๋ base_retriever์ ์ ๋ฌ๋ ์ ์์
|
235 |
|
236 |
-
# ์ปจํ
์คํธ ์ค๋น
|
237 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
238 |
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
239 |
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
240 |
-
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
241 |
|
242 |
if not context:
|
243 |
logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
244 |
-
# ์ปจํ
์คํธ ์์ด LLM ํธ์ถ ์๋ ๋๋ ๊ธฐ๋ณธ ์๋ต ๋ฐํ ๊ฒฐ์ ํ์
|
245 |
-
# ์ฌ๊ธฐ์๋ LLM ํธ์ถ ๋ก์ง์์ ์ฒ๋ฆฌํ๋๋ก ํจ
|
246 |
|
247 |
-
|
248 |
-
llm_id = data.get('llm_id', None) # ์์ฒญ์์ llm_id ๊ฐ์ ธ์ค๊ธฐ
|
249 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
250 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
251 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
@@ -257,28 +222,22 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
257 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
258 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
259 |
|
260 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (๊ธฐ์กด ๋ก์ง ์ ์ง, ๋ฐฉ์ด ์ฝ๋ ๊ฐํ)
|
261 |
sources = []
|
262 |
if search_results:
|
263 |
for result in search_results:
|
264 |
if not isinstance(result, dict):
|
265 |
logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
|
266 |
continue
|
267 |
-
|
268 |
source_info = {}
|
269 |
-
source_key = result.get("source")
|
270 |
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
271 |
source_key = result["metadata"].get("source")
|
272 |
-
|
273 |
if source_key:
|
274 |
source_info["source"] = source_key
|
275 |
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
276 |
-
|
277 |
-
# CSV ID ์ถ์ถ ๋ก์ง
|
278 |
filetype = result.get("filetype")
|
279 |
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
280 |
filetype = result["metadata"].get("filetype")
|
281 |
-
|
282 |
if "text" in result and filetype == "csv":
|
283 |
try:
|
284 |
text_lines = result["text"].strip().split('\n')
|
@@ -287,13 +246,10 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
287 |
if ',' in first_line:
|
288 |
first_column = first_line.split(',')[0].strip()
|
289 |
source_info["id"] = first_column
|
290 |
-
# logger.debug(f"CSV ์์ค ID ์ถ์ถ: {first_column} from {source_info['source']}")
|
291 |
except Exception as e:
|
292 |
logger.warning(f"CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
293 |
-
|
294 |
sources.append(source_info)
|
295 |
|
296 |
-
# ์ต์ข
์๋ต
|
297 |
response_data = {
|
298 |
"answer": answer,
|
299 |
"sources": sources,
|
@@ -305,12 +261,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
305 |
logger.error(f"์ฑํ
์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
306 |
return jsonify({"error": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}), 500
|
307 |
|
308 |
-
# --- Voice Chat API (
|
309 |
@app.route('/api/voice', methods=['POST'])
|
310 |
@login_required
|
311 |
def voice_chat():
|
312 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
313 |
-
# ํ์ ์ปดํฌ๋ํธ ํ์ธ
|
314 |
if retriever is None or not hasattr(retriever, 'search'):
|
315 |
logger.error("์์ฑ API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์")
|
316 |
return jsonify({"error": "๊ฒ์ ์์ง ์ค๋น ์๋จ"}), 503
|
@@ -334,16 +289,12 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
334 |
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
335 |
|
336 |
try:
|
337 |
-
# ์ค๋์ค ํ์ผ ์์ ์ ์ฅ ๋ฐ ์ฒ๋ฆฌ
|
338 |
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
339 |
audio_file.save(temp_audio.name)
|
340 |
logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
|
341 |
-
|
342 |
-
# STT ์ํ (๋ฐ์ดํธ ๋๋ ๊ฒฝ๋ก ์ ๋ฌ)
|
343 |
-
# ์: ๋ฐ์ดํธ ์ ๋ฌ
|
344 |
with open(temp_audio.name, 'rb') as f_bytes:
|
345 |
audio_bytes = f_bytes.read()
|
346 |
-
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
347 |
|
348 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
349 |
error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
|
@@ -353,21 +304,19 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
353 |
transcription = stt_result.get("text", "")
|
354 |
if not transcription:
|
355 |
logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
|
356 |
-
# ๋น ํ
์คํธ๋ผ๋ ์๋ต ๊ตฌ์กฐ๋ ์ ์ง
|
357 |
return jsonify({
|
358 |
"transcription": "",
|
359 |
"answer": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
|
360 |
"sources": [],
|
361 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
362 |
-
}), 200
|
363 |
|
364 |
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
365 |
|
366 |
-
# --- ์ดํ ๋ก์ง์ /api/chat๊ณผ ๋์ผ ---
|
367 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
368 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
369 |
|
370 |
-
llm_id = request.form.get('llm_id', None)
|
371 |
if not context:
|
372 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
373 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
@@ -375,7 +324,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
375 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
376 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
377 |
|
378 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (chat API์ ๋์ผ ๋ก์ง ์ฌ์ฉ)
|
379 |
sources = []
|
380 |
if search_results:
|
381 |
for result in search_results:
|
@@ -402,7 +350,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
402 |
logger.warning(f"[์์ฑ์ฑ] CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
403 |
sources.append(source_info)
|
404 |
|
405 |
-
# ์ต์ข
์๋ต
|
406 |
response_data = {
|
407 |
"transcription": transcription,
|
408 |
"answer": answer,
|
@@ -415,12 +362,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
415 |
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
416 |
return jsonify({"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์", "details": str(e)}), 500
|
417 |
|
418 |
-
# --- Document Upload API (
|
419 |
@app.route('/api/upload', methods=['POST'])
|
420 |
@login_required
|
421 |
def upload_document():
|
422 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ์
๋ก๋ API"""
|
423 |
-
# base_retriever ๊ฐ์ฒด ๋ฐ ํ์ ๋ฉ์๋ ํ์ธ
|
424 |
if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
425 |
logger.error("๋ฌธ์ ์
๋ก๋ API ์์ฒญ ์ base_retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
426 |
return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์ค๋น๋์ง ์์์ต๋๋ค."}), 503
|
@@ -429,28 +375,25 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
429 |
return jsonify({"error": "๋ฌธ์ ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
430 |
|
431 |
doc_file = request.files['document']
|
432 |
-
if not doc_file or not doc_file.filename:
|
433 |
return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
|
434 |
|
435 |
if not allowed_doc_file(doc_file.filename):
|
436 |
-
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
437 |
logger.warning(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
|
438 |
return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์
๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
439 |
|
440 |
try:
|
441 |
filename = secure_filename(doc_file.filename)
|
442 |
-
|
443 |
-
|
444 |
-
os.makedirs(data_folder, exist_ok=True) # ํด๋ ์์ผ๋ฉด ์์ฑ
|
445 |
filepath = os.path.join(data_folder, filename)
|
446 |
|
447 |
doc_file.save(filepath)
|
448 |
logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")
|
449 |
|
450 |
-
# ๋ฌธ์ ์ฒ๋ฆฌ (DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ์๋ ํ์ธ)
|
451 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
|
452 |
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
453 |
-
# ์ด๋ฏธ ์ ์ฅ๋ ํ์ผ ์ญ์ ๊ณ ๋ ค
|
454 |
try: os.remove(filepath)
|
455 |
except OSError: pass
|
456 |
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
@@ -460,7 +403,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
460 |
metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
|
461 |
docs = []
|
462 |
|
463 |
-
# ํ
์คํธ ๊ธฐ๋ฐ ํ์ผ ์ฝ๊ธฐ (์ธ์ฝ๋ฉ ์ฒ๋ฆฌ ํฌํจ)
|
464 |
if file_ext in ['txt', 'md', 'csv']:
|
465 |
try:
|
466 |
with open(filepath, 'r', encoding='utf-8') as f:
|
@@ -476,79 +418,63 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
476 |
except Exception as e_read:
|
477 |
logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
|
478 |
return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500
|
479 |
-
# PDF/DOCX ์ฒ๋ฆฌ ๋ก์ง (๋ณ๋ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ํ์)
|
480 |
elif file_ext == 'pdf':
|
481 |
logger.warning("PDF ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
482 |
-
# content = extract_text_from_pdf(filepath) # ์์
|
483 |
elif file_ext == 'docx':
|
484 |
logger.warning("DOCX ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
485 |
-
# content = extract_text_from_docx(filepath) # ์์
|
486 |
|
487 |
-
|
488 |
-
if content is not None: # ๋ด์ฉ์ด ์ฑ๊ณต์ ์ผ๋ก ์ฝํ์ ๋๋ง
|
489 |
if file_ext == 'csv':
|
490 |
logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
|
491 |
docs = DocumentProcessor.csv_to_documents(content, metadata)
|
492 |
-
elif file_ext in ['txt', 'md']:
|
493 |
logger.info(f"ํ
์คํธ ๋ฌธ์ ์ฒ๋ฆฌ ์์: {filename}")
|
494 |
docs = DocumentProcessor.text_to_documents(
|
495 |
content, metadata=metadata,
|
496 |
-
chunk_size=512, chunk_overlap=50
|
497 |
)
|
498 |
-
# PDF/DOCX์์ ์ถ์ถ๋ content ์ฒ๋ฆฌ ๋ก์ง ์ถ๊ฐ ๊ฐ๋ฅ
|
499 |
|
500 |
-
# ๊ฒ์๊ธฐ์ ์ถ๊ฐ ๋ฐ ์ ์ฅ
|
501 |
if docs:
|
502 |
logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
|
503 |
base_retriever.add_documents(docs)
|
504 |
|
505 |
logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
|
506 |
-
index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index'))
|
507 |
-
os.makedirs(os.path.dirname(index_path), exist_ok=True)
|
508 |
try:
|
509 |
base_retriever.save(index_path)
|
510 |
logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
|
511 |
-
# TODO: ์ฌ์์ํ ๊ฒ์๊ธฐ(retriever) ์
๋ฐ์ดํธ ๋ก์ง ํ์ ์ ์ถ๊ฐ
|
512 |
return jsonify({
|
513 |
"success": True,
|
514 |
"message": f"ํ์ผ '{filename}' ์
๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
|
515 |
})
|
516 |
except Exception as e_save:
|
517 |
logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}", exc_info=True)
|
518 |
-
# ์ ์ฅ ์คํจ ์ ์ถ๊ฐ๋ ๋ฌธ์ ๋กค๋ฐฑ ๊ณ ๋ ค?
|
519 |
return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
|
520 |
else:
|
521 |
logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค.")
|
522 |
-
# ํ์ผ์ ์ ์ฅ๋์์ผ๋ฏ๋ก warning ๋ฐํ
|
523 |
return jsonify({
|
524 |
-
"warning": True,
|
525 |
"message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค."
|
526 |
})
|
527 |
|
528 |
except Exception as e:
|
529 |
logger.error(f"ํ์ผ ์
๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
530 |
-
# ์ค๋ฅ ๋ฐ์ ์ ์ ์ฅ๋ ํ์ผ ์ญ์ ๊ณ ๋ ค
|
531 |
if 'filepath' in locals() and os.path.exists(filepath):
|
532 |
try: os.remove(filepath)
|
533 |
except OSError: pass
|
534 |
return jsonify({"error": f"ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
535 |
|
536 |
-
# --- Document List API (
|
537 |
@app.route('/api/documents', methods=['GET'])
|
538 |
@login_required
|
539 |
def list_documents():
|
540 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ๋ชฉ๋ก API"""
|
541 |
-
#
|
542 |
-
|
543 |
-
# 503 ์ค๋ฅ๋ ์ด ํจ์ ์คํ *์ * ๋จ๊ณ(์: ๋ค๋ฅธ ๋ฐ์ฝ๋ ์ดํฐ, ๋ฏธ๋ค์จ์ด, Flask ๋ด๋ถ ์ค๋ฅ)
|
544 |
-
# ๋๋ base_retriever ์ ๊ทผ ์ ๋ฐ์ํ๋ ์์ธ ์ฒ๋ฆฌ ๊ณผ์ ์์ ๋์ฌ ๊ฐ๋ฅ์ฑ ์์.
|
545 |
-
|
546 |
-
logger.info("๋ฌธ์ ๋ชฉ๋ก API ์์ฒญ ์์") # ๋ก๊ทธ ์ถ๊ฐ
|
547 |
|
548 |
-
# base_retriever ์ํ ์์ธ ๋ก๊น
|
549 |
if base_retriever is None:
|
550 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever๊ฐ None์
๋๋ค.")
|
551 |
-
# 503 ๋์ ๋น ๋ชฉ๋ก ๋ฐํ (์๋๋ ๋์)
|
552 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
553 |
elif not hasattr(base_retriever, 'documents'):
|
554 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever์ 'documents' ์์ฑ์ด ์์ต๋๋ค.")
|
@@ -560,42 +486,34 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
560 |
logger.info(f"base_retriever.documents ํ์
: {type(base_retriever.documents)}")
|
561 |
logger.info(f"base_retriever.documents ๊ธธ์ด: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
|
562 |
|
563 |
-
|
564 |
try:
|
565 |
sources = {}
|
566 |
total_chunks = 0
|
567 |
-
doc_list = base_retriever.documents
|
568 |
|
569 |
if not isinstance(doc_list, list):
|
570 |
logger.error(f"base_retriever.documents๊ฐ ๋ฆฌ์คํธ๊ฐ ์๋: {type(doc_list)}")
|
571 |
-
# ์ด ๊ฒฝ์ฐ 500 ์ค๋ฅ๋ฅผ ๋ฐํํ๊ฑฐ๋ ๋น ๋ชฉ๋ก ๋ฐํ
|
572 |
return jsonify({"error": "๋ด๋ถ ๋ฐ์ดํฐ ๊ตฌ์กฐ ์ค๋ฅ"}), 500
|
573 |
|
574 |
logger.info(f"์ด {len(doc_list)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
|
575 |
for i, doc in enumerate(doc_list):
|
576 |
-
# ๊ฐ ๋ฌธ์ ์ฒญํฌ ์ฒ๋ฆฌ ๋ก๊น
์ถ๊ฐ
|
577 |
-
# logger.debug(f"์ฒ๋ฆฌ ์ค์ธ ์ฒญํฌ {i}: {doc}") # ๋๋ฌด ์์ธํ๋ฉด ์ฃผ์ ์ฒ๋ฆฌ
|
578 |
-
|
579 |
if not isinstance(doc, dict):
|
580 |
logger.warning(f"์ฒญํฌ {i}๊ฐ ๋์
๋๋ฆฌ ํ์
์ด ์๋: {type(doc)}")
|
581 |
-
continue
|
582 |
|
583 |
-
# ์์ค ์ ๋ณด ์ถ์ถ (๊ธฐ์กด ๋ก์ง ๊ฐ์ )
|
584 |
source = "unknown"
|
585 |
-
metadata = doc.get("metadata")
|
586 |
if isinstance(metadata, dict):
|
587 |
source = metadata.get("source", "unknown")
|
588 |
-
if source == "unknown":
|
589 |
source = doc.get("source", "unknown")
|
590 |
|
591 |
if source != "unknown":
|
592 |
if source in sources:
|
593 |
sources[source]["chunks"] += 1
|
594 |
else:
|
595 |
-
# ๋ฉํ๋ฐ์ดํฐ ์ฐ์ ์ฌ์ฉ
|
596 |
filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
|
597 |
filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
|
598 |
-
# ๋ฉํ๋ฐ์ดํฐ ์์ผ๋ฉด doc ์์ฒด์์ ์ฐพ๊ธฐ
|
599 |
if filename == source and "filename" in doc: filename = doc["filename"]
|
600 |
if filetype == "unknown" and "filetype" in doc: filetype = doc["filetype"]
|
601 |
|
@@ -608,10 +526,8 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
608 |
else:
|
609 |
logger.warning(f"์ฒญํฌ {i}์์ ์์ค ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์: {doc}")
|
610 |
|
611 |
-
|
612 |
-
# ๋ชฉ๋ก ํ์ ๋ณํ ๋ฐ ์ ๋ ฌ (๋ณ๊ฒฝ ์์)
|
613 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
614 |
-
documents.sort(key=lambda x: x.get("filename", ""), reverse=False)
|
615 |
|
616 |
logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {total_chunks}๊ฐ ์ฒญํฌ")
|
617 |
return jsonify({
|
@@ -621,51 +537,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
|
|
621 |
})
|
622 |
|
623 |
except Exception as e:
|
624 |
-
#
|
625 |
logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
626 |
-
# ์ผ๋ฐ์ ์ธ ๋ด๋ถ ์ค๋ฅ๋ 500 ๋ฐํ
|
627 |
return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
628 |
-
|
629 |
```
|
630 |
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
* 제공된 `list_documents` 함수 코드 자체에는 `app_ready` 상태나 `base_retriever`가 `None`인 상태를 확인하여 503 오류를 반환하는 로직이 **없습니다.** 로그에서 503이 발생한다면, 원인은 다음 중 하나일 가능성이 높습니다:
|
635 |
-
* **실제 실행 중인 코드 불일치:** 현재 서버에서 실행 중인 코드가 제공해주신 코드와 다를 수 있습니다. (예: 이전 버전의 `if not app_ready: return ..., 503` 코드가 남아있음)
|
636 |
-
* **`base_retriever` ์ ๊ทผ ์ค๋ฅ:** `base_retriever.documents` ์์ฑ์ ์ ๊ทผํ๋ ๊ณผ์ ์์ ์๊ธฐ์น ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ๊ณ , Flask์ ์ ์ญ ์ค๋ฅ ํธ๋ค๋ฌ๋ ํน์ ๋ฏธ๋ค์จ์ด๊ฐ ์ด๋ฅผ 503์ผ๋ก ์ฒ๋ฆฌํ ์ ์์ต๋๋ค. (์ผ๋ฐ์ ์ผ๋ก๋ 500 Internal Server Error๊ฐ ๋ฐํ๋ฉ๋๋ค.)
|
637 |
-
* **외부 요인:** 웹 서버(Nginx 등) 설정이나 로드 밸런서 등 Flask 애플리케이션 앞단의 다른 시스템에서 503 오류를 반환할 수도 있습니다.
|
638 |
-
* **수정 제안:**
|
639 |
-
* `list_documents` ํจ์ ์์ ๋ถ๋ถ๊ณผ `try...except` ๋ธ๋ก ๋ด๋ถ์ **๋ ์์ธํ ๋ก๊ทธ**๋ฅผ ์ถ๊ฐํ์ฌ ํจ์ ์คํ ํ๋ฆ๊ณผ `base_retriever` ๊ฐ์ฒด ์ํ๋ฅผ ๋ช
ํํ ํ์
ํฉ๋๋ค. (์ ์ฝ๋์ ๋ก๊น
์ถ๊ฐ๋จ)
|
640 |
-
* 실행 중인 코드가 최신 버전인지 다시 확인합니다.
|
641 |
-
* `base_retriever` 객체 자체 (`VectorRetriever` 클래스)의 `documents` 속성 구현을 확인합니다.
|
642 |
-
|
643 |
-
2. **`app_ready` 상태 관리 및 사용:**
|
644 |
-
* `register_routes` 함수는 앱 시작 시 한 번만 호출되므로, 인자로 전달된 `app_ready` 값은 **호출 시점의 스냅샷**입니다. 백그라운드 스레드가 나중에 `app.py`의 전역 `app_ready` 값을 변경해도 `register_routes` 내부의 지역 변수 `app_ready` (코드에서는 `app_ready_flag`로 명칭 변경 제안)는 업데이트되지 않습니다.
|
645 |
-
* `index` ํจ์ ๋ด์์ `nonlocal app_ready` ์ฌ์ฉ์ ์๋ชป๋์์ต๋๋ค. `app_ready`๋ ์ ์ญ ๋ณ์์ด๋ฏ๋ก `global app_ready`๋ฅผ ์ฌ์ฉํ๊ฑฐ๋, ๋ ์ข์ ๋ฐฉ๋ฒ์ Flask์ `app.before_request` ๋ฐ์ฝ๋ ์ดํฐ๋ `g` ๊ฐ์ฒด๋ฅผ ์ฌ์ฉํ์ฌ ์์ฒญ ์ปจํ
์คํธ ๋ด์์ ์ํ๋ฅผ ํ์ธํ๋ ๊ฒ์
๋๋ค. ํน์ `threading.Event` ๊ฐ์ฒด๋ฅผ ์ฌ์ฉํ์ฌ ์ค๋ ๋ ๊ฐ ์ํ๋ฅผ ์์ ํ๊ฒ ๊ณต์ ํ ์ ์์ต๋๋ค.
|
646 |
-
* `index` 함수의 30초 강제 Ready 로직은 `os.path.getmtime(__file__)`을 사용하는데, 이는 파일 수정 시간을 기준으로 하므로 앱의 실제 시작 시간과 달라 부정확합니다.
|
647 |
-
* **수정 제안:**
|
648 |
-
* `app.py`에서 `app_ready`를 `threading.Event` 객체로 관리하고, 이를 `register_routes`에 전달합니다. 각 라우트 핸들러에서는 `app_ready_event.is_set()`으로 상태를 확인합니다. (위 코드에 반영됨)
|
649 |
-
* 앱 시작 시간을 모듈 로드 시점에 `time.time()`으로 기록하고, `index` 함수에서 이를 사용하여 경과 시간을 계산합니다. (위 코드에 반영됨)
|
650 |
-
* 30초 강제 Ready 로직은 주석 처리하거나 제거하는 것을 권장합니다. 초기화가 오래 걸리는 근본 원인을 해결하는 것이 좋습니다.
|
651 |
-
|
652 |
-
3. **객체 및 속성 존재 여부 확인 (방어 코드):**
|
653 |
-
* `llm_interface`, `retriever`, `stt_client`, `DocumentProcessor`, `base_retriever` ๋ฑ์ ๊ฐ์ฒด๊ฐ None์ด๊ฑฐ๋ ํ์ํ ๋ฉ์๋/์์ฑ(`search`, `transcribe_audio`, `add_documents`, `documents` ๋ฑ)์ด ์์ ๊ฒฝ์ฐ `AttributeError`๋ `TypeError`๊ฐ ๋ฐ์ํ ์ ์์ต๋๋ค.
|
654 |
-
* **์์ ์ ์:** ๊ฐ API ํธ๋ค๋ฌ ์์ ๋ถ๋ถ์ด๋ ๊ฐ์ฒด ์ฌ์ฉ ์ง์ ์ ํด๋น ๊ฐ์ฒด์ ํ์ํ ์์ฑ/๋ฉ์๋๊ฐ ์กด์ฌํ๋์ง ํ์ธํ๋ ๋ฐฉ์ด ์ฝ๋๋ฅผ ์ถ๊ฐํฉ๋๋ค. (์ ์ฝ๋์ ์ผ๋ถ ๋ฐ์๋จ)
|
655 |
-
|
656 |
-
4. **์ค๋ฅ ๋ก๊น
:**
|
657 |
-
* `except Exception as e:` ๋ธ๋ก์์ `logger.error(f"...", exc_info=True)`๋ฅผ ์ฌ์ฉํ์ฌ ์คํ ํธ๋ ์ด์ค ์ ์ฒด๋ฅผ ๋ก๊น
ํ๋ฉด ๋๋ฒ๊น
์ ๋ ์ ์ฉํฉ๋๋ค.
|
658 |
-
* **수정 제안:** 주요 `except` 블록에 `exc_info=True`를 추가합니다. (위 코드에 반영됨)
|
659 |
-
|
660 |
-
5. **`/api/documents` ๋ก์ง ๊ฐ์ :**
|
661 |
-
* `base_retriever.documents`๊ฐ Langchain์ `Document` ๊ฐ์ฒด ๋ฆฌ์คํธ์ผ ๊ฒฝ์ฐ, `source` ๋ฑ์ ์ ๋ณด๋ `doc.metadata['source']` ์ ๊ฐ์ด ์ ๊ทผํด์ผ ํ ์ ์์ต๋๋ค. ํ์ฌ ์ฝ๋๋ ๋์
๋๋ฆฌ์ Langchain `Document` ๊ตฌ์กฐ๋ฅผ ํผ์ฉํ์ฌ ์ฒ๋ฆฌํ๋ ค๊ณ ์๋ํ๊ณ ์์ต๋๋ค. `base_retriever.documents`์ ์ค์ ๋ฐ์ดํฐ ๊ตฌ์กฐ๋ฅผ ๋ช
ํํ ํ๊ณ ๊ทธ์ ๋ง๊ฒ ์ฝ๋๋ฅผ ์์ ํด์ผ ํฉ๋๋ค.
|
662 |
-
* ๋ฌธ์ ๋ชฉ๋ก ์ ๋ ฌ ๊ธฐ์ค์ ํ์ผ๋ช
(`filename`)์ผ๋ก ๋ณ๊ฒฝํ๋ ๊ฒ์ด ๋ ์ง๊ด์ ์ผ ์ ์์ต๋๋ค.
|
663 |
-
* **์์ ์ ์:** `base_retriever.documents`์ ๊ตฌ์กฐ๋ฅผ ํ์ธํ๊ณ `source`, `filename`, `filetype` ์ถ์ถ ๋ก์ง์ ๋ช
ํํ ํฉ๋๋ค. ์ ๋ ฌ ๊ธฐ์ค์ `filename`์ผ๋ก ๋ณ๊ฒฝํ์ต๋๋ค. (์ ์ฝ๋ ์ฐธ์กฐ)
|
664 |
-
|
665 |
-
**์์ฝ ๋ฐ ๋ค์ ๋จ๊ณ:**
|
666 |
-
|
667 |
-
* `/api/documents`์ 503 ์ค๋ฅ๋ ํ์ฌ ์ฝ๋๋ง์ผ๋ก๋ ์ค๋ช
ํ๊ธฐ ์ด๋ ต์ต๋๋ค. **์คํ ํ๊ฒฝ์ ์ฝ๋ ๋ฒ์ ํ์ธ** ๋ฐ **์์ธ ๋ก๊น
์ถ๊ฐ**๋ฅผ ํตํด ์์ธ์ ์ถ์ ํด์ผ ํฉ๋๋ค.
|
668 |
-
* `app_ready` 상태 관리 방식을 `threading.Event` 등으로 개선하고, `index` 함수의 시간 계산 로직을 수정하는 것이 좋습니다.
|
669 |
-
* ์ฝ๋ ์ ๋ฐ์ ๊ฑธ์ณ ๊ฐ์ฒด ๋ฐ ์์ฑ ์กด์ฌ ์ฌ๋ถ๋ฅผ ํ์ธํ๋ ๋ฐฉ์ด ์ฝ๋๋ฅผ ์ถ๊ฐํ๊ณ , ์ค๋ฅ ๋ก๊น
์ ๊ฐํํฉ๋๋ค.
|
670 |
-
|
671 |
-
**๊ฐ์ฅ ๋จผ์ ๋ธ๋ผ์ฐ์ ๊ฐ๋ฐ์ ๋๊ตฌ์ 'Network' ํญ์์ `/api/documents` ์์ฒญ์ ์๋ต(Response) ๋ณธ๋ฌธ์ ํน์ ๋ ์์ธํ ์ค๋ฅ ๋ฉ์์ง๊ฐ ์๋์ง ํ์ธํด ๋ณด์ธ์
|
|
|
1 |
"""
|
2 |
+
RAG ๊ฒ์ ์ฑ๋ด ์น ์ ํ๋ฆฌ์ผ์ด์
- API ๋ผ์ฐํธ ์ ์ (SyntaxError ์์ )
|
3 |
"""
|
4 |
|
5 |
import os
|
|
|
8 |
import tempfile
|
9 |
import requests
|
10 |
import time # ์ฑ ์์ ์๊ฐ ๊ธฐ๋ก ์ํด ์ถ๊ฐ
|
11 |
+
import threading # threading.Event ์ฌ์ฉ ์ํด ์ถ๊ฐ
|
12 |
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
|
13 |
from datetime import datetime
|
14 |
from werkzeug.utils import secure_filename
|
|
|
33 |
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
34 |
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
|
35 |
|
36 |
+
# --- ๋ก๊ทธ์ธ/๋ก๊ทธ์์ ๋ผ์ฐํธ (๋ณ๊ฒฝ ์์) ---
|
37 |
@app.route('/login', methods=['GET', 'POST'])
|
38 |
def login():
|
39 |
error = None
|
|
|
46 |
username = request.form.get('username', '')
|
47 |
password = request.form.get('password', '')
|
48 |
logger.info(f"์
๋ ฅ๋ ์ฌ์ฉ์๋ช
: {username}")
|
|
|
49 |
|
50 |
valid_username = ADMIN_USERNAME
|
51 |
valid_password = ADMIN_PASSWORD
|
52 |
logger.info(f"๊ฒ์ฆ์ฉ ์ฌ์ฉ์๋ช
: {valid_username}")
|
|
|
53 |
|
54 |
if username == valid_username and password == valid_password:
|
55 |
logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
|
|
|
|
|
56 |
session.permanent = True
|
57 |
session['logged_in'] = True
|
58 |
session['username'] = username
|
|
|
|
|
59 |
logger.info(f"์ธ์
์ค์ ์๋ฃ: {session}")
|
|
|
60 |
redirect_to = next_url or url_for('index')
|
61 |
logger.info(f"๋ฆฌ๋๋ ์
๋์: {redirect_to}")
|
62 |
response = redirect(redirect_to)
|
|
|
63 |
logger.debug(f"๋ก๊ทธ์ธ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
64 |
return response
|
65 |
else:
|
66 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
|
|
67 |
error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
|
68 |
else: # GET ์์ฒญ
|
69 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
|
70 |
+
if session.get('logged_in'):
|
71 |
logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
72 |
return redirect(url_for('index'))
|
73 |
|
|
|
78 |
@app.route('/logout')
|
79 |
def logout():
|
80 |
"""๋ก๊ทธ์์ ์ฒ๋ฆฌ"""
|
81 |
+
username = session.get('username', 'unknown')
|
82 |
+
if session.pop('logged_in', None):
|
83 |
session.pop('username', None)
|
|
|
84 |
logger.info(f"์ฌ์ฉ์ {username} ๋ก๊ทธ์์ ์ฒ๋ฆฌ ์๋ฃ. ํ์ฌ ์ธ์
: {session}")
|
85 |
else:
|
86 |
logger.warning("๋ก๊ทธ์ธ๋์ง ์์ ์ํ์์ ๋ก๊ทธ์์ ์๋")
|
87 |
|
88 |
logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
89 |
response = redirect(url_for('login'))
|
|
|
90 |
logger.debug(f"๋ก๊ทธ์์ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
|
91 |
return response
|
92 |
|
93 |
+
# --- ๋ฉ์ธ ํ์ด์ง ๋ฐ ์ํ ํ์ธ (app_ready_flag ์ฌ์ฉ) ---
|
94 |
@app.route('/')
|
95 |
@login_required
|
96 |
def index():
|
97 |
"""๋ฉ์ธ ํ์ด์ง"""
|
98 |
+
# app_ready_flag๊ฐ Event ๊ฐ์ฒด์ธ์ง bool์ธ์ง ํ์ธํ๊ณ ์ํ ๊ฐ์ ธ์ค๊ธฐ
|
99 |
+
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
|
|
101 |
time_elapsed = time.time() - APP_START_TIME
|
102 |
|
|
|
|
|
|
|
|
|
|
|
103 |
if not is_ready:
|
104 |
logger.info(f"์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์ (๊ฒฝ๊ณผ ์๊ฐ: {time_elapsed:.1f}์ด)")
|
105 |
+
return render_template('loading.html') # 200 OK์ ๋ก๋ฉ ํ์ด์ง
|
|
|
|
|
106 |
|
107 |
logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
|
108 |
return render_template('index.html')
|
|
|
116 |
logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
|
117 |
return jsonify({"ready": is_ready})
|
118 |
|
119 |
+
# --- LLM API (๋ณ๊ฒฝ ์์) ---
|
120 |
@app.route('/api/llm', methods=['GET', 'POST'])
|
121 |
@login_required
|
122 |
def llm_api():
|
123 |
"""์ฌ์ฉ ๊ฐ๋ฅํ LLM ๋ชฉ๋ก ๋ฐ ์ ํ API"""
|
124 |
is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
|
125 |
+
# ์ด๊ธฐํ ์ค์๋ LLM ๋ชฉ๋ก ์กฐํ๋ ๊ฐ๋ฅํ๋๋ก ํ์ฉ
|
126 |
+
# if not is_ready:
|
127 |
+
# return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
|
|
|
128 |
|
129 |
if request.method == 'GET':
|
130 |
logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
|
131 |
try:
|
|
|
132 |
if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
|
133 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ์ด ์์ต๋๋ค.")
|
134 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
|
|
144 |
"current_llm": current_details
|
145 |
})
|
146 |
except Exception as e:
|
147 |
+
logger.error(f"LLM ์ ๋ณด ์กฐํ ์ค๋ฅ: {e}", exc_info=True)
|
148 |
return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500
|
149 |
|
150 |
elif request.method == 'POST':
|
|
|
156 |
logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
|
157 |
|
158 |
try:
|
|
|
159 |
if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
|
160 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ/๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
161 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
|
|
179 |
logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
|
180 |
return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500
|
181 |
|
182 |
+
# --- Chat API (unchanged) ---
|
183 |
@app.route('/api/chat', methods=['POST'])
|
184 |
@login_required
|
185 |
def chat():
|
186 |
"""ํ
์คํธ ๊ธฐ๋ฐ ์ฑ๋ด API"""
|
|
|
187 |
if retriever is None or not hasattr(retriever, 'search'):
|
188 |
logger.warning("์ฑํ
API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
|
|
189 |
return jsonify({
|
190 |
"answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
|
191 |
"sources": [],
|
192 |
+
"error": "Retriever not ready"
|
193 |
+
}), 200 # 503 ๋์ 200 OK
|
194 |
|
195 |
try:
|
196 |
data = request.get_json()
|
|
|
200 |
query = data['query']
|
201 |
logger.info(f"ํ
์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
|
202 |
|
203 |
+
search_results = retriever.search(query, top_k=5, first_stage_k=6)
|
|
|
204 |
|
|
|
205 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
|
206 |
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
207 |
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
208 |
+
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
209 |
|
210 |
if not context:
|
211 |
logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
|
|
|
|
|
212 |
|
213 |
+
llm_id = data.get('llm_id', None)
|
|
|
214 |
if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
|
215 |
logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
216 |
return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
|
|
|
222 |
answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
|
223 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
224 |
|
|
|
225 |
sources = []
|
226 |
if search_results:
|
227 |
for result in search_results:
|
228 |
if not isinstance(result, dict):
|
229 |
logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
|
230 |
continue
|
|
|
231 |
source_info = {}
|
232 |
+
source_key = result.get("source")
|
233 |
if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
|
234 |
source_key = result["metadata"].get("source")
|
|
|
235 |
if source_key:
|
236 |
source_info["source"] = source_key
|
237 |
source_info["score"] = result.get("rerank_score", result.get("score", 0))
|
|
|
|
|
238 |
filetype = result.get("filetype")
|
239 |
if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
|
240 |
filetype = result["metadata"].get("filetype")
|
|
|
241 |
if "text" in result and filetype == "csv":
|
242 |
try:
|
243 |
text_lines = result["text"].strip().split('\n')
|
|
|
246 |
if ',' in first_line:
|
247 |
first_column = first_line.split(',')[0].strip()
|
248 |
source_info["id"] = first_column
|
|
|
249 |
except Exception as e:
|
250 |
logger.warning(f"CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
|
|
251 |
sources.append(source_info)
|
252 |
|
|
|
253 |
response_data = {
|
254 |
"answer": answer,
|
255 |
"sources": sources,
|
|
|
261 |
logger.error(f"์ฑํ
์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
262 |
return jsonify({"error": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}), 500
|
263 |
|
264 |
+
# --- Voice Chat API (unchanged) ---
|
265 |
@app.route('/api/voice', methods=['POST'])
|
266 |
@login_required
|
267 |
def voice_chat():
|
268 |
"""์์ฑ ์ฑ API ์๋ํฌ์ธํธ"""
|
|
|
269 |
if retriever is None or not hasattr(retriever, 'search'):
|
270 |
logger.error("์์ฑ API ์์ฒญ ์ retriever๊ฐ ์ค๋น๋์ง ์์")
|
271 |
return jsonify({"error": "๊ฒ์ ์์ง ์ค๋น ์๋จ"}), 503
|
|
|
289 |
logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
|
290 |
|
291 |
try:
|
|
|
292 |
with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
|
293 |
audio_file.save(temp_audio.name)
|
294 |
logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
|
|
|
|
|
|
|
295 |
with open(temp_audio.name, 'rb') as f_bytes:
|
296 |
audio_bytes = f_bytes.read()
|
297 |
+
stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
|
298 |
|
299 |
if not isinstance(stt_result, dict) or not stt_result.get("success"):
|
300 |
error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
|
|
|
304 |
transcription = stt_result.get("text", "")
|
305 |
if not transcription:
|
306 |
logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
|
|
|
307 |
return jsonify({
|
308 |
"transcription": "",
|
309 |
"answer": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
|
310 |
"sources": [],
|
311 |
"llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
|
312 |
+
}), 200
|
313 |
|
314 |
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
315 |
|
|
|
316 |
search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
|
317 |
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
318 |
|
319 |
+
llm_id = request.form.get('llm_id', None)
|
320 |
if not context:
|
321 |
answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
322 |
logger.info("์ปจํ
์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
|
|
|
324 |
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
325 |
logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
|
326 |
|
|
|
327 |
sources = []
|
328 |
if search_results:
|
329 |
for result in search_results:
|
|
|
350 |
logger.warning(f"[์์ฑ์ฑ] CSV ์์ค ID ์ถ์ถ ์คํจ ({source_info.get('source')}): {e}")
|
351 |
sources.append(source_info)
|
352 |
|
|
|
353 |
response_data = {
|
354 |
"transcription": transcription,
|
355 |
"answer": answer,
|
|
|
362 |
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
363 |
return jsonify({"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์", "details": str(e)}), 500
|
364 |
|
365 |
+
# --- Document Upload API (unchanged) ---
|
366 |
@app.route('/api/upload', methods=['POST'])
|
367 |
@login_required
|
368 |
def upload_document():
|
369 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ์
๋ก๋ API"""
|
|
|
370 |
if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
|
371 |
logger.error("๋ฌธ์ ์
๋ก๋ API ์์ฒญ ์ base_retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
372 |
return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์ค๋น๋์ง ์์์ต๋๋ค."}), 503
|
|
|
375 |
return jsonify({"error": "๋ฌธ์ ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
376 |
|
377 |
doc_file = request.files['document']
|
378 |
+
if not doc_file or not doc_file.filename:
|
379 |
return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
|
380 |
|
381 |
if not allowed_doc_file(doc_file.filename):
|
382 |
+
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
|
383 |
logger.warning(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
|
384 |
return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์
๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
|
385 |
|
386 |
try:
|
387 |
filename = secure_filename(doc_file.filename)
|
388 |
+
data_folder = app.config.get('DATA_FOLDER', os.path.join(os.path.dirname(__file__), '..', 'data'))
|
389 |
+
os.makedirs(data_folder, exist_ok=True)
|
|
|
390 |
filepath = os.path.join(data_folder, filename)
|
391 |
|
392 |
doc_file.save(filepath)
|
393 |
logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")
|
394 |
|
|
|
395 |
if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
|
396 |
logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
|
|
|
397 |
try: os.remove(filepath)
|
398 |
except OSError: pass
|
399 |
return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
|
|
|
403 |
metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
|
404 |
docs = []
|
405 |
|
|
|
406 |
if file_ext in ['txt', 'md', 'csv']:
|
407 |
try:
|
408 |
with open(filepath, 'r', encoding='utf-8') as f:
|
|
|
418 |
except Exception as e_read:
|
419 |
logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
|
420 |
return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500
|
|
|
421 |
elif file_ext == 'pdf':
|
422 |
logger.warning("PDF ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
|
|
423 |
elif file_ext == 'docx':
|
424 |
logger.warning("DOCX ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
|
|
|
425 |
|
426 |
+
if content is not None:
|
|
|
427 |
if file_ext == 'csv':
|
428 |
logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
|
429 |
docs = DocumentProcessor.csv_to_documents(content, metadata)
|
430 |
+
elif file_ext in ['txt', 'md']:
|
431 |
logger.info(f"ํ
์คํธ ๋ฌธ์ ์ฒ๋ฆฌ ์์: {filename}")
|
432 |
docs = DocumentProcessor.text_to_documents(
|
433 |
content, metadata=metadata,
|
434 |
+
chunk_size=512, chunk_overlap=50
|
435 |
)
|
|
|
436 |
|
|
|
437 |
if docs:
|
438 |
logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
|
439 |
base_retriever.add_documents(docs)
|
440 |
|
441 |
logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
|
442 |
+
index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index'))
|
443 |
+
os.makedirs(os.path.dirname(index_path), exist_ok=True)
|
444 |
try:
|
445 |
base_retriever.save(index_path)
|
446 |
logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
|
|
|
447 |
return jsonify({
|
448 |
"success": True,
|
449 |
"message": f"ํ์ผ '{filename}' ์
๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
|
450 |
})
|
451 |
except Exception as e_save:
|
452 |
logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}", exc_info=True)
|
|
|
453 |
return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
|
454 |
else:
|
455 |
logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค.")
|
|
|
456 |
return jsonify({
|
457 |
+
"warning": True,
|
458 |
"message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์
๋๋ค."
|
459 |
})
|
460 |
|
461 |
except Exception as e:
|
462 |
logger.error(f"ํ์ผ ์
๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
|
|
463 |
if 'filepath' in locals() and os.path.exists(filepath):
|
464 |
try: os.remove(filepath)
|
465 |
except OSError: pass
|
466 |
return jsonify({"error": f"ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
467 |
|
468 |
+
# --- Document List API (removed the comments that caused a SyntaxError) ---
|
469 |
@app.route('/api/documents', methods=['GET'])
|
470 |
@login_required
|
471 |
def list_documents():
|
472 |
"""์ง์๋ฒ ์ด์ค ๋ฌธ์ ๋ชฉ๋ก API"""
|
473 |
+
# SyntaxError๋ฅผ ์ ๋ฐํ๋ ์ค๋ช
์ฃผ์๋ค์ ์ ๊ฑฐํ์ต๋๋ค.
|
474 |
+
logger.info("๋ฌธ์ ๋ชฉ๋ก API ์์ฒญ ์์")
|
|
|
|
|
|
|
|
|
475 |
|
|
|
476 |
if base_retriever is None:
|
477 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever๊ฐ None์
๋๋ค.")
|
|
|
478 |
return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
|
479 |
elif not hasattr(base_retriever, 'documents'):
|
480 |
logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever์ 'documents' ์์ฑ์ด ์์ต๋๋ค.")
|
|
|
486 |
logger.info(f"base_retriever.documents ํ์
: {type(base_retriever.documents)}")
|
487 |
logger.info(f"base_retriever.documents ๊ธธ์ด: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
|
488 |
|
|
|
489 |
try:
|
490 |
sources = {}
|
491 |
total_chunks = 0
|
492 |
+
doc_list = base_retriever.documents
|
493 |
|
494 |
if not isinstance(doc_list, list):
|
495 |
logger.error(f"base_retriever.documents๊ฐ ๋ฆฌ์คํธ๊ฐ ์๋: {type(doc_list)}")
|
|
|
496 |
return jsonify({"error": "๋ด๋ถ ๋ฐ์ดํฐ ๊ตฌ์กฐ ์ค๋ฅ"}), 500
|
497 |
|
498 |
logger.info(f"์ด {len(doc_list)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
|
499 |
for i, doc in enumerate(doc_list):
|
|
|
|
|
|
|
500 |
if not isinstance(doc, dict):
|
501 |
logger.warning(f"์ฒญํฌ {i}๊ฐ ๋์
๋๋ฆฌ ํ์
์ด ์๋: {type(doc)}")
|
502 |
+
continue
|
503 |
|
|
|
504 |
source = "unknown"
|
505 |
+
metadata = doc.get("metadata")
|
506 |
if isinstance(metadata, dict):
|
507 |
source = metadata.get("source", "unknown")
|
508 |
+
if source == "unknown":
|
509 |
source = doc.get("source", "unknown")
|
510 |
|
511 |
if source != "unknown":
|
512 |
if source in sources:
|
513 |
sources[source]["chunks"] += 1
|
514 |
else:
|
|
|
515 |
filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
|
516 |
filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
|
|
|
517 |
if filename == source and "filename" in doc: filename = doc["filename"]
|
518 |
if filetype == "unknown" and "filetype" in doc: filetype = doc["filetype"]
|
519 |
|
|
|
526 |
else:
|
527 |
logger.warning(f"์ฒญํฌ {i}์์ ์์ค ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์: {doc}")
|
528 |
|
|
|
|
|
529 |
documents = [{"source": src, **info} for src, info in sources.items()]
|
530 |
+
documents.sort(key=lambda x: x.get("filename", ""), reverse=False)
|
531 |
|
532 |
logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {total_chunks}๊ฐ ์ฒญํฌ")
|
533 |
return jsonify({
|
|
|
537 |
})
|
538 |
|
539 |
except Exception as e:
|
540 |
+
# ์ฌ๊ธฐ์ ๋ฐ์ํ๋ ์์ธ๊ฐ 503์ผ๋ก ์ด์ด์ง ์ ์๋์ง ํ์ธ
|
541 |
logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
|
|
|
542 |
return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500
|
|
|
543 |
```
|
544 |
|
545 |
+
이 코드로 업데이트하면 `SyntaxError` 없이 애플리케이션이 정상적으로 시작될 것입니다. 애플리케이션을 다시 시작하고 문제가 해결되었는지 확인해 보세요.
|
546 |
+
|
547 |
+
만약 이 수정 후에도 '문서관리' 또는 '장치관리' 탭 로딩 문제가 계속된다면, 이전에 말씀드린 대로 **브라우저 개발자 도구의 콘솔 및 네트워크 탭 정보**를 확인하여 정확한 원인을 파악해야 합니다.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|