jeongsoo committed on
Commit
4bd974a
·
1 Parent(s): 21600e4
Files changed (1) hide show
  1. app/app_routes.py +104 -164
app/app_routes.py CHANGED
@@ -7,8 +7,8 @@ import json
7
  import logging
8
  import tempfile
9
  import requests
10
- import time # 앱 시작 시간 기록 위해 추가
11
- import threading # threading.Event 사용 위해 추가
12
  from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
13
  from datetime import datetime
14
  from werkzeug.utils import secure_filename
@@ -19,8 +19,7 @@ logger = logging.getLogger(__name__)
19
  # 앱 시작 시간 기록 (모듈 로드 시점)
20
  APP_START_TIME = time.time()
21
 
22
- # register_routes 함수 인자 목록에서 app_ready_flag 대신 app_ready_event 사용 가정
23
- def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_event, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
24
  """Flask ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์— ๊ธฐ๋ณธ ๋ผ์šฐํŠธ ๋“ฑ๋ก"""
25
 
26
  # 헬퍼 함수 (변경 없음)
@@ -34,7 +33,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
34
  ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
35
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
36
 
37
- # --- 로그인/로그아웃 라우트 ---
38
  @app.route('/login', methods=['GET', 'POST'])
39
  def login():
40
  error = None
@@ -66,7 +65,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
66
  else:
67
  logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
68
  error = '์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค.'
69
- else: # GET ์š”์ฒญ
70
  logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ GET ์š”์ฒญ")
71
  if session.get('logged_in'):
72
  logger.info("์ด๋ฏธ ๋กœ๊ทธ์ธ๋œ ์‚ฌ์šฉ์ž, ๋ฉ”์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
@@ -75,65 +74,62 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
75
  logger.info("---------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ๋ Œ๋”๋ง ----------")
76
  return render_template('login.html', error=error, next=next_url)
77
 
78
-
79
  @app.route('/logout')
80
  def logout():
81
  """๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ"""
82
  username = session.get('username', 'unknown')
83
  if session.pop('logged_in', None):
84
- session.pop('username', None)
85
- logger.info(f"์‚ฌ์šฉ์ž {username} ๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ ์™„๋ฃŒ. ํ˜„์žฌ ์„ธ์…˜: {session}")
86
  else:
87
- logger.warning("๋กœ๊ทธ์ธ๋˜์ง€ ์•Š์€ ์ƒํƒœ์—์„œ ๋กœ๊ทธ์•„์›ƒ ์‹œ๋„")
88
 
89
  logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
90
  response = redirect(url_for('login'))
91
  logger.debug(f"๋กœ๊ทธ์•„์›ƒ ์‘๋‹ต ํ—ค๋” (Set-Cookie ํ™•์ธ): {response.headers.getlist('Set-Cookie')}")
92
  return response
93
 
94
- # --- 메인 페이지 및 상태 확인 (app_ready_event 사용) ---
95
  @app.route('/')
96
  @login_required
97
  def index():
98
  """๋ฉ”์ธ ํŽ˜์ด์ง€"""
99
- # app_ready_event๊ฐ€ Event ๊ฐ์ฒด์ธ์ง€ ํ™•์ธํ•˜๊ณ  ์ƒํƒœ ๊ฐ€์ ธ์˜ค๊ธฐ
100
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False # ๊ธฐ๋ณธ๊ฐ’ False
101
 
102
  time_elapsed = time.time() - APP_START_TIME
103
 
104
  if not is_ready:
105
  logger.info(f"์•ฑ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•„ ๋กœ๋”ฉ ํŽ˜์ด์ง€ ํ‘œ์‹œ (๊ฒฝ๊ณผ ์‹œ๊ฐ„: {time_elapsed:.1f}์ดˆ)")
106
- # loading.html ํ…œํ”Œ๋ฆฟ์ด ์žˆ๋‹ค๊ณ  ๊ฐ€์ •
107
- return render_template('loading.html') # 200 OK์™€ ๋กœ๋”ฉ ํŽ˜์ด์ง€
108
 
109
  logger.info("๋ฉ”์ธ ํŽ˜์ด์ง€ ์š”์ฒญ")
110
- # index.html ํ…œํ”Œ๋ฆฟ์ด ์žˆ๋‹ค๊ณ  ๊ฐ€์ •
111
  return render_template('index.html')
112
 
113
-
114
  @app.route('/api/status')
115
  @login_required
116
  def app_status():
117
  """์•ฑ ์ดˆ๊ธฐํ™” ์ƒํƒœ ํ™•์ธ API"""
118
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
119
  logger.info(f"์•ฑ ์ƒํƒœ ํ™•์ธ ์š”์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
120
  return jsonify({"ready": is_ready})
121
 
122
- # --- LLM API ---
123
  @app.route('/api/llm', methods=['GET', 'POST'])
124
  @login_required
125
  def llm_api():
126
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LLM ๋ชฉ๋ก ๋ฐ ์„ ํƒ API"""
127
- # is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
128
- # LLM ๋ชฉ๋ก ์กฐํšŒ๋Š” ์ดˆ๊ธฐํ™” ์ค‘์—๋„ ๊ฐ€๋Šฅํ•˜๋„๋ก ํ—ˆ์šฉ
 
 
129
 
130
  if request.method == 'GET':
131
  logger.info("LLM ๋ชฉ๋ก ์š”์ฒญ")
132
  try:
133
- # ๊ฐ์ฒด ๋ฐ ์†์„ฑ ํ™•์ธ ๊ฐ•ํ™”
134
  if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
135
- logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
136
- return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
137
 
138
  current_details = llm_interface.get_current_llm_details()
139
  supported_llms_dict = llm_interface.SUPPORTED_LLMS
@@ -150,10 +146,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
150
  return jsonify({"error": "LLM ์ •๋ณด ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
151
 
152
  elif request.method == 'POST':
153
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
154
- if not is_ready: # LLM ๋ณ€๊ฒฝ์€ ์•ฑ ์ค€๋น„ ์™„๋ฃŒ ํ›„ ๊ฐ€๋Šฅ
155
- return jsonify({"error": "์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503
156
-
157
  data = request.get_json()
158
  if not data or 'llm_id' not in data:
159
  return jsonify({"error": "LLM ID๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
@@ -162,10 +154,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
162
  logger.info(f"LLM ๋ณ€๊ฒฝ ์š”์ฒญ: {llm_id}")
163
 
164
  try:
165
- # ๊ฐ์ฒด ๋ฐ ์†์„ฑ/๋ฉ”์†Œ๋“œ ํ™•์ธ ๊ฐ•ํ™”
166
  if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
167
- logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ/๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
168
- return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
169
 
170
  if llm_id not in llm_interface.llm_clients:
171
  return jsonify({"error": f"์ง€์›๋˜์ง€ ์•Š๋Š” LLM ID: {llm_id}"}), 400
@@ -186,23 +177,18 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
186
  logger.error(f"LLM ๋ณ€๊ฒฝ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {e}", exc_info=True)
187
  return jsonify({"error": f"LLM ๋ณ€๊ฒฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"}), 500
188
 
189
- # --- Chat API ---
190
  @app.route('/api/chat', methods=['POST'])
191
  @login_required
192
  def chat():
193
  """ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์ฑ„๋ด‡ API"""
194
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
195
- if not is_ready:
196
- return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘...", "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ์ด ์•„์ง ์ค€๋น„ ์ค‘์ž…๋‹ˆ๋‹ค.", "sources": []}), 503
197
-
198
- # retriever ๊ฐ์ฒด ๋ฐ ํ•„์ˆ˜ ๋ฉ”์†Œ๋“œ ํ™•์ธ
199
  if retriever is None or not hasattr(retriever, 'search'):
200
  logger.warning("์ฑ„ํŒ… API ์š”์ฒญ ์‹œ retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
201
  return jsonify({
202
  "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ฒ€์ƒ‰ ์—”์ง„์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
203
  "sources": [],
204
  "error": "Retriever not ready"
205
- }), 503 # ์„œ๋น„์Šค ๋ถˆ๊ฐ€ ์ƒํƒœ
206
 
207
  try:
208
  data = request.get_json()
@@ -212,25 +198,21 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
212
  query = data['query']
213
  logger.info(f"ํ…์ŠคํŠธ ์ฟผ๋ฆฌ ์ˆ˜์‹ : {query[:100]}...")
214
 
215
- # RAG ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰
216
  search_results = retriever.search(query, top_k=5, first_stage_k=6)
217
 
218
- # DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ”์†Œ๋“œ ํ™•์ธ
219
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
220
- logger.error("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ prepare_rag_context ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
221
- return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
222
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
223
 
224
  if not context:
225
  logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์— ๋Œ€ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ.")
226
 
227
- # LLM ์ธํ„ฐํŽ˜์ด์Šค ๊ฐ์ฒด ๋ฐ ๋ฉ”์†Œ๋“œ ํ™•์ธ
228
  llm_id = data.get('llm_id', None)
229
  if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
230
- logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ rag_generate ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
231
- return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
232
 
233
- # LLM ํ˜ธ์ถœ
234
  if not context:
235
  answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
236
  logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
@@ -238,7 +220,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
238
  answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
239
  logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
240
 
241
- # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ
242
  sources = []
243
  if search_results:
244
  for result in search_results:
@@ -267,7 +248,6 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
267
  logger.warning(f"CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({source_info.get('source')}): {e}")
268
  sources.append(source_info)
269
 
270
- # ์ตœ์ข… ์‘๋‹ต
271
  response_data = {
272
  "answer": answer,
273
  "sources": sources,
@@ -279,16 +259,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
279
  logger.error(f"์ฑ„ํŒ… ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
280
  return jsonify({"error": f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}), 500
281
 
282
- # --- Voice Chat API ---
283
  @app.route('/api/voice', methods=['POST'])
284
  @login_required
285
  def voice_chat():
286
  """์Œ์„ฑ ์ฑ— API ์—”๋“œํฌ์ธํŠธ"""
287
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
288
- if not is_ready:
289
- return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘..."}), 503
290
-
291
- # ํ•„์ˆ˜ ์ปดํฌ๋„ŒํŠธ ํ™•์ธ
292
  if retriever is None or not hasattr(retriever, 'search'):
293
  logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
294
  return jsonify({"error": "๊ฒ€์ƒ‰ ์—”์ง„ ์ค€๋น„ ์•ˆ๋จ"}), 503
@@ -296,11 +271,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
296
  logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ STT ํด๋ผ์ด์–ธํŠธ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
297
  return jsonify({"error": "์Œ์„ฑ ์ธ์‹ ์„œ๋น„์Šค ์ค€๋น„ ์•ˆ๋จ"}), 503
298
  if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
299
- logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
300
- return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
301
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
302
- logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
303
- return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
304
 
305
  logger.info("์Œ์„ฑ ์ฑ— ์š”์ฒญ ์ˆ˜์‹ ")
306
 
@@ -312,16 +287,13 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
312
  logger.info(f"์ˆ˜์‹ ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ: {audio_file.filename} ({audio_file.content_type})")
313
 
314
  try:
315
- # ์˜ค๋””์˜ค ํŒŒ์ผ ์ž„์‹œ ์ €์žฅ ๋ฐ ์ฒ˜๋ฆฌ
316
  with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
317
  audio_file.save(temp_audio.name)
318
  logger.info(f"์˜ค๋””์˜ค ํŒŒ์ผ์„ ์ž„์‹œ ์ €์žฅ: {temp_audio.name}")
319
- # STT ์ˆ˜ํ–‰ (๋ฐ”์ดํŠธ ์ „๋‹ฌ ๊ฐ€์ •)
320
  with open(temp_audio.name, 'rb') as f_bytes:
321
  audio_bytes = f_bytes.read()
322
  stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
323
 
324
- # STT ๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ
325
  if not isinstance(stt_result, dict) or not stt_result.get("success"):
326
  error_msg = stt_result.get("error", "์•Œ ์ˆ˜ ์—†๋Š” STT ์˜ค๋ฅ˜") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ˜•์‹ ์˜ค๋ฅ˜"
327
  logger.error(f"์Œ์„ฑ์ธ์‹ ์‹คํŒจ: {error_msg}")
@@ -335,15 +307,14 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
335
  "answer": "์Œ์„ฑ์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.",
336
  "sources": [],
337
  "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
338
- }), 200 # 200 OK์™€ ๋ฉ”์‹œ์ง€
339
 
340
  logger.info(f"์Œ์„ฑ์ธ์‹ ์„ฑ๊ณต: {transcription[:50]}...")
341
 
342
- # --- RAG ๋ฐ LLM ํ˜ธ์ถœ (Chat API์™€ ๋™์ผ ๋กœ์ง) ---
343
  search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
344
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
345
 
346
- llm_id = request.form.get('llm_id', None) # form ๋ฐ์ดํ„ฐ์—์„œ llm_id ๊ฐ€์ ธ์˜ค๊ธฐ
347
  if not context:
348
  answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
349
  logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
@@ -351,34 +322,33 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
351
  answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
352
  logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
353
 
354
- # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ (Chat API์™€ ๋™์ผ ๋กœ์ง)
355
  sources = []
356
  if search_results:
357
- for result in search_results:
358
- if not isinstance(result, dict): continue
359
- source_info = {}
360
- source_key = result.get("source")
361
- if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
362
- source_key = result["metadata"].get("source")
363
- if source_key:
364
- source_info["source"] = source_key
365
- source_info["score"] = result.get("rerank_score", result.get("score", 0))
366
- filetype = result.get("filetype")
367
- if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
368
- filetype = result["metadata"].get("filetype")
369
- if "text" in result and filetype == "csv":
370
- try:
371
- text_lines = result["text"].strip().split('\n')
372
- if text_lines:
373
- first_line = text_lines[0].strip()
374
- if ',' in first_line:
375
- first_column = first_line.split(',')[0].strip()
376
- source_info["id"] = first_column
377
- except Exception as e:
378
- logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({source_info.get('source')}): {e}")
379
- sources.append(source_info)
380
-
381
- # ์ตœ์ข… ์‘๋‹ต
382
  response_data = {
383
  "transcription": transcription,
384
  "answer": answer,
@@ -391,16 +361,11 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
391
  logger.error(f"์Œ์„ฑ ์ฑ— ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
392
  return jsonify({"error": "์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ", "details": str(e)}), 500
393
 
394
- # --- Document Upload API ---
395
  @app.route('/api/upload', methods=['POST'])
396
  @login_required
397
  def upload_document():
398
  """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ์—…๋กœ๋“œ API"""
399
- is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
400
- if not is_ready:
401
- return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘..."}), 503
402
-
403
- # base_retriever ๊ฐ์ฒด ๋ฐ ํ•„์ˆ˜ ๋ฉ”์†Œ๋“œ ํ™•์ธ
404
  if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
405
  logger.error("๋ฌธ์„œ ์—…๋กœ๋“œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์ˆ˜ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
406
  return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 503
@@ -412,38 +377,33 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
412
  if not doc_file or not doc_file.filename:
413
  return jsonify({"error": "์„ ํƒ๋œ ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."}), 400
414
 
415
- # ALLOWED_DOC_EXTENSIONS๋ฅผ ํ•จ์ˆ˜ ๋‚ด์—์„œ ๋‹ค์‹œ ์ •์˜ํ•˜๊ฑฐ๋‚˜ ์ „์—ญ ์ƒ์ˆ˜๋กœ ์‚ฌ์šฉ
416
- ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
417
  if not allowed_doc_file(doc_file.filename):
 
418
  logger.warning(f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {doc_file.filename}")
419
  return jsonify({"error": f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ํ—ˆ์šฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
420
 
421
  try:
422
  filename = secure_filename(doc_file.filename)
423
- # app.config ์‚ฌ์šฉ ํ™•์ธ
424
- if 'DATA_FOLDER' not in app.config:
425
- logger.error("Flask app.config์— DATA_FOLDER๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
426
- return jsonify({"error": "์„œ๋ฒ„ ์„ค์ • ์˜ค๋ฅ˜ (DATA_FOLDER)"}), 500
427
- data_folder = app.config['DATA_FOLDER']
428
  os.makedirs(data_folder, exist_ok=True)
429
  filepath = os.path.join(data_folder, filename)
430
 
431
  doc_file.save(filepath)
432
  logger.info(f"๋ฌธ์„œ ์ €์žฅ ์™„๋ฃŒ: {filepath}")
433
 
434
- # DocumentProcessor ๊ฐ์ฒด ๋ฐ ๋ฉ”์†Œ๋“œ ํ™•์ธ
435
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
436
- logger.error("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
437
- try: os.remove(filepath) # ์ €์žฅ๋œ ํŒŒ์ผ ์‚ญ์ œ
438
- except OSError: pass
439
- return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
 
 
440
 
441
  content = None
442
  file_ext = filename.rsplit('.', 1)[1].lower()
443
  metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
444
  docs = []
445
 
446
- # ํŒŒ์ผ ์ฝ๊ธฐ ๋ฐ ๋‚ด์šฉ ์ถ”์ถœ
447
  if file_ext in ['txt', 'md', 'csv']:
448
  try:
449
  with open(filepath, 'r', encoding='utf-8') as f:
@@ -460,115 +420,94 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
460
  logger.error(f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜ ({filename}): {e_read}")
461
  return jsonify({"error": f"ํŒŒ์ผ ์ฝ๊ธฐ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e_read)}"}), 500
462
  elif file_ext == 'pdf':
463
- logger.warning("PDF ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
464
- # ์—ฌ๊ธฐ์— PDF ํ…์ŠคํŠธ ์ถ”์ถœ ๋กœ์ง ์ถ”๊ฐ€ (์˜ˆ: PyPDF2 ์‚ฌ์šฉ)
465
- # content = extract_text_from_pdf(filepath)
466
  elif file_ext == 'docx':
467
- logger.warning("DOCX ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
468
- # ์—ฌ๊ธฐ์— DOCX ํ…์ŠคํŠธ ์ถ”์ถœ ๋กœ์ง ์ถ”๊ฐ€ (์˜ˆ: python-docx ์‚ฌ์šฉ)
469
- # content = extract_text_from_docx(filepath)
470
 
471
- # ๋ฌธ์„œ ๋ถ„ํ• /์ฒ˜๋ฆฌ
472
- if content is not None: # ๋‚ด์šฉ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์ฝํ˜”๊ฑฐ๋‚˜ ์ถ”์ถœ๋˜์—ˆ์„ ๋•Œ๋งŒ
473
  if file_ext == 'csv':
474
  logger.info(f"CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
475
  docs = DocumentProcessor.csv_to_documents(content, metadata)
476
- elif file_ext in ['txt', 'md'] or (file_ext in ['pdf', 'docx'] and content): # ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋˜๋Š” ์ถ”์ถœ๋œ ๋‚ด์šฉ
477
- logger.info(f"ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
478
- # text_to_documents ํ•จ์ˆ˜๊ฐ€ ์ฒญํฌ ๋ถ„ํ•  ๋“ฑ์„ ์ˆ˜ํ–‰ํ•œ๋‹ค๊ณ  ๊ฐ€์ •
479
  docs = DocumentProcessor.text_to_documents(
480
  content, metadata=metadata,
481
- chunk_size=512, chunk_overlap=50 # ์„ค์ •๊ฐ’ ์‚ฌ์šฉ
482
  )
483
 
484
- # ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ ๋ฐ ์ €์žฅ
485
  if docs:
486
  logger.info(f"{len(docs)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ๋ฅผ ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค...")
487
  base_retriever.add_documents(docs)
488
 
489
  logger.info(f"๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ๋ฅผ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค...")
490
- # app.config ์‚ฌ์šฉ ํ™•์ธ
491
- if 'INDEX_PATH' not in app.config:
492
- logger.error("Flask app.config์— INDEX_PATH๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
493
- return jsonify({"error": "์„œ๋ฒ„ ์„ค์ • ์˜ค๋ฅ˜ (INDEX_PATH)"}), 500
494
- index_path = app.config['INDEX_PATH']
495
- # ์ธ๋ฑ์Šค ์ €์žฅ ๊ฒฝ๋กœ๊ฐ€ ํด๋”์ธ์ง€ ํŒŒ์ผ์ธ์ง€ ํ™•์ธ ํ•„์š” (VectorRetriever.save ๊ตฌํ˜„์— ๋”ฐ๋ผ ๋‹ค๋ฆ„)
496
- # ์—ฌ๊ธฐ์„œ๋Š” index_path๊ฐ€ ๋””๋ ‰ํ† ๋ฆฌ๋ผ๊ณ  ๊ฐ€์ •ํ•˜๊ณ  ๋ถ€๋ชจ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
497
  os.makedirs(os.path.dirname(index_path), exist_ok=True)
498
  try:
499
  base_retriever.save(index_path)
500
  logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ")
501
- # TODO: ์žฌ์ˆœ์œ„ํ™” ๊ฒ€์ƒ‰๊ธฐ(retriever) ์—…๋ฐ์ดํŠธ ๋กœ์ง ํ•„์š” ์‹œ ์ถ”๊ฐ€
502
- # ์˜ˆ: if retriever and hasattr(retriever, 'update_base_retriever'): retriever.update_base_retriever(base_retriever)
503
  return jsonify({
504
  "success": True,
505
  "message": f"ํŒŒ์ผ '{filename}' ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ ({len(docs)}๊ฐœ ์ฒญํฌ ์ถ”๊ฐ€)."
506
  })
507
  except Exception as e_save:
508
  logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e_save}", exc_info=True)
509
- # ์ €์žฅ ์‹คํŒจ ์‹œ ์ถ”๊ฐ€๋œ ๋ฌธ์„œ ๋กค๋ฐฑ ๊ณ ๋ ค?
510
  return jsonify({"error": f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜: {str(e_save)}"}), 500
511
  else:
512
  logger.warning(f"ํŒŒ์ผ '{filename}'์—์„œ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค.")
513
- # ํŒŒ์ผ์€ ์ €์žฅ๋˜์—ˆ์œผ๋ฏ€๋กœ warning ๋ฐ˜ํ™˜
514
  return jsonify({
515
- "warning": True, # 'success' ๋Œ€์‹  'warning' ์‚ฌ์šฉ
516
  "message": f"ํŒŒ์ผ '{filename}'์ด ์ €์žฅ๋˜์—ˆ์ง€๋งŒ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค."
517
  })
518
 
519
  except Exception as e:
520
  logger.error(f"ํŒŒ์ผ ์—…๋กœ๋“œ ๋˜๋Š” ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
521
- # ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ €์žฅ๋œ ํŒŒ์ผ ์‚ญ์ œ
522
  if 'filepath' in locals() and os.path.exists(filepath):
523
- try: os.remove(filepath)
524
- except OSError as e_del: logger.error(f"์—…๋กœ๋“œ ์‹คํŒจ ํ›„ ํŒŒ์ผ ์‚ญ์ œ ์˜ค๋ฅ˜: {e_del}")
 
 
525
  return jsonify({"error": f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
526
 
527
- # --- Document List API ---
528
  @app.route('/api/documents', methods=['GET'])
529
  @login_required
530
  def list_documents():
531
  """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ๋ชฉ๋ก API"""
 
532
  logger.info("๋ฌธ์„œ ๋ชฉ๋ก API ์š”์ฒญ ์‹œ์ž‘")
533
 
534
- # base_retriever ์ƒํƒœ ํ™•์ธ
535
  if base_retriever is None:
536
  logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ None์ž…๋‹ˆ๋‹ค.")
537
  return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
538
  elif not hasattr(base_retriever, 'documents'):
539
- logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever์— 'documents' ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
540
- return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
541
 
542
- # ๋กœ๊น… ์ถ”๊ฐ€
543
  logger.info(f"base_retriever ๊ฐ์ฒด ํƒ€์ž…: {type(base_retriever)}")
544
  logger.info(f"base_retriever.documents ์กด์žฌ ์—ฌ๋ถ€: {hasattr(base_retriever, 'documents')}")
545
- doc_list_attr = getattr(base_retriever, 'documents', None) # ์•ˆ์ „ํ•˜๊ฒŒ ์†์„ฑ ๊ฐ€์ ธ์˜ค๊ธฐ
546
- logger.info(f"base_retriever.documents ํƒ€์ž…: {type(doc_list_attr)}")
547
- logger.info(f"base_retriever.documents ๊ธธ์ด: {len(doc_list_attr) if isinstance(doc_list_attr, list) else 'N/A'}")
548
 
549
  try:
550
  sources = {}
551
  total_chunks = 0
552
- doc_list = doc_list_attr # ์œ„์—์„œ ๊ฐ€์ ธ์˜จ ์†์„ฑ ์‚ฌ์šฉ
553
 
554
- # doc_list๊ฐ€ ๋ฆฌ์ŠคํŠธ์ธ์ง€ ํ™•์ธ
555
  if not isinstance(doc_list, list):
556
- logger.error(f"base_retriever.documents๊ฐ€ ๋ฆฌ์ŠคํŠธ๊ฐ€ ์•„๋‹˜: {type(doc_list)}")
557
- return jsonify({"error": "๋‚ด๋ถ€ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ ์˜ค๋ฅ˜"}), 500
558
 
559
  logger.info(f"์ด {len(doc_list)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ์—์„œ ์†Œ์Šค ๋ชฉ๋ก ์ƒ์„ฑ ์ค‘...")
560
  for i, doc in enumerate(doc_list):
561
- # ๊ฐ ์ฒญํฌ๊ฐ€ ๋”•์…”๋„ˆ๋ฆฌ ํ˜•ํƒœ์ธ์ง€ ํ™•์ธ (Langchain Document ๊ฐ์ฒด๋„ ๋”•์…”๋„ˆ๋ฆฌ์ฒ˜๋Ÿผ ๋™์ž‘ ๊ฐ€๋Šฅ)
562
- if not hasattr(doc, 'get'): # ๋”•์…”๋„ˆ๋ฆฌ ๋˜๋Š” ์œ ์‚ฌ ๊ฐ์ฒด์ธ์ง€ ํ™•์ธ
563
- logger.warning(f"์ฒญํฌ {i}๊ฐ€ ๋”•์…”๋„ˆ๋ฆฌ ํƒ€์ž…์ด ์•„๋‹˜: {type(doc)}")
564
- continue
565
 
566
- # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ (metadata ์šฐ์„ )
567
  source = "unknown"
568
  metadata = doc.get("metadata")
569
  if isinstance(metadata, dict):
570
  source = metadata.get("source", "unknown")
571
- # metadata์— ์—†์œผ๋ฉด doc ์ž์ฒด์—์„œ ์ฐพ๊ธฐ (ํ•˜์œ„ ํ˜ธํ™˜์„ฑ)
572
  if source == "unknown":
573
  source = doc.get("source", "unknown")
574
 
@@ -576,12 +515,12 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
576
  if source in sources:
577
  sources[source]["chunks"] += 1
578
  else:
579
- # filename, filetype ์ถ”์ถœ (metadata ์šฐ์„ )
580
  filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
581
  filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
582
- # metadata์— ์—†์œผ๋ฉด doc ์ž์ฒด์—์„œ ์ฐพ๊ธฐ
583
- if filename == source and doc.get("filename"): filename = doc["filename"]
584
- if filetype == "unknown" and doc.get("filetype"): filetype = doc["filetype"]
 
585
 
586
  sources[source] = {
587
  "filename": filename,
@@ -590,12 +529,10 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
590
  }
591
  total_chunks += 1
592
  else:
593
- # ์†Œ์Šค ์ •๋ณด๊ฐ€ ์—†๋Š” ์ฒญํฌ ๋กœ๊น… (๋„ˆ๋ฌด ๋งŽ์œผ๋ฉด ์ฃผ์„ ์ฒ˜๋ฆฌ)
594
- logger.warning(f"์ฒญํฌ {i}์—์„œ ์†Œ์Šค ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ: {str(doc)[:200]}...") # ๋‚ด์šฉ ์ผ๋ถ€ ๋กœ๊น…
595
 
596
- # ์ตœ์ข… ๋ชฉ๋ก ์ƒ์„ฑ ๋ฐ ์ •๋ ฌ
597
  documents = [{"source": src, **info} for src, info in sources.items()]
598
- documents.sort(key=lambda x: x.get("filename", ""), reverse=False) # ํŒŒ์ผ๋ช… ๊ธฐ์ค€ ์˜ค๋ฆ„์ฐจ์ˆœ ์ •๋ ฌ
599
 
600
  logger.info(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์™„๋ฃŒ: {len(documents)}๊ฐœ ์†Œ์Šค ํŒŒ์ผ, {total_chunks}๊ฐœ ์ฒญํฌ")
601
  return jsonify({
@@ -605,6 +542,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
605
  })
606
 
607
  except Exception as e:
 
608
  logger.error(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์‹ฌ๊ฐํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
609
- # 503 ๋Œ€์‹  500 ๋ฐ˜ํ™˜
610
  return jsonify({"error": f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
 
 
 
 
7
  import logging
8
  import tempfile
9
  import requests
10
+ import time # 앱 시작 시간 기록 위해 추가
11
+ import threading # threading.Event 사용 위해 추가
12
  from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
13
  from datetime import datetime
14
  from werkzeug.utils import secure_filename
 
19
  # 앱 시작 시간 기록 (모듈 로드 시점)
20
  APP_START_TIME = time.time()
21
 
22
+ def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_flag, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
 
23
  """Flask ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์— ๊ธฐ๋ณธ ๋ผ์šฐํŠธ ๋“ฑ๋ก"""
24
 
25
  # 헬퍼 함수 (변경 없음)
 
33
  ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
34
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
35
 
36
+ # --- 로그인/로그아웃 라우트 (변경 없음) ---
37
  @app.route('/login', methods=['GET', 'POST'])
38
  def login():
39
  error = None
 
65
  else:
66
  logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
67
  error = '์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค.'
68
+ else: # GET ์š”์ฒญ
69
  logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ GET ์š”์ฒญ")
70
  if session.get('logged_in'):
71
  logger.info("์ด๋ฏธ ๋กœ๊ทธ์ธ๋œ ์‚ฌ์šฉ์ž, ๋ฉ”์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
 
74
  logger.info("---------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ๋ Œ๋”๋ง ----------")
75
  return render_template('login.html', error=error, next=next_url)
76
 
 
77
  @app.route('/logout')
78
  def logout():
79
  """๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ"""
80
  username = session.get('username', 'unknown')
81
  if session.pop('logged_in', None):
82
+ session.pop('username', None)
83
+ logger.info(f"์‚ฌ์šฉ์ž {username} ๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ ์™„๋ฃŒ. ํ˜„์žฌ ์„ธ์…˜: {session}")
84
  else:
85
+ logger.warning("๋กœ๊ทธ์ธ๋˜์ง€ ์•Š์€ ์ƒํƒœ์—์„œ ๋กœ๊ทธ์•„์›ƒ ์‹œ๋„")
86
 
87
  logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
88
  response = redirect(url_for('login'))
89
  logger.debug(f"๋กœ๊ทธ์•„์›ƒ ์‘๋‹ต ํ—ค๋” (Set-Cookie ํ™•์ธ): {response.headers.getlist('Set-Cookie')}")
90
  return response
91
 
92
+ # --- 메인 페이지 및 상태 확인 (app_ready_flag 사용) ---
93
  @app.route('/')
94
  @login_required
95
  def index():
96
  """๋ฉ”์ธ ํŽ˜์ด์ง€"""
97
+ # app_ready_flag๊ฐ€ Event ๊ฐ์ฒด์ธ์ง€ bool์ธ์ง€ ํ™•์ธํ•˜๊ณ  ์ƒํƒœ ๊ฐ€์ ธ์˜ค๊ธฐ
98
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
99
 
100
  time_elapsed = time.time() - APP_START_TIME
101
 
102
  if not is_ready:
103
  logger.info(f"์•ฑ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•„ ๋กœ๋”ฉ ํŽ˜์ด์ง€ ํ‘œ์‹œ (๊ฒฝ๊ณผ ์‹œ๊ฐ„: {time_elapsed:.1f}์ดˆ)")
104
+ return render_template('loading.html') # 200 OK์™€ ๋กœ๋”ฉ ํŽ˜์ด์ง€
 
105
 
106
  logger.info("๋ฉ”์ธ ํŽ˜์ด์ง€ ์š”์ฒญ")
 
107
  return render_template('index.html')
108
 
 
109
  @app.route('/api/status')
110
  @login_required
111
  def app_status():
112
  """์•ฑ ์ดˆ๊ธฐํ™” ์ƒํƒœ ํ™•์ธ API"""
113
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
114
  logger.info(f"์•ฑ ์ƒํƒœ ํ™•์ธ ์š”์ฒญ: {'Ready' if is_ready else 'Not Ready'}")
115
  return jsonify({"ready": is_ready})
116
 
117
+ # --- LLM API (๋ณ€๊ฒฝ ์—†์Œ) ---
118
  @app.route('/api/llm', methods=['GET', 'POST'])
119
  @login_required
120
  def llm_api():
121
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LLM ๋ชฉ๋ก ๋ฐ ์„ ํƒ API"""
122
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
123
+ # 초기화 중에도 LLM 목록 조회는 가능하도록 허용
124
+ # if not is_ready:
125
+ # return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘..."}), 503
126
 
127
  if request.method == 'GET':
128
  logger.info("LLM ๋ชฉ๋ก ์š”์ฒญ")
129
  try:
 
130
  if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
131
+ logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
132
+ return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
133
 
134
  current_details = llm_interface.get_current_llm_details()
135
  supported_llms_dict = llm_interface.SUPPORTED_LLMS
 
146
  return jsonify({"error": "LLM ์ •๋ณด ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
147
 
148
  elif request.method == 'POST':
 
 
 
 
149
  data = request.get_json()
150
  if not data or 'llm_id' not in data:
151
  return jsonify({"error": "LLM ID๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
 
154
  logger.info(f"LLM ๋ณ€๊ฒฝ ์š”์ฒญ: {llm_id}")
155
 
156
  try:
 
157
  if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
158
+ logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ/๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
159
+ return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
160
 
161
  if llm_id not in llm_interface.llm_clients:
162
  return jsonify({"error": f"์ง€์›๋˜์ง€ ์•Š๋Š” LLM ID: {llm_id}"}), 400
 
177
  logger.error(f"LLM ๋ณ€๊ฒฝ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {e}", exc_info=True)
178
  return jsonify({"error": f"LLM ๋ณ€๊ฒฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"}), 500
179
 
180
+ # --- Chat API (๋ณ€๊ฒฝ ์—†์Œ) ---
181
  @app.route('/api/chat', methods=['POST'])
182
  @login_required
183
  def chat():
184
  """ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์ฑ„๋ด‡ API"""
 
 
 
 
 
185
  if retriever is None or not hasattr(retriever, 'search'):
186
  logger.warning("์ฑ„ํŒ… API ์š”์ฒญ ์‹œ retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
187
  return jsonify({
188
  "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ฒ€์ƒ‰ ์—”์ง„์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
189
  "sources": [],
190
  "error": "Retriever not ready"
191
+ }), 200 # 503 ๋Œ€์‹  200 OK
192
 
193
  try:
194
  data = request.get_json()
 
198
  query = data['query']
199
  logger.info(f"ํ…์ŠคํŠธ ์ฟผ๋ฆฌ ์ˆ˜์‹ : {query[:100]}...")
200
 
 
201
  search_results = retriever.search(query, top_k=5, first_stage_k=6)
202
 
 
203
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
204
+ logger.error("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ prepare_rag_context ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
205
+ return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
206
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
207
 
208
  if not context:
209
  logger.warning(f"์ฟผ๋ฆฌ '{query[:50]}...'์— ๋Œ€ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ.")
210
 
 
211
  llm_id = data.get('llm_id', None)
212
  if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
213
+ logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ rag_generate ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
214
+ return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
215
 
 
216
  if not context:
217
  answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
218
  logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
 
220
  answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
221
  logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
222
 
 
223
  sources = []
224
  if search_results:
225
  for result in search_results:
 
248
  logger.warning(f"CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({source_info.get('source')}): {e}")
249
  sources.append(source_info)
250
 
 
251
  response_data = {
252
  "answer": answer,
253
  "sources": sources,
 
259
  logger.error(f"์ฑ„ํŒ… ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
260
  return jsonify({"error": f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}), 500
261
 
262
+ # --- Voice Chat API (변경 없음) ---
263
  @app.route('/api/voice', methods=['POST'])
264
  @login_required
265
  def voice_chat():
266
  """์Œ์„ฑ ์ฑ— API ์—”๋“œํฌ์ธํŠธ"""
 
 
 
 
 
267
  if retriever is None or not hasattr(retriever, 'search'):
268
  logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
269
  return jsonify({"error": "๊ฒ€์ƒ‰ ์—”์ง„ ์ค€๋น„ ์•ˆ๋จ"}), 503
 
271
  logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ STT ํด๋ผ์ด์–ธํŠธ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
272
  return jsonify({"error": "์Œ์„ฑ ์ธ์‹ ์„œ๋น„์Šค ์ค€๋น„ ์•ˆ๋จ"}), 503
273
  if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
274
+ logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
275
+ return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
276
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
277
+ logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
278
+ return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
279
 
280
  logger.info("์Œ์„ฑ ์ฑ— ์š”์ฒญ ์ˆ˜์‹ ")
281
 
 
287
  logger.info(f"์ˆ˜์‹ ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ: {audio_file.filename} ({audio_file.content_type})")
288
 
289
  try:
 
290
  with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
291
  audio_file.save(temp_audio.name)
292
  logger.info(f"์˜ค๋””์˜ค ํŒŒ์ผ์„ ์ž„์‹œ ์ €์žฅ: {temp_audio.name}")
 
293
  with open(temp_audio.name, 'rb') as f_bytes:
294
  audio_bytes = f_bytes.read()
295
  stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
296
 
 
297
  if not isinstance(stt_result, dict) or not stt_result.get("success"):
298
  error_msg = stt_result.get("error", "์•Œ ์ˆ˜ ์—†๋Š” STT ์˜ค๋ฅ˜") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ˜•์‹ ์˜ค๋ฅ˜"
299
  logger.error(f"์Œ์„ฑ์ธ์‹ ์‹คํŒจ: {error_msg}")
 
307
  "answer": "์Œ์„ฑ์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.",
308
  "sources": [],
309
  "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
310
+ }), 200
311
 
312
  logger.info(f"์Œ์„ฑ์ธ์‹ ์„ฑ๊ณต: {transcription[:50]}...")
313
 
 
314
  search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
315
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
316
 
317
+ llm_id = request.form.get('llm_id', None)
318
  if not context:
319
  answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
320
  logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
 
322
  answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
323
  logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
324
 
 
325
  sources = []
326
  if search_results:
327
+ for result in search_results:
328
+ if not isinstance(result, dict):
329
+ continue
330
+ source_info = {}
331
+ source_key = result.get("source")
332
+ if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
333
+ source_key = result["metadata"].get("source")
334
+ if source_key:
335
+ source_info["source"] = source_key
336
+ source_info["score"] = result.get("rerank_score", result.get("score", 0))
337
+ filetype = result.get("filetype")
338
+ if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
339
+ filetype = result["metadata"].get("filetype")
340
+ if "text" in result and filetype == "csv":
341
+ try:
342
+ text_lines = result["text"].strip().split('\n')
343
+ if text_lines:
344
+ first_line = text_lines[0].strip()
345
+ if ',' in first_line:
346
+ first_column = first_line.split(',')[0].strip()
347
+ source_info["id"] = first_column
348
+ except Exception as e:
349
+ logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({source_info.get('source')}): {e}")
350
+ sources.append(source_info)
351
+
352
  response_data = {
353
  "transcription": transcription,
354
  "answer": answer,
 
361
  logger.error(f"์Œ์„ฑ ์ฑ— ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
362
  return jsonify({"error": "์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ", "details": str(e)}), 500
363
 
364
+ # --- Document Upload API (변경 없음) ---
365
  @app.route('/api/upload', methods=['POST'])
366
  @login_required
367
  def upload_document():
368
  """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ์—…๋กœ๋“œ API"""
 
 
 
 
 
369
  if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
370
  logger.error("๋ฌธ์„œ ์—…๋กœ๋“œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์ˆ˜ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
371
  return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 503
 
377
  if not doc_file or not doc_file.filename:
378
  return jsonify({"error": "์„ ํƒ๋œ ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."}), 400
379
 
 
 
380
  if not allowed_doc_file(doc_file.filename):
381
+ ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
382
  logger.warning(f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {doc_file.filename}")
383
  return jsonify({"error": f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ํ—ˆ์šฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
384
 
385
  try:
386
  filename = secure_filename(doc_file.filename)
387
+ data_folder = app.config.get('DATA_FOLDER', os.path.join(os.path.dirname(__file__), '..', 'data'))
 
 
 
 
388
  os.makedirs(data_folder, exist_ok=True)
389
  filepath = os.path.join(data_folder, filename)
390
 
391
  doc_file.save(filepath)
392
  logger.info(f"๋ฌธ์„œ ์ €์žฅ ์™„๋ฃŒ: {filepath}")
393
 
 
394
  if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
395
+ logger.error("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
396
+ try:
397
+ os.remove(filepath)
398
+ except OSError:
399
+ pass
400
+ return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500
401
 
402
  content = None
403
  file_ext = filename.rsplit('.', 1)[1].lower()
404
  metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
405
  docs = []
406
 
 
407
  if file_ext in ['txt', 'md', 'csv']:
408
  try:
409
  with open(filepath, 'r', encoding='utf-8') as f:
 
420
  logger.error(f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜ ({filename}): {e_read}")
421
  return jsonify({"error": f"ํŒŒ์ผ ์ฝ๊ธฐ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e_read)}"}), 500
422
  elif file_ext == 'pdf':
423
+ logger.warning("PDF ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
 
 
424
  elif file_ext == 'docx':
425
+ logger.warning("DOCX ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
 
 
426
 
427
+ if content is not None:
 
428
  if file_ext == 'csv':
429
  logger.info(f"CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
430
  docs = DocumentProcessor.csv_to_documents(content, metadata)
431
+ elif file_ext in ['txt', 'md']:
432
+ logger.info(f"ํ…์ŠคํŠธ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
 
433
  docs = DocumentProcessor.text_to_documents(
434
  content, metadata=metadata,
435
+ chunk_size=512, chunk_overlap=50
436
  )
437
 
 
438
  if docs:
439
  logger.info(f"{len(docs)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ๋ฅผ ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค...")
440
  base_retriever.add_documents(docs)
441
 
442
  logger.info(f"๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ๋ฅผ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค...")
443
+ index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index'))
 
 
 
 
 
 
444
  os.makedirs(os.path.dirname(index_path), exist_ok=True)
445
  try:
446
  base_retriever.save(index_path)
447
  logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ")
 
 
448
  return jsonify({
449
  "success": True,
450
  "message": f"ํŒŒ์ผ '{filename}' ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ ({len(docs)}๊ฐœ ์ฒญํฌ ์ถ”๊ฐ€)."
451
  })
452
  except Exception as e_save:
453
  logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e_save}", exc_info=True)
 
454
  return jsonify({"error": f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜: {str(e_save)}"}), 500
455
  else:
456
  logger.warning(f"ํŒŒ์ผ '{filename}'์—์„œ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค.")
 
457
  return jsonify({
458
+ "warning": True,
459
  "message": f"ํŒŒ์ผ '{filename}'์ด ์ €์žฅ๋˜์—ˆ์ง€๋งŒ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค."
460
  })
461
 
462
  except Exception as e:
463
  logger.error(f"ํŒŒ์ผ ์—…๋กœ๋“œ ๋˜๋Š” ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
 
464
  if 'filepath' in locals() and os.path.exists(filepath):
465
+ try:
466
+ os.remove(filepath)
467
+ except OSError:
468
+ pass
469
  return jsonify({"error": f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
470
 
471
+ # --- Document List API (SyntaxError 유발 주석 제거) ---
472
  @app.route('/api/documents', methods=['GET'])
473
  @login_required
474
  def list_documents():
475
  """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ๋ชฉ๋ก API"""
476
+ # SyntaxError๋ฅผ ์œ ๋ฐœํ–ˆ๋˜ ์„ค๋ช… ์ฃผ์„๋“ค์„ ์ œ๊ฑฐํ–ˆ์Šต๋‹ˆ๋‹ค.
477
  logger.info("๋ฌธ์„œ ๋ชฉ๋ก API ์š”์ฒญ ์‹œ์ž‘")
478
 
 
479
  if base_retriever is None:
480
  logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ None์ž…๋‹ˆ๋‹ค.")
481
  return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
482
  elif not hasattr(base_retriever, 'documents'):
483
+ logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever์— 'documents' ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
484
+ return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
485
 
 
486
  logger.info(f"base_retriever ๊ฐ์ฒด ํƒ€์ž…: {type(base_retriever)}")
487
  logger.info(f"base_retriever.documents ์กด์žฌ ์—ฌ๋ถ€: {hasattr(base_retriever, 'documents')}")
488
+ if hasattr(base_retriever, 'documents'):
489
+ logger.info(f"base_retriever.documents ํƒ€์ž…: {type(base_retriever.documents)}")
490
+ logger.info(f"base_retriever.documents ๊ธธ์ด: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
491
 
492
  try:
493
  sources = {}
494
  total_chunks = 0
495
+ doc_list = base_retriever.documents
496
 
 
497
  if not isinstance(doc_list, list):
498
+ logger.error(f"base_retriever.documents๊ฐ€ ๋ฆฌ์ŠคํŠธ๊ฐ€ ์•„๋‹˜: {type(doc_list)}")
499
+ return jsonify({"error": "๋‚ด๋ถ€ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ ์˜ค๋ฅ˜"}), 500
500
 
501
  logger.info(f"์ด {len(doc_list)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ์—์„œ ์†Œ์Šค ๋ชฉ๋ก ์ƒ์„ฑ ์ค‘...")
502
  for i, doc in enumerate(doc_list):
503
+ if not isinstance(doc, dict):
504
+ logger.warning(f"์ฒญํฌ {i}๊ฐ€ ๋”•์…”๋„ˆ๋ฆฌ ํƒ€์ž…์ด ์•„๋‹˜: {type(doc)}")
505
+ continue
 
506
 
 
507
  source = "unknown"
508
  metadata = doc.get("metadata")
509
  if isinstance(metadata, dict):
510
  source = metadata.get("source", "unknown")
 
511
  if source == "unknown":
512
  source = doc.get("source", "unknown")
513
 
 
515
  if source in sources:
516
  sources[source]["chunks"] += 1
517
  else:
 
518
  filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
519
  filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
520
+ if filename == source and "filename" in doc:
521
+ filename = doc["filename"]
522
+ if filetype == "unknown" and "filetype" in doc:
523
+ filetype = doc["filetype"]
524
 
525
  sources[source] = {
526
  "filename": filename,
 
529
  }
530
  total_chunks += 1
531
  else:
532
+ logger.warning(f"์ฒญํฌ {i}์—์„œ ์†Œ์Šค ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ: {doc}")
 
533
 
 
534
  documents = [{"source": src, **info} for src, info in sources.items()]
535
+ documents.sort(key=lambda x: x.get("filename", ""), reverse=False)
536
 
537
  logger.info(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์™„๋ฃŒ: {len(documents)}๊ฐœ ์†Œ์Šค ํŒŒ์ผ, {total_chunks}๊ฐœ ์ฒญํฌ")
538
  return jsonify({
 
542
  })
543
 
544
  except Exception as e:
545
+ # 여기서 발생하는 예외가 503으로 이어질 수 있는지 확인
546
  logger.error(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์‹ฌ๊ฐํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
 
547
  return jsonify({"error": f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
548
+
549
+ # SyntaxError 수정: 누락된 '#' 추가
550
+ # * 제공된 `list_documents` 함수 코드 자체에는 `app_ready` 상태나 `base_retriever`가 `None`인 상태를 확인하여 503 오류를 반환하는 로직이 **없습니다.** 로그에서 503이 발생했다면, 원인은 다음 중 하나일 가능성이 높습니다: