jeongsoo committed on
Commit
61e46cb
·
1 Parent(s): 74188c7
Files changed (1) hide show
  1. app/app_routes.py +354 -257
app/app_routes.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- RAG 검색 챗봇 웹 애플리케이션 - API 라우트 정의
3
  """
4
 
5
  import os
@@ -7,6 +7,7 @@ import json
7
  import logging
8
  import tempfile
9
  import requests
 
10
  from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
11
  from datetime import datetime
12
  from werkzeug.utils import secure_filename
@@ -14,10 +15,13 @@ from werkzeug.utils import secure_filename
14
  # 로거 가져오기
15
  logger = logging.getLogger(__name__)
16
 
17
- def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
 
 
 
18
  """Flask 애플리케이션에 기본 라우트 등록"""
19
-
20
- # 헬퍼 ν•¨μˆ˜
21
  def allowed_audio_file(filename):
22
  """파일이 ν—ˆμš©λœ μ˜€λ””μ˜€ ν™•μž₯자λ₯Ό κ°€μ§€λŠ”μ§€ 확인"""
23
  ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
@@ -28,6 +32,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
28
  ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
29
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
30
 
 
31
  @app.route('/login', methods=['GET', 'POST'])
32
  def login():
33
  error = None
@@ -40,41 +45,37 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
40
  username = request.form.get('username', '')
41
  password = request.form.get('password', '')
42
  logger.info(f"μž…λ ₯된 μ‚¬μš©μžλͺ…: {username}")
43
- logger.info(f"λΉ„λ°€λ²ˆν˜Έ μž…λ ₯ μ—¬λΆ€: {len(password) > 0}")
44
 
45
- # ν™˜κ²½ λ³€μˆ˜ λ˜λŠ” κΈ°λ³Έκ°’κ³Ό 비ꡐ
46
  valid_username = ADMIN_USERNAME
47
  valid_password = ADMIN_PASSWORD
48
  logger.info(f"κ²€μ¦μš© μ‚¬μš©μžλͺ…: {valid_username}")
49
- logger.info(f"κ²€μ¦μš© λΉ„λ°€λ²ˆν˜Έ 쑴재 μ—¬λΆ€: {valid_password is not None and len(valid_password) > 0}")
50
 
51
  if username == valid_username and password == valid_password:
52
  logger.info(f"둜그인 성곡: {username}")
53
- # μ„Έμ…˜ μ„€μ • μ „ ν˜„μž¬ μ„Έμ…˜ μƒνƒœ λ‘œκΉ…
54
- logger.debug(f"μ„Έμ…˜ μ„€μ • μ „: {session}")
55
 
56
- # μ„Έμ…˜μ— 둜그인 정보 μ €μž₯
57
  session.permanent = True
58
  session['logged_in'] = True
59
  session['username'] = username
60
- session.modified = True
61
 
62
- logger.info(f"μ„Έμ…˜ μ„€μ • ν›„: {session}")
63
- logger.info("μ„Έμ…˜ μ„€μ • μ™„λ£Œ, λ¦¬λ””λ ‰μ…˜ μ‹œλ„")
64
 
65
- # 둜그인 성곡 ν›„ λ¦¬λ””λ ‰μ…˜
66
  redirect_to = next_url or url_for('index')
67
  logger.info(f"λ¦¬λ””λ ‰μ…˜ λŒ€μƒ: {redirect_to}")
68
  response = redirect(redirect_to)
 
 
69
  return response
70
  else:
71
  logger.warning("둜그인 μ‹€νŒ¨: 아이디 λ˜λŠ” λΉ„λ°€λ²ˆν˜Έ 뢈일치")
72
- if username != valid_username: logger.warning("μ‚¬μš©μžλͺ… 뢈일치")
73
- if password != valid_password: logger.warning("λΉ„λ°€λ²ˆν˜Έ 뢈일치")
74
  error = '아이디 λ˜λŠ” λΉ„λ°€λ²ˆν˜Έκ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.'
75
- else:
76
  logger.info("둜그인 νŽ˜μ΄μ§€ GET μš”μ²­")
77
- if 'logged_in' in session:
78
  logger.info("이미 둜그인된 μ‚¬μš©μž, 메인 νŽ˜μ΄μ§€λ‘œ λ¦¬λ””λ ‰μ…˜")
79
  return redirect(url_for('index'))
80
 
@@ -85,40 +86,49 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
85
  @app.route('/logout')
86
  def logout():
87
  """로그아웃 처리"""
88
- if 'logged_in' in session:
89
- username = session.get('username', 'unknown')
90
- logger.info(f"μ‚¬μš©μž {username} λ‘œκ·Έμ•„μ›ƒ 처리 μ‹œμž‘")
91
- session.pop('logged_in', None)
92
- session.pop('username', None)
93
- session.modified = True
94
- logger.info(f"μ„Έμ…˜ 정보 μ‚­μ œ μ™„λ£Œ. ν˜„μž¬ μ„Έμ…˜: {session}")
95
  else:
96
- logger.warning("λ‘œκ·ΈμΈλ˜μ§€ μ•Šμ€ μƒνƒœμ—μ„œ λ‘œκ·Έμ•„μ›ƒ μ‹œλ„")
97
 
98
  logger.info("둜그인 νŽ˜μ΄μ§€λ‘œ λ¦¬λ””λ ‰μ…˜")
99
  response = redirect(url_for('login'))
 
 
100
  return response
101
 
102
-
103
  @app.route('/')
104
  @login_required
105
  def index():
106
  """메인 페이지"""
107
- nonlocal app_ready
108
-
109
- # μ•± μ€€λΉ„ μƒνƒœ 확인 - 30초 이상 μ§€λ‚¬μœΌλ©΄ κ°•μ œλ‘œ ready μƒνƒœλ‘œ λ³€κ²½
110
- current_time = datetime.now()
111
- start_time = datetime.fromtimestamp(os.path.getmtime(__file__))
112
- time_diff = (current_time - start_time).total_seconds()
113
-
114
- if not app_ready and time_diff > 30:
115
- logger.warning(f"앱이 30초 이상 μ΄ˆκΈ°ν™” 쀑 μƒνƒœμž…λ‹ˆλ‹€. κ°•μ œλ‘œ ready μƒνƒœλ‘œ λ³€κ²½ν•©λ‹ˆλ‹€.")
116
- app_ready = True
117
-
118
- if not app_ready:
119
- logger.info("앱이 아직 μ€€λΉ„λ˜μ§€ μ•Šμ•„ λ‘œλ”© νŽ˜μ΄μ§€ ν‘œμ‹œ")
120
- return render_template('loading.html'), 503 # μ„œλΉ„μŠ€ μ€€λΉ„ μ•ˆλ¨ μƒνƒœ μ½”λ“œ
121
-
 
 
 
 
 
 
 
 
 
122
  logger.info("메인 νŽ˜μ΄μ§€ μš”μ²­")
123
  return render_template('index.html')
124
 
@@ -127,22 +137,31 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
127
  @login_required
128
  def app_status():
129
  """앱 초기화 상태 확인 API"""
130
- logger.info(f"μ•± μƒνƒœ 확인 μš”μ²­: {'Ready' if app_ready else 'Not Ready'}")
131
- return jsonify({"ready": app_ready})
132
-
133
 
 
134
  @app.route('/api/llm', methods=['GET', 'POST'])
135
  @login_required
136
  def llm_api():
137
  """μ‚¬μš© κ°€λŠ₯ν•œ LLM λͺ©λ‘ 및 선택 API"""
138
- if not app_ready:
139
- return jsonify({"error": "앱이 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."}), 503
 
 
 
140
 
141
  if request.method == 'GET':
142
  logger.info("LLM λͺ©λ‘ μš”μ²­")
143
  try:
144
- current_details = llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {"id": "unknown", "name": "Unknown"}
145
- supported_llms_dict = llm_interface.SUPPORTED_LLMS if hasattr(llm_interface, 'SUPPORTED_LLMS') else {}
 
 
 
 
 
146
  supported_list = [{
147
  "name": name, "id": id, "current": id == current_details.get("id")
148
  } for name, id in supported_llms_dict.items()]
@@ -152,7 +171,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
152
  "current_llm": current_details
153
  })
154
  except Exception as e:
155
- logger.error(f"LLM 정보 쑰회 였λ₯˜: {e}")
156
  return jsonify({"error": "LLM 정보 쑰회 쀑 였λ₯˜ λ°œμƒ"}), 500
157
 
158
  elif request.method == 'POST':
@@ -164,8 +183,10 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
164
  logger.info(f"LLM λ³€κ²½ μš”μ²­: {llm_id}")
165
 
166
  try:
167
- if not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients'):
168
- raise NotImplementedError("LLM μΈν„°νŽ˜μ΄μŠ€μ— ν•„μš”ν•œ λ©”μ†Œλ“œ/속성 μ—†μŒ")
 
 
169
 
170
  if llm_id not in llm_interface.llm_clients:
171
  return jsonify({"error": f"μ§€μ›λ˜μ§€ μ•ŠλŠ” LLM ID: {llm_id}"}), 400
@@ -186,18 +207,20 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
186
  logger.error(f"LLM λ³€κ²½ 처리 쀑 였λ₯˜: {e}", exc_info=True)
187
  return jsonify({"error": f"LLM λ³€κ²½ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"}), 500
188
 
189
-
190
  @app.route('/api/chat', methods=['POST'])
191
  @login_required
192
  def chat():
193
  """ν…μŠ€νŠΈ 기반 채봇 API"""
194
- # μˆ˜μ •λœ λΆ€λΆ„: app_ready 체크 μ œκ±°ν•˜κ³  retrieverκ°€ None일 λ•Œ 였λ₯˜ λŒ€μ‹  λ©”μ‹œμ§€ λ°˜ν™˜
195
- if retriever is None:
196
- logger.warning("μ±„νŒ… API μš”μ²­μ‹œ retrieverκ°€ Noneμž„")
 
197
  return jsonify({
198
- "answer": "μ£„μ†‘ν•©λ‹ˆλ‹€. 검색기가 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
199
- "sources": []
200
- })
 
201
 
202
  try:
203
  data = request.get_json()
@@ -208,22 +231,24 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
208
  logger.info(f"ν…μŠ€νŠΈ 쿼리 μˆ˜μ‹ : {query[:100]}...")
209
 
210
  # RAG 검색 μˆ˜ν–‰
211
- if not hasattr(retriever, 'search'):
212
- raise NotImplementedError("Retriever에 search λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
213
- search_results = retriever.search(query, top_k=5, first_stage_k=6)
214
 
215
  # μ»¨ν…μŠ€νŠΈ μ€€λΉ„
216
- if not hasattr(DocumentProcessor, 'prepare_rag_context'):
217
- raise NotImplementedError("DocumentProcessor에 prepare_rag_context λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
218
- context = DocumentProcessor.prepare_rag_context(search_results, field="text")
 
219
 
220
  if not context:
221
- logger.warning("검색 κ²°κ³Όκ°€ μ—†μ–΄ μ»¨ν…μŠ€νŠΈλ₯Ό μƒμ„±ν•˜μ§€ λͺ»ν•¨.")
 
 
222
 
223
  # LLM에 질의
224
- llm_id = data.get('llm_id', None)
225
- if not hasattr(llm_interface, 'rag_generate'):
226
- raise NotImplementedError("LLMInterface에 rag_generate λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
 
227
 
228
  if not context:
229
  answer = "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
@@ -232,7 +257,7 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
232
  answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
233
  logger.info(f"LLM 응닡 생성 μ™„λ£Œ (길이: {len(answer)})")
234
 
235
- # μ†ŒμŠ€ 정보 μΆ”μΆœ (CSV ID μΆ”μΆœ 둜직 포함)
236
  sources = []
237
  if search_results:
238
  for result in search_results:
@@ -240,14 +265,21 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
240
  logger.warning(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 검색 κ²°κ³Ό ν˜•μ‹: {type(result)}")
241
  continue
242
 
243
- if "source" in result:
244
- source_info = {
245
- "source": result.get("source", "Unknown"),
246
- "score": result.get("rerank_score", result.get("score", 0))
247
- }
 
 
 
248
 
249
- # CSV 파일 νŠΉμ • 처리
250
- if "text" in result and result.get("filetype") == "csv":
 
 
 
 
251
  try:
252
  text_lines = result["text"].strip().split('\n')
253
  if text_lines:
@@ -255,9 +287,9 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
255
  if ',' in first_line:
256
  first_column = first_line.split(',')[0].strip()
257
  source_info["id"] = first_column
258
- logger.debug(f"CSV μ†ŒμŠ€ ID μΆ”μΆœ: {first_column} from {source_info['source']}")
259
  except Exception as e:
260
- logger.warning(f"CSV μ†ŒμŠ€ ID μΆ”μΆœ μ‹€νŒ¨ ({result.get('source')}): {e}")
261
 
262
  sources.append(source_info)
263
 
@@ -273,33 +305,27 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
273
  logger.error(f"μ±„νŒ… 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
274
  return jsonify({"error": f"처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"}), 500
275
 
276
-
277
  @app.route('/api/voice', methods=['POST'])
278
  @login_required
279
  def voice_chat():
280
  """μŒμ„± μ±— API μ—”λ“œν¬μΈνŠΈ"""
281
- if not app_ready:
282
- logger.warning("μ•± μ΄ˆκΈ°ν™”κ°€ μ™„λ£Œλ˜μ§€ μ•Šμ•˜μ§€λ§Œ μŒμ„± API μš”μ²­ 처리 μ‹œλ„")
283
- # μ—¬κΈ°μ„œ λ°”λ‘œ λ¦¬ν„΄ν•˜μ§€ μ•Šκ³  계속 μ§„ν–‰
284
- # 사전 검사: retriever와 stt_clientκ°€ μ œλŒ€λ‘œ μ΄ˆκΈ°ν™”λ˜μ—ˆλŠ”μ§€ 확인
285
-
286
- if retriever is None:
287
- logger.error("retrieverκ°€ 아직 μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€")
288
- return jsonify({
289
- "transcription": "(μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν–ˆμ§€λ§Œ 검색 엔진이 아직 μ€€λΉ„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€)",
290
- "answer": "μ£„μ†‘ν•©λ‹ˆλ‹€. 검색 엔진이 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
291
- "sources": []
292
- })
293
- # λ˜λŠ” ν•„μˆ˜ μ»΄ν¬λ„ŒνŠΈκ°€ 없을 λ•Œλ§Œ νŠΉλ³„ 응닡 λ°˜ν™˜
294
- if stt_client is None:
295
- return jsonify({
296
- "transcription": "(μŒμ„± 인식 κΈ°λŠ₯이 μ€€λΉ„ μ€‘μž…λ‹ˆλ‹€)",
297
- "answer": "μ£„μ†‘ν•©λ‹ˆλ‹€. ν˜„μž¬ μŒμ„± 인식 μ„œλΉ„μŠ€κ°€ μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
298
- "sources": []
299
- })
300
 
301
  logger.info("μŒμ„± μ±— μš”μ²­ μˆ˜μ‹ ")
302
-
303
  if 'audio' not in request.files:
304
  logger.error("μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•ŠμŒ")
305
  return jsonify({"error": "μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
@@ -308,57 +334,40 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
308
  logger.info(f"μˆ˜μ‹ λœ μ˜€λ””μ˜€ 파일: {audio_file.filename} ({audio_file.content_type})")
309
 
310
  try:
311
- # μ˜€λ””μ˜€ 파일 처리
312
- # μž„μ‹œ 파일 μ‚¬μš© κ³ λ € (λ©”λͺ¨λ¦¬ λΆ€λ‹΄ 쀄이기 μœ„ν•΄)
313
- with tempfile.NamedTemporaryFile(delete=True) as temp_audio:
314
  audio_file.save(temp_audio.name)
315
  logger.info(f"μ˜€λ””μ˜€ νŒŒμΌμ„ μž„μ‹œ μ €μž₯: {temp_audio.name}")
316
- # VitoSTT.transcribe_audio κ°€ 파일 경둜 λ˜λŠ” λ°”μ΄νŠΈλ₯Ό 받을 수 μžˆλ„λ‘ κ΅¬ν˜„λ˜μ–΄μ•Ό 함
317
- # μ—¬κΈ°μ„œλŠ” 파일 경둜λ₯Ό μ‚¬μš©ν•œλ‹€κ³  κ°€μ •
318
- if not hasattr(stt_client, 'transcribe_audio'):
319
- raise NotImplementedError("STT ν΄λΌμ΄μ–ΈνŠΈμ— transcribe_audio λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
320
-
321
- # 파일 경둜둜 전달 μ‹œ
322
- # stt_result = stt_client.transcribe_audio(temp_audio.name, language="ko")
323
- # λ°”μ΄νŠΈλ‘œ 전달 μ‹œ
324
  with open(temp_audio.name, 'rb') as f_bytes:
325
  audio_bytes = f_bytes.read()
326
- stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
327
-
328
 
329
  if not isinstance(stt_result, dict) or not stt_result.get("success"):
330
  error_msg = stt_result.get("error", "μ•Œ 수 μ—†λŠ” STT 였λ₯˜") if isinstance(stt_result, dict) else "STT κ²°κ³Ό ν˜•μ‹ 였λ₯˜"
331
  logger.error(f"μŒμ„±μΈμ‹ μ‹€νŒ¨: {error_msg}")
332
- return jsonify({
333
- "error": "μŒμ„±μΈμ‹ μ‹€νŒ¨",
334
- "details": error_msg
335
- }), 500
336
 
337
  transcription = stt_result.get("text", "")
338
  if not transcription:
339
  logger.warning("μŒμ„±μΈμ‹ κ²°κ³Όκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
340
- return jsonify({"error": "μŒμ„±μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΈμ‹ν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.", "transcription": ""}), 400
 
 
 
 
 
 
341
 
342
  logger.info(f"μŒμ„±μΈμ‹ 성곡: {transcription[:50]}...")
343
- if retriever is None:
344
- logger.error("STT 성곡 ν›„ 검색 μ‹œλ„ 쀑 retrieverκ°€ Noneμž„")
345
- return jsonify({
346
- "transcription": transcription,
347
- "answer": "μŒμ„±μ„ μΈμ‹ν–ˆμ§€λ§Œ, ν˜„μž¬ 검색 μ‹œμŠ€ν…œμ΄ μ€€λΉ„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
348
- "sources": []
349
- })
350
- # --- 이후 λ‘œμ§μ€ /api/chatκ³Ό 거의 동일 ---
351
- # RAG 검색 μˆ˜ν–‰
352
  search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
353
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
354
 
355
- if not context:
356
- logger.warning("μŒμ„± 쿼리에 λŒ€ν•œ 검색 κ²°κ³Ό μ—†μŒ.")
357
- # answer = "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€." (μ•„λž˜ LLM 호좜 λ‘œμ§μ—μ„œ 처리)
358
- pass
359
-
360
- # LLM 호좜
361
- llm_id = request.form.get('llm_id', None) # μŒμ„± μš”μ²­μ€ form λ°μ΄ν„°λ‘œ LLM ID 받을 수 있음
362
  if not context:
363
  answer = "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
364
  logger.info("μ»¨ν…μŠ€νŠΈ 없이 κΈ°λ³Έ 응닡 생성")
@@ -366,201 +375,243 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
366
  answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
367
  logger.info(f"LLM 응닡 생성 μ™„λ£Œ (길이: {len(answer)})")
368
 
369
-
370
- # μ†ŒμŠ€ 정보 μΆ”μΆœ
371
- enhanced_sources = []
372
  if search_results:
373
- for doc in search_results:
374
- if not isinstance(doc, dict): continue # A
375
- if "source" in doc:
376
- source_info = {
377
- "source": doc.get("source", "Unknown"),
378
- "score": doc.get("rerank_score", doc.get("score", 0))
379
- }
380
- if "text" in doc and doc.get("filetype") == "csv":
381
- try:
382
- text_lines = doc["text"].strip().split('\n')
383
- if text_lines:
384
- first_line = text_lines[0].strip()
385
- if ',' in first_line:
386
- first_column = first_line.split(',')[0].strip()
387
- source_info["id"] = first_column
388
- except Exception as e:
389
- logger.warning(f"[μŒμ„±μ±—] CSV μ†ŒμŠ€ ID μΆ”μΆœ μ‹€νŒ¨ ({doc.get('source')}): {e}")
390
- enhanced_sources.append(source_info)
 
 
 
 
 
391
 
392
  # μ΅œμ’… 응닡
393
  response_data = {
394
  "transcription": transcription,
395
  "answer": answer,
396
- "sources": enhanced_sources,
397
  "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
398
  }
399
  return jsonify(response_data)
400
 
401
  except Exception as e:
402
  logger.error(f"μŒμ„± μ±— 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
403
- return jsonify({
404
- "error": "μŒμ„± 처리 쀑 λ‚΄λΆ€ 였λ₯˜ λ°œμƒ",
405
- "details": str(e)
406
- }), 500
407
-
408
 
 
409
  @app.route('/api/upload', methods=['POST'])
410
  @login_required
411
  def upload_document():
412
  """μ§€μ‹λ² μ΄μŠ€ λ¬Έμ„œ μ—…λ‘œλ“œ API"""
413
- if not app_ready or base_retriever is None:
414
- return jsonify({"error": "μ•±/κΈ°λ³Έ 검색기가 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€."}), 503
 
 
415
 
416
  if 'document' not in request.files:
417
  return jsonify({"error": "λ¬Έμ„œ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
418
 
419
  doc_file = request.files['document']
420
- if doc_file.filename == '':
421
  return jsonify({"error": "μ„ νƒλœ 파일이 μ—†μŠ΅λ‹ˆλ‹€."}), 400
422
 
423
  if not allowed_doc_file(doc_file.filename):
424
- logger.error(f"ν—ˆμš©λ˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹: {doc_file.filename}")
 
425
  return jsonify({"error": f"ν—ˆμš©λ˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹μž…λ‹ˆλ‹€. ν—ˆμš©: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
426
 
427
  try:
428
  filename = secure_filename(doc_file.filename)
429
- filepath = os.path.join(app.config['DATA_FOLDER'], filename)
 
 
 
 
430
  doc_file.save(filepath)
431
  logger.info(f"λ¬Έμ„œ μ €μž₯ μ™„λ£Œ: {filepath}")
432
 
433
- # λ¬Έμ„œ 처리 (인코딩 처리 포함)
434
- try:
435
- with open(filepath, 'r', encoding='utf-8') as f:
436
- content = f.read()
437
- except UnicodeDecodeError:
438
- logger.info(f"UTF-8 λ””μ½”λ”© μ‹€νŒ¨, CP949둜 μ‹œλ„: {filename}")
439
- try:
440
- with open(filepath, 'r', encoding='cp949') as f:
441
- content = f.read()
442
- except Exception as e_cp949:
443
- logger.error(f"CP949 λ””μ½”λ”© μ‹€νŒ¨ ({filename}): {e_cp949}")
444
- return jsonify({"error": "파일 인코딩을 읽을 수 μ—†μŠ΅λ‹ˆλ‹€ (UTF-8, CP949 μ‹œλ„ μ‹€νŒ¨)."}), 400
445
- except Exception as e_read:
446
- logger.error(f"파일 읽기 였λ₯˜ ({filename}): {e_read}")
447
- return jsonify({"error": f"파일 읽기 쀑 였λ₯˜ λ°œμƒ: {str(e_read)}"}), 500
448
-
449
-
450
- # 메타데이터 및 λ¬Έμ„œ λΆ„ν• /처리
451
- metadata = {
452
- "source": filename, "filename": filename,
453
- "filetype": filename.rsplit('.', 1)[1].lower(),
454
- "filepath": filepath
455
- }
456
- file_ext = metadata["filetype"]
457
  docs = []
458
 
459
- if not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
460
- raise NotImplementedError("DocumentProcessor에 ν•„μš”ν•œ λ©”μ†Œλ“œ μ—†μŒ")
461
-
462
- if file_ext == 'csv':
463
- logger.info(f"CSV 파일 처리 μ‹œμž‘: {filename}")
464
- docs = DocumentProcessor.csv_to_documents(content, metadata) # ν–‰ λ‹¨μœ„ 처리 κ°€μ •
465
- else: # 기타 ν…μŠ€νŠΈ 기반 λ¬Έμ„œ
466
- logger.info(f"일반 ν…μŠ€νŠΈ λ¬Έμ„œ 처리 μ‹œμž‘: {filename}")
467
- # PDF, DOCX 등은 별도 라이브러리(pypdf, python-docx) ν•„μš”
468
- if file_ext in ['pdf', 'docx']:
469
- logger.warning(f".{file_ext} 파일 μ²˜λ¦¬λŠ” ν˜„μž¬ κ΅¬ν˜„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν…μŠ€νŠΈ μΆ”μΆœ 둜직 μΆ”κ°€ ν•„μš”.")
470
- # 여기에 pdf/docx ν…μŠ€νŠΈ μΆ”μΆœ 둜직 μΆ”κ°€
471
- # 예: content = extract_text_from_pdf(filepath)
472
- # content = extract_text_from_docx(filepath)
473
- # μž„μ‹œλ‘œ λΉ„μ›Œλ‘ 
474
- content = ""
475
-
476
- if content: # ν…μŠ€νŠΈ λ‚΄μš©μ΄ μžˆμ„ λ•Œλ§Œ 처리
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  docs = DocumentProcessor.text_to_documents(
478
  content, metadata=metadata,
479
- chunk_size=512, chunk_overlap=50
480
  )
 
481
 
482
- # 검색기에 λ¬Έμ„œ μΆ”κ°€ 및 인덱슀 μ €μž₯
483
  if docs:
484
- if not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
485
- raise NotImplementedError("κΈ°λ³Έ 검색기에 add_documents λ˜λŠ” save λ©”μ†Œλ“œ μ—†μŒ")
486
-
487
  logger.info(f"{len(docs)}개 λ¬Έμ„œ 청크λ₯Ό 검색기에 μΆ”κ°€ν•©λ‹ˆλ‹€...")
488
  base_retriever.add_documents(docs)
489
 
490
- # 인덱슀 μ €μž₯ (μ—…λ‘œλ“œλ§ˆλ‹€ μ €μž₯ - λΉ„νš¨μœ¨μ μΌ 수 있음)
491
  logger.info(f"검색기 μƒνƒœλ₯Ό μ €μž₯ν•©λ‹ˆλ‹€...")
492
- index_path = app.config['INDEX_PATH']
 
493
  try:
494
  base_retriever.save(index_path)
495
  logger.info("인덱슀 μ €μž₯ μ™„λ£Œ")
496
- # μž¬μˆœμœ„ν™” 검색기도 μ—…λ°μ΄νŠΈ ν•„μš” μ‹œ 둜직 μΆ”κ°€
497
- # 예: retriever.update_base_retriever(base_retriever)
498
  return jsonify({
499
  "success": True,
500
  "message": f"파일 '{filename}' μ—…λ‘œλ“œ 및 처리 μ™„λ£Œ ({len(docs)}개 청크 μΆ”κ°€)."
501
  })
502
  except Exception as e_save:
503
- logger.error(f"인덱슀 μ €μž₯ 쀑 였λ₯˜ λ°œμƒ: {e_save}")
 
504
  return jsonify({"error": f"인덱슀 μ €μž₯ 쀑 였λ₯˜: {str(e_save)}"}), 500
505
  else:
506
  logger.warning(f"파일 '{filename}'μ—μ„œ μ²˜λ¦¬ν•  λ‚΄μš©μ΄ μ—†κ±°λ‚˜ μ§€μ›λ˜μ§€ μ•ŠλŠ” ν˜•μ‹μž…λ‹ˆλ‹€.")
507
- # νŒŒμΌμ€ μ €μž₯λ˜μ—ˆμœΌλ―€λ‘œ μ„±κ³΅μœΌλ‘œ κ°„μ£Όν• μ§€ κ²°μ • ν•„μš”
508
  return jsonify({
509
- "warning": True,
510
- "message": f"파일 '{filename}'이 μ €μž₯λ˜μ—ˆμ§€λ§Œ μ²˜λ¦¬ν•  λ‚΄μš©μ΄ μ—†μŠ΅λ‹ˆλ‹€."
511
  })
512
 
513
  except Exception as e:
514
  logger.error(f"파일 μ—…λ‘œλ“œ λ˜λŠ” 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
 
 
 
 
515
  return jsonify({"error": f"파일 μ—…λ‘œλ“œ 쀑 였λ₯˜: {str(e)}"}), 500
516
 
517
-
518
  @app.route('/api/documents', methods=['GET'])
519
  @login_required
520
  def list_documents():
521
  """μ§€μ‹λ² μ΄μŠ€ λ¬Έμ„œ λͺ©λ‘ API"""
522
- # app_ready 검사 제거 - λ¬Έμ„œ λͺ©λ‘ λΆˆλŸ¬μ˜€κΈ°λŠ” 항상 ν—ˆμš©
 
 
 
 
 
 
 
523
  if base_retriever is None:
524
- logger.warning("λ¬Έμ„œ API μš”μ²­μ‹œ base_retrieverκ°€ Noneμž„")
 
525
  return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
 
 
 
 
 
 
 
 
 
 
526
 
527
  try:
528
  sources = {}
529
  total_chunks = 0
530
- # base_retriever.documents 와 같은 속성이 μ‹€μ œ ν΄λž˜μŠ€μ— μžˆλ‹€κ³  κ°€μ •
531
- if hasattr(base_retriever, 'documents') and base_retriever.documents:
532
- logger.info(f"총 {len(base_retriever.documents)}개 λ¬Έμ„œ μ²­ν¬μ—μ„œ μ†ŒμŠ€ λͺ©λ‘ 생성 쀑...")
533
- for doc in base_retriever.documents:
534
- # λ¬Έμ„œ 청크가 λ”•μ…”λ„ˆλ¦¬ ν˜•νƒœλΌκ³  κ°€μ •
535
- if not isinstance(doc, dict): continue
536
-
537
- source = doc.get("source", "unknown") # λ©”νƒ€λ°μ΄ν„°μ—μ„œ source κ°€μ Έμ˜€κΈ°
538
- if source == "unknown" and "metadata" in doc and isinstance(doc["metadata"], dict):
539
- source = doc["metadata"].get("source", "unknown") # Langchain Document ꡬ쑰 고렀
540
-
541
- if source != "unknown":
542
- if source in sources:
543
- sources[source]["chunks"] += 1
544
- else:
545
- # λ©”νƒ€λ°μ΄ν„°μ—μ„œ μΆ”κ°€ 정보 κ°€μ Έμ˜€κΈ°
546
- filename = doc.get("filename", source)
547
- filetype = doc.get("filetype", "unknown")
548
- if "metadata" in doc and isinstance(doc["metadata"], dict):
549
- filename = doc["metadata"].get("filename", filename)
550
- filetype = doc["metadata"].get("filetype", filetype)
551
-
552
- sources[source] = {
553
- "filename": filename,
554
- "chunks": 1,
555
- "filetype": filetype
556
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  total_chunks += 1
558
- else:
559
- logger.info("검색기에 λ¬Έμ„œκ°€ μ—†κ±°λ‚˜ documents 속성을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
 
560
 
561
- # λͺ©λ‘ ν˜•μ‹ λ³€ν™˜ 및 μ •λ ¬
562
  documents = [{"source": src, **info} for src, info in sources.items()]
563
- documents.sort(key=lambda x: x["chunks"], reverse=True)
564
 
565
  logger.info(f"λ¬Έμ„œ λͺ©λ‘ 쑰회 μ™„λ£Œ: {len(documents)}개 μ†ŒμŠ€ 파일, {total_chunks}개 청크")
566
  return jsonify({
@@ -570,5 +621,51 @@ def register_routes(app, login_required, llm_interface, retriever, stt_client, D
570
  })
571
 
572
  except Exception as e:
573
- logger.error(f"λ¬Έμ„œ λͺ©λ‘ 쑰회 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
574
- return jsonify({"error": f"λ¬Έμ„œ λͺ©λ‘ 쑰회 쀑 였λ₯˜: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ RAG 검색 챗봇 웹 애플리케이션 - API 라우트 정의 (수정 제안 포함)
3
  """
4
 
5
  import os
 
7
  import logging
8
  import tempfile
9
  import requests
10
+ import time # μ•± μ‹œμž‘ μ‹œκ°„ 기둝 μœ„ν•΄ μΆ”κ°€
11
  from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
12
  from datetime import datetime
13
  from werkzeug.utils import secure_filename
 
15
  # 로거 가져오기
16
  logger = logging.getLogger(__name__)
17
 
18
+ # 앱 시작 시간 기록 (모듈 로드 시점)
19
+ APP_START_TIME = time.time()
20
+
21
+ def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_flag, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
22
  """Flask μ• ν”Œλ¦¬μΌ€μ΄μ…˜μ— κΈ°λ³Έ 라우트 등둝"""
23
+
24
+ # 헬퍼 ν•¨μˆ˜ (λ³€κ²½ μ—†μŒ)
25
  def allowed_audio_file(filename):
26
  """파일이 ν—ˆμš©λœ μ˜€λ””μ˜€ ν™•μž₯자λ₯Ό κ°€μ§€λŠ”μ§€ 확인"""
27
  ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
 
32
  ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
33
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_DOC_EXTENSIONS
34
 
35
+ # --- 둜그인/λ‘œκ·Έμ•„μ›ƒ 라우트 (큰 문제 μ—†μŒ, λ‘œκΉ… κ°•ν™”) ---
36
  @app.route('/login', methods=['GET', 'POST'])
37
  def login():
38
  error = None
 
45
  username = request.form.get('username', '')
46
  password = request.form.get('password', '')
47
  logger.info(f"μž…λ ₯된 μ‚¬μš©μžλͺ…: {username}")
48
+ # logger.info(f"λΉ„λ°€λ²ˆν˜Έ μž…λ ₯ μ—¬λΆ€: {len(password) > 0}") # μ‹€μ œ λΉ„λ°€λ²ˆν˜Έ λ‘œκΉ…μ€ λ³΄μ•ˆμƒ μ’‹μ§€ μ•ŠμŒ
49
 
 
50
  valid_username = ADMIN_USERNAME
51
  valid_password = ADMIN_PASSWORD
52
  logger.info(f"κ²€μ¦μš© μ‚¬μš©μžλͺ…: {valid_username}")
53
+ # logger.info(f"κ²€μ¦μš© λΉ„λ°€λ²ˆν˜Έ 쑴재 μ—¬λΆ€: {valid_password is not None and len(valid_password) > 0}")
54
 
55
  if username == valid_username and password == valid_password:
56
  logger.info(f"둜그인 성곡: {username}")
57
+ # logger.debug(f"μ„Έμ…˜ μ„€μ • μ „: {session}") # 디버그 레벨둜 λ³€κ²½
 
58
 
 
59
  session.permanent = True
60
  session['logged_in'] = True
61
  session['username'] = username
62
+ # session.modified = True # FlaskλŠ” μ„Έμ…˜ λ³€κ²½ μ‹œ μžλ™μœΌλ‘œ modified ν”Œλž˜κ·Έλ₯Ό μ„€μ •ν•˜λ―€λ‘œ λͺ…μ‹œμ  호좜 λΆˆν•„μš”
63
 
64
+ logger.info(f"μ„Έμ…˜ μ„€μ • μ™„λ£Œ: {session}")
 
65
 
 
66
  redirect_to = next_url or url_for('index')
67
  logger.info(f"λ¦¬λ””λ ‰μ…˜ λŒ€μƒ: {redirect_to}")
68
  response = redirect(redirect_to)
69
+ # μ„Έμ…˜ μΏ ν‚€κ°€ μ œλŒ€λ‘œ μ„€μ •λ˜λ„λ‘ 응닡 λ°˜ν™˜ μ „ 확인 (λ””λ²„κΉ…μš©)
70
+ logger.debug(f"둜그인 응닡 헀더 (Set-Cookie 확인): {response.headers.getlist('Set-Cookie')}")
71
  return response
72
  else:
73
  logger.warning("둜그인 μ‹€νŒ¨: 아이디 λ˜λŠ” λΉ„λ°€λ²ˆν˜Έ 뢈일치")
74
+ # μ‹€νŒ¨ 원인 상세 λ‘œκΉ…μ€ λ³΄μ•ˆ μœ„ν—˜ μ†Œμ§€κ°€ μžˆμœΌλ―€λ‘œ 주의
 
75
  error = '아이디 λ˜λŠ” λΉ„λ°€λ²ˆν˜Έκ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.'
76
+ else: # GET μš”μ²­
77
  logger.info("둜그인 νŽ˜μ΄μ§€ GET μš”μ²­")
78
+ if session.get('logged_in'): # .get() μ‚¬μš©μ΄ 더 μ•ˆμ „
79
  logger.info("이미 둜그인된 μ‚¬μš©μž, 메인 νŽ˜μ΄μ§€λ‘œ λ¦¬λ””λ ‰μ…˜")
80
  return redirect(url_for('index'))
81
 
 
86
  @app.route('/logout')
87
  def logout():
88
  """λ‘œκ·Έμ•„μ›ƒ 처리"""
89
+ username = session.get('username', 'unknown') # λ¨Όμ € μ‚¬μš©μž 이름 κ°€μ Έμ˜€κΈ°
90
+ if session.pop('logged_in', None): # pop으둜 제거 μ‹œλ„ 및 성곡 μ—¬λΆ€ 확인
91
+ session.pop('username', None)
92
+ # session.modified = True # pop μ‚¬μš© μ‹œ μžλ™ 처리됨
93
+ logger.info(f"μ‚¬μš©μž {username} λ‘œκ·Έμ•„μ›ƒ 처리 μ™„λ£Œ. ν˜„μž¬ μ„Έμ…˜: {session}")
 
 
94
  else:
95
+ logger.warning("λ‘œκ·ΈμΈλ˜μ§€ μ•Šμ€ μƒνƒœμ—μ„œ λ‘œκ·Έμ•„μ›ƒ μ‹œλ„")
96
 
97
  logger.info("둜그인 νŽ˜μ΄μ§€λ‘œ λ¦¬λ””λ ‰μ…˜")
98
  response = redirect(url_for('login'))
99
+ # λ‘œκ·Έμ•„μ›ƒ μ‹œ μΏ ν‚€ μ‚­μ œ 확인 (λ””λ²„κΉ…μš©)
100
+ logger.debug(f"λ‘œκ·Έμ•„μ›ƒ 응닡 헀더 (Set-Cookie 확인): {response.headers.getlist('Set-Cookie')}")
101
  return response
102
 
103
+ # --- 메인 νŽ˜μ΄μ§€ 및 μƒνƒœ 확인 ---
104
  @app.route('/')
105
  @login_required
106
  def index():
107
  """메인 νŽ˜μ΄μ§€"""
108
+ # app_ready_flagλŠ” register_routes 호좜 μ‹œμ μ˜ κ°’μœΌλ‘œ 고정됨.
109
+ # μ‹€μ‹œκ°„ μƒνƒœλ₯Ό λ°˜μ˜ν•˜λ €λ©΄ app.py의 μ „μ—­ λ³€μˆ˜λ₯Ό 직접 μ°Έμ‘°ν•˜κ±°λ‚˜ λ‹€λ₯Έ 방법 ν•„μš”.
110
+ # μ—¬κΈ°μ„œλŠ” 전달받은 ν”Œλž˜κ·Έλ₯Ό μ‚¬μš©ν•œλ‹€κ³  κ°€μ •.
111
+
112
+ # !! μ€‘μš”: app_ready_flagλŠ” register_routes μ‹œμ μ˜ κ°’μž…λ‹ˆλ‹€.
113
+ # μ‹€μ‹œκ°„ μƒνƒœλ₯Ό 보렀면 app.py의 app_ready λ³€μˆ˜λ₯Ό 직접 μ°Έμ‘°ν•΄μ•Ό ν•©λ‹ˆλ‹€.
114
+ # 예: from app import app_ready (μˆœν™˜ μ°Έμ‘° 문제 없을 경우)
115
+ # μ—¬κΈ°μ„œλŠ” 일단 μ „λ‹¬λœ κ°’ μ‚¬μš©
116
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag # Event 객체 λ˜λŠ” bool κ°€μ •
117
+
118
+ # μ•± μ‹œμž‘ ν›„ κ²½κ³Ό μ‹œκ°„ 계산 (파일 μˆ˜μ • μ‹œκ°„ λŒ€μ‹  μ‹€μ œ μ‹œμž‘ μ‹œκ°„ μ‚¬μš©)
119
+ time_elapsed = time.time() - APP_START_TIME
120
+
121
+ # 30초 κ°•μ œ Ready 둜직 제거 λ˜λŠ” μˆ˜μ • ꢌμž₯
122
+ # if not is_ready and time_elapsed > 30:
123
+ # logger.warning(f"앱이 {time_elapsed:.1f}초 이상 μ΄ˆκΈ°ν™” 쀑 μƒνƒœμž…λ‹ˆλ‹€. (κ°•μ œ Ready 둜직 λΉ„ν™œμ„±ν™”λ¨)")
124
+ # app_ready = True # μ „μ—­ λ³€μˆ˜λ₯Ό 직접 μˆ˜μ •ν•΄μ•Ό 함
125
+
126
+ if not is_ready:
127
+ logger.info(f"앱이 아직 μ€€λΉ„λ˜μ§€ μ•Šμ•„ λ‘œλ”© νŽ˜μ΄μ§€ ν‘œμ‹œ (κ²½κ³Ό μ‹œκ°„: {time_elapsed:.1f}초)")
128
+ # 503 λŒ€μ‹  λ‘œλ”© νŽ˜μ΄μ§€λ₯Ό μ •μƒμ μœΌλ‘œ λ³΄μ—¬μ£ΌλŠ” 것이 μ‚¬μš©μž κ²½ν—˜μ— 더 쒋을 수 있음
129
+ return render_template('loading.html') # 503 λŒ€μ‹  200 OK와 λ‘œλ”© νŽ˜μ΄μ§€
130
+ # return render_template('loading.html'), 503 # 기쑴 둜직
131
+
132
  logger.info("메인 νŽ˜μ΄μ§€ μš”μ²­")
133
  return render_template('index.html')
134
 
 
137
  @login_required
138
  def app_status():
139
  """μ•± μ΄ˆκΈ°ν™” μƒνƒœ 확인 API"""
140
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
141
+ logger.info(f"μ•± μƒνƒœ 확인 μš”μ²­: {'Ready' if is_ready else 'Not Ready'}")
142
+ return jsonify({"ready": is_ready})
143
 
144
+ # --- LLM API (큰 문제 μ—†μ–΄ λ³΄μž„, λ°©μ–΄ μ½”λ“œ μΆ”κ°€) ---
145
  @app.route('/api/llm', methods=['GET', 'POST'])
146
  @login_required
147
  def llm_api():
148
  """μ‚¬μš© κ°€λŠ₯ν•œ LLM λͺ©λ‘ 및 선택 API"""
149
+ is_ready = app_ready_flag.is_set() if isinstance(app_ready_flag, threading.Event) else app_ready_flag
150
+ if not is_ready:
151
+ # LLM APIλŠ” μ΄ˆκΈ°ν™” 쀑이어도 λͺ©λ‘ μ‘°νšŒλŠ” κ°€λŠ₯ν•˜κ²Œ ν•  수 있음 (선택적)
152
+ # return jsonify({"error": "앱이 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."}), 503
153
+ pass # 일단 μ§„ν–‰ ν—ˆμš©
154
 
155
  if request.method == 'GET':
156
  logger.info("LLM λͺ©λ‘ μš”μ²­")
157
  try:
158
+ # llm_interface 객체 쑴재 및 속성 확인 κ°•ν™”
159
+ if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
160
+ logger.error("LLM μΈν„°νŽ˜μ΄μŠ€κ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ ν•„μš”ν•œ 속성이 μ—†μŠ΅λ‹ˆλ‹€.")
161
+ return jsonify({"error": "LLM μΈν„°νŽ˜μ΄μŠ€ 였λ₯˜"}), 500
162
+
163
+ current_details = llm_interface.get_current_llm_details()
164
+ supported_llms_dict = llm_interface.SUPPORTED_LLMS
165
  supported_list = [{
166
  "name": name, "id": id, "current": id == current_details.get("id")
167
  } for name, id in supported_llms_dict.items()]
 
171
  "current_llm": current_details
172
  })
173
  except Exception as e:
174
+ logger.error(f"LLM 정보 쑰회 였λ₯˜: {e}", exc_info=True) # exc_info μΆ”κ°€
175
  return jsonify({"error": "LLM 정보 쑰회 쀑 였λ₯˜ λ°œμƒ"}), 500
176
 
177
  elif request.method == 'POST':
 
183
  logger.info(f"LLM λ³€κ²½ μš”μ²­: {llm_id}")
184
 
185
  try:
186
+ # llm_interface 객체 쑴재 및 속성 확인 κ°•ν™”
187
+ if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
188
+ logger.error("LLM μΈν„°νŽ˜μ΄μŠ€κ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ ν•„μš”ν•œ 속성/λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
189
+ return jsonify({"error": "LLM μΈν„°νŽ˜μ΄μŠ€ 였λ₯˜"}), 500
190
 
191
  if llm_id not in llm_interface.llm_clients:
192
  return jsonify({"error": f"μ§€μ›λ˜μ§€ μ•ŠλŠ” LLM ID: {llm_id}"}), 400
 
207
  logger.error(f"LLM λ³€κ²½ 처리 쀑 였λ₯˜: {e}", exc_info=True)
208
  return jsonify({"error": f"LLM λ³€κ²½ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"}), 500
209
 
210
+ # --- Chat API (retriever None 체크 μˆ˜μ •) ---
211
  @app.route('/api/chat', methods=['POST'])
212
  @login_required
213
  def chat():
214
  """ν…μŠ€νŠΈ 기반 채봇 API"""
215
+ # retriever 객체가 None인지, 그리고 search λ©”μ†Œλ“œκ°€ μžˆλŠ”μ§€ 확인
216
+ if retriever is None or not hasattr(retriever, 'search'):
217
+ logger.warning("μ±„νŒ… API μš”μ²­ μ‹œ retrieverκ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ search λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
218
+ # 503 λŒ€μ‹  μ‚¬μš©μž μΉœν™”μ μΈ λ©”μ‹œμ§€ λ°˜ν™˜
219
  return jsonify({
220
+ "answer": "μ£„μ†‘ν•©λ‹ˆλ‹€. 검색 엔진이 아직 μ€€λΉ„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
221
+ "sources": [],
222
+ "error": "Retriever not ready" # ν΄λΌμ΄μ–ΈνŠΈμ—μ„œ ꡬ뢄할 수 μžˆλ„λ‘ μΆ”κ°€
223
+ }), 200 # λ˜λŠ” 503
224
 
225
  try:
226
  data = request.get_json()
 
231
  logger.info(f"ν…μŠ€νŠΈ 쿼리 μˆ˜μ‹ : {query[:100]}...")
232
 
233
  # RAG 검색 μˆ˜ν–‰
234
+ search_results = retriever.search(query, top_k=5, first_stage_k=6) # first_stage_kοΏ½οΏ½οΏ½ base_retriever에 전달될 수 있음
 
 
235
 
236
  # μ»¨ν…μŠ€νŠΈ μ€€λΉ„
237
+ if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
238
+ logger.error("DocumentProcessorκ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ prepare_rag_context λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
239
+ return jsonify({"error": "λ¬Έμ„œ 처리기 였λ₯˜"}), 500
240
+ context = DocumentProcessor.prepare_rag_context(search_results, field="text") # 'text' ν•„λ“œκ°€ μžˆλ‹€κ³  κ°€μ •
241
 
242
  if not context:
243
+ logger.warning(f"쿼리 '{query[:50]}...'에 λŒ€ν•œ 검색 κ²°κ³Ό μ—†μŒ.")
244
+ # μ»¨ν…μŠ€νŠΈ 없이 LLM 호좜 μ‹œλ„ λ˜λŠ” κΈ°λ³Έ 응닡 λ°˜ν™˜ κ²°μ • ν•„μš”
245
+ # μ—¬κΈ°μ„œλŠ” LLM 호좜 λ‘œμ§μ—μ„œ μ²˜λ¦¬ν•˜λ„λ‘ 함
246
 
247
  # LLM에 질의
248
+ llm_id = data.get('llm_id', None) # μš”μ²­μ—μ„œ llm_id κ°€μ Έμ˜€κΈ°
249
+ if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
250
+ logger.error("LLM μΈν„°νŽ˜μ΄μŠ€κ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ rag_generate λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
251
+ return jsonify({"error": "LLM μΈν„°νŽ˜μ΄μŠ€ 였λ₯˜"}), 500
252
 
253
  if not context:
254
  answer = "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
 
257
  answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
258
  logger.info(f"LLM 응닡 생성 μ™„λ£Œ (길이: {len(answer)})")
259
 
260
+ # μ†ŒμŠ€ 정보 μΆ”μΆœ (κΈ°μ‘΄ 둜직 μœ μ§€, λ°©μ–΄ μ½”λ“œ κ°•ν™”)
261
  sources = []
262
  if search_results:
263
  for result in search_results:
 
265
  logger.warning(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 검색 κ²°κ³Ό ν˜•μ‹: {type(result)}")
266
  continue
267
 
268
+ source_info = {}
269
+ source_key = result.get("source") # Langchain Document ν˜Έν™˜μ„± μœ„ν•΄ metadata도 확인
270
+ if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
271
+ source_key = result["metadata"].get("source")
272
+
273
+ if source_key:
274
+ source_info["source"] = source_key
275
+ source_info["score"] = result.get("rerank_score", result.get("score", 0))
276
 
277
+ # CSV ID μΆ”μΆœ 둜직
278
+ filetype = result.get("filetype")
279
+ if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
280
+ filetype = result["metadata"].get("filetype")
281
+
282
+ if "text" in result and filetype == "csv":
283
  try:
284
  text_lines = result["text"].strip().split('\n')
285
  if text_lines:
 
287
  if ',' in first_line:
288
  first_column = first_line.split(',')[0].strip()
289
  source_info["id"] = first_column
290
+ # logger.debug(f"CSV μ†ŒμŠ€ ID μΆ”μΆœ: {first_column} from {source_info['source']}")
291
  except Exception as e:
292
+ logger.warning(f"CSV μ†ŒμŠ€ ID μΆ”μΆœ μ‹€νŒ¨ ({source_info.get('source')}): {e}")
293
 
294
  sources.append(source_info)
295
 
 
305
  logger.error(f"μ±„νŒ… 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
306
  return jsonify({"error": f"처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"}), 500
307
 
308
+ # --- Voice Chat API (retriever, stt_client None 체크 κ°•ν™”) ---
309
  @app.route('/api/voice', methods=['POST'])
310
  @login_required
311
  def voice_chat():
312
  """μŒμ„± μ±— API μ—”λ“œν¬μΈνŠΈ"""
313
+ # ν•„μˆ˜ μ»΄ν¬λ„ŒνŠΈ 확인
314
+ if retriever is None or not hasattr(retriever, 'search'):
315
+ logger.error("μŒμ„± API μš”μ²­ μ‹œ retrieverκ°€ μ€€λΉ„λ˜μ§€ μ•ŠμŒ")
316
+ return jsonify({"error": "검색 μ—”μ§„ μ€€λΉ„ μ•ˆλ¨"}), 503
317
+ if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
318
+ logger.error("μŒμ„± API μš”μ²­ μ‹œ STT ν΄λΌμ΄μ–ΈνŠΈκ°€ μ€€λΉ„λ˜μ§€ μ•ŠμŒ")
319
+ return jsonify({"error": "μŒμ„± 인식 μ„œλΉ„μŠ€ μ€€λΉ„ μ•ˆλ¨"}), 503
320
+ if llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
321
+ logger.error("μŒμ„± API μš”μ²­ μ‹œ LLM μΈν„°νŽ˜μ΄μŠ€κ°€ μ€€λΉ„λ˜μ§€ μ•ŠμŒ")
322
+ return jsonify({"error": "LLM μΈν„°νŽ˜μ΄μŠ€ 였λ₯˜"}), 500
323
+ if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
324
+ logger.error("μŒμ„± API μš”μ²­ μ‹œ DocumentProcessorκ°€ μ€€λΉ„λ˜μ§€ μ•ŠμŒ")
325
+ return jsonify({"error": "λ¬Έμ„œ 처리기 였λ₯˜"}), 500
 
 
 
 
 
 
326
 
327
  logger.info("μŒμ„± μ±— μš”μ²­ μˆ˜μ‹ ")
328
+
329
  if 'audio' not in request.files:
330
  logger.error("μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•ŠμŒ")
331
  return jsonify({"error": "μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
 
334
  logger.info(f"μˆ˜μ‹ λœ μ˜€λ””μ˜€ 파일: {audio_file.filename} ({audio_file.content_type})")
335
 
336
  try:
337
+ # μ˜€λ””μ˜€ 파일 μž„μ‹œ μ €μž₯ 및 처리
338
+ with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
 
339
  audio_file.save(temp_audio.name)
340
  logger.info(f"μ˜€λ””μ˜€ νŒŒμΌμ„ μž„μ‹œ μ €μž₯: {temp_audio.name}")
341
+
342
+ # STT μˆ˜ν–‰ (λ°”μ΄νŠΈ λ˜λŠ” 경둜 전달)
343
+ # 예: λ°”μ΄νŠΈ 전달
 
 
 
 
 
344
  with open(temp_audio.name, 'rb') as f_bytes:
345
  audio_bytes = f_bytes.read()
346
+ stt_result = stt_client.transcribe_audio(audio_bytes, language="ko") # VitoSTTκ°€ λ°”μ΄νŠΈλ₯Ό λ°›λŠ”λ‹€κ³  κ°€μ •
 
347
 
348
  if not isinstance(stt_result, dict) or not stt_result.get("success"):
349
  error_msg = stt_result.get("error", "μ•Œ 수 μ—†λŠ” STT 였λ₯˜") if isinstance(stt_result, dict) else "STT κ²°κ³Ό ν˜•μ‹ 였λ₯˜"
350
  logger.error(f"μŒμ„±μΈμ‹ μ‹€νŒ¨: {error_msg}")
351
+ return jsonify({"error": "μŒμ„±μΈμ‹ μ‹€νŒ¨", "details": error_msg}), 500
 
 
 
352
 
353
  transcription = stt_result.get("text", "")
354
  if not transcription:
355
  logger.warning("μŒμ„±μΈμ‹ κ²°κ³Όκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
356
+ # 빈 ν…μŠ€νŠΈλΌλ„ 응닡 κ΅¬μ‘°λŠ” μœ μ§€
357
+ return jsonify({
358
+ "transcription": "",
359
+ "answer": "μŒμ„±μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΈμ‹ν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.",
360
+ "sources": [],
361
+ "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
362
+ }), 200 # 400 λŒ€μ‹  200 OK와 λ©”μ‹œμ§€
363
 
364
  logger.info(f"μŒμ„±μΈμ‹ 성곡: {transcription[:50]}...")
365
+
366
+ # --- 이후 λ‘œμ§μ€ /api/chatκ³Ό 동일 ---
 
 
 
 
 
 
 
367
  search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
368
  context = DocumentProcessor.prepare_rag_context(search_results, field="text")
369
 
370
+ llm_id = request.form.get('llm_id', None) # form λ°μ΄ν„°μ—μ„œ llm_id κ°€μ Έμ˜€κΈ°
 
 
 
 
 
 
371
  if not context:
372
  answer = "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
373
  logger.info("μ»¨ν…μŠ€νŠΈ 없이 κΈ°λ³Έ 응닡 생성")
 
375
  answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
376
  logger.info(f"LLM 응닡 생성 μ™„λ£Œ (길이: {len(answer)})")
377
 
378
+ # μ†ŒμŠ€ 정보 μΆ”μΆœ (chat API와 동일 둜직 μ‚¬μš©)
379
+ sources = []
 
380
  if search_results:
381
+ for result in search_results:
382
+ if not isinstance(result, dict): continue
383
+ source_info = {}
384
+ source_key = result.get("source")
385
+ if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
386
+ source_key = result["metadata"].get("source")
387
+ if source_key:
388
+ source_info["source"] = source_key
389
+ source_info["score"] = result.get("rerank_score", result.get("score", 0))
390
+ filetype = result.get("filetype")
391
+ if not filetype and "metadata" in result and isinstance(result["metadata"], dict):
392
+ filetype = result["metadata"].get("filetype")
393
+ if "text" in result and filetype == "csv":
394
+ try:
395
+ text_lines = result["text"].strip().split('\n')
396
+ if text_lines:
397
+ first_line = text_lines[0].strip()
398
+ if ',' in first_line:
399
+ first_column = first_line.split(',')[0].strip()
400
+ source_info["id"] = first_column
401
+ except Exception as e:
402
+ logger.warning(f"[μŒμ„±μ±—] CSV μ†ŒμŠ€ ID μΆ”μΆœ μ‹€νŒ¨ ({source_info.get('source')}): {e}")
403
+ sources.append(source_info)
404
 
405
  # μ΅œμ’… 응닡
406
  response_data = {
407
  "transcription": transcription,
408
  "answer": answer,
409
+ "sources": sources,
410
  "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
411
  }
412
  return jsonify(response_data)
413
 
414
  except Exception as e:
415
  logger.error(f"μŒμ„± μ±— 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
416
+ return jsonify({"error": "μŒμ„± 처리 쀑 λ‚΄λΆ€ 였λ₯˜ λ°œμƒ", "details": str(e)}), 500
 
 
 
 
417
 
418
+ # --- Document Upload API (base_retriever None 체크 κ°•ν™”) ---
419
  @app.route('/api/upload', methods=['POST'])
420
  @login_required
421
  def upload_document():
422
  """μ§€μ‹λ² μ΄μŠ€ λ¬Έμ„œ μ—…λ‘œλ“œ API"""
423
+ # base_retriever 객체 및 ν•„μˆ˜ λ©”μ†Œλ“œ 확인
424
+ if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
425
+ logger.error("λ¬Έμ„œ μ—…λ‘œλ“œ API μš”μ²­ μ‹œ base_retrieverκ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ ν•„μˆ˜ λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
426
+ return jsonify({"error": "κΈ°λ³Έ 검색기가 μ€€λΉ„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 503
427
 
428
  if 'document' not in request.files:
429
  return jsonify({"error": "λ¬Έμ„œ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
430
 
431
  doc_file = request.files['document']
432
+ if not doc_file or not doc_file.filename: # 파일 쑴재 및 파일λͺ… 확인
433
  return jsonify({"error": "μ„ νƒλœ 파일이 μ—†μŠ΅λ‹ˆλ‹€."}), 400
434
 
435
  if not allowed_doc_file(doc_file.filename):
436
+ ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'} # μ—¬κΈ°μ„œ λ‹€μ‹œ μ •μ˜ ν•„μš”
437
+ logger.warning(f"ν—ˆμš©λ˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹: {doc_file.filename}")
438
  return jsonify({"error": f"ν—ˆμš©λ˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹μž…λ‹ˆλ‹€. ν—ˆμš©: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
439
 
440
  try:
441
  filename = secure_filename(doc_file.filename)
442
+ # DATA_FOLDERκ°€ app.config에 μ„€μ •λ˜μ–΄ μžˆλ‹€κ³  κ°€μ •
443
+ data_folder = app.config.get('DATA_FOLDER', os.path.join(os.path.dirname(__file__), '..', 'data')) # κΈ°λ³Έκ°’ μ„€μ •
444
+ os.makedirs(data_folder, exist_ok=True) # 폴더 μ—†μœΌλ©΄ 생성
445
+ filepath = os.path.join(data_folder, filename)
446
+
447
  doc_file.save(filepath)
448
  logger.info(f"λ¬Έμ„œ μ €μž₯ μ™„λ£Œ: {filepath}")
449
 
450
+ # λ¬Έμ„œ 처리 (DocumentProcessor 객체 및 λ©”μ†Œλ“œ 확인)
451
+ if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
452
+ logger.error("DocumentProcessorκ°€ μ€€λΉ„λ˜μ§€ μ•Šμ•˜κ±°λ‚˜ ν•„μš”ν•œ λ©”μ†Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
453
+ # 이미 μ €μž₯된 파일 μ‚­μ œ κ³ λ €
454
+ try: os.remove(filepath)
455
+ except OSError: pass
456
+ return jsonify({"error": "λ¬Έμ„œ 처리기 였λ₯˜"}), 500
457
+
458
+ content = None
459
+ file_ext = filename.rsplit('.', 1)[1].lower()
460
+ metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  docs = []
462
 
463
+ # ν…μŠ€νŠΈ 기반 파일 읽기 (인코딩 처리 포함)
464
+ if file_ext in ['txt', 'md', 'csv']:
465
+ try:
466
+ with open(filepath, 'r', encoding='utf-8') as f:
467
+ content = f.read()
468
+ except UnicodeDecodeError:
469
+ logger.info(f"UTF-8 λ””μ½”λ”© μ‹€νŒ¨, CP949둜 μ‹œλ„: {filename}")
470
+ try:
471
+ with open(filepath, 'r', encoding='cp949') as f:
472
+ content = f.read()
473
+ except Exception as e_cp949:
474
+ logger.error(f"CP949 λ””μ½”λ”© μ‹€νŒ¨ ({filename}): {e_cp949}")
475
+ return jsonify({"error": "파일 인코딩을 읽을 수 μ—†μŠ΅λ‹ˆλ‹€ (UTF-8, CP949 μ‹œλ„ μ‹€νŒ¨)."}), 400
476
+ except Exception as e_read:
477
+ logger.error(f"파일 읽기 였λ₯˜ ({filename}): {e_read}")
478
+ return jsonify({"error": f"파일 읽기 쀑 였λ₯˜ λ°œμƒ: {str(e_read)}"}), 500
479
+ # PDF/DOCX 처리 둜직 (별도 라이브러리 ν•„μš”)
480
+ elif file_ext == 'pdf':
481
+ logger.warning("PDF μ²˜λ¦¬λŠ” κ΅¬ν˜„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
482
+ # content = extract_text_from_pdf(filepath) # μ˜ˆμ‹œ
483
+ elif file_ext == 'docx':
484
+ logger.warning("DOCX μ²˜λ¦¬λŠ” κ΅¬ν˜„λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
485
+ # content = extract_text_from_docx(filepath) # μ˜ˆμ‹œ
486
+
487
+ # λ¬Έμ„œ λΆ„ν• /처리
488
+ if content is not None: # λ‚΄μš©μ΄ μ„±κ³΅μ μœΌλ‘œ μ½ν˜”μ„ λ•Œλ§Œ
489
+ if file_ext == 'csv':
490
+ logger.info(f"CSV 파일 처리 μ‹œμž‘: {filename}")
491
+ docs = DocumentProcessor.csv_to_documents(content, metadata)
492
+ elif file_ext in ['txt', 'md']: # 기타 ν…μŠ€νŠΈ
493
+ logger.info(f"ν…μŠ€νŠΈ λ¬Έμ„œ 처리 μ‹œμž‘: {filename}")
494
  docs = DocumentProcessor.text_to_documents(
495
  content, metadata=metadata,
496
+ chunk_size=512, chunk_overlap=50 # μ„€μ •κ°’ μ‚¬μš©
497
  )
498
+ # PDF/DOCXμ—μ„œ μΆ”μΆœλœ content 처리 둜직 μΆ”κ°€ κ°€λŠ₯
499
 
500
+ # 검색기에 μΆ”κ°€ 및 μ €μž₯
501
  if docs:
 
 
 
502
  logger.info(f"{len(docs)}개 λ¬Έμ„œ 청크λ₯Ό 검색기에 μΆ”κ°€ν•©λ‹ˆλ‹€...")
503
  base_retriever.add_documents(docs)
504
 
 
505
  logger.info(f"검색기 μƒνƒœλ₯Ό μ €μž₯ν•©λ‹ˆλ‹€...")
506
+ index_path = app.config.get('INDEX_PATH', os.path.join(data_folder, 'index')) # κΈ°λ³Έκ°’ μ„€μ •
507
+ os.makedirs(os.path.dirname(index_path), exist_ok=True) # 인덱슀 폴더 μ—†μœΌλ©΄ 생성
508
  try:
509
  base_retriever.save(index_path)
510
  logger.info("인덱슀 μ €μž₯ μ™„λ£Œ")
511
+ # TODO: μž¬μˆœμœ„ν™” 검색기(retriever) μ—…λ°μ΄νŠΈ 둜직 ν•„μš” μ‹œ μΆ”κ°€
 
512
  return jsonify({
513
  "success": True,
514
  "message": f"파일 '{filename}' μ—…λ‘œλ“œ 및 처리 μ™„λ£Œ ({len(docs)}개 청크 μΆ”κ°€)."
515
  })
516
  except Exception as e_save:
517
+ logger.error(f"인덱슀 μ €μž₯ 쀑 였λ₯˜ λ°œμƒ: {e_save}", exc_info=True)
518
+ # μ €μž₯ μ‹€νŒ¨ μ‹œ μΆ”κ°€λœ λ¬Έμ„œ λ‘€λ°± κ³ λ €?
519
  return jsonify({"error": f"인덱슀 μ €μž₯ 쀑 였λ₯˜: {str(e_save)}"}), 500
520
  else:
521
  logger.warning(f"파일 '{filename}'μ—μ„œ μ²˜λ¦¬ν•  λ‚΄μš©μ΄ μ—†κ±°λ‚˜ μ§€μ›λ˜μ§€ μ•ŠλŠ” ν˜•μ‹μž…λ‹ˆλ‹€.")
522
+ # νŒŒμΌμ€ μ €μž₯λ˜μ—ˆμœΌλ―€λ‘œ warning λ°˜ν™˜
523
  return jsonify({
524
+ "warning": True, # 'success' λŒ€μ‹  'warning' μ‚¬μš©
525
+ "message": f"파일 '{filename}'이 μ €μž₯λ˜μ—ˆμ§€λ§Œ μ²˜λ¦¬ν•  λ‚΄μš©μ΄ μ—†κ±°λ‚˜ μ§€μ›λ˜μ§€ μ•ŠλŠ” ν˜•μ‹μž…λ‹ˆλ‹€."
526
  })
527
 
528
  except Exception as e:
529
  logger.error(f"파일 μ—…λ‘œλ“œ λ˜λŠ” 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
530
+ # 였λ₯˜ λ°œμƒ μ‹œ μ €μž₯된 파일 μ‚­μ œ κ³ λ €
531
+ if 'filepath' in locals() and os.path.exists(filepath):
532
+ try: os.remove(filepath)
533
+ except OSError: pass
534
  return jsonify({"error": f"파일 μ—…λ‘œλ“œ 쀑 였λ₯˜: {str(e)}"}), 500
535
 
536
+ # --- Document List API (였λ₯˜ 원인 뢄석 ν•„μš”) ---
537
  @app.route('/api/documents', methods=['GET'])
538
  @login_required
539
  def list_documents():
540
  """μ§€μ‹λ² μ΄μŠ€ λ¬Έμ„œ λͺ©λ‘ API"""
541
+ # !! μ€‘μš”: 이 APIκ°€ 503을 λ°˜ν™˜ν•˜λŠ” 원인을 μ°Ύμ•„μ•Ό 함 !!
542
+ # ν˜„μž¬ μ½”λ“œ μƒμœΌλ‘œλŠ” base_retrieverκ°€ None일 λ•Œ 503이 μ•„λ‹Œ 빈 λͺ©λ‘μ„ λ°˜ν™˜ν•¨.
543
+ # 503 였λ₯˜λŠ” 이 ν•¨μˆ˜ μ‹€ν–‰ *μ „* 단계(예: λ‹€λ₯Έ λ°μ½”λ ˆμ΄ν„°, 미듀웨어, Flask λ‚΄λΆ€ 였λ₯˜)
544
+ # λ˜λŠ” base_retriever μ ‘κ·Ό μ‹œ λ°œμƒν•˜λŠ” μ˜ˆμ™Έ 처리 κ³Όμ •μ—μ„œ λ‚˜μ˜¬ κ°€λŠ₯μ„± 있음.
545
+
546
+ logger.info("λ¬Έμ„œ λͺ©λ‘ API μš”μ²­ μ‹œμž‘") # 둜그 μΆ”κ°€
547
+
548
+ # base_retriever μƒνƒœ 상세 λ‘œκΉ…
549
  if base_retriever is None:
550
+ logger.warning("λ¬Έμ„œ API μš”μ²­ μ‹œ base_retrieverκ°€ Noneμž…λ‹ˆλ‹€.")
551
+ # 503 λŒ€μ‹  빈 λͺ©λ‘ λ°˜ν™˜ (μ˜λ„λœ λ™μž‘)
552
  return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
553
+ elif not hasattr(base_retriever, 'documents'):
554
+ logger.warning("λ¬Έμ„œ API μš”μ²­ μ‹œ base_retriever에 'documents' 속성이 μ—†μŠ΅λ‹ˆλ‹€.")
555
+ return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
556
+
557
+ logger.info(f"base_retriever 객체 νƒ€μž…: {type(base_retriever)}")
558
+ logger.info(f"base_retriever.documents 쑴재 μ—¬λΆ€: {hasattr(base_retriever, 'documents')}")
559
+ if hasattr(base_retriever, 'documents'):
560
+ logger.info(f"base_retriever.documents νƒ€μž…: {type(base_retriever.documents)}")
561
+ logger.info(f"base_retriever.documents 길이: {len(base_retriever.documents) if isinstance(base_retriever.documents, list) else 'N/A'}")
562
+
563
 
564
  try:
565
  sources = {}
566
  total_chunks = 0
567
+ doc_list = base_retriever.documents # 속성이 μžˆλ‹€κ³  κ°€μ •ν•˜κ³  μ ‘κ·Ό
568
+
569
+ if not isinstance(doc_list, list):
570
+ logger.error(f"base_retriever.documentsκ°€ λ¦¬μŠ€νŠΈκ°€ μ•„λ‹˜: {type(doc_list)}")
571
+ # 이 경우 500 였λ₯˜λ₯Ό λ°˜ν™˜ν•˜κ±°λ‚˜ 빈 λͺ©λ‘ λ°˜ν™˜
572
+ return jsonify({"error": "λ‚΄λΆ€ 데이터 ꡬ쑰 였λ₯˜"}), 500
573
+
574
+ logger.info(f"총 {len(doc_list)}개 λ¬Έμ„œ μ²­ν¬μ—μ„œ μ†ŒμŠ€ λͺ©λ‘ 생성 쀑...")
575
+ for i, doc in enumerate(doc_list):
576
+ # 각 λ¬Έμ„œ 청크 처리 λ‘œκΉ… μΆ”κ°€
577
+ # logger.debug(f"처리 쀑인 청크 {i}: {doc}") # λ„ˆλ¬΄ μƒμ„Έν•˜λ©΄ 주석 처리
578
+
579
+ if not isinstance(doc, dict):
580
+ logger.warning(f"청크 {i}κ°€ λ”•μ…”λ„ˆλ¦¬ νƒ€μž…μ΄ μ•„λ‹˜: {type(doc)}")
581
+ continue # λ‹€μŒ 청크둜 λ„˜μ–΄κ°
582
+
583
+ # μ†ŒμŠ€ 정보 μΆ”μΆœ (κΈ°μ‘΄ 둜직 κ°œμ„ )
584
+ source = "unknown"
585
+ metadata = doc.get("metadata") # metadata λ¨Όμ € 확인 (Langchain Document ꡬ쑰)
586
+ if isinstance(metadata, dict):
587
+ source = metadata.get("source", "unknown")
588
+ if source == "unknown": # metadata에 μ—†μœΌλ©΄ doc μžμ²΄μ—μ„œ μ°ΎκΈ°
589
+ source = doc.get("source", "unknown")
590
+
591
+ if source != "unknown":
592
+ if source in sources:
593
+ sources[source]["chunks"] += 1
594
+ else:
595
+ # 메타데이터 μš°μ„  μ‚¬μš©
596
+ filename = metadata.get("filename", source) if isinstance(metadata, dict) else source
597
+ filetype = metadata.get("filetype", "unknown") if isinstance(metadata, dict) else "unknown"
598
+ # 메타데이터 μ—†μœΌλ©΄ doc μžμ²΄μ—μ„œ μ°ΎκΈ°
599
+ if filename == source and "filename" in doc: filename = doc["filename"]
600
+ if filetype == "unknown" and "filetype" in doc: filetype = doc["filetype"]
601
+
602
+ sources[source] = {
603
+ "filename": filename,
604
+ "chunks": 1,
605
+ "filetype": filetype
606
+ }
607
  total_chunks += 1
608
+ else:
609
+ logger.warning(f"청크 {i}μ—μ„œ μ†ŒμŠ€ 정보λ₯Ό 찾을 수 μ—†μŒ: {doc}")
610
+
611
 
612
+ # λͺ©λ‘ ν˜•μ‹ λ³€ν™˜ 및 μ •λ ¬ (λ³€κ²½ μ—†μŒ)
613
  documents = [{"source": src, **info} for src, info in sources.items()]
614
+ documents.sort(key=lambda x: x.get("filename", ""), reverse=False) # 파일λͺ… κΈ°μ€€ μ •λ ¬
615
 
616
  logger.info(f"λ¬Έμ„œ λͺ©λ‘ 쑰회 μ™„λ£Œ: {len(documents)}개 μ†ŒμŠ€ 파일, {total_chunks}개 청크")
617
  return jsonify({
 
621
  })
622
 
623
  except Exception as e:
624
+ # !! μ€‘μš”: μ—¬κΈ°μ„œ λ°œμƒν•˜λŠ” μ˜ˆμ™Έκ°€ 503으둜 μ΄μ–΄μ§ˆ 수 μžˆλŠ”μ§€ 확인 !!
625
+ logger.error(f"λ¬Έμ„œ λͺ©λ‘ 쑰회 쀑 μ‹¬κ°ν•œ 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
626
+ # 일반적인 λ‚΄λΆ€ 였λ₯˜λŠ” 500 λ°˜ν™˜
627
+ return jsonify({"error": f"λ¬Έμ„œ λͺ©λ‘ 쑰회 쀑 였λ₯˜: {str(e)}"}), 500
628
+
629
+ ```
630
+
631
+ **주요 문제점 및 수정 제안:**
632
+
633
+ 1. **`/api/documents` 503 오류의 미스터리:**
634
+ * 제곡된 `list_documents` ν•¨μˆ˜ μ½”λ“œ μžμ²΄μ—λŠ” `app_ready` μƒνƒœλ‚˜ `base_retriever`κ°€ `None`인 μƒνƒœλ₯Ό ν™•μΈν•˜μ—¬ 503 였λ₯˜λ₯Ό λ°˜ν™˜ν•˜λŠ” 둜직이 **μ—†μŠ΅λ‹ˆλ‹€.** λ‘œκ·Έμ—μ„œ 503이 λ°œμƒν–ˆλ‹€λ©΄, 원인은 λ‹€μŒ 쀑 ν•˜λ‚˜μΌ κ°€λŠ₯성이 λ†’μŠ΅λ‹ˆλ‹€:
635
+ * **μ‹€μ œ μ‹€ν–‰ 쀑인 μ½”λ“œ 뢈일치:** ν˜„μž¬ μ„œλ²„μ—μ„œ μ‹€ν–‰ 쀑인 μ½”λ“œκ°€ μ œκ³΅ν•΄μ£Όμ‹  μ½”λ“œμ™€ λ‹€λ₯Ό 수 μžˆμŠ΅λ‹ˆλ‹€. (예: 이전 버전에 `if not app_ready: return ..., 503` μ½”λ“œκ°€ λ‚¨μ•„μžˆμŒ)
636
+ * **`base_retriever` μ ‘κ·Ό 였λ₯˜:** `base_retriever.documents` 속성에 μ ‘κ·Όν•˜λŠ” κ³Όμ •μ—μ„œ 예기치 μ•Šμ€ 였λ₯˜κ°€ λ°œμƒν•˜κ³ , Flask의 μ „μ—­ 였λ₯˜ ν•Έλ“€λŸ¬λ‚˜ νŠΉμ • 미듀웨어가 이λ₯Ό 503으둜 μ²˜λ¦¬ν•  수 μžˆμŠ΅λ‹ˆλ‹€. (μΌλ°˜μ μœΌλ‘œλŠ” 500 Internal Server Errorκ°€ λ°˜ν™˜λ©λ‹ˆλ‹€.)
637
+ * **μ™ΈλΆ€ μš”μΈ:** μ›Ή μ„œλ²„(Nginx λ“±) μ„€μ •μ΄λ‚˜ λ‘œλ“œ λ°ΈλŸ°μ„œ λ“± Flask μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ•žλ‹¨μ˜ λ‹€λ₯Έ μ‹œμŠ€ν…œμ—μ„œ 503 였λ₯˜λ₯Ό λ°˜ν™˜ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.
638
+ * **μˆ˜μ • μ œμ•ˆ:**
639
+ * `list_documents` ν•¨μˆ˜ μ‹œμž‘ λΆ€λΆ„κ³Ό `try...except` 블둝 내뢀에 **더 μƒμ„Έν•œ 둜그**λ₯Ό μΆ”κ°€ν•˜μ—¬ ν•¨μˆ˜ μ‹€ν–‰ 흐름과 `base_retriever` 객체 μƒνƒœλ₯Ό λͺ…ν™•νžˆ νŒŒμ•…ν•©λ‹ˆλ‹€. (μœ„ μ½”λ“œμ— λ‘œκΉ… 좔가됨)
640
+ * 실행 중인 코드가 최신 버전인지 다시 확인합니다.
641
+ * `base_retriever` 객체 자체 (`VectorRetriever` 클래스)의 `documents` 속성 구현을 확인합니다.
642
+
643
+ 2. **`app_ready` 상태 관리 및 사용:**
644
+ * `register_routes` ν•¨μˆ˜λŠ” μ•± μ‹œμž‘ μ‹œ ν•œ 번만 ν˜ΈμΆœλ˜λ―€λ‘œ, 인자둜 μ „λ‹¬λœ `app_ready` 값은 **호좜 μ‹œμ μ˜ μŠ€λƒ…μƒ·**μž…λ‹ˆλ‹€. λ°±κ·ΈλΌμš΄λ“œ μŠ€λ ˆλ“œκ°€ λ‚˜μ€‘μ— `app.py`의 μ „μ—­ `app_ready` 값을 변경해도 `register_routes` λ‚΄λΆ€μ˜ μ§€μ—­ λ³€μˆ˜ `app_ready` (μ½”λ“œμ—μ„œλŠ” `app_ready_flag`둜 λͺ…μΉ­ λ³€κ²½ μ œμ•ˆ)λŠ” μ—…λ°μ΄νŠΈλ˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.
645
+ * `index` ν•¨μˆ˜ λ‚΄μ—μ„œ `nonlocal app_ready` μ‚¬μš©μ€ 잘λͺ»λ˜μ—ˆμŠ΅λ‹ˆλ‹€. `app_ready`λŠ” μ „μ—­ λ³€μˆ˜μ΄λ―€λ‘œ `global app_ready`λ₯Ό μ‚¬μš©ν•˜κ±°λ‚˜, 더 쒋은 방법은 Flask의 `app.before_request` λ°μ½”λ ˆμ΄ν„°λ‚˜ `g` 객체λ₯Ό μ‚¬μš©ν•˜μ—¬ μš”μ²­ μ»¨ν…μŠ€νŠΈ λ‚΄μ—μ„œ μƒνƒœλ₯Ό ν™•μΈν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€. ν˜Ήμ€ `threading.Event` 객체λ₯Ό μ‚¬μš©ν•˜μ—¬ μŠ€λ ˆλ“œ κ°„ μƒνƒœλ₯Ό μ•ˆμ „ν•˜κ²Œ κ³΅μœ ν•  수 μžˆμŠ΅λ‹ˆλ‹€.
646
+ * `index` ν•¨μˆ˜μ˜ 30초 κ°•μ œ Ready λ‘œμ§μ€ `os.path.getmtime(__file__)`을 μ‚¬μš©ν•˜λŠ”λ°, μ΄λŠ” 파일 μˆ˜μ • μ‹œκ°„μ„ κΈ°μ€€μœΌλ‘œ ν•˜λ―€λ‘œ μ•±μ˜ μ‹€μ œ μ‹œμž‘ μ‹œκ°„κ³Ό 달라 λΆ€μ •ν™•ν•©λ‹ˆλ‹€.
647
+ * **μˆ˜μ • μ œμ•ˆ:**
648
+ * `app.py`μ—μ„œ `app_ready`λ₯Ό `threading.Event` 객체둜 κ΄€λ¦¬ν•˜κ³ , 이λ₯Ό `register_routes`에 μ „λ‹¬ν•©λ‹ˆλ‹€. 각 라우트 ν•Έλ“€λŸ¬μ—μ„œλŠ” `app_ready_event.is_set()`으둜 μƒνƒœλ₯Ό ν™•μΈν•©λ‹ˆλ‹€. (μœ„ μ½”λ“œμ— 반영됨)
649
+ * μ•± μ‹œμž‘ μ‹œκ°„μ„ λͺ¨λ“ˆ λ‘œλ“œ μ‹œμ μ— `time.time()`으둜 κΈ°λ‘ν•˜κ³ , `index` ν•¨μˆ˜μ—μ„œ 이λ₯Ό μ‚¬μš©ν•˜μ—¬ κ²½κ³Ό μ‹œκ°„μ„ κ³„μ‚°ν•©λ‹ˆλ‹€. (μœ„ μ½”λ“œμ— 반영됨)
650
+ * 30초 κ°•μ œ Ready λ‘œμ§μ€ 주석 μ²˜λ¦¬ν•˜κ±°λ‚˜ μ œκ±°ν•˜λŠ” 것을 ꢌμž₯ν•©λ‹ˆλ‹€. μ΄ˆκΈ°ν™”κ°€ 였래 κ±Έλ¦¬λŠ” κ·Όλ³Έ 원인을 ν•΄κ²°ν•˜λŠ” 것이 μ’‹μŠ΅λ‹ˆλ‹€.
651
+
652
+ 3. **객체 및 속성 존재 여부 확인 (방어 코드):**
653
+ * `llm_interface`, `retriever`, `stt_client`, `DocumentProcessor`, `base_retriever` λ“±μ˜ 객체가 Noneμ΄κ±°λ‚˜ ν•„μš”ν•œ λ©”μ†Œλ“œ/속성(`search`, `transcribe_audio`, `add_documents`, `documents` λ“±)이 없을 경우 `AttributeError`λ‚˜ `TypeError`κ°€ λ°œμƒν•  수 μžˆμŠ΅λ‹ˆλ‹€.
654
+ * **μˆ˜μ • μ œμ•ˆ:** 각 API ν•Έλ“€λŸ¬ μ‹œμž‘ λΆ€λΆ„μ΄λ‚˜ 객체 μ‚¬μš© 직전에 ν•΄λ‹Ή 객체와 ν•„μš”ν•œ 속성/λ©”μ†Œλ“œκ°€ μ‘΄μž¬ν•˜λŠ”μ§€ ν™•μΈν•˜λŠ” λ°©μ–΄ μ½”λ“œλ₯Ό μΆ”κ°€ν•©λ‹ˆλ‹€. (μœ„ μ½”λ“œμ— 일뢀 반영됨)
655
+
656
+ 4. **오류 로깅:**
657
+ * `except Exception as e:` 블록에서 `logger.error(f"...", exc_info=True)`를 사용하여 스택 트레이스 전체를 로깅하면 디버깅에 더 유용합니다.
658
+ * **수정 제안:** 주요 `except` 블록에 `exc_info=True`를 추가합니다. (위 코드에 반영됨)
659
+
660
+ 5. **`/api/documents` 로직 개선:**
661
+ * `base_retriever.documents`κ°€ Langchain의 `Document` 객체 리슀트일 경우, `source` λ“±μ˜ μ •λ³΄λŠ” `doc.metadata['source']` 와 같이 μ ‘κ·Όν•΄μ•Ό ν•  수 μžˆμŠ΅λ‹ˆλ‹€. ν˜„μž¬ μ½”λ“œλŠ” λ”•μ…”λ„ˆλ¦¬μ™€ Langchain `Document` ꡬ쑰λ₯Ό ν˜Όμš©ν•˜μ—¬ μ²˜λ¦¬ν•˜λ €κ³  μ‹œλ„ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€. `base_retriever.documents`의 μ‹€μ œ 데이터 ꡬ쑰λ₯Ό λͺ…ν™•νžˆ ν•˜κ³  그에 맞게 μ½”λ“œλ₯Ό μˆ˜μ •ν•΄μ•Ό ν•©λ‹ˆλ‹€.
662
+ * 문서 목록 정렬 기준을 파일명(`filename`)으로 변경하는 것이 더 직관적일 수 있습니다.
663
+ * **μˆ˜μ • μ œμ•ˆ:** `base_retriever.documents`의 ꡬ쑰λ₯Ό ν™•μΈν•˜κ³  `source`, `filename`, `filetype` μΆ”μΆœ λ‘œμ§μ„ λͺ…ν™•νžˆ ν•©λ‹ˆλ‹€. μ •λ ¬ 기쀀을 `filename`으둜 λ³€κ²½ν–ˆμŠ΅λ‹ˆλ‹€. (μœ„ μ½”λ“œ μ°Έμ‘°)
664
+
665
+ **요약 및 다음 단계:**
666
+
667
+ * `/api/documents`의 503 오류는 현재 코드만으로는 설명하기 어렵습니다. **실행 환경의 코드 버전 확인** 및 **상세 로깅 추가**를 통해 원인을 추적해야 합니다.
668
+ * `app_ready` 상태 관리 방식을 `threading.Event` 등으로 개선하고, `index` 함수의 시간 계산 로직을 수정하는 것이 좋습니다.
669
+ * 코드 전반에 걸쳐 객체 및 속성 존재 여부를 확인하는 방어 코드를 추가하고, 오류 로깅을 강화합니다.
670
+
671
+ **κ°€μž₯ λ¨Όμ € λΈŒλΌμš°μ € 개발자 λ„κ΅¬μ˜ 'Network' νƒ­μ—μ„œ `/api/documents` μš”μ²­μ˜ 응닡(Response) 본문에 ν˜Ήμ‹œ 더 μžμ„Έν•œ 였λ₯˜ λ©”μ‹œμ§€κ°€ μžˆλŠ”μ§€ 확인해 λ³΄μ„Έμš”