RAG6_AgenticAI / app /app_routes.py
jeongsoo's picture
fix
1defd08
raw
history blame
44.2 kB
"""
RAG search chatbot web application - API route definitions (TypeError fix, revised)
"""
import os
import json
import logging
import tempfile
import requests
import time # ์•ฑ ์‹œ์ž‘ ์‹œ๊ฐ„ ๊ธฐ๋ก ์œ„ํ•ด ์ถ”๊ฐ€
import threading # threading.Event ์‚ฌ์šฉ ์œ„ํ•ด ์ถ”๊ฐ€
from flask import request, jsonify, render_template, send_from_directory, session, redirect, url_for
from datetime import datetime
from werkzeug.utils import secure_filename
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Record the application start time (captured when this module is loaded).
APP_START_TIME = time.time()
# !! Important: the function below now receives app_ready_event instead of app_ready_flag !!
def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_event, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL):
"""Register the basic routes on the Flask application."""
# Helper functions (unchanged)
def allowed_audio_file(filename):
    """Return True when *filename* ends in an accepted audio extension.

    The extension is the text after the last dot and is compared
    case-insensitively against mp3, wav, ogg and m4a. ``None``, empty and
    non-string values are rejected with ``False`` instead of raising
    ``TypeError`` (an uploaded file may carry no filename at all).
    """
    ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
    if not isinstance(filename, str):
        return False
    # rpartition yields an empty separator when the name contains no dot.
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_AUDIO_EXTENSIONS
def allowed_doc_file(filename):
    """Return True when *filename* ends in an accepted document extension.

    The extension is the text after the last dot and is compared
    case-insensitively against txt, md, pdf, docx and csv. ``None``, empty
    and non-string values are rejected with ``False`` instead of raising
    ``TypeError``.
    """
    ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
    if not isinstance(filename, str):
        return False
    # rpartition yields an empty separator when the name contains no dot.
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_DOC_EXTENSIONS
# --- Login / logout routes ---
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Render the login form (GET) and authenticate the administrator (POST).

    On a successful POST the session is marked as logged in and the client
    is redirected to the ``next`` query parameter, or to the index page when
    ``next`` is absent. ``next`` is only honoured when it is a path on this
    site; absolute or scheme-relative URLs are discarded so the login page
    cannot be used as an open redirect. On failure the form is rendered
    again with an error message. A GET from an already logged-in session
    redirects straight to the index page.
    """
    import hmac
    from urllib.parse import urlsplit

    def _local_path_or_none(target):
        """Return *target* if it is an absolute path on this site, else None."""
        if not target:
            return None
        # Browsers treat "\" like "/", so both "//host" and "/\host" leave the site.
        if not target.startswith('/') or target[1:2] in ('/', '\\'):
            return None
        pieces = urlsplit(target)
        if pieces.scheme or pieces.netloc:
            return None
        return target

    def _credential_matches(supplied, expected):
        """Compare two credentials in constant time; a non-string expected value never matches."""
        if not isinstance(expected, str):
            return False
        return hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))

    error = None
    requested_next = request.args.get('next')
    next_url = _local_path_or_none(requested_next)
    if requested_next and next_url is None:
        logger.warning("Ignoring unsafe 'next' redirect target on login")
    logger.info(f"-------------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ์ ‘์† (Next: {next_url}) --------------")
    logger.info(f"Method: {request.method}")
    if request.method == 'POST':
        logger.info("๋กœ๊ทธ์ธ ์‹œ๋„ ๋ฐ›์Œ")
        username = request.form.get('username', '')
        password = request.form.get('password', '')
        logger.info(f"์ž…๋ ฅ๋œ ์‚ฌ์šฉ์ž๋ช…: {username}")
        valid_username = ADMIN_USERNAME
        valid_password = ADMIN_PASSWORD
        logger.info(f"๊ฒ€์ฆ์šฉ ์‚ฌ์šฉ์ž๋ช…: {valid_username}")
        # Evaluate both comparisons unconditionally so timing does not reveal
        # which of the two fields was wrong.
        username_ok = _credential_matches(username, valid_username)
        password_ok = _credential_matches(password, valid_password)
        if username_ok and password_ok:
            logger.info(f"๋กœ๊ทธ์ธ ์„ฑ๊ณต: {username}")
            session.permanent = True
            session['logged_in'] = True
            session['username'] = username
            logger.info(f"์„ธ์…˜ ์„ค์ • ์™„๋ฃŒ: {session}")
            redirect_to = next_url or url_for('index')
            logger.info(f"๋ฆฌ๋””๋ ‰์…˜ ๋Œ€์ƒ: {redirect_to}")
            response = redirect(redirect_to)
            logger.debug(f"๋กœ๊ทธ์ธ ์‘๋‹ต ํ—ค๋” (Set-Cookie ํ™•์ธ): {response.headers.getlist('Set-Cookie')}")
            return response
        logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
        error = '์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค.'
    else:  # GET request
        logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ GET ์š”์ฒญ")
        if session.get('logged_in'):
            logger.info("์ด๋ฏธ ๋กœ๊ทธ์ธ๋œ ์‚ฌ์šฉ์ž, ๋ฉ”์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
            return redirect(url_for('index'))
    logger.info("---------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ๋ Œ๋”๋ง ----------")
    return render_template('login.html', error=error, next=next_url)
@app.route('/logout')
def logout():
    """Clear the login markers from the session and redirect to the login page."""
    current_user = session.get('username', 'unknown')
    was_logged_in = session.pop('logged_in', None)
    if not was_logged_in:
        logger.warning("๋กœ๊ทธ์ธ๋˜์ง€ ์•Š์€ ์ƒํƒœ์—์„œ ๋กœ๊ทธ์•„์›ƒ ์‹œ๋„")
    else:
        session.pop('username', None)
        logger.info(f"์‚ฌ์šฉ์ž {current_user} ๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ ์™„๋ฃŒ. ํ˜„์žฌ ์„ธ์…˜: {session}")
    logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
    login_redirect = redirect(url_for('login'))
    cookie_headers = login_redirect.headers.getlist('Set-Cookie')
    logger.debug(f"๋กœ๊ทธ์•„์›ƒ ์‘๋‹ต ํ—ค๋” (Set-Cookie ํ™•์ธ): {cookie_headers}")
    return login_redirect
# --- Main page and status check (uses app_ready_event) ---
@app.route('/')
@login_required
def index():
    """Serve the main page, or the loading page while the app is not ready.

    Readiness is taken from ``app_ready_event``; anything that is not a
    ``threading.Event`` counts as "not ready".
    """
    if isinstance(app_ready_event, threading.Event):
        ready = app_ready_event.is_set()
    else:
        ready = False
    elapsed = time.time() - APP_START_TIME
    if ready:
        logger.info("๋ฉ”์ธ ํŽ˜์ด์ง€ ์š”์ฒญ")
        # Assumes an index.html template exists.
        return render_template('index.html')
    logger.info(f"์•ฑ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•„ ๋กœ๋”ฉ ํŽ˜์ด์ง€ ํ‘œ์‹œ (๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed:.1f}์ดˆ)")
    # Assumes a loading.html template exists; served with 200 OK.
    return render_template('loading.html')
@app.route('/api/status')
@login_required
def app_status():
    """Report as JSON whether application initialisation has finished."""
    ready = False
    if isinstance(app_ready_event, threading.Event):
        ready = app_ready_event.is_set()
    state_label = 'Ready' if ready else 'Not Ready'
    logger.info(f"์•ฑ ์ƒํƒœ ํ™•์ธ ์š”์ฒญ: {state_label}")
    return jsonify({"ready": ready})
# --- LLM API ---
@app.route('/api/llm', methods=['GET', 'POST'])
@login_required
def llm_api():
    """List the supported LLMs (GET) or switch the active LLM (POST).

    GET is allowed while the app is still initialising and returns
    ``supported_llms`` plus ``current_llm``. POST requires the app to be
    ready (503 otherwise) and a JSON object body containing ``llm_id``.
    A missing, malformed or non-object body yields a JSON 400 response
    rather than a framework error page or an unhandled exception.
    """
    if request.method == 'GET':
        logger.info("LLM ๋ชฉ๋ก ์š”์ฒญ")
        try:
            # Verify the interface exposes everything this branch reads.
            if llm_interface is None or not hasattr(llm_interface, 'get_current_llm_details') or not hasattr(llm_interface, 'SUPPORTED_LLMS'):
                logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
                return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
            current_details = llm_interface.get_current_llm_details()
            current_id = current_details.get("id")
            # SUPPORTED_LLMS maps display name -> LLM id.
            supported_list = [
                {"name": name, "id": candidate_id, "current": candidate_id == current_id}
                for name, candidate_id in llm_interface.SUPPORTED_LLMS.items()
            ]
            return jsonify({
                "supported_llms": supported_list,
                "current_llm": current_details
            })
        except Exception as e:
            logger.error(f"LLM ์ •๋ณด ์กฐํšŒ ์˜ค๋ฅ˜: {e}", exc_info=True)
            return jsonify({"error": "LLM ์ •๋ณด ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
    elif request.method == 'POST':
        is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
        if not is_ready:  # switching the LLM is only possible once the app is ready
            return jsonify({"error": "์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503
        # silent=True returns None for a missing/invalid JSON body instead of raising.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'llm_id' not in data:
            return jsonify({"error": "LLM ID๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        llm_id = data['llm_id']
        logger.info(f"LLM ๋ณ€๊ฒฝ ์š”์ฒญ: {llm_id}")
        try:
            # Verify the interface exposes everything this branch calls.
            if llm_interface is None or not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients') or not hasattr(llm_interface, 'get_current_llm_details'):
                logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ์†์„ฑ/๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                return jsonify({"error": "LLM ์ธํ„ฐํŽ˜์ด์Šค ์˜ค๋ฅ˜"}), 500
            if llm_id not in llm_interface.llm_clients:
                return jsonify({"error": f"์ง€์›๋˜์ง€ ์•Š๋Š” LLM ID: {llm_id}"}), 400
            success = llm_interface.set_llm(llm_id)
            if success:
                new_details = llm_interface.get_current_llm_details()
                logger.info(f"LLM์ด '{new_details.get('name', llm_id)}'๋กœ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
                return jsonify({
                    "success": True,
                    "message": f"LLM์ด '{new_details.get('name', llm_id)}'๋กœ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
                    "current_llm": new_details
                })
            logger.error(f"LLM ๋ณ€๊ฒฝ ์‹คํŒจ (ID: {llm_id})")
            return jsonify({"error": "LLM ๋ณ€๊ฒฝ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
        except Exception as e:
            logger.error(f"LLM ๋ณ€๊ฒฝ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {e}", exc_info=True)
            return jsonify({"error": f"LLM ๋ณ€๊ฒฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"}), 500
# --- Chat API ---
@app.route('/api/chat', methods=['POST'])
@login_required
def chat():
    """Answer a text query with retrieval-augmented generation.

    Expects a JSON object with ``query`` and an optional ``llm_id``.
    Returns JSON with ``answer``, ``sources`` and ``search_warning``.
    While the app is initialising a polite answer is returned with 200.
    A missing or invalid body yields 400, and an unexpected failure 500.

    The enclosing ``retriever`` is only read here, never rebound or
    patched. Rebinding it inside this function would make the name local
    and turn the ``retriever is None`` check into an ``UnboundLocalError``
    on every request, silently disabling search.
    """
    try:
        # Refuse to search or generate until initialisation has finished.
        is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
        if not is_ready:
            logger.warning("์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค.")
            return jsonify({
                "error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘...",
                "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ์ด ์•„์ง ์ค€๋น„ ์ค‘์ž…๋‹ˆ๋‹ค.",
                "sources": []
            }), 200  # 200 rather than 503 so the client treats it as a normal reply

        # silent=True returns None for a missing/invalid JSON body instead of raising.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        query = data['query']
        logger.info(f"ํ…์ŠคํŠธ ์ฟผ๋ฆฌ ์ˆ˜์‹ : {query[:100]}...")

        # --- Retrieval -------------------------------------------------
        search_results = []
        search_warning = None
        try:
            if retriever is None:
                logger.warning("Retriever๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
                search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
            elif getattr(retriever, 'is_mock', False):
                logger.info("Mock Retriever ์‚ฌ์šฉ ์ค‘ - ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ.")
                search_warning = "๊ฒ€์ƒ‰ ์ธ๋ฑ์Šค๊ฐ€ ์•„์ง ๊ตฌ์ถ• ์ค‘์ž…๋‹ˆ๋‹ค. ๊ธฐ๋ณธ ์‘๋‹ต๋งŒ ์ œ๊ณต๋ฉ๋‹ˆ๋‹ค."
            elif not hasattr(retriever, 'search'):
                logger.warning("Retriever์— search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ์ œํ•œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค."
            else:
                logger.info(f"๊ฒ€์ƒ‰ ์ˆ˜ํ–‰: {query[:50]}...")
                try:
                    search_results = retriever.search(query, top_k=5, first_stage_k=6)
                except AttributeError as attr_err:
                    logger.error(f"retriever์— search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค: {attr_err}", exc_info=True)
                    search_results = []
                    search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ์ œํ•œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค."
                except Exception as search_err:
                    logger.error(f"retriever.search() ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {search_err}", exc_info=True)
                    search_results = []
                    search_warning = f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(search_err)}"
                if not search_results:
                    logger.info("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                else:
                    logger.info(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐœ ํ•ญ๋ชฉ")
        except Exception as e:
            # Retrieval problems degrade to "no context"; they never fail the request.
            logger.error(f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
            search_results = []
            search_warning = f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"

        # --- Answer generation -----------------------------------------
        try:
            context = ""
            if search_results:
                if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
                    logger.warning("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ prepare_rag_context ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                else:
                    context = DocumentProcessor.prepare_rag_context(search_results, field="text")
                    logger.info(f"์ปจํ…์ŠคํŠธ ์ค€๋น„ ์™„๋ฃŒ (๊ธธ์ด: {len(context) if context else 0}์ž)")

            llm_id = data.get('llm_id', None)
            if not context:
                # Without context the LLM is not called; a canned answer is returned.
                if search_warning:
                    logger.info(f"์ปจํ…์ŠคํŠธ ์—†์Œ, ๊ฒ€์ƒ‰ ๊ฒฝ๊ณ : {search_warning}")
                    answer = f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ({search_warning})"
                else:
                    logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
                    answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            elif llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
                logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ rag_generate ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ LLM ์„œ๋น„์Šค๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            else:
                # The warning is only logged; the query goes to the LLM unchanged.
                if search_warning:
                    logger.info(f"๊ฒ€์ƒ‰ ๊ฒฝ๊ณ  ์žˆ์Œ: {search_warning}")
                try:
                    answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
                    logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
                except Exception as llm_err:
                    logger.error(f"LLM ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜: {llm_err}", exc_info=True)
                    answer = f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(llm_err)}"

            # Summarise where each retrieved chunk came from.
            sources = []
            for result in search_results or []:
                if not isinstance(result, dict):
                    logger.warning(f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํ˜•์‹: {type(result)}")
                    continue
                source_info = {}
                source_key = result.get("source")
                # Fall back to the nested metadata when the top level has no source.
                if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
                    source_key = result["metadata"].get("source")
                if source_key:
                    source_info["name"] = os.path.basename(source_key)
                    source_info["path"] = source_key
                else:
                    source_info["name"] = "์•Œ ์ˆ˜ ์—†๋Š” ์†Œ์Šค"
                if "score" in result:
                    source_info["score"] = result["score"]
                if "rerank_score" in result:
                    source_info["rerank_score"] = result["rerank_score"]
                sources.append(source_info)

            return jsonify({
                "answer": answer,
                "sources": sources,
                "search_warning": search_warning
            })
        except Exception as e:
            logger.error(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
            return jsonify({
                "answer": f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
                "sources": [],
                "error": str(e)
            })
    except Exception as e:
        logger.error(f"์ฑ„ํŒ… API์—์„œ ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
        return jsonify({
            "error": f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
            "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์„œ๋ฒ„์—์„œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            "sources": []
        }), 500
# --- Voice Chat API ---
@app.route('/api/voice', methods=['POST'])
@login_required
def voice_chat():
    """Transcribe an uploaded audio clip and answer it with RAG.

    Expects a multipart upload with an ``audio`` file and an optional
    ``llm_id`` form field. Returns JSON with ``transcription``, ``answer``,
    ``sources``, ``search_warning`` and, when the LLM interface provides
    it, ``llm``.

    The upload is read straight from its stream, not written to a named
    temporary file and reopened; reopening fails on platforms that lock
    open temp files, and the old code crashed on a missing filename. The
    enclosing ``retriever`` is only read here, never rebound, because
    rebinding would make the name local and raise ``UnboundLocalError``
    at the ``retriever is None`` check.
    """
    try:
        # Refuse to do any work until initialisation has finished.
        is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
        if not is_ready:
            logger.warning("์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค.")
            # 200 rather than 503 so the client treats it as a normal reply.
            return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘...", "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‹œ์Šคํ…œ์ด ์•„์ง ์ค€๋น„ ์ค‘์ž…๋‹ˆ๋‹ค."}), 200

        # The speech-to-text client must be available before accepting audio.
        if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
            logger.error("์Œ์„ฑ API ์š”์ฒญ ์‹œ STT ํด๋ผ์ด์–ธํŠธ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์Œ")
            return jsonify({"error": "์Œ์„ฑ ์ธ์‹ ์„œ๋น„์Šค ์ค€๋น„ ์•ˆ๋จ"}), 503

        logger.info("์Œ์„ฑ ์ฑ— ์š”์ฒญ ์ˆ˜์‹ ")
        if 'audio' not in request.files:
            logger.error("์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์Œ")
            return jsonify({"error": "์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        audio_file = request.files['audio']
        logger.info(f"์ˆ˜์‹ ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ: {audio_file.filename} ({audio_file.content_type})")

        try:
            # --- Speech to text (the STT client is given raw bytes) ------
            audio_bytes = audio_file.read()
            stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")

            if not isinstance(stt_result, dict) or not stt_result.get("success"):
                error_msg = stt_result.get("error", "์•Œ ์ˆ˜ ์—†๋Š” STT ์˜ค๋ฅ˜") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ˜•์‹ ์˜ค๋ฅ˜"
                logger.error(f"์Œ์„ฑ์ธ์‹ ์‹คํŒจ: {error_msg}")
                return jsonify({"error": "์Œ์„ฑ์ธ์‹ ์‹คํŒจ", "details": error_msg}), 500

            transcription = stt_result.get("text", "")
            if not transcription:
                logger.warning("์Œ์„ฑ์ธ์‹ ๊ฒฐ๊ณผ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")
                return jsonify({
                    "transcription": "",
                    "answer": "์Œ์„ฑ์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.",
                    "sources": []
                }), 200  # 200 OK with an explanatory message
            logger.info(f"์Œ์„ฑ์ธ์‹ ์„ฑ๊ณต: {transcription[:50]}...")

            # --- Retrieval (same logic as the chat API) -----------------
            search_results = []
            search_warning = None
            try:
                if retriever is None:
                    logger.warning("Retriever๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
                    search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
                elif getattr(retriever, 'is_mock', False):
                    logger.info("Mock Retriever ์‚ฌ์šฉ ์ค‘ - ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ.")
                    search_warning = "๊ฒ€์ƒ‰ ์ธ๋ฑ์Šค๊ฐ€ ์•„์ง ๊ตฌ์ถ• ์ค‘์ž…๋‹ˆ๋‹ค. ๊ธฐ๋ณธ ์‘๋‹ต๋งŒ ์ œ๊ณต๋ฉ๋‹ˆ๋‹ค."
                elif not hasattr(retriever, 'search'):
                    logger.warning("Retriever์— search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                    search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ์ œํ•œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค."
                else:
                    logger.info(f"๊ฒ€์ƒ‰ ์ˆ˜ํ–‰: {transcription[:50]}...")
                    try:
                        search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
                    except AttributeError as attr_err:
                        logger.error(f"retriever์— search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค: {attr_err}", exc_info=True)
                        search_results = []
                        search_warning = "๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ์ œํ•œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค."
                    except Exception as search_err:
                        logger.error(f"retriever.search() ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {search_err}", exc_info=True)
                        search_results = []
                        search_warning = f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(search_err)}"
                    if not search_results:
                        logger.info("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                    else:
                        logger.info(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐœ ํ•ญ๋ชฉ")
            except Exception as e:
                # Retrieval problems degrade to "no context"; they never fail the request.
                logger.error(f"๊ฒ€์ƒ‰ ์—”์ง„ ์ ‘๊ทผ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
                search_results = []
                search_warning = f"๊ฒ€์ƒ‰ ์—”์ง„ ์ ‘๊ทผ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"

            # --- Answer generation --------------------------------------
            context = ""
            if search_results:
                if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
                    logger.warning("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ prepare_rag_context ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                else:
                    context = DocumentProcessor.prepare_rag_context(search_results, field="text")
                    logger.info(f"์ปจํ…์ŠคํŠธ ์ค€๋น„ ์™„๋ฃŒ (๊ธธ์ด: {len(context) if context else 0}์ž)")

            llm_id = request.form.get('llm_id', None)  # llm_id arrives as form data here
            if not context:
                # Without context the LLM is not called; a canned answer is returned.
                if search_warning:
                    logger.info(f"์ปจํ…์ŠคํŠธ ์—†์Œ, ๊ฒ€์ƒ‰ ๊ฒฝ๊ณ : {search_warning}")
                    answer = f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ({search_warning})"
                else:
                    logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
                    answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            elif llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
                logger.error("LLM ์ธํ„ฐํŽ˜์ด์Šค๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ rag_generate ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ LLM ์„œ๋น„์Šค๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            else:
                # The warning is only logged; the transcription goes to the LLM unchanged.
                if search_warning:
                    logger.info(f"๊ฒ€์ƒ‰ ๊ฒฝ๊ณ  ์žˆ์Œ: {search_warning}")
                try:
                    answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
                    logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")
                except Exception as llm_err:
                    logger.error(f"LLM ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜: {llm_err}", exc_info=True)
                    answer = f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(llm_err)}"

            # Summarise where each retrieved chunk came from.
            sources = []
            for result in search_results or []:
                if not isinstance(result, dict):
                    logger.warning(f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํ˜•์‹: {type(result)}")
                    continue
                source_info = {}
                source_key = result.get("source")
                # Fall back to the nested metadata when the top level has no source.
                if not source_key and "metadata" in result and isinstance(result["metadata"], dict):
                    source_key = result["metadata"].get("source")
                if source_key:
                    source_info["name"] = os.path.basename(source_key)
                    source_info["path"] = source_key
                else:
                    source_info["name"] = "์•Œ ์ˆ˜ ์—†๋Š” ์†Œ์Šค"
                if "score" in result:
                    source_info["score"] = result["score"]
                if "rerank_score" in result:
                    source_info["rerank_score"] = result["rerank_score"]
                sources.append(source_info)

            # Final payload.
            response_data = {
                "transcription": transcription,
                "answer": answer,
                "sources": sources,
                "search_warning": search_warning
            }
            # Optionally report which LLM is currently active.
            if hasattr(llm_interface, 'get_current_llm_details'):
                response_data["llm"] = llm_interface.get_current_llm_details()
            return jsonify(response_data)
        except Exception as e:
            logger.error(f"์Œ์„ฑ ์ฑ— ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
            return jsonify({
                "error": "์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ",
                "details": str(e),
                "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."
            }), 500
    except Exception as e:
        logger.error(f"์Œ์„ฑ API์—์„œ ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
        return jsonify({
            "error": f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
            "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์„œ๋ฒ„์—์„œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."
        }), 500
# --- Document Upload API ---
@app.route('/api/upload', methods=['POST'])
@login_required
def upload_document():
    """Store an uploaded knowledge-base document and index its contents.

    Expects a multipart upload with a ``document`` file whose extension is
    one of txt, md, pdf, docx or csv. The file is saved under
    ``app.config['DATA_FOLDER']``. txt/md/csv content is decoded as UTF-8
    with a CP949 fallback and converted to chunks by ``DocumentProcessor``.
    The chunks are added to ``base_retriever``, which is then saved to
    ``app.config['INDEX_PATH']``. PDF and DOCX extraction is not
    implemented; such files are stored and a warning payload is returned.

    Returns JSON: a success or warning payload with 200, 400 for bad input,
    500 for processing or configuration errors, and 503 while the app or
    the retriever is not ready. A file that could not be read or processed
    is removed again.
    """
    def _discard(path):
        """Best-effort removal of a stored upload that could not be used."""
        if path and os.path.exists(path):
            try:
                os.remove(path)
            except OSError as e_del:
                logger.error(f"์—…๋กœ๋“œ ์‹คํŒจ ํ›„ ํŒŒ์ผ ์‚ญ์ œ ์˜ค๋ฅ˜: {e_del}")

    is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
    if not is_ready:
        return jsonify({"error": "์•ฑ ์ดˆ๊ธฐํ™” ์ค‘..."}), 503
    # The base retriever must support adding documents and persisting itself.
    if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
        logger.error("๋ฌธ์„œ ์—…๋กœ๋“œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์ˆ˜ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 503
    if 'document' not in request.files:
        return jsonify({"error": "๋ฌธ์„œ ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
    doc_file = request.files['document']
    if not doc_file or not doc_file.filename:
        return jsonify({"error": "์„ ํƒ๋œ ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."}), 400
    # Repeated here only to list the accepted extensions in the error message.
    ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
    if not allowed_doc_file(doc_file.filename):
        logger.warning(f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {doc_file.filename}")
        return jsonify({"error": f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ํ—ˆ์šฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400

    filepath = None  # set once the upload has been written to disk
    try:
        # Take the extension from the validated original name: secure_filename()
        # drops non-ASCII characters, so e.g. a Korean name can collapse to a
        # bare "txt" with no dot left to split on.
        file_ext = doc_file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(doc_file.filename)
        if not filename.lower().endswith('.' + file_ext):
            filename = f"upload_{int(time.time())}.{file_ext}"

        if 'DATA_FOLDER' not in app.config:
            logger.error("Flask app.config์— DATA_FOLDER๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
            return jsonify({"error": "์„œ๋ฒ„ ์„ค์ • ์˜ค๋ฅ˜ (DATA_FOLDER)"}), 500
        data_folder = app.config['DATA_FOLDER']
        os.makedirs(data_folder, exist_ok=True)
        filepath = os.path.join(data_folder, filename)
        doc_file.save(filepath)
        logger.info(f"๋ฌธ์„œ ์ €์žฅ ์™„๋ฃŒ: {filepath}")

        # The processor must provide both converters used below.
        if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
            logger.error("DocumentProcessor๊ฐ€ ์ค€๋น„๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ํ•„์š”ํ•œ ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
            _discard(filepath)
            return jsonify({"error": "๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ธฐ ์˜ค๋ฅ˜"}), 500

        content = None
        metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
        docs = []

        # Read the file and extract its text.
        if file_ext in ['txt', 'md', 'csv']:
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                logger.info(f"UTF-8 ๋””์ฝ”๋”ฉ ์‹คํŒจ, CP949๋กœ ์‹œ๋„: {filename}")
                try:
                    with open(filepath, 'r', encoding='cp949') as f:
                        content = f.read()
                except Exception as e_cp949:
                    logger.error(f"CP949 ๋””์ฝ”๋”ฉ ์‹คํŒจ ({filename}): {e_cp949}")
                    _discard(filepath)  # do not keep a file that cannot be read
                    return jsonify({"error": "ํŒŒ์ผ ์ธ์ฝ”๋”ฉ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค (UTF-8, CP949 ์‹œ๋„ ์‹คํŒจ)."}), 400
            except Exception as e_read:
                logger.error(f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜ ({filename}): {e_read}")
                _discard(filepath)
                return jsonify({"error": f"ํŒŒ์ผ ์ฝ๊ธฐ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e_read)}"}), 500
        elif file_ext == 'pdf':
            # TODO: add PDF text extraction; until then content stays None.
            logger.warning("PDF ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        elif file_ext == 'docx':
            # TODO: add DOCX text extraction; until then content stays None.
            logger.warning("DOCX ์ฒ˜๋ฆฌ๋Š” ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

        # Split or convert the text into documents, only when text was obtained.
        if content is not None:
            if file_ext == 'csv':
                logger.info(f"CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
                docs = DocumentProcessor.csv_to_documents(content, metadata)
            elif file_ext in ['txt', 'md'] or (file_ext in ['pdf', 'docx'] and content):
                logger.info(f"ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
                # text_to_documents is presumed to do the chunk splitting.
                docs = DocumentProcessor.text_to_documents(
                    content, metadata=metadata,
                    chunk_size=512, chunk_overlap=50
                )

        if not docs:
            logger.warning(f"ํŒŒ์ผ '{filename}'์—์„œ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค.")
            # The file itself was stored, so report a warning, not success.
            return jsonify({
                "warning": True,
                "message": f"ํŒŒ์ผ '{filename}'์ด ์ €์žฅ๋˜์—ˆ์ง€๋งŒ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค."
            })

        # Add the chunks to the retriever and persist it.
        logger.info(f"{len(docs)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ๋ฅผ ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค...")
        base_retriever.add_documents(docs)
        logger.info(f"๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ๋ฅผ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค...")
        if 'INDEX_PATH' not in app.config:
            logger.error("Flask app.config์— INDEX_PATH๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
            return jsonify({"error": "์„œ๋ฒ„ ์„ค์ • ์˜ค๋ฅ˜ (INDEX_PATH)"}), 500
        index_path = app.config['INDEX_PATH']
        # Create the parent directory only when there is one: a bare file name
        # has an empty dirname and os.makedirs('') raises FileNotFoundError.
        index_parent = os.path.dirname(index_path)
        if index_parent:
            os.makedirs(index_parent, exist_ok=True)
        try:
            base_retriever.save(index_path)
            logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ")
            # TODO: refresh the reranking retriever here if that becomes necessary.
            return jsonify({
                "success": True,
                "message": f"ํŒŒ์ผ '{filename}' ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ ({len(docs)}๊ฐœ ์ฒญํฌ ์ถ”๊ฐ€)."
            })
        except Exception as e_save:
            logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e_save}", exc_info=True)
            # NOTE(review): the chunks added above are not rolled back on save failure.
            return jsonify({"error": f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜: {str(e_save)}"}), 500
    except Exception as e:
        logger.error(f"ํŒŒ์ผ ์—…๋กœ๋“œ ๋˜๋Š” ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        # Remove the stored file when anything above failed unexpectedly.
        _discard(filepath)
        return jsonify({"error": f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
# --- Document List API ---
@app.route('/api/documents', methods=['GET'])
@login_required
def list_documents():
    """Summarise the indexed chunks of ``base_retriever`` per source file.

    Returns JSON with ``documents`` (one entry per source carrying source,
    filename, chunk count and filetype, sorted by filename),
    ``total_documents`` and ``total_chunks``. Chunks without a resolvable
    source are logged and left out of the totals. An empty listing is
    returned when the retriever is missing or has no ``documents``
    attribute; 500 is returned when ``documents`` is not a list or an
    unexpected error occurs.
    """
    logger.info("๋ฌธ์„œ ๋ชฉ๋ก API ์š”์ฒญ ์‹œ์ž‘")
    # Guard clauses: without a usable retriever there is nothing to list.
    if base_retriever is None:
        logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever๊ฐ€ None์ž…๋‹ˆ๋‹ค.")
        return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
    if not hasattr(base_retriever, 'documents'):
        logger.warning("๋ฌธ์„œ API ์š”์ฒญ ์‹œ base_retriever์— 'documents' ์†์„ฑ์ด ์—†์Šต๋‹ˆ๋‹ค.")
        return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})

    # Diagnostic logging about the retriever and its chunk store.
    logger.info(f"base_retriever ๊ฐ์ฒด ํƒ€์ž…: {type(base_retriever)}")
    logger.info(f"base_retriever.documents ์กด์žฌ ์—ฌ๋ถ€: {hasattr(base_retriever, 'documents')}")
    chunks = getattr(base_retriever, 'documents', None)
    logger.info(f"base_retriever.documents ํƒ€์ž…: {type(chunks)}")
    logger.info(f"base_retriever.documents ๊ธธ์ด: {len(chunks) if isinstance(chunks, list) else 'N/A'}")

    try:
        per_source = {}
        counted_chunks = 0
        if not isinstance(chunks, list):
            logger.error(f"base_retriever.documents๊ฐ€ ๋ฆฌ์ŠคํŠธ๊ฐ€ ์•„๋‹˜: {type(chunks)}")
            return jsonify({"error": "๋‚ด๋ถ€ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ ์˜ค๋ฅ˜"}), 500
        logger.info(f"์ด {len(chunks)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ์—์„œ ์†Œ์Šค ๋ชฉ๋ก ์ƒ์„ฑ ์ค‘...")

        for position, chunk in enumerate(chunks):
            # Only dict-like chunks (anything exposing .get) can be inspected.
            if not hasattr(chunk, 'get'):
                logger.warning(f"์ฒญํฌ {position}๊ฐ€ ๋”•์…”๋„ˆ๋ฆฌ ํƒ€์ž…์ด ์•„๋‹˜: {type(chunk)}")
                continue

            # Prefer the source recorded in metadata, then the chunk itself.
            meta = chunk.get("metadata")
            meta_is_dict = isinstance(meta, dict)
            origin = "unknown"
            if meta_is_dict:
                origin = meta.get("source", "unknown")
            if origin == "unknown":
                origin = chunk.get("source", "unknown")

            if origin == "unknown":
                # Chunks without any source are only logged.
                logger.warning(f"์ฒญํฌ {position}์—์„œ ์†Œ์Šค ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ: {str(chunk)[:200]}...")
                continue

            if origin not in per_source:
                # First chunk of this source: resolve filename and filetype,
                # metadata first, then the chunk's own fields.
                if meta_is_dict:
                    shown_name = meta.get("filename", origin)
                    kind = meta.get("filetype", "unknown")
                else:
                    shown_name = origin
                    kind = "unknown"
                if shown_name == origin and chunk.get("filename"):
                    shown_name = chunk["filename"]
                if kind == "unknown" and chunk.get("filetype"):
                    kind = chunk["filetype"]
                per_source[origin] = {
                    "filename": shown_name,
                    "chunks": 1,
                    "filetype": kind
                }
            else:
                per_source[origin]["chunks"] += 1
            counted_chunks += 1

        # Build the listing in ascending filename order.
        documents = sorted(
            ({"source": src, **info} for src, info in per_source.items()),
            key=lambda entry: entry.get("filename", ""),
        )
        logger.info(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์™„๋ฃŒ: {len(documents)}๊ฐœ ์†Œ์Šค ํŒŒ์ผ, {counted_chunks}๊ฐœ ์ฒญํฌ")
        return jsonify({
            "documents": documents,
            "total_documents": len(documents),
            "total_chunks": counted_chunks
        })
    except Exception as e:
        logger.error(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์‹ฌ๊ฐํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        # Reported as 500 rather than 503.
        return jsonify({"error": f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500