"""
RAG search chatbot web application - API route definitions.
"""
import json
import logging
import os
import tempfile
import threading  # threading.Event is used for the app-ready signal
import time  # used to record the application start time

import requests

from datetime import datetime
from flask import (
    request, jsonify, render_template, send_from_directory,
    session, redirect, url_for,
)
from werkzeug.utils import secure_filename

# Module-level logger.
logger = logging.getLogger(__name__)

# Timestamp captured at module load; used to report warm-up elapsed time.
APP_START_TIME = time.time()
def register_routes(app, login_required, llm_interface, retriever, stt_client, DocumentProcessor, base_retriever, app_ready_event, ADMIN_USERNAME, ADMIN_PASSWORD, DEVICE_SERVER_URL): | |
"""Flask ์ ํ๋ฆฌ์ผ์ด์ ์ ๊ธฐ๋ณธ ๋ผ์ฐํธ ๋ฑ๋ก""" | |
# ํฌํผ ํจ์ (๋ณ๊ฒฝ ์์) | |
def allowed_audio_file(filename):
    """Return True when *filename* carries an allowed audio extension."""
    permitted = {'mp3', 'wav', 'ogg', 'm4a'}
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in permitted
def allowed_doc_file(filename):
    """Return True when *filename* carries an allowed document extension."""
    permitted = {'txt', 'md', 'pdf', 'docx', 'csv'}
    if '.' not in filename:
        return False
    return filename.rsplit('.', 1)[1].lower() in permitted
# --- ๋ก๊ทธ์ธ/๋ก๊ทธ์์ ๋ผ์ฐํธ --- | |
def login():
    """Render the login page (GET) and process login submissions (POST).

    On a successful POST the session is marked logged-in and the user is
    redirected to the requested ``next`` URL or the index page.

    Security fix: the ``next`` query parameter is untrusted input; it is now
    restricted to same-site relative paths ("/...", but not "//...") to
    prevent an open-redirect to an attacker-controlled host.
    """
    error = None
    next_url = request.args.get('next')
    logger.info(f"-------------- ๋ก๊ทธ์ธ ํ์ด์ง ์ ์ (Next: {next_url}) --------------")
    logger.info(f"Method: {request.method}")
    if request.method == 'POST':
        logger.info("๋ก๊ทธ์ธ ์๋ ๋ฐ์")
        username = request.form.get('username', '')
        password = request.form.get('password', '')
        logger.info(f"์ ๋ ฅ๋ ์ฌ์ฉ์๋ช : {username}")
        valid_username = ADMIN_USERNAME
        valid_password = ADMIN_PASSWORD
        logger.info(f"๊ฒ์ฆ์ฉ ์ฌ์ฉ์๋ช : {valid_username}")
        if username == valid_username and password == valid_password:
            logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
            session.permanent = True
            session['logged_in'] = True
            session['username'] = username
            logger.info(f"์ธ์ ์ค์ ์๋ฃ: {session}")
            # Only honor same-site relative targets; anything else falls
            # back to the index page (open-redirect hardening).
            if next_url and next_url.startswith('/') and not next_url.startswith('//'):
                redirect_to = next_url
            else:
                redirect_to = url_for('index')
            logger.info(f"๋ฆฌ๋๋ ์ ๋์: {redirect_to}")
            response = redirect(redirect_to)
            logger.debug(f"๋ก๊ทธ์ธ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
            return response
        else:
            logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
            error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
    else:  # GET request
        logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
        if session.get('logged_in'):
            logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์ ")
            return redirect(url_for('index'))
    logger.info("---------- ๋ก๊ทธ์ธ ํ์ด์ง ๋ ๋๋ง ----------")
    return render_template('login.html', error=error, next=next_url)
def logout():
    """Clear the login session (if any) and redirect to the login page."""
    user = session.get('username', 'unknown')
    was_logged_in = session.pop('logged_in', None)
    if was_logged_in:
        session.pop('username', None)
        logger.info(f"์ฌ์ฉ์ {user} ๋ก๊ทธ์์ ์ฒ๋ฆฌ ์๋ฃ. ํ์ฌ ์ธ์ : {session}")
    else:
        logger.warning("๋ก๊ทธ์ธ๋์ง ์์ ์ํ์์ ๋ก๊ทธ์์ ์๋")
    logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์ ")
    response = redirect(url_for('login'))
    logger.debug(f"๋ก๊ทธ์์ ์๋ต ํค๋ (Set-Cookie ํ์ธ): {response.headers.getlist('Set-Cookie')}")
    return response
# --- ๋ฉ์ธ ํ์ด์ง ๋ฐ ์ํ ํ์ธ (app_ready_event ์ฌ์ฉ) --- | |
def index():
    """Serve the main page, or a loading page until initialization finishes."""
    # Guard against app_ready_event not being a real Event (treated as not ready).
    ready = isinstance(app_ready_event, threading.Event) and app_ready_event.is_set()
    elapsed = time.time() - APP_START_TIME
    if not ready:
        logger.info(f"์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์ (๊ฒฝ๊ณผ ์๊ฐ: {elapsed:.1f}์ด)")
        return render_template('loading.html')  # 200 OK with the loading page
    logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
    return render_template('index.html')
def app_status():
    """JSON health endpoint reporting whether initialization has finished."""
    ready = isinstance(app_ready_event, threading.Event) and app_ready_event.is_set()
    logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {'Ready' if ready else 'Not Ready'}")
    return jsonify({"ready": ready})
# --- LLM API --- | |
def llm_api():
    """List available LLMs (GET) or switch the active LLM (POST).

    GET is allowed even while the app is still initializing; POST (switching
    the model) requires initialization to have completed.
    """
    if request.method == 'GET':
        logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
        try:
            # Defensive: the interface object must expose both attributes.
            needed = ('get_current_llm_details', 'SUPPORTED_LLMS')
            if llm_interface is None or not all(hasattr(llm_interface, a) for a in needed):
                logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ์ด ์์ต๋๋ค.")
                return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
            current = llm_interface.get_current_llm_details()
            current_id = current.get("id")
            options = []
            for name, model_id in llm_interface.SUPPORTED_LLMS.items():
                options.append({"name": name, "id": model_id, "current": model_id == current_id})
            return jsonify({
                "supported_llms": options,
                "current_llm": current
            })
        except Exception as e:
            logger.error(f"LLM ์ ๋ณด ์กฐํ ์ค๋ฅ: {e}", exc_info=True)
            return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500
    elif request.method == 'POST':
        ready = isinstance(app_ready_event, threading.Event) and app_ready_event.is_set()
        if not ready:  # switching models requires a fully initialized app
            return jsonify({"error": "์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์."}), 503
        data = request.get_json()
        if not data or 'llm_id' not in data:
            return jsonify({"error": "LLM ID๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
        llm_id = data['llm_id']
        logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
        try:
            needed = ('set_llm', 'llm_clients', 'get_current_llm_details')
            if llm_interface is None or not all(hasattr(llm_interface, a) for a in needed):
                logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ์์ฑ/๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                return jsonify({"error": "LLM ์ธํฐํ์ด์ค ์ค๋ฅ"}), 500
            if llm_id not in llm_interface.llm_clients:
                return jsonify({"error": f"์ง์๋์ง ์๋ LLM ID: {llm_id}"}), 400
            if not llm_interface.set_llm(llm_id):
                logger.error(f"LLM ๋ณ๊ฒฝ ์คํจ (ID: {llm_id})")
                return jsonify({"error": "LLM ๋ณ๊ฒฝ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์"}), 500
            new_details = llm_interface.get_current_llm_details()
            logger.info(f"LLM์ด '{new_details.get('name', llm_id)}'๋ก ๋ณ๊ฒฝ๋์์ต๋๋ค.")
            return jsonify({
                "success": True,
                "message": f"LLM์ด '{new_details.get('name', llm_id)}'๋ก ๋ณ๊ฒฝ๋์์ต๋๋ค.",
                "current_llm": new_details
            })
        except Exception as e:
            logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
            return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500
# --- Chat API --- | |
def chat():
    """Text chat API: RAG search + LLM answer over a JSON {"query": ...} body.

    Returns JSON {"answer", "sources", "search_warning"}; degrades gracefully
    (HTTP 200 with an apology answer) while the app is still initializing.

    Bug fix: the original assigned ``retriever = SimpleNamespace()`` inside
    this function, which made the closed-over name ``retriever`` a local
    variable and raised UnboundLocalError at the ``if retriever is None``
    read on every call. A read-only local alias is used instead; the
    observable fallback (empty results + warning) is preserved.
    """
    def _run_search(query_text):
        """Best-effort retrieval. Returns (results, warning_or_None)."""
        results, warning = [], None
        active = retriever  # local alias: never rebind the closure variable
        try:
            if active is None:
                logger.warning("Retriever๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
                warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค."
            elif getattr(active, 'is_mock', False):
                logger.info("Mock Retriever ์ฌ์ฉ ์ค - ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
                warning = "๊ฒ์ ์ธ๋ฑ์ค๊ฐ ์์ง ๊ตฌ์ถ ์ค์ ๋๋ค. ๊ธฐ๋ณธ ์๋ต๋ง ์ ๊ณต๋ฉ๋๋ค."
            elif not hasattr(active, 'search'):
                logger.warning("Retriever์ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ํ์ฌ ์ ํ๋์ด ์์ต๋๋ค."
            else:
                try:
                    logger.info(f"๊ฒ์ ์ํ: {query_text[:50]}...")
                    results = active.search(query_text, top_k=5, first_stage_k=6)
                except Exception as search_err:
                    logger.error(f"retriever.search() ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {search_err}", exc_info=True)
                    results = []
                    warning = f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(search_err)}"
        except Exception as e:
            logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
            results = []
            warning = f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
        return results, warning

    def _extract_sources(results):
        """Build the source-attribution list from dict-shaped search results."""
        sources = []
        for result in results:
            if not isinstance(result, dict):
                logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
                continue
            source_info = {}
            source_key = result.get("source")
            # Fall back to metadata["source"] when the flat key is absent.
            if not source_key and isinstance(result.get("metadata"), dict):
                source_key = result["metadata"].get("source")
            if source_key:
                source_info["name"] = os.path.basename(source_key)
                source_info["path"] = source_key
            else:
                source_info["name"] = "์ ์ ์๋ ์์ค"
            if "score" in result:
                source_info["score"] = result["score"]
            if "rerank_score" in result:
                source_info["rerank_score"] = result["rerank_score"]
            sources.append(source_info)
        return sources

    try:
        is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
        if not is_ready:
            logger.warning("์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค.")
            # 200 (not 503) so the front-end keeps responding during warm-up.
            return jsonify({
                "error": "์ฑ ์ด๊ธฐํ ์ค...",
                "answer": "์ฃ์กํฉ๋๋ค. ์์คํ ์ด ์์ง ์ค๋น ์ค์ ๋๋ค.",
                "sources": []
            }), 200
        data = request.get_json()
        if not data or 'query' not in data:
            return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
        query = data['query']
        logger.info(f"ํ ์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")
        search_results, search_warning = _run_search(query)
        if not search_results:
            logger.info("๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.")
        else:
            logger.info(f"๊ฒ์ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐ ํญ๋ชฉ")
        try:
            context = ""
            if search_results:
                if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
                    logger.warning("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                else:
                    context = DocumentProcessor.prepare_rag_context(search_results, field="text")
                    logger.info(f"์ปจํ ์คํธ ์ค๋น ์๋ฃ (๊ธธ์ด: {len(context) if context else 0}์)")
            llm_id = data.get('llm_id', None)
            if not context:
                if search_warning:
                    logger.info(f"์ปจํ ์คํธ ์์, ๊ฒ์ ๊ฒฝ๊ณ : {search_warning}")
                    answer = f"์ฃ์กํฉ๋๋ค. ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ฐพ์ ์ ์์ต๋๋ค. ({search_warning})"
                else:
                    logger.info("์ปจํ ์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
                    answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
            elif llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
                logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                answer = "์ฃ์กํฉ๋๋ค. ํ์ฌ LLM ์๋น์ค๋ฅผ ์ฌ์ฉํ ์ ์์ต๋๋ค."
            else:
                if search_warning:
                    # Warning is logged only; the raw query is sent unmodified.
                    logger.info(f"๊ฒ์ ๊ฒฝ๊ณ ์์: {search_warning}")
                try:
                    answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
                    logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
                except Exception as llm_err:
                    logger.error(f"LLM ํธ์ถ ์ค ์ค๋ฅ: {llm_err}", exc_info=True)
                    answer = f"์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(llm_err)}"
            return jsonify({
                "answer": answer,
                "sources": _extract_sources(search_results),
                "search_warning": search_warning
            })
        except Exception as e:
            logger.error(f"LLM ์๋ต ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
            return jsonify({
                "answer": f"์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
                "sources": [],
                "error": str(e)
            })
    except Exception as e:
        logger.error(f"์ฑํ API์์ ์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
        return jsonify({
            "error": f"์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}",
            "answer": "์ฃ์กํฉ๋๋ค. ์๋ฒ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.",
            "sources": []
        }), 500
# --- Voice Chat API --- | |
def voice_chat():
    """Voice chat API: STT on the uploaded 'audio' file, then RAG + LLM.

    Returns JSON {"transcription", "answer", "sources", "search_warning"}
    plus the current LLM details when available.

    Bug fix: like chat(), the original rebound the closed-over name
    ``retriever`` (``retriever = SimpleNamespace()``), making it a local
    variable and raising UnboundLocalError on every call; a read-only local
    alias is used instead, preserving the fallback behavior.
    """
    def _run_search(query_text):
        """Best-effort retrieval. Returns (results, warning_or_None)."""
        results, warning = [], None
        active = retriever  # local alias: never rebind the closure variable
        try:
            if active is None:
                logger.warning("Retriever๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
                warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค."
            elif getattr(active, 'is_mock', False):
                logger.info("Mock Retriever ์ฌ์ฉ ์ค - ๊ฒ์ ๊ฒฐ๊ณผ ์์.")
                warning = "๊ฒ์ ์ธ๋ฑ์ค๊ฐ ์์ง ๊ตฌ์ถ ์ค์ ๋๋ค. ๊ธฐ๋ณธ ์๋ต๋ง ์ ๊ณต๋ฉ๋๋ค."
            elif not hasattr(active, 'search'):
                logger.warning("Retriever์ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                warning = "๊ฒ์ ๊ธฐ๋ฅ์ด ํ์ฌ ์ ํ๋์ด ์์ต๋๋ค."
            else:
                try:
                    logger.info(f"๊ฒ์ ์ํ: {query_text[:50]}...")
                    results = active.search(query_text, top_k=5, first_stage_k=6)
                except Exception as search_err:
                    logger.error(f"retriever.search() ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {search_err}", exc_info=True)
                    results = []
                    warning = f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(search_err)}"
        except Exception as e:
            logger.error(f"๊ฒ์ ์์ง ์ ๊ทผ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
            results = []
            warning = f"๊ฒ์ ์์ง ์ ๊ทผ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
        return results, warning

    def _extract_sources(results):
        """Build the source-attribution list from dict-shaped search results."""
        sources = []
        for result in results:
            if not isinstance(result, dict):
                logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
                continue
            source_info = {}
            source_key = result.get("source")
            if not source_key and isinstance(result.get("metadata"), dict):
                source_key = result["metadata"].get("source")
            if source_key:
                source_info["name"] = os.path.basename(source_key)
                source_info["path"] = source_key
            else:
                source_info["name"] = "์ ์ ์๋ ์์ค"
            if "score" in result:
                source_info["score"] = result["score"]
            if "rerank_score" in result:
                source_info["rerank_score"] = result["rerank_score"]
            sources.append(source_info)
        return sources

    try:
        is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
        if not is_ready:
            logger.warning("์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค.")
            # 200 (not 503) so the front-end keeps responding during warm-up.
            return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค...", "answer": "์ฃ์กํฉ๋๋ค. ์์คํ ์ด ์์ง ์ค๋น ์ค์ ๋๋ค."}), 200
        if stt_client is None or not hasattr(stt_client, 'transcribe_audio'):
            logger.error("์์ฑ API ์์ฒญ ์ STT ํด๋ผ์ด์ธํธ๊ฐ ์ค๋น๋์ง ์์")
            return jsonify({"error": "์์ฑ ์ธ์ ์๋น์ค ์ค๋น ์๋จ"}), 503
        logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
        if 'audio' not in request.files:
            logger.error("์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์")
            return jsonify({"error": "์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
        audio_file = request.files['audio']
        logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")
        try:
            # Stage the upload in a self-deleting temp file and read it back
            # as bytes for the STT client.
            with tempfile.NamedTemporaryFile(delete=True, suffix=os.path.splitext(audio_file.filename)[1]) as temp_audio:
                audio_file.save(temp_audio.name)
                logger.info(f"์ค๋์ค ํ์ผ์ ์์ ์ ์ฅ: {temp_audio.name}")
                with open(temp_audio.name, 'rb') as f_bytes:
                    audio_bytes = f_bytes.read()
            stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
            if not isinstance(stt_result, dict) or not stt_result.get("success"):
                error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
                logger.error(f"์์ฑ์ธ์ ์คํจ: {error_msg}")
                return jsonify({"error": "์์ฑ์ธ์ ์คํจ", "details": error_msg}), 500
            transcription = stt_result.get("text", "")
            if not transcription:
                logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
                return jsonify({
                    "transcription": "",
                    "answer": "์์ฑ์์ ํ ์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.",
                    "sources": []
                }), 200
            logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
            # RAG + LLM, mirroring the text chat endpoint.
            search_results, search_warning = _run_search(transcription)
            if not search_results:
                logger.info("๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.")
            else:
                logger.info(f"๊ฒ์ ๊ฒฐ๊ณผ: {len(search_results)}๊ฐ ํญ๋ชฉ")
            context = ""
            if search_results:
                if DocumentProcessor is None or not hasattr(DocumentProcessor, 'prepare_rag_context'):
                    logger.warning("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                else:
                    context = DocumentProcessor.prepare_rag_context(search_results, field="text")
                    logger.info(f"์ปจํ ์คํธ ์ค๋น ์๋ฃ (๊ธธ์ด: {len(context) if context else 0}์)")
            llm_id = request.form.get('llm_id', None)  # multipart form field here
            if not context:
                if search_warning:
                    logger.info(f"์ปจํ ์คํธ ์์, ๊ฒ์ ๊ฒฝ๊ณ : {search_warning}")
                    answer = f"์ฃ์กํฉ๋๋ค. ์ง๋ฌธ์ ๋ํ ๋ต๋ณ์ ์ฐพ์ ์ ์์ต๋๋ค. ({search_warning})"
                else:
                    logger.info("์ปจํ ์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
                    answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
            elif llm_interface is None or not hasattr(llm_interface, 'rag_generate'):
                logger.error("LLM ์ธํฐํ์ด์ค๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
                answer = "์ฃ์กํฉ๋๋ค. ํ์ฌ LLM ์๋น์ค๋ฅผ ์ฌ์ฉํ ์ ์์ต๋๋ค."
            else:
                if search_warning:
                    logger.info(f"๊ฒ์ ๊ฒฝ๊ณ ์์: {search_warning}")
                try:
                    answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
                    logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")
                except Exception as llm_err:
                    logger.error(f"LLM ํธ์ถ ์ค ์ค๋ฅ: {llm_err}", exc_info=True)
                    answer = f"์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(llm_err)}"
            response_data = {
                "transcription": transcription,
                "answer": answer,
                "sources": _extract_sources(search_results),
                "search_warning": search_warning
            }
            # Optional: include the active LLM details when exposed.
            if hasattr(llm_interface, 'get_current_llm_details'):
                response_data["llm"] = llm_interface.get_current_llm_details()
            return jsonify(response_data)
        except Exception as e:
            logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
            return jsonify({
                "error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์",
                "details": str(e),
                "answer": "์ฃ์กํฉ๋๋ค. ์ค๋์ค ์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
            }), 500
    except Exception as e:
        logger.error(f"์์ฑ API์์ ์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
        return jsonify({
            "error": f"์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {str(e)}",
            "answer": "์ฃ์กํฉ๋๋ค. ์๋ฒ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
        }), 500
# --- Document Upload API --- | |
def upload_document():
    """Upload a document into the knowledge base and index it.

    Saves the uploaded 'document' file under app.config['DATA_FOLDER'],
    extracts its text (txt/md/csv; pdf/docx extraction is a placeholder),
    chunks it via DocumentProcessor, adds the chunks to base_retriever and
    persists the index to app.config['INDEX_PATH'].

    Fixes: the log/response format strings now interpolate the actual
    filename (the placeholders were broken), and the index directory is
    only created when index_path actually has a directory component
    (os.makedirs('') raises FileNotFoundError).
    """
    is_ready = app_ready_event.is_set() if isinstance(app_ready_event, threading.Event) else False
    if not is_ready:
        return jsonify({"error": "์ฑ ์ด๊ธฐํ ์ค..."}), 503
    # base_retriever must support both add_documents() and save().
    if base_retriever is None or not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
        logger.error("๋ฌธ์ ์ ๋ก๋ API ์์ฒญ ์ base_retriever๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
        return jsonify({"error": "๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์ค๋น๋์ง ์์์ต๋๋ค."}), 503
    if 'document' not in request.files:
        return jsonify({"error": "๋ฌธ์ ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
    doc_file = request.files['document']
    if not doc_file or not doc_file.filename:
        return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
    # Kept local for the error message; allowed_doc_file() enforces the set.
    ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
    if not allowed_doc_file(doc_file.filename):
        logger.warning(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
        return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์ ๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400
    try:
        filename = secure_filename(doc_file.filename)
        if 'DATA_FOLDER' not in app.config:
            logger.error("Flask app.config์ DATA_FOLDER๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
            return jsonify({"error": "์๋ฒ ์ค์ ์ค๋ฅ (DATA_FOLDER)"}), 500
        data_folder = app.config['DATA_FOLDER']
        os.makedirs(data_folder, exist_ok=True)
        filepath = os.path.join(data_folder, filename)
        doc_file.save(filepath)
        logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")
        if DocumentProcessor is None or not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
            logger.error("DocumentProcessor๊ฐ ์ค๋น๋์ง ์์๊ฑฐ๋ ํ์ํ ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
            try:
                os.remove(filepath)  # roll back the saved file
            except OSError:
                pass
            return jsonify({"error": "๋ฌธ์ ์ฒ๋ฆฌ๊ธฐ ์ค๋ฅ"}), 500
        content = None
        # allowed_doc_file() guarantees the filename contains a dot.
        file_ext = filename.rsplit('.', 1)[1].lower()
        metadata = {"source": filename, "filename": filename, "filetype": file_ext, "filepath": filepath}
        docs = []
        # Read/extract the file content.
        if file_ext in ['txt', 'md', 'csv']:
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                # Fix: interpolate the real filename (placeholder was broken).
                logger.info(f"UTF-8 ๋์ฝ๋ฉ ์คํจ, CP949๋ก ์๋: {filename}")
                try:
                    with open(filepath, 'r', encoding='cp949') as f:
                        content = f.read()
                except Exception as e_cp949:
                    logger.error(f"CP949 ๋์ฝ๋ฉ ์คํจ ({filename}): {e_cp949}")
                    return jsonify({"error": "ํ์ผ ์ธ์ฝ๋ฉ์ ์ฝ์ ์ ์์ต๋๋ค (UTF-8, CP949 ์๋ ์คํจ)."}), 400
            except Exception as e_read:
                logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
                return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500
        elif file_ext == 'pdf':
            # TODO: PDF text extraction (e.g. PyPDF2).
            logger.warning("PDF ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
        elif file_ext == 'docx':
            # TODO: DOCX text extraction (e.g. python-docx).
            logger.warning("DOCX ์ฒ๋ฆฌ๋ ๊ตฌํ๋์ง ์์์ต๋๋ค.")
        # Chunk the extracted content.
        if content is not None:
            if file_ext == 'csv':
                logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
                docs = DocumentProcessor.csv_to_documents(content, metadata)
            elif file_ext in ['txt', 'md'] or (file_ext in ['pdf', 'docx'] and content):
                logger.info(f"ํ ์คํธ ๊ธฐ๋ฐ ๋ฌธ์ ์ฒ๋ฆฌ ์์: {filename}")
                docs = DocumentProcessor.text_to_documents(
                    content, metadata=metadata,
                    chunk_size=512, chunk_overlap=50
                )
        # Add chunks to the retriever and persist the index.
        if docs:
            logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
            base_retriever.add_documents(docs)
            logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
            if 'INDEX_PATH' not in app.config:
                logger.error("Flask app.config์ INDEX_PATH๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
                return jsonify({"error": "์๋ฒ ์ค์ ์ค๋ฅ (INDEX_PATH)"}), 500
            index_path = app.config['INDEX_PATH']
            # Fix: only create the parent directory when one exists;
            # os.makedirs('') would raise FileNotFoundError.
            index_dir = os.path.dirname(index_path)
            if index_dir:
                os.makedirs(index_dir, exist_ok=True)
            try:
                base_retriever.save(index_path)
                logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
                # TODO: refresh the reranking retriever after index updates.
                return jsonify({
                    "success": True,
                    "message": f"ํ์ผ '{filename}' ์ ๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
                })
            except Exception as e_save:
                logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}", exc_info=True)
                return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
        else:
            logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์ ๋๋ค.")
            # File was saved, but nothing could be indexed.
            return jsonify({
                "warning": True,
                "message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์ ๋๋ค."
            })
    except Exception as e:
        logger.error(f"ํ์ผ ์ ๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        # Clean up the saved file on failure.
        if 'filepath' in locals() and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError as e_del:
                logger.error(f"์ ๋ก๋ ์คํจ ํ ํ์ผ ์ญ์ ์ค๋ฅ: {e_del}")
        return jsonify({"error": f"ํ์ผ ์ ๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
# --- Document List API --- | |
def list_documents():
    """Summarize the indexed knowledge base: one entry per source file.

    Aggregates base_retriever.documents into per-source chunk counts and
    returns {"documents", "total_documents", "total_chunks"}.
    """
    logger.info("๋ฌธ์ ๋ชฉ๋ก API ์์ฒญ ์์")
    if base_retriever is None:
        logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever๊ฐ None์ ๋๋ค.")
        return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
    elif not hasattr(base_retriever, 'documents'):
        logger.warning("๋ฌธ์ API ์์ฒญ ์ base_retriever์ 'documents' ์์ฑ์ด ์์ต๋๋ค.")
        return jsonify({"documents": [], "total_documents": 0, "total_chunks": 0})
    # Diagnostic logging.
    logger.info(f"base_retriever ๊ฐ์ฒด ํ์ : {type(base_retriever)}")
    logger.info(f"base_retriever.documents ์กด์ฌ ์ฌ๋ถ: {hasattr(base_retriever, 'documents')}")
    chunk_store = getattr(base_retriever, 'documents', None)
    logger.info(f"base_retriever.documents ํ์ : {type(chunk_store)}")
    logger.info(f"base_retriever.documents ๊ธธ์ด: {len(chunk_store) if isinstance(chunk_store, list) else 'N/A'}")
    try:
        by_source = {}
        chunk_total = 0
        if not isinstance(chunk_store, list):
            logger.error(f"base_retriever.documents๊ฐ ๋ฆฌ์คํธ๊ฐ ์๋: {type(chunk_store)}")
            return jsonify({"error": "๋ด๋ถ ๋ฐ์ดํฐ ๊ตฌ์กฐ ์ค๋ฅ"}), 500
        logger.info(f"์ด {len(chunk_store)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
        for idx, chunk in enumerate(chunk_store):
            # Accept dicts and dict-like objects (e.g. Langchain Documents).
            if not hasattr(chunk, 'get'):
                logger.warning(f"์ฒญํฌ {idx}๊ฐ ๋์ ๋๋ฆฌ ํ์ ์ด ์๋: {type(chunk)}")
                continue
            # Resolve the source name: metadata first, then the flat key.
            meta = chunk.get("metadata")
            src = meta.get("source", "unknown") if isinstance(meta, dict) else "unknown"
            if src == "unknown":
                src = chunk.get("source", "unknown")
            if src == "unknown":
                logger.warning(f"์ฒญํฌ {idx}์์ ์์ค ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์: {str(chunk)[:200]}...")
                continue
            entry = by_source.get(src)
            if entry is not None:
                entry["chunks"] += 1
            else:
                fname = meta.get("filename", src) if isinstance(meta, dict) else src
                ftype = meta.get("filetype", "unknown") if isinstance(meta, dict) else "unknown"
                # Legacy layout: fields stored on the chunk itself.
                if fname == src and chunk.get("filename"):
                    fname = chunk["filename"]
                if ftype == "unknown" and chunk.get("filetype"):
                    ftype = chunk["filetype"]
                by_source[src] = {"filename": fname, "chunks": 1, "filetype": ftype}
            chunk_total += 1
        documents = [{"source": src, **info} for src, info in by_source.items()]
        documents.sort(key=lambda d: d.get("filename", ""))  # ascending by filename
        logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {chunk_total}๊ฐ ์ฒญํฌ")
        return jsonify({
            "documents": documents,
            "total_documents": len(documents),
            "total_chunks": chunk_total
        })
    except Exception as e:
        logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500