Spaces:
Running
Running
""" | |
RAG search chatbot web application (session-configuration fix applied; duplicate route registration prevented)
""" | |
import os | |
import json | |
import logging | |
import tempfile | |
import threading | |
import datetime | |
from flask import Flask, request, jsonify, render_template, send_from_directory, session, redirect, url_for | |
from flask_cors import CORS | |
from werkzeug.utils import secure_filename | |
from dotenv import load_dotenv | |
from functools import wraps | |
# ๋ก๊ฑฐ ์ค์ | |
# Root logging configuration; DEBUG is selected so detailed diagnostics are emitted.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.DEBUG, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# Load environment variables (python-dotenv) before reading any of them.
load_dotenv()

# Admin credentials and the device-server URL are read from the environment.
ADMIN_USERNAME = os.environ.get('ADMIN_USERNAME')
ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD')
DEVICE_SERVER_URL = os.environ.get('DEVICE_SERVER_URL', 'http://localhost:5050')

logger.info(f"์ฅ์น ์๋ฒ URL: {DEVICE_SERVER_URL}")
logger.info("==== ํ๊ฒฝ ๋ณ์ ๋ก๋ ์ํ ====")
_username_was_set = ADMIN_USERNAME is not None
_password_was_set = ADMIN_PASSWORD is not None
logger.info(f"ADMIN_USERNAME ์ค์ ์ฌ๋ถ: {_username_was_set}")
# Only whether a password was supplied is recorded here, not its value.
logger.info(f"ADMIN_PASSWORD ์ค์ ์ฌ๋ถ: {_password_was_set}")

# Development fallbacks used when a variable is missing or empty.
# NOTE(review): the second warning writes the built-in default password to the
# log; configure ADMIN_PASSWORD in any real deployment.
if not ADMIN_USERNAME:
    ADMIN_USERNAME = 'admin'
    logger.warning("ADMIN_USERNAME ํ๊ฒฝ๋ณ์๊ฐ ์์ด ๊ธฐ๋ณธ๊ฐ 'admin'์ผ๋ก ์ค์ ํฉ๋๋ค.")
if not ADMIN_PASSWORD:
    ADMIN_PASSWORD = 'rag12345'
    logger.warning("ADMIN_PASSWORD ํ๊ฒฝ๋ณ์๊ฐ ์์ด ๊ธฐ๋ณธ๊ฐ 'rag12345'๋ก ์ค์ ํฉ๋๋ค.")
class MockComponent:
    """Stand-in used when a real project component cannot be imported.

    The real class names (``LLMInterface``, ``VitoSTT``, ...) are aliased to
    this class when their import fails, and call sites still invoke them with
    the real constructor arguments (for example ``default_llm="openai"``).
    The constructor therefore accepts and ignores any arguments instead of
    raising ``TypeError``. Instances are plain objects, so callers can attach
    attributes such as ``search`` or ``documents`` afterwards.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are intentionally discarded; the mock has no behaviour.
        pass
# --- ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ --- | |
# ์ค์ ๊ฒฝ๋ก์ ๋ง๊ฒ utils, retrieval ํด๋๊ฐ ์กด์ฌํด์ผ ํฉ๋๋ค. | |
try:
    from utils.vito_stt import VitoSTT
    from utils.llm_interface import LLMInterface
    from utils.document_processor import DocumentProcessor
    from retrieval.vector_retriever import VectorRetriever
    from retrieval.reranker import ReRanker
    # Function that registers the device-related routes on the app.
    from app.app_device_routes import register_device_routes
except ImportError as import_err:
    e = import_err  # keep the name used in the log message below
    logger.error(f"๋ก์ปฌ ๋ชจ๋ ์ํฌํธ ์คํจ: {e}. utils, retrieval, app ํจํค์ง๊ฐ ์ฌ๋ฐ๋ฅธ ๊ฒฝ๋ก์ ์๋์ง ํ์ธํ์ธ์.")
    # Development/test fallbacks: every component name is bound to the mock class.
    VitoSTT = MockComponent
    LLMInterface = MockComponent
    DocumentProcessor = MockComponent
    VectorRetriever = MockComponent
    ReRanker = MockComponent

    def register_device_routes(*args, **kwargs):
        """No-op replacement that only logs that the mock was called."""
        logger.warning("Mock register_device_routes ํจ์ ํธ์ถ๋จ.")
# --- ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ ๋ --- | |
# Flask ์ฑ ์ด๊ธฐํ | |
# Flask application object.
app = Flask(__name__)

# CORS with credentials enabled; no origin restriction is passed here.
CORS(app, supports_credentials=True)

# Session signing key: FLASK_SECRET_KEY takes precedence, otherwise a fixed
# built-in value is used.
# NOTE(review): the fallback key is visible in source; set FLASK_SECRET_KEY in deployment.
app.secret_key = os.getenv('FLASK_SECRET_KEY', 'rag_chatbot_fixed_secret_key_12345')

# Session-cookie settings and upload size limit.
# SameSite is 'None' (original note: the app was found to run inside an iframe),
# which is paired with Secure=True.
app.config.update(
    SESSION_COOKIE_SECURE=True,
    SESSION_COOKIE_HTTPONLY=True,  # cookie not readable from JavaScript
    SESSION_COOKIE_SAMESITE='None',
    SESSION_COOKIE_DOMAIN=None,  # no explicit domain restriction
    SESSION_COOKIE_PATH='/',  # cookie applies to every path of the app
    PERMANENT_SESSION_LIFETIME=datetime.timedelta(days=1),
    MAX_CONTENT_LENGTH=10 * 1024 * 1024,  # 10 MB
)

# Folders are resolved relative to this file.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
_DATA_DIR = os.path.join(APP_ROOT, '..', 'data')
app.config['UPLOAD_FOLDER'] = os.path.join(APP_ROOT, 'uploads')
app.config['DATA_FOLDER'] = _DATA_DIR
app.config['INDEX_PATH'] = os.path.join(_DATA_DIR, 'index')

# Create the folders that do not exist yet.
for _folder_key in ('UPLOAD_FOLDER', 'DATA_FOLDER', 'INDEX_PATH'):
    os.makedirs(app.config[_folder_key], exist_ok=True)

# Accepted audio / document file extensions (lower-case, without the dot).
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
# --- ์ ์ญ ๊ฐ์ฒด ์ด๊ธฐํ --- | |
# Eagerly build the LLM and STT clients. The component names are always bound
# (the import fallback aliases them to MockComponent), so NameError is not the
# realistic failure here: construction itself may fail. Any such failure is
# logged and replaced by mocks so that importing this module never crashes;
# background_init() re-attempts the real construction later.
try:
    llm_interface = LLMInterface(default_llm="openai")
    stt_client = VitoSTT()
except Exception:
    logger.warning("LLM ๋๋ STT ์ธํฐํ์ด์ค ์ด๊ธฐํ ์คํจ. Mock ๊ฐ์ฒด๋ฅผ ์ฌ์ฉํฉ๋๋ค.", exc_info=True)
    llm_interface = MockComponent()
    stt_client = MockComponent()

# Retrievers are filled in later by background_init().
base_retriever = None
retriever = None
app_ready = False  # set once background initialisation has finished
DEVICE_ROUTES_REGISTERED = False  # set once the device routes are registered
# --- ์ ์ญ ๊ฐ์ฒด ์ด๊ธฐํ ๋ --- | |
# --- ์ธ์ฆ ๋ฐ์ฝ๋ ์ดํฐ (์์ ๋จ) --- | |
def login_required(f):
    """Decorate a view so it only runs for a logged-in session.

    Requests whose path starts with ``/api/device/`` bypass the check (marked
    as a temporary measure in the original code). Any other request without a
    ``logged_in`` key in the Flask session is redirected to the ``login``
    endpoint, with the requested URL passed as ``next``.

    ``functools.wraps`` preserves the wrapped view's ``__name__``. Without it
    every decorated view is named ``decorated_function``, and Flask derives
    endpoint names from the view function name by default.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        logger.info(f"----------- ์ธ์ฆ ํ์ ํ์ด์ง ์ ๊ทผ ์๋: {request.path} -----------")
        logger.info(f"ํ์ฌ ํ๋ผ์คํฌ ์ธ์ ๊ฐ์ฒด: {session}")
        logger.info(f"ํ์ฌ ์ธ์ ์ํ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
        # Debug aid: record only whether a session cookie was sent. The raw
        # cookie value is a credential and must not be written to the log.
        logger.info(f"์์ฒญ์ ์ธ์ ์ฟ ํค ๊ฐ: {'(present)' if 'session' in request.cookies else 'None'}")

        # Device API requests skip authentication (temporary measure).
        # NOTE(review): revisit before production use; this is an auth bypass.
        if request.path.startswith('/api/device/'):
            logger.info(f"์ฅ์น API ์์ฒญ: {request.path} - ์ธ์ฆ ์ ์ธ (์ฃผ์: ์์ ์กฐ์น)")
            return f(*args, **kwargs)

        # The presence of 'logged_in' in the session is the only login check.
        if 'logged_in' not in session:
            logger.warning(f"ํ๋ผ์คํฌ ์ธ์ ์ 'logged_in' ์์. ๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์ .")
            # 'next' lets the login view return the user to the requested page.
            return redirect(url_for('login', next=request.url))

        logger.info(f"์ธ์ฆ ์ฑ๊ณต: {session.get('username', 'unknown')} ์ฌ์ฉ์๊ฐ {request.path} ์ ๊ทผ")
        return f(*args, **kwargs)
    return decorated_function
# --- ์ธ์ฆ ๋ฐ์ฝ๋ ์ดํฐ ๋ --- | |
# --- ์ค๋ฅ ํธ๋ค๋ฌ ์ถ๊ฐ --- | |
def not_found(e):
    """Build the 404 response: JSON for ``/api/`` paths, plain text otherwise.

    NOTE(review): no handler registration is visible next to this function;
    presumably it is registered for 404 errors elsewhere. Confirm.
    """
    wants_json = request.path.startswith('/api/')
    if not wants_json:
        # Ordinary page request: plain-text body.
        return "ํ์ด์ง๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.", 404
    # API request: JSON body.
    payload = {"success": False, "error": "์์ฒญํ API ์๋ํฌ์ธํธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."}
    return jsonify(payload), 404
def internal_error(e):
    """Build the 500 response: JSON for ``/api/`` paths, plain text otherwise.

    NOTE(review): no handler registration is visible next to this function;
    presumably it is registered for 500 errors elsewhere. Confirm.
    """
    wants_json = request.path.startswith('/api/')
    if not wants_json:
        # Ordinary page request: plain-text body.
        return "์๋ฒ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.", 500
    # API request: JSON body.
    payload = {"success": False, "error": "์๋ฒ ๋ด๋ถ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."}
    return jsonify(payload), 500
# --- ์ค๋ฅ ํธ๋ค๋ฌ ๋ --- | |
# --- ์ฅ์น ๊ด๋ จ ๋ผ์ฐํธ ๋ฑ๋ก (์์ ๋จ: ์ค๋ณต ๋ฐฉ์ง) --- | |
# ์ ์ญ ํ๋๊ทธ๋ฅผ ์ฌ์ฉํ์ฌ ํ ๋ฒ๋ง ๋ฑ๋ก๋๋๋ก ํจ | |
if DEVICE_ROUTES_REGISTERED:
    # Flag already set: skip a second registration.
    logger.info("์ฅ์น ๊ด๋ จ ๋ผ์ฐํธ๊ฐ ์ด๋ฏธ ๋ฑ๋ก๋์ด ์์ด ๊ฑด๋<0xEB>๋๋๋ค.")
else:
    try:
        # Hand the app, the auth decorator and the device-server URL to the
        # registration function (real or mock, whichever was bound above).
        register_device_routes(app, login_required, DEVICE_SERVER_URL)
        DEVICE_ROUTES_REGISTERED = True  # only set after a successful call
        logger.info("์ฅ์น ๊ด๋ จ ๋ผ์ฐํธ ๋ฑ๋ก ์๋ฃ")
    except NameError:
        logger.error("register_device_routes ํจ์๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. app.app_device_routes ๋ชจ๋ ํ์ธ ํ์.")
    except Exception as e:
        logger.error(f"์ฅ์น ๊ด๋ จ ๋ผ์ฐํธ ๋ฑ๋ก ์คํจ: {e}", exc_info=True)
# --- ์ฅ์น ๊ด๋ จ ๋ผ์ฐํธ ๋ฑ๋ก ๋ --- | |
# --- ํฌํผ ํจ์ --- | |
def allowed_audio_file(filename):
    """Return True when the text after the last dot of *filename* is an allowed audio extension (case-insensitive)."""
    if '.' not in filename:
        return False
    extension = filename.rpartition('.')[2]
    return extension.lower() in ALLOWED_AUDIO_EXTENSIONS
def allowed_doc_file(filename):
    """Return True when the text after the last dot of *filename* is an allowed document extension (case-insensitive)."""
    if '.' not in filename:
        return False
    extension = filename.rpartition('.')[2]
    return extension.lower() in ALLOWED_DOC_EXTENSIONS
# --- ํฌํผ ํจ์ ๋ --- | |
# init_retriever ํจ์ ๋ด๋ถ์ ๋ก๊น ์ถ๊ฐ ์์ | |
# --- ๊ฒ์๊ธฐ ์ด๊ธฐํ ๊ด๋ จ ํจ์ --- | |
def _create_empty_vector_retriever():
    """Construct a fresh VectorRetriever; log and return None if construction fails."""
    try:
        fresh = VectorRetriever()
        logger.info("์ VectorRetriever ์ด๊ธฐํ ์ฑ๊ณต.")
        return fresh
    except Exception as e_init:
        logger.error(f"์ VectorRetriever ์ด๊ธฐํ ์คํจ: {e_init}", exc_info=True)
        return None


def init_retriever():
    """Load or create the base retriever, seed it if empty, and wrap it in a re-ranker.

    Steps:
      1. Load a saved index from INDEX_PATH when ``documents.json`` exists
         there; otherwise (or when loading fails) build an empty VectorRetriever.
      2. When the base retriever holds no documents and DATA_FOLDER exists,
         load .txt/.md/.csv files from it, add them and save the index.
      3. Wrap the base retriever in a ReRanker; on failure the base retriever
         itself is used.

    Updates the module globals ``base_retriever`` and ``retriever``.

    Returns:
        The final retriever, or None when no base retriever could be obtained.
    """
    global base_retriever, retriever

    index_path = app.config['INDEX_PATH']
    data_path = app.config['DATA_FOLDER']
    logger.info("--- init_retriever ์์ ---")

    # 1. Load the persisted index if present, else start from an empty retriever.
    saved_index_file = os.path.join(index_path, "documents.json")
    if not os.path.exists(saved_index_file):
        logger.info("์ธ๋ฑ์ค ํ์ผ ์์. ์ VectorRetriever ์ด๊ธฐํ ์๋...")
        base_retriever = _create_empty_vector_retriever()
    else:
        try:
            logger.info(f"์ธ๋ฑ์ค ๋ก๋ ์๋: {index_path}")
            base_retriever = VectorRetriever.load(index_path)
            logger.info(f"์ธ๋ฑ์ค ๋ก๋ ์ฑ๊ณต. ๋ฌธ์ {len(getattr(base_retriever, 'documents', []))}๊ฐ")
        except Exception as e:
            logger.error(f"์ธ๋ฑ์ค ๋ก๋ ์คํจ: {e}", exc_info=True)
            logger.info("์ VectorRetriever ์ด๊ธฐํ ์๋...")
            base_retriever = _create_empty_vector_retriever()

    if base_retriever is None:
        logger.error("base_retriever ์ด๊ธฐํ/๋ก๋์ ์คํจํ์ฌ init_retriever ์ค๋จ.")
        return None

    # 2. Seed from the data folder only when the retriever has no documents.
    is_empty = not getattr(base_retriever, 'documents', None)
    if is_empty and os.path.exists(data_path):
        logger.info(f"๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ๋น์ด์์ด {data_path}์์ ๋ฌธ์ ๋ก๋ ์๋...")
        try:
            docs = DocumentProcessor.load_documents_from_directory(
                directory=data_path,
                extensions=[".txt", ".md", ".csv"],
                recursive=True,
            )
            logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ๋ก๋ ์ฑ๊ณต.")
            if docs and hasattr(base_retriever, 'add_documents'):
                logger.info("๊ฒ์๊ธฐ์ ๋ฌธ์ ์ถ๊ฐ ์๋...")
                base_retriever.add_documents(docs)
                logger.info("๋ฌธ์ ์ถ๊ฐ ์๋ฃ.")
                if hasattr(base_retriever, 'save'):
                    logger.info(f"๊ฒ์๊ธฐ ์ํ ์ ์ฅ ์๋: {index_path}")
                    try:
                        base_retriever.save(index_path)
                        logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ.")
                    except Exception as e_save:
                        # A failed save is logged but does not abort initialisation.
                        logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์คํจ: {e_save}", exc_info=True)
        except Exception as e_load_add:
            logger.error(f"DATA_FOLDER ๋ฌธ์ ๋ก๋/์ถ๊ฐ ์ค ์ค๋ฅ: {e_load_add}", exc_info=True)

    # 3. Wrap the base retriever in a re-ranker.
    logger.info("์ฌ์์ํ ๊ฒ์๊ธฐ ์ด๊ธฐํ ์๋...")
    try:
        def custom_rerank_fn(query, results):
            """Blend each result's score (70%) with a query-term overlap score (30%) and sort descending."""
            query_terms = set(query.lower().split())
            for result in results:
                if not isinstance(result, dict):
                    continue
                if "text" in result:
                    text = result["text"].lower()
                    # Number of distinct query terms that occur in the text.
                    term_freq = sum(1 for term in query_terms if term in text)
                    # Normalised by the text's word count (+1 avoids division by zero).
                    normalized_score = term_freq / (len(text.split()) + 1) * 10
                    result["rerank_score"] = result.get("score", 0) * 0.7 + normalized_score * 0.3
                else:
                    result["rerank_score"] = result.get("score", 0)
            results.sort(key=lambda x: x.get("rerank_score", 0) if isinstance(x, dict) else 0, reverse=True)
            return results

        retriever = ReRanker(
            base_retriever=base_retriever,
            rerank_fn=custom_rerank_fn,
            rerank_field="text",
        )
        logger.info("์ฌ์์ํ ๊ฒ์๊ธฐ ์ด๊ธฐํ ์๋ฃ.")
    except Exception as e_rerank:
        logger.error(f"์ฌ์์ํ ๊ฒ์๊ธฐ ์ด๊ธฐํ ์คํจ: {e_rerank}", exc_info=True)
        logger.warning("์ฌ์์ํ ์คํจ, ๊ธฐ๋ณธ ๊ฒ์๊ธฐ๋ฅผ retriever๋ก ์ฌ์ฉํฉ๋๋ค.")
        retriever = base_retriever  # fallback

    logger.info("--- init_retriever ์ข ๋ฃ ---")
    return retriever
def _ensure_usable_retrievers():
    """Replace missing retrievers with mocks and attach the minimal attributes used by the routes."""
    global retriever, base_retriever
    if base_retriever is None:
        base_retriever = MockComponent()
    if retriever is None:
        retriever = MockComponent()
    if not hasattr(retriever, 'search'):
        retriever.search = lambda query, **kwargs: []
    if not hasattr(base_retriever, 'documents'):
        base_retriever.documents = []


def background_init():
    """Initialise the LLM/STT clients and the retrievers; meant to run in a background thread.

    Real components are built when their classes were imported; otherwise (or
    when something fails) mock objects are installed so the routes can still
    respond. ``app_ready`` is assigned in ``finally`` from a local flag, which
    every path below sets to True.
    """
    global app_ready, retriever, base_retriever, llm_interface, stt_client

    temp_app_ready = False  # local flag, published to app_ready in `finally`
    try:
        logger.info("๋ฐฑ๊ทธ๋ผ์ด๋ ์ด๊ธฐํ ์์...")

        # 1. (Re)build the LLM and STT clients if they are missing or mocks.
        if llm_interface is None or isinstance(llm_interface, MockComponent):
            if 'LLMInterface' in globals() and LLMInterface != MockComponent:
                llm_interface = LLMInterface(default_llm="openai")
                logger.info("LLM ์ธํฐํ์ด์ค ์ด๊ธฐํ ์๋ฃ.")
            else:
                logger.warning("LLMInterface ํด๋์ค ์์. Mock ์ฌ์ฉ.")
                llm_interface = MockComponent()
        if stt_client is None or isinstance(stt_client, MockComponent):
            if 'VitoSTT' in globals() and VitoSTT != MockComponent:
                stt_client = VitoSTT()
                logger.info("STT ํด๋ผ์ด์ธํธ ์ด๊ธฐํ ์๋ฃ.")
            else:
                logger.warning("VitoSTT ํด๋์ค ์์. Mock ์ฌ์ฉ.")
                stt_client = MockComponent()

        # 2. Retrievers.
        real_retriever_available = 'VectorRetriever' in globals() and VectorRetriever != MockComponent
        if not real_retriever_available:
            logger.warning("VectorRetriever ํด๋์ค ์์. Mock ๊ฒ์๊ธฐ ์ฌ์ฉ.")
            base_retriever = MockComponent()
            retriever = MockComponent()
            _ensure_usable_retrievers()
            temp_app_ready = True
        else:
            logger.info("์ค์ ๊ฒ์๊ธฐ ์ด๊ธฐํ ์๋...")
            retriever = init_retriever()
            # Recover base_retriever from the returned object when it is still unset.
            if hasattr(retriever, 'base_retriever') and base_retriever is None:
                base_retriever = retriever.base_retriever
            elif base_retriever is None:
                logger.warning("init_retriever ํ base_retriever๊ฐ ์ค์ ๋์ง ์์. ํ์ธ ํ์.")
                if isinstance(retriever, VectorRetriever):
                    base_retriever = retriever

            if retriever is not None and base_retriever is not None:
                logger.info("๊ฒ์๊ธฐ (Retriever, Base Retriever) ์ด๊ธฐํ ์ฑ๊ณต")
                temp_app_ready = True
            else:
                logger.error("๊ฒ์๊ธฐ ์ด๊ธฐํ ํ์๋ retriever ๋๋ base_retriever๊ฐ None์ ๋๋ค.")
                _ensure_usable_retrievers()
                temp_app_ready = True

        logger.info(f"๋ฐฑ๊ทธ๋ผ์ด๋ ์ด๊ธฐํ ์๋ฃ. ์ต์ข ์ํ: {'Ready' if temp_app_ready else 'Not Ready (Error during init)'}")
    except Exception as e:
        logger.error(f"์ฑ ๋ฐฑ๊ทธ๋ผ์ด๋ ์ด๊ธฐํ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        # Even after an unexpected error the app is marked usable, backed by mocks.
        _ensure_usable_retrievers()
        temp_app_ready = True
        logger.warning("์ด๊ธฐํ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ง๋ง Mock ๊ฐ์ฒด๋ก ๋์ฒด ํ ์ฑ ์ฌ์ฉ ๊ฐ๋ฅ ์ํ๋ก ์ค์ .")
    finally:
        # Publish the final readiness state.
        app_ready = temp_app_ready
# ์ฅ์น ๋ผ์ฐํธ ๋ฑ๋ก ํธ์ถ์ ์ฌ๊ธฐ์ ์ ๊ฑฐ๋จ (๋ฉ์ธ ๋ ๋ฒจ์์ ์ฒ๋ฆฌ) | |
# ๋ฐฑ๊ทธ๋ผ์ด๋ ์ค๋ ๋ ์์ ๋ถ๋ถ์ ๊ทธ๋๋ก ์ ์ง | |
# Run background_init in a daemon thread, started at import time.
init_thread = threading.Thread(target=background_init, daemon=True)
init_thread.start()
# --- Flask ๋ผ์ฐํธ ์ ์ --- | |
def _is_safe_redirect_target(target):
    """Return True when *target* resolves to an http(s) URL on this app's own host."""
    from urllib.parse import urljoin, urlparse

    if not target or '\\' in target:
        # Browsers may treat a backslash like a slash, so "/\\host" could leave the site.
        return False
    own = urlparse(request.host_url)
    resolved = urlparse(urljoin(request.host_url, target))
    return resolved.scheme in ('http', 'https') and resolved.netloc == own.netloc


def login():
    """Render the login form (GET) or verify the submitted credentials (POST).

    On success the session is marked permanent, ``logged_in`` and ``username``
    are stored, and the user is redirected to ``next`` when it points to this
    host, otherwise to the ``index`` endpoint. ``next`` comes from the query
    string and is untrusted, so foreign targets are ignored to avoid an open
    redirect. Credentials are compared with ``hmac.compare_digest`` so the
    comparison time does not depend on where the strings differ.

    Returns:
        A redirect response, or the rendered ``login.html`` template with an
        optional error message.
    """
    import hmac

    error = None
    next_url = request.args.get('next')  # where to go after a successful login
    logger.info(f"-------------- ๋ก๊ทธ์ธ ํ์ด์ง ์ ์ (Next: {next_url}) --------------")
    logger.info(f"Method: {request.method}")

    if request.method == 'POST':
        logger.info("๋ก๊ทธ์ธ ์๋ ๋ฐ์")
        username = request.form.get('username', '')
        password = request.form.get('password', '')
        logger.info(f"์ ๋ ฅ๋ ์ฌ์ฉ์๋ช : {username}")
        logger.info(f"๋น๋ฐ๋ฒํธ ์ ๋ ฅ ์ฌ๋ถ: {len(password) > 0}")

        valid_username = ADMIN_USERNAME
        valid_password = ADMIN_PASSWORD
        logger.info(f"๊ฒ์ฆ์ฉ ์ฌ์ฉ์๋ช : {valid_username}")
        logger.info(f"๊ฒ์ฆ์ฉ ๋น๋ฐ๋ฒํธ ์กด์ฌ ์ฌ๋ถ: {valid_password is not None and len(valid_password) > 0}")

        # Encode to bytes: compare_digest only accepts ASCII-only str, but
        # credentials may contain non-ASCII characters.
        username_ok = hmac.compare_digest(username.encode('utf-8'), valid_username.encode('utf-8'))
        password_ok = hmac.compare_digest(password.encode('utf-8'), valid_password.encode('utf-8'))

        if username_ok and password_ok:
            logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
            logger.debug(f"์ธ์ ์ค์ ์ : {session}")
            session.permanent = True
            session['logged_in'] = True
            session['username'] = username
            session.modified = True
            logger.info(f"์ธ์ ์ค์ ํ: {session}")
            logger.info("์ธ์ ์ค์ ์๋ฃ, ๋ฆฌ๋๋ ์ ์๋")
            # Follow `next` only when it stays on this host.
            redirect_to = next_url if _is_safe_redirect_target(next_url) else url_for('index')
            logger.info(f"๋ฆฌ๋๋ ์ ๋์: {redirect_to}")
            return redirect(redirect_to)

        logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
        if not username_ok:
            logger.warning("์ฌ์ฉ์๋ช ๋ถ์ผ์น")
        if not password_ok:
            logger.warning("๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
        error = '์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ๊ฐ ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.'
    else:
        logger.info("๋ก๊ทธ์ธ ํ์ด์ง GET ์์ฒญ")
        if 'logged_in' in session:
            logger.info("์ด๋ฏธ ๋ก๊ทธ์ธ๋ ์ฌ์ฉ์, ๋ฉ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์ ")
            return redirect(url_for('index'))

    logger.info("---------- ๋ก๊ทธ์ธ ํ์ด์ง ๋ ๋๋ง ----------")
    return render_template('login.html', error=error, next=next_url)
def logout():
    """Remove the login markers from the session and redirect to the login page."""
    logger.info("-------------- ๋ก๊ทธ์์ ์์ฒญ --------------")
    logger.info(f"๋ก๊ทธ์์ ์ ์ธ์ ์ํ: {session}")

    if 'logged_in' not in session:
        logger.warning("๋ก๊ทธ์ธ๋์ง ์์ ์ํ์์ ๋ก๊ทธ์์ ์๋")
    else:
        username = session.get('username', 'unknown')
        logger.info(f"์ฌ์ฉ์ {username} ๋ก๊ทธ์์ ์ฒ๋ฆฌ ์์")
        for session_key in ('logged_in', 'username'):
            session.pop(session_key, None)
        session.modified = True
        logger.info(f"์ธ์ ์ ๋ณด ์ญ์ ์๋ฃ. ํ์ฌ ์ธ์ : {session}")

    logger.info("๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์ ")
    return redirect(url_for('login'))
# Time at which this module was loaded; index() measures its readiness timeout from here.
_APP_START_TIME = datetime.datetime.now()


def index():
    """Serve the main page, or the loading page (HTTP 503) while the app is initialising.

    If initialisation has not finished 30 seconds after this module was
    loaded, the app is forced into the ready state so the UI becomes usable.
    The timeout is measured from module load rather than from this file's
    modification time: the file is normally far older than 30 seconds, which
    made the grace period expire on the very first request.
    """
    global app_ready

    if not app_ready:
        elapsed_seconds = (datetime.datetime.now() - _APP_START_TIME).total_seconds()
        if elapsed_seconds > 30:
            logger.warning(f"์ฑ์ด 30์ด ์ด์ ์ด๊ธฐํ ์ค ์ํ์ ๋๋ค. ๊ฐ์ ๋ก ready ์ํ๋ก ๋ณ๊ฒฝํฉ๋๋ค.")
            app_ready = True

    if not app_ready:
        logger.info("์ฑ์ด ์์ง ์ค๋น๋์ง ์์ ๋ก๋ฉ ํ์ด์ง ํ์")
        return render_template('loading.html'), 503

    logger.info("๋ฉ์ธ ํ์ด์ง ์์ฒญ")
    return render_template('index.html')
def app_status():
    """Report as JSON (``{"ready": bool}``) whether background initialisation has finished."""
    status_label = 'Ready' if app_ready else 'Not Ready'
    logger.info(f"์ฑ ์ํ ํ์ธ ์์ฒญ: {status_label}")
    return jsonify({"ready": app_ready})
def llm_api():
    """List the supported LLMs (GET) or switch the active LLM (POST).

    GET returns ``supported_llms`` and ``current_llm``. POST expects a JSON
    object with ``llm_id``. The body is parsed with ``silent=True`` so a
    missing or malformed JSON body yields this endpoint's own 400 JSON error
    instead of an exception raised by ``get_json``. Any other HTTP method gets
    a 405 JSON response rather than falling off the end of the view.
    """
    global llm_interface

    if not app_ready:
        return jsonify({"error": "์ฑ์ด ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์."}), 503

    if request.method == 'GET':
        logger.info("LLM ๋ชฉ๋ก ์์ฒญ")
        try:
            # Fall back to placeholders when the interface (e.g. a mock) lacks these members.
            if hasattr(llm_interface, 'get_current_llm_details'):
                current_details = llm_interface.get_current_llm_details()
            else:
                current_details = {"id": "unknown", "name": "Unknown"}
            supported_llms_dict = getattr(llm_interface, 'SUPPORTED_LLMS', {})
            supported_list = [
                {"name": name, "id": llm_key, "current": llm_key == current_details.get("id")}
                for name, llm_key in supported_llms_dict.items()
            ]
            return jsonify({
                "supported_llms": supported_list,
                "current_llm": current_details
            })
        except Exception as e:
            logger.error(f"LLM ์ ๋ณด ์กฐํ ์ค๋ฅ: {e}", exc_info=True)
            return jsonify({"error": "LLM ์ ๋ณด ์กฐํ ์ค ์ค๋ฅ ๋ฐ์"}), 500

    if request.method == 'POST':
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'llm_id' not in data:
            return jsonify({"error": "LLM ID๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400

        llm_id = data['llm_id']
        logger.info(f"LLM ๋ณ๊ฒฝ ์์ฒญ: {llm_id}")
        try:
            if not hasattr(llm_interface, 'set_llm') or not hasattr(llm_interface, 'llm_clients'):
                raise NotImplementedError("LLM ์ธํฐํ์ด์ค์ ํ์ํ ๋ฉ์๋/์์ฑ ์์")
            if llm_id not in llm_interface.llm_clients:
                return jsonify({"error": f"์ง์๋์ง ์๋ LLM ID: {llm_id}"}), 400

            if not llm_interface.set_llm(llm_id):
                logger.error(f"LLM ๋ณ๊ฒฝ ์คํจ (ID: {llm_id})")
                return jsonify({"error": "LLM ๋ณ๊ฒฝ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์"}), 500

            new_details = llm_interface.get_current_llm_details()
            logger.info(f"LLM์ด '{new_details.get('name', llm_id)}'๋ก ๋ณ๊ฒฝ๋์์ต๋๋ค.")
            return jsonify({
                "success": True,
                "message": f"LLM์ด '{new_details.get('name', llm_id)}'๋ก ๋ณ๊ฒฝ๋์์ต๋๋ค.",
                "current_llm": new_details
            })
        except Exception as e:
            logger.error(f"LLM ๋ณ๊ฒฝ ์ฒ๋ฆฌ ์ค ์ค๋ฅ: {e}", exc_info=True)
            return jsonify({"error": f"LLM ๋ณ๊ฒฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"}), 500

    # Methods other than GET/POST (for example HEAD) previously returned None.
    return jsonify({"error": "Method not allowed"}), 405
def chat():
    """Answer a text query with retrieval-augmented generation.

    Expects a JSON object with a string ``query`` and an optional ``llm_id``.
    The body is parsed with ``silent=True``: an exception raised by
    ``get_json`` for a malformed body would otherwise be caught by the broad
    handler below and reported as a 500 instead of a 400.

    Returns:
        JSON with ``answer``, ``sources`` and ``llm``; 400 for a missing or
        invalid query, 503 while initialising, 500 on processing errors.
    """
    global retriever

    if not app_ready or retriever is None:
        return jsonify({"error": "์ฑ/๊ฒ์๊ธฐ๊ฐ ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์."}), 503

    try:
        data = request.get_json(silent=True)
        # A non-string query would crash on the slicing/search below.
        if not isinstance(data, dict) or not isinstance(data.get('query'), str):
            return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400

        query = data['query']
        logger.info(f"ํ ์คํธ ์ฟผ๋ฆฌ ์์ : {query[:100]}...")

        if not hasattr(retriever, 'search'):
            raise NotImplementedError("Retriever์ search ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
        search_results = retriever.search(query, top_k=5, first_stage_k=6)

        if not hasattr(DocumentProcessor, 'prepare_rag_context'):
            raise NotImplementedError("DocumentProcessor์ prepare_rag_context ๋ฉ์๋๊ฐ ์์ต๋๋ค.")
        context = DocumentProcessor.prepare_rag_context(search_results, field="text")
        if not context:
            logger.warning("๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ด ์ปจํ ์คํธ๋ฅผ ์์ฑํ์ง ๋ชปํจ.")

        llm_id = data.get('llm_id', None)
        if not hasattr(llm_interface, 'rag_generate'):
            raise NotImplementedError("LLMInterface์ rag_generate ๋ฉ์๋๊ฐ ์์ต๋๋ค.")

        if not context:
            # Nothing retrieved: answer with a fixed message instead of calling the LLM.
            answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
            logger.info("์ปจํ ์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
        else:
            answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
            logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")

        sources = []
        for result in search_results or []:
            if not isinstance(result, dict):
                logger.warning(f"์์์น ๋ชปํ ๊ฒ์ ๊ฒฐ๊ณผ ํ์: {type(result)}")
                continue
            if "source" not in result:
                continue
            source_info = {
                "source": result.get("source", "Unknown"),
                "score": result.get("rerank_score", result.get("score", 0))
            }
            # For CSV chunks, expose the first column of the first line as an id.
            if "text" in result and result.get("filetype") == "csv":
                try:
                    first_line = result["text"].strip().split('\n')[0].strip()
                    if ',' in first_line:
                        first_column = first_line.split(',')[0].strip()
                        source_info["id"] = first_column
                        logger.debug(f"CSV ์์ค ID ์ถ์ถ: {first_column} from {source_info['source']}")
                except Exception as e:
                    logger.warning(f"CSV ์์ค ID ์ถ์ถ ์คํจ ({result.get('source')}): {e}")
            sources.append(source_info)

        response_data = {
            "answer": answer,
            "sources": sources,
            "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
        }
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"์ฑํ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        return jsonify({"error": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}), 500
def voice_chat():
    """Transcribe an uploaded audio file and answer it with retrieval-augmented generation.

    Expects a multipart upload with an ``audio`` file and an optional
    ``llm_id`` form field. The upload is read directly into memory; the
    previous version saved it to a ``NamedTemporaryFile`` and reopened that
    file by name while it was still open, which is not portable and added a
    needless disk round trip.

    Returns:
        JSON with ``transcription``, ``answer``, ``sources`` and ``llm``;
        400 when no audio is supplied or nothing was recognised, 500 on
        STT/processing errors.
    """
    global retriever, stt_client

    if not app_ready:
        logger.warning("์ฑ ์ด๊ธฐํ๊ฐ ์๋ฃ๋์ง ์์์ง๋ง ์์ฑ API ์์ฒญ ์ฒ๋ฆฌ ์๋")
    if retriever is None:
        logger.error("retriever๊ฐ ์์ง ์ด๊ธฐํ๋์ง ์์์ต๋๋ค")
        return jsonify({
            "transcription": "(์์ฑ์ ํ ์คํธ๋ก ๋ณํํ์ง๋ง ๊ฒ์ ์์ง์ด ์์ง ์ค๋น๋์ง ์์์ต๋๋ค)",
            "answer": "์ฃ์กํฉ๋๋ค. ๊ฒ์ ์์ง์ด ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
            "sources": []
        })
    if stt_client is None:
        return jsonify({
            "transcription": "(์์ฑ ์ธ์ ๊ธฐ๋ฅ์ด ์ค๋น ์ค์ ๋๋ค)",
            "answer": "์ฃ์กํฉ๋๋ค. ํ์ฌ ์์ฑ ์ธ์ ์๋น์ค๊ฐ ์ด๊ธฐํ ์ค์ ๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
            "sources": []
        })

    logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
    if 'audio' not in request.files:
        logger.error("์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์")
        return jsonify({"error": "์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400

    audio_file = request.files['audio']
    logger.info(f"์์ ๋ ์ค๋์ค ํ์ผ: {audio_file.filename} ({audio_file.content_type})")

    try:
        if not hasattr(stt_client, 'transcribe_audio'):
            raise NotImplementedError("STT ํด๋ผ์ด์ธํธ์ transcribe_audio ๋ฉ์๋๊ฐ ์์ต๋๋ค.")

        # Read the uploaded bytes straight from the request stream.
        audio_bytes = audio_file.read()
        stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")

        if not isinstance(stt_result, dict) or not stt_result.get("success"):
            error_msg = stt_result.get("error", "์ ์ ์๋ STT ์ค๋ฅ") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ์ ์ค๋ฅ"
            logger.error(f"์์ฑ์ธ์ ์คํจ: {error_msg}")
            return jsonify({
                "error": "์์ฑ์ธ์ ์คํจ",
                "details": error_msg
            }), 500

        transcription = stt_result.get("text", "")
        if not transcription:
            logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
            return jsonify({"error": "์์ฑ์์ ํ ์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค.", "transcription": ""}), 400
        logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")

        # Re-check the retriever after the STT call before searching.
        if retriever is None:
            logger.error("STT ์ฑ๊ณต ํ ๊ฒ์ ์๋ ์ค retriever๊ฐ None์")
            return jsonify({
                "transcription": transcription,
                "answer": "์์ฑ์ ์ธ์ํ์ง๋ง, ํ์ฌ ๊ฒ์ ์์คํ ์ด ์ค๋น๋์ง ์์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
                "sources": []
            })

        search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
        context = DocumentProcessor.prepare_rag_context(search_results, field="text")
        if not context:
            logger.warning("์์ฑ ์ฟผ๋ฆฌ์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ ์์.")

        llm_id = request.form.get('llm_id', None)
        if not context:
            # Nothing retrieved: answer with a fixed message instead of calling the LLM.
            answer = "์ฃ์กํฉ๋๋ค. ๊ด๋ จ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
            logger.info("์ปจํ ์คํธ ์์ด ๊ธฐ๋ณธ ์๋ต ์์ฑ")
        else:
            answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
            logger.info(f"LLM ์๋ต ์์ฑ ์๋ฃ (๊ธธ์ด: {len(answer)})")

        enhanced_sources = []
        for doc in search_results or []:
            if not isinstance(doc, dict) or "source" not in doc:
                continue
            source_info = {
                "source": doc.get("source", "Unknown"),
                "score": doc.get("rerank_score", doc.get("score", 0))
            }
            # For CSV chunks, expose the first column of the first line as an id.
            if "text" in doc and doc.get("filetype") == "csv":
                try:
                    first_line = doc["text"].strip().split('\n')[0].strip()
                    if ',' in first_line:
                        source_info["id"] = first_line.split(',')[0].strip()
                except Exception as e:
                    logger.warning(f"[์์ฑ์ฑ] CSV ์์ค ID ์ถ์ถ ์คํจ ({doc.get('source')}): {e}")
            enhanced_sources.append(source_info)

        response_data = {
            "transcription": transcription,
            "answer": answer,
            "sources": enhanced_sources,
            "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
        }
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        return jsonify({
            "error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์",
            "details": str(e)
        }), 500
def upload_document():
    """Store an uploaded knowledge-base document and add its chunks to the base retriever.

    Expects a multipart upload with a ``document`` file whose extension is in
    ALLOWED_DOC_EXTENSIONS. Two defects of the earlier version are handled:

    * ``secure_filename`` can remove the dot together with non-ASCII
      characters, leaving a name without an extension and raising IndexError
      when the file type was derived from it. The extension is now taken from
      the validated original name, and a generated name is used when the
      sanitised one lost it.
    * pdf/docx files were decoded as text and rejected with an encoding error
      before the "not implemented" branch could run. They are now stored
      without being decoded and reported as having no processable content.

    Returns:
        JSON describing success, a warning (stored but nothing indexed) or an
        error, with status 200/400/500/503 accordingly.
    """
    global base_retriever, retriever

    if not app_ready or base_retriever is None:
        return jsonify({"error": "์ฑ/๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค."}), 503
    if 'document' not in request.files:
        return jsonify({"error": "๋ฌธ์ ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400

    doc_file = request.files['document']
    if not doc_file.filename:  # empty or missing filename
        return jsonify({"error": "์ ํ๋ ํ์ผ์ด ์์ต๋๋ค."}), 400
    if not allowed_doc_file(doc_file.filename):
        logger.error(f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์: {doc_file.filename}")
        return jsonify({"error": f"ํ์ฉ๋์ง ์๋ ํ์ผ ํ์์ ๋๋ค. ํ์ฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400

    try:
        import uuid

        # The extension was just validated on the original name; trust that one.
        file_ext = doc_file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(doc_file.filename)
        if '.' not in filename or filename.rsplit('.', 1)[1].lower() != file_ext:
            # Sanitising destroyed the name/extension: generate a safe replacement.
            filename = f"upload_{uuid.uuid4().hex}.{file_ext}"

        filepath = os.path.join(app.config['DATA_FOLDER'], filename)
        doc_file.save(filepath)
        logger.info(f"๋ฌธ์ ์ ์ฅ ์๋ฃ: {filepath}")

        content = ""
        if file_ext not in ('pdf', 'docx'):
            # Text formats: try UTF-8 first, then CP949.
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                logger.info(f"UTF-8 ๋์ฝ๋ฉ ์คํจ, CP949๋ก ์๋: {filename}")
                try:
                    with open(filepath, 'r', encoding='cp949') as f:
                        content = f.read()
                except Exception as e_cp949:
                    logger.error(f"CP949 ๋์ฝ๋ฉ ์คํจ ({filename}): {e_cp949}")
                    return jsonify({"error": "ํ์ผ ์ธ์ฝ๋ฉ์ ์ฝ์ ์ ์์ต๋๋ค (UTF-8, CP949 ์๋ ์คํจ)."}), 400
            except Exception as e_read:
                logger.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ ({filename}): {e_read}")
                return jsonify({"error": f"ํ์ผ ์ฝ๊ธฐ ์ค ์ค๋ฅ ๋ฐ์: {str(e_read)}"}), 500

        metadata = {
            "source": filename, "filename": filename,
            "filetype": file_ext,
            "filepath": filepath
        }

        docs = []
        if not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
            raise NotImplementedError("DocumentProcessor์ ํ์ํ ๋ฉ์๋ ์์")

        if file_ext == 'csv':
            logger.info(f"CSV ํ์ผ ์ฒ๋ฆฌ ์์: {filename}")
            docs = DocumentProcessor.csv_to_documents(content, metadata)
        else:
            logger.info(f"์ผ๋ฐ ํ ์คํธ ๋ฌธ์ ์ฒ๋ฆฌ ์์: {filename}")
            if file_ext in ('pdf', 'docx'):
                # No text extraction exists for these formats yet.
                logger.warning(f".{file_ext} ํ์ผ ์ฒ๋ฆฌ๋ ํ์ฌ ๊ตฌํ๋์ง ์์์ต๋๋ค. ํ ์คํธ ์ถ์ถ ๋ก์ง ์ถ๊ฐ ํ์.")
            if content:
                docs = DocumentProcessor.text_to_documents(
                    content, metadata=metadata,
                    chunk_size=512, chunk_overlap=50
                )

        if not docs:
            logger.warning(f"ํ์ผ '{filename}'์์ ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์๊ฑฐ๋ ์ง์๋์ง ์๋ ํ์์ ๋๋ค.")
            return jsonify({
                "warning": True,
                "message": f"ํ์ผ '{filename}'์ด ์ ์ฅ๋์์ง๋ง ์ฒ๋ฆฌํ ๋ด์ฉ์ด ์์ต๋๋ค."
            })

        if not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
            raise NotImplementedError("๊ธฐ๋ณธ ๊ฒ์๊ธฐ์ add_documents ๋๋ save ๋ฉ์๋ ์์")

        logger.info(f"{len(docs)}๊ฐ ๋ฌธ์ ์ฒญํฌ๋ฅผ ๊ฒ์๊ธฐ์ ์ถ๊ฐํฉ๋๋ค...")
        base_retriever.add_documents(docs)

        logger.info(f"๊ฒ์๊ธฐ ์ํ๋ฅผ ์ ์ฅํฉ๋๋ค...")
        index_path = app.config['INDEX_PATH']
        try:
            base_retriever.save(index_path)
            logger.info("์ธ๋ฑ์ค ์ ์ฅ ์๋ฃ")
            # NOTE(review): the re-ranking retriever is not refreshed here;
            # add an update step if it does not share the base retriever's state.
            return jsonify({
                "success": True,
                "message": f"ํ์ผ '{filename}' ์ ๋ก๋ ๋ฐ ์ฒ๋ฆฌ ์๋ฃ ({len(docs)}๊ฐ ์ฒญํฌ ์ถ๊ฐ)."
            })
        except Exception as e_save:
            logger.error(f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {e_save}")
            return jsonify({"error": f"์ธ๋ฑ์ค ์ ์ฅ ์ค ์ค๋ฅ: {str(e_save)}"}), 500
    except Exception as e:
        logger.error(f"ํ์ผ ์ ๋ก๋ ๋๋ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        return jsonify({"error": f"ํ์ผ ์ ๋ก๋ ์ค ์ค๋ฅ: {str(e)}"}), 500
def list_documents():
    """Return the knowledge-base sources, with per-source chunk counts, as JSON.

    Each dict entry of ``base_retriever.documents`` is attributed to a source
    taken from its ``source`` key or, failing that, from ``metadata["source"]``.
    Entries without a known source are skipped. The result is sorted by chunk
    count, largest first.
    """
    global base_retriever

    if not app_ready or base_retriever is None:
        return jsonify({"error": "์ฑ/๊ธฐ๋ณธ ๊ฒ์๊ธฐ๊ฐ ์์ง ์ด๊ธฐํ ์ค์ ๋๋ค."}), 503

    try:
        sources = {}
        total_chunks = 0

        if hasattr(base_retriever, 'documents') and base_retriever.documents:
            logger.info(f"์ด {len(base_retriever.documents)}๊ฐ ๋ฌธ์ ์ฒญํฌ์์ ์์ค ๋ชฉ๋ก ์์ฑ ์ค...")
            for doc in base_retriever.documents:
                if not isinstance(doc, dict):
                    continue

                # Metadata is only consulted when it is present and a dict.
                has_meta = "metadata" in doc and isinstance(doc["metadata"], dict)

                source = doc.get("source", "unknown")
                if source == "unknown" and has_meta:
                    source = doc["metadata"].get("source", "unknown")
                if source == "unknown":
                    continue

                if source in sources:
                    sources[source]["chunks"] += 1
                else:
                    filename = doc.get("filename", source)
                    filetype = doc.get("filetype", "unknown")
                    if has_meta:
                        # Metadata values win over the top-level ones.
                        filename = doc["metadata"].get("filename", filename)
                        filetype = doc["metadata"].get("filetype", filetype)
                    sources[source] = {
                        "filename": filename,
                        "chunks": 1,
                        "filetype": filetype
                    }
                total_chunks += 1
        else:
            logger.info("๊ฒ์๊ธฐ์ ๋ฌธ์๊ฐ ์๊ฑฐ๋ documents ์์ฑ์ ์ฐพ์ ์ ์์ต๋๋ค.")

        documents = [{"source": src, **info} for src, info in sources.items()]
        documents.sort(key=lambda x: x["chunks"], reverse=True)

        logger.info(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์๋ฃ: {len(documents)}๊ฐ ์์ค ํ์ผ, {total_chunks}๊ฐ ์ฒญํฌ")
        return jsonify({
            "documents": documents,
            "total_documents": len(documents),
            "total_chunks": total_chunks
        })
    except Exception as e:
        logger.error(f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True)
        return jsonify({"error": f"๋ฌธ์ ๋ชฉ๋ก ์กฐํ ์ค ์ค๋ฅ: {str(e)}"}), 500
# ์ ์ ํ์ผ ์๋น | |
def send_static(path):
    """Serve the file at *path* from the ``static`` directory."""
    static_dir = 'static'
    return send_from_directory(static_dir, path)
# --- ์์ฒญ ์ฒ๋ฆฌ ํ --- | |
def after_request_func(response):
    """Post-processing hook for responses; currently returns *response* unchanged."""
    # A debug log of response.headers could be added here to inspect Set-Cookie.
    return response
# ์ฑ ์คํ (๋ก์ปฌ ํ ์คํธ์ฉ) | |
# Direct execution (local testing with Flask's built-in server).
if __name__ == '__main__':
    logger.info("Flask ์ฑ์ ์ง์ ์คํํฉ๋๋ค (๊ฐ๋ฐ์ฉ ์๋ฒ).")
    port = int(os.environ.get("PORT", 7860))
    # Debug mode enables the interactive debugger and the reloader. Combined
    # with host='0.0.0.0' that would expose the debugger on every network
    # interface, so it is opt-in through FLASK_DEBUG and off by default.
    # The reloader restarts the process, which re-runs all module-level
    # initialisation; an in-process flag cannot prevent that.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    logger.info(f"์๋ฒ๋ฅผ http://0.0.0.0:{port} ์์ ์์ํฉ๋๋ค.")
    # host='0.0.0.0' makes the server reachable on all network interfaces.
    app.run(debug=debug_enabled, host='0.0.0.0', port=port)