jeongsoo's picture
Fix: ์ค‘๋ณต ๋ผ์šฐํŠธ ๋“ฑ๋ก ์˜ค๋ฅ˜ ํ•ด๊ฒฐ ๋ฐ ๊ด€๋ จ ์ฝ”๋“œ ์ˆ˜์ •
0adf356
"""
RAG ๊ฒ€์ƒ‰ ์ฑ—๋ด‡ ์›น ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ (์„ธ์…˜ ์„ค์ • ์ˆ˜์ • ์ ์šฉ ๋ฐ ์ค‘๋ณต ๋ผ์šฐํŠธ ๋“ฑ๋ก ๋ฐฉ์ง€)
"""
import os
import json
import logging
import tempfile
import threading
import datetime
from flask import Flask, request, jsonify, render_template, send_from_directory, session, redirect, url_for
from flask_cors import CORS
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
from functools import wraps
# ๋กœ๊ฑฐ ์„ค์ •
# Root logging configuration. DEBUG (rather than INFO) is used so that more
# detailed logs are available.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Load variables from a .env file into the process environment.
load_dotenv()

# Settings read from the environment: device server URL and admin credentials.
DEVICE_SERVER_URL = os.getenv('DEVICE_SERVER_URL', 'http://localhost:5050')
ADMIN_USERNAME = os.getenv('ADMIN_USERNAME')
ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD')

# Report what was loaded. For the credentials only their presence is logged.
logger.info(f"์žฅ์น˜ ์„œ๋ฒ„ URL: {DEVICE_SERVER_URL}")
logger.info("==== ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ ์ƒํƒœ ====")
logger.info(f"ADMIN_USERNAME ์„ค์ • ์—ฌ๋ถ€: {ADMIN_USERNAME is not None}")
logger.info(f"ADMIN_PASSWORD ์„ค์ • ์—ฌ๋ถ€: {ADMIN_PASSWORD is not None}")

# Development fallbacks for missing (or empty) credentials.
# NOTE(review): the default password is hard-coded and echoed in the warning
# below; deployments should always set ADMIN_USERNAME / ADMIN_PASSWORD.
if not ADMIN_USERNAME:
    ADMIN_USERNAME = 'admin'
    logger.warning("ADMIN_USERNAME ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์—†์–ด ๊ธฐ๋ณธ๊ฐ’ 'admin'์œผ๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.")
if not ADMIN_PASSWORD:
    ADMIN_PASSWORD = 'rag12345'
    logger.warning("ADMIN_PASSWORD ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์—†์–ด ๊ธฐ๋ณธ๊ฐ’ 'rag12345'๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.")
class MockComponent:
    """Inert placeholder used when a real component is unavailable.

    When the local imports fail, the real component names are rebound to
    this class, and callers then instantiate it with the real classes'
    constructor arguments (e.g. ``LLMInterface(default_llm="openai")``).
    The constructor therefore accepts and ignores any arguments instead of
    raising ``TypeError``. Attributes can be attached to instances freely.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are intentionally ignored; the mock carries no state.
        pass
# --- ๋กœ์ปฌ ๋ชจ๋“ˆ ์ž„ํฌํŠธ ---
# ์‹ค์ œ ๊ฒฝ๋กœ์— ๋งž๊ฒŒ utils, retrieval ํด๋”๊ฐ€ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
# Project-local components. The utils, retrieval and app packages must be
# importable for the real implementations to be used.
try:
    from utils.vito_stt import VitoSTT
    from utils.llm_interface import LLMInterface
    from utils.document_processor import DocumentProcessor
    from retrieval.vector_retriever import VectorRetriever
    from retrieval.reranker import ReRanker
    # Function that registers the device-related routes on the app.
    from app.app_device_routes import register_device_routes
except ImportError as import_err:
    logger.error(f"๋กœ์ปฌ ๋ชจ๋“ˆ ์ž„ํฌํŠธ ์‹คํŒจ: {import_err}. utils, retrieval, app ํŒจํ‚ค์ง€๊ฐ€ ์˜ฌ๋ฐ”๋ฅธ ๊ฒฝ๋กœ์— ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
    # Temporary stand-ins for development/testing: every component name is
    # bound to the mock class so the rest of the module can still be loaded.
    VitoSTT = MockComponent
    LLMInterface = MockComponent
    DocumentProcessor = MockComponent
    VectorRetriever = MockComponent
    ReRanker = MockComponent

    def register_device_routes(*args, **kwargs):
        """No-op replacement that only records that it was called."""
        logger.warning("Mock register_device_routes ํ•จ์ˆ˜ ํ˜ธ์ถœ๋จ.")
# --- ๋กœ์ปฌ ๋ชจ๋“ˆ ์ž„ํฌํŠธ ๋ ---
# Flask ์•ฑ ์ดˆ๊ธฐํ™”
# Flask application object.
app = Flask(__name__)

# CORS with credentials enabled (no origin restriction is configured here).
CORS(app, supports_credentials=True)

# Session signing key: the environment variable wins, otherwise a fixed value.
# NOTE(review): the fallback key is hard-coded in source; deployments should
# always provide FLASK_SECRET_KEY.
app.secret_key = os.getenv('FLASK_SECRET_KEY', 'rag_chatbot_fixed_secret_key_12345')

# Session cookie settings plus the upload size limit.
# SameSite is 'None' because the app was found to run inside an iframe on
# another domain; 'None' requires Secure=True, which is set as well.
app.config.update(
    SESSION_COOKIE_SECURE=True,
    SESSION_COOKIE_HTTPONLY=True,  # keep the cookie away from JavaScript
    SESSION_COOKIE_SAMESITE='None',
    SESSION_COOKIE_DOMAIN=None,  # no specific domain restriction
    SESSION_COOKIE_PATH='/',  # cookie applies to the whole app
    PERMANENT_SESSION_LIFETIME=datetime.timedelta(days=1),
    MAX_CONTENT_LENGTH=10 * 1024 * 1024,  # 10 MB
)

# Working directories, resolved relative to this file.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
app.config.update(
    UPLOAD_FOLDER=os.path.join(APP_ROOT, 'uploads'),
    DATA_FOLDER=os.path.join(APP_ROOT, '..', 'data'),
    INDEX_PATH=os.path.join(APP_ROOT, '..', 'data', 'index'),
)

# Create the directories when they do not exist yet.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['DATA_FOLDER'], exist_ok=True)
os.makedirs(app.config['INDEX_PATH'], exist_ok=True)

# File extensions accepted for audio and document uploads.
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a'}
ALLOWED_DOC_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'csv'}
# --- ์ „์—ญ ๊ฐ์ฒด ์ดˆ๊ธฐํ™” ---
# Module-level service objects.
# Any constructor failure falls back to mock objects: catching only NameError
# let other errors (e.g. a TypeError from a stand-in class, or an error raised
# by a real constructor) abort the import of this module.
try:
    llm_interface = LLMInterface(default_llm="openai")
    stt_client = VitoSTT()
except Exception:
    logger.warning("LLM ๋˜๋Š” STT ์ธํ„ฐํŽ˜์ด์Šค ์ดˆ๊ธฐํ™” ์‹คํŒจ. Mock ๊ฐ์ฒด๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.", exc_info=True)
    llm_interface = MockComponent()
    stt_client = MockComponent()

# Retrievers are filled in later by the background initialisation.
base_retriever = None
retriever = None
app_ready = False  # set once background initialisation has finished
DEVICE_ROUTES_REGISTERED = False  # set once the device routes are registered
# --- ์ „์—ญ ๊ฐ์ฒด ์ดˆ๊ธฐํ™” ๋ ---
# --- ์ธ์ฆ ๋ฐ์ฝ”๋ ˆ์ดํ„ฐ (์ˆ˜์ •๋จ) ---
def login_required(f):
    """Decorator that only lets logged-in sessions reach the wrapped view.

    Requests whose path starts with ``/api/device/`` are passed through
    without authentication (marked as a temporary measure in the original
    code; it should be reviewed before production use). Any other request
    without ``logged_in`` in the Flask session is redirected to the login
    page, with the requested URL passed as ``next``.

    The raw session cookie and the full session object are deliberately not
    written to the log: the cookie is the bearer credential for the session,
    so logging it would let anyone with log access reuse it.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        logger.info(f"----------- ์ธ์ฆ ํ•„์š” ํŽ˜์ด์ง€ ์ ‘๊ทผ ์‹œ๋„: {request.path} -----------")
        logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ์ƒํƒœ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
        # Debug aid: record only whether the browser sent a session cookie.
        logger.debug("Session cookie present in request: %s", 'session' in request.cookies)

        # Device API calls bypass authentication (temporary measure).
        if request.path.startswith('/api/device/'):
            logger.info(f"์žฅ์น˜ API ์š”์ฒญ: {request.path} - ์ธ์ฆ ์ œ์™ธ (์ฃผ์˜: ์ž„์‹œ ์กฐ์น˜)")
            return f(*args, **kwargs)

        # The Flask session itself is the only source of truth for login state.
        if 'logged_in' not in session:
            logger.warning("ํ”Œ๋ผ์Šคํฌ ์„ธ์…˜์— 'logged_in' ์—†์Œ. ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜.")
            # Pass the requested URL along so login can return to it.
            return redirect(url_for('login', next=request.url))

        logger.info(f"์ธ์ฆ ์„ฑ๊ณต: {session.get('username', 'unknown')} ์‚ฌ์šฉ์ž๊ฐ€ {request.path} ์ ‘๊ทผ")
        return f(*args, **kwargs)
    return decorated_function
# --- ์ธ์ฆ ๋ฐ์ฝ”๋ ˆ์ดํ„ฐ ๋ ---
# --- ์˜ค๋ฅ˜ ํ•ธ๋“ค๋Ÿฌ ์ถ”๊ฐ€ ---
@app.errorhandler(404)
def not_found(e):
    """Answer 404 errors: JSON for paths under /api/, plain text otherwise."""
    wants_json = request.path.startswith('/api/')
    if not wants_json:
        # Regular web page request.
        return "ํŽ˜์ด์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.", 404
    # API clients expect a JSON body.
    payload = {"success": False, "error": "์š”์ฒญํ•œ API ์—”๋“œํฌ์ธํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
    return jsonify(payload), 404
@app.errorhandler(500)
def internal_error(e):
    """Answer 500 errors: JSON for paths under /api/, plain text otherwise."""
    wants_json = request.path.startswith('/api/')
    if not wants_json:
        # Regular web page request.
        return "์„œ๋ฒ„ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.", 500
    # API clients expect a JSON body.
    payload = {"success": False, "error": "์„œ๋ฒ„ ๋‚ด๋ถ€ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."}
    return jsonify(payload), 500
# --- ์˜ค๋ฅ˜ ํ•ธ๋“ค๋Ÿฌ ๋ ---
# --- ์žฅ์น˜ ๊ด€๋ จ ๋ผ์šฐํŠธ ๋“ฑ๋ก (์ˆ˜์ •๋จ: ์ค‘๋ณต ๋ฐฉ์ง€) ---
# ์ „์—ญ ํ”Œ๋ž˜๊ทธ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ํ•œ ๋ฒˆ๋งŒ ๋“ฑ๋ก๋˜๋„๋ก ํ•จ
# Register the device routes once, guarded by the module-level flag.
if DEVICE_ROUTES_REGISTERED:
    logger.info("์žฅ์น˜ ๊ด€๋ จ ๋ผ์šฐํŠธ๊ฐ€ ์ด๋ฏธ ๋“ฑ๋ก๋˜์–ด ์žˆ์–ด ๊ฑด๋„ˆ<0xEB>๋œ๋‹ˆ๋‹ค.")
else:
    try:
        # The auth decorator and the device server URL are handed to the
        # registration function along with the app.
        register_device_routes(app, login_required, DEVICE_SERVER_URL)
    except NameError:
        logger.error("register_device_routes ํ•จ์ˆ˜๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. app.app_device_routes ๋ชจ๋“ˆ ํ™•์ธ ํ•„์š”.")
    except Exception as registration_err:
        logger.error(f"์žฅ์น˜ ๊ด€๋ จ ๋ผ์šฐํŠธ ๋“ฑ๋ก ์‹คํŒจ: {registration_err}", exc_info=True)
    else:
        # Only mark as registered when the call succeeded.
        DEVICE_ROUTES_REGISTERED = True
        logger.info("์žฅ์น˜ ๊ด€๋ จ ๋ผ์šฐํŠธ ๋“ฑ๋ก ์™„๋ฃŒ")
# --- ์žฅ์น˜ ๊ด€๋ จ ๋ผ์šฐํŠธ ๋“ฑ๋ก ๋ ---
# --- ํ—ฌํผ ํ•จ์ˆ˜ ---
def allowed_audio_file(filename):
    """Return True when the text after the last dot is an allowed audio extension."""
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_AUDIO_EXTENSIONS
def allowed_doc_file(filename):
    """Return True when the text after the last dot is an allowed document extension."""
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_DOC_EXTENSIONS
# --- ํ—ฌํผ ํ•จ์ˆ˜ ๋ ---
# init_retriever ํ•จ์ˆ˜ ๋‚ด๋ถ€์— ๋กœ๊น… ์ถ”๊ฐ€ ์˜ˆ์‹œ
# --- ๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ๊ด€๋ จ ํ•จ์ˆ˜ ---
def init_retriever():
    """Load or create the base retriever and wrap it in a re-ranker.

    Steps:
      1. Load the saved index when ``documents.json`` exists under
         INDEX_PATH, otherwise (or when loading fails) create a new
         ``VectorRetriever``.
      2. When the base retriever holds no documents, load .txt/.md/.csv
         files from DATA_FOLDER, add them and save the index.
      3. Wrap the base retriever in a ``ReRanker``; when that fails the base
         retriever itself is used.

    Updates the module globals ``base_retriever`` and ``retriever``.

    Returns:
        The resulting retriever, or None when no base retriever could be
        created.
    """
    global base_retriever, retriever

    index_dir = app.config['INDEX_PATH']
    data_dir = app.config['DATA_FOLDER']
    logger.info("--- init_retriever ์‹œ์ž‘ ---")

    def _create_empty_retriever():
        """Build a fresh VectorRetriever; return None when construction fails."""
        try:
            fresh = VectorRetriever()
        except Exception as e_init:
            logger.error(f"์ƒˆ VectorRetriever ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e_init}", exc_info=True)
            return None
        logger.info("์ƒˆ VectorRetriever ์ดˆ๊ธฐํ™” ์„ฑ๊ณต.")
        return fresh

    def custom_rerank_fn(query, results):
        """Blend the original score (70%) with a query-term overlap score (30%)."""
        query_terms = set(query.lower().split())
        for item in results:
            if not isinstance(item, dict):
                continue
            if "text" in item:
                lowered = item["text"].lower()
                hits = sum(1 for term in query_terms if term in lowered)
                overlap_score = hits / (len(lowered.split()) + 1) * 10
                item["rerank_score"] = item.get("score", 0) * 0.7 + overlap_score * 0.3
            else:
                item["rerank_score"] = item.get("score", 0)
        results.sort(
            key=lambda entry: entry.get("rerank_score", 0) if isinstance(entry, dict) else 0,
            reverse=True,
        )
        return results

    # 1. Load the stored index, or start from an empty retriever.
    if not os.path.exists(os.path.join(index_dir, "documents.json")):
        logger.info("์ธ๋ฑ์Šค ํŒŒ์ผ ์—†์Œ. ์ƒˆ VectorRetriever ์ดˆ๊ธฐํ™” ์‹œ๋„...")
        base_retriever = _create_empty_retriever()
    else:
        try:
            logger.info(f"์ธ๋ฑ์Šค ๋กœ๋“œ ์‹œ๋„: {index_dir}")
            base_retriever = VectorRetriever.load(index_dir)
            logger.info(f"์ธ๋ฑ์Šค ๋กœ๋“œ ์„ฑ๊ณต. ๋ฌธ์„œ {len(getattr(base_retriever, 'documents', []))}๊ฐœ")
        except Exception as e:
            logger.error(f"์ธ๋ฑ์Šค ๋กœ๋“œ ์‹คํŒจ: {e}", exc_info=True)
            logger.info("์ƒˆ VectorRetriever ์ดˆ๊ธฐํ™” ์‹œ๋„...")
            base_retriever = _create_empty_retriever()

    if base_retriever is None:
        logger.error("base_retriever ์ดˆ๊ธฐํ™”/๋กœ๋“œ์— ์‹คํŒจํ•˜์—ฌ init_retriever ์ค‘๋‹จ.")
        return None

    # 2. Seed an empty retriever from the data folder.
    is_empty = not getattr(base_retriever, 'documents', None)
    if is_empty and os.path.exists(data_dir):
        logger.info(f"๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ๋น„์–ด์žˆ์–ด {data_dir}์—์„œ ๋ฌธ์„œ ๋กœ๋“œ ์‹œ๋„...")
        try:
            loaded_docs = DocumentProcessor.load_documents_from_directory(
                directory=data_dir,
                extensions=[".txt", ".md", ".csv"],
                recursive=True
            )
            logger.info(f"{len(loaded_docs)}๊ฐœ ๋ฌธ์„œ ๋กœ๋“œ ์„ฑ๊ณต.")
            if loaded_docs and hasattr(base_retriever, 'add_documents'):
                logger.info("๊ฒ€์ƒ‰๊ธฐ์— ๋ฌธ์„œ ์ถ”๊ฐ€ ์‹œ๋„...")
                base_retriever.add_documents(loaded_docs)
                logger.info("๋ฌธ์„œ ์ถ”๊ฐ€ ์™„๋ฃŒ.")
                if hasattr(base_retriever, 'save'):
                    logger.info(f"๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ ์ €์žฅ ์‹œ๋„: {index_dir}")
                    try:
                        base_retriever.save(index_dir)
                        logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ.")
                    except Exception as e_save:
                        logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์‹คํŒจ: {e_save}", exc_info=True)
        except Exception as e_load_add:
            logger.error(f"DATA_FOLDER ๋ฌธ์„œ ๋กœ๋“œ/์ถ”๊ฐ€ ์ค‘ ์˜ค๋ฅ˜: {e_load_add}", exc_info=True)

    # 3. Wrap the base retriever with the re-ranker.
    logger.info("์žฌ์ˆœ์œ„ํ™” ๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ์‹œ๋„...")
    try:
        retriever = ReRanker(
            base_retriever=base_retriever,
            rerank_fn=custom_rerank_fn,
            rerank_field="text"
        )
        logger.info("์žฌ์ˆœ์œ„ํ™” ๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ.")
    except Exception as e_rerank:
        logger.error(f"์žฌ์ˆœ์œ„ํ™” ๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e_rerank}", exc_info=True)
        logger.warning("์žฌ์ˆœ์œ„ํ™” ์‹คํŒจ, ๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๋ฅผ retriever๋กœ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
        retriever = base_retriever  # fall back to the plain retriever

    logger.info("--- init_retriever ์ข…๋ฃŒ ---")
    return retriever
def background_init():
    """Initialise the LLM, STT and retriever objects (run in a background thread).

    Mock objects are substituted wherever a real component is missing or
    fails to initialise. ``app_ready`` is updated in the ``finally`` clause;
    every path in this function that finishes marks the app as ready, either
    with real components or with mock fallbacks.
    """
    global app_ready, retriever, base_retriever, llm_interface, stt_client

    def _fill_missing_with_mocks():
        """Make sure both retriever globals exist and expose the minimal API."""
        global base_retriever, retriever
        if base_retriever is None:
            base_retriever = MockComponent()
        if retriever is None:
            retriever = MockComponent()
        if not hasattr(retriever, 'search'):
            retriever.search = lambda query, **kwargs: []
        if not hasattr(base_retriever, 'documents'):
            base_retriever.documents = []

    ready = False  # local status, published to app_ready at the very end
    try:
        logger.info("๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ดˆ๊ธฐํ™” ์‹œ์ž‘...")

        # 1. LLM and STT interfaces (only when missing or still mocks).
        if llm_interface is None or isinstance(llm_interface, MockComponent):
            if globals().get('LLMInterface', MockComponent) is not MockComponent:
                llm_interface = LLMInterface(default_llm="openai")
                logger.info("LLM ์ธํ„ฐํŽ˜์ด์Šค ์ดˆ๊ธฐํ™” ์™„๋ฃŒ.")
            else:
                logger.warning("LLMInterface ํด๋ž˜์Šค ์—†์Œ. Mock ์‚ฌ์šฉ.")
                llm_interface = MockComponent()
        if stt_client is None or isinstance(stt_client, MockComponent):
            if globals().get('VitoSTT', MockComponent) is not MockComponent:
                stt_client = VitoSTT()
                logger.info("STT ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ.")
            else:
                logger.warning("VitoSTT ํด๋ž˜์Šค ์—†์Œ. Mock ์‚ฌ์šฉ.")
                stt_client = MockComponent()

        # 2. Retrievers.
        if globals().get('VectorRetriever', MockComponent) is MockComponent:
            logger.warning("VectorRetriever ํด๋ž˜์Šค ์—†์Œ. Mock ๊ฒ€์ƒ‰๊ธฐ ์‚ฌ์šฉ.")
            base_retriever = MockComponent()
            retriever = MockComponent()
            _fill_missing_with_mocks()
        else:
            logger.info("์‹ค์ œ ๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ์‹œ๋„...")
            retriever = init_retriever()
            if base_retriever is None:
                # Recover the base retriever from the returned object if possible.
                if hasattr(retriever, 'base_retriever'):
                    base_retriever = retriever.base_retriever
                else:
                    logger.warning("init_retriever ํ›„ base_retriever๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์Œ. ํ™•์ธ ํ•„์š”.")
                    if isinstance(retriever, VectorRetriever):
                        base_retriever = retriever
            if retriever is None or base_retriever is None:
                logger.error("๊ฒ€์ƒ‰๊ธฐ ์ดˆ๊ธฐํ™” ํ›„์—๋„ retriever ๋˜๋Š” base_retriever๊ฐ€ None์ž…๋‹ˆ๋‹ค.")
                _fill_missing_with_mocks()
            else:
                logger.info("๊ฒ€์ƒ‰๊ธฐ (Retriever, Base Retriever) ์ดˆ๊ธฐํ™” ์„ฑ๊ณต")

        # Every branch above leaves the app usable (real or mock components).
        ready = True
        logger.info(f"๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ. ์ตœ์ข… ์ƒํƒœ: {'Ready' if ready else 'Not Ready (Error during init)'}")
    except Exception as init_err:
        logger.error(f"์•ฑ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ดˆ๊ธฐํ™” ์ค‘ ์‹ฌ๊ฐํ•œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {init_err}", exc_info=True)
        _fill_missing_with_mocks()
        ready = True
        logger.warning("์ดˆ๊ธฐํ™” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์ง€๋งŒ Mock ๊ฐ์ฒด๋กœ ๋Œ€์ฒด ํ›„ ์•ฑ ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์ƒํƒœ๋กœ ์„ค์ •.")
    finally:
        # Publish the final readiness state.
        app_ready = ready
# ์žฅ์น˜ ๋ผ์šฐํŠธ ๋“ฑ๋ก ํ˜ธ์ถœ์€ ์—ฌ๊ธฐ์„œ ์ œ๊ฑฐ๋จ (๋ฉ”์ธ ๋ ˆ๋ฒจ์—์„œ ์ฒ˜๋ฆฌ)
# ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์Šค๋ ˆ๋“œ ์‹œ์ž‘ ๋ถ€๋ถ„์€ ๊ทธ๋Œ€๋กœ ์œ ์ง€
# Run the initialisation in a daemon thread so it never blocks process exit.
init_thread = threading.Thread(target=background_init, daemon=True)
init_thread.start()
# --- Flask ๋ผ์šฐํŠธ ์ •์˜ ---
def _is_safe_redirect_target(target):
    """Return True when *target* resolves to an http(s) URL on this app's own host."""
    from urllib.parse import urljoin, urlparse

    if not target or '\\' in target:
        # Some browsers treat backslashes like slashes ("/\\host"), so reject them.
        return False
    own = urlparse(request.host_url)
    resolved = urlparse(urljoin(request.host_url, target))
    return resolved.scheme in ('http', 'https') and resolved.netloc == own.netloc


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Render the login form (GET) or check submitted credentials (POST).

    On success the session is marked as logged in and the client is
    redirected to the ``next`` query parameter, or to the index page when
    ``next`` is absent. A ``next`` value that does not point at this
    application's own host is ignored, so the login page cannot be used as
    an open redirector. Credentials are compared in constant time.

    Returns:
        A redirect response, or the rendered ``login.html`` template with an
        error message when the credentials did not match.
    """
    import hmac

    error = None
    next_url = request.args.get('next')  # where to go after a successful login
    if next_url and not _is_safe_redirect_target(next_url):
        logger.warning("Ignoring unsafe 'next' redirect target: %r", next_url)
        next_url = None

    logger.info(f"-------------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ์ ‘์† (Next: {next_url}) --------------")
    logger.info(f"Method: {request.method}")

    if request.method == 'POST':
        logger.info("๋กœ๊ทธ์ธ ์‹œ๋„ ๋ฐ›์Œ")
        username = request.form.get('username', '')
        password = request.form.get('password', '')
        logger.info(f"์ž…๋ ฅ๋œ ์‚ฌ์šฉ์ž๋ช…: {username}")
        logger.info(f"๋น„๋ฐ€๋ฒˆํ˜ธ ์ž…๋ ฅ ์—ฌ๋ถ€: {len(password) > 0}")

        valid_username = ADMIN_USERNAME
        valid_password = ADMIN_PASSWORD
        logger.info(f"๊ฒ€์ฆ์šฉ ์‚ฌ์šฉ์ž๋ช…: {valid_username}")
        logger.info(f"๊ฒ€์ฆ์šฉ ๋น„๋ฐ€๋ฒˆํ˜ธ ์กด์žฌ ์—ฌ๋ถ€: {valid_password is not None and len(valid_password) > 0}")

        # Constant-time comparison; bytes are used because compare_digest
        # only accepts ASCII-only str values.
        username_ok = hmac.compare_digest(username.encode('utf-8'), valid_username.encode('utf-8'))
        password_ok = hmac.compare_digest(password.encode('utf-8'), valid_password.encode('utf-8'))

        if username_ok and password_ok:
            logger.info(f"๋กœ๊ทธ์ธ ์„ฑ๊ณต: {username}")
            logger.debug(f"์„ธ์…˜ ์„ค์ • ์ „: {session}")
            session.permanent = True
            session['logged_in'] = True
            session['username'] = username
            session.modified = True
            logger.info(f"์„ธ์…˜ ์„ค์ • ํ›„: {session}")
            logger.info("์„ธ์…˜ ์„ค์ • ์™„๋ฃŒ, ๋ฆฌ๋””๋ ‰์…˜ ์‹œ๋„")
            redirect_to = next_url or url_for('index')
            logger.info(f"๋ฆฌ๋””๋ ‰์…˜ ๋Œ€์ƒ: {redirect_to}")
            return redirect(redirect_to)

        logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
        if not username_ok:
            logger.warning("์‚ฌ์šฉ์ž๋ช… ๋ถˆ์ผ์น˜")
        if not password_ok:
            logger.warning("๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
        error = '์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค.'
    else:
        logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ GET ์š”์ฒญ")
        if 'logged_in' in session:
            logger.info("์ด๋ฏธ ๋กœ๊ทธ์ธ๋œ ์‚ฌ์šฉ์ž, ๋ฉ”์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
            return redirect(url_for('index'))

    logger.info("---------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ๋ Œ๋”๋ง ----------")
    return render_template('login.html', error=error, next=next_url)
@app.route('/logout')
def logout():
    """Remove the login markers from the session and redirect to the login page."""
    logger.info("-------------- ๋กœ๊ทธ์•„์›ƒ ์š”์ฒญ --------------")
    logger.info(f"๋กœ๊ทธ์•„์›ƒ ์ „ ์„ธ์…˜ ์ƒํƒœ: {session}")

    if 'logged_in' not in session:
        logger.warning("๋กœ๊ทธ์ธ๋˜์ง€ ์•Š์€ ์ƒํƒœ์—์„œ ๋กœ๊ทธ์•„์›ƒ ์‹œ๋„")
    else:
        current_user = session.get('username', 'unknown')
        logger.info(f"์‚ฌ์šฉ์ž {current_user} ๋กœ๊ทธ์•„์›ƒ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
        for session_key in ('logged_in', 'username'):
            session.pop(session_key, None)
        session.modified = True
        logger.info(f"์„ธ์…˜ ์ •๋ณด ์‚ญ์ œ ์™„๋ฃŒ. ํ˜„์žฌ ์„ธ์…˜: {session}")

    logger.info("๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
    return redirect(url_for('login'))
@app.route('/')
@login_required
def index():
    """Serve the main page, or the loading page (503) while the app is not ready.

    When the app is still not ready more than 30 seconds after the reference
    time, it is forced into the ready state.
    """
    global app_ready

    now = datetime.datetime.now()
    try:
        # NOTE(review): the reference time is this file's modification time,
        # not the process start time - confirm that this is intended.
        reference_time = datetime.datetime.fromtimestamp(os.path.getmtime(__file__))
    except FileNotFoundError:
        logger.warning("__file__ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด ์‹œ๊ฐ„ ๋น„๊ต ๋กœ์ง์„ ๊ฑด๋„ˆ<0xEB>๋œ๋‹ˆ๋‹ค.")
        if not app_ready:
            # No time comparison possible; nothing else is checked for now.
            logger.warning("์•ฑ ์ค€๋น„ ์ƒํƒœ ํ™•์ธ (์‹œ๊ฐ„ ๋น„๊ต ๋ถˆ๊ฐ€)")
    else:
        elapsed_seconds = (now - reference_time).total_seconds()
        if elapsed_seconds > 30 and not app_ready:
            logger.warning("์•ฑ์ด 30์ดˆ ์ด์ƒ ์ดˆ๊ธฐํ™” ์ค‘ ์ƒํƒœ์ž…๋‹ˆ๋‹ค. ๊ฐ•์ œ๋กœ ready ์ƒํƒœ๋กœ ๋ณ€๊ฒฝํ•ฉ๋‹ˆ๋‹ค.")
            app_ready = True

    if app_ready:
        logger.info("๋ฉ”์ธ ํŽ˜์ด์ง€ ์š”์ฒญ")
        return render_template('index.html')

    logger.info("์•ฑ์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•„ ๋กœ๋”ฉ ํŽ˜์ด์ง€ ํ‘œ์‹œ")
    return render_template('loading.html'), 503
@app.route('/api/status')
@login_required
def app_status():
    """Report as JSON whether initialisation has finished."""
    state_label = 'Ready' if app_ready else 'Not Ready'
    logger.info(f"์•ฑ ์ƒํƒœ ํ™•์ธ ์š”์ฒญ: {state_label}")
    return jsonify({"ready": app_ready})
@app.route('/api/llm', methods=['GET', 'POST'])
@login_required
def llm_api():
    """List the supported LLMs (GET) or switch the active LLM (POST).

    POST expects a JSON body containing ``llm_id``. Responds with 503 while
    the app is initialising, 400 for a missing or unsupported id, and 500 on
    internal errors.
    """
    if not app_ready:
        return jsonify({"error": "์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503

    if request.method == 'GET':
        logger.info("LLM ๋ชฉ๋ก ์š”์ฒญ")
        try:
            if hasattr(llm_interface, 'get_current_llm_details'):
                active = llm_interface.get_current_llm_details()
            else:
                active = {"id": "unknown", "name": "Unknown"}
            catalogue = getattr(llm_interface, 'SUPPORTED_LLMS', {})
            entries = []
            for display_name, model_id in catalogue.items():
                entries.append({
                    "name": display_name,
                    "id": model_id,
                    "current": model_id == active.get("id"),
                })
            return jsonify({
                "supported_llms": entries,
                "current_llm": active
            })
        except Exception as e:
            logger.error(f"LLM ์ •๋ณด ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
            return jsonify({"error": "LLM ์ •๋ณด ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
    elif request.method == 'POST':
        payload = request.get_json()
        if not payload or 'llm_id' not in payload:
            return jsonify({"error": "LLM ID๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        llm_id = payload['llm_id']
        logger.info(f"LLM ๋ณ€๊ฒฝ ์š”์ฒญ: {llm_id}")
        try:
            has_api = hasattr(llm_interface, 'set_llm') and hasattr(llm_interface, 'llm_clients')
            if not has_api:
                raise NotImplementedError("LLM ์ธํ„ฐํŽ˜์ด์Šค์— ํ•„์š”ํ•œ ๋ฉ”์†Œ๋“œ/์†์„ฑ ์—†์Œ")
            if llm_id not in llm_interface.llm_clients:
                return jsonify({"error": f"์ง€์›๋˜์ง€ ์•Š๋Š” LLM ID: {llm_id}"}), 400
            if not llm_interface.set_llm(llm_id):
                logger.error(f"LLM ๋ณ€๊ฒฝ ์‹คํŒจ (ID: {llm_id})")
                return jsonify({"error": "LLM ๋ณ€๊ฒฝ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"}), 500
            updated = llm_interface.get_current_llm_details()
            logger.info(f"LLM์ด '{updated.get('name', llm_id)}'๋กœ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
            return jsonify({
                "success": True,
                "message": f"LLM์ด '{updated.get('name', llm_id)}'๋กœ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
                "current_llm": updated
            })
        except Exception as e:
            logger.error(f"LLM ๋ณ€๊ฒฝ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {e}", exc_info=True)
            return jsonify({"error": f"LLM ๋ณ€๊ฒฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"}), 500
@app.route('/api/chat', methods=['POST'])
@login_required
def chat():
    """Text chat endpoint: retrieve context for the query and generate an answer.

    Expects a JSON body with ``query`` (non-empty string) and an optional
    ``llm_id``.

    Returns:
        JSON with ``answer``, ``sources`` and ``llm``. Status 503 while the
        app/retriever is not ready, 400 when the body is not valid JSON or
        ``query`` is missing, blank or not a string, and 500 on processing
        errors.
    """
    if not app_ready or retriever is None:
        return jsonify({"error": "์•ฑ/๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503
    try:
        # silent=True turns a malformed/non-JSON body into None instead of an
        # exception, so it is answered with 400 below rather than ending up
        # in the generic 500 handler of this function.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        query = data['query']
        if not isinstance(query, str) or not query.strip():
            return jsonify({"error": "์ฟผ๋ฆฌ๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
        logger.info(f"ํ…์ŠคํŠธ ์ฟผ๋ฆฌ ์ˆ˜์‹ : {query[:100]}...")

        # Retrieval.
        if not hasattr(retriever, 'search'):
            raise NotImplementedError("Retriever์— search ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        search_results = retriever.search(query, top_k=5, first_stage_k=6)

        # Context assembly.
        if not hasattr(DocumentProcessor, 'prepare_rag_context'):
            raise NotImplementedError("DocumentProcessor์— prepare_rag_context ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        context = DocumentProcessor.prepare_rag_context(search_results, field="text")
        if not context:
            logger.warning("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์–ด ์ปจํ…์ŠคํŠธ๋ฅผ ์ƒ์„ฑํ•˜์ง€ ๋ชปํ•จ.")

        # Answer generation (a fixed reply is used when there is no context).
        llm_id = data.get('llm_id', None)
        if not hasattr(llm_interface, 'rag_generate'):
            raise NotImplementedError("LLMInterface์— rag_generate ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        if not context:
            answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
        else:
            answer = llm_interface.rag_generate(query, context, llm_id=llm_id)
            logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")

        # Source list for the response.
        sources = []
        for result in search_results or []:
            if not isinstance(result, dict):
                logger.warning(f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํ˜•์‹: {type(result)}")
                continue
            if "source" not in result:
                continue
            source_info = {
                "source": result.get("source", "Unknown"),
                "score": result.get("rerank_score", result.get("score", 0))
            }
            # For CSV chunks, the first column of the first line is used as id.
            if "text" in result and result.get("filetype") == "csv":
                try:
                    text_lines = result["text"].strip().split('\n')
                    if text_lines:
                        first_line = text_lines[0].strip()
                        if ',' in first_line:
                            first_column = first_line.split(',')[0].strip()
                            source_info["id"] = first_column
                            logger.debug(f"CSV ์†Œ์Šค ID ์ถ”์ถœ: {first_column} from {source_info['source']}")
                except Exception as e:
                    logger.warning(f"CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({result.get('source')}): {e}")
            sources.append(source_info)

        response_data = {
            "answer": answer,
            "sources": sources,
            "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
        }
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"์ฑ„ํŒ… ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        return jsonify({"error": f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}), 500
@app.route('/api/voice', methods=['POST'])
@login_required
def voice_chat():
    """Voice chat endpoint: transcribe the uploaded audio, then answer via RAG.

    Expects a multipart form with an ``audio`` file and an optional
    ``llm_id`` field.

    The uploaded bytes are read directly from the upload object. The earlier
    approach saved them to a ``NamedTemporaryFile`` and reopened that file by
    name, which is not possible on every platform while the file is still
    open and only added a disk round trip.

    Returns:
        JSON with ``transcription``, ``answer``, ``sources`` and ``llm``.
        Status 400 when no audio was sent or nothing was recognised, 500 on
        STT or processing errors.
    """
    if not app_ready:
        logger.warning("์•ฑ ์ดˆ๊ธฐํ™”๊ฐ€ ์™„๋ฃŒ๋˜์ง€ ์•Š์•˜์ง€๋งŒ ์Œ์„ฑ API ์š”์ฒญ ์ฒ˜๋ฆฌ ์‹œ๋„")
    if retriever is None:
        logger.error("retriever๊ฐ€ ์•„์ง ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
        return jsonify({
            "transcription": "(์Œ์„ฑ์„ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜ํ–ˆ์ง€๋งŒ ๊ฒ€์ƒ‰ ์—”์ง„์ด ์•„์ง ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค)",
            "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ฒ€์ƒ‰ ์—”์ง„์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
            "sources": []
        })
    if stt_client is None:
        return jsonify({
            "transcription": "(์Œ์„ฑ ์ธ์‹ ๊ธฐ๋Šฅ์ด ์ค€๋น„ ์ค‘์ž…๋‹ˆ๋‹ค)",
            "answer": "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ˜„์žฌ ์Œ์„ฑ ์ธ์‹ ์„œ๋น„์Šค๊ฐ€ ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
            "sources": []
        })

    logger.info("์Œ์„ฑ ์ฑ— ์š”์ฒญ ์ˆ˜์‹ ")
    if 'audio' not in request.files:
        logger.error("์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์Œ")
        return jsonify({"error": "์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
    audio_file = request.files['audio']
    logger.info(f"์ˆ˜์‹ ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ: {audio_file.filename} ({audio_file.content_type})")

    try:
        if not hasattr(stt_client, 'transcribe_audio'):
            raise NotImplementedError("STT ํด๋ผ์ด์–ธํŠธ์— transcribe_audio ๋ฉ”์†Œ๋“œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

        # Speech-to-text on the raw uploaded bytes.
        audio_bytes = audio_file.read()
        stt_result = stt_client.transcribe_audio(audio_bytes, language="ko")
        if not isinstance(stt_result, dict) or not stt_result.get("success"):
            error_msg = stt_result.get("error", "์•Œ ์ˆ˜ ์—†๋Š” STT ์˜ค๋ฅ˜") if isinstance(stt_result, dict) else "STT ๊ฒฐ๊ณผ ํ˜•์‹ ์˜ค๋ฅ˜"
            logger.error(f"์Œ์„ฑ์ธ์‹ ์‹คํŒจ: {error_msg}")
            return jsonify({
                "error": "์Œ์„ฑ์ธ์‹ ์‹คํŒจ",
                "details": error_msg
            }), 500

        transcription = stt_result.get("text", "")
        if not transcription:
            logger.warning("์Œ์„ฑ์ธ์‹ ๊ฒฐ๊ณผ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")
            return jsonify({"error": "์Œ์„ฑ์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", "transcription": ""}), 400
        logger.info(f"์Œ์„ฑ์ธ์‹ ์„ฑ๊ณต: {transcription[:50]}...")

        if retriever is None:
            logger.error("STT ์„ฑ๊ณต ํ›„ ๊ฒ€์ƒ‰ ์‹œ๋„ ์ค‘ retriever๊ฐ€ None์ž„")
            return jsonify({
                "transcription": transcription,
                "answer": "์Œ์„ฑ์„ ์ธ์‹ํ–ˆ์ง€๋งŒ, ํ˜„์žฌ ๊ฒ€์ƒ‰ ์‹œ์Šคํ…œ์ด ์ค€๋น„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
                "sources": []
            })

        # Retrieval and context assembly.
        search_results = retriever.search(transcription, top_k=5, first_stage_k=6)
        context = DocumentProcessor.prepare_rag_context(search_results, field="text")
        if not context:
            logger.warning("์Œ์„ฑ ์ฟผ๋ฆฌ์— ๋Œ€ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ.")

        # Answer generation (a fixed reply is used when there is no context).
        llm_id = request.form.get('llm_id', None)
        if not context:
            answer = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            logger.info("์ปจํ…์ŠคํŠธ ์—†์ด ๊ธฐ๋ณธ ์‘๋‹ต ์ƒ์„ฑ")
        else:
            answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
            logger.info(f"LLM ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ (๊ธธ์ด: {len(answer)})")

        # Source list for the response.
        enhanced_sources = []
        for doc in search_results or []:
            if not isinstance(doc, dict) or "source" not in doc:
                continue
            source_info = {
                "source": doc.get("source", "Unknown"),
                "score": doc.get("rerank_score", doc.get("score", 0))
            }
            # For CSV chunks, the first column of the first line is used as id.
            if "text" in doc and doc.get("filetype") == "csv":
                try:
                    text_lines = doc["text"].strip().split('\n')
                    if text_lines:
                        first_line = text_lines[0].strip()
                        if ',' in first_line:
                            first_column = first_line.split(',')[0].strip()
                            source_info["id"] = first_column
                except Exception as e:
                    logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ์†Œ์Šค ID ์ถ”์ถœ ์‹คํŒจ ({doc.get('source')}): {e}")
            enhanced_sources.append(source_info)

        response_data = {
            "transcription": transcription,
            "answer": answer,
            "sources": enhanced_sources,
            "llm": llm_interface.get_current_llm_details() if hasattr(llm_interface, 'get_current_llm_details') else {}
        }
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"์Œ์„ฑ ์ฑ— ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        return jsonify({
            "error": "์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ",
            "details": str(e)
        }), 500
@app.route('/api/upload', methods=['POST'])
@login_required
def upload_document():
    """Knowledge-base upload endpoint: store a document and index its chunks.

    Expects a multipart form with a ``document`` file whose extension is in
    ALLOWED_DOC_EXTENSIONS. CSV files go through ``csv_to_documents``; other
    text files through ``text_to_documents``. pdf/docx files are stored but
    not indexed, because no text extraction is implemented for them.

    Two defects of the previous version are addressed here:
      * ``secure_filename`` removes non-ASCII characters, so a name such as
        "<Korean name>.txt" collapsed to "txt" and the later extension lookup
        raised IndexError. The extension is now taken from the original
        (already validated) name and a generated file name is used when the
        sanitised one has no usable stem.
      * pdf/docx uploads were decoded as text before their dedicated branch
        was reached; they now skip text decoding altogether.

    Returns:
        JSON describing the result. Status 503 while not ready, 400 for
        missing/invalid files or unreadable encodings, 500 on other errors.
    """
    import uuid

    if not app_ready or base_retriever is None:
        return jsonify({"error": "์•ฑ/๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค."}), 503
    if 'document' not in request.files:
        return jsonify({"error": "๋ฌธ์„œ ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
    doc_file = request.files['document']
    if doc_file.filename == '':
        return jsonify({"error": "์„ ํƒ๋œ ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."}), 400
    if not allowed_doc_file(doc_file.filename):
        logger.error(f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {doc_file.filename}")
        return jsonify({"error": f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ํ—ˆ์šฉ: {', '.join(ALLOWED_DOC_EXTENSIONS)}"}), 400

    try:
        # The extension was validated above on the original name.
        file_ext = doc_file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(doc_file.filename)
        stem, dot, _ = filename.rpartition('.')
        if not dot or not stem:
            # Sanitising removed the stem and/or the dot; use a generated name.
            filename = f"upload_{uuid.uuid4().hex}.{file_ext}"

        filepath = os.path.join(app.config['DATA_FOLDER'], filename)
        doc_file.save(filepath)
        logger.info(f"๋ฌธ์„œ ์ €์žฅ ์™„๋ฃŒ: {filepath}")

        content = ""
        if file_ext in ('pdf', 'docx'):
            # Binary formats: no text extraction implemented yet.
            logger.warning(f".{file_ext} ํŒŒ์ผ ์ฒ˜๋ฆฌ๋Š” ํ˜„์žฌ ๊ตฌํ˜„๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ…์ŠคํŠธ ์ถ”์ถœ ๋กœ์ง ์ถ”๊ฐ€ ํ•„์š”.")
        else:
            # Text formats: try UTF-8 first, then CP949.
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                logger.info(f"UTF-8 ๋””์ฝ”๋”ฉ ์‹คํŒจ, CP949๋กœ ์‹œ๋„: {filename}")
                try:
                    with open(filepath, 'r', encoding='cp949') as f:
                        content = f.read()
                except Exception as e_cp949:
                    logger.error(f"CP949 ๋””์ฝ”๋”ฉ ์‹คํŒจ ({filename}): {e_cp949}")
                    return jsonify({"error": "ํŒŒ์ผ ์ธ์ฝ”๋”ฉ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค (UTF-8, CP949 ์‹œ๋„ ์‹คํŒจ)."}), 400
            except Exception as e_read:
                logger.error(f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜ ({filename}): {e_read}")
                return jsonify({"error": f"ํŒŒ์ผ ์ฝ๊ธฐ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e_read)}"}), 500

        metadata = {
            "source": filename, "filename": filename,
            "filetype": file_ext,
            "filepath": filepath
        }

        docs = []
        if not hasattr(DocumentProcessor, 'csv_to_documents') or not hasattr(DocumentProcessor, 'text_to_documents'):
            raise NotImplementedError("DocumentProcessor์— ํ•„์š”ํ•œ ๋ฉ”์†Œ๋“œ ์—†์Œ")
        if file_ext == 'csv':
            logger.info(f"CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
            docs = DocumentProcessor.csv_to_documents(content, metadata)
        else:
            logger.info(f"์ผ๋ฐ˜ ํ…์ŠคํŠธ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {filename}")
            if content:
                docs = DocumentProcessor.text_to_documents(
                    content, metadata=metadata,
                    chunk_size=512, chunk_overlap=50
                )

        if not docs:
            logger.warning(f"ํŒŒ์ผ '{filename}'์—์„œ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†๊ฑฐ๋‚˜ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค.")
            return jsonify({
                "warning": True,
                "message": f"ํŒŒ์ผ '{filename}'์ด ์ €์žฅ๋˜์—ˆ์ง€๋งŒ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†์Šต๋‹ˆ๋‹ค."
            })

        if not hasattr(base_retriever, 'add_documents') or not hasattr(base_retriever, 'save'):
            raise NotImplementedError("๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ์— add_documents ๋˜๋Š” save ๋ฉ”์†Œ๋“œ ์—†์Œ")
        logger.info(f"{len(docs)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ๋ฅผ ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค...")
        base_retriever.add_documents(docs)
        logger.info("๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ๋ฅผ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค...")
        index_path = app.config['INDEX_PATH']
        try:
            base_retriever.save(index_path)
            logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ")
            # An update of the re-ranking retriever could be added here if needed.
            return jsonify({
                "success": True,
                "message": f"ํŒŒ์ผ '{filename}' ์—…๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ ์™„๋ฃŒ ({len(docs)}๊ฐœ ์ฒญํฌ ์ถ”๊ฐ€)."
            })
        except Exception as e_save:
            logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e_save}")
            return jsonify({"error": f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜: {str(e_save)}"}), 500
    except Exception as e:
        logger.error(f"ํŒŒ์ผ ์—…๋กœ๋“œ ๋˜๋Š” ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        return jsonify({"error": f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
@app.route('/api/documents', methods=['GET'])
@login_required
def list_documents():
    """Knowledge-base listing endpoint: summarise indexed chunks per source.

    Returns:
        JSON with ``documents`` (one entry per source, sorted by chunk count
        in descending order), ``total_documents`` and ``total_chunks``.
        Status 503 while not ready and 500 on errors.
    """
    if not app_ready or base_retriever is None:
        return jsonify({"error": "์•ฑ/๊ธฐ๋ณธ ๊ฒ€์ƒ‰๊ธฐ๊ฐ€ ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค."}), 503
    try:
        per_source = {}
        total_chunks = 0
        if hasattr(base_retriever, 'documents') and base_retriever.documents:
            logger.info(f"์ด {len(base_retriever.documents)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ์—์„œ ์†Œ์Šค ๋ชฉ๋ก ์ƒ์„ฑ ์ค‘...")
            for chunk in base_retriever.documents:
                if not isinstance(chunk, dict):
                    continue
                meta = chunk.get("metadata")
                has_meta = isinstance(meta, dict)

                # The source may be stored on the chunk or inside its metadata.
                source = chunk.get("source", "unknown")
                if source == "unknown" and has_meta:
                    source = meta.get("source", "unknown")
                if source == "unknown":
                    continue

                entry = per_source.get(source)
                if entry is not None:
                    entry["chunks"] += 1
                else:
                    filename = chunk.get("filename", source)
                    filetype = chunk.get("filetype", "unknown")
                    if has_meta:
                        # Metadata values take precedence when present.
                        filename = meta.get("filename", filename)
                        filetype = meta.get("filetype", filetype)
                    per_source[source] = {
                        "filename": filename,
                        "chunks": 1,
                        "filetype": filetype
                    }
                total_chunks += 1
        else:
            logger.info("๊ฒ€์ƒ‰๊ธฐ์— ๋ฌธ์„œ๊ฐ€ ์—†๊ฑฐ๋‚˜ documents ์†์„ฑ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

        documents = sorted(
            (
                {
                    "source": src,
                    "filename": info["filename"],
                    "chunks": info["chunks"],
                    "filetype": info["filetype"],
                }
                for src, info in per_source.items()
            ),
            key=lambda item: item["chunks"],
            reverse=True,
        )
        logger.info(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์™„๋ฃŒ: {len(documents)}๊ฐœ ์†Œ์Šค ํŒŒ์ผ, {total_chunks}๊ฐœ ์ฒญํฌ")
        return jsonify({
            "documents": documents,
            "total_documents": len(documents),
            "total_chunks": total_chunks
        })
    except Exception as e:
        logger.error(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        return jsonify({"error": f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
# ์ •์  ํŒŒ์ผ ์„œ๋น™
@app.route('/static/<path:path>')
def send_static(path):
    """Serve the requested file from the 'static' directory."""
    static_dir = 'static'
    return send_from_directory(static_dir, path)
# --- ์š”์ฒญ ์ฒ˜๋ฆฌ ํ›… ---
@app.after_request
def after_request_func(response):
    """Post-processing hook for every response; currently returns it unchanged."""
    # Debugging aid: log response.headers here to inspect Set-Cookie.
    return response
# ์•ฑ ์‹คํ–‰ (๋กœ์ปฌ ํ…Œ์ŠคํŠธ์šฉ)
# Direct execution (local testing with the development server).
if __name__ == '__main__':
    logger.info("Flask ์•ฑ์„ ์ง์ ‘ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค (๊ฐœ๋ฐœ์šฉ ์„œ๋ฒ„).")
    port = int(os.environ.get("PORT", 7860))
    # Debug mode enables the interactive debugger, which allows arbitrary code
    # execution. Because the server listens on all interfaces (0.0.0.0), it
    # is off by default and only enabled explicitly through FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    logger.info(f"์„œ๋ฒ„๋ฅผ http://0.0.0.0:{port} ์—์„œ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค.")
    # With debug enabled the reloader restarts the process on code changes,
    # which runs the module-level initialisation again in the new process.
    app.run(debug=debug_enabled, host='0.0.0.0', port=port)