Spaces:
Sleeping
Sleeping
Add application file
Browse files- app/app.py +373 -10
app/app.py
CHANGED
@@ -1,4 +1,201 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
RAG ๊ฒ์ ์ฑ๋ด ์น ์ ํ๋ฆฌ์ผ์ด์
|
3 |
"""
|
4 |
|
@@ -23,6 +220,23 @@ logger = logging.getLogger(__name__)
|
|
23 |
# ํ๊ฒฝ ๋ณ์ ๋ก๋
|
24 |
load_dotenv()
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ
|
27 |
from utils.vito_stt import VitoSTT
|
28 |
from utils.llm_interface import LLMInterface
|
@@ -36,11 +250,13 @@ app = Flask(__name__)
|
|
36 |
# ์ธ์
์ค์ - ๊ณ ์ ๋ ์ํฌ๋ฆฟ ํค ์ฌ์ฉ
|
37 |
app.secret_key = 'rag_chatbot_fixed_secret_key_12345' # ๊ณ ์ ๋ ์ํฌ๋ฆฟ ํค
|
38 |
|
39 |
-
# ์ธ์
์ค์ ์ถ๊ฐ
|
40 |
-
app.config['SESSION_COOKIE_SECURE'] = False #
|
41 |
app.config['SESSION_COOKIE_HTTPONLY'] = True # JavaScript์์ ์ฟ ํค ์ ๊ทผ ๋ฐฉ์ง
|
42 |
-
app.config['SESSION_COOKIE_SAMESITE'] =
|
43 |
-
app.config['
|
|
|
|
|
44 |
|
45 |
# ์ต๋ ํ์ผ ํฌ๊ธฐ ์ค์ (10MB)
|
46 |
app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024
|
@@ -80,7 +296,19 @@ def login_required(f):
|
|
80 |
logger.info(f"ํ์ฌ ์ธ์
๊ฐ์ฒด: {session}")
|
81 |
logger.info(f"ํ์ฌ ์ธ์
์ํ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
|
82 |
logger.info(f"ํ์ฌ ์ธ์
์ผ์ด: {request.cookies.get('session', 'None')}")
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
logger.warning(f"๋น๋ก๊ทธ์ธ ์ํ์์ {request.path} ์ ๊ทผ ์๋, ๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
85 |
return redirect(url_for('login'))
|
86 |
|
@@ -204,6 +432,16 @@ def login():
|
|
204 |
logger.info("-------------- ๋ก๊ทธ์ธ ํ์ด์ง ์ ์ --------------")
|
205 |
logger.info(f"Method: {request.method}")
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
if request.method == 'POST':
|
208 |
logger.info("๋ก๊ทธ์ธ ์๋ ๋ฐ์")
|
209 |
|
@@ -214,13 +452,27 @@ def login():
|
|
214 |
# ๋น๋ฐ๋ฒํธ๋ ์ผ๋ถ ๊ฒ์ถ ํ ๋ก๊น
|
215 |
logger.info(f"๋น๋ฐ๋ฒํธ ์
๋ ฅ ์ฌ๋ถ: {len(password) > 0}")
|
216 |
|
217 |
-
#
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
logger.info(f"ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ ์ฌ์ฉ์๋ช
: {valid_username if valid_username else '์ ์๋์ง ์์'}")
|
222 |
logger.info(f"ํ๊ฒฝ๋ณ์์์ ๋น๋ฐ๋ฒํธ ๊ฐ์ ธ์จ ์ฌ๋ถ: {len(valid_password) > 0 if valid_password else False}")
|
223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
if username == valid_username and password == valid_password:
|
225 |
logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
|
226 |
# ์ธ์
์ํ ํ์ธ ๋ฐ ๋ก๊น
|
@@ -234,7 +486,11 @@ def login():
|
|
234 |
# ์ธ์
์ค์ ํ ๋ก๊น
|
235 |
logger.info(f"์ธ์
์ค์ ํ ์ธ์
์ํ: {session}")
|
236 |
logger.info("์ธ์
์ค์ ์๋ฃ, ๋ฆฌ๋๋ ์
์๋")
|
237 |
-
|
|
|
|
|
|
|
|
|
238 |
else:
|
239 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
240 |
# ์ด๋ค ๋ถ๋ถ์ด ์ผ์นํ์ง ์๋์ง ์์ธ ๋ก๊น
|
@@ -443,6 +699,113 @@ def voice_chat():
|
|
443 |
audio_file = request.files['audio']
|
444 |
logger.info(f"์์ ๋ ํ์ผ: {audio_file.filename}")
|
445 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
446 |
try:
|
447 |
# ์ค๋์ค ํ์ผ ์ฝ๊ธฐ
|
448 |
with audio_file.stream as f:
|
|
|
1 |
+
@app.route('/api/upload', methods=['POST'])
@login_required
def upload_document():
    """Store an uploaded document and add its chunks to the retriever index.

    Reads the multipart field ``document`` from the current request, saves it
    under ``app.config['DATA_FOLDER']``, decodes it as UTF-8 (falling back to
    CP949), converts it to documents with ``DocumentProcessor`` and adds them
    to ``base_retriever``, then saves the index to ``app.config['INDEX_PATH']``.

    Returns:
        A JSON response, optionally paired with a status code:
        503 while ``app_ready`` is false, 400 for a missing/empty/disallowed
        file, 500 when processing or index saving raises, and otherwise a
        ``success`` or ``warning`` payload with a human-readable ``message``.
    """
    import uuid  # local import: only needed to build a fallback file name

    # Refuse to work until application start-up has finished.
    if not app_ready:
        return jsonify({"error": "앱이 아직 초기화 중입니다. 잠시 후 다시 시도해주세요."}), 503

    try:
        # The file must be part of the request.
        if 'document' not in request.files:
            return jsonify({"error": "문서 파일이 제공되지 않았습니다."}), 400

        doc_file = request.files['document']
        original_name = doc_file.filename
        logger.info(f"받은 파일명: {original_name}")

        # An empty name means the form was submitted without choosing a file.
        if original_name == '':
            return jsonify({"error": "선택된 파일이 없습니다."}), 400

        # Reject file types that are not on the allow-list.
        if not allowed_doc_file(original_name):
            logger.error(f"허용되지 않는 파일 형식: {original_name}")
            return jsonify({"error": "허용되지 않는 파일 형식입니다. 현재 허용된 파일 형식: {}".format(', '.join(ALLOWED_DOC_EXTENSIONS))}), 400

        # Sanitise the name before it is used as a path component.
        filename = secure_filename(original_name)
        if not filename or '.' not in filename:
            # secure_filename() drops non-ASCII characters, so a name such as
            # "문서.txt" collapses to "txt" (or to an empty string). Build a
            # unique ASCII name that keeps the extension of the validated
            # original so the extension lookup below cannot fail.
            original_ext = original_name.rsplit('.', 1)[-1].lower() if '.' in original_name else ''
            fallback = f"upload_{uuid.uuid4().hex}"
            if original_ext:
                fallback = f"{fallback}.{original_ext}"
            filename = secure_filename(fallback)

        file_ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''

        # Save into the data folder.
        filepath = os.path.join(app.config['DATA_FOLDER'], filename)
        doc_file.save(filepath)

        logger.info(f"문서가 저장되었습니다: {filepath}")

        # Process the saved document.
        try:
            # Try UTF-8 first.
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                # Fall back to CP949, the default encoding on Korean Windows.
                logger.info(f"UTF-8 디코딩 실패, CP949로 시도: {filename}")
                with open(filepath, 'r', encoding='cp949') as f:
                    content = f.read()

            # Metadata attached to every chunk of this file.
            metadata = {
                "source": filename,
                "filename": filename,
                "filetype": file_ext,
                "filepath": filepath,
            }

            if file_ext == 'csv':
                # CSV files are split row by row.
                logger.info(f"CSV 파일 업로드 감지, 행 단위로 분할 처리: {filename}")
                docs = DocumentProcessor.csv_to_documents(content, metadata)
            else:
                # Any other file is chunked as plain text.
                docs = DocumentProcessor.text_to_documents(
                    content,
                    metadata=metadata,
                    chunk_size=512,
                    chunk_overlap=50,
                )

            if not docs:
                logger.warning(f"파일 '{filename}'에서 처리할 문서가 없습니다.")
                return jsonify({
                    "warning": True,
                    "message": f"파일 '{filename}'이 저장되었지만 처리할 내용이 없습니다."
                })

            logger.info(f"{len(docs)}개 문서 청크를 검색기에 추가합니다...")
            base_retriever.add_documents(docs)

            # Save the updated index.
            logger.info("검색기 상태를 저장합니다...")
            index_path = app.config['INDEX_PATH']
            try:
                base_retriever.save(index_path)
                logger.info("인덱스 저장 완료")
            except Exception as e:
                logger.error(f"인덱스 저장 중 오류 발생: {e}")
                return jsonify({"error": f"인덱스 저장 중 오류: {str(e)}"}), 500

            return jsonify({
                "success": True,
                "message": f"파일 '{filename}'가 성공적으로 업로드되고 {len(docs)}개 청크가 추가되었습니다."
            })

        except Exception as e:
            logger.error(f"문서 '{filename}' 처리 중 오류 발생: {e}", exc_info=True)
            return jsonify({"error": f"문서 처리 중 오류: {str(e)}"}), 500

    except Exception as e:
        logger.error(f"파일 업로드 중 오류 발생: {e}", exc_info=True)
        return jsonify({"error": f"파일 업로드 중 오류: {str(e)}"}), 500
|
105 |
+
|
106 |
+
@app.route('/api/documents', methods=['GET'])
@login_required
def list_documents():
    """Return the knowledge-base documents grouped by their source.

    Walks ``base_retriever.documents`` (when the retriever exists and holds
    documents), counts the chunks per ``source`` value and reports the
    ``filename`` and ``filetype`` of the first chunk seen for each source.

    Returns:
        A JSON response with ``documents`` (sorted by chunk count, largest
        first), ``total_documents`` and ``total_chunks``; a 503 error payload
        while ``app_ready`` is false; a 500 error payload if anything raises.
    """
    # Refuse to work until application start-up has finished.
    if not app_ready:
        return jsonify({"error": "앱이 아직 초기화 중입니다. 잠시 후 다시 시도해주세요."}), 503

    try:
        # Per-source summary, in first-seen order.
        sources = {}

        if base_retriever and base_retriever.documents:
            for doc in base_retriever.documents:
                source = doc.get("source", "unknown")
                entry = sources.get(source)
                if entry is None:
                    sources[source] = {
                        "filename": doc.get("filename", source),
                        "chunks": 1,
                        "filetype": doc.get("filetype", "unknown"),
                    }
                else:
                    entry["chunks"] += 1

        # Flatten to a list, largest document first (ties keep insertion order).
        documents = sorted(
            ({"source": source, **info} for source, info in sources.items()),
            key=lambda item: item["chunks"],
            reverse=True,
        )

        return jsonify({
            "documents": documents,
            "total_documents": len(documents),
            "total_chunks": sum(item["chunks"] for item in documents)
        })

    except Exception as e:
        logger.error(f"문서 목록 조회 중 오류 발생: {e}", exc_info=True)
        return jsonify({"error": f"문서 목록 조회 중 오류: {str(e)}"}), 500
|
154 |
+
|
155 |
+
# ์ ์ ํ์ผ ์๋น
|
156 |
+
@app.route('/static/<path:path>')
def send_static(path):
    """Serve the file at *path* from the ``static`` directory."""
    static_dir = 'static'
    return send_from_directory(static_dir, path)


# Check and fix session-cookie handling
|
159 |
+
@app.before_request
def process_cookies():
    """Inspect the client-supplied ``session_data`` cookie before each request.

    The cookie is plain JSON that the browser (or any HTTP client) can write
    freely; it is not signed. It must therefore never be used to mark the
    signed Flask session as logged in: doing so lets anyone authenticate by
    sending ``session_data={"logged_in": true}``. This hook only logs that
    such a cookie was presented and leaves ``session`` untouched.

    NOTE(review): any other code path that trusts ``session_data`` for
    authentication needs the same treatment.

    Returns:
        None, so Flask continues with normal request handling.
    """
    # Nothing to do without the manual cookie, or when already logged in.
    if 'session_data' not in request.cookies or 'logged_in' in session:
        return

    try:
        cookie_data = json.loads(request.cookies.get('session_data'))
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a ValueError subclass.
        logger.error(f"\n[Before Request] 쿠키 처리 오류: {e}")
        return

    if isinstance(cookie_data, dict) and cookie_data.get('logged_in'):
        # Do not restore the session from an unsigned, client-controlled cookie.
        logger.warning("\n[Before Request] 서명되지 않은 수동 쿠키는 인증에 사용하지 않습니다.")


# Improved session handling for the Hugging Face environment
|
173 |
+
@app.after_request
|
174 |
+
def after_request_func(response):
|
175 |
+
# ์ธ์
์ด ์์ ๋์๋์ง ํ์ธ
|
176 |
+
if session.modified:
|
177 |
+
logger.info("\n[After Request] ์ธ์
์ด ์์ ๋์์ต๋๋ค.")
|
178 |
+
logger.info(f"\ud604์ฌ ์ธ์
๋ด์ฉ: {session}")
|
179 |
+
|
180 |
+
# ์๋ต ํค๋ ๋ก๊น
|
181 |
+
logger.info("\n[Response Headers]")
|
182 |
+
for header, value in response.headers:
|
183 |
+
logger.info(f" {header}: {value}")
|
184 |
+
|
185 |
+
# ์ฟ ํค ์ค์
|
186 |
+
if 'Set-Cookie' in response.headers:
|
187 |
+
logger.info(f"Set-Cookie ํค๋ ์์: {response.headers['Set-Cookie']}")
|
188 |
+
else:
|
189 |
+
# ๋ก๊ทธ์ธ ํ ์ธ์
์ฟ ํค๊ฐ ์์ผ๋ฉด ์ธ์
๊ฐ์ ํ์ธ
|
190 |
+
if 'logged_in' in session and request.path != '/login':
|
191 |
+
logger.info("์ธ์
์ logged_in์ด ์์ง๋ง ์ฟ ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
|
192 |
+
|
193 |
+
# ํ๊น
ํ์ด์ค ํ๋ก์ ๊ด๋ จ ํค๋ ์ฒ๋ฆฌ
|
194 |
+
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
195 |
+
response.headers['Pragma'] = 'no-cache'
|
196 |
+
response.headers['Expires'] = '0'
|
197 |
+
|
198 |
+
return response"""
|
199 |
RAG ๊ฒ์ ์ฑ๋ด ์น ์ ํ๋ฆฌ์ผ์ด์
|
200 |
"""
|
201 |
|
|
|
220 |
# ํ๊ฒฝ ๋ณ์ ๋ก๋
|
221 |
load_dotenv()
|
222 |
|
223 |
+
# ํ๊ฒฝ ๋ณ์ ๋ก๋ ์ํ ํ์ธ ๋ฐ ๋ก๊น
|
224 |
+
# Admin credentials come from the environment (populated by load_dotenv()).
ADMIN_USERNAME = os.getenv('ADMIN_USERNAME')
ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD')

# Log only whether each value is present, never the value itself.
logger.info("==== 환경 변수 로드 상태 ====")
logger.info(f"ADMIN_USERNAME 설정 여부: {ADMIN_USERNAME is not None}")
logger.info(f"ADMIN_PASSWORD 설정 여부: {ADMIN_PASSWORD is not None}")

# Fall back to built-in defaults when a variable is missing or empty.
if not ADMIN_USERNAME:
    ADMIN_USERNAME = 'admin'
    logger.warning("ADMIN_USERNAME 환경변수가 없어 기본값 'admin'으로 설정합니다.")

if not ADMIN_PASSWORD:
    # NOTE(review): a built-in default password is itself a risk; consider
    # refusing to start without ADMIN_PASSWORD in production.
    ADMIN_PASSWORD = 'rag12345'
    # The password value is deliberately kept out of the log.
    logger.warning("ADMIN_PASSWORD 환경변수가 없어 기본값으로 설정합니다.")
|
239 |
+
|
240 |
# ๋ก์ปฌ ๋ชจ๋ ์ํฌํธ
|
241 |
from utils.vito_stt import VitoSTT
|
242 |
from utils.llm_interface import LLMInterface
|
|
|
250 |
# ์ธ์
์ค์ - ๊ณ ์ ๋ ์ํฌ๋ฆฟ ํค ์ฌ์ฉ
|
251 |
# Session signing key. An environment override is preferred so the key does
# not have to live in the repository; the previous fixed value remains the
# fallback so existing deployments and their session cookies keep working.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'rag_chatbot_fixed_secret_key_12345')

# Additional session settings, tuned for the Hugging Face environment.
app.config['SESSION_COOKIE_SECURE'] = False  # cookie is also sent over plain HTTP
app.config['SESSION_COOKIE_HTTPONLY'] = True  # keep JavaScript away from the cookie
# NOTE(review): Python None omits the SameSite attribute, and browsers then
# default to Lax. If the goal is to work inside a cross-site iframe, this
# needs the string 'None' together with SESSION_COOKIE_SECURE = True - confirm.
app.config['SESSION_COOKIE_SAMESITE'] = None
app.config['SESSION_COOKIE_DOMAIN'] = None  # no explicit cookie domain
app.config['SESSION_COOKIE_PATH'] = '/'  # cookie applies to every path
app.config['PERMANENT_SESSION_LIFETIME'] = datetime.timedelta(days=1)  # longer session lifetime

# Maximum upload size (10MB)
app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024
|
|
|
296 |
logger.info(f"ํ์ฌ ์ธ์
๊ฐ์ฒด: {session}")
|
297 |
logger.info(f"ํ์ฌ ์ธ์
์ํ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
|
298 |
logger.info(f"ํ์ฌ ์ธ์
์ผ์ด: {request.cookies.get('session', 'None')}")
|
299 |
+
|
300 |
+
# ์๋ ์ฟ ํค ํ์ธ
|
301 |
+
has_manual_cookie = False
|
302 |
+
if 'session_data' in request.cookies:
|
303 |
+
try:
|
304 |
+
cookie_data = json.loads(request.cookies.get('session_data'))
|
305 |
+
logger.info(f"์๋ ์ฟ ํค ๋ฐ์ดํฐ: {cookie_data}")
|
306 |
+
has_manual_cookie = cookie_data.get('logged_in', False)
|
307 |
+
except:
|
308 |
+
pass
|
309 |
+
|
310 |
+
# ์ธ์
๋๋ ์๋ ์ฟ ํค ์ค ํ๋๋ผ๋ ์์ผ๋ฉด ์ธ์ฆ ์ฑ๊ณต
|
311 |
+
if 'logged_in' not in session and not has_manual_cookie:
|
312 |
logger.warning(f"๋น๋ก๊ทธ์ธ ์ํ์์ {request.path} ์ ๊ทผ ์๋, ๋ก๊ทธ์ธ ํ์ด์ง๋ก ๋ฆฌ๋๋ ์
")
|
313 |
return redirect(url_for('login'))
|
314 |
|
|
|
432 |
logger.info("-------------- ๋ก๊ทธ์ธ ํ์ด์ง ์ ์ --------------")
|
433 |
logger.info(f"Method: {request.method}")
|
434 |
|
435 |
+
# ๊ฒฝ๋ก ๋ก๊ทธ ์ถ๊ฐ
|
436 |
+
logger.info(f"Request Path: {request.path}")
|
437 |
+
logger.info(f"Request Host: {request.host}")
|
438 |
+
logger.info(f"Request URL: {request.url}")
|
439 |
+
|
440 |
+
# ๋ชจ๋ ํค๋ ๋ก๊ทธ
|
441 |
+
logger.info("Request Headers:")
|
442 |
+
for header, value in request.headers.items():
|
443 |
+
logger.info(f" {header}: {value}")
|
444 |
+
|
445 |
if request.method == 'POST':
|
446 |
logger.info("๋ก๊ทธ์ธ ์๋ ๋ฐ์")
|
447 |
|
|
|
452 |
# ๋น๋ฐ๋ฒํธ๋ ์ผ๋ถ ๊ฒ์ถ ํ ๋ก๊น
|
453 |
logger.info(f"๋น๋ฐ๋ฒํธ ์
๋ ฅ ์ฌ๋ถ: {len(password) > 0}")
|
454 |
|
455 |
+
# ํ๊น
ํ์ด์ค ํ๊ฒฝ๋ณ์ ์ง์ ํ์ธ
|
456 |
+
# .env์์ ๊ฐ์ ธ์ค๊ธฐ
|
457 |
+
valid_username = os.environ.get('ADMIN_USERNAME') or os.getenv('ADMIN_USERNAME')
|
458 |
+
valid_password = os.environ.get('ADMIN_PASSWORD') or os.getenv('ADMIN_PASSWORD')
|
459 |
+
|
460 |
+
# ํ๊ฒฝ๋ณ์ ๋๋ฒ๊น
|
461 |
+
logger.info("\n[Environment Variables]")
|
462 |
+
logger.info(f"ADMIN_USERNAME from os.environ: {os.environ.get('ADMIN_USERNAME')}")
|
463 |
+
logger.info(f"ADMIN_PASSWORD from os.environ: {os.environ.get('ADMIN_PASSWORD') is not None}")
|
464 |
+
logger.info(f"ADMIN_USERNAME from os.getenv: {os.getenv('ADMIN_USERNAME')}")
|
465 |
+
logger.info(f"ADMIN_PASSWORD from os.getenv: {os.getenv('ADMIN_PASSWORD') is not None}")
|
466 |
|
467 |
logger.info(f"ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ ์ฌ์ฉ์๋ช
: {valid_username if valid_username else '์ ์๋์ง ์์'}")
|
468 |
logger.info(f"ํ๊ฒฝ๋ณ์์์ ๋น๋ฐ๋ฒํธ ๊ฐ์ ธ์จ ์ฌ๋ถ: {len(valid_password) > 0 if valid_password else False}")
|
469 |
|
470 |
+
# ํ๊น
ํ์ด์ค์์ ํ๊ฒฝ๋ณ์๊ฐ ๋ก๋๋์ง ์์ ๊ฒฝ์ฐ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
471 |
+
if not valid_username or not valid_password:
|
472 |
+
logger.warning("ํ๊ฒฝ๋ณ์์์ ์ฌ์ฉ์ ์๊ฒฉ์ฆ๋ช
์ ์ฐพ์ ์ ์์ด ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ")
|
473 |
+
valid_username = "admin"
|
474 |
+
valid_password = "rag12345"
|
475 |
+
|
476 |
if username == valid_username and password == valid_password:
|
477 |
logger.info(f"๋ก๊ทธ์ธ ์ฑ๊ณต: {username}")
|
478 |
# ์ธ์
์ํ ํ์ธ ๋ฐ ๋ก๊น
|
|
|
486 |
# ์ธ์
์ค์ ํ ๋ก๊น
|
487 |
logger.info(f"์ธ์
์ค์ ํ ์ธ์
์ํ: {session}")
|
488 |
logger.info("์ธ์
์ค์ ์๋ฃ, ๋ฆฌ๋๋ ์
์๋")
|
489 |
+
|
490 |
+
# ์ธ์
์ฟ ํค ์ค์ ํ์ธ
|
491 |
+
response = redirect(url_for('index'))
|
492 |
+
logger.info(f"Response Headers: {response.headers}")
|
493 |
+
return response
|
494 |
else:
|
495 |
logger.warning("๋ก๊ทธ์ธ ์คํจ: ์์ด๋ ๋๋ ๋น๋ฐ๋ฒํธ ๋ถ์ผ์น")
|
496 |
# ์ด๋ค ๋ถ๋ถ์ด ์ผ์นํ์ง ์๋์ง ์์ธ ๋ก๊น
|
|
|
699 |
audio_file = request.files['audio']
|
700 |
logger.info(f"์์ ๋ ํ์ผ: {audio_file.filename}")
|
701 |
|
702 |
+
try:
|
703 |
+
# ์ค๋์ค ํ์ผ ์ฝ๊ธฐ
|
704 |
+
with audio_file.stream as f:
|
705 |
+
audio_bytes = f.read()
|
706 |
+
|
707 |
+
# ์์ฑ์ธ์ (VitoSTT)
|
708 |
+
stt = VitoSTT()
|
709 |
+
stt_result = stt.transcribe_audio(audio_bytes, language="ko")
|
710 |
+
|
711 |
+
if not stt_result["success"]:
|
712 |
+
logger.error(f"์์ฑ์ธ์ ์คํจ: {stt_result['error']}")
|
713 |
+
return jsonify({
|
714 |
+
"error": stt_result["error"],
|
715 |
+
"details": stt_result.get("details", "")
|
716 |
+
}), 500
|
717 |
+
|
718 |
+
transcription = stt_result["text"]
|
719 |
+
if not transcription:
|
720 |
+
logger.warning("์์ฑ์ธ์ ๊ฒฐ๊ณผ๊ฐ ๋น์ด์์ต๋๋ค.")
|
721 |
+
return jsonify({"error": "์์ฑ์์ ํ
์คํธ๋ฅผ ์ธ์ํ์ง ๋ชปํ์ต๋๋ค."}), 400
|
722 |
+
|
723 |
+
logger.info(f"์์ฑ์ธ์ ์ฑ๊ณต: {transcription[:50]}...")
|
724 |
+
|
725 |
+
# ๊ฒ์๊ธฐ ํธ์ถ: ์ธ์๋ ํ
์คํธ๋ฅผ ์ฟผ๋ฆฌ๋ก ์ฌ์ฉ
|
726 |
+
sources = retriever.search(transcription, top_k=5, first_stage_k=6)
|
727 |
+
if not sources:
|
728 |
+
logger.warning("๊ฒ์๋ ์์ค๊ฐ ์์ต๋๋ค.")
|
729 |
+
sources = []
|
730 |
+
|
731 |
+
# ์์ค ๋ฌธ์ ๋ด์ฉ์ ์ปจํ
์คํธ๋ก ์ค๋น
|
732 |
+
context = DocumentProcessor.prepare_rag_context(sources, field="text")
|
733 |
+
logger.info(f"๊ฒ์๋ ์์ค ์: {len(sources)}")
|
734 |
+
|
735 |
+
# LLM ํธ์ถ: ์ง๋ฌธ๊ณผ ์ปจํ
์คํธ๋ฅผ ๋ฐํ์ผ๋ก ์๋ต ์์ฑ
|
736 |
+
llm_id = request.form.get('llm_id', None) # ํด๋ผ์ด์ธํธ์์ LLM ์ ํ์ด ์ ๊ณต๋๋ฉด ์ฌ์ฉ
|
737 |
+
answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
|
738 |
+
|
739 |
+
# ์์ค ์ ๋ณด ์ถ์ถ
|
740 |
+
enhanced_sources = []
|
741 |
+
for doc in sources:
|
742 |
+
if "source" in doc:
|
743 |
+
source_info = {
|
744 |
+
"source": doc.get("source", "Unknown"),
|
745 |
+
"score": doc.get("rerank_score", doc.get("score", 0))
|
746 |
+
}
|
747 |
+
|
748 |
+
# CSV ํ์ผ์ธ ๊ฒฝ์ฐ ์ฒซ ๋ฒ์งธ ์ปจํ
์ธ ๋ฐ์ดํฐ๋ฅผ ์ถ์ถํ์ฌ ํ์
|
749 |
+
if "text" in doc and "filetype" in doc and doc["filetype"] == "csv":
|
750 |
+
# ๋๋ฒ๊น
๋ก๊ทธ ์ถ๊ฐ
|
751 |
+
logger.info(f"[์์ฑ์ฑ] CSV ํ์ผ ์ฒ๋ฆฌ: {doc['source']}")
|
752 |
+
logger.info(f"[์์ฑ์ฑ] CSV ๋ด์ฉ ์ฒ์ ๋ถ๋ถ: {doc['text'][:100]}...")
|
753 |
+
|
754 |
+
# ์ฒซ ๋ฒ์งธ ๋ผ์ธ์ด๋ ๋ด์ฉ์์ ์ฝค๋ผ ๊ฐ ์ถ์ถ ์๋
|
755 |
+
try:
|
756 |
+
# ํ
์คํธ์ ์ฒ์ ๋ถ๋ถ์ ์ถ์ถ
|
757 |
+
text_lines = doc["text"].strip().split('\n')
|
758 |
+
logger.info(f"[์์ฑ์ฑ] CSV ๋ผ์ธ ๊ฐ์: {len(text_lines)}")
|
759 |
+
|
760 |
+
if len(text_lines) > 0:
|
761 |
+
first_line = text_lines[0].strip()
|
762 |
+
logger.info(f"[์์ฑ์ฑ] CSV ์ฒซ ์ค: {first_line}")
|
763 |
+
|
764 |
+
if ',' in first_line: # CSV ํ์์ด๋ฉด
|
765 |
+
first_columns = first_line.split(',')
|
766 |
+
logger.info(f"[์์ฑ์ฑ] CSV ์ฝค๋ผ ๊ฐ์: {len(first_columns)}")
|
767 |
+
|
768 |
+
first_column = first_columns[0].strip()
|
769 |
+
logger.info(f"[์์ฑ์ฑ] CSV ์ฒซ ๋ฒ์งธ ์ฝค๋ผ ๊ฐ: '{first_column}'")
|
770 |
+
source_info["id"] = first_column
|
771 |
+
logger.info(f"[์์ฑ์ฑ] source_info์ id ์ถ๊ฐ: {source_info}")
|
772 |
+
else:
|
773 |
+
logger.warning(f"[์์ฑ์ฑ] CSV ํ์ผ์ด์ง๋ง ์ฝ๋ง๊ฐ ์์: {first_line}")
|
774 |
+
else:
|
775 |
+
logger.warning(f"[์์ฑ์ฑ] CSV ํ์ผ์ด์ง๋ง ๋ผ์ธ์ด ์์: {doc['source']}")
|
776 |
+
except Exception as e:
|
777 |
+
logger.warning(f"[์์ฑ์ฑ] CSV ์ฒซ ๋ฒ์งธ ์ฝค๋ผ ์ถ์ถ ์คํจ: {e}")
|
778 |
+
|
779 |
+
enhanced_sources.append(source_info)
|
780 |
+
|
781 |
+
# ์ต์ข
์๋ต ๊ตฌ์กฐ ๋ก๊น
|
782 |
+
response_data = {
|
783 |
+
"transcription": transcription,
|
784 |
+
"answer": answer,
|
785 |
+
"sources": enhanced_sources,
|
786 |
+
"llm": llm_interface.get_current_llm_details()
|
787 |
+
}
|
788 |
+
logger.debug(f"[์์ฑ์ฑ] ์ต์ข
API ์๋ต ๊ตฌ์กฐ: {json.dumps(response_data, ensure_ascii=False, indent=2)[:500]}...")
|
789 |
+
|
790 |
+
return jsonify(response_data)
|
791 |
+
|
792 |
+
except Exception as e:
|
793 |
+
logger.error(f"์์ฑ ์ฑ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", exc_info=True)
|
794 |
+
return jsonify({
|
795 |
+
"error": "์์ฑ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ ๋ฐ์",
|
796 |
+
"details": str(e)
|
797 |
+
}), 500
|
798 |
+
"""
|
799 |
+
logger.info("์์ฑ ์ฑ ์์ฒญ ์์ ")
|
800 |
+
|
801 |
+
# ์ค๋์ค ํ์ผ ํ์ธ
|
802 |
+
if 'audio' not in request.files:
|
803 |
+
logger.error("์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์")
|
804 |
+
return jsonify({"error": "์ค๋์ค ํ์ผ์ด ์ ๊ณต๋์ง ์์์ต๋๋ค."}), 400
|
805 |
+
|
806 |
+
audio_file = request.files['audio']
|
807 |
+
logger.info(f"์์ ๋ ํ์ผ: {audio_file.filename}")
|
808 |
+
|
809 |
try:
|
810 |
# ์ค๋์ค ํ์ผ ์ฝ๊ธฐ
|
811 |
with audio_file.stream as f:
|