jeongsoo commited on
Commit
6539bc3
ยท
1 Parent(s): d9301c5

Add application file

Browse files
Files changed (1) hide show
  1. app/app.py +373 -10
app/app.py CHANGED
@@ -1,4 +1,201 @@
1
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  RAG ๊ฒ€์ƒ‰ ์ฑ—๋ด‡ ์›น ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜
3
  """
4
 
@@ -23,6 +220,23 @@ logger = logging.getLogger(__name__)
23
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
24
  load_dotenv()
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # ๋กœ์ปฌ ๋ชจ๋“ˆ ์ž„ํฌํŠธ
27
  from utils.vito_stt import VitoSTT
28
  from utils.llm_interface import LLMInterface
@@ -36,11 +250,13 @@ app = Flask(__name__)
36
  # ์„ธ์…˜ ์„ค์ • - ๊ณ ์ •๋œ ์‹œํฌ๋ฆฟ ํ‚ค ์‚ฌ์šฉ
37
  app.secret_key = 'rag_chatbot_fixed_secret_key_12345' # ๊ณ ์ •๋œ ์‹œํฌ๋ฆฟ ํ‚ค
38
 
39
- # ์„ธ์…˜ ์„ค์ • ์ถ”๊ฐ€
40
- app.config['SESSION_COOKIE_SECURE'] = False # HTTPS์—์„œ๋งŒ ์ฟ ํ‚ค ์ „์†ก (False๋กœ ์„ค์ •ํ•˜์—ฌ HTTP์—์„œ๋„ ์„ธ์…˜ ์‚ฌ์šฉ ๊ฐ€๋Šฅ)
41
  app.config['SESSION_COOKIE_HTTPONLY'] = True # JavaScript์—์„œ ์ฟ ํ‚ค ์ ‘๊ทผ ๋ฐฉ์ง€
42
- app.config['SESSION_COOKIE_SAMESITE'] = 'Lax' # CSRF ๋ฐฉ์ง€
43
- app.config['PERMANENT_SESSION_LIFETIME'] = datetime.timedelta(hours=5) # ์„ธ์…˜ ์œ ํšจ ์‹œ๊ฐ„
 
 
44
 
45
  # ์ตœ๋Œ€ ํŒŒ์ผ ํฌ๊ธฐ ์„ค์ • (10MB)
46
  app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024
@@ -80,7 +296,19 @@ def login_required(f):
80
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ๊ฐ์ฒด: {session}")
81
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ์ƒํƒœ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
82
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ์ผ€์ด: {request.cookies.get('session', 'None')}")
83
- if 'logged_in' not in session:
 
 
 
 
 
 
 
 
 
 
 
 
84
  logger.warning(f"๋น„๋กœ๊ทธ์ธ ์ƒํƒœ์—์„œ {request.path} ์ ‘๊ทผ ์‹œ๋„, ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
85
  return redirect(url_for('login'))
86
 
@@ -204,6 +432,16 @@ def login():
204
  logger.info("-------------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ์ ‘์† --------------")
205
  logger.info(f"Method: {request.method}")
206
 
 
 
 
 
 
 
 
 
 
 
207
  if request.method == 'POST':
208
  logger.info("๋กœ๊ทธ์ธ ์‹œ๋„ ๋ฐ›์Œ")
209
 
@@ -214,13 +452,27 @@ def login():
214
  # ๋น„๋ฐ€๋ฒˆํ˜ธ๋Š” ์ผ๋ถ€ ๊ฒ€์ถœ ํ›„ ๋กœ๊น…
215
  logger.info(f"๋น„๋ฐ€๋ฒˆํ˜ธ ์ž…๋ ฅ ์—ฌ๋ถ€: {len(password) > 0}")
216
 
217
- # .env์—์„œ ๊ณ„์ • ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
218
- valid_username = os.getenv('ADMIN_USERNAME')
219
- valid_password = os.getenv('ADMIN_PASSWORD')
 
 
 
 
 
 
 
 
220
 
221
  logger.info(f"ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ๊ฐ€์ ธ์˜จ ์‚ฌ์šฉ์ž๋ช…: {valid_username if valid_username else '์ •์˜๋˜์ง€ ์•Š์Œ'}")
222
  logger.info(f"ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ๋น„๋ฐ€๋ฒˆํ˜ธ ๊ฐ€์ ธ์˜จ ์—ฌ๋ถ€: {len(valid_password) > 0 if valid_password else False}")
223
 
 
 
 
 
 
 
224
  if username == valid_username and password == valid_password:
225
  logger.info(f"๋กœ๊ทธ์ธ ์„ฑ๊ณต: {username}")
226
  # ์„ธ์…˜ ์ƒํƒœ ํ™•์ธ ๋ฐ ๋กœ๊น…
@@ -234,7 +486,11 @@ def login():
234
  # ์„ธ์…˜ ์„ค์ • ํ›„ ๋กœ๊น…
235
  logger.info(f"์„ธ์…˜ ์„ค์ • ํ›„ ์„ธ์…˜ ์ƒํƒœ: {session}")
236
  logger.info("์„ธ์…˜ ์„ค์ • ์™„๋ฃŒ, ๋ฆฌ๋””๋ ‰์…˜ ์‹œ๋„")
237
- return redirect(url_for('index'))
 
 
 
 
238
  else:
239
  logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
240
  # ์–ด๋–ค ๋ถ€๋ถ„์ด ์ผ์น˜ํ•˜์ง€ ์•Š๋Š”์ง€ ์ƒ์„ธ ๋กœ๊น…
@@ -443,6 +699,113 @@ def voice_chat():
443
  audio_file = request.files['audio']
444
  logger.info(f"์ˆ˜์‹ ๋œ ํŒŒ์ผ: {audio_file.filename}")
445
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  try:
447
  # ์˜ค๋””์˜ค ํŒŒ์ผ ์ฝ๊ธฐ
448
  with audio_file.stream as f:
 
1
+ @app.route('/api/upload', methods=['POST'])
2
+ @login_required
3
+ def upload_document():
4
+ """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ์—…๋กœ๋“œ API"""
5
+ global base_retriever, retriever, app_ready
6
+
7
+ # ์•ฑ ์ค€๋น„ ์ƒํƒœ ํ™•์ธ
8
+ if not app_ready:
9
+ return jsonify({"error": "์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503
10
+
11
+ try:
12
+ # ํŒŒ์ผ์ด ์š”์ฒญ์— ํฌํ•จ๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธ
13
+ if 'document' not in request.files:
14
+ return jsonify({"error": "๋ฌธ์„œ ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
15
+
16
+ doc_file = request.files['document']
17
+ logger.info(f"๋ฐ›์€ ํŒŒ์ผ๋ช…: {doc_file.filename}")
18
+
19
+ # ํŒŒ์ผ๋ช…์ด ๋น„์–ด์žˆ๋Š”์ง€ ํ™•์ธ
20
+ if doc_file.filename == '':
21
+ return jsonify({"error": "์„ ํƒ๋œ ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."}), 400
22
+
23
+ # ํŒŒ์ผ ํ˜•์‹ ํ™•์ธ
24
+ if not allowed_doc_file(doc_file.filename):
25
+ logger.error(f"ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {doc_file.filename}")
26
+ return jsonify({"error": "ํ—ˆ์šฉ๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ํ˜„์žฌ ํ—ˆ์šฉ๋œ ํŒŒ์ผ ํ˜•์‹: {}".format(', '.join(ALLOWED_DOC_EXTENSIONS))}), 400
27
+
28
+ # ํŒŒ์ผ๋ช… ๋ณด์•ˆ ์ฒ˜๋ฆฌ
29
+ filename = secure_filename(doc_file.filename)
30
+
31
+ # ๋ฐ์ดํ„ฐ ํด๋”์— ์ €์žฅ
32
+ filepath = os.path.join(app.config['DATA_FOLDER'], filename)
33
+ doc_file.save(filepath)
34
+
35
+ logger.info(f"๋ฌธ์„œ๊ฐ€ ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {filepath}")
36
+
37
+ # ๋ฌธ์„œ ์ฒ˜๋ฆฌ
38
+ try:
39
+ # ๋จผ์ € UTF-8๋กœ ์‹œ๋„
40
+ try:
41
+ with open(filepath, 'r', encoding='utf-8') as f:
42
+ content = f.read()
43
+ except UnicodeDecodeError:
44
+ # UTF-8๋กœ ์‹คํŒจํ•˜๋ฉด CP949(ํ•œ๊ตญ์–ด Windows ๊ธฐ๋ณธ ์ธ์ฝ”๋”ฉ)๋กœ ์‹œ๋„
45
+ logger.info(f"UTF-8 ๋””์ฝ”๋”ฉ ์‹คํŒจ, CP949๋กœ ์‹œ๋„: {filename}")
46
+ with open(filepath, 'r', encoding='cp949') as f:
47
+ content = f.read()
48
+
49
+ # ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ƒ์„ฑ
50
+ metadata = {
51
+ "source": filename,
52
+ "filename": filename,
53
+ "filetype": filename.rsplit('.', 1)[1].lower(),
54
+ "filepath": filepath
55
+ }
56
+
57
+ # ํŒŒ์ผ ํ˜•์‹์— ๋”ฐ๋ผ ๋‹ค๋ฅธ ์ฒ˜๋ฆฌ ์ ์šฉ
58
+ file_ext = filename.rsplit('.', 1)[1].lower()
59
+
60
+ # CSV ํŒŒ์ผ์€ ํ–‰ ๋‹จ์œ„๋กœ ์ฒ˜๋ฆฌ
61
+ if file_ext == 'csv':
62
+ logger.info(f"CSV ํŒŒ์ผ ์—…๋กœ๋“œ ๊ฐ์ง€, ํ–‰ ๋‹จ์œ„๋กœ ๋ถ„ํ•  ์ฒ˜๋ฆฌ: {filename}")
63
+ docs = DocumentProcessor.csv_to_documents(content, metadata)
64
+ else:
65
+ # ์ผ๋ฐ˜ ํ…์ŠคํŠธ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
66
+ docs = DocumentProcessor.text_to_documents(
67
+ content,
68
+ metadata=metadata,
69
+ chunk_size=512,
70
+ chunk_overlap=50
71
+ )
72
+
73
+ if docs:
74
+ logger.info(f"{len(docs)}๊ฐœ ๋ฌธ์„œ ์ฒญํฌ๋ฅผ ๊ฒ€์ƒ‰๊ธฐ์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค...")
75
+ base_retriever.add_documents(docs)
76
+
77
+ # ์ธ๋ฑ์Šค ์ €์žฅ
78
+ logger.info(f"๊ฒ€์ƒ‰๊ธฐ ์ƒํƒœ๋ฅผ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค...")
79
+ index_path = app.config['INDEX_PATH']
80
+ try:
81
+ base_retriever.save(index_path)
82
+ logger.info("์ธ๋ฑ์Šค ์ €์žฅ ์™„๋ฃŒ")
83
+ except Exception as e:
84
+ logger.error(f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
85
+ return jsonify({"error": f"์ธ๋ฑ์Šค ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
86
+
87
+ return jsonify({
88
+ "success": True,
89
+ "message": f"ํŒŒ์ผ '{filename}'๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜๊ณ  {len(docs)}๊ฐœ ์ฒญํฌ๊ฐ€ ์ถ”๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
90
+ })
91
+ else:
92
+ logger.warning(f"ํŒŒ์ผ '{filename}'์—์„œ ์ฒ˜๋ฆฌํ•  ๋ฌธ์„œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
93
+ return jsonify({
94
+ "warning": True,
95
+ "message": f"ํŒŒ์ผ '{filename}'์ด ์ €์žฅ๋˜์—ˆ์ง€๋งŒ ์ฒ˜๋ฆฌํ•  ๋‚ด์šฉ์ด ์—†์Šต๋‹ˆ๋‹ค."
96
+ })
97
+
98
+ except Exception as e:
99
+ logger.error(f"๋ฌธ์„œ '{filename}' ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
100
+ return jsonify({"error": f"๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
101
+
102
+ except Exception as e:
103
+ logger.error(f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
104
+ return jsonify({"error": f"ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
105
+
106
+ @app.route('/api/documents', methods=['GET'])
107
+ @login_required
108
+ def list_documents():
109
+ """์ง€์‹๋ฒ ์ด์Šค ๋ฌธ์„œ ๋ชฉ๋ก API"""
110
+ global base_retriever, retriever, app_ready
111
+
112
+ # ์•ฑ ์ค€๋น„ ์ƒํƒœ ํ™•์ธ
113
+ if not app_ready:
114
+ return jsonify({"error": "์•ฑ์ด ์•„์ง ์ดˆ๊ธฐํ™” ์ค‘์ž…๋‹ˆ๋‹ค. ์ž ์‹œ ๏ฟฝ๏ฟฝ๏ฟฝ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."}), 503
115
+
116
+ try:
117
+ # ๋ฌธ์„œ ์†Œ์Šค ๋ชฉ๋ก ์ƒ์„ฑ
118
+ sources = {}
119
+
120
+ if base_retriever and base_retriever.documents:
121
+ for doc in base_retriever.documents:
122
+ source = doc.get("source", "unknown")
123
+ if source in sources:
124
+ sources[source]["chunks"] += 1
125
+ else:
126
+ sources[source] = {
127
+ "filename": doc.get("filename", source),
128
+ "chunks": 1,
129
+ "filetype": doc.get("filetype", "unknown")
130
+ }
131
+
132
+ # ๋ชฉ๋ก ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
133
+ documents = []
134
+ for source, info in sources.items():
135
+ documents.append({
136
+ "source": source,
137
+ "filename": info["filename"],
138
+ "chunks": info["chunks"],
139
+ "filetype": info["filetype"]
140
+ })
141
+
142
+ # ์ฒญํฌ ์ˆ˜๋กœ ์ •๋ ฌ
143
+ documents.sort(key=lambda x: x["chunks"], reverse=True)
144
+
145
+ return jsonify({
146
+ "documents": documents,
147
+ "total_documents": len(documents),
148
+ "total_chunks": sum(doc["chunks"] for doc in documents)
149
+ })
150
+
151
+ except Exception as e:
152
+ logger.error(f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
153
+ return jsonify({"error": f"๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"}), 500
154
+
155
+ # ์ •์  ํŒŒ์ผ ์„œ๋น™
156
+ @app.route('/static/<path:path>')
157
+ def send_static(path):
158
+ return send_from_directory('static', path)# ์„ธ์…˜ ์ฟ ํ‚ค ์ฒ˜๋ฆฌ ํ™•์ธ ๋ฐ ์ˆ˜์ •
159
+ @app.before_request
160
+ def process_cookies():
161
+ # ์ˆ˜๋™ ์ฟ ํ‚ค ์ฒ˜๋ฆฌ ํ™•์ธ
162
+ if 'session_data' in request.cookies and 'logged_in' not in session:
163
+ try:
164
+ cookie_data = json.loads(request.cookies.get('session_data'))
165
+ logger.info(f"\n[Before Request] ์ˆ˜๋™ ์ฟ ํ‚ค ๊ฐ’ ๋ฐœ๊ฒฌ: {cookie_data}")
166
+ if cookie_data.get('logged_in'):
167
+ # ์„ธ์…˜ ์žฌ๊ตฌ์„ฑ
168
+ session['logged_in'] = True
169
+ session['username'] = cookie_data.get('username')
170
+ logger.info(f"\n[Before Request] ์ˆ˜๋™ ์ฟ ํ‚ค์—์„œ ์„ธ์…˜ ๋ณต์›: {session}")
171
+ except Exception as e:
172
+ logger.error(f"\n[Before Request] ์ฟ ํ‚ค ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")# ํ—ˆ๊น…ํŽ˜์ด์Šค ํ™˜๊ฒฝ์„ ์œ„ํ•œ ์„ธ์…˜ ์ฒ˜๋ฆฌ ํ–ฅ์ƒ
173
+ @app.after_request
174
+ def after_request_func(response):
175
+ # ์„ธ์…˜์ด ์ˆ˜์ •๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
176
+ if session.modified:
177
+ logger.info("\n[After Request] ์„ธ์…˜์ด ์ˆ˜์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
178
+ logger.info(f"\ud604์žฌ ์„ธ์…˜ ๋‚ด์šฉ: {session}")
179
+
180
+ # ์‘๋‹ต ํ—ค๋” ๋กœ๊น…
181
+ logger.info("\n[Response Headers]")
182
+ for header, value in response.headers:
183
+ logger.info(f" {header}: {value}")
184
+
185
+ # ์ฟ ํ‚ค ์„ค์ •
186
+ if 'Set-Cookie' in response.headers:
187
+ logger.info(f"Set-Cookie ํ—ค๋” ์žˆ์Œ: {response.headers['Set-Cookie']}")
188
+ else:
189
+ # ๋กœ๊ทธ์ธ ํ›„ ์„ธ์…˜ ์ฟ ํ‚ค๊ฐ€ ์—†์œผ๋ฉด ์„ธ์…˜ ๊ฐ’์„ ํ™•์ธ
190
+ if 'logged_in' in session and request.path != '/login':
191
+ logger.info("์„ธ์…˜์— logged_in์ด ์žˆ์ง€๋งŒ ์ฟ ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
192
+
193
+ # ํ—ˆ๊น…ํŽ˜์ด์Šค ํ”„๋ก์‹œ ๊ด€๋ จ ํ—ค๋” ์ฒ˜๋ฆฌ
194
+ response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
195
+ response.headers['Pragma'] = 'no-cache'
196
+ response.headers['Expires'] = '0'
197
+
198
+ return response"""
199
  RAG ๊ฒ€์ƒ‰ ์ฑ—๋ด‡ ์›น ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜
200
  """
201
 
 
220
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
221
  load_dotenv()
222
 
223
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ ์ƒํƒœ ํ™•์ธ ๋ฐ ๋กœ๊น…
224
+ ADMIN_USERNAME = os.getenv('ADMIN_USERNAME')
225
+ ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD')
226
+
227
+ logger.info(f"==== ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ ์ƒํƒœ ====")
228
+ logger.info(f"ADMIN_USERNAME ์„ค์ • ์—ฌ๋ถ€: {ADMIN_USERNAME is not None}")
229
+ logger.info(f"ADMIN_PASSWORD ์„ค์ • ์—ฌ๋ถ€: {ADMIN_PASSWORD is not None}")
230
+
231
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ์—†์œผ๋ฉด ๊ธฐ๋ณธ๊ฐ’ ์„ค์ •
232
+ if not ADMIN_USERNAME:
233
+ ADMIN_USERNAME = 'admin'
234
+ logger.warning("ADMIN_USERNAME ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์—†์–ด ๊ธฐ๋ณธ๊ฐ’ 'admin'์œผ๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.")
235
+
236
+ if not ADMIN_PASSWORD:
237
+ ADMIN_PASSWORD = 'rag12345'
238
+ logger.warning("ADMIN_PASSWORD ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์—†์–ด ๊ธฐ๋ณธ๊ฐ’ 'rag12345'๋กœ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.")
239
+
240
  # ๋กœ์ปฌ ๋ชจ๋“ˆ ์ž„ํฌํŠธ
241
  from utils.vito_stt import VitoSTT
242
  from utils.llm_interface import LLMInterface
 
250
  # ์„ธ์…˜ ์„ค์ • - ๊ณ ์ •๋œ ์‹œํฌ๋ฆฟ ํ‚ค ์‚ฌ์šฉ
251
  app.secret_key = 'rag_chatbot_fixed_secret_key_12345' # ๊ณ ์ •๋œ ์‹œํฌ๋ฆฟ ํ‚ค
252
 
253
+ # ์„ธ์…˜ ์„ค์ • ์ถ”๊ฐ€ - ํ—ˆ๊น…ํŽ˜์ด์Šค ํ™˜๊ฒฝ์— ๋งž๊ฒŒ ์กฐ์ •
254
+ app.config['SESSION_COOKIE_SECURE'] = False # HTTP ์—์„œ๋„ ์ฟ ํ‚ค ์ „์†ก ๊ฐ€๋Šฅ
255
  app.config['SESSION_COOKIE_HTTPONLY'] = True # JavaScript์—์„œ ์ฟ ํ‚ค ์ ‘๊ทผ ๋ฐฉ์ง€
256
+ app.config['SESSION_COOKIE_SAMESITE'] = None # ํ—ˆ๊น…ํŽ˜์ด์Šค ํ”„๋กœ์‹œ ๊ด€๋ จ ์ด์Šˆ ์ˆ˜์ •
257
+ app.config['SESSION_COOKIE_DOMAIN'] = None # ๋ชจ๋“  ๋„๋ฉ”์ธ์— ์ฟ ํ‚ค ์ ์šฉ
258
+ app.config['SESSION_COOKIE_PATH'] = '/' # ๋ชจ๋“  ๊ฒฝ๋กœ์— ์ฟ ํ‚ค ์ ์šฉ
259
+ app.config['PERMANENT_SESSION_LIFETIME'] = datetime.timedelta(days=1) # ์„ธ์…˜ ์œ ํšจ ์‹œ๊ฐ„ ์ฆ๊ฐ€
260
 
261
  # ์ตœ๋Œ€ ํŒŒ์ผ ํฌ๊ธฐ ์„ค์ • (10MB)
262
  app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024
 
296
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ๊ฐ์ฒด: {session}")
297
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ์ƒํƒœ: logged_in={session.get('logged_in', False)}, username={session.get('username', 'None')}")
298
  logger.info(f"ํ˜„์žฌ ์„ธ์…˜ ์ผ€์ด: {request.cookies.get('session', 'None')}")
299
+
300
+ # ์ˆ˜๋™ ์ฟ ํ‚ค ํ™•์ธ
301
+ has_manual_cookie = False
302
+ if 'session_data' in request.cookies:
303
+ try:
304
+ cookie_data = json.loads(request.cookies.get('session_data'))
305
+ logger.info(f"์ˆ˜๋™ ์ฟ ํ‚ค ๋ฐ์ดํ„ฐ: {cookie_data}")
306
+ has_manual_cookie = cookie_data.get('logged_in', False)
307
+ except:
308
+ pass
309
+
310
+ # ์„ธ์…˜ ๋˜๋Š” ์ˆ˜๋™ ์ฟ ํ‚ค ์ค‘ ํ•˜๋‚˜๋ผ๋„ ์žˆ์œผ๋ฉด ์ธ์ฆ ์„ฑ๊ณต
311
+ if 'logged_in' not in session and not has_manual_cookie:
312
  logger.warning(f"๋น„๋กœ๊ทธ์ธ ์ƒํƒœ์—์„œ {request.path} ์ ‘๊ทผ ์‹œ๋„, ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๋กœ ๋ฆฌ๋””๋ ‰์…˜")
313
  return redirect(url_for('login'))
314
 
 
432
  logger.info("-------------- ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€ ์ ‘์† --------------")
433
  logger.info(f"Method: {request.method}")
434
 
435
+ # ๊ฒฝ๋กœ ๋กœ๊ทธ ์ถ”๊ฐ€
436
+ logger.info(f"Request Path: {request.path}")
437
+ logger.info(f"Request Host: {request.host}")
438
+ logger.info(f"Request URL: {request.url}")
439
+
440
+ # ๋ชจ๋“  ํ—ค๋” ๋กœ๊ทธ
441
+ logger.info("Request Headers:")
442
+ for header, value in request.headers.items():
443
+ logger.info(f" {header}: {value}")
444
+
445
  if request.method == 'POST':
446
  logger.info("๋กœ๊ทธ์ธ ์‹œ๋„ ๋ฐ›์Œ")
447
 
 
452
  # ๋น„๋ฐ€๋ฒˆํ˜ธ๋Š” ์ผ๋ถ€ ๊ฒ€์ถœ ํ›„ ๋กœ๊น…
453
  logger.info(f"๋น„๋ฐ€๋ฒˆํ˜ธ ์ž…๋ ฅ ์—ฌ๋ถ€: {len(password) > 0}")
454
 
455
+ # ํ—ˆ๊น…ํŽ˜์ด์Šค ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์ง์ ‘ ํ™•์ธ
456
+ # .env์—์„œ ๊ฐ€์ ธ์˜ค๊ธฐ
457
+ valid_username = os.environ.get('ADMIN_USERNAME') or os.getenv('ADMIN_USERNAME')
458
+ valid_password = os.environ.get('ADMIN_PASSWORD') or os.getenv('ADMIN_PASSWORD')
459
+
460
+ # ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋””๋ฒ„๊น…
461
+ logger.info("\n[Environment Variables]")
462
+ logger.info(f"ADMIN_USERNAME from os.environ: {os.environ.get('ADMIN_USERNAME')}")
463
+ logger.info(f"ADMIN_PASSWORD from os.environ: {os.environ.get('ADMIN_PASSWORD') is not None}")
464
+ logger.info(f"ADMIN_USERNAME from os.getenv: {os.getenv('ADMIN_USERNAME')}")
465
+ logger.info(f"ADMIN_PASSWORD from os.getenv: {os.getenv('ADMIN_PASSWORD') is not None}")
466
 
467
  logger.info(f"ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ๊ฐ€์ ธ์˜จ ์‚ฌ์šฉ์ž๋ช…: {valid_username if valid_username else '์ •์˜๋˜์ง€ ์•Š์Œ'}")
468
  logger.info(f"ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ๋น„๋ฐ€๋ฒˆํ˜ธ ๊ฐ€์ ธ์˜จ ์—ฌ๋ถ€: {len(valid_password) > 0 if valid_password else False}")
469
 
470
+ # ํ—ˆ๊น…ํŽ˜์ด์Šค์—์„œ ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์„ ๊ฒฝ์šฐ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
471
+ if not valid_username or not valid_password:
472
+ logger.warning("ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ์‚ฌ์šฉ์ž ์ž๊ฒฉ์ฆ๋ช…์„ ์ฐพ์„ ์ˆ˜ ์—†์–ด ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ")
473
+ valid_username = "admin"
474
+ valid_password = "rag12345"
475
+
476
  if username == valid_username and password == valid_password:
477
  logger.info(f"๋กœ๊ทธ์ธ ์„ฑ๊ณต: {username}")
478
  # ์„ธ์…˜ ์ƒํƒœ ํ™•์ธ ๋ฐ ๋กœ๊น…
 
486
  # ์„ธ์…˜ ์„ค์ • ํ›„ ๋กœ๊น…
487
  logger.info(f"์„ธ์…˜ ์„ค์ • ํ›„ ์„ธ์…˜ ์ƒํƒœ: {session}")
488
  logger.info("์„ธ์…˜ ์„ค์ • ์™„๋ฃŒ, ๋ฆฌ๋””๋ ‰์…˜ ์‹œ๋„")
489
+
490
+ # ์„ธ์…˜ ์ฟ ํ‚ค ์„ค์ • ํ™•์ธ
491
+ response = redirect(url_for('index'))
492
+ logger.info(f"Response Headers: {response.headers}")
493
+ return response
494
  else:
495
  logger.warning("๋กœ๊ทธ์ธ ์‹คํŒจ: ์•„์ด๋”” ๋˜๋Š” ๋น„๋ฐ€๋ฒˆํ˜ธ ๋ถˆ์ผ์น˜")
496
  # ์–ด๋–ค ๋ถ€๋ถ„์ด ์ผ์น˜ํ•˜์ง€ ์•Š๋Š”์ง€ ์ƒ์„ธ ๋กœ๊น…
 
699
  audio_file = request.files['audio']
700
  logger.info(f"์ˆ˜์‹ ๋œ ํŒŒ์ผ: {audio_file.filename}")
701
 
702
+ try:
703
+ # ์˜ค๋””์˜ค ํŒŒ์ผ ์ฝ๊ธฐ
704
+ with audio_file.stream as f:
705
+ audio_bytes = f.read()
706
+
707
+ # ์Œ์„ฑ์ธ์‹ (VitoSTT)
708
+ stt = VitoSTT()
709
+ stt_result = stt.transcribe_audio(audio_bytes, language="ko")
710
+
711
+ if not stt_result["success"]:
712
+ logger.error(f"์Œ์„ฑ์ธ์‹ ์‹คํŒจ: {stt_result['error']}")
713
+ return jsonify({
714
+ "error": stt_result["error"],
715
+ "details": stt_result.get("details", "")
716
+ }), 500
717
+
718
+ transcription = stt_result["text"]
719
+ if not transcription:
720
+ logger.warning("์Œ์„ฑ์ธ์‹ ๊ฒฐ๊ณผ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")
721
+ return jsonify({"error": "์Œ์„ฑ์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."}), 400
722
+
723
+ logger.info(f"์Œ์„ฑ์ธ์‹ ์„ฑ๊ณต: {transcription[:50]}...")
724
+
725
+ # ๊ฒ€์ƒ‰๊ธฐ ํ˜ธ์ถœ: ์ธ์‹๋œ ํ…์ŠคํŠธ๋ฅผ ์ฟผ๋ฆฌ๋กœ ์‚ฌ์šฉ
726
+ sources = retriever.search(transcription, top_k=5, first_stage_k=6)
727
+ if not sources:
728
+ logger.warning("๊ฒ€์ƒ‰๋œ ์†Œ์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
729
+ sources = []
730
+
731
+ # ์†Œ์Šค ๋ฌธ์„œ ๋‚ด์šฉ์„ ์ปจํ…์ŠคํŠธ๋กœ ์ค€๋น„
732
+ context = DocumentProcessor.prepare_rag_context(sources, field="text")
733
+ logger.info(f"๊ฒ€์ƒ‰๋œ ์†Œ์Šค ์ˆ˜: {len(sources)}")
734
+
735
+ # LLM ํ˜ธ์ถœ: ์งˆ๋ฌธ๊ณผ ์ปจํ…์ŠคํŠธ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์‘๋‹ต ์ƒ์„ฑ
736
+ llm_id = request.form.get('llm_id', None) # ํด๋ผ์ด์–ธํŠธ์—์„œ LLM ์„ ํƒ์ด ์ œ๊ณต๋˜๋ฉด ์‚ฌ์šฉ
737
+ answer = llm_interface.rag_generate(transcription, context, llm_id=llm_id)
738
+
739
+ # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ
740
+ enhanced_sources = []
741
+ for doc in sources:
742
+ if "source" in doc:
743
+ source_info = {
744
+ "source": doc.get("source", "Unknown"),
745
+ "score": doc.get("rerank_score", doc.get("score", 0))
746
+ }
747
+
748
+ # CSV ํŒŒ์ผ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์ปจํ…์ธ  ๋ฐ์ดํ„ฐ๋ฅผ ์ถ”์ถœํ•˜์—ฌ ํ‘œ์‹œ
749
+ if "text" in doc and "filetype" in doc and doc["filetype"] == "csv":
750
+ # ๋””๋ฒ„๊น… ๋กœ๊ทธ ์ถ”๊ฐ€
751
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ: {doc['source']}")
752
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ๋‚ด์šฉ ์ฒ˜์Œ ๋ถ€๋ถ„: {doc['text'][:100]}...")
753
+
754
+ # ์ฒซ ๋ฒˆ์งธ ๋ผ์ธ์ด๋‚˜ ๋‚ด์šฉ์—์„œ ์ฝค๋Ÿผ ๊ฐ’ ์ถ”์ถœ ์‹œ๋„
755
+ try:
756
+ # ํ…์ŠคํŠธ์˜ ์ฒ˜์Œ ๋ถ€๋ถ„์„ ์ถ”์ถœ
757
+ text_lines = doc["text"].strip().split('\n')
758
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ๋ผ์ธ ๊ฐœ์ˆ˜: {len(text_lines)}")
759
+
760
+ if len(text_lines) > 0:
761
+ first_line = text_lines[0].strip()
762
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ์ฒซ ์ค„: {first_line}")
763
+
764
+ if ',' in first_line: # CSV ํ˜•์‹์ด๋ฉด
765
+ first_columns = first_line.split(',')
766
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ์ฝค๋Ÿผ ๊ฐœ์ˆ˜: {len(first_columns)}")
767
+
768
+ first_column = first_columns[0].strip()
769
+ logger.info(f"[์Œ์„ฑ์ฑ—] CSV ์ฒซ ๋ฒˆ์งธ ์ฝค๋Ÿผ ๊ฐ’: '{first_column}'")
770
+ source_info["id"] = first_column
771
+ logger.info(f"[์Œ์„ฑ์ฑ—] source_info์— id ์ถ”๊ฐ€: {source_info}")
772
+ else:
773
+ logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ํŒŒ์ผ์ด์ง€๋งŒ ์ฝ”๋งˆ๊ฐ€ ์—†์Œ: {first_line}")
774
+ else:
775
+ logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ํŒŒ์ผ์ด์ง€๋งŒ ๋ผ์ธ์ด ์—†์Œ: {doc['source']}")
776
+ except Exception as e:
777
+ logger.warning(f"[์Œ์„ฑ์ฑ—] CSV ์ฒซ ๋ฒˆ์งธ ์ฝค๋Ÿผ ์ถ”์ถœ ์‹คํŒจ: {e}")
778
+
779
+ enhanced_sources.append(source_info)
780
+
781
+ # ์ตœ์ข… ์‘๋‹ต ๊ตฌ์กฐ ๋กœ๊น…
782
+ response_data = {
783
+ "transcription": transcription,
784
+ "answer": answer,
785
+ "sources": enhanced_sources,
786
+ "llm": llm_interface.get_current_llm_details()
787
+ }
788
+ logger.debug(f"[์Œ์„ฑ์ฑ—] ์ตœ์ข… API ์‘๋‹ต ๊ตฌ์กฐ: {json.dumps(response_data, ensure_ascii=False, indent=2)[:500]}...")
789
+
790
+ return jsonify(response_data)
791
+
792
+ except Exception as e:
793
+ logger.error(f"์Œ์„ฑ ์ฑ— ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", exc_info=True)
794
+ return jsonify({
795
+ "error": "์Œ์„ฑ ์ฒ˜๋ฆฌ ์ค‘ ๋‚ด๋ถ€ ์˜ค๋ฅ˜ ๋ฐœ์ƒ",
796
+ "details": str(e)
797
+ }), 500
798
+ """
799
+ logger.info("์Œ์„ฑ ์ฑ— ์š”์ฒญ ์ˆ˜์‹ ")
800
+
801
+ # ์˜ค๋””์˜ค ํŒŒ์ผ ํ™•์ธ
802
+ if 'audio' not in request.files:
803
+ logger.error("์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์Œ")
804
+ return jsonify({"error": "์˜ค๋””์˜ค ํŒŒ์ผ์ด ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}), 400
805
+
806
+ audio_file = request.files['audio']
807
+ logger.info(f"์ˆ˜์‹ ๋œ ํŒŒ์ผ: {audio_file.filename}")
808
+
809
  try:
810
  # ์˜ค๋””์˜ค ํŒŒ์ผ ์ฝ๊ธฐ
811
  with audio_file.stream as f: