Spaces:
Sleeping
Sleeping
Update streamlit_app.py
Browse files- streamlit_app.py +250 -180
streamlit_app.py
CHANGED
@@ -3,18 +3,29 @@
|
|
3 |
|
4 |
import streamlit as st
|
5 |
import pandas as pd
|
6 |
-
import numpy
|
7 |
import pickle
|
8 |
import google.generativeai as genai
|
9 |
import time
|
10 |
import json
|
11 |
import os
|
|
|
12 |
from datetime import datetime
|
13 |
|
14 |
# ํ๊ฒฝ ์ค์ (๊ถํ ๋ฌธ์ ํด๊ฒฐ)
|
15 |
os.environ['STREAMLIT_BROWSER_GATHER_USAGE_STATS'] = 'false'
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# ํ์ด์ง ์ค์
|
20 |
st.set_page_config(
|
@@ -29,9 +40,30 @@ st.title("โจ AI ์นดํผ๋ผ์ดํฐ")
|
|
29 |
st.markdown("### ๐ฏ 37,671๊ฐ ์ค์ ๊ด๊ณ ์นดํผ ๋ฐ์ดํฐ ๊ธฐ๋ฐ RAG ์์คํ
")
|
30 |
st.markdown("---")
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
# ์ฌ์ด๋๋ฐ ์ค์
|
33 |
st.sidebar.header("๐๏ธ ์นดํผ ์์ฑ ์ค์ ")
|
34 |
|
|
|
35 |
# API ํค ์
๋ ฅ (ํ๊ฒฝ๋ณ์ ์ฐ์ ์ฌ์ฉ)
|
36 |
default_api_key = os.getenv("GEMINI_API_KEY", "")
|
37 |
|
@@ -52,6 +84,21 @@ if not api_key:
|
|
52 |
def load_system():
|
53 |
"""์์คํ
์ปดํฌ๋ํธ ๋ก๋ฉ - ์๋ฒ ๋ฉ ๊ธฐ๋ฐ RAG ์์คํ
"""
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
progress_container = st.container()
|
56 |
|
57 |
with progress_container:
|
@@ -63,7 +110,7 @@ def load_system():
|
|
63 |
status_text.text("๐ Gemini API ์ด๊ธฐํ ์ค...")
|
64 |
try:
|
65 |
genai.configure(api_key=api_key)
|
66 |
-
|
67 |
total_progress.progress(10)
|
68 |
st.success("โ
Gemini API ์ค์ ์๋ฃ")
|
69 |
except Exception as e:
|
@@ -72,21 +119,15 @@ def load_system():
|
|
72 |
|
73 |
# 2๋จ๊ณ: ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ (40%)
|
74 |
status_text.text("๐ค ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค... (1-2๋ถ ์์)")
|
75 |
-
|
76 |
|
77 |
-
# ์์ ์ ์ธ ๋ชจ๋ธ ๋ก๋ฉ ์ ๋ต
|
78 |
try:
|
79 |
-
#
|
80 |
-
os.makedirs('/tmp/sentence_transformers', exist_ok=True)
|
81 |
-
os.makedirs('/tmp/transformers', exist_ok=True)
|
82 |
-
|
83 |
-
# sentence-transformers ์ํฌํธ๋ฅผ ํจ์ ๋ด์์
|
84 |
from sentence_transformers import SentenceTransformer
|
85 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
cache_folder='/tmp/sentence_transformers')
|
90 |
total_progress.progress(40)
|
91 |
st.success("โ
ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ")
|
92 |
|
@@ -97,57 +138,78 @@ def load_system():
|
|
97 |
|
98 |
# 3๋จ๊ณ: ๋ฐ์ดํฐ ๋ก๋ (60%)
|
99 |
status_text.text("๐ ์นดํผ ๋ฐ์ดํฐ๋ฒ ์ด์ค ๋ก๋ฉ ์ค...")
|
|
|
100 |
try:
|
101 |
-
|
102 |
total_progress.progress(60)
|
103 |
-
st.success(f"โ
๋ฐ์ดํฐ ๋ก๋ฉ ์๋ฃ: {len(
|
104 |
except Exception as e:
|
105 |
st.error(f"โ ๋ฐ์ดํฐ ๋ก๋ฉ ์คํจ: {e}")
|
106 |
return None, None, None, None
|
107 |
|
108 |
# 4๋จ๊ณ: ์๋ฒ ๋ฉ ๋ฐ์ดํฐ ๋ก๋ (90%) - ์ด๊ฒ ํต์ฌ!
|
109 |
status_text.text("๐ ๋ฒกํฐ ์๋ฒ ๋ฉ ๋ก๋ฉ ์ค... (RAG ์์คํ
ํต์ฌ)")
|
|
|
110 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
with open('copy_embeddings.pkl', 'rb') as f:
|
112 |
embeddings_data = pickle.load(f)
|
113 |
-
|
114 |
total_progress.progress(90)
|
115 |
-
st.success(f"โ
์๋ฒ ๋ฉ ๋ก๋ฉ ์๋ฃ: {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
except Exception as e:
|
117 |
-
st.error(f"โ ์๋ฒ ๋ฉ ๋ก๋ฉ
|
118 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
119 |
return None, None, None, None
|
120 |
|
121 |
# 5๋จ๊ณ: ์ต์ข
๊ฒ์ฆ (100%)
|
122 |
status_text.text("โจ ์์คํ
๊ฒ์ฆ ์ค...")
|
123 |
-
if
|
124 |
total_progress.progress(100)
|
125 |
status_text.text("๐ RAG ์์คํ
๋ก๋ฉ ์๋ฃ!")
|
126 |
|
127 |
-
# ์ฑ๊ณต ๋ฉ์์ง
|
128 |
success_col1, success_col2, success_col3 = st.columns(3)
|
129 |
with success_col1:
|
130 |
-
st.metric("์นดํผ ๋ฐ์ดํฐ", f"{len(
|
131 |
with success_col2:
|
132 |
-
st.metric("์๋ฒ ๋ฉ ์ฐจ์", f"{
|
133 |
with success_col3:
|
134 |
st.metric("๊ฒ์ ์์ง", "Korean SBERT")
|
135 |
|
136 |
-
# ์งํ๋ฅ ๋ฐ ์ ๊ฑฐ
|
137 |
time.sleep(1)
|
138 |
total_progress.empty()
|
139 |
status_text.empty()
|
140 |
|
141 |
-
|
|
|
142 |
else:
|
143 |
st.error("โ ์์คํ
๋ก๋ฉ ์คํจ: ํ์ ๊ตฌ์ฑ์์ ๋๋ฝ")
|
144 |
return None, None, None, None
|
145 |
|
146 |
-
# ์์คํ
๋ก๋ฉ
|
|
|
147 |
with st.spinner("๐ AI ์นดํผ๋ผ์ดํฐ ์์คํ
์ด๊ธฐ๏ฟฝ๏ฟฝ ์ค..."):
|
148 |
-
|
149 |
|
150 |
-
if
|
151 |
st.error("โ ์์คํ
์ ๋ก๋ฉํ ์ ์์ต๋๋ค. ํ์ด์ง๋ฅผ ์๋ก๊ณ ์นจํ๊ฑฐ๋ ๊ด๋ฆฌ์์๊ฒ ๋ฌธ์ํ์ธ์.")
|
152 |
st.stop()
|
153 |
|
@@ -155,7 +217,7 @@ if model is None or embedding_model is None or df is None or embeddings is None:
|
|
155 |
st.sidebar.success("๐ RAG ์์คํ
์ค๋น ์๋ฃ!")
|
156 |
|
157 |
# ์นดํ
๊ณ ๋ฆฌ ์ ํ
|
158 |
-
categories = ['์ ์ฒด'] + sorted(
|
159 |
selected_category = st.sidebar.selectbox(
|
160 |
"๐ ์นดํ
๊ณ ๋ฆฌ",
|
161 |
categories,
|
@@ -187,21 +249,22 @@ creative_level = st.sidebar.select_slider(
|
|
187 |
# ๋ฉ์ธ ์
๋ ฅ ์์ญ
|
188 |
st.markdown("## ๐ญ ์ด๋ค ์นดํผ๋ฅผ ๋ง๋ค๊ณ ์ถ์ผ์ ๊ฐ์?")
|
189 |
|
190 |
-
|
191 |
input_method = st.radio(
|
192 |
"์
๋ ฅ ๋ฐฉ์ ์ ํ:",
|
193 |
["์ง์ ์
๋ ฅ", "ํ
ํ๋ฆฟ ์ ํ"],
|
194 |
-
horizontal=True
|
|
|
195 |
)
|
196 |
|
197 |
if input_method == "์ง์ ์
๋ ฅ":
|
198 |
user_request = st.text_area(
|
199 |
"์นดํผ ์์ฒญ์ ์์ธํ ์์ฑํด์ฃผ์ธ์:",
|
200 |
placeholder="์: 30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
201 |
-
height=100
|
|
|
202 |
)
|
203 |
else:
|
204 |
-
# ํ
ํ๋ฆฟ ์ ํ
|
205 |
templates = {
|
206 |
"์ ์ ํ ๋ฐ์นญ": "๋์ {์นดํ
๊ณ ๋ฆฌ} ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
207 |
"ํ ์ธ ์ด๋ฒคํธ": "{์นดํ
๊ณ ๋ฆฌ} ํ ์ธ ์ด๋ฒคํธ ํ๋ก๋ชจ์
์นดํผ",
|
@@ -209,194 +272,214 @@ else:
|
|
209 |
"์ฑ/์๋น์ค ๋ฆฌ๋ด์ผ": "{์๋น์ค๋ช
} ์ ๋ฒ์ ์ถ์ ์นดํผ",
|
210 |
"์์ฆ ํ์ ": "{์์ฆ} ํ์ {์นดํ
๊ณ ๋ฆฌ} ํน๋ณ ์๋์
์นดํผ"
|
211 |
}
|
212 |
-
|
213 |
-
|
|
|
|
|
214 |
|
215 |
col1, col2 = st.columns(2)
|
216 |
with col1:
|
217 |
-
template_category = st.text_input("์ ํ/์๋น์ค:", value="")
|
218 |
with col2:
|
219 |
if selected_template == "์ฑ/์๋น์ค ๋ฆฌ๋ด์ผ":
|
220 |
-
service_name = st.text_input("์๋น์ค๋ช
:", placeholder="์: ๋ฐฐ๋ฌ์ฑ, ๊ธ์ต์ฑ")
|
221 |
user_request = templates[selected_template].format(์๋น์ค๋ช
=service_name)
|
222 |
elif selected_template == "์์ฆ ํ์ ":
|
223 |
-
season = st.selectbox("์์ฆ:", ["๋ด", "์ฌ๋ฆ", "๊ฐ์", "๊ฒจ์ธ", "ํฌ๋ฆฌ์ค๋ง์ค", "์ ๋
"])
|
224 |
user_request = templates[selected_template].format(์์ฆ=season, ์นดํ
๊ณ ๋ฆฌ=template_category)
|
225 |
else:
|
226 |
user_request = templates[selected_template].format(์นดํ
๊ณ ๋ฆฌ=template_category)
|
|
|
227 |
|
228 |
-
st.text_area("์์ฑ๋ ์์ฒญ:", value=user_request, height=80, disabled=True)
|
229 |
|
230 |
# ๊ณ ๊ธ ์ต์
|
231 |
with st.expander("๐ง ๊ณ ๊ธ ์ต์
"):
|
232 |
-
|
233 |
-
with
|
234 |
-
num_concepts = st.slider("์์ฑํ ์ปจ์
์:", 1, 5, 3)
|
235 |
-
min_similarity = st.slider("์ต์ ์ ์ฌ๋:", 0.0, 1.0, 0.3, 0.1)
|
236 |
-
with
|
237 |
-
show_references = st.checkbox("์ฐธ๊ณ ์นดํผ ๋ณด๊ธฐ", value=True)
|
238 |
-
num_references = st.slider("์ฐธ๊ณ ์นดํผ ์:", 3, 10, 5)
|
239 |
|
240 |
# RAG ์นดํผ ์์ฑ ํจ์ (์๋ฒ ๋ฉ ๊ธฐ๋ฐ ํ์!)
|
241 |
-
def generate_copy_with_rag(
|
242 |
"""RAG ๊ธฐ๋ฐ ์นดํผ ์์ฑ - ์๋ฒ ๋ฉ ํ์ ์ฌ์ฉ"""
|
243 |
-
|
244 |
-
if not user_request.strip():
|
245 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
246 |
return None
|
247 |
|
248 |
-
# ์ง๏ฟฝ๏ฟฝ๏ฟฝ ์ํฉ ํ์
|
249 |
progress_bar = st.progress(0)
|
250 |
-
|
251 |
|
252 |
-
|
253 |
-
status_text.text("๐ ์๋ฏธ์ ๊ฒ์ ์ค... (RAG ํต์ฌ ๊ธฐ๋ฅ)")
|
254 |
progress_bar.progress(20)
|
255 |
|
256 |
try:
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
if category != '์ ์ฒด':
|
264 |
-
filtered_df = df[df['์นดํ
๊ณ ๋ฆฌ'] == category]
|
265 |
else:
|
266 |
-
|
267 |
|
268 |
progress_bar.progress(40)
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
# ์์ ์ฐธ๊ณ ์นดํผ ์ ๋ณ
|
276 |
-
top_indices = np.argsort(similarities)[::-1][:num_references]
|
277 |
|
278 |
reference_copies = []
|
279 |
-
for
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
283 |
reference_copies.append({
|
284 |
'copy': row['์นดํผ ๋ด์ฉ'],
|
285 |
'brand': row['๋ธ๋๋'],
|
286 |
-
'similarity': similarities[
|
287 |
})
|
288 |
-
|
289 |
progress_bar.progress(60)
|
290 |
|
291 |
if not reference_copies:
|
292 |
st.warning(f"โ ๏ธ ์ ์ฌ๋ {min_similarity} ์ด์์ธ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ์ ์ฌ๋๋ฅผ ๋ฎ์ถฐ๋ณด์ธ์.")
|
293 |
-
|
294 |
-
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
-
# 2๋จ๊ณ: AI ์นดํผ ์์ฑ
|
298 |
-
status_text.text("๐ค AI ์นดํผ ์์ฑ ์ค...")
|
299 |
-
progress_bar.progress(80)
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
f"{i}. \"{ref['copy']}\" - {ref['brand']} (์ ์ฌ๋: {ref['similarity']:.3f})"
|
304 |
-
for i, ref in enumerate(reference_copies, 1)
|
305 |
-
])
|
306 |
|
307 |
creativity_guidance = {
|
308 |
"๋ณด์์ ": "์์ ํ๊ณ ๊ฒ์ฆ๋ ํํ์ ์ฌ์ฉํ์ฌ",
|
309 |
"๊ท ํ": "์ฐฝ์์ ์ด๋ฉด์๋ ์ ์ ํ ์์ค์์",
|
310 |
"์ฐฝ์์ ": "๋
์ฐฝ์ ์ด๊ณ ํ์ ์ ์ธ ํํ์ผ๋ก"
|
311 |
}
|
312 |
-
|
313 |
prompt = f"""
|
314 |
๋น์ ์ ํ๊ตญ์ ์ ๋ฌธ ๊ด๊ณ ์นดํผ๋ผ์ดํฐ์
๋๋ค.
|
315 |
|
316 |
-
**์์ฒญ์ฌํญ:** {
|
317 |
-
**ํ๊ฒ ๊ณ ๊ฐ:** {
|
318 |
-
**๋ธ๋๋ ํค:** {
|
319 |
-
**์ฐฝ์์ฑ ์์ค:** {
|
320 |
|
321 |
**์ฐธ๊ณ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ):**
|
322 |
-
{
|
323 |
|
324 |
**์์ฑ ๊ฐ์ด๋๋ผ์ธ:**
|
325 |
-
1. ์ ์ฐธ๊ณ ์นดํผ๋ค์ ์คํ์ผ๊ณผ ํค์
|
326 |
-
2.
|
327 |
-
3. ๊ฐ ์นดํผ๋ ํ๊ตญ์ด๋ก ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ด์ด์ผ
|
328 |
-
4. {
|
329 |
-
5. {
|
330 |
-
|
331 |
-
**์ถ๋ ฅ ํ์:**
|
332 |
-
1. [์นดํผ1]
|
333 |
-
- ์ค๋ช
: ์ ์ด ์นดํผ๊ฐ ํจ๊ณผ์ ์ธ์ง ๊ฐ๋จํ ์ค๋ช
|
334 |
|
335 |
-
|
336 |
-
|
|
|
337 |
|
338 |
-
|
339 |
-
- ์ค๋ช
:
|
|
|
340 |
|
341 |
-
**์ถ์ฒ ์นดํผ:** ์ ์ค ๊ฐ์ฅ ์ถ์ฒํ๋
|
342 |
"""
|
343 |
-
|
344 |
-
response = model.generate_content(prompt)
|
345 |
progress_bar.progress(100)
|
346 |
-
|
347 |
-
|
348 |
time.sleep(0.5)
|
349 |
progress_bar.empty()
|
350 |
-
|
351 |
|
352 |
return {
|
353 |
'references': reference_copies,
|
354 |
'generated_content': response.text,
|
355 |
'search_info': {
|
356 |
'query': search_query,
|
357 |
-
'total_candidates': len(
|
358 |
'selected_references': len(reference_copies)
|
359 |
},
|
360 |
'settings': {
|
361 |
-
'category':
|
362 |
-
'target':
|
363 |
-
'tone':
|
364 |
-
'creative':
|
365 |
}
|
366 |
}
|
367 |
-
|
368 |
-
|
369 |
-
st.error(f"
|
|
|
|
|
370 |
progress_bar.empty()
|
371 |
-
|
372 |
return None
|
373 |
|
374 |
# ์์ฑ ๋ฒํผ
|
375 |
-
if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=True):
|
376 |
-
|
377 |
if not user_request or not user_request.strip():
|
378 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
379 |
else:
|
380 |
-
# RAG ์นดํผ ์์ฑ
|
381 |
result = generate_copy_with_rag(
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
)
|
389 |
-
|
390 |
if result:
|
391 |
-
# ๊ฒฐ๊ณผ ํ์
|
392 |
st.markdown("## ๐ ์์ฑ๋ ์นดํผ")
|
393 |
st.markdown("---")
|
394 |
-
|
395 |
-
# ๊ฒ์ ์ ๋ณด ํ์
|
396 |
st.info(f"๐ **๊ฒ์ ์ ๋ณด**: {result['search_info']['total_candidates']:,}๊ฐ ํ๋ณด์์ "
|
397 |
f"{result['search_info']['selected_references']}๊ฐ ์ฐธ๊ณ ์นดํผ ์ ๋ณ")
|
398 |
-
|
399 |
-
# ์ฐธ๊ณ ์นดํผ ํ์
|
400 |
if show_references and result['references']:
|
401 |
with st.expander("๐ ์ฐธ๊ณ ํ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ)"):
|
402 |
for i, ref in enumerate(result['references'], 1):
|
@@ -404,35 +487,36 @@ if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=Tru
|
|
404 |
st.markdown(f" - ๋ธ๋๋: {ref['brand']}")
|
405 |
st.markdown(f" - ์ ์ฌ๋: {ref['similarity']:.3f}")
|
406 |
st.markdown("")
|
407 |
-
|
408 |
-
# ์์ฑ๋ ์นดํผ ํ์
|
409 |
st.markdown("### โจ AI๊ฐ ์์ฑํ ์นดํผ:")
|
410 |
st.markdown(result['generated_content'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
|
412 |
-
# ๊ฒฐ๊ณผ ๋ค์ด๋ก๋
|
413 |
-
result_json = json.dumps({
|
414 |
-
'timestamp': datetime.now().isoformat(),
|
415 |
-
'request': user_request,
|
416 |
-
'settings': result['settings'],
|
417 |
-
'search_info': result['search_info'],
|
418 |
-
'generated_content': result['generated_content']
|
419 |
-
}, ensure_ascii=False, indent=2)
|
420 |
-
|
421 |
-
st.download_button(
|
422 |
-
label="๐พ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ (JSON)",
|
423 |
-
data=result_json,
|
424 |
-
file_name=f"copy_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
|
425 |
-
mime="application/json"
|
426 |
-
)
|
427 |
|
428 |
# ์์คํ
์ ๋ณด (์ฌ์ด๋๋ฐ ํ๋จ)
|
429 |
st.sidebar.markdown("---")
|
430 |
st.sidebar.markdown("### ๐ RAG ์์คํ
์ ๋ณด")
|
431 |
-
if
|
432 |
-
st.sidebar.markdown(f"**์นดํผ ๋ฐ์ดํฐ**: {len(
|
433 |
-
st.sidebar.markdown(f"**์นดํ
๊ณ ๋ฆฌ**: {
|
434 |
-
st.sidebar.markdown(f"**๋ธ๋๋**: {
|
435 |
-
st.sidebar.markdown(f"**์๋ฒ ๋ฉ**: {
|
436 |
st.sidebar.markdown("**๊ฒ์ ์์ง**: Korean SBERT")
|
437 |
st.sidebar.markdown("**ํธ์คํ
**: ๐ค Hugging Face")
|
438 |
|
@@ -440,25 +524,7 @@ if df is not None and embeddings is not None:
|
|
440 |
with st.expander("๐ก RAG ์์คํ
์ฌ์ฉ๋ฒ ๊ฐ์ด๋"):
|
441 |
st.markdown("""
|
442 |
### ๐ฏ ํจ๊ณผ์ ์ธ ์ฌ์ฉ๋ฒ
|
443 |
-
|
444 |
-
**1. ๊ตฌ์ฒด์ ์ธ ์์ฒญํ๊ธฐ:**
|
445 |
-
- โ "์นดํผ ์จ์ค"
|
446 |
-
- โ
"30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ"
|
447 |
-
|
448 |
-
**2. RAG ์์คํ
์ ์ฅ์ :**
|
449 |
-
- ๐ง **์๋ฏธ์ ๊ฒ์**: ํค์๋๋ฟ๋ง ์๋๋ผ ์๋ฏธ๊น์ง ์ดํด
|
450 |
-
- ๐ฏ **๋ฌธ๋งฅ ๋งค์นญ**: ํ๊ฒ๊ณผ ์ํฉ์ ๋ง๋ ์นดํผ ์๋ ์ ๋ณ
|
451 |
-
- ๐ **๋ฐ์ดํฐ ๊ธฐ๋ฐ**: 37,671๊ฐ ์ค์ ์นดํผ์์ ํ์ตํ ํจํด
|
452 |
-
|
453 |
-
**3. ์ฐฝ์์ฑ ์กฐ์ :**
|
454 |
-
- **๋ณด์์ **: ์์ ํ ํด๋ผ์ด์ธํธ, ๊ฒ์ฆ๋ ์ ๊ทผ
|
455 |
-
- **๊ท ํ**: ์ผ๋ฐ์ ์ธ ํ๋ก์ ํธ (์ถ์ฒ!)
|
456 |
-
- **์ฐฝ์์ **: ํ์ ์ ๋ธ๋๋, ํ๊ฒฉ์ ์บ ํ์ธ
|
457 |
-
|
458 |
-
**4. ์ฐธ๊ณ ์นดํผ ํ์ฉ:**
|
459 |
-
- ์์ฑ๋ ์นดํผ์ ์ฐธ๊ณ ์นดํผ๋ฅผ ๋น๊ต ๋ถ์
|
460 |
-
- ํธ๋ ๋์ ํจํด ํ์
๊ฐ๋ฅ
|
461 |
-
- ๊ฒฝ์์ฌ ๋ถ์ ์๋ฃ๋ก ํ์ฉ
|
462 |
""")
|
463 |
|
464 |
# ํธํฐ
|
@@ -469,8 +535,12 @@ st.markdown(
|
|
469 |
)
|
470 |
|
471 |
# ์ฑ๋ฅ ๋ชจ๋ํฐ๋ง (๊ฐ๋ฐ์์ฉ)
|
472 |
-
if os.getenv("DEBUG_MODE"):
|
473 |
-
st.sidebar.markdown("### ๐ง ๋๋ฒ๊ทธ ์ ๋ณด")
|
474 |
-
if '
|
475 |
-
st.sidebar.write(f"์๋ฒ ๋ฉ ๋ฉ๋ชจ๋ฆฌ: {
|
476 |
st.sidebar.write(f"Streamlit ๋ฒ์ : {st.__version__}")
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
import streamlit as st
|
5 |
import pandas as pd
|
6 |
+
import numpy # ์ ์ญ์ ์ผ๋ก numpy๋ฅผ ๋จผ์ ์ํฌํธํด๋ด
๋๋ค.
|
7 |
import pickle
|
8 |
import google.generativeai as genai
|
9 |
import time
|
10 |
import json
|
11 |
import os
|
12 |
+
import sys # ๋๋ฒ๊น
์ฉ sys ๋ชจ๋ ์ํฌํธ
|
13 |
from datetime import datetime
|
14 |
|
15 |
# ํ๊ฒฝ ์ค์ (๊ถํ ๋ฌธ์ ํด๊ฒฐ)
|
16 |
os.environ['STREAMLIT_BROWSER_GATHER_USAGE_STATS'] = 'false'
|
17 |
+
# ์บ์ ๊ฒฝ๋ก๋ฅผ ์ ํ๋ฆฌ์ผ์ด์
๋๋ ํ ๋ฆฌ ๋ด๋ก ๋ณ๊ฒฝ ์๋ (์ฐ๊ธฐ ๊ถํ ๋ฌธ์ ๋ฐฉ์ง)
|
18 |
+
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
|
19 |
+
TRANSFORMERS_CACHE_DIR = os.path.join(APP_ROOT, '.cache', 'transformers')
|
20 |
+
SENTENCE_TRANSFORMERS_HOME_DIR = os.path.join(APP_ROOT, '.cache', 'sentence_transformers')
|
21 |
+
|
22 |
+
os.environ['TRANSFORMERS_CACHE'] = TRANSFORMERS_CACHE_DIR
|
23 |
+
os.environ['SENTENCE_TRANSFORMERS_HOME'] = SENTENCE_TRANSFORMERS_HOME_DIR
|
24 |
+
|
25 |
+
# ์บ์ ๋๋ ํ ๋ฆฌ ์์ฑ (์กด์ฌํ์ง ์์ผ๋ฉด)
|
26 |
+
os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
|
27 |
+
os.makedirs(SENTENCE_TRANSFORMERS_HOME_DIR, exist_ok=True)
|
28 |
+
|
29 |
|
30 |
# ํ์ด์ง ์ค์
|
31 |
st.set_page_config(
|
|
|
40 |
st.markdown("### ๐ฏ 37,671๊ฐ ์ค์ ๊ด๊ณ ์นดํผ ๋ฐ์ดํฐ ๊ธฐ๋ฐ RAG ์์คํ
")
|
41 |
st.markdown("---")
|
42 |
|
43 |
+
# --- ๋ฐํ์ ํ๊ฒฝ ๋๋ฒ๊น
(์ ํ๋ฆฌ์ผ์ด์
์ต์๋จ ๋๋ load_system ๋ฐ๋ก ์ ) ---
|
44 |
+
st.sidebar.markdown("---")
|
45 |
+
st.sidebar.markdown("### โ๏ธ ๋ฐํ์ ํ๊ฒฝ ์ ๋ณด (๋๋ฒ๊น
์ฉ)")
|
46 |
+
st.sidebar.text(f"Py Exec: {sys.executable}")
|
47 |
+
st.sidebar.text(f"Py Ver: {sys.version.split()[0]}") # ๊ฐ๋ตํ๊ฒ ๋ฒ์ ๋ง
|
48 |
+
# st.sidebar.text(f"sys.path: {sys.path}") # ๋๋ฌด ๊ธธ์ด์ ์ผ๋จ ์ฃผ์
|
49 |
+
st.sidebar.text(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not Set')}")
|
50 |
+
try:
|
51 |
+
# numpy๋ฅผ ์ฌ๊ธฐ์ ๋ค์ ์ํฌํธํ๊ณ ์ฌ์ฉ
|
52 |
+
import numpy as np_runtime_check
|
53 |
+
st.sidebar.text(f"NumPy Ver (Runtime): {np_runtime_check.__version__}")
|
54 |
+
# ํต์ฌ ๋ชจ๋ ์ํฌํธ ์๋
|
55 |
+
import numpy.core._multiarray_umath
|
56 |
+
st.sidebar.markdown("โ
NumPy core modules imported (Runtime)")
|
57 |
+
except Exception as e:
|
58 |
+
st.sidebar.error(f"โ NumPy import error (Runtime): {e}")
|
59 |
+
st.sidebar.markdown("---")
|
60 |
+
# --- ๋๋ฒ๊น
์ฝ๋ ๋ ---
|
61 |
+
|
62 |
+
|
63 |
# ์ฌ์ด๋๋ฐ ์ค์
|
64 |
st.sidebar.header("๐๏ธ ์นดํผ ์์ฑ ์ค์ ")
|
65 |
|
66 |
+
|
67 |
# API ํค ์
๋ ฅ (ํ๊ฒฝ๋ณ์ ์ฐ์ ์ฌ์ฉ)
|
68 |
default_api_key = os.getenv("GEMINI_API_KEY", "")
|
69 |
|
|
|
84 |
def load_system():
|
85 |
"""์์คํ
์ปดํฌ๋ํธ ๋ก๋ฉ - ์๋ฒ ๋ฉ ๊ธฐ๋ฐ RAG ์์คํ
"""
|
86 |
|
87 |
+
# --- ํจ์ ์์ ์ ๋๋ฒ๊น
์ ๋ณด ์ถ๊ฐ ---
|
88 |
+
st.write("--- load_system() ์์ ---")
|
89 |
+
st.write(f"Python Executable (load_system): {sys.executable}")
|
90 |
+
st.write(f"Python Version (load_system): {sys.version}")
|
91 |
+
# st.write(f"sys.path (load_system): {sys.path}") # ๋๋ฌด ๊ธธ์ด์ ์ฃผ์
|
92 |
+
st.write(f"PYTHONPATH (load_system): {os.environ.get('PYTHONPATH')}")
|
93 |
+
try:
|
94 |
+
import numpy as np_load_system_check # ์ ๋ณ์นญ ์ฌ์ฉ
|
95 |
+
st.write(f"NumPy version (load_system start): {np_load_system_check.__version__}")
|
96 |
+
import numpy.core._multiarray_umath
|
97 |
+
st.write("load_system start: Successfully imported numpy.core._multiarray_umath")
|
98 |
+
except Exception as e:
|
99 |
+
st.write(f"load_system start: Error importing NumPy parts: {e}")
|
100 |
+
# --- ๋๋ฒ๊น
์ ๋ณด ๋ ---
|
101 |
+
|
102 |
progress_container = st.container()
|
103 |
|
104 |
with progress_container:
|
|
|
110 |
status_text.text("๐ Gemini API ์ด๊ธฐํ ์ค...")
|
111 |
try:
|
112 |
genai.configure(api_key=api_key)
|
113 |
+
model_llm = genai.GenerativeModel('gemini-1.5-flash') # ๋ชจ๋ธ ์ด๋ฆ ํ์ธ (์ด์ ์ gemini-2.0-flash)
|
114 |
total_progress.progress(10)
|
115 |
st.success("โ
Gemini API ์ค์ ์๋ฃ")
|
116 |
except Exception as e:
|
|
|
119 |
|
120 |
# 2๋จ๊ณ: ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ (40%)
|
121 |
status_text.text("๐ค ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค... (1-2๋ถ ์์)")
|
122 |
+
embedding_model_instance = None # ๋ณ์๋ช
๋ณ๊ฒฝ
|
123 |
|
|
|
124 |
try:
|
125 |
+
# sentence-transformers ์ํฌํธ๋ฅผ ํจ์ ๋ด์์ ์ ์ง
|
|
|
|
|
|
|
|
|
126 |
from sentence_transformers import SentenceTransformer
|
127 |
+
# from sklearn.metrics.pairwise import cosine_similarity # ์ฌ๊ธฐ์๋ ์์ง ํ์ ์์
|
128 |
|
129 |
+
embedding_model_instance = SentenceTransformer('jhgan/ko-sbert-nli',
|
130 |
+
cache_folder=SENTENCE_TRANSFORMERS_HOME_DIR) # ์์ ๋ ์บ์ ๊ฒฝ๋ก ์ฌ์ฉ
|
|
|
131 |
total_progress.progress(40)
|
132 |
st.success("โ
ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ")
|
133 |
|
|
|
138 |
|
139 |
# 3๋จ๊ณ: ๋ฐ์ดํฐ ๋ก๋ (60%)
|
140 |
status_text.text("๐ ์นดํผ ๋ฐ์ดํฐ๋ฒ ์ด์ค ๋ก๋ฉ ์ค...")
|
141 |
+
df_data = None # ๋ณ์๋ช
๋ณ๊ฒฝ
|
142 |
try:
|
143 |
+
df_data = pd.read_excel('๊ด๊ณ ์นดํผ๋ฐ์ดํฐ_๋ธ๋๋์ถ์ถ์๋ฃ.xlsx')
|
144 |
total_progress.progress(60)
|
145 |
+
st.success(f"โ
๋ฐ์ดํฐ ๋ก๋ฉ ์๋ฃ: {len(df_data):,}๊ฐ ์นดํผ")
|
146 |
except Exception as e:
|
147 |
st.error(f"โ ๋ฐ์ดํฐ ๋ก๋ฉ ์คํจ: {e}")
|
148 |
return None, None, None, None
|
149 |
|
150 |
# 4๋จ๊ณ: ์๋ฒ ๋ฉ ๋ฐ์ดํฐ ๋ก๋ (90%) - ์ด๊ฒ ํต์ฌ!
|
151 |
status_text.text("๐ ๋ฒกํฐ ์๋ฒ ๋ฉ ๋ก๋ฉ ์ค... (RAG ์์คํ
ํต์ฌ)")
|
152 |
+
embeddings_array = None # ๋ณ์๋ช
๋ณ๊ฒฝ
|
153 |
try:
|
154 |
+
# --- pickle.load() ์ง์ NumPy ๋๋ฒ๊น
---
|
155 |
+
import numpy as np_pickle_check # ์ ๋ณ์นญ ์ฌ์ฉ
|
156 |
+
st.write(f"[DEBUG] NumPy version just before pickle.load: {np_pickle_check.__version__}")
|
157 |
+
import numpy.core._multiarray_umath
|
158 |
+
st.write("[DEBUG] Successfully imported numpy.core._multiarray_umath before pickle.load")
|
159 |
+
# --- ๋๋ฒ๊น
๋ ---
|
160 |
+
|
161 |
with open('copy_embeddings.pkl', 'rb') as f:
|
162 |
embeddings_data = pickle.load(f)
|
163 |
+
embeddings_array = embeddings_data['embeddings']
|
164 |
total_progress.progress(90)
|
165 |
+
st.success(f"โ
์๋ฒ ๋ฉ ๋ก๋ฉ ์๋ฃ: {embeddings_array.shape[0]:,}๊ฐ ร {embeddings_array.shape[1]}์ฐจ์")
|
166 |
+
except ModuleNotFoundError as mnfe: # ModuleNotFoundError๋ฅผ ํน์ ํด์ ์ก๊ธฐ
|
167 |
+
st.error(f"โ ์๋ฒ ๋ฉ ๋ก๋ฉ ์คํจ (ModuleNotFoundError): {mnfe}")
|
168 |
+
st.error(f"๐จ ํด๋น ๋ชจ๋์ ์ฐพ์ ์ ์์ต๋๋ค. sys.path: {sys.path}")
|
169 |
+
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
170 |
+
# ์ถ๊ฐ ๋๋ฒ๊น
: ํ์ฌ ๋ก๋๋ numpy ๊ฐ์ฒด ์ํ
|
171 |
+
try:
|
172 |
+
import numpy as np_final_check
|
173 |
+
st.error(f"[DEBUG] NumPy object at failure: {np_final_check}")
|
174 |
+
st.error(f"[DEBUG] NumPy __file__ at failure: {np_final_check.__file__}")
|
175 |
+
except Exception as e_np_final:
|
176 |
+
st.error(f"[DEBUG] Could not even import numpy at failure: {e_np_final}")
|
177 |
+
return None, None, None, None
|
178 |
except Exception as e:
|
179 |
+
st.error(f"โ ์๋ฒ ๋ฉ ๋ก๋ฉ ์คํจ (์ผ๋ฐ ์ค๋ฅ): {e}")
|
180 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
181 |
return None, None, None, None
|
182 |
|
183 |
# 5๋จ๊ณ: ์ต์ข
๊ฒ์ฆ (100%)
|
184 |
status_text.text("โจ ์์คํ
๊ฒ์ฆ ์ค...")
|
185 |
+
if model_llm and embedding_model_instance and df_data is not None and embeddings_array is not None:
|
186 |
total_progress.progress(100)
|
187 |
status_text.text("๐ RAG ์์คํ
๋ก๋ฉ ์๋ฃ!")
|
188 |
|
|
|
189 |
success_col1, success_col2, success_col3 = st.columns(3)
|
190 |
with success_col1:
|
191 |
+
st.metric("์นดํผ ๋ฐ์ดํฐ", f"{len(df_data):,}๊ฐ")
|
192 |
with success_col2:
|
193 |
+
st.metric("์๋ฒ ๋ฉ ์ฐจ์", f"{embeddings_array.shape[1]}D")
|
194 |
with success_col3:
|
195 |
st.metric("๊ฒ์ ์์ง", "Korean SBERT")
|
196 |
|
|
|
197 |
time.sleep(1)
|
198 |
total_progress.empty()
|
199 |
status_text.empty()
|
200 |
|
201 |
+
# ์ ์ญ ๋ณ์๋ช
๊ณผ์ ์ถฉ๋์ ํผํ๊ธฐ ์ํด ํจ์ ๋ด์์ ์ฌ์ฉํ ๋ณ์๋ช
์ผ๋ก ๋ฐํ
|
202 |
+
return model_llm, embedding_model_instance, df_data, embeddings_array
|
203 |
else:
|
204 |
st.error("โ ์์คํ
๋ก๋ฉ ์คํจ: ํ์ ๊ตฌ์ฑ์์ ๋๋ฝ")
|
205 |
return None, None, None, None
|
206 |
|
207 |
+
# ์์คํ
๋ก๋ฉ (๋ณ์๋ช
์ถฉ๋ ๋ฐฉ์ง๋ฅผ ์ํด ์๋ก์ด ์ด๋ฆ ์ฌ์ฉ)
|
208 |
+
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = None, None, None, None
|
209 |
with st.spinner("๐ AI ์นดํผ๋ผ์ดํฐ ์์คํ
์ด๊ธฐ๏ฟฝ๏ฟฝ ์ค..."):
|
210 |
+
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = load_system()
|
211 |
|
212 |
+
if loaded_model is None or loaded_embedding_model is None or loaded_df is None or loaded_embeddings is None:
|
213 |
st.error("โ ์์คํ
์ ๋ก๋ฉํ ์ ์์ต๋๋ค. ํ์ด์ง๋ฅผ ์๋ก๊ณ ์นจํ๊ฑฐ๋ ๊ด๋ฆฌ์์๊ฒ ๋ฌธ์ํ์ธ์.")
|
214 |
st.stop()
|
215 |
|
|
|
217 |
st.sidebar.success("๐ RAG ์์คํ
์ค๋น ์๋ฃ!")
|
218 |
|
219 |
# ์นดํ
๊ณ ๋ฆฌ ์ ํ
|
220 |
+
categories = ['์ ์ฒด'] + sorted(loaded_df['์นดํ
๊ณ ๋ฆฌ'].unique().tolist())
|
221 |
selected_category = st.sidebar.selectbox(
|
222 |
"๐ ์นดํ
๊ณ ๋ฆฌ",
|
223 |
categories,
|
|
|
249 |
# ๋ฉ์ธ ์
๋ ฅ ์์ญ
|
250 |
st.markdown("## ๐ญ ์ด๋ค ์นดํผ๋ฅผ ๋ง๋ค๊ณ ์ถ์ผ์ ๊ฐ์?")
|
251 |
|
252 |
+
user_request = "" # ์ด๊ธฐํ
|
253 |
input_method = st.radio(
|
254 |
"์
๋ ฅ ๋ฐฉ์ ์ ํ:",
|
255 |
["์ง์ ์
๋ ฅ", "ํ
ํ๋ฆฟ ์ ํ"],
|
256 |
+
horizontal=True,
|
257 |
+
key="input_method_radio" # ๊ณ ์ ํค ์ถ๊ฐ
|
258 |
)
|
259 |
|
260 |
if input_method == "์ง์ ์
๋ ฅ":
|
261 |
user_request = st.text_area(
|
262 |
"์นดํผ ์์ฒญ์ ์์ธํ ์์ฑํด์ฃผ์ธ์:",
|
263 |
placeholder="์: 30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
264 |
+
height=100,
|
265 |
+
key="user_request_direct" # ๊ณ ์ ํค ์ถ๊ฐ
|
266 |
)
|
267 |
else:
|
|
|
268 |
templates = {
|
269 |
"์ ์ ํ ๋ฐ์นญ": "๋์ {์นดํ
๊ณ ๋ฆฌ} ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
270 |
"ํ ์ธ ์ด๋ฒคํธ": "{์นดํ
๊ณ ๋ฆฌ} ํ ์ธ ์ด๋ฒคํธ ํ๋ก๋ชจ์
์นดํผ",
|
|
|
272 |
"์ฑ/์๋น์ค ๋ฆฌ๋ด์ผ": "{์๋น์ค๋ช
} ์ ๋ฒ์ ์ถ์ ์นดํผ",
|
273 |
"์์ฆ ํ์ ": "{์์ฆ} ํ์ {์นดํ
๊ณ ๋ฆฌ} ํน๋ณ ์๋์
์นดํผ"
|
274 |
}
|
275 |
+
selected_template = st.selectbox("ํ
ํ๋ฆฟ ์ ํ:", list(templates.keys()), key="template_selectbox")
|
276 |
+
template_category = ""
|
277 |
+
service_name = ""
|
278 |
+
season = ""
|
279 |
|
280 |
col1, col2 = st.columns(2)
|
281 |
with col1:
|
282 |
+
template_category = st.text_input("์ ํ/์๋น์ค:", value="", key="template_category_input")
|
283 |
with col2:
|
284 |
if selected_template == "์ฑ/์๋น์ค ๋ฆฌ๋ด์ผ":
|
285 |
+
service_name = st.text_input("์๋น์ค๋ช
:", placeholder="์: ๋ฐฐ๋ฌ์ฑ, ๊ธ์ต์ฑ", key="template_service_name_input")
|
286 |
user_request = templates[selected_template].format(์๋น์ค๋ช
=service_name)
|
287 |
elif selected_template == "์์ฆ ํ์ ":
|
288 |
+
season = st.selectbox("์์ฆ:", ["๋ด", "์ฌ๋ฆ", "๊ฐ์", "๊ฒจ์ธ", "ํฌ๋ฆฌ์ค๋ง์ค", "์ ๋
"], key="template_season_selectbox")
|
289 |
user_request = templates[selected_template].format(์์ฆ=season, ์นดํ
๊ณ ๋ฆฌ=template_category)
|
290 |
else:
|
291 |
user_request = templates[selected_template].format(์นดํ
๊ณ ๋ฆฌ=template_category)
|
292 |
+
st.text_area("์์ฑ๋ ์์ฒญ:", value=user_request, height=80, disabled=True, key="generated_request_template")
|
293 |
|
|
|
294 |
|
295 |
# ๊ณ ๊ธ ์ต์
|
296 |
with st.expander("๐ง ๊ณ ๊ธ ์ต์
"):
|
297 |
+
col1_adv, col2_adv = st.columns(2) # ๋ณ์๋ช
๋ณ๊ฒฝ
|
298 |
+
with col1_adv:
|
299 |
+
num_concepts = st.slider("์์ฑํ ์ปจ์
์:", 1, 5, 3, key="num_concepts_slider")
|
300 |
+
min_similarity = st.slider("์ต์ ์ ์ฌ๋:", 0.0, 1.0, 0.3, 0.1, key="min_similarity_slider")
|
301 |
+
with col2_adv:
|
302 |
+
show_references = st.checkbox("์ฐธ๊ณ ์นดํผ ๋ณด๊ธฐ", value=True, key="show_references_checkbox")
|
303 |
+
num_references = st.slider("์ฐธ๊ณ ์นดํผ ์:", 3, 10, 5, key="num_references_slider")
|
304 |
|
305 |
# RAG ์นดํผ ์์ฑ ํจ์ (์๋ฒ ๋ฉ ๊ธฐ๋ฐ ํ์!)
|
306 |
+
def generate_copy_with_rag(user_req, category_filter, target_aud, brand_tn, creative_lvl, num_con): # ๋ณ์๋ช
๋ณ๊ฒฝ
|
307 |
"""RAG ๊ธฐ๋ฐ ์นดํผ ์์ฑ - ์๋ฒ ๋ฉ ํ์ ์ฌ์ฉ"""
|
308 |
+
if not user_req.strip():
|
|
|
309 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
310 |
return None
|
311 |
|
|
|
312 |
progress_bar = st.progress(0)
|
313 |
+
status_text_gen = st.empty() # ๋ณ์๋ช
๋ณ๊ฒฝ
|
314 |
|
315 |
+
status_text_gen.text("๐ ์๋ฏธ์ ๊ฒ์ ์ค... (RAG ํต์ฌ ๊ธฐ๋ฅ)")
|
|
|
316 |
progress_bar.progress(20)
|
317 |
|
318 |
try:
|
319 |
+
search_query = f"{user_req} {target_aud} ๊ด๊ณ ์นดํผ"
|
320 |
+
from sklearn.metrics.pairwise import cosine_similarity # generate_copy_with_rag ๋ด์์ ์ํฌํธ
|
321 |
+
query_embedding = loaded_embedding_model.encode([search_query]) # ๋ก๋๋ ๋ชจ๋ธ ์ฌ์ฉ
|
322 |
+
|
323 |
+
if category_filter != '์ ์ฒด':
|
324 |
+
filtered_df_gen = loaded_df[loaded_df['์นดํ
๊ณ ๋ฆฌ'] == category_filter].copy() # .copy() ์ถ๊ฐ
|
|
|
|
|
325 |
else:
|
326 |
+
filtered_df_gen = loaded_df.copy() # .copy() ์ถ๊ฐ
|
327 |
|
328 |
progress_bar.progress(40)
|
329 |
|
330 |
+
if filtered_df_gen.empty:
|
331 |
+
st.warning(f"โ ๏ธ ์ ํํ์ ์นดํ
๊ณ ๋ฆฌ '{category_filter}'์ ํด๋นํ๋ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
332 |
+
progress_bar.empty()
|
333 |
+
status_text_gen.empty()
|
334 |
+
return None
|
335 |
+
|
336 |
+
|
337 |
+
filtered_indices = filtered_df_gen.index.tolist()
|
338 |
+
# loaded_embeddings์์ ์ง์ ์ธ๋ฑ์ฑํ๊ธฐ ์ ์, filtered_indices๊ฐ loaded_embeddings์ ๋ฒ์ ๋ด์ ์๋์ง ํ์ธ
|
339 |
+
valid_indices_for_embedding = [idx for idx in filtered_indices if idx < len(loaded_embeddings)]
|
340 |
+
if not valid_indices_for_embedding:
|
341 |
+
st.warning(f"โ ๏ธ ์ ํจํ ์ธ๋ฑ์ค๋ฅผ ์ฐพ์ ์ ์์ด ์ ์ฌ๋ ๊ฒ์์ ์งํํ ์ ์์ต๋๋ค. (์นดํ
๊ณ ๋ฆฌ: {category_filter})")
|
342 |
+
progress_bar.empty()
|
343 |
+
status_text_gen.empty()
|
344 |
+
return None
|
345 |
+
|
346 |
+
# ์ ํจํ ์ธ๋ฑ์ค์ ํด๋นํ๋ ์๋ฒ ๋ฉ๋ง ์ฌ์ฉ
|
347 |
+
# ์ด ๋ถ๋ถ์ ์๋ณธ ๋ฐ์ดํฐํ๋ ์(loaded_df)์ ์ธ๋ฑ์ค๋ฅผ ์ฌ์ฉํด์ผ ํจ
|
348 |
+
# filtered_df_gen์ ์ธ๋ฑ์ค๋ loaded_df์ ๋ถ๋ถ์งํฉ์ด๋ฏ๋ก,
|
349 |
+
# loaded_embeddings์์ ์ด ์ธ๋ฑ์ค๋ค์ ์ง์ ์ฌ์ฉํด์ผ ํฉ๋๋ค.
|
350 |
+
# ์ฃผ์: filtered_indices๋ loaded_df์ ์ค์ ์ธ๋ฑ์ค ๊ฐ์ด์ด์ผ ํจ.
|
351 |
+
# ๋ง์ฝ filtered_df_gen.index๊ฐ 0๋ถํฐ ์์ํ๋ ์๋ก์ด ์ธ๋ฑ์ค๋ผ๋ฉด, ๋งคํ ํ์.
|
352 |
+
# ํ์ฌ ์ฝ๋๋ filtered_df.index.tolist()๊ฐ ์๋ณธ ์ธ๋ฑ์ค๋ฅผ ์ ์งํ๋ค๊ณ ๊ฐ์ .
|
353 |
+
|
354 |
+
filtered_embeddings_for_search = loaded_embeddings[valid_indices_for_embedding]
|
355 |
+
# ์ ์ฌ๋ ๊ณ์ฐ ์ query_embedding๊ณผ filtered_embeddings_for_search์ ์ฐจ์ ํ์ธ
|
356 |
+
if query_embedding.shape[1] != filtered_embeddings_for_search.shape[1]:
|
357 |
+
st.error(f"โ ์๋ฒ ๋ฉ ์ฐจ์ ๋ถ์ผ์น: ์ฟผ๋ฆฌ({query_embedding.shape[1]}D), ๋ฌธ์({filtered_embeddings_for_search.shape[1]}D)")
|
358 |
+
return None
|
359 |
+
|
360 |
+
|
361 |
+
similarities = cosine_similarity(query_embedding, filtered_embeddings_for_search)[0]
|
362 |
+
|
363 |
+
# ์์ N๊ฐ (num_references) ์ ํ
|
364 |
+
# similarities์ ๊ธธ์ด๋ valid_indices_for_embedding์ ๊ธธ์ด์ ๊ฐ์
|
365 |
+
# top_indices๋ similarities ๋ฐฐ์ด ๋ด์ ์ธ๋ฑ์ค
|
366 |
+
num_to_select = min(num_references, len(similarities))
|
367 |
+
top_similarity_indices = np.argsort(similarities)[::-1][:num_to_select]
|
368 |
|
|
|
|
|
369 |
|
370 |
reference_copies = []
|
371 |
+
for i in top_similarity_indices:
|
372 |
+
# i๋ similarities ๋ฐฐ์ด์์์ ์ธ๋ฑ์ค.
|
373 |
+
# ์ด ์ธ๋ฑ์ค๋ฅผ ์ฌ์ฉํ์ฌ valid_indices_for_embedding์์ ์๋ณธ ๋ฐ์ดํฐํ๋ ์์ ์ธ๋ฑ์ค๋ฅผ ๊ฐ์ ธ์์ผ ํจ.
|
374 |
+
original_df_idx = valid_indices_for_embedding[i]
|
375 |
+
row = loaded_df.iloc[original_df_idx] # ์๋ณธ df์์ ๊ฐ์ ธ์ด
|
376 |
+
if similarities[i] >= min_similarity:
|
377 |
reference_copies.append({
|
378 |
'copy': row['์นดํผ ๋ด์ฉ'],
|
379 |
'brand': row['๋ธ๋๋'],
|
380 |
+
'similarity': float(similarities[i]) # float์ผ๋ก ๋ณํ (JSON ์ง๋ ฌํ ๋๋น)
|
381 |
})
|
|
|
382 |
progress_bar.progress(60)
|
383 |
|
384 |
if not reference_copies:
|
385 |
st.warning(f"โ ๏ธ ์ ์ฌ๋ {min_similarity} ์ด์์ธ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ์ ์ฌ๋๋ฅผ ๋ฎ์ถฐ๋ณด์ธ์.")
|
386 |
+
# ์ฐธ๊ณ ์นดํผ๊ฐ ์์ด๋ LLM์๊ฒ ์์ฑ์ ์์ฒญํ ์๋ ์๋๋ก ํจ (์ ํ์ฌํญ)
|
387 |
+
# progress_bar.empty()
|
388 |
+
# status_text_gen.empty()
|
389 |
+
# return None
|
390 |
+
references_text_for_prompt = "์ ์ฌ๋ ๋์ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค."
|
391 |
+
else:
|
392 |
+
references_text_for_prompt = "\n".join([
|
393 |
+
f"{j+1}. \"{ref['copy']}\" - {ref['brand']} (์ ์ฌ๋: {ref['similarity']:.3f})"
|
394 |
+
for j, ref in enumerate(reference_copies)
|
395 |
+
])
|
396 |
|
|
|
|
|
|
|
397 |
|
398 |
+
status_text_gen.text("๐ค AI ์นดํผ ์์ฑ ์ค...")
|
399 |
+
progress_bar.progress(80)
|
|
|
|
|
|
|
400 |
|
401 |
creativity_guidance = {
|
402 |
"๋ณด์์ ": "์์ ํ๊ณ ๊ฒ์ฆ๋ ํํ์ ์ฌ์ฉํ์ฌ",
|
403 |
"๊ท ํ": "์ฐฝ์์ ์ด๋ฉด์๋ ์ ์ ํ ์์ค์์",
|
404 |
"์ฐฝ์์ ": "๋
์ฐฝ์ ์ด๊ณ ํ์ ์ ์ธ ํํ์ผ๋ก"
|
405 |
}
|
|
|
406 |
prompt = f"""
|
407 |
๋น์ ์ ํ๊ตญ์ ์ ๋ฌธ ๊ด๊ณ ์นดํผ๋ผ์ดํฐ์
๋๋ค.
|
408 |
|
409 |
+
**์์ฒญ์ฌํญ:** {user_req}
|
410 |
+
**ํ๊ฒ ๊ณ ๊ฐ:** {target_aud}
|
411 |
+
**๋ธ๋๋ ํค:** {brand_tn}
|
412 |
+
**์ฐฝ์์ฑ ์์ค:** {creative_lvl} ({creativity_guidance[creative_lvl]})
|
413 |
|
414 |
**์ฐธ๊ณ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ):**
|
415 |
+
{references_text_for_prompt}
|
416 |
|
417 |
**์์ฑ ๊ฐ์ด๋๋ผ์ธ:**
|
418 |
+
1. ์ ์ฐธ๊ณ ์นดํผ๋ค์ ์คํ์ผ๊ณผ ํค์ ๋ถ์ํ๊ณ , ์์ฒญ์ฌํญ์ ๋ง์ถฐ ์๋ก์ด ์นดํผ {num_con}๊ฐ๋ฅผ ์์ฑํด์ฃผ์ธ์.
|
419 |
+
2. ๋ง์ฝ ์ฐธ๊ณ ์นดํผ๊ฐ ์๋ค๋ฉด, ์์ฒญ์ฌํญ๊ณผ ํ๊ฒ ๊ณ ๊ฐ, ๋ธ๋๋ ํค, ์ฐฝ์์ฑ ์์ค์๋ง ์ง์คํ์ฌ ์์ฑํด์ฃผ์ธ์.
|
420 |
+
3. ๊ฐ ์นดํผ๋ ํ๊ตญ์ด๋ก ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ด์ด์ผ ํฉ๋๋ค.
|
421 |
+
4. {target_aud}์๊ฒ ์ดํํ ์ ์๋ ํํ์ ์ฌ์ฉํด์ฃผ์ธ์.
|
422 |
+
5. {brand_tn} ํค์ค๋งค๋๋ฅผ ์ ์งํด์ฃผ์ธ์.
|
|
|
|
|
|
|
|
|
423 |
|
424 |
+
**์ถ๋ ฅ ํ์ (๊ฐ ์นดํผ์ ๊ฐ๋จํ ์ค๋ช
ํฌํจ):**
|
425 |
+
1. [์์ฑ๋ ์นดํผ 1]
|
426 |
+
- ์ค๋ช
: (์ด ์นดํผ๊ฐ ์ ํจ๊ณผ์ ์ธ์ง ๋๋ ์ด๋ค ์๋๋ก ์์ฑ๋์๋์ง)
|
427 |
|
428 |
+
2. [์์ฑ๋ ์นดํผ 2]
|
429 |
+
- ์ค๋ช
: (์ด ์นดํผ๊ฐ ์ ํจ๊ณผ์ ์ธ์ง ๋๋ ์ด๋ค ์๋๋ก ์์ฑ๋์๋์ง)
|
430 |
+
... (์์ฒญํ ์ปจ์
์๋งํผ ๋ฐ๋ณต)
|
431 |
|
432 |
+
**์ถ์ฒ ์นดํผ:** (์ ์์ฑ๋ ์นดํผ ์ค ๊ฐ์ฅ ์ถ์ฒํ๋ ๊ฒ ํ๋์ ๊ทธ ์ด์ )
|
433 |
"""
|
434 |
+
response = loaded_model.generate_content(prompt)
|
|
|
435 |
progress_bar.progress(100)
|
436 |
+
status_text_gen.text("โ
์๋ฃ!")
|
|
|
437 |
time.sleep(0.5)
|
438 |
progress_bar.empty()
|
439 |
+
status_text_gen.empty()
|
440 |
|
441 |
return {
|
442 |
'references': reference_copies,
|
443 |
'generated_content': response.text,
|
444 |
'search_info': {
|
445 |
'query': search_query,
|
446 |
+
'total_candidates': len(filtered_df_gen),
|
447 |
'selected_references': len(reference_copies)
|
448 |
},
|
449 |
'settings': {
|
450 |
+
'category': category_filter,
|
451 |
+
'target': target_aud,
|
452 |
+
'tone': brand_tn,
|
453 |
+
'creative': creative_lvl
|
454 |
}
|
455 |
}
|
456 |
+
except Exception as e_gen:
|
457 |
+
st.error(f"โ ์นดํผ ์์ฑ ์คํจ: {e_gen}")
|
458 |
+
st.error(f"์ค๋ฅ ํ์
: {type(e_gen)}") # ์ค๋ฅ ํ์
์ถ๋ ฅ
|
459 |
+
import traceback # ์์ธ ํธ๋ ์ด์ค๋ฐฑ
|
460 |
+
st.error(traceback.format_exc())
|
461 |
progress_bar.empty()
|
462 |
+
status_text_gen.empty()
|
463 |
return None
|
464 |
|
465 |
# ์์ฑ ๋ฒํผ
|
466 |
+
if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=True, key="generate_button"):
|
|
|
467 |
if not user_request or not user_request.strip():
|
468 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
469 |
else:
|
|
|
470 |
result = generate_copy_with_rag(
|
471 |
+
user_req=user_request,
|
472 |
+
category_filter=selected_category,
|
473 |
+
target_aud=target_audience,
|
474 |
+
brand_tn=brand_tone,
|
475 |
+
creative_lvl=creative_level,
|
476 |
+
num_con=num_concepts
|
477 |
)
|
|
|
478 |
if result:
|
|
|
479 |
st.markdown("## ๐ ์์ฑ๋ ์นดํผ")
|
480 |
st.markdown("---")
|
|
|
|
|
481 |
st.info(f"๐ **๊ฒ์ ์ ๋ณด**: {result['search_info']['total_candidates']:,}๊ฐ ํ๋ณด์์ "
|
482 |
f"{result['search_info']['selected_references']}๊ฐ ์ฐธ๊ณ ์นดํผ ์ ๋ณ")
|
|
|
|
|
483 |
if show_references and result['references']:
|
484 |
with st.expander("๐ ์ฐธ๊ณ ํ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ)"):
|
485 |
for i, ref in enumerate(result['references'], 1):
|
|
|
487 |
st.markdown(f" - ๋ธ๋๋: {ref['brand']}")
|
488 |
st.markdown(f" - ์ ์ฌ๋: {ref['similarity']:.3f}")
|
489 |
st.markdown("")
|
|
|
|
|
490 |
st.markdown("### โจ AI๊ฐ ์์ฑํ ์นดํผ:")
|
491 |
st.markdown(result['generated_content'])
|
492 |
+
try:
|
493 |
+
result_json = json.dumps({
|
494 |
+
'timestamp': datetime.now().isoformat(),
|
495 |
+
'request': user_request,
|
496 |
+
'settings': result['settings'],
|
497 |
+
'search_info': result['search_info'],
|
498 |
+
'generated_content': result['generated_content'],
|
499 |
+
'references': result['references'] # ์ฐธ๊ณ ์นดํผ๋ JSON์ ํฌํจ
|
500 |
+
}, ensure_ascii=False, indent=2)
|
501 |
+
st.download_button(
|
502 |
+
label="๐พ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ (JSON)",
|
503 |
+
data=result_json,
|
504 |
+
file_name=f"copy_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
|
505 |
+
mime="application/json",
|
506 |
+
key="download_button"
|
507 |
+
)
|
508 |
+
except Exception as e_json:
|
509 |
+
st.error(f"โ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ ํ์ผ ์์ฑ ์คํจ: {e_json}")
|
510 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
|
512 |
# ์์คํ
์ ๋ณด (์ฌ์ด๋๋ฐ ํ๋จ)
|
513 |
st.sidebar.markdown("---")
|
514 |
st.sidebar.markdown("### ๐ RAG ์์คํ
์ ๋ณด")
|
515 |
+
if loaded_df is not None and loaded_embeddings is not None:
|
516 |
+
st.sidebar.markdown(f"**์นดํผ ๋ฐ์ดํฐ**: {len(loaded_df):,}๊ฐ")
|
517 |
+
st.sidebar.markdown(f"**์นดํ
๊ณ ๋ฆฌ**: {loaded_df['์นดํ
๊ณ ๋ฆฌ'].nunique()}๊ฐ")
|
518 |
+
st.sidebar.markdown(f"**๋ธ๋๋**: {loaded_df['๋ธ๋๋'].nunique()}๊ฐ")
|
519 |
+
st.sidebar.markdown(f"**์๋ฒ ๋ฉ**: {loaded_embeddings.shape[1]}์ฐจ์") # ๋ก๋๋ ์๋ฒ ๋ฉ ์ฌ์ฉ
|
520 |
st.sidebar.markdown("**๊ฒ์ ์์ง**: Korean SBERT")
|
521 |
st.sidebar.markdown("**ํธ์คํ
**: ๐ค Hugging Face")
|
522 |
|
|
|
524 |
with st.expander("๐ก RAG ์์คํ
์ฌ์ฉ๋ฒ ๊ฐ์ด๋"):
|
525 |
st.markdown("""
|
526 |
### ๐ฏ ํจ๊ณผ์ ์ธ ์ฌ์ฉ๋ฒ
|
527 |
+
(๊ธฐ์กด ๋ด์ฉ๊ณผ ๋์ผ)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
""")
|
529 |
|
530 |
# ํธํฐ
|
|
|
535 |
)
|
536 |
|
537 |
# ์ฑ๋ฅ ๋ชจ๋ํฐ๋ง (๊ฐ๋ฐ์์ฉ)
|
538 |
+
if os.getenv("DEBUG_MODE") == "true": # ํ๊ฒฝ๋ณ์ ๊ฐ์ ๋ฌธ์์ด "true"๋ก ๋น๊ต
|
539 |
+
st.sidebar.markdown("### ๐ง ๋๋ฒ๊ทธ ์ ๋ณด (ํ์ฑํ๋จ)")
|
540 |
+
if 'loaded_embeddings' in locals() and loaded_embeddings is not None: # ๋ก๋๋ ๋ณ์ ์ฌ์ฉ
|
541 |
+
st.sidebar.write(f"์๋ฒ ๋ฉ ๋ฉ๋ชจ๋ฆฌ: {loaded_embeddings.nbytes / (1024*1024):.1f}MB")
|
542 |
st.sidebar.write(f"Streamlit ๋ฒ์ : {st.__version__}")
|
543 |
+
st.sidebar.write(f"Pandas ๋ฒ์ : {pd.__version__}")
|
544 |
+
st.sidebar.write(f"Numpy ๋ฒ์ (Global): {np.__version__ if 'np' in globals() else 'Not imported globally'}")
|
545 |
+
st.sidebar.write(f"Torch ๋ฒ์ : {torch.__version__ if 'torch' in globals() else 'Torch not directly used here'}") # torch๋ sentence-transformers ๋ด๋ถ ์ฌ์ฉ
|
546 |
+
st.sidebar.write(f"google-generativeai ๋ฒ์ : {genai.__version__}")
|