Spaces:
Sleeping
Sleeping
Update streamlit_app.py
Browse files- streamlit_app.py +89 -153
streamlit_app.py
CHANGED
@@ -51,13 +51,10 @@ st.sidebar.markdown("---")
|
|
51 |
st.sidebar.markdown("### โ๏ธ ๋ฐํ์ ํ๊ฒฝ ์ ๋ณด (๋๋ฒ๊น
์ฉ)")
|
52 |
st.sidebar.text(f"Py Exec: {sys.executable}")
|
53 |
st.sidebar.text(f"Py Ver: {sys.version.split()[0]}") # ๊ฐ๋ตํ๊ฒ ๋ฒ์ ๋ง
|
54 |
-
# st.sidebar.text(f"sys.path: {sys.path}") # ๋๋ฌด ๊ธธ์ด์ ์ผ๋จ ์ฃผ์
|
55 |
st.sidebar.text(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not Set')}")
|
56 |
try:
|
57 |
-
# numpy๋ฅผ ์ฌ๊ธฐ์ ๋ค์ ์ํฌํธํ๊ณ ์ฌ์ฉ
|
58 |
import numpy as np_runtime_check
|
59 |
st.sidebar.text(f"NumPy Ver (Runtime): {np_runtime_check.__version__}")
|
60 |
-
# ํต์ฌ ๋ชจ๋ ์ํฌํธ ์๋
|
61 |
import numpy.core._multiarray_umath
|
62 |
st.sidebar.markdown("โ
NumPy core modules imported (Runtime)")
|
63 |
except Exception as e:
|
@@ -75,16 +72,16 @@ default_api_key = os.getenv("GEMINI_API_KEY", "")
|
|
75 |
|
76 |
# 2. st.sidebar.text_input์ ์ฌ์ฉํ์ฌ ์ฌ์ฉ์์๊ฒ API ํค๋ฅผ ์
๋ ฅ๋ฐ๊ฑฐ๋,
|
77 |
# ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ ๊ฐ์ ๊ธฐ๋ณธ๊ฐ์ผ๋ก ๋ณด์ฌ์ค
|
78 |
-
api_key_value = st.sidebar.text_input(
|
79 |
"๐ Gemini API ํค",
|
80 |
-
value=default_api_key,
|
81 |
type="password",
|
82 |
help="ํ๊ฒฝ๋ณ์์ GEMINI_API_KEY๋ก ์ค์ ํ๋ฉด ์๋ ์
๋ ฅ๋ฉ๋๋ค",
|
83 |
-
key="gemini_api_key_input"
|
84 |
)
|
85 |
|
86 |
# 3. ์ฌ์ฉ์๊ฐ ์
๋ ฅํ๊ฑฐ๋ ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ API ํค ๊ฐ์ ์ฌ์ฉ
|
87 |
-
if not api_key_value:
|
88 |
st.warning("โ ๏ธ Gemini API ํค๋ฅผ ์
๋ ฅํด์ฃผ์ธ์")
|
89 |
st.info("๐ก Settings โ Repository secrets์์ GEMINI_API_KEY๋ฅผ ์ค์ ํ์ธ์")
|
90 |
st.stop()
|
@@ -95,61 +92,50 @@ if not api_key_value: # api_key_value ๋ณ์ ์ฌ์ฉ
|
|
95 |
def load_system():
|
96 |
"""์์คํ
์ปดํฌ๋ํธ ๋ก๋ฉ - ์๋ฒ ๋ฉ ๊ธฐ๋ฐ RAG ์์คํ
"""
|
97 |
|
98 |
-
# --- ํจ์ ์์ ์ ๋๋ฒ๊น
์ ๋ณด ์ถ๊ฐ ---
|
99 |
st.write("--- load_system() ์์ ---")
|
100 |
st.write(f"Python Executable (load_system): {sys.executable}")
|
101 |
st.write(f"Python Version (load_system): {sys.version}")
|
102 |
-
# st.write(f"sys.path (load_system): {sys.path}") # ๋๋ฌด ๊ธธ์ด์ ์ฃผ์
|
103 |
st.write(f"PYTHONPATH (load_system): {os.environ.get('PYTHONPATH')}")
|
104 |
try:
|
105 |
-
import numpy as np_load_system_check
|
106 |
st.write(f"NumPy version (load_system start): {np_load_system_check.__version__}")
|
107 |
import numpy.core._multiarray_umath
|
108 |
st.write("load_system start: Successfully imported numpy.core._multiarray_umath")
|
109 |
except Exception as e:
|
110 |
st.write(f"load_system start: Error importing NumPy parts: {e}")
|
111 |
-
# --- ๋๋ฒ๊น
์ ๋ณด ๋ ---
|
112 |
|
113 |
progress_container = st.container()
|
114 |
|
115 |
with progress_container:
|
116 |
-
# ์ ์ฒด ์งํ๋ฅ
|
117 |
total_progress = st.progress(0)
|
118 |
status_text = st.empty()
|
119 |
|
120 |
-
# 1๋จ๊ณ: API ์ค์ (10%)
|
121 |
status_text.text("๐ Gemini API ์ด๊ธฐํ ์ค...")
|
122 |
try:
|
123 |
-
|
124 |
-
|
|
|
125 |
total_progress.progress(10)
|
126 |
st.success("โ
Gemini API ์ค์ ์๋ฃ")
|
127 |
except Exception as e:
|
128 |
st.error(f"โ Gemini API ์ค์ ์คํจ: {e}")
|
129 |
return None, None, None, None
|
130 |
|
131 |
-
# 2๋จ๊ณ: ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ (40%)
|
132 |
status_text.text("๐ค ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค... (1-2๋ถ ์์)")
|
133 |
-
embedding_model_instance = None
|
134 |
-
|
135 |
try:
|
136 |
-
# sentence-transformers ์ํฌํธ๋ฅผ ํจ์ ๋ด์์ ์ ์ง
|
137 |
from sentence_transformers import SentenceTransformer
|
138 |
-
# from sklearn.metrics.pairwise import cosine_similarity # ์ฌ๊ธฐ์๋ ์์ง ํ์ ์์
|
139 |
-
|
140 |
embedding_model_instance = SentenceTransformer('jhgan/ko-sbert-nli',
|
141 |
-
cache_folder=SENTENCE_TRANSFORMERS_HOME_DIR)
|
142 |
total_progress.progress(40)
|
143 |
st.success("โ
ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ")
|
144 |
-
|
145 |
except Exception as e:
|
146 |
st.error(f"โ ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e}")
|
147 |
st.error("๐จ ์๋ฒ ๋ฉ ๋ชจ๋ธ ์์ด๋ RAG ์์คํ
์ด ์๋ํ ์ ์์ต๋๋ค!")
|
148 |
return None, None, None, None
|
149 |
|
150 |
-
# 3๋จ๊ณ: ๋ฐ์ดํฐ ๋ก๋ (60%)
|
151 |
status_text.text("๐ ์นดํผ ๋ฐ์ดํฐ๋ฒ ์ด์ค ๋ก๋ฉ ์ค...")
|
152 |
-
df_data = None
|
153 |
try:
|
154 |
df_data = pd.read_excel('๊ด๊ณ ์นดํผ๋ฐ์ดํฐ_๋ธ๋๋์ถ์ถ์๋ฃ.xlsx')
|
155 |
total_progress.progress(60)
|
@@ -158,27 +144,23 @@ def load_system():
|
|
158 |
st.error(f"โ ๋ฐ์ดํฐ ๋ก๋ฉ ์คํจ: {e}")
|
159 |
return None, None, None, None
|
160 |
|
161 |
-
# 4๋จ๊ณ: ์๋ฒ ๋ฉ ๋ฐ์ดํฐ ๋ก๋ (90%) - ์ด๊ฒ ํต์ฌ!
|
162 |
status_text.text("๐ ๋ฒกํฐ ์๋ฒ ๋ฉ ๋ก๋ฉ ์ค... (RAG ์์คํ
ํต์ฌ)")
|
163 |
-
embeddings_array = None
|
164 |
try:
|
165 |
-
|
166 |
-
import numpy as np_pickle_check # ์ ๋ณ์นญ ์ฌ์ฉ
|
167 |
st.write(f"[DEBUG] NumPy version just before pickle.load: {np_pickle_check.__version__}")
|
168 |
import numpy.core._multiarray_umath
|
169 |
st.write("[DEBUG] Successfully imported numpy.core._multiarray_umath before pickle.load")
|
170 |
-
# --- ๋๋ฒ๊น
๋ ---
|
171 |
|
172 |
with open('copy_embeddings.pkl', 'rb') as f:
|
173 |
embeddings_data = pickle.load(f)
|
174 |
embeddings_array = embeddings_data['embeddings']
|
175 |
total_progress.progress(90)
|
176 |
st.success(f"โ
์๋ฒ ๋ฉ ๋ก๋ฉ ์๋ฃ: {embeddings_array.shape[0]:,}๊ฐ ร {embeddings_array.shape[1]}์ฐจ์")
|
177 |
-
except ModuleNotFoundError as mnfe:
|
178 |
st.error(f"โ ์๋ฒ ๋ฉ ๋ก๋ฉ ์คํจ (ModuleNotFoundError): {mnfe}")
|
179 |
st.error(f"๐จ ํด๋น ๋ชจ๋์ ์ฐพ์ ์ ์์ต๋๋ค. sys.path: {sys.path}")
|
180 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
181 |
-
# ์ถ๊ฐ ๋๋ฒ๊น
: ํ์ฌ ๋ก๋๋ numpy ๊ฐ์ฒด ์ํ
|
182 |
try:
|
183 |
import numpy as np_final_check
|
184 |
st.error(f"[DEBUG] NumPy object at failure: {np_final_check}")
|
@@ -191,12 +173,10 @@ def load_system():
|
|
191 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
192 |
return None, None, None, None
|
193 |
|
194 |
-
# 5๋จ๊ณ: ์ต์ข
๊ฒ์ฆ (100%)
|
195 |
status_text.text("โจ ์์คํ
๊ฒ์ฆ ์ค...")
|
196 |
if model_llm and embedding_model_instance and df_data is not None and embeddings_array is not None:
|
197 |
total_progress.progress(100)
|
198 |
status_text.text("๐ RAG ์์คํ
๋ก๋ฉ ์๋ฃ!")
|
199 |
-
|
200 |
success_col1, success_col2, success_col3 = st.columns(3)
|
201 |
with success_col1:
|
202 |
st.metric("์นดํผ ๋ฐ์ดํฐ", f"{len(df_data):,}๊ฐ")
|
@@ -204,18 +184,15 @@ def load_system():
|
|
204 |
st.metric("์๋ฒ ๋ฉ ์ฐจ์", f"{embeddings_array.shape[1]}D")
|
205 |
with success_col3:
|
206 |
st.metric("๊ฒ์ ์์ง", "Korean SBERT")
|
207 |
-
|
208 |
time.sleep(1)
|
209 |
total_progress.empty()
|
210 |
status_text.empty()
|
211 |
-
|
212 |
-
# ์ ์ญ ๋ณ์๋ช
๊ณผ์ ์ถฉ๋์ ํผํ๊ธฐ ์ํด ํจ์ ๋ด์์ ์ฌ์ฉํ ๋ณ์๋ช
์ผ๋ก ๋ฐํ
|
213 |
return model_llm, embedding_model_instance, df_data, embeddings_array
|
214 |
else:
|
215 |
st.error("โ ์์คํ
๋ก๋ฉ ์คํจ: ํ์ ๊ตฌ์ฑ์์ ๋๋ฝ")
|
216 |
return None, None, None, None
|
217 |
|
218 |
-
# ์์คํ
๋ก๋ฉ
|
219 |
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = None, None, None, None
|
220 |
with st.spinner("๐ AI ์นดํผ๋ผ์ดํฐ ์์คํ
์ด๊ธฐํ ์ค..."):
|
221 |
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = load_system()
|
@@ -224,6 +201,7 @@ if loaded_model is None or loaded_embedding_model is None or loaded_df is None o
|
|
224 |
st.error("โ ์์คํ
์ ๋ก๋ฉํ ์ ์์ต๋๋ค. ํ์ด์ง๋ฅผ ์๋ก๊ณ ์นจํ๊ฑฐ๋ ๊ด๋ฆฌ์์๊ฒ ๋ฌธ์ํ์ธ์.")
|
225 |
st.stop()
|
226 |
|
|
|
227 |
# ์ฌ์ด๋๋ฐ ์ค์ (์์คํ
๋ก๋ฉ ์ฑ๊ณต ํ)
|
228 |
st.sidebar.success("๐ RAG ์์คํ
์ค๋น ์๋ฃ!")
|
229 |
|
@@ -232,21 +210,24 @@ categories = ['์ ์ฒด'] + sorted(loaded_df['์นดํ
๊ณ ๋ฆฌ'].unique().tolist())
|
|
232 |
selected_category = st.sidebar.selectbox(
|
233 |
"๐ ์นดํ
๊ณ ๋ฆฌ",
|
234 |
categories,
|
235 |
-
help="ํน์ ์นดํ
๊ณ ๋ฆฌ๋ก ๊ฒ์์ ์ ํํ ์ ์์ต๋๋ค"
|
|
|
236 |
)
|
237 |
|
238 |
# ํ๊ฒ ๊ณ ๊ฐ ์ค์
|
239 |
target_audience = st.sidebar.selectbox(
|
240 |
"๐ฏ ํ๊ฒ ๊ณ ๊ฐ",
|
241 |
['20๋', '30๋', '์ผ๋ฐ', '10๋', '40๋', '50๋+', '๋จ์ฑ', '์ฌ์ฑ', '์ง์ฅ์ธ', 'ํ์', '์ฃผ๋ถ'],
|
242 |
-
help="ํ๊ฒ ๊ณ ๊ฐ์ ๋ง๋ ํค์ค๋งค๋๋ก ์นดํผ๋ฅผ ์์ฑํฉ๋๋ค"
|
|
|
243 |
)
|
244 |
|
245 |
# ๋ธ๋๋ ํค์ค๋งค๋
|
246 |
brand_tone = st.sidebar.selectbox(
|
247 |
"๐จ ๋ธ๋๋ ํค",
|
248 |
['์ธ๋ จ๋', '์น๊ทผํ', '๊ณ ๊ธ์ค๋ฌ์ด', 'ํ๊ธฐ์ฐฌ', '์ ๋ขฐํ ์ ์๋', '์ ์', '๋ฐ๋ปํ', '์ ๋ฌธ์ ์ธ'],
|
249 |
-
help="์ํ๋
|
|
|
250 |
)
|
251 |
|
252 |
# ์ฐฝ์์ฑ ์์ค
|
@@ -254,7 +235,8 @@ creative_level = st.sidebar.select_slider(
|
|
254 |
"๐ง ์ฐฝ์์ฑ ์์ค",
|
255 |
options=['๋ณด์์ ', '๊ท ํ', '์ฐฝ์์ '],
|
256 |
value='๊ท ํ',
|
257 |
-
help="๋ณด์์ : ์์ ํ ํํ, ์ฐฝ์์ : ๋
์ฐฝ์ ํํ"
|
|
|
258 |
)
|
259 |
|
260 |
# ๋ฉ์ธ ์
๋ ฅ ์์ญ
|
@@ -265,7 +247,7 @@ input_method = st.radio(
|
|
265 |
"์
๋ ฅ ๋ฐฉ์ ์ ํ:",
|
266 |
["์ง์ ์
๋ ฅ", "ํ
ํ๋ฆฟ ์ ํ"],
|
267 |
horizontal=True,
|
268 |
-
key="input_method_radio"
|
269 |
)
|
270 |
|
271 |
if input_method == "์ง์ ์
๋ ฅ":
|
@@ -273,7 +255,7 @@ if input_method == "์ง์ ์
๋ ฅ":
|
|
273 |
"์นดํผ ์์ฒญ์ ์์ธํ ์์ฑํด์ฃผ์ธ์:",
|
274 |
placeholder="์: 30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
275 |
height=100,
|
276 |
-
key="user_request_direct"
|
277 |
)
|
278 |
else:
|
279 |
templates = {
|
@@ -305,7 +287,7 @@ else:
|
|
305 |
|
306 |
# ๊ณ ๊ธ ์ต์
|
307 |
with st.expander("๐ง ๊ณ ๊ธ ์ต์
"):
|
308 |
-
col1_adv, col2_adv = st.columns(2)
|
309 |
with col1_adv:
|
310 |
num_concepts = st.slider("์์ฑํ ์ปจ์
์:", 1, 5, 3, key="num_concepts_slider")
|
311 |
min_similarity = st.slider("์ต์ ์ ์ฌ๋:", 0.0, 1.0, 0.3, 0.1, key="min_similarity_slider")
|
@@ -314,90 +296,63 @@ with st.expander("๐ง ๊ณ ๊ธ ์ต์
"):
|
|
314 |
num_references = st.slider("์ฐธ๊ณ ์นดํผ ์:", 3, 10, 5, key="num_references_slider")
|
315 |
|
316 |
# RAG ์นดํผ ์์ฑ ํจ์ (์๋ฒ ๋ฉ ๊ธฐ๋ฐ ํ์!)
|
317 |
-
def generate_copy_with_rag(user_req, category_filter, target_aud, brand_tn, creative_lvl, num_con):
|
318 |
-
"""RAG ๊ธฐ๋ฐ ์นดํผ ์์ฑ - ์๋ฒ ๋ฉ ํ์ ์ฌ์ฉ"""
|
319 |
if not user_req.strip():
|
320 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
321 |
return None
|
322 |
|
323 |
progress_bar = st.progress(0)
|
324 |
-
status_text_gen = st.empty()
|
325 |
|
326 |
status_text_gen.text("๐ ์๋ฏธ์ ๊ฒ์ ์ค... (RAG ํต์ฌ ๊ธฐ๋ฅ)")
|
327 |
progress_bar.progress(20)
|
328 |
|
329 |
try:
|
330 |
search_query = f"{user_req} {target_aud} ๊ด๊ณ ์นดํผ"
|
331 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
332 |
-
query_embedding = loaded_embedding_model.encode([search_query])
|
333 |
|
334 |
if category_filter != '์ ์ฒด':
|
335 |
-
filtered_df_gen = loaded_df[loaded_df['์นดํ
๊ณ ๋ฆฌ'] == category_filter].copy()
|
336 |
else:
|
337 |
-
filtered_df_gen = loaded_df.copy()
|
338 |
|
339 |
progress_bar.progress(40)
|
340 |
|
341 |
if filtered_df_gen.empty:
|
342 |
st.warning(f"โ ๏ธ ์ ํํ์ ์นดํ
๊ณ ๋ฆฌ '{category_filter}'์ ํด๋นํ๋ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
343 |
-
progress_bar.empty()
|
344 |
-
status_text_gen.empty()
|
345 |
-
return None
|
346 |
-
|
347 |
|
348 |
filtered_indices = filtered_df_gen.index.tolist()
|
349 |
-
# loaded_embeddings์์ ์ง์ ์ธ๋ฑ์ฑํ๊ธฐ ์ ์, filtered_indices๊ฐ loaded_embeddings์ ๋ฒ์ ๋ด์ ์๋์ง ํ์ธ
|
350 |
valid_indices_for_embedding = [idx for idx in filtered_indices if idx < len(loaded_embeddings)]
|
351 |
if not valid_indices_for_embedding:
|
352 |
st.warning(f"โ ๏ธ ์ ํจํ ์ธ๋ฑ์ค๋ฅผ ์ฐพ์ ์ ์์ด ์ ์ฌ๋ ๊ฒ์์ ์งํํ ์ ์์ต๋๋ค. (์นดํ
๊ณ ๋ฆฌ: {category_filter})")
|
353 |
-
progress_bar.empty()
|
354 |
-
status_text_gen.empty()
|
355 |
-
return None
|
356 |
-
|
357 |
-
# ์ ํจํ ์ธ๋ฑ์ค์ ํด๋นํ๋ ์๋ฒ ๋ฉ๋ง ์ฌ์ฉ
|
358 |
-
# ์ด ๋ถ๋ถ์ ์๋ณธ ๋ฐ์ดํฐํ๋ ์(loaded_df)์ ์ธ๋ฑ์ค๋ฅผ ์ฌ์ฉํด์ผ ํจ
|
359 |
-
# filtered_df_gen์ ์ธ๋ฑ์ค๋ loaded_df์ ๋ถ๋ถ์งํฉ์ด๋ฏ๋ก,
|
360 |
-
# loaded_embeddings์์ ์ด ์ธ๋ฑ์ค๋ค์ ์ง์ ์ฌ์ฉํด์ผ ํฉ๋๋ค.
|
361 |
-
# ์ฃผ์: filtered_indices๋ loaded_df์ ์ค์ ์ธ๋ฑ์ค ๊ฐ์ด์ด์ผ ํจ.
|
362 |
-
# ๋ง์ฝ filtered_df_gen.index๊ฐ 0๋ถํฐ ์์ํ๋ ์๋ก์ด ์ธ๋ฑ์ค๋ผ๋ฉด, ๋งคํ ํ์.
|
363 |
-
# ํ์ฌ ์ฝ๋๋ filtered_df.index.tolist()๊ฐ ์๋ณธ ์ธ๋ฑ์ค๋ฅผ ์ ์งํ๋ค๊ณ ๊ฐ์ .
|
364 |
|
365 |
filtered_embeddings_for_search = loaded_embeddings[valid_indices_for_embedding]
|
366 |
-
# ์ ์ฌ๋ ๊ณ์ฐ ์ query_embedding๊ณผ filtered_embeddings_for_search์ ์ฐจ์ ํ์ธ
|
367 |
if query_embedding.shape[1] != filtered_embeddings_for_search.shape[1]:
|
368 |
st.error(f"โ ์๋ฒ ๋ฉ ์ฐจ์ ๋ถ์ผ์น: ์ฟผ๋ฆฌ({query_embedding.shape[1]}D), ๋ฌธ์({filtered_embeddings_for_search.shape[1]}D)")
|
369 |
return None
|
370 |
|
371 |
-
|
372 |
similarities = cosine_similarity(query_embedding, filtered_embeddings_for_search)[0]
|
373 |
-
|
374 |
-
# ์์ N๊ฐ (num_references) ์ ํ
|
375 |
-
# similarities์ ๊ธธ์ด๋ valid_indices_for_embedding์ ๊ธธ์ด์ ๊ฐ์
|
376 |
-
# top_indices๋ similarities ๋ฐฐ์ด ๋ด์ ์ธ๋ฑ์ค
|
377 |
num_to_select = min(num_references, len(similarities))
|
378 |
-
|
379 |
-
|
|
|
380 |
|
381 |
reference_copies = []
|
382 |
for i in top_similarity_indices:
|
383 |
-
# i๋ similarities ๋ฐฐ์ด์์์ ์ธ๋ฑ์ค.
|
384 |
-
# ์ด ์ธ๋ฑ์ค๋ฅผ ์ฌ์ฉํ์ฌ valid_indices_for_embedding์์ ์๋ณธ ๋ฐ์ดํฐํ๋ ์์ ์ธ๋ฑ์ค๋ฅผ ๊ฐ์ ธ์์ผ ํจ.
|
385 |
original_df_idx = valid_indices_for_embedding[i]
|
386 |
-
row = loaded_df.iloc[original_df_idx]
|
387 |
if similarities[i] >= min_similarity:
|
388 |
reference_copies.append({
|
389 |
'copy': row['์นดํผ ๋ด์ฉ'],
|
390 |
'brand': row['๋ธ๋๋'],
|
391 |
-
'similarity': float(similarities[i])
|
392 |
})
|
393 |
progress_bar.progress(60)
|
394 |
|
395 |
if not reference_copies:
|
396 |
-
st.warning(f"โ ๏ธ ์ ์ฌ๋ {min_similarity} ์ด์์ธ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ์ ์ฌ๋๋ฅผ ๋ฎ์ถฐ๋ณด์ธ์.")
|
397 |
-
# ์ฐธ๊ณ ์นดํผ๊ฐ ์์ด๋ LLM์๊ฒ ์์ฑ์ ์์ฒญํ ์๋ ์๋๋ก ํจ (์ ํ์ฌํญ)
|
398 |
-
# progress_bar.empty()
|
399 |
-
# status_text_gen.empty()
|
400 |
-
# return None
|
401 |
references_text_for_prompt = "์ ์ฌ๋ ๋์ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค."
|
402 |
else:
|
403 |
references_text_for_prompt = "\n".join([
|
@@ -405,73 +360,50 @@ def generate_copy_with_rag(user_req, category_filter, target_aud, brand_tn, crea
|
|
405 |
for j, ref in enumerate(reference_copies)
|
406 |
])
|
407 |
|
408 |
-
|
409 |
status_text_gen.text("๐ค AI ์นดํผ ์์ฑ ์ค...")
|
410 |
progress_bar.progress(80)
|
411 |
-
|
412 |
creativity_guidance = {
|
413 |
-
"๋ณด์์ ": "์์ ํ๊ณ ๊ฒ์ฆ๋ ํํ์ ์ฌ์ฉํ์ฌ",
|
414 |
-
"๊ท ํ": "์ฐฝ์์ ์ด๋ฉด์๋ ์ ์ ํ ์์ค์์",
|
415 |
"์ฐฝ์์ ": "๋
์ฐฝ์ ์ด๊ณ ํ์ ์ ์ธ ํํ์ผ๋ก"
|
416 |
}
|
417 |
prompt = f"""
|
418 |
๋น์ ์ ํ๊ตญ์ ์ ๋ฌธ ๊ด๊ณ ์นดํผ๋ผ์ดํฐ์
๋๋ค.
|
419 |
-
|
420 |
**์์ฒญ์ฌํญ:** {user_req}
|
421 |
**ํ๊ฒ ๊ณ ๊ฐ:** {target_aud}
|
422 |
**๋ธ๋๋ ํค:** {brand_tn}
|
423 |
**์ฐฝ์์ฑ ์์ค:** {creative_lvl} ({creativity_guidance[creative_lvl]})
|
424 |
-
|
425 |
**์ฐธ๊ณ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ):**
|
426 |
{references_text_for_prompt}
|
427 |
-
|
428 |
**์์ฑ ๊ฐ์ด๋๋ผ์ธ:**
|
429 |
1. ์ ์ฐธ๊ณ ์นดํผ๋ค์ ์คํ์ผ๊ณผ ํค์ ๋ถ์ํ๊ณ , ์์ฒญ์ฌํญ์ ๋ง์ถฐ ์๋ก์ด ์นดํผ {num_con}๊ฐ๋ฅผ ์์ฑํด์ฃผ์ธ์.
|
430 |
2. ๋ง์ฝ ์ฐธ๊ณ ์นดํผ๊ฐ ์๋ค๋ฉด, ์์ฒญ์ฌํญ๊ณผ ํ๊ฒ ๊ณ ๊ฐ, ๋ธ๋๋ ํค, ์ฐฝ์์ฑ ์์ค์๋ง ์ง์คํ์ฌ ์์ฑํด์ฃผ์ธ์.
|
431 |
3. ๊ฐ ์นดํผ๋ ํ๊ตญ์ด๋ก ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ด์ด์ผ ํฉ๋๋ค.
|
432 |
4. {target_aud}์๊ฒ ์ดํํ ์ ์๋ ํํ์ ์ฌ์ฉํด์ฃผ์ธ์.
|
433 |
5. {brand_tn} ํค์ค๋งค๋๋ฅผ ์ ์งํด์ฃผ์ธ์.
|
434 |
-
|
435 |
**์ถ๋ ฅ ํ์ (๊ฐ ์นดํผ์ ๊ฐ๋จํ ์ค๋ช
ํฌํจ):**
|
436 |
1. [์์ฑ๋ ์นดํผ 1]
|
437 |
- ์ค๋ช
: (์ด ์นดํผ๊ฐ ์ ํจ๊ณผ์ ์ธ์ง ๋๋ ์ด๋ค ์๋๋ก ์์ฑ๋์๋์ง)
|
438 |
-
|
439 |
-
2. [์์ฑ๋ ์นดํผ 2]
|
440 |
-
- ์ค๋ช
: (์ด ์นดํผ๊ฐ ์ ํจ๊ณผ์ ์ธ์ง ๋๋ ์ด๋ค ์๋๋ก ์์ฑ๋์๋์ง)
|
441 |
... (์์ฒญํ ์ปจ์
์๋งํผ ๋ฐ๋ณต)
|
442 |
-
|
443 |
**์ถ์ฒ ์นดํผ:** (์ ์์ฑ๋ ์นดํผ ์ค ๊ฐ์ฅ ์ถ์ฒํ๋ ๊ฒ ํ๋์ ๊ทธ ์ด์ )
|
444 |
"""
|
445 |
response = loaded_model.generate_content(prompt)
|
446 |
-
progress_bar.progress(100)
|
447 |
-
status_text_gen.
|
448 |
-
time.sleep(0.5)
|
449 |
-
progress_bar.empty()
|
450 |
-
status_text_gen.empty()
|
451 |
-
|
452 |
return {
|
453 |
-
'references': reference_copies,
|
454 |
-
'generated_content': response.text,
|
455 |
'search_info': {
|
456 |
-
'query': search_query,
|
457 |
-
'total_candidates': len(filtered_df_gen),
|
458 |
'selected_references': len(reference_copies)
|
459 |
},
|
460 |
'settings': {
|
461 |
-
'category': category_filter,
|
462 |
-
'
|
463 |
-
'tone': brand_tn,
|
464 |
-
'creative': creative_lvl
|
465 |
}
|
466 |
}
|
467 |
except Exception as e_gen:
|
468 |
-
st.error(f"โ ์นดํผ ์์ฑ ์คํจ: {e_gen}")
|
469 |
-
st.error(
|
470 |
-
|
471 |
-
st.error(traceback.format_exc())
|
472 |
-
progress_bar.empty()
|
473 |
-
status_text_gen.empty()
|
474 |
-
return None
|
475 |
|
476 |
# ์์ฑ ๋ฒํผ
|
477 |
if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=True, key="generate_button"):
|
@@ -479,16 +411,11 @@ if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=Tru
|
|
479 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
480 |
else:
|
481 |
result = generate_copy_with_rag(
|
482 |
-
user_req=user_request,
|
483 |
-
|
484 |
-
target_aud=target_audience,
|
485 |
-
brand_tn=brand_tone,
|
486 |
-
creative_lvl=creative_level,
|
487 |
-
num_con=num_concepts
|
488 |
)
|
489 |
if result:
|
490 |
-
st.markdown("## ๐ ์์ฑ๋ ์นดํผ")
|
491 |
-
st.markdown("---")
|
492 |
st.info(f"๐ **๊ฒ์ ์ ๋ณด**: {result['search_info']['total_candidates']:,}๊ฐ ํ๋ณด์์ "
|
493 |
f"{result['search_info']['selected_references']}๊ฐ ์ฐธ๊ณ ์นดํผ ์ ๋ณ")
|
494 |
if show_references and result['references']:
|
@@ -496,46 +423,49 @@ if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=Tru
|
|
496 |
for i, ref in enumerate(result['references'], 1):
|
497 |
st.markdown(f"**{i}.** \"{ref['copy']}\"")
|
498 |
st.markdown(f" - ๋ธ๋๋: {ref['brand']}")
|
499 |
-
st.markdown(f" - ์ ์ฌ๋: {ref['similarity']:.3f}")
|
500 |
-
|
501 |
-
st.markdown("### โจ AI๊ฐ ์์ฑํ ์นดํผ:")
|
502 |
-
st.markdown(result['generated_content'])
|
503 |
try:
|
504 |
result_json = json.dumps({
|
505 |
-
'timestamp': datetime.now().isoformat(),
|
506 |
-
'
|
507 |
-
'
|
508 |
-
'search_info': result['search_info'],
|
509 |
-
'generated_content': result['generated_content'],
|
510 |
-
'references': result['references'] # ์ฐธ๊ณ ์นดํผ๋ JSON์ ํฌํจ
|
511 |
}, ensure_ascii=False, indent=2)
|
512 |
st.download_button(
|
513 |
-
label="๐พ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ (JSON)",
|
514 |
-
data=result_json,
|
515 |
file_name=f"copy_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
|
516 |
-
mime="application/json",
|
517 |
-
key="download_button"
|
518 |
)
|
519 |
-
except Exception as e_json:
|
520 |
-
st.error(f"โ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ ํ์ผ ์์ฑ ์คํจ: {e_json}")
|
521 |
-
|
522 |
|
523 |
# ์์คํ
์ ๋ณด (์ฌ์ด๋๋ฐ ํ๋จ)
|
524 |
-
st.sidebar.markdown("---")
|
525 |
-
st.sidebar.markdown("### ๐ RAG ์์คํ
์ ๋ณด")
|
526 |
if loaded_df is not None and loaded_embeddings is not None:
|
527 |
st.sidebar.markdown(f"**์นดํผ ๋ฐ์ดํฐ**: {len(loaded_df):,}๊ฐ")
|
528 |
st.sidebar.markdown(f"**์นดํ
๊ณ ๋ฆฌ**: {loaded_df['์นดํ
๊ณ ๋ฆฌ'].nunique()}๊ฐ")
|
529 |
st.sidebar.markdown(f"**๋ธ๋๋**: {loaded_df['๋ธ๋๋'].nunique()}๊ฐ")
|
530 |
-
st.sidebar.markdown(f"**์๋ฒ ๋ฉ**: {loaded_embeddings.shape[1]}์ฐจ์")
|
531 |
-
st.sidebar.markdown("**๊ฒ์ ์์ง**: Korean SBERT")
|
532 |
-
st.sidebar.markdown("**ํธ์คํ
**: ๐ค Hugging Face")
|
533 |
|
534 |
# ์ฌ์ฉ๋ฒ ๊ฐ์ด๋
|
535 |
with st.expander("๐ก RAG ์์คํ
์ฌ์ฉ๋ฒ ๊ฐ์ด๋"):
|
536 |
st.markdown("""
|
537 |
### ๐ฏ ํจ๊ณผ์ ์ธ ์ฌ์ฉ๋ฒ
|
538 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
""")
|
540 |
|
541 |
# ํธํฐ
|
@@ -546,12 +476,18 @@ st.markdown(
|
|
546 |
)
|
547 |
|
548 |
# ์ฑ๋ฅ ๋ชจ๋ํฐ๋ง (๊ฐ๋ฐ์์ฉ)
|
549 |
-
if os.getenv("DEBUG_MODE") == "true":
|
550 |
st.sidebar.markdown("### ๐ง ๋๋ฒ๊ทธ ์ ๋ณด (ํ์ฑํ๋จ)")
|
551 |
-
if 'loaded_embeddings' in locals() and loaded_embeddings is not None:
|
552 |
st.sidebar.write(f"์๋ฒ ๋ฉ ๋ฉ๋ชจ๋ฆฌ: {loaded_embeddings.nbytes / (1024*1024):.1f}MB")
|
553 |
st.sidebar.write(f"Streamlit ๋ฒ์ : {st.__version__}")
|
554 |
st.sidebar.write(f"Pandas ๋ฒ์ : {pd.__version__}")
|
555 |
-
|
556 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
st.sidebar.write(f"google-generativeai ๋ฒ์ : {genai.__version__}")
|
|
|
51 |
st.sidebar.markdown("### โ๏ธ ๋ฐํ์ ํ๊ฒฝ ์ ๋ณด (๋๋ฒ๊น
์ฉ)")
|
52 |
st.sidebar.text(f"Py Exec: {sys.executable}")
|
53 |
st.sidebar.text(f"Py Ver: {sys.version.split()[0]}") # ๊ฐ๋ตํ๊ฒ ๋ฒ์ ๋ง
|
|
|
54 |
st.sidebar.text(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not Set')}")
|
55 |
try:
|
|
|
56 |
import numpy as np_runtime_check
|
57 |
st.sidebar.text(f"NumPy Ver (Runtime): {np_runtime_check.__version__}")
|
|
|
58 |
import numpy.core._multiarray_umath
|
59 |
st.sidebar.markdown("โ
NumPy core modules imported (Runtime)")
|
60 |
except Exception as e:
|
|
|
72 |
|
73 |
# 2. st.sidebar.text_input์ ์ฌ์ฉํ์ฌ ์ฌ์ฉ์์๊ฒ API ํค๋ฅผ ์
๋ ฅ๋ฐ๊ฑฐ๋,
|
74 |
# ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ ๊ฐ์ ๊ธฐ๋ณธ๊ฐ์ผ๋ก ๋ณด์ฌ์ค
|
75 |
+
api_key_value = st.sidebar.text_input(
|
76 |
"๐ Gemini API ํค",
|
77 |
+
value=default_api_key,
|
78 |
type="password",
|
79 |
help="ํ๊ฒฝ๋ณ์์ GEMINI_API_KEY๋ก ์ค์ ํ๋ฉด ์๋ ์
๋ ฅ๋ฉ๋๋ค",
|
80 |
+
key="gemini_api_key_input"
|
81 |
)
|
82 |
|
83 |
# 3. ์ฌ์ฉ์๊ฐ ์
๋ ฅํ๊ฑฐ๋ ํ๊ฒฝ๋ณ์์์ ๊ฐ์ ธ์จ API ํค ๊ฐ์ ์ฌ์ฉ
|
84 |
+
if not api_key_value:
|
85 |
st.warning("โ ๏ธ Gemini API ํค๋ฅผ ์
๋ ฅํด์ฃผ์ธ์")
|
86 |
st.info("๐ก Settings โ Repository secrets์์ GEMINI_API_KEY๋ฅผ ์ค์ ํ์ธ์")
|
87 |
st.stop()
|
|
|
92 |
def load_system():
|
93 |
"""์์คํ
์ปดํฌ๋ํธ ๋ก๋ฉ - ์๋ฒ ๋ฉ ๊ธฐ๋ฐ RAG ์์คํ
"""
|
94 |
|
|
|
95 |
st.write("--- load_system() ์์ ---")
|
96 |
st.write(f"Python Executable (load_system): {sys.executable}")
|
97 |
st.write(f"Python Version (load_system): {sys.version}")
|
|
|
98 |
st.write(f"PYTHONPATH (load_system): {os.environ.get('PYTHONPATH')}")
|
99 |
try:
|
100 |
+
import numpy as np_load_system_check
|
101 |
st.write(f"NumPy version (load_system start): {np_load_system_check.__version__}")
|
102 |
import numpy.core._multiarray_umath
|
103 |
st.write("load_system start: Successfully imported numpy.core._multiarray_umath")
|
104 |
except Exception as e:
|
105 |
st.write(f"load_system start: Error importing NumPy parts: {e}")
|
|
|
106 |
|
107 |
progress_container = st.container()
|
108 |
|
109 |
with progress_container:
|
|
|
110 |
total_progress = st.progress(0)
|
111 |
status_text = st.empty()
|
112 |
|
|
|
113 |
status_text.text("๐ Gemini API ์ด๊ธฐํ ์ค...")
|
114 |
try:
|
115 |
+
# ์ ์ญ ๋ณ์ api_key_value๋ฅผ ๋ช
์์ ์ผ๋ก ์ฌ์ฉ
|
116 |
+
genai.configure(api_key=api_key_value)
|
117 |
+
model_llm = genai.GenerativeModel('gemini-1.5-flash')
|
118 |
total_progress.progress(10)
|
119 |
st.success("โ
Gemini API ์ค์ ์๋ฃ")
|
120 |
except Exception as e:
|
121 |
st.error(f"โ Gemini API ์ค์ ์คํจ: {e}")
|
122 |
return None, None, None, None
|
123 |
|
|
|
124 |
status_text.text("๐ค ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค... (1-2๋ถ ์์)")
|
125 |
+
embedding_model_instance = None
|
|
|
126 |
try:
|
|
|
127 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
128 |
embedding_model_instance = SentenceTransformer('jhgan/ko-sbert-nli',
|
129 |
+
cache_folder=SENTENCE_TRANSFORMERS_HOME_DIR)
|
130 |
total_progress.progress(40)
|
131 |
st.success("โ
ํ๊ตญ์ด ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ")
|
|
|
132 |
except Exception as e:
|
133 |
st.error(f"โ ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e}")
|
134 |
st.error("๐จ ์๋ฒ ๋ฉ ๋ชจ๋ธ ์์ด๋ RAG ์์คํ
์ด ์๋ํ ์ ์์ต๋๋ค!")
|
135 |
return None, None, None, None
|
136 |
|
|
|
137 |
status_text.text("๐ ์นดํผ ๋ฐ์ดํฐ๋ฒ ์ด์ค ๋ก๋ฉ ์ค...")
|
138 |
+
df_data = None
|
139 |
try:
|
140 |
df_data = pd.read_excel('๊ด๊ณ ์นดํผ๋ฐ์ดํฐ_๋ธ๋๋์ถ์ถ์๋ฃ.xlsx')
|
141 |
total_progress.progress(60)
|
|
|
144 |
st.error(f"โ ๋ฐ์ดํฐ ๋ก๋ฉ ์คํจ: {e}")
|
145 |
return None, None, None, None
|
146 |
|
|
|
147 |
status_text.text("๐ ๋ฒกํฐ ์๋ฒ ๋ฉ ๋ก๋ฉ ์ค... (RAG ์์คํ
ํต์ฌ)")
|
148 |
+
embeddings_array = None
|
149 |
try:
|
150 |
+
import numpy as np_pickle_check
|
|
|
151 |
st.write(f"[DEBUG] NumPy version just before pickle.load: {np_pickle_check.__version__}")
|
152 |
import numpy.core._multiarray_umath
|
153 |
st.write("[DEBUG] Successfully imported numpy.core._multiarray_umath before pickle.load")
|
|
|
154 |
|
155 |
with open('copy_embeddings.pkl', 'rb') as f:
|
156 |
embeddings_data = pickle.load(f)
|
157 |
embeddings_array = embeddings_data['embeddings']
|
158 |
total_progress.progress(90)
|
159 |
st.success(f"โ
์๋ฒ ๋ฉ ๋ก๋ฉ ์๋ฃ: {embeddings_array.shape[0]:,}๊ฐ ร {embeddings_array.shape[1]}์ฐจ์")
|
160 |
+
except ModuleNotFoundError as mnfe:
|
161 |
st.error(f"โ ์๋ฒ ๋ฉ ๋ก๋ฉ ์คํจ (ModuleNotFoundError): {mnfe}")
|
162 |
st.error(f"๐จ ํด๋น ๋ชจ๋์ ์ฐพ์ ์ ์์ต๋๋ค. sys.path: {sys.path}")
|
163 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
|
|
164 |
try:
|
165 |
import numpy as np_final_check
|
166 |
st.error(f"[DEBUG] NumPy object at failure: {np_final_check}")
|
|
|
173 |
st.error("๐จ ์๋ฒ ๋ฉ ์์ด๋ ์๋ฏธ์ ๊ฒ์์ด ๋ถ๊ฐ๋ฅํฉ๋๋ค!")
|
174 |
return None, None, None, None
|
175 |
|
|
|
176 |
status_text.text("โจ ์์คํ
๊ฒ์ฆ ์ค...")
|
177 |
if model_llm and embedding_model_instance and df_data is not None and embeddings_array is not None:
|
178 |
total_progress.progress(100)
|
179 |
status_text.text("๐ RAG ์์คํ
๋ก๋ฉ ์๋ฃ!")
|
|
|
180 |
success_col1, success_col2, success_col3 = st.columns(3)
|
181 |
with success_col1:
|
182 |
st.metric("์นดํผ ๋ฐ์ดํฐ", f"{len(df_data):,}๊ฐ")
|
|
|
184 |
st.metric("์๋ฒ ๋ฉ ์ฐจ์", f"{embeddings_array.shape[1]}D")
|
185 |
with success_col3:
|
186 |
st.metric("๊ฒ์ ์์ง", "Korean SBERT")
|
|
|
187 |
time.sleep(1)
|
188 |
total_progress.empty()
|
189 |
status_text.empty()
|
|
|
|
|
190 |
return model_llm, embedding_model_instance, df_data, embeddings_array
|
191 |
else:
|
192 |
st.error("โ ์์คํ
๋ก๋ฉ ์คํจ: ํ์ ๊ตฌ์ฑ์์ ๋๋ฝ")
|
193 |
return None, None, None, None
|
194 |
|
195 |
+
# ์์คํ
๋ก๋ฉ
|
196 |
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = None, None, None, None
|
197 |
with st.spinner("๐ AI ์นดํผ๋ผ์ดํฐ ์์คํ
์ด๊ธฐํ ์ค..."):
|
198 |
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = load_system()
|
|
|
201 |
st.error("โ ์์คํ
์ ๋ก๋ฉํ ์ ์์ต๋๋ค. ํ์ด์ง๋ฅผ ์๋ก๊ณ ์นจํ๊ฑฐ๋ ๊ด๋ฆฌ์์๊ฒ ๋ฌธ์ํ์ธ์.")
|
202 |
st.stop()
|
203 |
|
204 |
+
# ์ดํ UI ๋ฐ ์นดํผ ์์ฑ ๋ก์ง (์ด์ ๊ณผ ๋์ผํ๊ฒ ์ ์ง)
|
205 |
# ์ฌ์ด๋๋ฐ ์ค์ (์์คํ
๋ก๋ฉ ์ฑ๊ณต ํ)
|
206 |
st.sidebar.success("๐ RAG ์์คํ
์ค๋น ์๋ฃ!")
|
207 |
|
|
|
210 |
selected_category = st.sidebar.selectbox(
|
211 |
"๐ ์นดํ
๊ณ ๋ฆฌ",
|
212 |
categories,
|
213 |
+
help="ํน์ ์นดํ
๊ณ ๋ฆฌ๋ก ๊ฒ์์ ์ ํํ ์ ์์ต๋๋ค",
|
214 |
+
key="category_selectbox" # ํค ์ถ๊ฐ
|
215 |
)
|
216 |
|
217 |
# ํ๊ฒ ๊ณ ๊ฐ ์ค์
|
218 |
target_audience = st.sidebar.selectbox(
|
219 |
"๐ฏ ํ๊ฒ ๊ณ ๊ฐ",
|
220 |
['20๋', '30๋', '์ผ๋ฐ', '10๋', '40๋', '50๋+', '๋จ์ฑ', '์ฌ์ฑ', '์ง์ฅ์ธ', 'ํ์', '์ฃผ๋ถ'],
|
221 |
+
help="ํ๊ฒ ๊ณ ๊ฐ์ ๋ง๋ ํค์ค๋งค๋๋ก ์นดํผ๋ฅผ ์์ฑํฉ๋๋ค",
|
222 |
+
key="target_audience_selectbox" # ํค ์ถ๊ฐ
|
223 |
)
|
224 |
|
225 |
# ๋ธ๋๋ ํค์ค๋งค๋
|
226 |
brand_tone = st.sidebar.selectbox(
|
227 |
"๐จ ๋ธ๋๋ ํค",
|
228 |
['์ธ๋ จ๋', '์น๊ทผํ', '๊ณ ๊ธ์ค๋ฌ์ด', 'ํ๊ธฐ์ฐฌ', '์ ๋ขฐํ ์ ์๋', '์ ์', '๋ฐ๋ปํ', '์ ๋ฌธ์ ์ธ'],
|
229 |
+
help="์ํ๋ ๋ธ๋๋ ์ด๋ฏธ์ง๋ฅผ ์ ํํ์ธ์",
|
230 |
+
key="brand_tone_selectbox" # ํค ์ถ๊ฐ
|
231 |
)
|
232 |
|
233 |
# ์ฐฝ์์ฑ ์์ค
|
|
|
235 |
"๐ง ์ฐฝ์์ฑ ์์ค",
|
236 |
options=['๋ณด์์ ', '๊ท ํ', '์ฐฝ์์ '],
|
237 |
value='๊ท ํ',
|
238 |
+
help="๋ณด์์ : ์์ ํ ํํ, ์ฐฝ์์ : ๋
์ฐฝ์ ํํ",
|
239 |
+
key="creative_level_slider" # ํค ์ถ๊ฐ
|
240 |
)
|
241 |
|
242 |
# ๋ฉ์ธ ์
๋ ฅ ์์ญ
|
|
|
247 |
"์
๋ ฅ ๋ฐฉ์ ์ ํ:",
|
248 |
["์ง์ ์
๋ ฅ", "ํ
ํ๋ฆฟ ์ ํ"],
|
249 |
horizontal=True,
|
250 |
+
key="input_method_radio"
|
251 |
)
|
252 |
|
253 |
if input_method == "์ง์ ์
๋ ฅ":
|
|
|
255 |
"์นดํผ ์์ฒญ์ ์์ธํ ์์ฑํด์ฃผ์ธ์:",
|
256 |
placeholder="์: 30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ",
|
257 |
height=100,
|
258 |
+
key="user_request_direct"
|
259 |
)
|
260 |
else:
|
261 |
templates = {
|
|
|
287 |
|
288 |
# ๊ณ ๊ธ ์ต์
|
289 |
with st.expander("๐ง ๊ณ ๊ธ ์ต์
"):
|
290 |
+
col1_adv, col2_adv = st.columns(2)
|
291 |
with col1_adv:
|
292 |
num_concepts = st.slider("์์ฑํ ์ปจ์
์:", 1, 5, 3, key="num_concepts_slider")
|
293 |
min_similarity = st.slider("์ต์ ์ ์ฌ๋:", 0.0, 1.0, 0.3, 0.1, key="min_similarity_slider")
|
|
|
296 |
num_references = st.slider("์ฐธ๊ณ ์นดํผ ์:", 3, 10, 5, key="num_references_slider")
|
297 |
|
298 |
# RAG ์นดํผ ์์ฑ ํจ์ (์๋ฒ ๋ฉ ๊ธฐ๋ฐ ํ์!)
|
299 |
+
def generate_copy_with_rag(user_req, category_filter, target_aud, brand_tn, creative_lvl, num_con):
|
|
|
300 |
if not user_req.strip():
|
301 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
302 |
return None
|
303 |
|
304 |
progress_bar = st.progress(0)
|
305 |
+
status_text_gen = st.empty()
|
306 |
|
307 |
status_text_gen.text("๐ ์๋ฏธ์ ๊ฒ์ ์ค... (RAG ํต์ฌ ๊ธฐ๋ฅ)")
|
308 |
progress_bar.progress(20)
|
309 |
|
310 |
try:
|
311 |
search_query = f"{user_req} {target_aud} ๊ด๊ณ ์นดํผ"
|
312 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
313 |
+
query_embedding = loaded_embedding_model.encode([search_query])
|
314 |
|
315 |
if category_filter != '์ ์ฒด':
|
316 |
+
filtered_df_gen = loaded_df[loaded_df['์นดํ
๊ณ ๋ฆฌ'] == category_filter].copy()
|
317 |
else:
|
318 |
+
filtered_df_gen = loaded_df.copy()
|
319 |
|
320 |
progress_bar.progress(40)
|
321 |
|
322 |
if filtered_df_gen.empty:
|
323 |
st.warning(f"โ ๏ธ ์ ํํ์ ์นดํ
๊ณ ๋ฆฌ '{category_filter}'์ ํด๋นํ๋ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
324 |
+
progress_bar.empty(); status_text_gen.empty(); return None
|
|
|
|
|
|
|
325 |
|
326 |
filtered_indices = filtered_df_gen.index.tolist()
|
|
|
327 |
valid_indices_for_embedding = [idx for idx in filtered_indices if idx < len(loaded_embeddings)]
|
328 |
if not valid_indices_for_embedding:
|
329 |
st.warning(f"โ ๏ธ ์ ํจํ ์ธ๋ฑ์ค๋ฅผ ์ฐพ์ ์ ์์ด ์ ์ฌ๋ ๊ฒ์์ ์งํํ ์ ์์ต๋๋ค. (์นดํ
๊ณ ๋ฆฌ: {category_filter})")
|
330 |
+
progress_bar.empty(); status_text_gen.empty(); return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
|
332 |
filtered_embeddings_for_search = loaded_embeddings[valid_indices_for_embedding]
|
|
|
333 |
if query_embedding.shape[1] != filtered_embeddings_for_search.shape[1]:
|
334 |
st.error(f"โ ์๋ฒ ๋ฉ ์ฐจ์ ๋ถ์ผ์น: ์ฟผ๋ฆฌ({query_embedding.shape[1]}D), ๋ฌธ์({filtered_embeddings_for_search.shape[1]}D)")
|
335 |
return None
|
336 |
|
|
|
337 |
similarities = cosine_similarity(query_embedding, filtered_embeddings_for_search)[0]
|
|
|
|
|
|
|
|
|
338 |
num_to_select = min(num_references, len(similarities))
|
339 |
+
# numpy๋ฅผ ์ฌ๊ธฐ์ ๋ค์ ์ํฌํธํ์ฌ ์ฌ์ฉ (np ๋ณ์นญ ์ฌ์ฉ)
|
340 |
+
import numpy as np_generate_rag
|
341 |
+
top_similarity_indices = np_generate_rag.argsort(similarities)[::-1][:num_to_select]
|
342 |
|
343 |
reference_copies = []
|
344 |
for i in top_similarity_indices:
|
|
|
|
|
345 |
original_df_idx = valid_indices_for_embedding[i]
|
346 |
+
row = loaded_df.iloc[original_df_idx]
|
347 |
if similarities[i] >= min_similarity:
|
348 |
reference_copies.append({
|
349 |
'copy': row['์นดํผ ๋ด์ฉ'],
|
350 |
'brand': row['๋ธ๋๋'],
|
351 |
+
'similarity': float(similarities[i])
|
352 |
})
|
353 |
progress_bar.progress(60)
|
354 |
|
355 |
if not reference_copies:
|
|
|
|
|
|
|
|
|
|
|
356 |
references_text_for_prompt = "์ ์ฌ๋ ๋์ ์ฐธ๊ณ ์นดํผ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค."
|
357 |
else:
|
358 |
references_text_for_prompt = "\n".join([
|
|
|
360 |
for j, ref in enumerate(reference_copies)
|
361 |
])
|
362 |
|
|
|
363 |
status_text_gen.text("๐ค AI ์นดํผ ์์ฑ ์ค...")
|
364 |
progress_bar.progress(80)
|
|
|
365 |
creativity_guidance = {
|
366 |
+
"๋ณด์์ ": "์์ ํ๊ณ ๊ฒ์ฆ๋ ํํ์ ์ฌ์ฉํ์ฌ", "๊ท ํ": "์ฐฝ์์ ์ด๋ฉด์๋ ์ ์ ํ ์์ค์์",
|
|
|
367 |
"์ฐฝ์์ ": "๋
์ฐฝ์ ์ด๊ณ ํ์ ์ ์ธ ํํ์ผ๋ก"
|
368 |
}
|
369 |
prompt = f"""
|
370 |
๋น์ ์ ํ๊ตญ์ ์ ๋ฌธ ๊ด๊ณ ์นดํผ๋ผ์ดํฐ์
๋๋ค.
|
|
|
371 |
**์์ฒญ์ฌํญ:** {user_req}
|
372 |
**ํ๊ฒ ๊ณ ๊ฐ:** {target_aud}
|
373 |
**๋ธ๋๋ ํค:** {brand_tn}
|
374 |
**์ฐฝ์์ฑ ์์ค:** {creative_lvl} ({creativity_guidance[creative_lvl]})
|
|
|
375 |
**์ฐธ๊ณ ์นดํผ๋ค (์๋ฏธ์ ์ ์ฌ๋ ๊ธฐ๋ฐ ์ ๋ณ):**
|
376 |
{references_text_for_prompt}
|
|
|
377 |
**์์ฑ ๊ฐ์ด๋๋ผ์ธ:**
|
378 |
1. ์ ์ฐธ๊ณ ์นดํผ๋ค์ ์คํ์ผ๊ณผ ํค์ ๋ถ์ํ๊ณ , ์์ฒญ์ฌํญ์ ๋ง์ถฐ ์๋ก์ด ์นดํผ {num_con}๊ฐ๋ฅผ ์์ฑํด์ฃผ์ธ์.
|
379 |
2. ๋ง์ฝ ์ฐธ๊ณ ์นดํผ๊ฐ ์๋ค๋ฉด, ์์ฒญ์ฌํญ๊ณผ ํ๊ฒ ๊ณ ๊ฐ, ๋ธ๋๋ ํค, ์ฐฝ์์ฑ ์์ค์๋ง ์ง์คํ์ฌ ์์ฑํด์ฃผ์ธ์.
|
380 |
3. ๊ฐ ์นดํผ๋ ํ๊ตญ์ด๋ก ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ด์ด์ผ ํฉ๋๋ค.
|
381 |
4. {target_aud}์๊ฒ ์ดํํ ์ ์๋ ํํ์ ์ฌ์ฉํด์ฃผ์ธ์.
|
382 |
5. {brand_tn} ํค์ค๋งค๋๋ฅผ ์ ์งํด์ฃผ์ธ์.
|
|
|
383 |
**์ถ๋ ฅ ํ์ (๊ฐ ์นดํผ์ ๊ฐ๋จํ ์ค๋ช
ํฌํจ):**
|
384 |
1. [์์ฑ๋ ์นดํผ 1]
|
385 |
- ์ค๋ช
: (์ด ์นดํผ๊ฐ ์ ํจ๊ณผ์ ์ธ์ง ๋๋ ์ด๋ค ์๋๋ก ์์ฑ๋์๋์ง)
|
|
|
|
|
|
|
386 |
... (์์ฒญํ ์ปจ์
์๋งํผ ๋ฐ๋ณต)
|
|
|
387 |
**์ถ์ฒ ์นดํผ:** (์ ์์ฑ๋ ์นดํผ ์ค ๊ฐ์ฅ ์ถ์ฒํ๋ ๊ฒ ํ๋์ ๊ทธ ์ด์ )
|
388 |
"""
|
389 |
response = loaded_model.generate_content(prompt)
|
390 |
+
progress_bar.progress(100); status_text_gen.text("โ
์๋ฃ!"); time.sleep(0.5)
|
391 |
+
progress_bar.empty(); status_text_gen.empty()
|
|
|
|
|
|
|
|
|
392 |
return {
|
393 |
+
'references': reference_copies, 'generated_content': response.text,
|
|
|
394 |
'search_info': {
|
395 |
+
'query': search_query, 'total_candidates': len(filtered_df_gen),
|
|
|
396 |
'selected_references': len(reference_copies)
|
397 |
},
|
398 |
'settings': {
|
399 |
+
'category': category_filter, 'target': target_aud,
|
400 |
+
'tone': brand_tn, 'creative': creative_lvl
|
|
|
|
|
401 |
}
|
402 |
}
|
403 |
except Exception as e_gen:
|
404 |
+
st.error(f"โ ์นดํผ ์์ฑ ์คํจ: {e_gen}"); st.error(f"์ค๋ฅ ํ์
: {type(e_gen)}")
|
405 |
+
import traceback; st.error(traceback.format_exc())
|
406 |
+
progress_bar.empty(); status_text_gen.empty(); return None
|
|
|
|
|
|
|
|
|
407 |
|
408 |
# 생성 버튼
|
409 |
if st.button("๐ ์นดํผ ์์ฑํ๊ธฐ", type="primary", use_container_width=True, key="generate_button"):
|
|
|
411 |
st.error("โ ์นดํผ ์์ฒญ์ ์
๋ ฅํด์ฃผ์ธ์")
|
412 |
else:
|
413 |
result = generate_copy_with_rag(
|
414 |
+
user_req=user_request, category_filter=selected_category, target_aud=target_audience,
|
415 |
+
brand_tn=brand_tone, creative_lvl=creative_level, num_con=num_concepts
|
|
|
|
|
|
|
|
|
416 |
)
|
417 |
if result:
|
418 |
+
st.markdown("## ๐ ์์ฑ๋ ์นดํผ"); st.markdown("---")
|
|
|
419 |
st.info(f"๐ **๊ฒ์ ์ ๋ณด**: {result['search_info']['total_candidates']:,}๊ฐ ํ๋ณด์์ "
|
420 |
f"{result['search_info']['selected_references']}๊ฐ ์ฐธ๊ณ ์นดํผ ์ ๋ณ")
|
421 |
if show_references and result['references']:
|
|
|
423 |
for i, ref in enumerate(result['references'], 1):
|
424 |
st.markdown(f"**{i}.** \"{ref['copy']}\"")
|
425 |
st.markdown(f" - ๋ธ๋๋: {ref['brand']}")
|
426 |
+
st.markdown(f" - ์ ์ฌ๋: {ref['similarity']:.3f}"); st.markdown("")
|
427 |
+
st.markdown("### โจ AI๊ฐ ์์ฑํ ์นดํผ:"); st.markdown(result['generated_content'])
|
|
|
|
|
428 |
try:
|
429 |
result_json = json.dumps({
|
430 |
+
'timestamp': datetime.now().isoformat(), 'request': user_request,
|
431 |
+
'settings': result['settings'], 'search_info': result['search_info'],
|
432 |
+
'generated_content': result['generated_content'], 'references': result['references']
|
|
|
|
|
|
|
433 |
}, ensure_ascii=False, indent=2)
|
434 |
st.download_button(
|
435 |
+
label="๐พ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ (JSON)", data=result_json,
|
|
|
436 |
file_name=f"copy_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
|
437 |
+
mime="application/json", key="download_button"
|
|
|
438 |
)
|
439 |
+
except Exception as e_json: st.error(f"โ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋ ํ์ผ ์์ฑ ์คํจ: {e_json}")
|
|
|
|
|
440 |
|
441 |
# 시스템 정보 (사이드바 하단)
|
442 |
+
st.sidebar.markdown("---"); st.sidebar.markdown("### ๐ RAG ์์คํ
์ ๋ณด")
|
|
|
443 |
if loaded_df is not None and loaded_embeddings is not None:
|
444 |
st.sidebar.markdown(f"**์นดํผ ๋ฐ์ดํฐ**: {len(loaded_df):,}๊ฐ")
|
445 |
st.sidebar.markdown(f"**์นดํ
๊ณ ๋ฆฌ**: {loaded_df['์นดํ
๊ณ ๋ฆฌ'].nunique()}๊ฐ")
|
446 |
st.sidebar.markdown(f"**๋ธ๋๋**: {loaded_df['๋ธ๋๋'].nunique()}๊ฐ")
|
447 |
+
st.sidebar.markdown(f"**์๋ฒ ๋ฉ**: {loaded_embeddings.shape[1]}์ฐจ์")
|
448 |
+
st.sidebar.markdown("**๊ฒ์ ์์ง**: Korean SBERT"); st.sidebar.markdown("**ํธ์คํ
**: ๐ค Hugging Face")
|
|
|
449 |
|
450 |
# 사용법 가이드
|
451 |
with st.expander("๐ก RAG ์์คํ
์ฌ์ฉ๋ฒ ๊ฐ์ด๋"):
|
452 |
st.markdown("""
|
453 |
### ๐ฏ ํจ๊ณผ์ ์ธ ์ฌ์ฉ๋ฒ
|
454 |
+
**1. ๊ตฌ์ฒด์ ์ธ ์์ฒญํ๊ธฐ:**
|
455 |
+
- โ "์นดํผ ์จ์ค"
|
456 |
+
- โ
"30๋ ์ง์ฅ ์ฌ์ฑ์ฉ ํ๋ฆฌ๋ฏธ์ ์คํจ์ผ์ด ์ ์ ํ ๋ฐ์นญ ์นดํผ"
|
457 |
+
**2. RAG ์์คํ
์ ์ฅ์ :**
|
458 |
+
- ๐ง **์๋ฏธ์ ๊ฒ์**: ํค์๋๋ฟ๋ง ์๋๋ผ ์๋ฏธ๊น์ง ์ดํด
|
459 |
+
- ๐ฏ **๋ฌธ๋งฅ ๋งค์นญ**: ํ๊ฒ๊ณผ ์ํฉ์ ๋ง๋ ์นดํผ ์๋ ์ ๋ณ
|
460 |
+
- ๐ **๋ฐ์ดํฐ ๊ธฐ๋ฐ**: 37,671๊ฐ ์ค์ ์นดํผ์์ ํ์ตํ ํจํด
|
461 |
+
**3. ์ฐฝ์์ฑ ์กฐ์ :**
|
462 |
+
- **๋ณด์์ **: ์์ ํ ํด๋ผ์ด์ธํธ, ๊ฒ์ฆ๋ ์ ๊ทผ
|
463 |
+
- **๊ท ํ**: ์ผ๋ฐ์ ์ธ ํ๋ก์ ํธ (์ถ์ฒ!)
|
464 |
+
- **์ฐฝ์์ **: ํ์ ์ ๋ธ๋๋, ํ๊ฒฉ์ ์บ ํ์ธ
|
465 |
+
**4. ์ฐธ๊ณ ์นดํผ ํ์ฉ:**
|
466 |
+
- ์์ฑ๋ ์นดํผ์ ์ฐธ๊ณ ์นดํผ๋ฅผ ๋น๊ต ๋ถ์
|
467 |
+
- ํธ๋ ๋์ ํจํด ํ์
๊ฐ๋ฅ
|
468 |
+
- ๊ฒฝ์์ฌ ๋ถ์ ์๋ฃ๋ก ํ์ฉ
|
469 |
""")
|
470 |
|
471 |
# 푸터
|
|
|
476 |
)
|
477 |
|
478 |
# 성능 모니터링 (개발자용)
|
479 |
+
if os.getenv("DEBUG_MODE") == "true":
|
480 |
st.sidebar.markdown("### ๐ง ๋๋ฒ๊ทธ ์ ๋ณด (ํ์ฑํ๋จ)")
|
481 |
+
if 'loaded_embeddings' in locals() and loaded_embeddings is not None:
|
482 |
st.sidebar.write(f"์๋ฒ ๋ฉ ๋ฉ๋ชจ๋ฆฌ: {loaded_embeddings.nbytes / (1024*1024):.1f}MB")
|
483 |
st.sidebar.write(f"Streamlit ๋ฒ์ : {st.__version__}")
|
484 |
st.sidebar.write(f"Pandas ๋ฒ์ : {pd.__version__}")
|
485 |
+
# np ๋ณ์นญ์ด ๋ก์ปฌ์์ ์ ์๋์ด ์์ง ์์ ์ ์์ผ๋ฏ๋ก, import๋ numpy ์ฌ์ฉ
|
486 |
+
try:
|
487 |
+
import numpy as np_debug_version
|
488 |
+
st.sidebar.write(f"Numpy ๋ฒ์ (Global): {np_debug_version.__version__}")
|
489 |
+
except ImportError:
|
490 |
+
st.sidebar.write("Numpy ๋ฒ์ (Global): Not imported or error")
|
491 |
+
|
492 |
+
# torch๋ ์ง์ ์ฌ์ฉํ์ง ์์ผ๋ฏ๋ก, sentence_transformers ๋ด๋ถ ๋ฒ์ ์ ์๊ธฐ๋ ์ด๋ ค์
|
493 |
st.sidebar.write(f"google-generativeai ๋ฒ์ : {genai.__version__}")
|