aicopy / streamlit_app.py
parkkyujin's picture
Update streamlit_app.py
0911a8b verified
# Stable AI copywriter - embedding-based RAG system
# Version optimized for the Hugging Face Spaces environment
import streamlit as st
import pandas as pd
import numpy # ์ „์—ญ์ ์œผ๋กœ numpy๋ฅผ ๋จผ์ € ์ž„ํฌํŠธํ•ด๋ด…๋‹ˆ๋‹ค.
import pickle
import google.generativeai as genai
import time
import json
import os
import sys # ๋””๋ฒ„๊น…์šฉ sys ๋ชจ๋“ˆ ์ž„ํฌํŠธ
from datetime import datetime
# Environment setup (works around permission problems on the host).
os.environ['STREAMLIT_BROWSER_GATHER_USAGE_STATS'] = 'false'
# Point the model caches at /tmp, used here as the writable location on
# Hugging Face Spaces.
TMP_DIR = "/tmp"
TRANSFORMERS_CACHE_DIR = os.path.join(TMP_DIR, '.cache', 'transformers')
SENTENCE_TRANSFORMERS_HOME_DIR = os.path.join(TMP_DIR, '.cache', 'sentence_transformers')
os.environ['TRANSFORMERS_CACHE'] = TRANSFORMERS_CACHE_DIR
os.environ['SENTENCE_TRANSFORMERS_HOME'] = SENTENCE_TRANSFORMERS_HOME_DIR
# Create the cache directories when they do not exist yet. A failure is only
# remembered here and reported further down: st.set_page_config() has to be
# the first Streamlit command of the script, so no st.warning() may run
# before it.
_cache_dir_warning = None
try:
    os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
    os.makedirs(SENTENCE_TRANSFORMERS_HOME_DIR, exist_ok=True)
except PermissionError:
    _cache_dir_warning = f"โš ๏ธ ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ ๊ถŒํ•œ ์—†์Œ: {TRANSFORMERS_CACHE_DIR} ๋˜๋Š” {SENTENCE_TRANSFORMERS_HOME_DIR}. ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ๊ฐ€ ๋А๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
except Exception as e_mkdir:
    _cache_dir_warning = f"โš ๏ธ ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {e_mkdir}"
# Page configuration.
st.set_page_config(
    page_title="AI ์นดํ”ผ๋ผ์ดํ„ฐ | RAG ๊ธฐ๋ฐ˜ ๊ด‘๊ณ  ์นดํ”ผ ์ƒ์„ฑ",
    page_icon="โœจ",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Report the deferred cache-directory problem, if any, now that the page is
# configured. It is emitted before the title, as in the original layout.
if _cache_dir_warning is not None:
    st.warning(_cache_dir_warning)
# Title and description.
st.title("โœจ AI ์นดํ”ผ๋ผ์ดํ„ฐ")
st.markdown("### ๐ŸŽฏ 37,671๊ฐœ ์‹ค์ œ ๊ด‘๊ณ  ์นดํ”ผ ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ RAG ์‹œ์Šคํ…œ")
st.markdown("---")
# --- Runtime environment debugging (place at the very top of the app or right before load_system) ---
# st.sidebar.markdown("---")
# st.sidebar.markdown("### โš™๏ธ ๋Ÿฐํƒ€์ž„ ํ™˜๊ฒฝ ์ •๋ณด (๋””๋ฒ„๊น…์šฉ)")
# st.sidebar.text(f"Py Exec: {sys.executable}")
# st.sidebar.text(f"Py Ver: {sys.version.split()[0]}") # ๊ฐ„๋žตํ•˜๊ฒŒ ๋ฒ„์ „๋งŒ
# st.sidebar.text(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not Set')}")
# try:
# import numpy as np_runtime_check
# st.sidebar.text(f"NumPy Ver (Runtime): {np_runtime_check.__version__}")
# import numpy.core._multiarray_umath
# st.sidebar.markdown("โœ… NumPy core modules imported (Runtime)")
# except Exception as e:
# st.sidebar.error(f"โŒ NumPy import error (Runtime): {e}")
# st.sidebar.markdown("---")
# --- End of debugging code ---
# ์‚ฌ์ด๋“œ๋ฐ” ์„ค์ •
st.sidebar.header("๐ŸŽ›๏ธ ์นดํ”ผ ์ƒ์„ฑ ์„ค์ •")
# --- API ํ‚ค ์ฒ˜๋ฆฌ ๋ณ€๊ฒฝ ---
# ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ API ํ‚ค๋ฅผ ์ง์ ‘ ๊ฐ€์ ธ์˜ต๋‹ˆ๋‹ค.
api_key_value = os.getenv("GEMINI_API_KEY")
# API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ์•ฑ ์ค‘๋‹จ ๋ฐ ์•ˆ๋‚ด ๋ฉ”์‹œ์ง€ ํ‘œ์‹œ
if not api_key_value:
st.error(" critical: ๐Ÿ”‘ GEMINI_API_KEY ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
st.info("Hugging Face Spaces์˜ 'Settings' -> 'Repository secrets'์—์„œ 'GEMINI_API_KEY'๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”.")
st.info("์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์ž‘๋™ํ•˜๋ ค๋ฉด API ํ‚ค๊ฐ€ ๋ฐ˜๋“œ์‹œ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.")
st.stop()
# --- API ํ‚ค ์ฒ˜๋ฆฌ ๋ณ€๊ฒฝ ๋ ---
# System initialisation (cached) - the embeddings are mandatory.
@st.cache_resource(show_spinner=False)
def load_system():
    """Load the components of the embedding-based RAG system.

    The steps run in this order, each reporting progress through Streamlit:
    configure the Gemini client, load the 'jhgan/ko-sbert-nli'
    SentenceTransformer, read the copy spreadsheet with pandas, and unpickle
    the precomputed embeddings from 'copy_embeddings.pkl'.

    Returns:
        A 4-tuple ``(llm_model, embedding_model, dataframe, embeddings)``.
        When any step fails an error is shown, the progress widgets are
        removed and ``(None, None, None, None)`` is returned.
    """
    progress_container = st.container()
    with progress_container:
        total_progress = st.progress(0)
        status_text = st.empty()

    def _abort_loading():
        """Remove the progress widgets and return the failure tuple."""
        # The success path clears these widgets too; without this a failed
        # load would leave a stale progress bar and status line on screen.
        total_progress.empty()
        status_text.empty()
        return None, None, None, None

    status_text.text("๐Ÿ”‘ Gemini API ์ดˆ๊ธฐํ™” ์ค‘...")
    try:
        # api_key_value is the module-level key read from the environment.
        genai.configure(api_key=api_key_value)
        model_llm = genai.GenerativeModel('gemini-2.5-pro')
        total_progress.progress(10)
        st.success("โœ… Gemini API ์„ค์ • ์™„๋ฃŒ")
    except Exception as e:
        st.error(f"โŒ Gemini API ์„ค์ • ์‹คํŒจ: {e}")
        return _abort_loading()

    status_text.text("๐Ÿค– ํ•œ๊ตญ์–ด ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘... (1-2๋ถ„ ์†Œ์š”)")
    embedding_model_instance = None
    try:
        # Imported lazily so an import failure is reported in the UI.
        from sentence_transformers import SentenceTransformer
        embedding_model_instance = SentenceTransformer('jhgan/ko-sbert-nli',
                                                       cache_folder=SENTENCE_TRANSFORMERS_HOME_DIR)
        total_progress.progress(40)
        st.success("โœ… ํ•œ๊ตญ์–ด ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ")
    except Exception as e:
        st.error(f"โŒ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
        st.error("๐Ÿšจ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ์—†์ด๋Š” RAG ์‹œ์Šคํ…œ์ด ์ž‘๋™ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค!")
        return _abort_loading()

    status_text.text("๐Ÿ“Š ์นดํ”ผ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ๋กœ๋”ฉ ์ค‘...")
    df_data = None
    try:
        df_data = pd.read_excel('๊ด‘๊ณ ์นดํ”ผ๋ฐ์ดํ„ฐ_๋ธŒ๋žœ๋“œ์ถ”์ถœ์™„๋ฃŒ.xlsx')
        total_progress.progress(60)
        st.success(f"โœ… ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ์™„๋ฃŒ: {len(df_data):,}๊ฐœ ์นดํ”ผ")
    except Exception as e:
        st.error(f"โŒ ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
        return _abort_loading()

    status_text.text("๐Ÿ” ๋ฒกํ„ฐ ์ž„๋ฒ ๋”ฉ ๋กœ๋”ฉ ์ค‘... (RAG ์‹œ์Šคํ…œ ํ•ต์‹ฌ)")
    embeddings_array = None
    try:
        # NOTE(review): pickle.load executes arbitrary code from the file.
        # This is only acceptable if copy_embeddings.pkl is a trusted
        # artifact shipped with the app - confirm.
        with open('copy_embeddings.pkl', 'rb') as f:
            embeddings_data = pickle.load(f)
        embeddings_array = embeddings_data['embeddings']
        total_progress.progress(90)
        st.success(f"โœ… ์ž„๋ฒ ๋”ฉ ๋กœ๋”ฉ ์™„๋ฃŒ: {embeddings_array.shape[0]:,}๊ฐœ ร— {embeddings_array.shape[1]}์ฐจ์›")
    except ModuleNotFoundError as mnfe:
        # Unpickling could not import a module; show extra diagnostics about
        # the module search path and the numpy installation.
        st.error(f"โŒ ์ž„๋ฒ ๋”ฉ ๋กœ๋”ฉ ์‹คํŒจ (ModuleNotFoundError): {mnfe}")
        st.error(f"๐Ÿšจ ํ•ด๋‹น ๋ชจ๋“ˆ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. sys.path: {sys.path}")
        st.error("๐Ÿšจ ์ž„๋ฒ ๋”ฉ ์—†์ด๋Š” ์˜๋ฏธ์  ๊ฒ€์ƒ‰์ด ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค!")
        try:
            import numpy as np_final_check
            st.error(f"[DEBUG] NumPy object at failure: {np_final_check}")
            st.error(f"[DEBUG] NumPy __file__ at failure: {np_final_check.__file__}")
        except Exception as e_np_final:
            st.error(f"[DEBUG] Could not even import numpy at failure: {e_np_final}")
        return _abort_loading()
    except Exception as e:
        st.error(f"โŒ ์ž„๋ฒ ๋”ฉ ๋กœ๋”ฉ ์‹คํŒจ (์ผ๋ฐ˜ ์˜ค๋ฅ˜): {e}")
        st.error("๐Ÿšจ ์ž„๋ฒ ๋”ฉ ์—†์ด๋Š” ์˜๋ฏธ์  ๊ฒ€์ƒ‰์ด ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค!")
        return _abort_loading()

    status_text.text("โœจ ์‹œ์Šคํ…œ ๊ฒ€์ฆ ์ค‘...")
    # Explicit None checks: the truth value of a model object or an array is
    # not a reliable "was it loaded" signal.
    components = (model_llm, embedding_model_instance, df_data, embeddings_array)
    if any(component is None for component in components):
        st.error("โŒ ์‹œ์Šคํ…œ ๋กœ๋”ฉ ์‹คํŒจ: ํ•„์ˆ˜ ๊ตฌ์„ฑ์š”์†Œ ๋ˆ„๋ฝ")
        return _abort_loading()

    total_progress.progress(100)
    status_text.text("๐ŸŽ‰ RAG ์‹œ์Šคํ…œ ๋กœ๋”ฉ ์™„๋ฃŒ!")
    success_col1, success_col2, success_col3 = st.columns(3)
    with success_col1:
        st.metric("์นดํ”ผ ๋ฐ์ดํ„ฐ", f"{len(df_data):,}๊ฐœ")
    with success_col2:
        st.metric("์ž„๋ฒ ๋”ฉ ์ฐจ์›", f"{embeddings_array.shape[1]}D")
    with success_col3:
        st.metric("๊ฒ€์ƒ‰ ์—”์ง„", "Korean SBERT")
    # Keep the completed progress bar visible briefly before removing it.
    time.sleep(1)
    total_progress.empty()
    status_text.empty()
    return model_llm, embedding_model_instance, df_data, embeddings_array
# Load the system components (load_system is cached with st.cache_resource).
loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = None, None, None, None
with st.spinner("๐Ÿš€ AI ์นดํ”ผ๋ผ์ดํ„ฐ ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์ค‘..."):
    loaded_model, loaded_embedding_model, loaded_df, loaded_embeddings = load_system()
if loaded_model is None or loaded_embedding_model is None or loaded_df is None or loaded_embeddings is None:
    # load_system() reports failure by returning Nones, and that return value
    # is cached like any other. Drop the cached entry so a page refresh
    # actually retries the load instead of replaying the stored failure.
    load_system.clear()
    st.error("โŒ ์‹œ์Šคํ…œ์„ ๋กœ๋”ฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ํŽ˜์ด์ง€๋ฅผ ์ƒˆ๋กœ๊ณ ์นจํ•˜๊ฑฐ๋‚˜ ๊ด€๋ฆฌ์ž์—๊ฒŒ ๋ฌธ์˜ํ•˜์„ธ์š”.")
    st.stop()
# UI and copy-generation logic follow.
# Sidebar controls (rendered only after the system loaded successfully).
st.sidebar.success("๐ŸŽ‰ RAG ์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ!")
# Category selection. Missing values are dropped before sorting: a NaN mixed
# in with the string categories would make sorted() raise TypeError, and such
# a value could not be matched by the equality filter anyway.
category_values = loaded_df['์นดํ…Œ๊ณ ๋ฆฌ'].dropna().unique().tolist()
categories = ['์ „์ฒด'] + sorted(category_values)
selected_category = st.sidebar.selectbox(
    "๐Ÿ“‚ ์นดํ…Œ๊ณ ๋ฆฌ",
    categories,
    help="ํŠน์ • ์นดํ…Œ๊ณ ๋ฆฌ๋กœ ๊ฒ€์ƒ‰์„ ์ œํ•œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค",
    key="category_selectbox"  # explicit widget key
)
# Target audience.
target_audience = st.sidebar.selectbox(
    "๐ŸŽฏ ํƒ€๊ฒŸ ๊ณ ๊ฐ",
    ['20๋Œ€', '30๋Œ€', '์ผ๋ฐ˜', '10๋Œ€', '40๋Œ€', '50๋Œ€+', '๋‚จ์„ฑ', '์—ฌ์„ฑ', '์ง์žฅ์ธ', 'ํ•™์ƒ', '์ฃผ๋ถ€'],
    help="ํƒ€๊ฒŸ ๊ณ ๊ฐ์— ๋งž๋Š” ํ†ค์•ค๋งค๋„ˆ๋กœ ์นดํ”ผ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค",
    key="target_audience_selectbox"  # explicit widget key
)
# Brand tone and manner.
brand_tone = st.sidebar.selectbox(
    "๐ŸŽจ ๋ธŒ๋žœ๋“œ ํ†ค",
    ['์„ธ๋ จ๋œ', '์นœ๊ทผํ•œ', '๊ณ ๊ธ‰์Šค๋Ÿฌ์šด', 'ํ™œ๊ธฐ์ฐฌ', '์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š”', '์ Š์€', '๋”ฐ๋œปํ•œ', '์ „๋ฌธ์ ์ธ'],
    help="์›ํ•˜๋Š” ๋ธŒ๋žœ๋“œ ์ด๋ฏธ์ง€๋ฅผ ์„ ํƒํ•˜์„ธ์š”",
    key="brand_tone_selectbox"  # explicit widget key
)
# Creativity level; its options are the keys that generate_copy_with_rag
# looks up in its creativity guidance table.
creative_level = st.sidebar.select_slider(
    "๐Ÿง  ์ฐฝ์˜์„ฑ ์ˆ˜์ค€",
    options=['๋ณด์ˆ˜์ ', '๊ท ํ˜•', '์ฐฝ์˜์ '],
    value='๊ท ํ˜•',
    help="๋ณด์ˆ˜์ : ์•ˆ์ „ํ•œ ํ‘œํ˜„, ์ฐฝ์˜์ : ๋…์ฐฝ์  ํ‘œํ˜„",
    key="creative_level_slider"  # explicit widget key
)
# ๋ฉ”์ธ ์ž…๋ ฅ ์˜์—ญ
st.markdown("## ๐Ÿ’ญ ์–ด๋–ค ์นดํ”ผ๋ฅผ ๋งŒ๋“ค๊ณ  ์‹ถ์œผ์‹ ๊ฐ€์š”?")
user_request = "" # ์ดˆ๊ธฐํ™”
input_method = st.radio(
"์ž…๋ ฅ ๋ฐฉ์‹ ์„ ํƒ:",
["์ง์ ‘ ์ž…๋ ฅ", "ํ…œํ”Œ๋ฆฟ ์„ ํƒ"],
horizontal=True,
key="input_method_radio"
)
if input_method == "์ง์ ‘ ์ž…๋ ฅ":
user_request = st.text_area(
"์นดํ”ผ ์š”์ฒญ์„ ์ž์„ธํžˆ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”:",
placeholder="์˜ˆ: 30๋Œ€ ์ง์žฅ ์—ฌ์„ฑ์šฉ ํ”„๋ฆฌ๋ฏธ์—„ ์Šคํ‚จ์ผ€์–ด ์‹ ์ œํ’ˆ ๋Ÿฐ์นญ ์นดํ”ผ",
height=100,
key="user_request_direct"
)
else:
templates = {
"์‹ ์ œํ’ˆ ๋Ÿฐ์นญ": "๋Œ€์ƒ {์นดํ…Œ๊ณ ๋ฆฌ} ์‹ ์ œํ’ˆ ๋Ÿฐ์นญ ์นดํ”ผ",
"ํ• ์ธ ์ด๋ฒคํŠธ": "{์นดํ…Œ๊ณ ๋ฆฌ} ํ• ์ธ ์ด๋ฒคํŠธ ํ”„๋กœ๋ชจ์…˜ ์นดํ”ผ",
"๋ธŒ๋žœ๋“œ ์Šฌ๋กœ๊ฑด": "{์นดํ…Œ๊ณ ๋ฆฌ} ๋ธŒ๋žœ๋“œ์˜ ๋Œ€ํ‘œ ์Šฌ๋กœ๊ฑด",
"์•ฑ/์„œ๋น„์Šค ๋ฆฌ๋‰ด์–ผ": "{์„œ๋น„์Šค๋ช…} ์ƒˆ ๋ฒ„์ „ ์ถœ์‹œ ์นดํ”ผ",
"์‹œ์ฆŒ ํ•œ์ •": "{์‹œ์ฆŒ} ํ•œ์ • {์นดํ…Œ๊ณ ๋ฆฌ} ํŠน๋ณ„ ์—๋””์…˜ ์นดํ”ผ"
}
selected_template = st.selectbox("ํ…œํ”Œ๋ฆฟ ์„ ํƒ:", list(templates.keys()), key="template_selectbox")
template_category = ""
service_name = ""
season = ""
col1, col2 = st.columns(2)
with col1:
template_category = st.text_input("์ œํ’ˆ/์„œ๋น„์Šค:", value="", key="template_category_input")
with col2:
if selected_template == "์•ฑ/์„œ๋น„์Šค ๋ฆฌ๋‰ด์–ผ":
service_name = st.text_input("์„œ๋น„์Šค๋ช…:", placeholder="์˜ˆ: ๋ฐฐ๋‹ฌ์•ฑ, ๊ธˆ์œต์•ฑ", key="template_service_name_input")
user_request = templates[selected_template].format(์„œ๋น„์Šค๋ช…=service_name)
elif selected_template == "์‹œ์ฆŒ ํ•œ์ •":
season = st.selectbox("์‹œ์ฆŒ:", ["๋ด„", "์—ฌ๋ฆ„", "๊ฐ€์„", "๊ฒจ์šธ", "ํฌ๋ฆฌ์Šค๋งˆ์Šค", "์‹ ๋…„"], key="template_season_selectbox")
user_request = templates[selected_template].format(์‹œ์ฆŒ=season, ์นดํ…Œ๊ณ ๋ฆฌ=template_category)
else:
user_request = templates[selected_template].format(์นดํ…Œ๊ณ ๋ฆฌ=template_category)
st.text_area("์ƒ์„ฑ๋œ ์š”์ฒญ:", value=user_request, height=80, disabled=True, key="generated_request_template")
# Advanced options. The values chosen here are module-level settings:
# num_references and min_similarity are read by generate_copy_with_rag,
# num_concepts and show_references by the generate-button section.
with st.expander("๐Ÿ”ง ๊ณ ๊ธ‰ ์˜ต์…˜"):
    col1_adv, col2_adv = st.columns(2)
    with col1_adv:
        num_concepts = st.slider(
            "์ƒ์„ฑํ•  ์ปจ์…‰ ์ˆ˜:",
            min_value=1,
            max_value=5,
            value=3,
            key="num_concepts_slider",
        )
        min_similarity = st.slider(
            "์ตœ์†Œ ์œ ์‚ฌ๋„:",
            min_value=0.0,
            max_value=1.0,
            value=0.3,
            step=0.1,
            key="min_similarity_slider",
        )
    with col2_adv:
        show_references = st.checkbox(
            "์ฐธ๊ณ  ์นดํ”ผ ๋ณด๊ธฐ",
            value=True,
            key="show_references_checkbox",
        )
        num_references = st.slider(
            "์ฐธ๊ณ  ์นดํ”ผ ์ˆ˜:",
            min_value=3,
            max_value=10,
            value=5,
            key="num_references_slider",
        )
# RAG copy generation (requires the embeddings).
def generate_copy_with_rag(user_req, category_filter, target_aud, brand_tn, creative_lvl, num_con):
    """Retrieve similar reference copies and ask the LLM for new ad copy.

    Besides its arguments the function reads the module-level objects
    ``loaded_embedding_model``, ``loaded_df``, ``loaded_embeddings`` and
    ``loaded_model`` and the advanced-option settings ``num_references`` and
    ``min_similarity``.

    Args:
        user_req: Request text; a blank request is rejected.
        category_filter: Category to restrict the search to, or the "all"
            sentinel offered by the sidebar.
        target_aud: Target audience; appended to the search query and used in
            the prompt.
        brand_tn: Brand tone used in the prompt.
        creative_lvl: Key into the creativity guidance table below.
        num_con: Number of copies requested from the LLM.

    Returns:
        A dict with the keys 'references', 'generated_content', 'search_info'
        and 'settings', or ``None`` when the request is blank, no candidate
        rows exist, the embedding dimensions differ, or any exception occurs
        (the error is shown in the UI).
    """
    if not user_req.strip():
        st.error("โŒ ์นดํ”ผ ์š”์ฒญ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”")
        return None
    progress_bar = st.progress(0)
    status_text_gen = st.empty()
    status_text_gen.text("๐Ÿ” ์˜๋ฏธ์  ๊ฒ€์ƒ‰ ์ค‘... (RAG ํ•ต์‹ฌ ๊ธฐ๋Šฅ)")
    progress_bar.progress(20)
    try:
        search_query = f"{user_req} {target_aud} ๊ด‘๊ณ  ์นดํ”ผ"
        from sklearn.metrics.pairwise import cosine_similarity
        query_embedding = loaded_embedding_model.encode([search_query])
        # Candidates are tracked as row POSITIONS, not index labels, because
        # positions are what both loaded_df.iloc and the embedding matrix
        # need. For a default RangeIndex the two coincide.
        # NOTE(review): assumes row i of the embeddings belongs to row i of
        # the spreadsheet - confirm against how copy_embeddings.pkl was built.
        if category_filter != '์ „์ฒด':
            category_matches = (loaded_df['์นดํ…Œ๊ณ ๋ฆฌ'] == category_filter).tolist()
            candidate_positions = [pos for pos, matched in enumerate(category_matches) if matched]
        else:
            candidate_positions = list(range(len(loaded_df)))
        progress_bar.progress(40)
        if not candidate_positions:
            st.warning(f"โš ๏ธ ์„ ํƒํ•˜์‹  ์นดํ…Œ๊ณ ๋ฆฌ '{category_filter}'์— ํ•ด๋‹นํ•˜๋Š” ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
            return None
        # Ignore rows that have no embedding (spreadsheet longer than matrix).
        valid_indices_for_embedding = [pos for pos in candidate_positions if pos < len(loaded_embeddings)]
        if not valid_indices_for_embedding:
            st.warning(f"โš ๏ธ ์œ ํšจํ•œ ์ธ๋ฑ์Šค๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰์„ ์ง„ํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. (์นดํ…Œ๊ณ ๋ฆฌ: {category_filter})")
            return None
        filtered_embeddings_for_search = loaded_embeddings[valid_indices_for_embedding]
        if query_embedding.shape[1] != filtered_embeddings_for_search.shape[1]:
            st.error(f"โŒ ์ž„๋ฒ ๋”ฉ ์ฐจ์› ๋ถˆ์ผ์น˜: ์ฟผ๋ฆฌ({query_embedding.shape[1]}D), ๋ฌธ์„œ({filtered_embeddings_for_search.shape[1]}D)")
            return None
        similarities = cosine_similarity(query_embedding, filtered_embeddings_for_search)[0]
        num_to_select = min(num_references, len(similarities))
        # Best matches first, limited to the requested number of references.
        top_similarity_indices = numpy.argsort(similarities)[::-1][:num_to_select]
        reference_copies = []
        for i in top_similarity_indices:
            # Of the top matches keep only those above the similarity floor.
            if similarities[i] < min_similarity:
                continue
            row = loaded_df.iloc[valid_indices_for_embedding[i]]
            reference_copies.append({
                'copy': row['์นดํ”ผ ๋‚ด์šฉ'],
                'brand': row['๋ธŒ๋žœ๋“œ'],
                'similarity': float(similarities[i])
            })
        progress_bar.progress(60)
        if not reference_copies:
            references_text_for_prompt = "์œ ์‚ฌ๋„ ๋†’์€ ์ฐธ๊ณ  ์นดํ”ผ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."
        else:
            references_text_for_prompt = "\n".join([
                f"{j+1}. \"{ref['copy']}\" - {ref['brand']} (์œ ์‚ฌ๋„: {ref['similarity']:.3f})"
                for j, ref in enumerate(reference_copies)
            ])
        status_text_gen.text("๐Ÿค– AI ์นดํ”ผ ์ƒ์„ฑ ์ค‘...")
        progress_bar.progress(80)
        # Phrase inserted into the prompt for each creativity level.
        creativity_guidance = {
            "๋ณด์ˆ˜์ ": "์•ˆ์ „ํ•˜๊ณ  ๊ฒ€์ฆ๋œ ํ‘œํ˜„์„ ์‚ฌ์šฉํ•˜์—ฌ", "๊ท ํ˜•": "์ฐฝ์˜์ ์ด๋ฉด์„œ๋„ ์ ์ ˆํ•œ ์ˆ˜์ค€์—์„œ",
            "์ฐฝ์˜์ ": "๋…์ฐฝ์ ์ด๊ณ  ํ˜์‹ ์ ์ธ ํ‘œํ˜„์œผ๋กœ"
        }
        prompt = f"""
๋‹น์‹ ์€ ํ•œ๊ตญ์˜ ์ „๋ฌธ ๊ด‘๊ณ  ์นดํ”ผ๋ผ์ดํ„ฐ์ž…๋‹ˆ๋‹ค.
**์š”์ฒญ์‚ฌํ•ญ:** {user_req}
**ํƒ€๊ฒŸ ๊ณ ๊ฐ:** {target_aud}
**๋ธŒ๋žœ๋“œ ํ†ค:** {brand_tn}
**์ฐฝ์˜์„ฑ ์ˆ˜์ค€:** {creative_lvl} ({creativity_guidance[creative_lvl]})
**์ฐธ๊ณ  ์นดํ”ผ๋“ค (์˜๋ฏธ์  ์œ ์‚ฌ๋„ ๊ธฐ๋ฐ˜ ์„ ๋ณ„):**
{references_text_for_prompt}
**์ž‘์„ฑ ๊ฐ€์ด๋“œ๋ผ์ธ:**
1. ์œ„ ์ฐธ๊ณ  ์นดํ”ผ๋“ค์˜ ์Šคํƒ€์ผ๊ณผ ํ†ค์„ ๋ถ„์„ํ•˜๊ณ , ์š”์ฒญ์‚ฌํ•ญ์— ๋งž์ถฐ ์ƒˆ๋กœ์šด ์นดํ”ผ {num_con}๊ฐœ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
2. ๋งŒ์•ฝ ์ฐธ๊ณ  ์นดํ”ผ๊ฐ€ ์—†๋‹ค๋ฉด, ์š”์ฒญ์‚ฌํ•ญ๊ณผ ํƒ€๊ฒŸ ๊ณ ๊ฐ, ๋ธŒ๋žœ๋“œ ํ†ค, ์ฐฝ์˜์„ฑ ์ˆ˜์ค€์—๋งŒ ์ง‘์ค‘ํ•˜์—ฌ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
3. ๊ฐ ์นดํ”ผ๋Š” ํ•œ๊ตญ์–ด๋กœ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
4. {target_aud}์—๊ฒŒ ์–ดํ•„ํ•  ์ˆ˜ ์žˆ๋Š” ํ‘œํ˜„์„ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”.
5. {brand_tn} ํ†ค์•ค๋งค๋„ˆ๋ฅผ ์œ ์ง€ํ•ด์ฃผ์„ธ์š”.
**์ถœ๋ ฅ ํ˜•์‹ (๊ฐ ์นดํ”ผ์™€ ๊ฐ„๋‹จํ•œ ์„ค๋ช… ํฌํ•จ):**
1. [์ƒ์„ฑ๋œ ์นดํ”ผ 1]
- ์„ค๋ช…: (์ด ์นดํ”ผ๊ฐ€ ์™œ ํšจ๊ณผ์ ์ธ์ง€ ๋˜๋Š” ์–ด๋–ค ์˜๋„๋กœ ์ž‘์„ฑ๋˜์—ˆ๋Š”์ง€)
... (์š”์ฒญํ•œ ์ปจ์…‰ ์ˆ˜๋งŒํผ ๋ฐ˜๋ณต)
**์ถ”์ฒœ ์นดํ”ผ:** (์œ„ ์ƒ์„ฑ๋œ ์นดํ”ผ ์ค‘ ๊ฐ€์žฅ ์ถ”์ฒœํ•˜๋Š” ๊ฒƒ ํ•˜๋‚˜์™€ ๊ทธ ์ด์œ )
"""
        response = loaded_model.generate_content(prompt)
        progress_bar.progress(100)
        status_text_gen.text("โœ… ์™„๋ฃŒ!")
        # Leave the completed state visible for a moment.
        time.sleep(0.5)
        return {
            'references': reference_copies, 'generated_content': response.text,
            'search_info': {
                'query': search_query, 'total_candidates': len(candidate_positions),
                'selected_references': len(reference_copies)
            },
            'settings': {
                'category': category_filter, 'target': target_aud,
                'tone': brand_tn, 'creative': creative_lvl
            }
        }
    except Exception as e_gen:
        # UI boundary: show the failure with its traceback instead of
        # crashing the script run.
        import traceback
        st.error(f"โŒ ์นดํ”ผ ์ƒ์„ฑ ์‹คํŒจ: {e_gen}")
        st.error(f"์˜ค๋ฅ˜ ํƒ€์ž…: {type(e_gen)}")
        st.error(traceback.format_exc())
        return None
    finally:
        # Every exit path removes the progress widgets. The dimension
        # mismatch return previously left them on screen.
        progress_bar.empty()
        status_text_gen.empty()
# ์ƒ์„ฑ ๋ฒ„ํŠผ
if st.button("๐Ÿš€ ์นดํ”ผ ์ƒ์„ฑํ•˜๊ธฐ", type="primary", use_container_width=True, key="generate_button"):
if not user_request or not user_request.strip():
st.error("โŒ ์นดํ”ผ ์š”์ฒญ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”")
else:
result = generate_copy_with_rag(
user_req=user_request, category_filter=selected_category, target_aud=target_audience,
brand_tn=brand_tone, creative_lvl=creative_level, num_con=num_concepts
)
if result:
st.markdown("## ๐ŸŽ‰ ์ƒ์„ฑ๋œ ์นดํ”ผ"); st.markdown("---")
st.info(f"๐Ÿ” **๊ฒ€์ƒ‰ ์ •๋ณด**: {result['search_info']['total_candidates']:,}๊ฐœ ํ›„๋ณด์—์„œ "
f"{result['search_info']['selected_references']}๊ฐœ ์ฐธ๊ณ  ์นดํ”ผ ์„ ๋ณ„")
if show_references and result['references']:
with st.expander("๐Ÿ“š ์ฐธ๊ณ ํ•œ ์นดํ”ผ๋“ค (์˜๋ฏธ์  ์œ ์‚ฌ๋„ ๊ธฐ๋ฐ˜ ์„ ๋ณ„)"):
for i, ref in enumerate(result['references'], 1):
st.markdown(f"**{i}.** \"{ref['copy']}\"")
st.markdown(f" - ๋ธŒ๋žœ๋“œ: {ref['brand']}")
st.markdown(f" - ์œ ์‚ฌ๋„: {ref['similarity']:.3f}"); st.markdown("")
st.markdown("### โœจ AI๊ฐ€ ์ƒ์„ฑํ•œ ์นดํ”ผ:"); st.markdown(result['generated_content'])
try:
result_json = json.dumps({
'timestamp': datetime.now().isoformat(), 'request': user_request,
'settings': result['settings'], 'search_info': result['search_info'],
'generated_content': result['generated_content'], 'references': result['references']
}, ensure_ascii=False, indent=2)
st.download_button(
label="๐Ÿ’พ ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ (JSON)", data=result_json,
file_name=f"copy_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
mime="application/json", key="download_button"
)
except Exception as e_json: st.error(f"โŒ ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ ํŒŒ์ผ ์ƒ์„ฑ ์‹คํŒจ: {e_json}")
# System information (bottom of the sidebar).
sidebar_info = st.sidebar
sidebar_info.markdown("---")
sidebar_info.markdown("### ๐Ÿ“Š RAG ์‹œ์Šคํ…œ ์ •๋ณด")
# Dataset statistics are shown only when both the spreadsheet and the
# embedding matrix are available.
if not (loaded_df is None or loaded_embeddings is None):
    copy_count = len(loaded_df)
    category_count = loaded_df['์นดํ…Œ๊ณ ๋ฆฌ'].nunique()
    brand_count = loaded_df['๋ธŒ๋žœ๋“œ'].nunique()
    embedding_dim = loaded_embeddings.shape[1]
    sidebar_info.markdown(f"**์นดํ”ผ ๋ฐ์ดํ„ฐ**: {copy_count:,}๊ฐœ")
    sidebar_info.markdown(f"**์นดํ…Œ๊ณ ๋ฆฌ**: {category_count}๊ฐœ")
    sidebar_info.markdown(f"**๋ธŒ๋žœ๋“œ**: {brand_count}๊ฐœ")
    sidebar_info.markdown(f"**์ž„๋ฒ ๋”ฉ**: {embedding_dim}์ฐจ์›")
sidebar_info.markdown("**๊ฒ€์ƒ‰ ์—”์ง„**: Korean SBERT")
sidebar_info.markdown("**ํ˜ธ์ŠคํŒ…**: ๐Ÿค— Hugging Face")
# Usage guide: static markdown shown inside a collapsible section.
usage_guide_markdown = """
### ๐ŸŽฏ ํšจ๊ณผ์ ์ธ ์‚ฌ์šฉ๋ฒ•
**1. ๊ตฌ์ฒด์ ์ธ ์š”์ฒญํ•˜๊ธฐ:**
- โŒ "์นดํ”ผ ์จ์ค˜"
- โœ… "30๋Œ€ ์ง์žฅ ์—ฌ์„ฑ์šฉ ํ”„๋ฆฌ๋ฏธ์—„ ์Šคํ‚จ์ผ€์–ด ์‹ ์ œํ’ˆ ๋Ÿฐ์นญ ์นดํ”ผ"
**2. RAG ์‹œ์Šคํ…œ์˜ ์žฅ์ :**
- ๐Ÿง  **์˜๋ฏธ์  ๊ฒ€์ƒ‰**: ํ‚ค์›Œ๋“œ๋ฟ๋งŒ ์•„๋‹ˆ๋ผ ์˜๋ฏธ๊นŒ์ง€ ์ดํ•ด
- ๐ŸŽฏ **๋ฌธ๋งฅ ๋งค์นญ**: ํƒ€๊ฒŸ๊ณผ ์ƒํ™ฉ์— ๋งž๋Š” ์นดํ”ผ ์ž๋™ ์„ ๋ณ„
- ๐Ÿ“Š **๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜**: 37,671๊ฐœ ์‹ค์ œ ์นดํ”ผ์—์„œ ํ•™์Šตํ•œ ํŒจํ„ด
**3. ์ฐฝ์˜์„ฑ ์กฐ์ ˆ:**
- **๋ณด์ˆ˜์ **: ์•ˆ์ „ํ•œ ํด๋ผ์ด์–ธํŠธ, ๊ฒ€์ฆ๋œ ์ ‘๊ทผ
- **๊ท ํ˜•**: ์ผ๋ฐ˜์ ์ธ ํ”„๋กœ์ ํŠธ (์ถ”์ฒœ!)
- **์ฐฝ์˜์ **: ํ˜์‹ ์  ๋ธŒ๋žœ๋“œ, ํŒŒ๊ฒฉ์  ์บ ํŽ˜์ธ
**4. ์ฐธ๊ณ  ์นดํ”ผ ํ™œ์šฉ:**
- ์ƒ์„ฑ๋œ ์นดํ”ผ์™€ ์ฐธ๊ณ  ์นดํ”ผ๋ฅผ ๋น„๊ต ๋ถ„์„
- ํŠธ๋ Œ๋“œ์™€ ํŒจํ„ด ํŒŒ์•… ๊ฐ€๋Šฅ
- ๊ฒฝ์Ÿ์‚ฌ ๋ถ„์„ ์ž๋ฃŒ๋กœ ํ™œ์šฉ
"""
with st.expander("๐Ÿ’ก RAG ์‹œ์Šคํ…œ ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ"):
    st.markdown(usage_guide_markdown)
# Footer: a horizontal rule followed by a one-line credit.
st.markdown("---")
footer_text = "".join([
    "๐Ÿ’ก **AI ์นดํ”ผ๋ผ์ดํ„ฐ** | 37,671๊ฐœ ์‹ค์ œ ๊ด‘๊ณ  ์นดํ”ผ ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ | ",
    "RAG(๊ฒ€์ƒ‰ ์ฆ๊ฐ• ์ƒ์„ฑ) ์‹œ์Šคํ…œ powered by Korean SBERT + Gemini AI",
])
st.markdown(footer_text)
# Performance monitoring (for developers); shown only when the DEBUG_MODE
# environment variable is exactly "true".
debug_mode_enabled = os.environ.get("DEBUG_MODE") == "true"
if debug_mode_enabled:
    debug_sidebar = st.sidebar
    debug_sidebar.markdown("### ๐Ÿ”ง ๋””๋ฒ„๊ทธ ์ •๋ณด (ํ™œ์„ฑํ™”๋จ)")
    # At module level locals() and globals() are the same namespace, so a
    # globals() lookup covers both "is it defined" and "is it not None".
    debug_embeddings = globals().get('loaded_embeddings')
    if debug_embeddings is not None:
        embeddings_megabytes = debug_embeddings.nbytes / (1024*1024)
        debug_sidebar.write(f"์ž„๋ฒ ๋”ฉ ๋ฉ”๋ชจ๋ฆฌ: {embeddings_megabytes:.1f}MB")
    debug_sidebar.write(f"Streamlit ๋ฒ„์ „: {st.__version__}")
    debug_sidebar.write(f"Pandas ๋ฒ„์ „: {pd.__version__}")
    # Import numpy under a dedicated alias rather than relying on an np
    # alias that may not be defined in this scope.
    try:
        import numpy as np_debug_version
        debug_sidebar.write(f"Numpy ๋ฒ„์ „ (Global): {np_debug_version.__version__}")
    except ImportError:
        debug_sidebar.write("Numpy ๋ฒ„์ „ (Global): Not imported or error")
    # torch is not used directly here, so the version sentence_transformers
    # uses internally is not reported.
    debug_sidebar.write(f"google-generativeai ๋ฒ„์ „: {genai.__version__}")