Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from gradio_client import Client
|
|
8 |
import markdown
|
9 |
import tempfile
|
10 |
import base64
|
11 |
-
from
|
12 |
|
13 |
# 로깅 설정
|
14 |
logging.basicConfig(
|
@@ -25,6 +25,9 @@ IMAGE_API_URL = "http://211.233.58.201:7896"
|
|
25 |
# ์ต๋ ํ ํฐ ์ ์ค์ (Claude-3 Sonnet์ ์ต๋ ํ ํฐ ์)
|
26 |
MAX_TOKENS = 7999
|
27 |
|
|
|
|
|
|
|
28 |
def get_system_prompt():
|
29 |
return """
|
30 |
๋น์ ์ ์ ๋ฌธ ๋ธ๋ก๊ทธ ์์ฑ ์ ๋ฌธ๊ฐ์
๋๋ค. ๋ชจ๋ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ ์์ฒญ์ ๋ํด ๋ค์์ 8๋จ๊ณ ํ๋ ์์ํฌ๋ฅผ ์ฒ ์ ํ ๋ฐ๋ฅด๋, ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๊ธ์ด ๋๋๋ก ์์ฑํด์ผ ํฉ๋๋ค:
|
@@ -141,31 +144,113 @@ def convert_md_to_html(md_text, title="Ginigen Blog"):
|
|
141 |
"""
|
142 |
return html_doc
|
143 |
|
144 |
-
#
|
145 |
-
def
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
#
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
def chatbot_interface():
|
171 |
st.title("Ginigen Blog")
|
@@ -186,6 +271,10 @@ def chatbot_interface():
|
|
186 |
if "generate_image" not in st.session_state:
|
187 |
st.session_state.generate_image = False
|
188 |
|
|
|
|
|
|
|
|
|
189 |
# ์ด๋ฏธ์ง API ์ํ
|
190 |
if "image_api_status" not in st.session_state:
|
191 |
st.session_state.image_api_status = test_image_api_connection()
|
@@ -199,6 +288,9 @@ def chatbot_interface():
|
|
199 |
# ์ด๋ฏธ์ง ์์ฑ ํ ๊ธ
|
200 |
st.session_state.generate_image = st.sidebar.toggle("๋ธ๋ก๊ทธ ๊ธ ์์ฑ ํ ์ด๋ฏธ์ง ์๋ ์์ฑ", value=st.session_state.generate_image)
|
201 |
|
|
|
|
|
|
|
202 |
# ์ด๋ฏธ์ง API ์ํ ํ์
|
203 |
st.sidebar.text(st.session_state.image_api_status)
|
204 |
|
@@ -246,7 +338,7 @@ def chatbot_interface():
|
|
246 |
if latest_blog:
|
247 |
st.sidebar.subheader("์ต๊ทผ ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋")
|
248 |
|
249 |
-
col1, col2
|
250 |
|
251 |
# ๋งํฌ๋ค์ด์ผ๋ก ๋ค์ด๋ก๋
|
252 |
with col1:
|
@@ -266,20 +358,6 @@ def chatbot_interface():
|
|
266 |
file_name=f"{latest_blog_title}.html",
|
267 |
mime="text/html"
|
268 |
)
|
269 |
-
|
270 |
-
# PDF๋ก ๋ค์ด๋ก๋
|
271 |
-
with col3:
|
272 |
-
try:
|
273 |
-
pdf_data = convert_md_to_pdf(latest_blog, latest_blog_title)
|
274 |
-
st.download_button(
|
275 |
-
label="PDF",
|
276 |
-
data=pdf_data,
|
277 |
-
file_name=f"{latest_blog_title}.pdf",
|
278 |
-
mime="application/pdf"
|
279 |
-
)
|
280 |
-
except Exception as e:
|
281 |
-
st.error(f"PDF ์์ฑ ์ค๋ฅ: {e}")
|
282 |
-
logging.error(f"PDF ์์ฑ ์ค๋ฅ: {e}")
|
283 |
|
284 |
# ๋ํ ๊ธฐ๋ก ๋ถ๋ฌ์ค๊ธฐ
|
285 |
uploaded_file = st.sidebar.file_uploader("๋ํ ๊ธฐ๋ก ๋ถ๋ฌ์ค๊ธฐ", type=['json'])
|
@@ -320,10 +398,23 @@ def chatbot_interface():
|
|
320 |
message_placeholder = st.empty()
|
321 |
full_response = ""
|
322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
# API ํธ์ถ
|
324 |
with client.messages.stream(
|
325 |
max_tokens=MAX_TOKENS,
|
326 |
-
system=
|
327 |
messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
|
328 |
model=st.session_state["ai_model"]
|
329 |
) as stream:
|
@@ -373,7 +464,7 @@ def chatbot_interface():
|
|
373 |
|
374 |
# ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋ ๋ฒํผ ํ์ (์๋ต ๋ฐ๋ก ์๋์)
|
375 |
st.subheader("์ด ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋:")
|
376 |
-
col1, col2
|
377 |
|
378 |
with col1:
|
379 |
st.download_button(
|
@@ -391,19 +482,6 @@ def chatbot_interface():
|
|
391 |
file_name=f"{prompt[:30]}.html",
|
392 |
mime="text/html"
|
393 |
)
|
394 |
-
|
395 |
-
with col3:
|
396 |
-
try:
|
397 |
-
pdf_data = convert_md_to_pdf(full_response, prompt[:30])
|
398 |
-
st.download_button(
|
399 |
-
label="PDF๋ก ์ ์ฅ",
|
400 |
-
data=pdf_data,
|
401 |
-
file_name=f"{prompt[:30]}.pdf",
|
402 |
-
mime="application/pdf"
|
403 |
-
)
|
404 |
-
except Exception as e:
|
405 |
-
st.error(f"PDF ์์ฑ ์ค๋ฅ: {e}")
|
406 |
-
logging.error(f"PDF ์์ฑ ์ค๋ฅ: {e}")
|
407 |
|
408 |
# ์๋ ์ ์ฅ ๊ธฐ๋ฅ
|
409 |
if st.session_state.auto_save:
|
@@ -414,7 +492,11 @@ def chatbot_interface():
|
|
414 |
save_msg = {"role": msg["role"], "content": msg["content"]}
|
415 |
save_messages.append(save_msg)
|
416 |
|
417 |
-
|
|
|
|
|
|
|
|
|
418 |
json.dump(save_messages, f, ensure_ascii=False, indent=4)
|
419 |
except Exception as e:
|
420 |
st.sidebar.error(f"์๋ ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
@@ -446,7 +528,6 @@ if __name__ == "__main__":
|
|
446 |
f.write("gradio-client>=1.8.0\n")
|
447 |
f.write("requests>=2.32.3\n")
|
448 |
f.write("markdown>=3.5.1\n")
|
449 |
-
f.write("weasyprint>=60.2\n")
|
450 |
f.write("pillow>=10.1.0\n")
|
451 |
|
452 |
main()
|
|
|
8 |
import markdown
|
9 |
import tempfile
|
10 |
import base64
|
11 |
+
from datetime import datetime
|
12 |
|
13 |
# 로깅 설정
|
14 |
logging.basicConfig(
|
|
|
25 |
# ์ต๋ ํ ํฐ ์ ์ค์ (Claude-3 Sonnet์ ์ต๋ ํ ํฐ ์)
|
26 |
MAX_TOKENS = 7999
|
27 |
|
28 |
+
# SerpHouse API Key 설정
|
29 |
+
SERPHOUSE_API_KEY = os.environ.get("SERPHOUSE_API_KEY", "")
|
30 |
+
|
31 |
def get_system_prompt():
|
32 |
return """
|
33 |
๋น์ ์ ์ ๋ฌธ ๋ธ๋ก๊ทธ ์์ฑ ์ ๋ฌธ๊ฐ์
๋๋ค. ๋ชจ๋ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ ์์ฒญ์ ๋ํด ๋ค์์ 8๋จ๊ณ ํ๋ ์์ํฌ๋ฅผ ์ฒ ์ ํ ๋ฐ๋ฅด๋, ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๊ธ์ด ๋๋๋ก ์์ฑํด์ผ ํฉ๋๋ค:
|
|
|
144 |
"""
|
145 |
return html_doc
|
146 |
|
147 |
+
# 웹 검색 키워드 추출 함수
|
148 |
+
def extract_keywords(text: str, top_k: int = 5) -> str:
    """Build a short search query from the leading words of *text*.

    Every character that is not Hangul (U+AC00-U+D7A3), an ASCII letter,
    an ASCII digit or whitespace is removed. The remainder is split on
    whitespace and the first ``top_k`` tokens are joined with single spaces.

    Args:
        text: Free-form user prompt. ``None`` or an empty string yields "".
        top_k: Maximum number of tokens to keep. Values <= 0 yield "".

    Returns:
        The space-joined leading tokens, or "" when nothing survives.
    """
    import re

    # The Hangul syllable range is written with \u escapes so the pattern
    # keeps working even if this source file is re-encoded incorrectly
    # (a literal range that gets garbled silently strips all Korean text).
    cleaned = re.sub(r"[^a-zA-Z0-9\uAC00-\uD7A3\s]", "", text or "")
    tokens = cleaned.split()
    # Clamp so a negative top_k cannot turn into "all but the last N".
    return " ".join(tokens[:max(top_k, 0)])
|
159 |
+
|
160 |
+
# 웹 검색 함수
|
161 |
+
def _find_organic_results(data):
    """Return the non-empty 'organic' list from a SerpHouse payload, else []."""
    if not isinstance(data, dict):
        return []
    results = data.get("results")
    nested = results.get("results") if isinstance(results, dict) else None
    # Probe every known layout in order (results.organic, results.results.organic,
    # top-level organic). Each one is tried independently so that a missing or
    # empty list in one layout does not hide a usable list in another.
    for container in (results, nested, data):
        if isinstance(container, dict):
            organic = container.get("organic")
            if isinstance(organic, list) and organic:
                return organic
    return []


def _format_search_results(organic, max_results):
    """Render up to *max_results* organic items as Markdown sections."""
    entries = []
    for item in organic[:max_results]:
        if not isinstance(item, dict):
            # Skip malformed entries instead of failing the whole search.
            continue
        title = item.get("title") or "No title"
        link = item.get("link") or "#"
        snippet = item.get("snippet") or "No description"
        displayed_link = item.get("displayed_link") or link
        entries.append(
            f"### Result {len(entries) + 1}: {title}\n\n"
            f"{snippet}\n\n"
            f"**출처**: [{displayed_link}]({link})\n\n"
            f"---\n"
        )
    return entries


def do_web_search(query: str, max_results: int = 20) -> str:
    """Query the SerpHouse live SERP API and return the results as Markdown.

    The returned text starts with a short Korean instruction block for the
    model, followed by one Markdown section per organic result (title,
    snippet and a clickable source link).

    Args:
        query: Search terms sent as the ``q`` parameter.
        max_results: Upper bound on the number of results requested and
            rendered (default 20).

    Returns:
        The Markdown text on success; a fixed "No web search results ..."
        message when the response holds no usable organic results; or a
        string starting with "Web search failed:" on any error. This
        function does not raise: callers detect failure by looking for
        "search failed" in the returned text.
    """
    if not query or not query.strip():
        logging.warning("Web search skipped: empty search query")
        return "Web search failed: empty search query"
    if not SERPHOUSE_API_KEY:
        # Fail fast instead of sending an unauthenticated request.
        logging.error("Web search failed: SERPHOUSE_API_KEY is not set")
        return "Web search failed: SERPHOUSE_API_KEY is not set"

    try:
        url = "https://api.serphouse.com/serp/live"
        params = {
            "q": query,
            "domain": "google.com",
            "serp_type": "web",
            "device": "desktop",
            "lang": "en",
            "num": str(max_results),
        }
        headers = {"Authorization": f"Bearer {SERPHOUSE_API_KEY}"}

        logging.info("SerpHouse API 호출 중... 검색어: %s", query)
        logging.info("요청 URL: %s - 파라미터: %s", url, params)

        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()
        logging.info("SerpHouse API 응답 상태 코드: %s", response.status_code)
        data = response.json()

        organic = _find_organic_results(data)
        entries = _format_search_results(organic, max_results) if organic else []
        if not entries:
            logging.warning("응답에서 organic 결과를 찾을 수 없습니다.")
            if isinstance(data, dict):
                logging.debug("응답 구조: %s", list(data.keys()))
                results = data.get("results")
                if isinstance(results, dict):
                    logging.debug("results 구조: %s", list(results.keys()))
            return "No web search results found or unexpected API response structure."

        # Explicit guidance for the model on how to use and cite the results.
        instructions = (
            "\n# 웹 검색 결과\n"
            "아래는 검색 결과입니다. 질문에 답변할 때 이 정보를 활용하세요:\n"
            "1. 각 결과의 제목, 내용, 출처 링크를 참고하세요\n"
            '2. 답변에 관련 정보의 출처를 명시적으로 인용하세요 (예: "X 출처에 따르면...")\n'
            "3. 응답에 실제 출처 링크를 포함하세요\n"
            "4. 여러 출처의 정보를 종합하여 답변하세요\n"
        )

        search_results = instructions + "\n".join(entries)
        logging.info("검색 결과 %d개 처리 완료", len(entries))
        return search_results

    except Exception as e:
        # Deliberate catch-all boundary: the caller relies on always getting
        # a string back and detects failure from its text.
        logging.error("Web search failed: %s", e)
        return f"Web search failed: {str(e)}"
|
254 |
|
255 |
def chatbot_interface():
|
256 |
st.title("Ginigen Blog")
|
|
|
271 |
if "generate_image" not in st.session_state:
|
272 |
st.session_state.generate_image = False
|
273 |
|
274 |
+
# ์น ๊ฒ์ ํ ๊ธ
|
275 |
+
if "use_web_search" not in st.session_state:
|
276 |
+
st.session_state.use_web_search = False
|
277 |
+
|
278 |
# ์ด๋ฏธ์ง API ์ํ
|
279 |
if "image_api_status" not in st.session_state:
|
280 |
st.session_state.image_api_status = test_image_api_connection()
|
|
|
288 |
# ์ด๋ฏธ์ง ์์ฑ ํ ๊ธ
|
289 |
st.session_state.generate_image = st.sidebar.toggle("๋ธ๋ก๊ทธ ๊ธ ์์ฑ ํ ์ด๋ฏธ์ง ์๋ ์์ฑ", value=st.session_state.generate_image)
|
290 |
|
291 |
+
# ์น ๊ฒ์ ํ ๊ธ
|
292 |
+
st.session_state.use_web_search = st.sidebar.toggle("์ฃผ์ ์น ๊ฒ์ ๋ฐ ๋ถ์", value=st.session_state.use_web_search)
|
293 |
+
|
294 |
# ์ด๋ฏธ์ง API ์ํ ํ์
|
295 |
st.sidebar.text(st.session_state.image_api_status)
|
296 |
|
|
|
338 |
if latest_blog:
|
339 |
st.sidebar.subheader("์ต๊ทผ ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋")
|
340 |
|
341 |
+
col1, col2 = st.sidebar.columns(2)
|
342 |
|
343 |
# ๋งํฌ๋ค์ด์ผ๋ก ๋ค์ด๋ก๋
|
344 |
with col1:
|
|
|
358 |
file_name=f"{latest_blog_title}.html",
|
359 |
mime="text/html"
|
360 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
|
362 |
# ๋ํ ๊ธฐ๋ก ๋ถ๋ฌ์ค๊ธฐ
|
363 |
uploaded_file = st.sidebar.file_uploader("๋ํ ๊ธฐ๋ก ๋ถ๋ฌ์ค๊ธฐ", type=['json'])
|
|
|
398 |
message_placeholder = st.empty()
|
399 |
full_response = ""
|
400 |
|
401 |
+
# ์น ๊ฒ์ ์ํ (์น ๊ฒ์ ์ต์
์ด ์ผ์ ธ ์์ ๊ฒฝ์ฐ)
|
402 |
+
system_prompt = get_system_prompt()
|
403 |
+
if st.session_state.use_web_search:
|
404 |
+
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
405 |
+
search_query = extract_keywords(prompt, top_k=5)
|
406 |
+
search_results = do_web_search(search_query)
|
407 |
+
if "search failed" not in search_results.lower():
|
408 |
+
# ์์คํ
ํ๋กฌํํธ์ ๊ฒ์ ๊ฒฐ๊ณผ ์ถ๊ฐ
|
409 |
+
system_prompt += f"\n\n๊ฒ์ ๊ฒฐ๊ณผ:\n{search_results}\n"
|
410 |
+
st.success(f"๊ฒ์ ์๋ฃ: '{search_query}'์ ๋ํ ์ ๋ณด๋ฅผ ์์งํ์ต๋๋ค.")
|
411 |
+
else:
|
412 |
+
st.error("์น ๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.")
|
413 |
+
|
414 |
# API ํธ์ถ
|
415 |
with client.messages.stream(
|
416 |
max_tokens=MAX_TOKENS,
|
417 |
+
system=system_prompt,
|
418 |
messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
|
419 |
model=st.session_state["ai_model"]
|
420 |
) as stream:
|
|
|
464 |
|
465 |
# ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋ ๋ฒํผ ํ์ (์๋ต ๋ฐ๋ก ์๋์)
|
466 |
st.subheader("์ด ๋ธ๋ก๊ทธ ๋ค์ด๋ก๋:")
|
467 |
+
col1, col2 = st.columns(2)
|
468 |
|
469 |
with col1:
|
470 |
st.download_button(
|
|
|
482 |
file_name=f"{prompt[:30]}.html",
|
483 |
mime="text/html"
|
484 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
|
486 |
# ์๋ ์ ์ฅ ๊ธฐ๋ฅ
|
487 |
if st.session_state.auto_save:
|
|
|
492 |
save_msg = {"role": msg["role"], "content": msg["content"]}
|
493 |
save_messages.append(save_msg)
|
494 |
|
495 |
+
# ํ์ฌ ์๊ฐ์ ํฌํจํ ํ์ผ๋ช
์์ฑ
|
496 |
+
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
|
497 |
+
filename = f'chat_history_auto_save_{current_time}.json'
|
498 |
+
|
499 |
+
with open(filename, 'w', encoding='utf-8') as f:
|
500 |
json.dump(save_messages, f, ensure_ascii=False, indent=4)
|
501 |
except Exception as e:
|
502 |
st.sidebar.error(f"์๋ ์ ์ฅ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
|
|
528 |
f.write("gradio-client>=1.8.0\n")
|
529 |
f.write("requests>=2.32.3\n")
|
530 |
f.write("markdown>=3.5.1\n")
|
|
|
531 |
f.write("pillow>=10.1.0\n")
|
532 |
|
533 |
main()
|