# (Hosting-page residue, not part of the program: "Spaces: Running Running")
"""
Ginigen Blog / Streamlit App
────────────────────────────────────────────────────────────────────
- 2025-04-23 : Brave Search API version
- SerpHouse removed entirely; Brave Search API applied
- API Key : environment variable SERPHOUSE_API_KEY (only the name is kept as-is)
────────────────────────────────────────────────────────────────────
"""
import html
import json
import logging
import os
import re
from datetime import datetime

import anthropic
import markdown
import requests
import streamlit as st
from gradio_client import Client
# BeautifulSoup is no longer used; the import is kept (commented out) in case it is needed again.
# from bs4 import BeautifulSoup
# βββββββββββββββββββββββββββββ 1) λ‘κΉ βββββββββββββββββββββββββββββββββββββββββ | |
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# ───────────────────────── 2) Global constants / API keys ─────────────────────────
# Anthropic API key, read from the API_KEY environment variable.
ANTHROPIC_KEY = os.getenv("API_KEY", "")
# Brave Search API key. The variable keeps its historical SERPHOUSE_API_KEY
# name even though the value is used as the Brave subscription token.
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
# Image-generation endpoint; the IMAGE_API_URL environment variable overrides
# the built-in default so deployments do not need a code change.
IMAGE_API_URL = os.getenv("IMAGE_API_URL", "http://211.233.58.201:7896")
# Upper bound passed as max_tokens when streaming the blog text.
MAX_TOKENS = 7_999
# βββββββββββββββββββββββββββββ 3) ν΄λΌμ΄μΈνΈ ββββββββββββββββββββββββββββββββββ | |
# Shared Anthropic client used by every Claude call in this module.
client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
# βββββββββββββββββββββββββββββ 4) μμ€ν ν둬ννΈ βββββββββββββββββββββββββββββ | |
def get_system_prompt() -> str:
    """Return the fixed system prompt that defines the blog-writing framework.

    The text lists eight writing stages (items 1.x-8.x) followed by general
    writing rules (items 9.x).

    NOTE(review): the non-ASCII text below looks mis-decoded (UTF-8 read with
    a single-byte code page). It is reproduced unchanged here; restore it from
    the original UTF-8 source before relying on it.
    """
    return """
λΉμ μ μ λ¬Έ λΈλ‘κ·Έ μμ± μ λ¬Έκ°μ λλ€. λͺ¨λ λΈλ‘κ·Έ κΈ μμ± μμ²μ λν΄ λ€μμ 8λ¨κ³ νλ μμν¬λ₯Ό μ² μ ν λ°λ₯΄λ, μμ°μ€λ½κ³ λ§€λ ₯μ μΈ κΈμ΄ λλλ‘ μμ±ν΄μΌ ν©λλ€:
λ μ μ°κ²° λ¨κ³
1.1. 곡κ°λ νμ±μ μν μΉκ·Όν μΈμ¬
1.2. λ μμ μ€μ κ³ λ―Όμ λ°μν λμ μ§λ¬Έ
1.3. μ£Όμ μ λν μ¦κ°μ κ΄μ¬ μ λ
λ¬Έμ μ μ λ¨κ³
2.1. λ μμ νμΈν¬μΈνΈ ꡬ체ν
2.2. λ¬Έμ μ μκΈμ±κ³Ό μν₯λ λΆμ
2.3. ν΄κ²° νμμ±μ λν 곡κ°λ νμ±
μ λ¬Έμ± μ μ¦ λ¨κ³
3.1. κ°κ΄μ λ°μ΄ν° κΈ°λ° λΆμ
3.2. μ λ¬Έκ° κ²¬ν΄μ μ°κ΅¬ κ²°κ³Ό μΈμ©
3.3. μ€μ μ¬λ‘λ₯Ό ν΅ν λ¬Έμ ꡬ체ν
μ루μ μ 곡 λ¨κ³
4.1. λ¨κ³λ³ μ€μ² κ°μ΄λλΌμΈ μ μ
4.2. μ¦μ μ μ© κ°λ₯ν ꡬ체μ ν
4.3. μμ μ₯μ λ¬Όκ³Ό 극볡 λ°©μ ν¬ν¨
μ λ’°λ κ°ν λ¨κ³
5.1. μ€μ μ±κ³΅ μ¬λ‘ μ μ
5.2. ꡬ체μ μ¬μ©μ νκΈ° μΈμ©
5.3. κ°κ΄μ λ°μ΄ν°λ‘ ν¨κ³Ό μ μ¦
νλ μ λ λ¨κ³
6.1. λͺ νν 첫 μ€μ² λ¨κ³ μ μ
6.2. μκΈμ±μ κ°μ‘°ν νλ μ΄κ΅¬
6.3. μ€μ² λκΈ° λΆμ¬ μμ ν¬ν¨
μ§μ μ± κ°ν λ¨κ³
7.1. μ루μ μ νκ³ ν¬λͺ νκ² κ³΅κ°
7.2. κ°μΈλ³ μ°¨μ΄ μ‘΄μ¬ μΈμ
7.3. νμ 쑰건과 μ£Όμμ¬ν λͺ μ
κ΄κ³ μ§μ λ¨κ³
8.1. μ§μ μ± μλ κ°μ¬ μΈμ¬
8.2. λ€μ 컨ν μΈ μκ³ λ‘ κΈ°λκ° μ‘°μ±
8.3. μν΅ μ±λ μλ΄
μμ± μ μ€μμ¬ν
9.1. κΈμ μ: 1500-2000μ λ΄μΈ
9.2. λ¬Έλ¨ κΈΈμ΄: 3-4λ¬Έμ₯ μ΄λ΄
9.3. μκ°μ ꡬλΆ: μμ λͺ©, ꡬλΆμ , λ²νΈ λͺ©λ‘ νμ©
9.4. ν€μ€λ§€λ: μΉκ·Όνκ³ μ λ¬Έμ μΈ λν체
9.5. λ°μ΄ν°: λͺ¨λ μ 보μ μΆμ² λͺ μ
9.6. κ°λ μ±: λͺ νν λ¨λ½ ꡬλΆκ³Ό κ°μ‘°μ μ¬μ©
"""
# βββββββββββββββββββββββββββββ 5) Brave Search ν¨μ βββββββββββββββββββββββββββ | |
def brave_search(query: str, count: int = 5):
    """Call the Brave Web Search API and return normalized result records.

    Args:
        query: Search terms, sent as the ``q`` request parameter.
        count: Maximum number of results to request and to return.

    Returns:
        A list of dicts, one per result, with the keys ``index`` (1-based
        position), ``title``, ``link``, ``snippet`` and ``displayed_link``
        (the URL with scheme, a leading ``www.`` and the path removed).

    Raises:
        RuntimeError: If ``BRAVE_KEY`` is empty.
        requests.HTTPError: If the API responds with an error status
            (raised by ``raise_for_status``).
    """
    if not BRAVE_KEY:
        raise RuntimeError("νκ²½λ³μ SERPHOUSE_API_KEY(=Brave API key)κ° μ€μ λμ΄ μμ§ μμ΅λλ€.")

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    params = {"q": query, "count": str(count)}
    resp = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
    resp.raise_for_status()
    data = resp.json()

    # Results are read from data["web"]["results"], falling back to a
    # top-level "results" list. `or {}` / `or []` also cover explicit nulls.
    web_results = (data.get("web") or {}).get("results") or data.get("results") or []

    articles = []
    for idx, r in enumerate(web_results[:count], 1):
        url = r.get("url", r.get("link", ""))
        # Strip the scheme and an optional "www." prefix, then keep the host.
        # The dot must be escaped with a single backslash in the raw string;
        # the previous r"(www\\.)?" matched "www" + backslash + any character,
        # so "www." was never removed.
        host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
        articles.append({
            "index": idx,
            "title": r.get("title", "μ λͺ© μμ"),
            "link": url,
            "snippet": r.get("description", r.get("text", "λ΄μ© μμ")),
            "displayed_link": host,
        })
    return articles
# βββββββββββββββββββββββββββββ 6) κ²μ β λ§ν¬λ€μ΄ βββββββββββββββββββββββββββββ | |
def generate_mock_search_results(query: str) -> str:
    """Build a placeholder search-result document in Markdown.

    Used when no real search results are available. The document has a
    timestamped level-1 heading followed by a single fabricated entry that
    points at example.com.

    Args:
        query: The search terms; embedded in the fabricated entry's title.

    Returns:
        The Markdown text.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    mock = [{
        "title": f"{query} κ΄λ ¨ κ°μ κ²°κ³Ό",
        "link": "https://example.com",
        "snippet": "API νΈμΆ μ€ν¨λ‘ μμ±λ μμ κ²°κ³Όμ λλ€.",
        "displayed_link": "example.com",
    }]
    body = "\n".join(
        f"### Result {i}: {m['title']}\n\n{m['snippet']}\n\n"
        f"**μΆμ²**: [{m['displayed_link']}]({m['link']})\n\n---\n"
        for i, m in enumerate(mock, 1)
    )
    return f"# κ°μ κ²μ κ²°κ³Ό (μμ±: {ts})\n\n{body}"
def do_web_search(query: str) -> str:
    """Run a Brave search and format the results as Markdown.

    This is a best-effort wrapper: if ``brave_search`` raises for any reason,
    or returns no results, the output of ``generate_mock_search_results`` is
    returned instead so the caller always gets a document.

    Args:
        query: Search terms passed to ``brave_search``.

    Returns:
        Markdown text: a header followed by one section per result, or the
        mock document on failure / empty results.
    """
    try:
        articles = brave_search(query, count=5)
    except Exception as e:  # deliberate catch-all: searching is optional
        logging.error("Brave κ²μ μ€ν¨: %s", e)
        return generate_mock_search_results(query)

    if not articles:
        return generate_mock_search_results(query)

    sections = [
        f"### Result {a['index']}: {a['title']}\n\n"
        f"{a['snippet']}\n\n"
        f"**μΆμ²**: [{a['displayed_link']}]({a['link']})\n\n---\n"
        for a in articles
    ]
    header = (
        "# μΉ κ²μ κ²°κ³Ό\n"
        "μλ μ 보λ₯Ό λ΅λ³μ νμ©νμΈμ: μΆμ² μΈμ©Β·λ§ν¬ ν¬ν¨Β·λ€μ μΆμ² μ’ ν©\n\n"
    )
    return header + "".join(sections)
# βββββββββββββββββββββββββββββ 7) μ΄λ―Έμ§Β·MD λ³ν λ± μ νΈ βββββββββββββββββββββββ | |
def test_image_api_connection():
    """Check whether a Gradio client can be created for ``IMAGE_API_URL``.

    Returns:
        A status message: a success text if ``Client(IMAGE_API_URL)`` could
        be constructed, otherwise a failure text that includes the error.
    """
    try:
        Client(IMAGE_API_URL)
    except Exception as e:  # any failure is reported as a status string
        logging.error("Image API connection check failed: %s", e)
        return f"μ΄λ―Έμ§ API μ°κ²° μ€ν¨: {e}"
    return "μ΄λ―Έμ§ API μ°κ²° μ±κ³΅"
def generate_image(prompt, width=768, height=768, guidance=3.5,
                   inference_steps=30, seed=3):
    """Request an image from the ``/generate_image`` endpoint of the image API.

    Args:
        prompt: Text prompt for the image; an empty/None prompt is rejected
            without contacting the API.
        width: Forwarded to the API as ``width``.
        height: Forwarded to the API as ``height``.
        guidance: Forwarded to the API as ``guidance``.
        inference_steps: Forwarded to the API as ``inference_steps``.
        seed: Forwarded to the API as ``seed``.

    Returns:
        A ``(image, message)`` tuple. On success ``image`` is the first item
        of the API result and ``message`` is ``"Seed: <second item>"``. On
        failure ``image`` is ``None`` and ``message`` describes the problem.
    """
    if not prompt:
        return None, "ν둬ννΈ λΆμ‘±"

    try:
        c = Client(IMAGE_API_URL)
        res = c.predict(
            prompt=prompt, width=width, height=height,
            guidance=guidance, inference_steps=inference_steps,
            seed=seed, do_img2img=False, init_image=None,
            image2image_strength=0.8, resize_img=True,
            api_name="/generate_image",
        )
        return res[0], f"Seed: {res[1]}"
    except Exception as e:  # image generation is optional; report, don't raise
        logging.error("Image generation failed: %s", e)
        return None, str(e)
def extract_image_prompt(blog_content, blog_topic):
    """Ask Claude for a one-line English image prompt for a blog post.

    Args:
        blog_content: The blog text, sent as the user message.
        blog_topic: The topic; appended to the system instruction and used in
            the fallback prompt.

    Returns:
        The stripped text of the first content item of the model response,
        or a generic fallback prompt built from ``blog_topic`` if the request
        fails for any reason.
    """
    system = f"λ€μ κΈμ λ°νμΌλ‘ μ μ ν μ΄λ―Έμ§ ν둬ννΈλ₯Ό μμ΄λ‘ ν μ€λ§ μ¨μ€:\n{blog_topic}"
    try:
        res = client.messages.create(
            model="claude-3-7-sonnet-20250219",
            max_tokens=80,
            system=system,
            messages=[{"role": "user", "content": blog_content}],
        )
        return res.content[0].text.strip()
    except Exception as e:
        # Best-effort: keep the fallback, but no longer swallow the error
        # silently so failures are visible in the log.
        logging.warning("Image prompt extraction failed, using fallback: %s", e)
        return f"A professional photo related to {blog_topic}, high quality"
def convert_md_to_html(md_text, title="Ginigen Blog"):
    """Render Markdown text into a minimal standalone HTML document.

    Args:
        md_text: Markdown source; converted with ``markdown.markdown``.
        title: Text for the ``<title>`` element. It is HTML-escaped because
            callers pass user-typed text here.

    Returns:
        The HTML document as a string.
    """
    body = markdown.markdown(md_text)
    safe_title = html.escape(str(title))
    return f"""<!DOCTYPE html><html><head>
<title>{safe_title}</title><meta charset="utf-8"></head><body>{body}</body></html>"""
def extract_keywords(text: str, k: int = 5) -> str:
    """Return the first ``k`` words of ``text`` with punctuation removed.

    Every character that is not a Hangul syllable, an ASCII letter, an ASCII
    digit or whitespace is dropped; the remainder is split on whitespace and
    the first ``k`` words are joined with single spaces.

    Args:
        text: Free-form input text.
        k: Maximum number of words to keep.

    Returns:
        The space-joined keywords (empty string if nothing remains).
    """
    # In a raw string, whitespace is ``\s`` (one backslash). The previous
    # ``\\s`` meant "backslash or the letter s" inside the class, so all
    # whitespace was deleted and the text collapsed into a single token.
    # The Hangul range is written as 가-힣 (U+AC00-U+D7A3).
    txt = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
    return " ".join(txt.split()[:k])
# βββββββββββββββββββββββββββββ 8) Streamlit UI ββββββββββββββββββββββββββββββββ | |
def _init_session_state():
    """Seed ``st.session_state`` with defaults for keys that are not set yet."""
    defaults = {
        "ai_model": "claude-3-7-sonnet-20250219",
        "messages": [],
        "auto_save": True,
        "generate_image": False,
        "use_web_search": False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
    # The connection probe is evaluated lazily: building it into the defaults
    # dict ran it on every call, even when the stored status already existed.
    if "image_api_status" not in st.session_state:
        st.session_state["image_api_status"] = test_image_api_connection()


def _auto_save_history(sidebar):
    """Write the chat history to a timestamped JSON file; report errors in the sidebar."""
    try:
        fname = f"chat_history_{datetime.now():%Y%m%d_%H%M%S}.json"
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(st.session_state.messages, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as e:
        sidebar.error(f"μλ μ μ₯ μ€λ₯: {e}")


def chatbot_interface():
    """Render the chat UI and process at most one user prompt.

    Draws the title, sidebar toggles and the stored conversation. When the
    chat input yields a prompt, the function optionally runs a web search and
    appends the result to the system prompt, streams the Claude response into
    a placeholder, optionally generates an image, stores the assistant message
    in ``st.session_state.messages``, offers Markdown/HTML downloads and, if
    enabled, auto-saves the history.
    """
    st.title("Ginigen Blog")
    _init_session_state()

    sb = st.sidebar
    sb.title("λν κΈ°λ‘ κ΄λ¦¬")
    sb.toggle("μλ μ μ₯", key="auto_save")
    sb.toggle("λΈλ‘κ·Έ κΈ μμ± ν μ΄λ―Έμ§ μλ μμ±", key="generate_image")
    sb.toggle("μ£Όμ μΉ κ²μ λ° λΆμ", key="use_web_search")
    sb.text(st.session_state.image_api_status)

    # Re-render the stored conversation.
    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"])
            if "image" in m:
                st.image(m["image"], caption=m.get("image_caption", ""))

    prompt = st.chat_input("무μμ λμλ릴κΉμ?")
    if not prompt:
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        placeholder = st.empty()
        full_resp = ""
        sys_prompt = get_system_prompt()

        # (Optional) web search; results are appended to the system prompt.
        if st.session_state.use_web_search:
            with st.spinner("μΉ κ²μ μ€β¦"):
                q = extract_keywords(prompt)
                sb.info(f"κ²μμ΄: {q}")
                search_md = do_web_search(q)
            # The mock document is recognised by its heading text.
            if "κ°μ κ²μ κ²°κ³Ό" in search_md:
                sb.warning("μ€μ κ²μ κ²°κ³Όλ₯Ό κ°μ Έμ€μ§ λͺ»νμ΅λλ€.")
            sys_prompt += f"\n\nκ²μ κ²°κ³Ό:\n{search_md}\n"

        # Stream the response; only role/content are sent, so extra keys
        # such as "image" on stored messages never reach the API.
        with client.messages.stream(
            model=st.session_state.ai_model,
            max_tokens=MAX_TOKENS,
            system=sys_prompt,
            messages=[{"role": m["role"], "content": m["content"]}
                      for m in st.session_state.messages],
        ) as stream:
            for t in stream.text_stream:
                full_resp += t or ""
                placeholder.markdown(full_resp + "β")
        placeholder.markdown(full_resp)

        # Build the assistant message once; image fields are added only when
        # image generation succeeded.
        assistant_msg = {"role": "assistant", "content": full_resp}
        if st.session_state.generate_image:
            with st.spinner("μ΄λ―Έμ§ μμ± μ€β¦"):
                img_prompt = extract_image_prompt(full_resp, prompt)
                img, caption = generate_image(img_prompt)
            if img:
                st.image(img, caption=caption)
                assistant_msg["image"] = img
                assistant_msg["image_caption"] = caption
            else:
                st.error(f"μ΄λ―Έμ§ μμ± μ€ν¨: {caption}")
        st.session_state.messages.append(assistant_msg)

        # Download buttons for the generated text.
        st.subheader("μ΄ λΈλ‘κ·Έ λ€μ΄λ‘λ:")
        c1, c2 = st.columns(2)
        c1.download_button("λ§ν¬λ€μ΄", full_resp,
                           file_name=f"{prompt[:30]}.md", mime="text/markdown")
        html_doc = convert_md_to_html(full_resp, prompt[:30])
        c2.download_button("HTML", html_doc,
                           file_name=f"{prompt[:30]}.html", mime="text/html")

    # Auto-save once per completed turn.
    # NOTE(review): the source's nesting was lost; confirm this is the
    # intended level (running it on every call writes a new file each time).
    if st.session_state.auto_save and st.session_state.messages:
        _auto_save_history(sb)
# βββββββββββββββββββββββββββββ 9) main ββββββββββββββββββββββββββββββββββββββββ | |
def main():
    """Application entry point: render the chat interface."""
    chatbot_interface()
if __name__ == "__main__":
    # Provide a requirements.txt only when none exists. Rewriting it on every
    # run clobbered any existing file, and an unwritable working directory
    # would abort the app before the UI was rendered.
    if not os.path.exists("requirements.txt"):
        try:
            with open("requirements.txt", "w", encoding="utf-8") as f:
                f.write("\n".join([
                    "streamlit>=1.31.0",
                    "anthropic>=0.18.1",
                    "gradio-client>=1.8.0",
                    "requests>=2.32.3",
                    "markdown>=3.5.1",
                    "pillow>=10.1.0",
                ]) + "\n")
        except OSError as e:
            logging.warning("Could not write requirements.txt: %s", e)
    main()