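# NOTE(review): "Spaces: Running / Running" -- looks like hosting-page status
# text captured together with the file, not program code; confirm and drop.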
""" | |
Ginigen Blog / Streamlit App - Brave Search API Edition
------------------------------------------------------------------------
* 2025-04-23 : SerpHouse dependency fully replaced with the Brave Search API
* The SERPHOUSE_API_KEY environment variable is reused as-is to hold the Brave API key
* **All features of the original code are preserved**
  - Markdown / HTML blog download (sidebar + main body)
  - Chat history JSON upload & download + background auto-save
  - Automatic image generation option
  - All Streamlit UI toggles
------------------------------------------------------------------------
""" | |
# -------------------------------- Imports --------------------------------
import os, json, re, logging, requests, markdown | |
from datetime import datetime | |
import streamlit as st | |
import anthropic | |
from gradio_client import Client | |
# from bs4 import BeautifulSoup  # uncomment if needed
# ------------------------ Environment variables / constants ------------------------
# Anthropic API key, read from the API_KEY environment variable ("" when unset).
ANTHROPIC_KEY = os.getenv("API_KEY", "")
# Brave Search key; the legacy SERPHOUSE_API_KEY variable name is kept on purpose.
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")  # name kept
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
# Base URL handed to gradio_client.Client for image generation.
IMAGE_API_URL = "http://211.233.58.201:7896"
# max_tokens value used for the streamed blog-generation request.
MAX_TOKENS = 7_999
# -------------------------------- Logging --------------------------------
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")
# ---------------------------- Anthropic client ----------------------------
# Created once at import time and shared by every function in this module.
client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
# ------------------------ Blog-writing system prompt ------------------------
def get_system_prompt() -> str:
    """Return the fixed system prompt used for every blog-generation request.

    The text describes an 8-step blog-writing framework followed by a list of
    writing rules (length, paragraph size, tone, sourcing, readability).

    NOTE(review): the literal below looks mis-encoded (UTF-8 Korean rendered
    through a Greek code page). It is reproduced unchanged here; restore it
    from a clean copy of the file if one is available.
    """
    prompt_text = """
λΉμ μ μ λ¬Έ λΈλ‘κ·Έ μμ± μ λ¬Έκ°μ λλ€. λͺ¨λ λΈλ‘κ·Έ κΈ μμ± μμ²μ λν΄ λ€μμ 8λ¨κ³ νλ μμν¬λ₯Ό μ² μ ν λ°λ₯΄λ, μμ°μ€λ½κ³ λ§€λ ₯μ μΈ κΈμ΄ λλλ‘ μμ±ν΄μΌ ν©λλ€:
λ μ μ°κ²° λ¨κ³
1.1. 곡κ°λ νμ±μ μν μΉκ·Όν μΈμ¬
1.2. λ μμ μ€μ κ³ λ―Όμ λ°μν λμ μ§λ¬Έ
1.3. μ£Όμ μ λν μ¦κ°μ κ΄μ¬ μ λ
λ¬Έμ μ μ λ¨κ³
2.1. λ μμ νμΈν¬μΈνΈ ꡬ체ν
2.2. λ¬Έμ μ μκΈμ±κ³Ό μν₯λ λΆμ
2.3. ν΄κ²° νμμ±μ λν 곡κ°λ νμ±
μ λ¬Έμ± μ μ¦ λ¨κ³
3.1. κ°κ΄μ λ°μ΄ν° κΈ°λ° λΆμ
3.2. μ λ¬Έκ° κ²¬ν΄μ μ°κ΅¬ κ²°κ³Ό μΈμ©
3.3. μ€μ μ¬λ‘λ₯Ό ν΅ν λ¬Έμ ꡬ체ν
μ루μ μ 곡 λ¨κ³
4.1. λ¨κ³λ³ μ€μ² κ°μ΄λλΌμΈ μ μ
4.2. μ¦μ μ μ© κ°λ₯ν ꡬ체μ ν
4.3. μμ μ₯μ λ¬Όκ³Ό 극볡 λ°©μ ν¬ν¨
μ λ’°λ κ°ν λ¨κ³
5.1. μ€μ μ±κ³΅ μ¬λ‘ μ μ
5.2. ꡬ체μ μ¬μ©μ νκΈ° μΈμ©
5.3. κ°κ΄μ λ°μ΄ν°λ‘ ν¨κ³Ό μ μ¦
νλ μ λ λ¨κ³
6.1. λͺ νν 첫 μ€μ² λ¨κ³ μ μ
6.2. μκΈμ±μ κ°μ‘°ν νλ μ΄κ΅¬
6.3. μ€μ² λκΈ° λΆμ¬ μμ ν¬ν¨
μ§μ μ± κ°ν λ¨κ³
7.1. μ루μ μ νκ³ ν¬λͺ νκ² κ³΅κ°
7.2. κ°μΈλ³ μ°¨μ΄ μ‘΄μ¬ μΈμ
7.3. νμ 쑰건과 μ£Όμμ¬ν λͺ μ
κ΄κ³ μ§μ λ¨κ³
8.1. μ§μ μ± μλ κ°μ¬ μΈμ¬
8.2. λ€μ 컨ν μΈ μκ³ λ‘ κΈ°λκ° μ‘°μ±
8.3. μν΅ μ±λ μλ΄
μμ± μ μ€μμ¬ν
9.1. κΈμ μ: 1500-2000μ λ΄μΈ
9.2. λ¬Έλ¨ κΈΈμ΄: 3-4λ¬Έμ₯ μ΄λ΄
9.3. μκ°μ ꡬλΆ: μμ λͺ©, ꡬλΆμ , λ²νΈ λͺ©λ‘ νμ©
9.4. ν€μ€λ§€λ: μΉκ·Όνκ³ μ λ¬Έμ μΈ λν체
9.5. λ°μ΄ν°: λͺ¨λ μ 보μ μΆμ² λͺ μ
9.6. κ°λ μ±: λͺ νν λ¨λ½ ꡬλΆκ³Ό κ°μ‘°μ μ¬μ©
"""
    return prompt_text
# ---------------------------- Brave Search API ----------------------------
def brave_search(query: str, count: int = 5):
    """Call the Brave Web Search API and return normalized results.

    Args:
        query: Search text, sent as the ``q`` query parameter.
        count: Maximum number of results to request and return. Clamped to
            the range 1..20 before the request is sent (Brave documents 20
            as the upper bound for ``count``).

    Returns:
        list[dict]: one dict per result with the keys ``index`` (1-based),
        ``title``, ``link``, ``snippet`` and ``displayed_link`` (host part of
        the link with scheme and a leading ``www.`` removed).

    Raises:
        RuntimeError: if ``BRAVE_KEY`` is empty.
        requests.HTTPError: on a non-success HTTP status (``raise_for_status``).
        requests.RequestException: on connection problems or the 15 s timeout.
    """
    if not BRAVE_KEY:
        raise RuntimeError("β οΈ SERPHOUSE_API_KEY (Brave API Key) νκ²½λ³μκ° λΉμ΄ μμ΅λλ€.")

    # Keep the requested size inside the range the API accepts.
    count = max(1, min(int(count), 20))

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    params = {"q": query, "count": str(count)}
    r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
    r.raise_for_status()

    data = r.json()
    if not isinstance(data, dict):
        return []

    # "web" may be missing or null; fall back to a top-level "results" list.
    web = data.get("web") or {}
    raw = web.get("results") or data.get("results") or []
    entries = [res for res in raw if isinstance(res, dict)][:count]

    arts = []
    for i, res in enumerate(entries, 1):
        # A null "url"/"link" must not reach re.sub (TypeError on None).
        url = res.get("url") or res.get("link") or ""
        host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
        arts.append({
            "index": i,
            "title": res.get("title", "μ λͺ© μμ"),
            "link": url,
            "snippet": res.get("description", res.get("text", "λ΄μ© μμ")),
            "displayed_link": host,
        })
    return arts
def mock_results(query: str) -> str:
    """Build a placeholder Markdown result block for ``query``.

    The block is stamped with the current local time and contains a single
    fake result pointing at example.com.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    pieces = [
        f"# κ°μ κ²μ κ²°κ³Ό (μμ±: {stamp})\n\n",
        f"### Result 1: {query} κ΄λ ¨ μμ κ²°κ³Ό\n\n",
        "API νΈμΆ μ€ν¨λ‘ μμ±λ μμ λ°μ΄ν°μ λλ€.\n\n",
        "**μΆμ²**: [example.com](https://example.com)\n\n---\n",
    ]
    return "".join(pieces)
def do_web_search(query: str) -> str:
    """Search the web for ``query`` and format the hits as Markdown.

    Requests five results from ``brave_search``. If the search raises or
    returns nothing, the output of ``mock_results(query)`` is returned
    instead (a failure is also logged).
    """
    try:
        articles = brave_search(query, 5)
    except Exception as e:
        logging.error(f"Brave κ²μ μ€ν¨: {e}")
        return mock_results(query)

    if not articles:
        return mock_results(query)

    # One Markdown section per hit, each closed by a horizontal rule.
    sections = []
    for art in articles:
        sections.append(
            f"### Result {art['index']}: {art['title']}\n\n{art['snippet']}\n\n"
            f"**μΆμ²**: [{art['displayed_link']}]({art['link']})\n\n---\n"
        )

    header = "# μΉ κ²μ κ²°κ³Ό\nμλ μ 보λ₯Ό μ°Έκ³ ν΄μ λ΅λ³νμΈμ.\n\n"
    return header + "\n".join(sections)
# ---------------------- Image / conversion utilities ----------------------
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """Request an image from the Gradio endpoint at ``IMAGE_API_URL``.

    Args:
        prompt: Text prompt; a falsy value short-circuits without a request.
        w, h: Requested width and height.
        g: Guidance value passed as ``guidance``.
        steps: Passed as ``inference_steps``.
        seed: Seed passed to the endpoint.

    Returns:
        tuple: ``(image, caption)`` on success, where ``image`` is the first
        element of the endpoint's result and the caption reports the second
        element as the seed; ``(None, message)`` when the prompt is empty or
        the call raises.
    """
    if not prompt:
        return None, "ν둬ννΈ λΆμ‘±"

    try:
        remote = Client(IMAGE_API_URL)
        result = remote.predict(
            prompt=prompt,
            width=w,
            height=h,
            guidance=g,
            inference_steps=steps,
            seed=seed,
            do_img2img=False,
            init_image=None,
            image2image_strength=0.8,
            resize_img=True,
            api_name="/generate_image",
        )
        return result[0], f"Seed: {result[1]}"
    except Exception as e:
        logging.error(e)
        return None, str(e)
def extract_image_prompt(blog: str, topic: str):
    """Ask the Anthropic client for an image prompt describing the blog.

    Args:
        blog: Blog text, sent as the single user message.
        topic: Topic text, embedded in the system instruction and in the
            fallback prompt.

    Returns:
        str: the stripped text of the first content item of the response, or
        a generic English prompt built from ``topic`` when the request (or
        reading its result) fails for any reason.
    """
    # Named `instruction` rather than `sys` so the stdlib module name is not shadowed.
    instruction = f"λ€μ κΈλ‘λΆν° μμ΄ 1μ€ μ΄λ―Έμ§ ν둬ννΈ μμ±:\n{topic}"
    try:
        res = client.messages.create(
            model="claude-3-7-sonnet-20250219",
            max_tokens=80,
            system=instruction,
            messages=[{"role": "user", "content": blog}],
        )
        return res.content[0].text.strip()
    except Exception as e:
        # Best-effort step: keep the fallback, but leave a trace of the
        # failure instead of swallowing it silently.
        logging.warning("Image prompt extraction failed, using fallback: %s", e)
        return f"A professional photo related to {topic}, high quality"
def md_to_html(md: str, title="Ginigen Blog"):
    """Render Markdown text as a minimal standalone HTML document.

    Args:
        md: Markdown source, converted with ``markdown.markdown``.
        title: Text for the ``<title>`` element. It is HTML-escaped because
            callers pass text taken from the user prompt or the blog heading.

    Returns:
        str: a single-line HTML document with a UTF-8 charset declaration.
    """
    # Local import: the file's import block does not bring in `html`.
    from html import escape

    safe_title = escape(title)
    body = markdown.markdown(md)
    return f"<!DOCTYPE html><html><head><title>{safe_title}</title><meta charset='utf-8'></head><body>{body}</body></html>"
def keywords(text: str, top=5):
    """Return the first ``top`` whitespace-separated words of ``text``.

    Every character that is not a Hangul syllable (U+AC00..U+D7A3), an ASCII
    letter, an ASCII digit or whitespace is removed before splitting.

    Args:
        text: Free-form input text.
        top: Maximum number of words to keep.

    Returns:
        str: the kept words joined by single spaces ("" for empty input).
    """
    # `\s` must be a single-backslash class escape: the previous `\\s` inside a
    # raw string meant "backslash or the letter s", so whitespace was deleted
    # and the text collapsed into one word. The Hangul range is written with
    # \u escapes so it cannot be damaged by a re-encoding of the file.
    cleaned = re.sub(r"[^\uAC00-\uD7A3a-zA-Z0-9\s]", "", text)
    return " ".join(cleaned.split()[:top])
# ------------------------------ Streamlit UI ------------------------------
def ginigen_app():
    """Render the Ginigen Blog chat page and handle one Streamlit run.

    Session state keys used: ``ai_model``, ``messages`` (list of dicts with
    ``role``/``content`` and optionally ``image``/``image_caption``),
    ``auto_save``, ``generate_image``, ``use_web_search`` and
    ``loaded_history_id`` (marker of the upload that was last imported).

    Per run the function: draws the sidebar (toggles, download of the latest
    assistant message, history upload/download), replays the stored
    messages, and - when the user submitted a prompt - optionally runs a web
    search, streams the model answer, optionally generates an image, stores
    the answer, offers it for download and writes an auto-save file.

    NOTE(review): the UI string literals look mis-encoded (UTF-8 Korean shown
    through a Greek code page); they are kept exactly as found.
    """
    st.title("Ginigen Blog")

    # Session defaults
    defaults = dict(
        ai_model="claude-3-7-sonnet-20250219",
        messages=[],
        auto_save=True,
        generate_image=False,
        use_web_search=False,
    )
    for k, v in defaults.items():
        st.session_state.setdefault(k, v)

    # -- Sidebar controls
    sb = st.sidebar
    sb.title("λν κΈ°λ‘ κ΄λ¦¬")
    sb.toggle("μλ μ μ₯", key="auto_save")
    sb.toggle("μ΄λ―Έμ§ μλ μμ±", key="generate_image")
    sb.toggle("μΉ κ²μ μ¬μ©", key="use_web_search")

    # -- Download of the most recent non-empty assistant message (Markdown / HTML)
    latest_blog = next(
        (m["content"] for m in reversed(st.session_state.messages)
         if m["role"] == "assistant" and m["content"].strip()), None)
    if latest_blog:
        # First "# ..." heading becomes the file name; "blog" if there is none.
        title = re.search(r"# (.*?)(\n|$)", latest_blog)
        title = title.group(1).strip() if title else "blog"
        sb.subheader("μ΅κ·Ό λΈλ‘κ·Έ λ€μ΄λ‘λ")
        c1, c2 = sb.columns(2)
        c1.download_button("Markdown", latest_blog,
                           file_name=f"{title}.md", mime="text/markdown")
        c2.download_button("HTML", md_to_html(latest_blog, title),
                           file_name=f"{title}.html", mime="text/html")

    # -- Chat history upload (JSON)
    up = sb.file_uploader("λν κΈ°λ‘ λΆλ¬μ€κΈ° (.json)", type=["json"])
    if up:
        # The uploader keeps returning the file on every rerun. Import it only
        # once, otherwise each rerun would replace the conversation with the
        # file content and drop the turns added since.
        upload_id = (up.name, up.size)
        if st.session_state.get("loaded_history_id") != upload_id:
            try:
                loaded = json.load(up)
                well_formed = isinstance(loaded, list) and all(
                    isinstance(m, dict) and "role" in m and "content" in m
                    for m in loaded
                )
                if not well_formed:
                    raise ValueError(
                        "expected a JSON list of objects with 'role' and 'content'")
                st.session_state.messages = loaded
                st.session_state["loaded_history_id"] = upload_id
                sb.success("λν κΈ°λ‘ λΆλ¬μ€κΈ° μλ£")
            except Exception as e:
                sb.error(f"λΆλ¬μ€κΈ° μ€ν¨: {e}")
    else:
        # Uploader cleared: allow the same file to be imported again later.
        st.session_state["loaded_history_id"] = None

    # -- Chat history download (JSON)
    if sb.button("λν κΈ°λ‘ JSON λ€μ΄λ‘λ"):
        sb.download_button("μ μ₯", json.dumps(st.session_state.messages,
                                             ensure_ascii=False, indent=2),
                           file_name="chat_history.json",
                           mime="application/json")

    # -- Replay stored messages
    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"])
            if "image" in m:
                st.image(m["image"], caption=m.get("image_caption", ""))

    # -- User input
    new_turn = False
    if prompt := st.chat_input("무μμ λμλ릴κΉμ?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            placeholder = st.empty()
            answer = ""
            sys_prompt = get_system_prompt()
            if st.session_state.use_web_search:
                with st.spinner("μΉ κ²μ μ€β¦"):
                    search_md = do_web_search(keywords(prompt))
                sys_prompt += f"\n\nκ²μ κ²°κ³Ό:\n{search_md}\n"

            # Stream the answer; only role/content are sent to the API.
            with client.messages.stream(
                model=st.session_state.ai_model, max_tokens=MAX_TOKENS,
                system=sys_prompt,
                messages=[{"role": m["role"], "content": m["content"]}
                          for m in st.session_state.messages]
            ) as stream:
                for t in stream.text_stream:
                    answer += t or ""
                    placeholder.markdown(answer + "β")
            placeholder.markdown(answer)

            # The answer is always stored; image fields are added only when
            # an image was actually produced. (Previously a failed image
            # request meant the answer never reached the history.)
            entry = {"role": "assistant", "content": answer}
            if st.session_state.generate_image:
                with st.spinner("μ΄λ―Έμ§ μμ± μ€β¦"):
                    ip = extract_image_prompt(answer, prompt)
                    img, cap = generate_image(ip)
                if img:
                    st.image(img, caption=cap)
                    entry["image"] = img
                    entry["image_caption"] = cap
            st.session_state.messages.append(entry)
            new_turn = True

            # Download buttons for this answer (Markdown / HTML)
            st.subheader("μ΄ λΈλ‘κ·Έ λ€μ΄λ‘λ")
            b1, b2 = st.columns(2)
            b1.download_button("Markdown", answer,
                               file_name=f"{prompt[:30]}.md", mime="text/markdown")
            b2.download_button("HTML", md_to_html(answer, prompt[:30]),
                               file_name=f"{prompt[:30]}.html", mime="text/html")

    # -- Automatic backup: written once per completed turn, not on every
    # rerun (a toggle click would otherwise create another identical file).
    if new_turn and st.session_state.auto_save and st.session_state.messages:
        try:
            fn = f"chat_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
            with open(fn, "w", encoding="utf-8") as fp:
                json.dump(st.session_state.messages, fp,
                          ensure_ascii=False, indent=2)
        except Exception as e:
            logging.error(f"μλ μ μ₯ μ€ν¨: {e}")
# --------------------------- main / requirements ---------------------------
def main():
    """Entry point: run the Streamlit page."""
    ginigen_app()


if __name__ == "__main__":
    # Write requirements.txt next to the script on every launch, then start
    # the app. The file has one pinned package per line and no trailing newline.
    requirement_lines = (
        "streamlit>=1.31.0",
        "anthropic>=0.18.1",
        "gradio-client>=1.8.0",
        "requests>=2.32.3",
        "markdown>=3.5.1",
        "pillow>=10.1.0",
    )
    with open("requirements.txt", "w") as f:
        f.write("\n".join(requirement_lines))
    main()