Spaces:
Sleeping
Sleeping
import os
from typing import Optional
from urllib.parse import urlsplit

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup
#############################
# OpenAI API client setup
# The key is read from the environment once, when this module is imported.
# A missing or empty OPENAI_API_KEY aborts the import with ValueError.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("OpenAI API ํ ํฐ(OPENAI_API_KEY)์ด ์ค์ ๋์ง ์์์ต๋๋ค.")
def call_openai_api(
    content: str,
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float
) -> str:
    """Ask the ``gpt-4o-mini`` chat model a single question and return its reply.

    Args:
        content: Text sent as the ``user`` message.
        system_message: Text sent as the ``system`` message.
        max_tokens: Forwarded unchanged as the ``max_tokens`` request field.
        temperature: Forwarded unchanged as the ``temperature`` request field.
        top_p: Forwarded unchanged as the ``top_p`` request field.

    Returns:
        The ``content`` of the first choice's message. If anything raises
        while calling the API or reading the response, the exception is not
        propagated; a string made of a fixed prefix plus the exception text
        is returned instead.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return completion.choices[0].message['content']
    except Exception as err:
        # Failures are reported through the return value, never raised.
        return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {err!s}"
#############################
# Advanced settings (OpenAI) - defined in code only (must not be exposed in the UI)
#############################
# System prompt sent with every request made by the blog generator.
# NOTE(review): the prompt text looks like UTF-8 Korean that was decoded with
# the wrong codec somewhere upstream. It is kept exactly as found, minus the
# trailing " | |" table residue that had leaked into every line of the literal
# and would otherwise be sent to the model. Confirm against the original file.
OPENAI_SYSTEM_MESSAGE = """๋ฐ๋์ ํ๊ธ๋ก ๋ต๋ณํ ๊ฒ.
๋๋ ์ต๊ณ ์ ๋น์์ด๋ค.
๋ด๊ฐ ์๊ตฌํ๋ ๊ฒ๋ค์ ์ต๋ํ ์์ธํ๊ณ ์ ํํ๊ฒ ๋ต๋ณํ๋ผ.
##[๊ธฐ๋ณธ๊ท์น]
1. ๋ฐ๋์ ํ๊ตญ์ด(ํ๊ธ)๋ก ์์ฑํ๋ผ.
2. ๋๋ ๊ฐ์ฅ ์ฃผ๋ชฉ๋ฐ๋ ๋ง์ผํฐ์ด๋ฉฐ ๋ธ๋ก๊ทธ ๋ง์ผํ ์ ๋ฌธ๊ฐ์ด๋ค.
3. ํนํ ๋๋ '์ ๋ณด์ฑ(Informative)' ์ ๋ฌธ ๋ธ๋ก๊ทธ ๋ง์ผํ ์ ๋ฌธ๊ฐ์ด๋ค.
4. ์ ๋ณด ์ ๊ณต์ ์ด์ ์ ๋ง์ถ์ด ์์ฑํ๋ค.
##[ํ ์คํธ ์์ฑ ๊ท์น]
1. ์์ฃผ์ ๋ฅผ 5๊ฐ๋ก ๊ตฌ๋ถํ์ฌ 2000์ ์ด์๋๋๋ก ์์ฑํ๋ผ.
2. ์ ์ฒด ๋งฅ๋ฝ์ ์ดํดํ๊ณ ๋ฌธ์ฅ์ ์ผ๊ด์ฑ์ ์ ์งํ๋ผ.
3. ์ ๋๋ก ์ฐธ๊ณ ๊ธ์ ํ๋ฌธ์ฅ ์ด์ ๊ทธ๋๋ก ์ถ๋ ฅํ์ง ๋ง ๊ฒ.
4. ์ฃผ์ ์ ์ํฉ์ ๋ง๋ ์ ์ ํ ์ดํ๋ฅผ ์ ํํ๋ผ.
5. ํ๊ธ ์ดํ์ ๋์ด๋๋ ์ฝ๊ฒ ์์ฑํ๋ผ.
6. ์ ๋ ๋ฌธ์ฅ์ ๋์ '๋ต๋๋ค'๋ฅผ ์ฌ์ฉํ์ง ๋ง ๊ฒ.
###[์ ๋ณด์ฑ ๋ธ๋ก๊ทธ ์์ฑ ๊ท์น]
1. ๋ ์๊ฐ ์ป๊ณ ์ ํ๋ ์ ์ฉํ ์ ๋ณด์ ํฅ๋ฏธ๋ก์ด ์ ๋ณด๋ฅผ ์ ๊ณตํ๋๋ก ์์ฑํ๋ผ.
2. ๋ ์์ ๊ณต๊ฐ์ ์ด๋์ด๋ด๊ณ ๊ถ๊ธ์ฆ์ ํด๊ฒฐํ๋๋ก ์์ฑํ๋ผ.
3. ๋ ์์ ๊ด์ฌ์ฌ๋ฅผ ์ถฉ์กฑ์ํค๋๋ก ์์ฑํ๋ผ.
4. ๋ ์์๊ฒ ์ด๋์ด ๋๋ ์ ๋ณด๋ฅผ ์์ฑํ๋ผ.
##[์ ์ธ ๊ท์น]
1. ๋ฐ๋์ ๋น์์ด ๋ฐ ์์ค(expletive, abusive language, slang)์ ์ ์ธํ๋ผ.
2. ๋ฐ๋์ ์ฐธ๊ณ ๊ธ์ ๋งํฌ(URL)๋ ์ ์ธํ๋ผ.
3. ์ฐธ๊ณ ๊ธ์์ '๋งํฌ๋ฅผ ํ์ธํด์ฃผ์ธ์'์ ๊ฐ์ ๋งํฌ ์ด๋์ ๋ฌธ๊ตฌ๋ ์ ์ธํ๋ผ.
4. ์ฐธ๊ณ ๊ธ์ ์๋ ์์ฑ์, ํ์, ์ ํ๋ฒ, ๊ธฐ์์ ์ด๋ฆ, ์ ์นญ, ๋๋ค์์ ๋ฐ๋์ ์ ์ธํ๋ผ.
5. ๋ฐ๋์ ๋ฌธ์ฅ์ ๋๋ถ๋ถ์ด ์ด์ํ ํ๊ตญ์ด ํํ์ ์ ์ธํ๋ผ('์์', '๋ต๋๋ค', 'ํด์', 'ํด์ฃผ์ฃ ', '๋์ฃ ', '๋์ด์', '๊ณ ์' ๋ฑ.)
"""
# Request parameters passed alongside the system prompt.
OPENAI_MAX_TOKENS = 4000    # value for the request's max_tokens field
OPENAI_TEMPERATURE = 0.7    # value for the request's temperature field
OPENAI_TOP_P = 0.95         # value for the request's top_p field
#############################
# UI - blog generator
#############################
def blog_generator():
    """Build the blog-generator UI and return its ``gr.Blocks`` container.

    The layout is, in order: a heading, a three-option tone radio, three
    reference-text boxes, a read-only 20-line result box and a button.
    Clicking the button sends the tone and the three reference texts to
    ``call_openai_api`` and shows the returned string in the result box.

    NOTE(review): the UI strings look like mis-decoded Korean; they are kept
    exactly as found.
    """
    with gr.Blocks() as blog_tab:
        gr.Markdown("# ๋ธ๋ก๊ทธ ์์ฑ๊ธฐ")

        # Tone selector (radio buttons); the second choice is preselected.
        tone_radio = gr.Radio(
            label="๋งํฌ๋ฐ๊พธ๊ธฐ",
            choices=["์น๊ทผํ๊ฒ", "์ผ๋ฐ์ ์ธ", "์ ๋ฌธ์ ์ธ"],
            value="์ผ๋ฐ์ ์ธ",
        )

        # Three reference-text inputs, labelled 1 to 3.
        reference_boxes = [
            gr.Textbox(label=f"์ฐธ์กฐ๊ธ {number}") for number in (1, 2, 3)
        ]
        result_box = gr.Textbox(label="๊ฒฐ๊ณผ", lines=20, interactive=False)

        def generate_blog(tone_value: str, ref1_value: str, ref2_value: str, ref3_value: str) -> str:
            # Compose the user prompt: one "label: value" line per input,
            # each terminated by a newline.
            prompt_lines = [
                f"๋งํฌ: {tone_value}",
                f"์ฐธ์กฐ๊ธ1: {ref1_value}",
                f"์ฐธ์กฐ๊ธ2: {ref2_value}",
                f"์ฐธ์กฐ๊ธ3: {ref3_value}",
            ]
            question = "\n".join(prompt_lines) + "\n"

            # Single call to the model with the module-level settings.
            return call_openai_api(
                content=question,
                system_message=OPENAI_SYSTEM_MESSAGE,
                max_tokens=OPENAI_MAX_TOKENS,
                temperature=OPENAI_TEMPERATURE,
                top_p=OPENAI_TOP_P,
            )

        generate_button = gr.Button("์์ฑํ๊ธฐ")
        generate_button.click(
            fn=generate_blog,
            inputs=[tone_radio, *reference_boxes],
            outputs=result_box,
        )
    return blog_tab
#############################
# Additional feature - Naver blog scraping
#############################
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog post URL to its mobile form.

    A URL whose host is ``blog.naver.com`` and whose path has at least two
    segments (``/<user_id>/<post_id>``) becomes
    ``https://m.blog.naver.com/<user_id>/<post_id>``. Surrounding whitespace,
    extra path segments, the query string and the fragment are dropped. A
    link pasted without ``http://`` or ``https://`` is accepted too.

    Args:
        url: The URL string to convert.

    Returns:
        The mobile URL, or ``url`` itself (unchanged) when it is already a
        mobile URL, points at another host, has fewer than two path
        segments, or cannot be parsed.
    """
    candidate = url.strip()
    # urlsplit only recognises the host when a scheme is present, so supply
    # one for links pasted as "blog.naver.com/...".
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = f"https://{candidate}"

    try:
        parsed = urlsplit(candidate)
        host = parsed.hostname or ""
    except ValueError:
        # Malformed input (e.g. an unbalanced "[" in the host): leave as is.
        return url

    # Compare the real host instead of searching the whole string, so
    # "m.blog.naver.com" and unrelated sites are left alone.
    if host != "blog.naver.com":
        return url

    segments = [part for part in parsed.path.split("/") if part]
    if len(segments) < 2:
        return url

    user_id, post_id = segments[0], segments[1]
    return f"https://m.blog.naver.com/{user_id}/{post_id}"
def scrape_naver_blog(url, timeout=10):
    """Download a Naver blog post and return its title and body as text.

    The URL is first passed through ``convert_to_mobile_url``; the resulting
    page is fetched and parsed with BeautifulSoup's ``html.parser``.

    Args:
        url: Address of the blog post.
        timeout: Seconds to wait for the server before giving up. Forwarded
            to ``requests.get``; without it a stalled server would block the
            caller indefinitely.

    Returns:
        A string holding the title and the body text (one line per matched
        text block). When the title or body elements are not found, a fixed
        placeholder is used in their place. When anything raises (network
        error, HTTP error status, timeout, ...), the exception is printed
        and an error-message string is returned instead of raising.
    """
    try:
        # Switch to the mobile URL before fetching.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        response = requests.get(mobile_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: first <div> carrying the title class string.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ ๋ชฉ์ ์ฐพ์ ์ ์์"

        # Body: every <div> matching the text-module class string.
        # NOTE(review): BeautifulSoup compares a multi-class string against
        # the whole class attribute, so this presumably skips the title
        # block. Confirm against a live page.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋ด์ฉ์ ์ฐพ์ ์ ์์"

        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        result = f"์ ๋ชฉ: {title}\n\n๋ด์ฉ: {content}"
        return result
    except Exception as e:
        # UI boundary: report the failure as text rather than propagating it.
        print(f"Error: {e}")
        return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
# Gradio interface definition
def run_scraper(url):
    """Return whatever ``scrape_naver_blog`` produces for ``url``."""
    scraped_text = scrape_naver_blog(url)
    return scraped_text
def naver_blog_scraper():
    """Build the Naver blog scraper UI and return its ``gr.Blocks`` container.

    The layout is a heading, a URL text box, a read-only 20-line result box
    and a button. Clicking the button passes the URL box's value to
    ``run_scraper`` and writes the returned string into the result box.
    """
    scraper_tab = gr.Blocks()
    with scraper_tab:
        gr.Markdown("# ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ")
        url_field = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL")
        result_field = gr.Textbox(
            label="์คํฌ๋ํ ๊ฒฐ๊ณผ",
            lines=20,
            interactive=False,
        )
        trigger = gr.Button("์คํฌ๋ํํ๊ธฐ")
        trigger.click(fn=run_scraper, inputs=url_field, outputs=result_field)
    return scraper_tab
#############################
# Main UI - tab layout
#############################
# Top-level app: a heading followed by one tab per feature. Each tab's
# content comes from the builder function called inside that tab.
demo = gr.Blocks()
with demo:
    gr.Markdown("# ํตํฉ Gradio ์ ํ๋ฆฌ์ผ์ด์ ")

    with gr.Tab("๋ธ๋ก๊ทธ ์์ฑ๊ธฐ"):
        blog_tab = blog_generator()

    with gr.Tab("๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ"):
        scraper_tab = naver_blog_scraper()

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()