Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
def get_url_content(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response text when the server answers with status 200; otherwise
        a fixed failure message. The same message is returned when the
        request itself fails (invalid URL, connection error, timeout).
    """
    # NOTE(review): the URL comes straight from the UI, so this fetches
    # arbitrary user-supplied addresses; restrict hosts if that matters
    # for the deployment.
    failure_message = "URL์์ ์ฝํ ์ธ ๋ฅผ ๊ฐ์ ธ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค."
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Treat transport-level failures like a non-200 answer so callers
        # always receive a string.
        return failure_message
    if response.status_code == 200:
        return response.text
    return failure_message
def parse_html(html_content):
    """Return *html_content* re-serialized as indented, pretty-printed HTML.

    The markup is parsed with BeautifulSoup's built-in ``html.parser`` and
    the whole document is returned; no elements are filtered out. To return
    only specific elements, query the parsed tree instead (for example with
    ``find_all('p')``).
    """
    document = BeautifulSoup(html_content, 'html.parser')
    pretty_markup = document.prettify()
    return pretty_markup
# Gradio handler: fetch a page and show its prettified HTML.
def gradio_fetch_and_parse(url):
    """Fetch *url* and return its HTML pretty-printed by ``parse_html``."""
    return parse_html(get_url_content(url))
def get_main_content(html_content):
    """Return the text inside the first ``<div class="main-content">``.

    The page body is assumed to live in a ``div`` with the CSS class
    ``main-content``. Whitespace around each text fragment is stripped.
    Returns an empty string when no such element exists.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    container = document.find('div', class_='main-content')
    if container is None:
        return ''
    return container.get_text(strip=True)
def format_script(text):
    """Format *text* as a short script with two sentences per line.

    The text is split on ``'.'``; blank fragments (from trailing periods,
    repeated periods, or empty input) are discarded. At most the first ten
    sentences are used. Each sentence is written with a terminating period,
    sentences are grouped in pairs separated by a space, and every line ends
    with a newline.

    Args:
        text: Plain text to format.

    Returns:
        The formatted script, or an empty string when *text* contains no
        sentences.
    """
    # NOTE: splitting on '.' is naive; decimals and abbreviations are
    # treated as sentence boundaries.
    fragments = (part.strip() for part in text.split('.'))
    sentences = [fragment for fragment in fragments if fragment][:10]

    lines = []
    for start in range(0, len(sentences), 2):
        pair = sentences[start:start + 2]
        lines.append(' '.join(sentence + '.' for sentence in pair))
    return ''.join(line + '\n' for line in lines)
# Gradio handler: fetch a page and turn its main content into a script.
def gradio_fetch_and_format_script(url):
    """Fetch *url*, extract its main content, and format it as a script."""
    page_html = get_url_content(url)
    body_text = get_main_content(page_html)
    script = format_script(body_text)
    return script
# Gradio UI wiring.
# First tab: show the prettified HTML of the requested page.
iface_html = gr.Interface(
    fn=gradio_fetch_and_parse,
    inputs=gr.Textbox(label="URL์ ์ ๋ ฅํ์ธ์"),
    outputs=gr.Textbox(label="์คํฌ๋ฉ๋ HTML ์ฝํ ์ธ "),
)

# Second tab: show the script generated from the page's main content.
iface_script = gr.Interface(
    fn=gradio_fetch_and_format_script,
    inputs=gr.Textbox(label="URL์ ์ ๋ ฅํ์ธ์"),
    outputs=gr.Textbox(label="์์์ฉ ์คํฌ๋ฆฝํธ"),
)

# Combine both interfaces as tabs and start the app. The interface list is
# passed positionally because the documented name of TabbedInterface's first
# parameter is ``interface_list``, not ``interfaces``.
iface_combined = gr.TabbedInterface(
    [iface_html, iface_script],
    tab_names=["HTML ๋ณด๊ธฐ", "์คํฌ๋ฆฝํธ ์์ฑ"],
)
iface_combined.launch()