hyejaurl / app.py
seawolf2357's picture
Update app.py
f31d39f verified
raw
history blame
2.2 kB
import gradio as gr
import requests
from bs4 import BeautifulSoup
def get_url_content(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled host.

    Returns:
        The response text when the status code is 200. For any other status
        code, or when ``requests`` raises (malformed URL, connection failure,
        timeout, ...), the fixed failure message string is returned instead.
    """
    failure_message = "URL์—์„œ ์ฝ˜ํ…์ธ ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report request errors the same way as a non-200 status so the
        # UI handler shows a message rather than crashing.
        return failure_message
    if response.status_code == 200:
        return response.text
    return failure_message
def parse_html(html_content):
    """Parse an HTML string and return it re-serialized by ``prettify()``.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend.
    """
    # Extraction of specific elements (for example soup.find_all('p'))
    # could be added here; for now the whole document is returned.
    return BeautifulSoup(html_content, 'html.parser').prettify()
# Gradio interface function
def gradio_fetch_and_parse(url):
    """Download the page at *url* and return its prettified HTML.

    Passes the result of ``get_url_content`` straight to ``parse_html``.
    """
    return parse_html(get_url_content(url))
def get_main_content(html_content):
    """Return the text of the first ``<div class="main-content">`` element.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend. When
    no matching ``div`` is found an empty string is returned.
    """
    container = BeautifulSoup(html_content, 'html.parser').find(
        'div', class_='main-content'
    )
    if not container:
        return ''
    extracted = container.get_text(strip=True)
    print("์ถ”์ถœ๋œ ํ…์ŠคํŠธ:", extracted)  # debug log of the extracted text
    return extracted
def format_script(text, max_sentences=10):
    """Format plain text as a script with two sentences per line.

    The text is split on ``'.'``; each fragment is stripped of surrounding
    whitespace and empty fragments are discarded, so empty input or a
    trailing period no longer produces a stray ``'. '`` entry. Every kept
    sentence gets its period back, and every line ends with a newline.

    Args:
        text: Source text to split into sentences.
        max_sentences: Maximum number of sentences to include (default 10,
            i.e. at most five lines). Values below zero are treated as zero.

    Returns:
        The formatted script, or ``''`` when *text* contains no sentences.
    """
    fragments = (part.strip() for part in text.split('.'))
    sentences = [part for part in fragments if part][:max(max_sentences, 0)]
    # Pair sentences up: indices (0, 1), (2, 3), ... share one line each.
    lines = [
        ' '.join(sentence + '.' for sentence in sentences[start:start + 2])
        for start in range(0, len(sentences), 2)
    ]
    return ''.join(line + '\n' for line in lines)
# Gradio interface function
def gradio_fetch_and_format_script(url):
    """Download the page at *url* and turn its main content into a script.

    Chains ``get_url_content``, ``get_main_content`` and ``format_script``.
    """
    page_markup = get_url_content(url)
    body_text = get_main_content(page_markup)
    script_text = format_script(body_text)
    return script_text
# Existing functions are defined above.
# Gradio interface configuration
# Tab 1: show the prettified HTML of the requested URL.
iface_html = gr.Interface(
    fn=gradio_fetch_and_parse,
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
    outputs=gr.Textbox(label="์Šคํฌ๋žฉ๋œ HTML ์ฝ˜ํ…์ธ "),
)

# Tab 2: show the script generated from the page's main content.
iface_script = gr.Interface(
    fn=gradio_fetch_and_format_script,
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
    outputs=gr.Textbox(label="์˜์ƒ์šฉ ์Šคํฌ๋ฆฝํŠธ"),
)

# Combine the two interfaces as tabs and start the app.
iface_combined = gr.TabbedInterface(
    interface_list=[iface_html, iface_script],
    tab_names=["HTML ๋ณด๊ธฐ", "์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"],
)
iface_combined.launch()