# Gradio app from a Hugging Face Space (file size: 2,533 bytes).
# Fetches a URL, shows its HTML, and builds a short script from the page text.
import gradio as gr
import requests
from bs4 import BeautifulSoup
def get_url_content(url):
response = requests.get(url)
if response.status_code == 200:
return response.text
else:
return "URL์์ ์ฝํ
์ธ ๋ฅผ ๊ฐ์ ธ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค."
def parse_html(html_content):
    """Return *html_content* re-serialized as indented, pretty-printed HTML.

    The markup is parsed with BeautifulSoup's built-in ``html.parser``.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    # To return only part of the page, select the wanted elements here,
    # e.g. document.find_all('p').
    pretty_markup = document.prettify()
    return pretty_markup
def gradio_fetch_and_parse(url):
    """Gradio handler: fetch *url* and return its content via ``parse_html``.

    Whatever string ``get_url_content`` returns is passed straight on to
    ``parse_html``.
    """
    return parse_html(get_url_content(url))
def get_main_content(html_content):
    """Collect the ``content`` values of the page's ``<meta>`` tags.

    Args:
        html_content: HTML markup to inspect.

    Returns:
        The non-empty ``content`` attribute values, each stripped and
        followed by a newline, concatenated in document order. An empty
        string is returned when no such value exists.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # find_all matches on tag names, and <meta> carries its text in the
    # ``content`` attribute rather than in a child string, so select
    # <meta> tags that have the attribute and read it directly.
    meta_tags = soup.find_all('meta', attrs={'content': True})
    values = (tag['content'].strip() for tag in meta_tags)
    text = ''.join(f'{value}\n' for value in values if value)
    if text:
        print("추출된 텍스트:", text)
        return text
    print("본문 콘텐츠를 찾을 수 없습니다.")
    return ''
def format_script(text, max_sentences=10, sentences_per_line=2):
    """Lay out the first sentences of *text* as short script lines.

    The text is split on ``'.'``; blank fragments (from empty input,
    trailing periods or repeated periods) are ignored. Each kept sentence
    is stripped and gets its period back, sentences on the same line are
    separated by one space, and every line ends with a newline.

    Splitting on ``'.'`` is naive: decimals and abbreviations are treated
    as sentence boundaries.

    Args:
        text: Source text; ``None`` or an empty string yields ``''``.
        max_sentences: Maximum number of sentences to use.
        sentences_per_line: Number of sentences placed on each line.

    Returns:
        The formatted script, or ``''`` when *text* has no sentences.

    Raises:
        ValueError: If *max_sentences* is negative or
            *sentences_per_line* is less than 1.
    """
    if max_sentences < 0:
        raise ValueError("max_sentences must be non-negative")
    if sentences_per_line < 1:
        raise ValueError("sentences_per_line must be at least 1")
    if not text:
        return ""

    fragments = (fragment.strip() for fragment in text.split('.'))
    sentences = [fragment for fragment in fragments if fragment]
    sentences = sentences[:max_sentences]

    script = ""
    for start in range(0, len(sentences), sentences_per_line):
        group = sentences[start:start + sentences_per_line]
        script += ' '.join(f'{sentence}.' for sentence in group) + '\n'
        print("현재 스크립트:", script)  # debug log of the script so far
    return script
def gradio_fetch_and_format_script(url):
    """Gradio handler: fetch *url*, extract its text and format a script.

    Runs ``get_url_content``, ``get_main_content`` and ``format_script``
    in sequence, printing each intermediate result for debugging.

    Args:
        url: Address of the page to turn into a script.

    Returns:
        The string produced by ``format_script``.
    """
    print("함수 호출됨:", url)
    html_content = get_url_content(url)
    main_content = get_main_content(html_content)
    print("추출된 본문:", main_content)
    script = format_script(main_content)
    print("생성된 스크립트:", script)
    return script
# Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ
iface_html = gr.Interface(
fn=gradio_fetch_and_parse,
inputs=gr.Textbox(label="URL์ ์
๋ ฅํ์ธ์"),
outputs=gr.Textbox(label="์คํฌ๋ฉ๋ HTML ์ฝํ
์ธ ")
)
# Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ
iface_script = gr.Interface(
fn=gradio_fetch_and_format_script, # ์ด ํจ์๊ฐ ํธ์ถ๋์ด์ผ ํจ
inputs=gr.Textbox(label="URL์ ์
๋ ฅํ์ธ์"), # ์
๋ ฅ ํ๋
outputs=gr.Textbox(label="์์์ฉ ์คํฌ๋ฆฝํธ") # ์ถ๋ ฅ ํ๋
)
# Combine both interfaces as tabs of a single app and start it.
iface_combined = gr.TabbedInterface(
    [iface_html, iface_script],
    ["HTML 보기", "스크립트 생성"],
)
iface_combined.launch()