Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
def get_url_content(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The response text when the server answers with status 200; otherwise
        a fixed failure message. Network-level failures (connection errors,
        timeouts, malformed URLs) yield the same failure message instead of
        propagating an exception.
    """
    failure_message = "URL์์ ์ฝํ ์ธ ๋ฅผ ๊ฐ์ ธ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค."
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report transport errors the same way as a non-200 status.
        return failure_message
    if response.status_code == 200:
        return response.text
    return failure_message
def parse_html(html_content):
    """Parse *html_content* with BeautifulSoup and return it pretty-printed.

    The whole document is returned; to return only selected elements, query
    the parsed tree instead (for example with ``find_all('p')``).
    """
    return BeautifulSoup(html_content, 'html.parser').prettify()
# Gradio interface function
def gradio_fetch_and_parse(url):
    """Download *url* and return its HTML in prettified form."""
    return parse_html(get_url_content(url))
def get_main_content(html_content):
    """Collect the ``content`` attribute values of the page's <meta> tags.

    The earlier lookup, ``find_all('meta content')``, searched for a tag
    literally named "meta content". No such tag exists, so the function
    always returned an empty string. <meta> tags also have no inner text, so
    the value has to be read from the ``content`` attribute rather than from
    ``.string``.

    NOTE(review): the intent (meta ``content`` attributes) is inferred from
    the original selector string; the original comment mentioned <script>
    tags. Confirm which source of text is wanted.

    Args:
        html_content: HTML document as a string.

    Returns:
        The non-empty ``content`` values, each stripped and followed by a
        newline, concatenated in document order; ``''`` when none are found.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # attrs={'content': True} keeps only <meta> tags that have the attribute.
    meta_tags = soup.find_all('meta', attrs={'content': True})
    values = (tag['content'].strip() for tag in meta_tags)
    text = ''.join(value + '\n' for value in values if value)
    if not text:
        print("๋ณธ๋ฌธ ์ฝํ ์ธ ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
        return ''
    print("์ถ์ถ๋ ํ ์คํธ:", text)
    return text
def format_script(text, max_sentences=10):
    """Lay out the leading sentences of *text* two per line.

    The text is split on ``'.'``. Empty segments (from empty input, a
    trailing period, or consecutive periods) are discarded so they no longer
    produce stray ``". "`` fragments, and every kept sentence is terminated
    with a period.

    Args:
        text: Source text whose sentences are separated by periods.
        max_sentences: Upper bound on the number of sentences used.

    Returns:
        A string with up to two sentences per line, each line ending in a
        newline; ``''`` when *text* contains no sentences.
    """
    limit = max(0, max_sentences)
    stripped = (part.strip() for part in text.split('.'))
    sentences = [part + '.' for part in stripped if part][:limit]
    # Pair consecutive sentences; an odd final sentence gets its own line.
    lines = [' '.join(sentences[i:i + 2]) for i in range(0, len(sentences), 2)]
    script = ''.join(line + '\n' for line in lines)
    print("ํ์ฌ ์คํฌ๋ฆฝํธ:", script)  # debug log
    return script
def gradio_fetch_and_format_script(url):
    """Fetch *url*, extract its main content and format it as a script.

    Each stage logs its result to stdout for debugging.
    """
    print("ํจ์ ํธ์ถ๋จ:", url)
    page_html = get_url_content(url)
    body_text = get_main_content(page_html)
    print("์ถ์ถ๋ ๋ณธ๋ฌธ:", body_text)
    formatted = format_script(body_text)
    print("์์ฑ๋ ์คํฌ๋ฆฝํธ:", formatted)
    return formatted
# Tab 1: fetch a URL and show its prettified HTML.
iface_html = gr.Interface(
    fn=gradio_fetch_and_parse,
    inputs=gr.Textbox(label="URL์ ์ ๋ ฅํ์ธ์"),
    outputs=gr.Textbox(label="์คํฌ๋ฉ๋ HTML ์ฝํ ์ธ "),
)

# Tab 2: fetch a URL and turn its content into a video script.
iface_script = gr.Interface(
    fn=gradio_fetch_and_format_script,  # handler invoked on submit
    inputs=gr.Textbox(label="URL์ ์ ๋ ฅํ์ธ์"),  # input field
    outputs=gr.Textbox(label="์์์ฉ ์คํฌ๋ฆฝํธ"),  # output field
)

# Serve both interfaces as tabs of a single app.
iface_combined = gr.TabbedInterface(
    interface_list=[iface_html, iface_script],
    tab_names=["HTML ๋ณด๊ธฐ", "์คํฌ๋ฆฝํธ ์์ฑ"],
)
iface_combined.launch()