# Hugging Face Spaces page header captured with this file (Space status: Build error)
import requests | |
from bs4 import BeautifulSoup | |
import gradio as gr | |
def scrape_naver_blog(url):
    """Fetch a Naver Blog page and return its title and body text.

    Args:
        url: Address of the blog page to scrape. Surrounding whitespace is
            ignored.

    Returns:
        On success, a two-line string: the title line (prefixed with "제목: ")
        followed by the content line (prefixed with "내용: "). Otherwise a
        human-readable error message: when the URL is empty, when the HTTP
        status is not 200, when the title or content element cannot be
        located, or when any exception (including a request timeout) occurs.
        This function never raises; it is used directly as a UI callback.
    """
    # Reject empty input up front instead of letting the HTTP library fail
    # with a less helpful "invalid URL" exception message.
    if not url or not url.strip():
        return "Error: Please provide a Naver Blog URL."
    url = url.strip()

    # Upper bound (seconds) on the request so the UI cannot hang forever;
    # requests applies no timeout unless one is given.
    timeout_seconds = 10

    try:
        # Debug log: show which URL is being scraped.
        print(f"[DEBUG] Scraping URL: {url}")

        # Fetch the blog HTML with a browser-like User-Agent.
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
        response = requests.get(url, headers=headers, timeout=timeout_seconds)

        # Debug log: HTTP status code.
        print(f"[DEBUG] HTTP Response Status Code: {response.status_code}")
        if response.status_code != 200:
            return f"Error: Unable to access the page. HTTP Status Code: {response.status_code}"

        # Parse the HTML.
        soup = BeautifulSoup(response.text, "html.parser")

        # Positional CSS selectors for the title and the content. They are
        # tied to one specific page layout and will stop matching if the
        # markup changes.
        title_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(1) > div > div > div:nth-of-type(2)"
        content_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(2) > div:nth-of-type(2) > div > div"

        title_element = soup.select_one(title_selector)
        content_element = soup.select_one(content_selector)
        if not title_element or not content_element:
            return "Error: Unable to locate title or content using the provided XPaths."

        # Extract the text with surrounding whitespace stripped.
        title = title_element.get_text(strip=True)
        content = content_element.get_text(strip=True)

        return f"제목: {title}\n내용: {content}"
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so every failure is
        # logged and reported to the user as text instead of propagating.
        print(f"[DEBUG] Exception occurred: {str(e)}")
        return f"An error occurred: {str(e)}"
# Gradio interface setup
def gradio_interface(url):
    """Gradio callback: scrape the given URL and return the resulting text.

    Delegates to ``scrape_naver_blog`` and passes its return value through
    unchanged.
    """
    scraped_text = scrape_naver_blog(url)
    return scraped_text
# Build the input and output widgets first, then wire them to the callback.
_url_input = gr.Textbox(label="Naver Blog URL")
_result_output = gr.Textbox(label="Scraped Content")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_url_input,
    outputs=_result_output,
    title="Naver Blog Scraper",
    description="Enter a Naver Blog URL to scrape the title and content.",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()