naver_blog_00 / app.py
aliceblue11's picture
Update app.py
dc7031d verified
raw
history blame
2.36 kB
import requests
from bs4 import BeautifulSoup
import gradio as gr
def scrape_naver_blog(url, timeout=10):
    """Fetch a Naver Blog page and return its title and body text.

    Args:
        url: Address of the blog post to scrape. Leading and trailing
            whitespace is ignored.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        On success, a two-line string: the title prefixed with "제목: " and
        the content prefixed with "내용: ". Otherwise a human-readable error
        message (empty URL, non-200 status, selectors matching nothing, or
        any exception raised while fetching/parsing). This function never
        raises for those cases; it always returns a string.
    """
    # Reject empty input up front instead of surfacing a low-level
    # "no scheme supplied" error from the HTTP library.
    url = (url or "").strip()
    if not url:
        return "Error: Please enter a Naver Blog URL."

    try:
        # Debug log: which URL is being fetched.
        print(f"[DEBUG] Scraping URL: {url}")

        # Send a browser-like User-Agent with the request.
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
        response = requests.get(url, headers=headers, timeout=timeout)

        # Debug log: HTTP status of the response.
        print(f"[DEBUG] HTTP Response Status Code: {response.status_code}")
        if response.status_code != 200:
            return f"Error: Unable to access the page. HTTP Status Code: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")

        # CSS selectors (not XPath) for the title and content nodes.
        # NOTE(review): these look like a DOM path copied from browser
        # devtools. Browsers insert <tbody> into tables, whereas html.parser
        # does not, so confirm these match the raw HTML Naver actually serves.
        title_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(1) > div > div > div:nth-of-type(2)"
        content_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(2) > div:nth-of-type(2) > div > div"

        title_element = soup.select_one(title_selector)
        content_element = soup.select_one(content_selector)
        if title_element is None or content_element is None:
            return "Error: Unable to locate title or content using the provided XPaths."

        # Extract the visible text with surrounding whitespace removed.
        title = title_element.get_text(strip=True)
        content = content_element.get_text(strip=True)
        return f"제목: {title}\n내용: {content}"
    except Exception as e:
        # UI boundary: report any failure (network, timeout, parsing) as
        # text rather than letting it propagate into the Gradio callback.
        print(f"[DEBUG] Exception occurred: {str(e)}")
        return f"An error occurred: {str(e)}"
# Gradio callback
def gradio_interface(url):
    """Pass the URL submitted in the UI to the scraper and return its text result."""
    scraped_text = scrape_naver_blog(url)
    return scraped_text
# Build the UI components first, then wire them into the interface:
# one text box for the blog URL in, one text box for the scraped text out.
_url_box = gr.Textbox(label="Naver Blog URL")
_result_box = gr.Textbox(label="Scraped Content")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_url_box,
    outputs=_result_box,
    title="Naver Blog Scraper",
    description="Enter a Naver Blog URL to scrape the title and content.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()