Spaces:

aliceblue11
/

naver_blog_00

Build error

App Files Files

xet

Community

naver_blog_00 / app.py

aliceblue11

Update app.py

dc7031d verified 9 months ago

raw

history blame

2.36 kB

	import requests
	from bs4 import BeautifulSoup
	import gradio as gr

	def scrape_naver_blog(url, timeout=10):
	    """Fetch a Naver Blog page and return its title and body text.

	    The function never raises: every failure is reported as a
	    human-readable string so the result can be displayed as-is.

	    Args:
	        url: Address of the blog post to scrape.
	        timeout: Seconds to wait for the HTTP response before giving up.
	            Without a timeout a stalled server would block the caller
	            indefinitely.

	    Returns:
	        A string holding the title and the content on success, otherwise
	        an error message describing what went wrong.
	    """
	    # Reject missing/blank input up front instead of letting the HTTP
	    # layer fail with a less helpful "invalid URL" exception text.
	    if not isinstance(url, str) or not url.strip():
	        return "Error: Please enter a Naver Blog URL."
	    url = url.strip()

	    try:
	        # Debug log: show which URL is being requested.
	        print(f"[DEBUG] Scraping URL: {url}")

	        # Download the page with a browser-like User-Agent header.
	        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
	        response = requests.get(url, headers=headers, timeout=timeout)

	        # Anything other than HTTP 200 is reported as a failure.
	        print(f"[DEBUG] HTTP Response Status Code: {response.status_code}")
	        if response.status_code != 200:
	            return f"Error: Unable to access the page. HTTP Status Code: {response.status_code}"

	        # Parse the returned markup.
	        soup = BeautifulSoup(response.text, "html.parser")

	        # These are CSS selectors (not XPath expressions).
	        # NOTE(review): if they were copied from browser devtools, the
	        # `tbody` steps may not exist in the raw HTML, because browsers
	        # insert <tbody> while "html.parser" does not - confirm against
	        # a real page.
	        title_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(1) > div > div > div:nth-of-type(2)"
	        content_selector = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(2) > div:nth-of-type(2) > div > div"

	        title_element = soup.select_one(title_selector)
	        content_element = soup.select_one(content_selector)

	        if not title_element or not content_element:
	            return "Error: Unable to locate title or content using the provided XPaths."

	        # Extract the visible text with surrounding whitespace removed.
	        title = title_element.get_text(strip=True)
	        content = content_element.get_text(strip=True)

	        return f"제목: {title}\n내용: {content}"

	    except Exception as e:
	        # Top-level boundary for the UI: log the problem and turn it into
	        # a message (this also covers timeouts and connection errors).
	        print(f"[DEBUG] Exception occurred: {str(e)}")
	        return f"An error occurred: {str(e)}"

	# Gradio interface setup
	def gradio_interface(url):
	    """Gradio callback: scrape the given URL and return the resulting text."""
	    scraped_text = scrape_naver_blog(url)
	    return scraped_text

	# One text box in, one text box out, wired to the scraping callback.
	iface = gr.Interface(
	    title="Naver Blog Scraper",
	    description="Enter a Naver Blog URL to scrape the title and content.",
	    fn=gradio_interface,
	    inputs=gr.Textbox(label="Naver Blog URL"),
	    outputs=gr.Textbox(label="Scraped Content"),
	)

	# Launch the app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()