# Bl_Crawl_t3 / app.py — Hugging Face Space by AIRider
# (commit eb256b7 "Update app.py", 2.39 kB; viewer chrome removed so the file parses)
import html
import re
from urllib.parse import urlencode

import gradio as gr
from requests_html import HTMLSession
def setup_session():
    """Create and return a fresh requests-html session for fetching/rendering pages."""
    return HTMLSession()
def generate_naver_search_url(query):
    """Build a Naver blog-tab search URL for *query*.

    Args:
        query: Raw search term; may contain spaces or non-ASCII (e.g. Korean)
            characters.

    Returns:
        str: Fully percent-encoded search URL.
    """
    base_url = "https://search.naver.com/search.naver?"
    # The original joined raw key=value pairs with an f-string, which produced
    # invalid URLs for queries containing spaces or reserved characters.
    # urlencode() percent-encodes each value; dict insertion order preserves
    # the original parameter order (ssc, sm, query).
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
def crawl_naver_search_results(url):
    """Render *url* with requests-html and return the blog results as HTML.

    Args:
        url: Naver blog-search URL (see generate_naver_search_url).

    Returns:
        str: An HTML ``<table>`` with columns 번호/제목/링크 holding up to the
        first 10 scraped results.
    """
    session = setup_session()
    try:
        response = session.get(url)
        response.html.render()  # execute page JavaScript so result nodes exist
        results = []
        for li in response.html.find("li.bx"):
            for detail in li.find("div.detail_box"):
                for title_area in detail.find("div.title_area"):
                    title = title_area.text.strip()
                    # NOTE: requests-html's find() takes only a CSS selector —
                    # the original passed href=True (a BeautifulSoup idiom),
                    # which raises TypeError. Filter for the attribute manually.
                    for a in title_area.find("a"):
                        link = a.attrs.get("href")
                        if not link:
                            continue
                        results.append(
                            {"번호": len(results) + 1, "제목": title, "링크": link}
                        )
    finally:
        # The original leaked the Chromium-backed session on every call.
        session.close()
    html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
    for result in results[:10]:  # 10개의 결과만 출력 (only show the first 10 results)
        # Escape scraped text so it cannot inject markup into the output table.
        html_table += (
            f"<tr><td>{result['번호']}</td>"
            f"<td>{html.escape(result['제목'])}</td>"
            f"<td>{html.escape(result['링크'])}</td></tr>"
        )
    html_table += "</table>"
    return html_table
def get_blog_content(link):
    """Fetch a Naver blog post and return its SmartEditor paragraph text.

    Args:
        link: URL of the blog post to render.

    Returns:
        str: Paragraph texts, each followed by a newline ("" when no matching
        components are found) — same format as the original implementation.
    """
    session = setup_session()
    try:
        response = session.get(link)
        response.html.render()  # paragraph nodes are injected client-side
        # The original accumulated this in a variable misleadingly named
        # `title`; it is actually the post body text.
        paragraphs = []
        for component in response.html.find("div.se-component.se-text.se-l-default"):
            for paragraph in component.find("p.se-text-paragraph"):
                paragraphs.append(paragraph.text.strip())
    finally:
        # The original never closed the session, leaking it on every call.
        session.close()
    return "".join(p + "\n" for p in paragraphs)
def _search(query):
    """Crawl Naver blog results for *query* and return them as an HTML table."""
    return crawl_naver_search_results(generate_naver_search_url(query))


with gr.Interface(
    fn=_search,
    inputs=gr.Textbox(label="키워드를 입력하세요"),
    outputs=gr.HTML(label="크롤링된 제목과 링크 목록"),
    title="네이버 검색 제목과 링크 크롤러",
    description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다",
) as demo:
    # Extra widgets rendered beneath the Interface: fetch one blog post's body.
    button = gr.Button("블로그 제목 가져오기")
    text_input = gr.Textbox(label="링크를 입력하세요")
    text_output = gr.Textbox(label="블로그 제목")
    # Wire the crawler directly — the original's one-line wrapper added nothing.
    button.click(fn=get_blog_content, inputs=text_input, outputs=text_output)

demo.launch(share=True)