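"""Gradio app that scrapes titles and links from Naver blog search results.

Given a keyword, it builds a Naver search URL, fetches the page with a
retrying requests session, extracts result titles and links with
BeautifulSoup, and renders them as a text table.
"""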
import gradio as gr
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib.parse import urlencode
from urllib3.util.retry import Retry
from tabulate import tabulate
def setup_session():
    """Create a requests session that retries transient server errors."""
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retries))
    return session
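# Note on the Retry settings above: total=5 caps connect, read, and status
# retries combined; status_forcelist limits status-based retries to
# 502/503/504; and backoff_factor=1 spaces attempts out exponentially.
# Only https:// URLs get the custom adapter, which is all this app uses.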
def generate_naver_search_url(query):
    """Build a Naver blog-search URL for the given query."""
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    # urlencode percent-encodes the query (Korean text, spaces, etc.),
    # which the original manual "&".join left unescaped.
    return base_url + urlencode(params)
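# Example (encoding shown for illustration; urlencode produces it):
#   generate_naver_search_url("파이썬")
#   -> "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query=%ED%8C%8C%EC%9D%B4%EC%8D%AC"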
def crawl_naver_search_results(url):
    """Fetch a Naver search results page and return titles/links as a grid table."""
    session = setup_session()
    response = session.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    # Each blog result is an <li class="bx">; keep only the first ten.
    for li in soup.find_all("li", class_="bx")[:10]:
        for div in li.find_all("div", class_="detail_box"):
            for div2 in div.find_all("div", class_="title_area"):
                title = div2.text.strip()
                for a in div2.find_all("a", href=True):
                    link = a["href"]
                    results.append([title, link])
    # Headers: "제목" = "Title", "링크" = "Link"
    return tabulate(results, headers=["제목", "링크"], tablefmt="grid")
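# Quick manual check without the UI (hypothetical query; if Naver changes its
# markup, the class names above need updating):
#   print(crawl_naver_search_results(generate_naver_search_url("파이썬")))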
demo = gr.Interface(
    fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
    inputs=gr.Textbox(label="키워드를 입력하세요"),  # "Enter a keyword"
    outputs=gr.Textbox(label="크롤링된 제목과 링크 목록"),  # "List of crawled titles and links"
    title="네이버 검색 제목과 링크 크롤러",  # "Naver search title and link crawler"
    # Description: "Enter a search query to crawl titles and links from Naver search results"
    description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다",
)

if __name__ == "__main__":
    demo.launch()