import gradio as gr
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib.parse import urlencode
from urllib3.util.retry import Retry
from tabulate import tabulate

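# Build a requests session that retries transient gateway errors (502/503/504)
# up to five times with exponential backoff before giving up.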
def setup_session():
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retries))
    return session

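# Compose the Naver blog-tab search URL for a given query string.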
def generate_naver_search_url(query):
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    # urlencode percent-encodes the query (Korean text, spaces, etc.) safely.
    return base_url + urlencode(params)

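# Fetch the search results page and extract (title, link) pairs from the
# first ten result items, returning them as a grid-formatted text table.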
def crawl_naver_search_results(url):
    session = setup_session()
    response = session.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    # Each blog result sits in an <li class="bx">; take the first ten.
    for li in soup.find_all("li", class_="bx")[:10]:
        for detail in li.find_all("div", class_="detail_box"):
            for title_area in detail.find_all("div", class_="title_area"):
                title = title_area.text.strip()
                for a in title_area.find_all("a", href=True):
                    results.append([title, a["href"]])
    # Table headers are Korean UI strings: "제목" = title, "링크" = link.
    return tabulate(results, headers=["제목", "링크"], tablefmt="grid")

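# Wire the crawler into a simple Gradio text-in/text-out interface.
# UI labels are Korean, e.g. "키워드를 입력하세요" = "Enter a keyword".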
demo = gr.Interface(
    fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
    inputs=gr.Textbox(label="키워드를 입력하세요"),
    outputs=gr.Textbox(label="크롤링된 제목과 링크 목록"),
    title="네이버 검색 제목과 링크 크롤러",
    description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다",
)

if __name__ == "__main__":
    demo.launch()