File size: 2,393 Bytes
f4888ec
eb256b7
710f005
29582f1
 
eb256b7
29582f1
 
2e96832
 
 
 
 
 
29582f1
3305ec2
f4888ec
 
eb256b7
3305ec2
57f2a47
eb256b7
 
 
3305ec2
eb256b7
 
710f005
57f2a47
 
 
 
 
 
 
 
eb256b7
 
 
25cb68f
eb256b7
 
25cb68f
18844e0
3305ec2
e1604fe
 
 
 
 
 
 
 
 
 
57f2a47
e1604fe
 
a8319ba
e1604fe
a8319ba
e1604fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import html
import re
from urllib.parse import urlencode

import gradio as gr
from requests_html import HTMLSession

def setup_session():
    """Create and return a fresh ``HTMLSession`` instance."""
    return HTMLSession()

def generate_naver_search_url(query):
    """Build the Naver search URL for a keyword.

    The fixed parameters ``ssc=tab.blog.all`` and ``sm=tab_jum`` are sent
    together with the user's query. All values are percent-encoded so that
    spaces, ``&``, ``=``, ``#`` and non-ASCII (e.g. Korean) keywords cannot
    break or truncate the query string.

    Args:
        query: Search keyword as entered by the user.

    Returns:
        The complete search URL as a string.
    """
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    # urlencode handles both the "key=value" joining and the escaping.
    return base_url + urlencode(params)

def crawl_naver_search_results(url, max_results=10):
    """Fetch a search results page and return titles and links as an HTML table.

    The page is fetched and rendered, then every ``div.title_area`` found
    under ``li.bx > div.detail_box`` contributes one row per anchor that
    carries an ``href`` attribute. Rows are numbered from 1 in document order.

    Args:
        url: Search results URL to crawl.
        max_results: Maximum number of rows to include in the table
            (``None`` for no limit). Defaults to 10.

    Returns:
        An HTML ``<table>`` string with the columns 번호 / 제목 / 링크.
        Titles and links are HTML-escaped because they come from a
        third-party page.
    """
    session = setup_session()
    try:
        response = session.get(url)
        # Re-render the page before querying it with CSS selectors.
        response.html.render()
        results = []
        for item in response.html.find("li.bx"):
            for detail_box in item.find("div.detail_box"):
                for title_area in detail_box.find("div.title_area"):
                    title = title_area.text.strip()
                    # requests-html's find() accepts only a CSS selector, so
                    # "has an href" is expressed as an attribute selector
                    # (find("a", href=True) is BeautifulSoup syntax and
                    # raises TypeError here).
                    for anchor in title_area.find("a[href]"):
                        results.append(
                            {
                                "번호": len(results) + 1,
                                "제목": title,
                                "링크": anchor.attrs["href"],
                            }
                        )
    finally:
        # Release the HTTP session and any browser started by render().
        session.close()

    # Only the first max_results entries are shown.
    rows = [
        f"<tr><td>{result['번호']}</td>"
        f"<td>{html.escape(result['제목'])}</td>"
        f"<td>{html.escape(result['링크'])}</td></tr>"
        for result in results[:max_results]
    ]
    header = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
    return header + "".join(rows) + "</table>"

def get_blog_content(link):
    """Fetch a blog page and return the text of its text-component paragraphs.

    The page is fetched and rendered, then the text of every
    ``p.se-text-paragraph`` inside a
    ``div.se-component.se-text.se-l-default`` element is collected.

    Args:
        link: URL of the blog page to read.

    Returns:
        The stripped paragraph texts, each followed by a newline, joined into
        one string. Empty string when no matching paragraph is found.
    """
    session = setup_session()
    try:
        response = session.get(link)
        response.html.render()
        return "".join(
            paragraph.text.strip() + "\n"
            for component in response.html.find(
                "div.se-component.se-text.se-l-default"
            )
            for paragraph in component.find("p.se-text-paragraph")
        )
    finally:
        # Release the HTTP session and any browser started by render().
        session.close()

# Gradio UI. The Interface maps a keyword typed into the textbox to the HTML
# table produced by crawl_naver_search_results() for the generated search URL.
with gr.Interface(
    # Keyword -> search URL -> crawled results table.
    fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
    inputs=gr.Textbox(label="키워드를 입력하세요"),
    outputs=gr.HTML(label="크롤링된 제목과 링크 목록"),
    title="네이버 검색 제목과 링크 크롤러",
    description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
) as demo:
    # Extra components created while the Interface context is open: a button,
    # a textbox for a blog link, and a textbox for the fetched text.
    # NOTE(review): where these render relative to the Interface's own
    # widgets depends on Gradio — confirm the resulting layout.
    button = gr.Button("블로그 제목 가져오기")
    text_input = gr.Textbox(label="링크를 입력하세요")
    # The label reads "blog title", but get_blog_content() returns the
    # concatenated paragraph text of the page.
    text_output = gr.Textbox(label="블로그 제목")

    def get_blog_content_wrapper(link: str) -> str:
        """Forward the entered link to get_blog_content() and return its result."""
        return get_blog_content(link)

    # Clicking the button sends text_input's value through the wrapper and
    # writes the returned text into text_output.
    button.click(fn=get_blog_content_wrapper, inputs=text_input, outputs=text_output)

    # NOTE(review): launch() runs while the `with` block is still open, and
    # share=True presumably requests a public share link — confirm both
    # against the Gradio documentation.
    demo.launch(share=True)