AIRider committed on
Commit 6ead54a · verified · 1 Parent(s): 03dbdbd

Update app.py

Files changed (1)
  1. app.py +2 -76
app.py CHANGED
@@ -1,76 +1,2 @@
- import gradio as gr
- import requests
- from bs4 import BeautifulSoup
- from requests.adapters import HTTPAdapter
- from requests.packages.urllib3.util.retry import Retry
- import re
-
- def setup_session():
-     session = requests.Session()
-     retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
-     session.mount('https://', HTTPAdapter(max_retries=retries))
-     return session
-
- def generate_naver_search_url(query):
-     base_url = "https://search.naver.com/search.naver?"
-     params = {"ssc": "tab.blog.all", "sm": "tab_jum"}
-     params["query"] = query
-     url = base_url + "&".join(f"{key}={value}" for key, value in params.items())
-     return url
-
- def crawl_blog_content(url):
-     session = setup_session()
-     response = session.get(url)
-     soup = BeautifulSoup(response.text, "html.parser")
-     try:
-         content = soup.find("div", attrs={'class':'se-main-container'}).text
-         return content
-     except:
-         return ""
-
- def crawl_naver_search_results(url):
-     session = setup_session()
-     response = session.get(url)
-     soup = BeautifulSoup(response.text, "html.parser")
-     results = []
-     i = 1
-     count = 0
-     for li in soup.find_all("li", class_=re.compile("bx.*")):
-         for div in li.find_all("div", class_="detail_box"):
-             for div2 in div.find_all("div", class_="title_area"):
-                 title = div2.text.strip()
-                 for a in div2.find_all("a", href=True):
-                     link = a["href"]
-                     if "blog.naver" in link:
-                         link = link.replace("https://", "https://m.")
-                         content = crawl_blog_content(link)
-                         results.append({"번호": i, "제목": title, "링크": link, "내용": content})
-                         count += 1
-                         i += 1
-                         if count >= 10:
-                             break
-                 if count >= 10:
-                     break
-             if count >= 10:
-                 break
-     html_table = "<table style='table-layout: fixed; width: 100%;'><tr><th style='width: 10ch;'>번호</th><th style='width: 30ch;'>제목</th><th style='width: 20ch;'>링크</th><th style='width: 50ch;'>내용</th></tr>"
-     for result in results:
-         html_table += f"<tr><td style='width: 10ch; word-wrap: break-word;'>{result['번호']}</td><td style='width: 30ch; word-wrap: break-word;'>{result['제목']}</td><td style='width: 20ch; word-wrap: break-word;'><a href='{result['링크']}'>{result['링크']}</a></td><td style='width: 50ch; word-wrap: break-word;'>{result['내용']}</td></tr>"
-     html_table += "</table>"
-     return html_table
-
- results_memory = gr.State()
-
- with gr.Blocks() as demo:
-     gr.Markdown("# 네이버 검색 제목과 링크 크롤러")
-     query = gr.Textbox(label="검색 쿼리", placeholder="검색어를 입력하세요")
-     output = gr.HTML(label="검색 결과")
-
-     def search_and_display_results(query):
-         search_url = generate_naver_search_url(query)
-         results = crawl_naver_search_results(search_url)
-         return results
-
-     query.submit(search_and_display_results, inputs=query, outputs=output)
-
- demo.launch()
+ import os
+ exec(os.environ.get('APP'))
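
With this commit, app.py no longer contains the crawler itself: the two remaining lines read the APP environment variable and run its contents with exec(), so the actual app source has to be supplied at runtime (on Hugging Face Spaces this is typically done through a secret). Below is a minimal sketch of what the loader presumably does; the variable name APP comes from the diff, while the error handling shown here is illustrative and not part of the commit.

# Illustrative sketch: what the new two-line app.py effectively does.
# APP is assumed to hold the full Gradio app source as a string.
import os

app_source = os.environ.get('APP')
if app_source is None:
    # exec(None) raises TypeError, so the Space cannot start without the secret.
    raise RuntimeError("APP environment variable is not set")
exec(app_source)  # runs the embedded source, which is expected to end in demo.launch()

For local testing, the variable could be populated from a saved copy of the previous source before the loader runs, for example os.environ['APP'] = pathlib.Path('original_app.py').read_text() (the file name here is hypothetical). The net effect of the change is that the crawler code is moved out of the repository and into runtime configuration.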