AIRider committed on
Commit
710f005
·
verified ·
1 Parent(s): daca80e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -3,6 +3,7 @@ import requests
3
  from bs4 import BeautifulSoup
4
  from requests.adapters import HTTPAdapter
5
  from requests.packages.urllib3.util.retry import Retry
 
6
 
7
  def setup_session():
8
  session = requests.Session()
@@ -22,31 +23,24 @@ def crawl_naver_search_results(url):
22
  response = session.get(url)
23
  soup = BeautifulSoup(response.text, "html.parser")
24
  results = []
25
- for li in soup.find_all("li", class_="bx")[:10]:
26
  for div in li.find_all("div", class_="detail_box"):
27
  for div2 in div.find_all("div", class_="title_area"):
28
  title = div2.text.strip()
29
  for a in div2.find_all("a", href=True):
30
  link = a["href"]
31
- # 제목과 내용 크롤링
32
- response = session.get(link)
33
- soup = BeautifulSoup(response.text, "html.parser")
34
- content = soup.find("div", class_="post_ct").text.strip()
35
- results.append({"title": title, "content": content})
36
- return results
37
-
38
- def format_results(results):
39
- html = "<table>"
40
  for result in results:
41
- html += f"<tr><td>{result['title']}</td><td>{result['content']}</td></tr>"
42
- html += "</table>"
43
- return html
44
 
45
  with gr.Interface(
46
- fn=lambda query: format_results(crawl_naver_search_results(generate_naver_search_url(query))),
47
  inputs=gr.Textbox(label="키워드를 입력하세요"),
48
- outputs=gr.HTML(label="크롤링된 제목과 내용"),
49
- title="네이버 검색 제목과 내용 크롤러",
50
- description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 내용을 크롤링합니다"
51
  ) as demo:
52
  demo.launch()
 
3
  from bs4 import BeautifulSoup
4
  from requests.adapters import HTTPAdapter
5
  from requests.packages.urllib3.util.retry import Retry
6
+ import re
7
 
8
  def setup_session():
9
  session = requests.Session()
 
23
  response = session.get(url)
24
  soup = BeautifulSoup(response.text, "html.parser")
25
  results = []
26
+ for i, li in enumerate(soup.find_all("li", class_=re.compile("bx.*")), start=1):
27
  for div in li.find_all("div", class_="detail_box"):
28
  for div2 in div.find_all("div", class_="title_area"):
29
  title = div2.text.strip()
30
  for a in div2.find_all("a", href=True):
31
  link = a["href"]
32
+ results.append({"번호": i, "제목": title, "링크": link})
33
+ html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
 
 
 
 
 
 
 
34
  for result in results:
35
+ html_table += f"<tr><td>{result['번호']}</td><td>{result['제목']}</td><td>{result['링크']}</td></tr>"
36
+ html_table += "</table>"
37
+ return html_table
38
 
39
  with gr.Interface(
40
+ fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
41
  inputs=gr.Textbox(label="키워드를 입력하세요"),
42
+ outputs=gr.HTML(label="크롤링된 제목과 링크 목록"),
43
+ title="네이버 검색 제목과 링크 크롤러",
44
+ description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
45
  ) as demo:
46
  demo.launch()