AIRider committed on
Commit
57f2a47
·
verified ·
1 Parent(s): 746c273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -23,28 +23,27 @@ def crawl_naver_search_results(url):
23
  response = session.get(url)
24
  soup = BeautifulSoup(response.text, "html.parser")
25
  results = []
26
- for i, li in enumerate(soup.find_all("li", class_=re.compile("bx.*")), start=1):
 
27
  for div in li.find_all("div", class_="detail_box"):
28
  for div2 in div.find_all("div", class_="title_area"):
29
  title = div2.text.strip()
30
  for a in div2.find_all("a", href=True):
31
  link = a["href"]
32
  results.append({"번호": i, "제목": title, "링크": link})
33
- if results:
34
- first_result = results[0]
35
- first_result_url = first_result["링크"]
36
- response = session.get(first_result_url)
37
- soup = BeautifulSoup(response.text, "html.parser")
38
- title_tag = soup.find("div", class_="se-module se-module-text se-title-text")
39
- if title_tag:
40
- title = title_tag.text.strip()
41
- first_result["제목"] = title
42
- html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
43
- html_table += f"<tr><td>{first_result['번호']}</td><td>{title}</td><td>{first_result['링크']}</td></tr>"
44
- for result in results[1:]:
45
- html_table += f"<tr><td>{result['번호']}</td><td>{result['제목']}</td><td>{result['링크']}</td></tr>"
46
- html_table += "</table>"
47
- return html_table
48
 
49
  with gr.Interface(
50
  fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
@@ -53,4 +52,13 @@ with gr.Interface(
53
  title="네이버 검색 제목과 링크 크롤러",
54
  description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
55
  ) as demo:
56
- demo.launch()
 
 
 
 
 
 
 
 
 
 
23
  response = session.get(url)
24
  soup = BeautifulSoup(response.text, "html.parser")
25
  results = []
26
+ i = 1
27
+ for li in soup.find_all("li", class_=re.compile("bx.*")):
28
  for div in li.find_all("div", class_="detail_box"):
29
  for div2 in div.find_all("div", class_="title_area"):
30
  title = div2.text.strip()
31
  for a in div2.find_all("a", href=True):
32
  link = a["href"]
33
  results.append({"번호": i, "제목": title, "링크": link})
34
+ i += 1
35
+ html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
36
+ for result in results[:10]: # 10개의 결과만 출력
37
+ html_table += f"<tr><td>{result['번호']}</td><td>{result['제목']}</td><td>{result['링크']}</td></tr>"
38
+ html_table += "</table>"
39
+ return html_table
40
+
41
def get_blog_content(link):
    """Fetch a Naver blog post and return its main body text.

    Args:
        link: URL of the blog post to fetch.

    Returns:
        The stripped text of the post body, or a short Korean notice
        when the expected content container is not present on the page.
    """
    session = setup_session()
    response = session.get(link)
    soup = BeautifulSoup(response.text, "html.parser")
    # Naver SmartEditor posts keep their body in "se-main-container".
    # Guard against pages that lack it (older editor formats, error or
    # redirect pages) instead of raising AttributeError on None.
    container = soup.find("div", class_="se-main-container")
    if container is None:
        return "본문을 찾을 수 없습니다."
    return container.text.strip()
 
 
47
 
48
  with gr.Interface(
49
  fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
 
52
  title="네이버 검색 제목과 링크 크롤러",
53
  description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
54
  ) as demo:
55
+ demo.launch()
56
+
57
# Standalone Gradio app that serves get_blog_content: user pastes a blog
# link, the app returns the extracted post text.
# NOTE(review): the preceding `demo.launch()` blocks in script mode, so
# this second interface is only reached after the first server shuts
# down — confirm whether a single TabbedInterface/Blocks layout was
# intended instead of two sequential launches.
with gr.Interface(
    fn=get_blog_content,
    inputs=gr.Textbox(label="링크를 입력하세요"),
    outputs=gr.Textbox(label="블로그 내용"),
    title="블로그 내용 가져오기",
    description="링크를 입력하여 블로그 내용을 가져옵니다"
) as blog_content_interface:
    # Launch the app (blocking call when run as a script).
    blog_content_interface.launch()