Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import requests
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
from requests.adapters import HTTPAdapter
|
5 |
from requests.packages.urllib3.util.retry import Retry
|
|
|
6 |
|
7 |
def setup_session():
|
8 |
session = requests.Session()
|
@@ -22,31 +23,24 @@ def crawl_naver_search_results(url):
|
|
22 |
response = session.get(url)
|
23 |
soup = BeautifulSoup(response.text, "html.parser")
|
24 |
results = []
|
25 |
-
for li in soup.find_all("li", class_="bx")
|
26 |
for div in li.find_all("div", class_="detail_box"):
|
27 |
for div2 in div.find_all("div", class_="title_area"):
|
28 |
title = div2.text.strip()
|
29 |
for a in div2.find_all("a", href=True):
|
30 |
link = a["href"]
|
31 |
-
|
32 |
-
|
33 |
-
soup = BeautifulSoup(response.text, "html.parser")
|
34 |
-
content = soup.find("div", class_="post_ct").text.strip()
|
35 |
-
results.append({"title": title, "content": content})
|
36 |
-
return results
|
37 |
-
|
38 |
-
def format_results(results):
|
39 |
-
html = "<table>"
|
40 |
for result in results:
|
41 |
-
|
42 |
-
|
43 |
-
return
|
44 |
|
45 |
with gr.Interface(
|
46 |
-
fn=lambda query:
|
47 |
inputs=gr.Textbox(label="키워드를 입력하세요"),
|
48 |
-
outputs=gr.HTML(label="크롤링된 제목과
|
49 |
-
title="네이버 검색 제목과
|
50 |
-
description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과
|
51 |
) as demo:
|
52 |
demo.launch()
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
from requests.adapters import HTTPAdapter
|
5 |
from requests.packages.urllib3.util.retry import Retry
|
6 |
+
import re
|
7 |
|
8 |
def setup_session():
|
9 |
session = requests.Session()
|
|
|
23 |
response = session.get(url)
|
24 |
soup = BeautifulSoup(response.text, "html.parser")
|
25 |
results = []
|
26 |
+
for i, li in enumerate(soup.find_all("li", class_=re.compile("bx.*")), start=1):
|
27 |
for div in li.find_all("div", class_="detail_box"):
|
28 |
for div2 in div.find_all("div", class_="title_area"):
|
29 |
title = div2.text.strip()
|
30 |
for a in div2.find_all("a", href=True):
|
31 |
link = a["href"]
|
32 |
+
results.append({"번호": i, "제목": title, "링크": link})
|
33 |
+
html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
for result in results:
|
35 |
+
html_table += f"<tr><td>{result['번호']}</td><td>{result['제목']}</td><td>{result['링크']}</td></tr>"
|
36 |
+
html_table += "</table>"
|
37 |
+
return html_table
|
38 |
|
39 |
with gr.Interface(
|
40 |
+
fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
|
41 |
inputs=gr.Textbox(label="키워드를 입력하세요"),
|
42 |
+
outputs=gr.HTML(label="크롤링된 제목과 링크 목록"),
|
43 |
+
title="네이버 검색 제목과 링크 크롤러",
|
44 |
+
description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
|
45 |
) as demo:
|
46 |
demo.launch()
|