Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,28 +23,27 @@ def crawl_naver_search_results(url):
|
|
23 |
response = session.get(url)
|
24 |
soup = BeautifulSoup(response.text, "html.parser")
|
25 |
results = []
|
26 |
-
|
|
|
27 |
for div in li.find_all("div", class_="detail_box"):
|
28 |
for div2 in div.find_all("div", class_="title_area"):
|
29 |
title = div2.text.strip()
|
30 |
for a in div2.find_all("a", href=True):
|
31 |
link = a["href"]
|
32 |
results.append({"번호": i, "제목": title, "링크": link})
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
html_table += "</table>"
|
47 |
-
return html_table
|
48 |
|
49 |
with gr.Interface(
|
50 |
fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
|
@@ -53,4 +52,13 @@ with gr.Interface(
|
|
53 |
title="네이버 검색 제목과 링크 크롤러",
|
54 |
description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
|
55 |
) as demo:
|
56 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
response = session.get(url)
|
24 |
soup = BeautifulSoup(response.text, "html.parser")
|
25 |
results = []
|
26 |
+
i = 1
|
27 |
+
for li in soup.find_all("li", class_=re.compile("bx.*")):
|
28 |
for div in li.find_all("div", class_="detail_box"):
|
29 |
for div2 in div.find_all("div", class_="title_area"):
|
30 |
title = div2.text.strip()
|
31 |
for a in div2.find_all("a", href=True):
|
32 |
link = a["href"]
|
33 |
results.append({"번호": i, "제목": title, "링크": link})
|
34 |
+
i += 1
|
35 |
+
html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
|
36 |
+
for result in results[:10]: # 10개의 결과만 출력
|
37 |
+
html_table += f"<tr><td>{result['번호']}</td><td>{result['제목']}</td><td>{result['링크']}</td></tr>"
|
38 |
+
html_table += "</table>"
|
39 |
+
return html_table
|
40 |
+
|
41 |
+
def get_blog_content(link):
    """Fetch a Naver blog post and return the text of its main body.

    Args:
        link: URL of the blog post page to fetch.

    Returns:
        Stripped text of the post's "se-main-container" div, or a short
        notice string when that container is absent from the page.
    """
    session = setup_session()
    response = session.get(link)
    soup = BeautifulSoup(response.text, "html.parser")
    # Naver SmartEditor posts put the body inside "se-main-container".
    # Guard against pages that lack it (old editor, non-blog URLs) so a
    # missing div does not raise AttributeError on `.text`.
    container = soup.find("div", class_="se-main-container")
    if container is None:
        return "본문을 찾을 수 없습니다."
    return container.text.strip()
|
|
|
|
|
47 |
|
48 |
with gr.Interface(
|
49 |
fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
|
|
|
52 |
title="네이버 검색 제목과 링크 크롤러",
|
53 |
description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다"
|
54 |
) as demo:
|
55 |
+
demo.launch()
|
56 |
+
|
57 |
+
# Second Gradio app: takes a blog post URL and displays the extracted body
# text from get_blog_content.
# NOTE(review): the earlier `demo.launch()` presumably blocks the script, so
# this second interface may never be reached unless that launch is made
# non-blocking — confirm against Gradio's launch() semantics.
with gr.Interface(
    fn=get_blog_content,
    inputs=gr.Textbox(label="링크를 입력하세요"),
    outputs=gr.Textbox(label="블로그 내용"),
    title="블로그 내용 가져오기",
    description="링크를 입력하여 블로그 내용을 가져옵니다"
) as blog_content_interface:
    blog_content_interface.launch()
|