AIRider committed on
Commit
2e96832
·
verified ·
1 Parent(s): 43c0554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -89
app.py CHANGED
@@ -13,93 +13,18 @@ def setup_session():
13
  session.mount('https://', HTTPAdapter(max_retries=retries))
14
  return session
15
 
16
- def get_base_url(board_select):
17
- urls = {
18
- "๋ง˜์ด๋ฒ ๋ฒ ": "https://cafe.naver.com/ArticleList.nhn?search.clubid=29434212&search.menuid=2&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=29434212",
19
- "๋ง˜์Šคํ™€๋ฆญ": "https://cafe.naver.com/ArticleList.nhn?search.clubid=10094499&search.menuid=599&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=10094499",
20
- "๊ด‘์ฃผ๋ง˜": "https://cafe.naver.com/ArticleList.nhn?search.clubid=26025763&search.menuid=508&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=26025763",
21
- "์‡ผํ•‘์ง€๋ฆ„์‹ ": "https://cafe.naver.com/ArticleList.nhn?search.clubid=25729954&search.menuid=751&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=25729954",
22
- "๋ถ€์‚ฐ๋ง˜": "https://cafe.naver.com/ArticleList.nhn?search.clubid=28707025&search.menuid=282&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=28707025",
23
- "์ง„ํฌ๋ง˜": "https://cafe.naver.com/ArticleList.nhn?search.clubid=21442290&search.menuid=476&search.boardtype=L&userDisplay=50&search.specialmenutype=&search.totalCount=501&search.cafeId=21442290"
24
- }
25
- return urls.get(board_select, "Invalid board selected")
26
 
27
- def convert_views(view_string):
28
- if '만' in view_string:
29
- number_part = view_string.replace('만', '')
30
- return int(float(number_part) * 10000)
31
- return int(view_string.replace(",", ""))
32
-
33
- def extract_data_to_excel_and_html(page, board_select):
34
- session = setup_session()
35
- base_url = get_base_url(board_select)
36
- if base_url == "Invalid board selected":
37
- return "Invalid board selected", ""
38
-
39
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
40
- today = datetime.datetime.now().strftime("%Y%m%d")
41
- workbook = xlsxwriter.Workbook(f'{board_select}_{today}.xlsx')
42
- worksheet = workbook.add_worksheet()
43
- worksheet.write('A1', '제목')
43
- worksheet.write('B1', '작성일')
44
- worksheet.write('C1', '조회수')
45
- worksheet.write('D1', '좋아요')
47
-
48
- html_output = "<table style='width:100%; border: 1px solid black;'><tr><th>제목</th><th>작성일</th><th>조회수</th><th>좋아요</th></tr>"
49
- row = 1
50
- for p in range(1, page + 1):
51
- url = f"{base_url}&search.page={p}"
52
- response = session.get(url, headers=headers)
53
- soup = BeautifulSoup(response.text, 'html.parser')
54
- article_boards = soup.find_all('div', class_='article-board m-tcol-c')
55
- if len(article_boards) < 2:
56
- continue
57
- article_board = article_boards[1]
58
- rows = article_board.find_all('tr')
59
- current_date = datetime.datetime.now().strftime("%Y.%m.%d")
60
-
61
- for row_data in rows:
62
- try:
63
- a_tag = row_data.find('a', class_='article')
64
- if a_tag:
65
- link = a_tag['href']
66
- title = a_tag.get_text(strip=True)
67
- full_link = f"https://cafe.naver.com{link}"
68
- views = convert_views(row_data.find('td', class_='td_view').get_text(strip=True))
69
- likes = int(row_data.find('td', class_='td_likes').get_text(strip=True).replace(",", ""))
70
- date = row_data.find('td', class_='td_date').get_text(strip=True)
71
- if ":" in date:
72
- date = current_date
73
-
74
- worksheet.write_url(row, 0, full_link, string=title)
75
- worksheet.write(row, 1, date)
76
- worksheet.write_number(row, 2, views)
77
- worksheet.write_number(row, 3, likes)
78
-
79
- html_output += f"<tr><td><a href='{full_link}' target='_blank'>{title}</a></td><td>{date}</td><td>{views}</td><td>{likes}</td></tr>"
80
-
81
- row += 1
82
- except AttributeError:
83
- continue
84
-
85
- workbook.close()
86
- html_output += "</table>"
87
- return f"{board_select}_{today}.xlsx", html_output
88
-
89
- with gr.Blocks() as demo:
90
- gr.Markdown("# N์‚ฌ Cafe ํ•ซ๋”œ ๊ฒŒ์‹œํŒ ํฌ๋กค๋ง")
91
- gr.Markdown("""
92
- ํŽ˜์ด์ง€๋ฅผ ์ž…๋ ฅํ•˜๋ฉด ๊ฒฐ๊ณผ๋ฅผ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.<br>
93
- ์ตœ๋Œ€ ํŽ˜์ด์ง€์ˆ˜๋Š” 50ํŽ˜์ด์ง€ ์ž…๋‹ˆ๋‹ค.
94
- ํ™œ์šฉ๋ฒ• ๋ฐ ์ด์ปค๋จธ์Šค ๊ต์œก ๋ฌธ์˜๋Š” *** ์œผ๋กœ ์ฃผ์„ธ์š”.
95
- """)
96
- board_select = gr.Radio(["๋ง˜์ด๋ฒ ๋ฒ ", "๋ง˜์Šคํ™€๋ฆญ", "๊ด‘์ฃผ๋ง˜", "์‡ผํ•‘์ง€๋ฆ„์‹ ", "๋ถ€์‚ฐ๋ง˜", "๏ฟฝ๏ฟฝํฌ๋ง˜"], label="๊ฒŒ์‹œํŒ์„ ์„ ํƒํ•˜์„ธ์š”")
97
- with gr.Row():
98
- inp = gr.Number(label="์ˆ˜์ง‘์„ ์›ํ•˜์‹œ๋Š” ํŽ˜์ด์ง€ ์ˆ˜๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value=1)
99
- btn = gr.Button("์ˆ˜์ง‘")
100
- output_file = gr.File(label="์—‘์…€ํŒŒ์ผ๋กœ ๋‹ค์šด๋กœ๋“œ")
101
- output_html = gr.HTML()
102
- btn.click(fn=extract_data_to_excel_and_html, inputs=[inp, board_select], outputs=[output_file, output_html])
103
-
104
- if __name__ == "__main__":
105
- demo.launch(share=True)
 
13
  session.mount('https://', HTTPAdapter(max_retries=retries))
14
  return session
15
 
16
+ def generate_naver_search_url(query):
17
+ base_url = "https://search.naver.com/search.naver?"
18
+ params = {"ssc": "tab.blog.all", "sm": "tab_jum"}
19
+ params["query"] = query
20
+ url = base_url + "&".join(f"{key}={value}" for key, value in params.items())
21
+ return url
 
 
 
 
22
 
23
+ with gr.Interface(
24
+ fn=generate_naver_search_url,
25
+ inputs=gr.Textbox(label="ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”"),
26
+ outputs=gr.Textbox(label="์ƒ์„ฑ๋œ URL"),
27
+ title="๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ URL ์ƒ์„ฑ๊ธฐ",
28
+ description="๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ๋ฅผ ์ž…๋ ฅํ•˜์—ฌ ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ URL์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค"
29
+ ) as demo:
30
+ demo.launch()