Kims12 committed on
Commit
eb14b17
·
verified ·
1 Parent(s): 5e21995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +568 -1
app.py CHANGED
@@ -1 +1,568 @@
1
- d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datetime
import html
import logging
import os
import random
import time
from datetime import datetime

import gradio as gr
import pandas as pd
import pytz
import requests
import xlsxwriter
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
14
+
15
+ # Logging configuration
16
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
+ logger = logging.getLogger(__name__)
18
+
19
class ProxyConfig:
    """Hold proxy gateway settings and turn them into a ``requests`` proxy map.

    ``proxy_base`` is a plain dict with the keys ``username``, ``password``,
    ``host`` and ``ports`` (a dict with ``http`` and ``socks5`` entries).
    """

    def __init__(self):
        # NOTE(security): these credentials were hard-coded in the source.
        # They are kept as fallbacks so existing deployments keep working,
        # but PROXY_USERNAME / PROXY_PASSWORD / PROXY_HOST now take
        # precedence. Move the secrets into the environment and rotate them.
        self.proxy_base = {
            "username": os.environ.get("PROXY_USERNAME", "65d866e39dc83ab06068"),
            "password": os.environ.get("PROXY_PASSWORD", "aefb03d059da70e2"),
            "host": os.environ.get("PROXY_HOST", "gw.dataimpulse.com"),
            "ports": {
                "http": "823",
                "socks5": "824",
            },
        }

    def get_proxy_config(self, use_socks=False):
        """Build the ``proxies`` mapping for a ``requests`` session.

        Args:
            use_socks: When true, use the ``socks5`` port and URL scheme
                instead of the plain ``http`` proxy.

        Returns:
            A dict mapping both target schemes (``"http"`` and ``"https"``)
            to the proxy URL, or ``None`` when ``proxy_base`` is missing a
            required entry.
        """
        # Same logger object as the module-level ``logger`` (same name).
        log = logging.getLogger(__name__)
        protocol = "socks5" if use_socks else "http"
        try:
            username = self.proxy_base["username"]
            password = self.proxy_base["password"]
            host = self.proxy_base["host"]
            port = self.proxy_base["ports"][protocol]
        except (KeyError, TypeError) as e:
            log.error("[PROXY] Configuration failed: %s", e)
            return None

        # NOTE(review): the "__cr.kr" suffix is presumably a routing tag
        # understood by the gateway -- confirm against the provider docs.
        proxy_auth = f"{username}__cr.kr"
        proxy_url = f"{protocol}://{proxy_auth}:{password}@{host}:{port}"

        # Deliberately log without credentials.
        log.info("[PROXY] Configuration created: %s://%s:%s", protocol, host, port)

        # requests selects a proxy by the scheme of the *target* URL, so the
        # mapping must be keyed by "http"/"https". Keying it by the proxy's
        # own protocol left every HTTPS request unproxied.
        return {"http": proxy_url, "https": proxy_url}
51
+
52
def setup_session():
    """Create a ``requests.Session`` with proxy, retries and browser-like headers.

    Returns:
        requests.Session: a session with the proxy mapping applied (when one
        is available), default headers set, and a retrying adapter mounted
        for both ``http://`` and ``https://``.
    """
    session = requests.Session()

    # Proxy configuration
    proxy_config = ProxyConfig()
    proxies = proxy_config.get_proxy_config(use_socks=False)
    if proxies:
        session.proxies.update(proxies)
        try:
            # Log the outbound IP so proxy problems are visible in the logs.
            # This runs before the retrying adapter is mounted, so a failing
            # probe is attempted once only.
            ip_response = session.get('https://api.ipify.org?format=json', timeout=10)
            if ip_response.status_code == 200:
                logger.info(f"[PROXY] Current IP: {ip_response.json().get('ip')}")
            else:
                logger.warning(f"[PROXY] Failed to get IP. Status code: {ip_response.status_code}")
        except Exception as e:
            # Best-effort diagnostic: never let the probe break session setup.
            logger.error(f"[PROXY] IP check failed: {str(e)}")
    else:
        logger.warning("[PROXY] No proxy configuration available")

    # Retry policy
    retries = Retry(
        total=5,  # total number of retries
        backoff_factor=1,  # multiplier for the wait between retries
        status_forcelist=[500, 502, 503, 504],  # HTTP statuses that trigger a retry
        allowed_methods=["GET", "HEAD", "OPTIONS"]  # HTTP methods that may be retried
    )

    # Default headers.
    # Accept-Encoding is intentionally NOT overridden: the previous value
    # advertised "br", but requests can only decode Brotli when a brotli
    # package is installed. Leaving the session default in place makes
    # requests advertise exactly the encodings it is able to decode.
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0',
        'DNT': '1'  # Do Not Track request
    })

    # Adapter carrying the retry policy and connection-pool limits
    adapter = HTTPAdapter(
        max_retries=retries,
        pool_connections=100,  # number of connection pools to cache
        pool_maxsize=100  # maximum connections kept per pool
    )
    session.mount('https://', adapter)
    session.mount('http://', adapter)

    return session
110
+
111
def get_base_url(board_select):
    """Return the article-list URL for a board display name.

    Args:
        board_select: Board display name; must be one of the keys of the
            table below.

    Returns:
        The list URL without a ``search.page`` parameter, or the sentinel
        string ``"Invalid board selected"`` when the name is unknown or empty.
    """
    # Display name -> (club id, menu id). The board names were mis-encoded
    # (UTF-8 bytes shown through a single-byte code page) and are restored
    # to the Korean text they represent.
    boards = {
        "맘이베베": ("29434212", "2"),
        "맘스홀릭": ("10094499", "599"),
        "광주맘": ("26025763", "508"),
        "쇼핑지름신": ("25729954", "751"),
        "부산맘": ("28707025", "282"),
        "진포맘": ("21442290", "476"),
    }
    ids = boards.get(board_select)
    if not ids:
        # Same logger object as the module-level ``logger`` (same name);
        # the root logger was used here before, unlike the rest of the file.
        logging.getLogger(__name__).warning("Invalid board selected: %s", board_select)
        return "Invalid board selected"

    club_id, menu_id = ids
    # Every board URL has the same shape; only the two ids differ, and the
    # club id appears as both search.clubid and search.cafeId.
    return (
        "https://cafe.naver.com/ArticleList.nhn"
        f"?search.clubid={club_id}&search.menuid={menu_id}"
        "&search.boardtype=L&userDisplay=50&search.specialmenutype="
        f"&search.totalCount=501&search.cafeId={club_id}"
    )
125
+
126
def convert_views(view_string):
    """Convert a view-count label to an integer.

    Accepts plain counts with optional thousands separators (``"1,234"``)
    and counts abbreviated with the Korean ten-thousand unit (``"1.2만"``).

    Args:
        view_string: The label text.

    Returns:
        The count as an ``int``.

    Raises:
        ValueError: If the remaining text is not numeric.
    """
    text = view_string.strip().replace(",", "")
    # "만" means ten thousand. The literal was mis-encoded before and could
    # never match real page text.
    if "만" in text:
        # round() rather than int(): the float product can land just below
        # the intended integer and truncation would drop one.
        return round(float(text.replace("만", "")) * 10000)
    return int(text)
131
+
132
def validate_row_data(row_data):
    """Return True when the row has every cell the crawler reads.

    A row qualifies only if ``row_data.find`` locates a ``<td>`` for each of
    the classes ``td_view``, ``td_likes`` and ``td_date``.
    """
    needed_classes = ('td_view', 'td_likes', 'td_date')
    return all(
        row_data.find('td', class_=css_class) for css_class in needed_classes
    )
139
+
140
# Seconds to wait for one board page. Without a timeout a stalled connection
# would block the request handler indefinitely.
_PAGE_REQUEST_TIMEOUT = 15

# Opening part of the HTML preview: inline styles plus the table header.
_CRAWL_TABLE_HEAD = """
<style>
    .crawl-table {
        width: 100%;
        border-collapse: collapse;
        margin: 10px 0;
        font-family: 'Pretendard', -apple-system, BlinkMacSystemFont, system-ui, Roboto, sans-serif;
    }
    .crawl-table thead th,
    .crawl-table tr:first-child th {
        background-color: #000000;
        color: #ffffff;
        border: 1px solid #dee2e6;
        padding: 12px 8px;
        font-weight: 600;
        vertical-align: middle;
        text-align: center !important; /* 모든 헤더를 가운데 정렬로 강제 */
    }
    .crawl-table td {
        border: 1px solid #dee2e6;
        padding: 10px 8px;
        line-height: 1.4;
    }
    .crawl-table td:first-child {
        text-align: left; /* 제목 열은 왼쪽 정렬 */
    }
    .crawl-table td:nth-child(2),
    .crawl-table td:nth-child(3),
    .crawl-table td:nth-child(4),
    .crawl-table td:nth-child(5) {
        text-align: right; /* 나머지 열은 가운데 정렬 */
    }
    .crawl-table td:first-child a {
        text-decoration: none;
        color: #0066cc;
    }
    .crawl-table tr:nth-child(even) {
        background-color: #f8f9fa;
    }
    .crawl-table tr:hover {
        background-color: #f0f0f0;
    }
    @media (max-width: 768px) {
        .crawl-table {
            font-size: 14px;
        }
        .crawl-table th,
        .crawl-table td {
            padding: 8px 4px;
        }
    }
</style>
<table class="crawl-table">
    <thead>
        <tr>
            <th>제목</th>
            <th>작성일</th>
            <th>조회수</th>
            <th>좋아요</th>
            <th>댓글수</th>
        </tr>
    </thead>
    <tbody>
"""

# Closing part of the HTML preview.
_CRAWL_TABLE_TAIL = """    </tbody>
</table>"""


def _parse_article_row(row_data, today):
    """Read one article row; return its fields as a tuple, or None to skip it.

    Returns ``(full_link, title, date, views, likes, comments)``. Raises
    ``AttributeError``, ``KeyError`` or ``ValueError`` when the row markup is
    not in the expected shape.
    """
    if not validate_row_data(row_data):
        return None

    a_tag = row_data.find('a', class_='article')
    if a_tag is None:
        return None

    full_link = f"https://cafe.naver.com{a_tag['href']}"
    title = a_tag.get_text(strip=True)

    views = convert_views(row_data.find('td', class_='td_view').get_text(strip=True))
    likes = int(row_data.find('td', class_='td_likes').get_text(strip=True).replace(",", ""))
    date = row_data.find('td', class_='td_date').get_text(strip=True)

    # Comment count is optional; rows without the marker count as zero.
    comments = 0
    comment_tag = row_data.find('a', class_='cmt')
    if comment_tag is not None:
        em_tag = comment_tag.find('em')
        if em_tag is not None:
            comments = int(em_tag.get_text(strip=True).replace(",", ""))

    # A value containing ":" is a time of day rather than a date, so it is
    # replaced with today's date.
    if ":" in date:
        date = today

    return full_link, title, date, views, likes, comments


def extract_data_to_excel_and_html(page, board_select):
    """Crawl a board's article list into an Excel file and an HTML table.

    Args:
        page: Number of list pages to fetch, 1-50. Integral floats are
            accepted and truncated to ``int``.
        board_select: Board display name understood by ``get_base_url``.

    Returns:
        ``(filename, html)`` on success, where ``filename`` is the ``.xlsx``
        file written to the working directory and ``html`` is the preview
        table. On invalid input or failure, ``(None, html)`` where ``html``
        is a short message for the user.
    """
    try:
        if not isinstance(page, (int, float)) or page < 1 or page > 50:
            return None, "<p>페이지 수는 1-50 사이여야 합니다.</p>"
        # range() below needs an int; numeric inputs may arrive as floats.
        page = int(page)

        # Validate the board before creating the session, which performs a
        # network probe. Returning the sentinel string as a filename made
        # callers treat an invalid board as a successful crawl.
        base_url = get_base_url(board_select)
        if base_url == "Invalid board selected":
            return None, "<p>Invalid board selected</p>"

        session = setup_session()

        korea_time = datetime.now(pytz.timezone('Asia/Seoul'))
        filename = f'{board_select}_{korea_time.strftime("%Y%m%d_%H%M%S")}.xlsx'
        # Use the Korea-time date for "today" rather than the server's local
        # date, consistent with the timestamp in the file name.
        current_date = korea_time.strftime("%Y.%m.%d")

        workbook = xlsxwriter.Workbook(filename)
        try:
            worksheet = workbook.add_worksheet()

            # Excel cell formats
            header_format = workbook.add_format({
                'bold': True,
                'align': 'center',
                'valign': 'vcenter',
                'bg_color': '#f8f9fa',
                'border': 1
            })
            link_format = workbook.add_format({
                'align': 'left',
                'valign': 'vcenter',
                'border': 1,
                'color': '#0066cc',
                'underline': True
            })
            date_format = workbook.add_format({
                'align': 'center',
                'valign': 'vcenter',
                'border': 1
            })
            number_format = workbook.add_format({
                'align': 'center',
                'valign': 'vcenter',
                'border': 1,
                'num_format': '#,##0'
            })

            # Header row: title, date, views, likes, comments
            headers = ['제목', '작성일', '조회수', '좋아요', '댓글수']
            for col, header in enumerate(headers):
                worksheet.write(0, col, header, header_format)

            # HTML fragments are collected and joined once at the end.
            html_parts = [_CRAWL_TABLE_HEAD]
            row = 1  # next worksheet row to write; row 0 is the header

            for p in range(1, page + 1):
                try:
                    url = f"{base_url}&search.page={p}"
                    logger.info(f"[CRAWL] Fetching page {p}: {url}")
                    response = session.get(url, timeout=_PAGE_REQUEST_TIMEOUT)
                    # Small random pause between page requests.
                    time.sleep(random.uniform(0.5, 1.0))

                    if response.status_code != 200:
                        logger.error(f"[CRAWL] Failed to fetch page {p}. Status code: {response.status_code}")
                        continue

                    soup = BeautifulSoup(response.text, 'html.parser')
                    article_boards = soup.find_all('div', class_='article-board m-tcol-c')

                    # The article rows are read from the second matching
                    # container, so at least two must be present.
                    if len(article_boards) < 2:
                        logger.warning(f"[CRAWL] No article boards found on page {p}")
                        continue

                    rows = article_boards[1].find_all('tr')
                    logger.info(f"[CRAWL] Found {len(rows)} rows on page {p}")

                    for row_data in rows:
                        try:
                            parsed = _parse_article_row(row_data, current_date)
                        except (AttributeError, KeyError, ValueError) as e:
                            # One malformed row must not discard the rest of
                            # the page.
                            logger.warning(f"[CRAWL] Row parsing error: {str(e)}")
                            continue
                        if parsed is None:
                            continue
                        full_link, title, date, views, likes, comments = parsed

                        # Excel row
                        worksheet.write_url(row, 0, full_link, link_format, title)
                        worksheet.write(row, 1, date, date_format)
                        worksheet.write_number(row, 2, views, number_format)
                        worksheet.write_number(row, 3, likes, number_format)
                        worksheet.write_number(row, 4, comments, number_format)

                        # HTML row. Title, link and date come from the
                        # scraped page, so they are escaped before being
                        # embedded in markup.
                        safe_link = html.escape(full_link, quote=True)
                        safe_title = html.escape(title)
                        safe_date = html.escape(date)
                        html_parts.append(f"""        <tr>
            <td><a href='{safe_link}' target='_blank'>{safe_title}</a></td>
            <td>{safe_date}</td>
            <td>{views:,}</td>
            <td>{likes:,}</td>
            <td>{comments:,}</td>
        </tr>
""")
                        row += 1

                except Exception as e:
                    # Page-level boundary: log and move on to the next page.
                    logger.error(f"[CRAWL] Page {p} crawling error: {str(e)}")
                    continue

            # Filter over the header and every data row that was written.
            worksheet.autofilter(0, 0, max(row - 1, 0), len(headers) - 1)

            worksheet.set_column(0, 0, 50)
            worksheet.set_column(1, 1, 12)
            worksheet.set_column(2, 2, 10)
            worksheet.set_column(3, 3, 10)
            worksheet.set_column(4, 4, 10)
        finally:
            # Always close so the workbook is finalized even when an error
            # interrupts the crawl.
            workbook.close()

        html_parts.append(_CRAWL_TABLE_TAIL)
        return filename, "".join(html_parts)

    except Exception as e:
        error_message = f"데이터 수집 중 오류가 발생했습니다. 잠시 후 다시 시도해 주세요. (에러: {str(e)})"
        logger.error(f"[CRAWL] 전체 크롤링 실패: {str(e)}")
        return None, f"<p style='color: #dc3545; padding: 10px; background-color: #f8d7da; border-radius: 4px;'>{html.escape(error_message)}</p>"
360
+
361
def crawl_with_progress(board, pages):
    """Run the crawl for the UI and report a status message.

    Args:
        board: Board display name.
        pages: Number of pages to collect.

    Returns:
        A 3-tuple ``(excel_file, html_output, status)``. ``excel_file`` is
        ``None`` when the crawl did not produce a file.
    """
    try:
        excel_file, html_output = extract_data_to_excel_and_html(pages, board)
    except Exception as e:
        # Always return three values, even on an unexpected error.
        return None, "", f"오류 발생: {str(e)}"

    if excel_file:
        return excel_file, html_output, "수집 완료"

    # Pass the extractor's message through instead of discarding it, so the
    # user can see why the crawl failed.
    return None, html_output or "", "수집 실패"
370
+
371
+ css = """
372
+ /* 전체 μ»¨ν…Œμ΄λ„ˆ μŠ€νƒ€μΌλ§ */
373
+ .gradio-container {
374
+ font-family: 'Pretendard', -apple-system, BlinkMacSystemFont, system-ui, Roboto, sans-serif !important;
375
+ max-width: 1000px !important;
376
+ margin: 2rem auto !important;
377
+ padding: 2rem !important;
378
+ background-color: #ffffff !important;
379
+ box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24) !important;
380
+ border-radius: 12px !important;
381
+ }
382
+
383
+ /* 제λͺ© μŠ€νƒ€μΌλ§ */
384
+ h1 {
385
+ font-size: 2.2rem !important;
386
+ font-weight: 700 !important;
387
+ color: #000000 !important;
388
+ text-align: center !important;
389
+ margin-bottom: 2rem !important;
390
+ padding-bottom: 1.5rem !important;
391
+ border-bottom: 2px solid #000000 !important;
392
+ }
393
+
394
+ /* μ„€λͺ… ν…μŠ€νŠΈ μŠ€νƒ€μΌλ§ */
395
+ .gr-markdown {
396
+ text-align: center !important;
397
+ color: #666666 !important;
398
+ font-size: 1rem !important;
399
+ margin-bottom: 2rem !important;
400
+ }
401
+
402
+ /* λΌλ””μ˜€ λ²„νŠΌ κ·Έλ£Ή μŠ€νƒ€μΌλ§ */
403
+ .gr-form {
404
+ background-color: #f8f8f8 !important;
405
+ padding: 1.5rem !important;
406
+ border-radius: 8px !important;
407
+ margin-bottom: 1.5rem !important;
408
+ }
409
+
410
+ .gr-radio-row {
411
+ display: grid !important;
412
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)) !important;
413
+ gap: 1rem !important;
414
+ padding: 1rem !important;
415
+ }
416
+
417
+ .gr-radio {
418
+ border: 2px solid #000000 !important;
419
+ padding: 0.8rem !important;
420
+ border-radius: 6px !important;
421
+ transition: all 0.3s ease !important;
422
+ }
423
+
424
+ .gr-radio:checked {
425
+ background-color: #000000 !important;
426
+ color: #ffffff !important;
427
+ }
428
+
429
+ /* 숫자 μž…λ ₯ ν•„λ“œ μŠ€νƒ€μΌλ§ */
430
+ .gr-number-input {
431
+ border: 2px solid #000000 !important;
432
+ border-radius: 6px !important;
433
+ padding: 0.8rem !important;
434
+ font-size: 1rem !important;
435
+ width: 100% !important;
436
+ max-width: 300px !important;
437
+ margin: 0 auto !important;
438
+ }
439
+
440
+ /* μƒνƒœ ν…μŠ€νŠΈλ°•μŠ€ μŠ€νƒ€μΌλ§ */
441
+ .gr-textbox {
442
+ background-color: #f8f8f8 !important;
443
+ border: 1px solid #e0e0e0 !important;
444
+ border-radius: 6px !important;
445
+ padding: 1rem !important;
446
+ margin: 1rem 0 !important;
447
+ font-size: 0.95rem !important;
448
+ }
449
+
450
+ /* μˆ˜μ§‘ λ²„νŠΌ μŠ€νƒ€μΌλ§ */
451
+ .gr-button {
452
+ background-color: #000000 !important;
453
+ color: #ffffff !important;
454
+ padding: 1rem 2rem !important;
455
+ border-radius: 6px !important;
456
+ font-weight: 600 !important;
457
+ font-size: 1.1rem !important;
458
+ border: none !important;
459
+ width: 100% !important;
460
+ max-width: 300px !important;
461
+ margin: 1.5rem auto !important;
462
+ display: block !important;
463
+ transition: all 0.3s ease !important;
464
+ }
465
+
466
+ .gr-button:hover {
467
+ background-color: #333333 !important;
468
+ transform: translateY(-2px) !important;
469
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
470
+ }
471
+
472
+ /* 파일 λ‹€μš΄λ‘œλ“œ μ˜μ—­ μŠ€νƒ€μΌλ§ */
473
+ .gr-file {
474
+ border: 2px dashed #000000 !important;
475
+ border-radius: 8px !important;
476
+ padding: 2rem !important;
477
+ text-align: center !important;
478
+ background-color: #f8f8f8 !important;
479
+ margin-top: 2rem !important;
480
+ }
481
+
482
+ /* HTML κ²°κ³Ό ν…Œμ΄λΈ” μŠ€νƒ€μΌλ§ */
483
+ table {
484
+ width: 100% !important;
485
+ border-collapse: collapse !important;
486
+ margin-top: 1.5rem !important;
487
+ border-radius: 8px !important;
488
+ overflow: hidden !important;
489
+ box-shadow: 0 1px 3px rgba(0,0,0,0.12) !important;
490
+ }
491
+
492
+ th {
493
+ background-color: #000000 !important;
494
+ color: #ffffff !important;
495
+ padding: 1rem !important;
496
+ text-align: center !important; /* λͺ¨λ“  헀더λ₯Ό κ°€μš΄λ° μ •λ ¬λ‘œ λ³€κ²½ */
497
+ font-weight: 600 !important;
498
+ }
499
+
500
+ td {
501
+ padding: 0.8rem !important;
502
+ border-bottom: 1px solid #e0e0e0 !important;
503
+ color: #333333 !important;
504
+ }
505
+
506
+ tr:hover {
507
+ background-color: #f5f5f5 !important;
508
+ }
509
+
510
+ /* λ°˜μ‘ν˜• λ””μžμΈ */
511
+ @media (max-width: 768px) {
512
+ .gradio-container {
513
+ padding: 1rem !important;
514
+ margin: 1rem !important;
515
+ }
516
+
517
+ h1 {
518
+ font-size: 1.8rem !important;
519
+ }
520
+
521
+ .gr-radio-row {
522
+ grid-template-columns: 1fr !important;
523
+ }
524
+ }
525
+ """
526
+
527
# Gradio UI: board picker, page count, status line, and the two outputs
# (downloadable Excel file and HTML preview). User-facing Korean text was
# mis-encoded and is restored here.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# N사 Cafe 핫딜 게시판 크롤링")
    gr.Markdown("""
    페이지를 입력하면 결과를 출력합니다.
    최대 페이지수는 50페이지 입니다.
    """)

    with gr.Row():
        board_select = gr.Radio(
            choices=["맘이베베", "맘스홀릭", "광주맘", "쇼핑지름신", "부산맘", "진포맘"],
            label="게시판을 선택하세요",
            container=True
        )

    with gr.Row():
        inp = gr.Number(
            label="수집을 원하시는 페이지 수를 입력하세요(최대 50페이지)",
            value=1,
            minimum=1,
            maximum=50,
            precision=0,  # whole pages only; the handler iterates range(1, pages + 1)
            container=True
        )

    status = gr.Textbox(
        label="상태",
        value="대기 중...",
        container=True
    )

    btn = gr.Button("수집하기", variant="primary")

    output_file = gr.File(label="엑셀파일 다운로드")
    output_html = gr.HTML()

    # The handler returns (file, html, status), matching the outputs order.
    btn.click(
        fn=crawl_with_progress,
        inputs=[board_select, inp],
        outputs=[output_file, output_html, status]
    )

if __name__ == "__main__":
    demo.launch()