Kims12 committed (verified)
Commit d5fb63f · Parent: 17c4358

Create app.py

Files changed (1):
  1. app.py +310 -0
app.py ADDED
@@ -0,0 +1,310 @@
import gradio as gr
import requests
from bs4 import BeautifulSoup
import urllib.parse  # used to resolve the iframe path to an absolute URL
import re
import logging
import tempfile
import pandas as pd
import mecab  # python-mecab-ko library
import os
import time
import hmac
import hashlib
import base64

# Simple debug-logging helper
def debug_log(message: str):
    print(f"[DEBUG] {message}")

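# Dependency / configuration note (sketch, inferred from the imports and the
# os.environ lookups below): the Space needs gradio, requests, beautifulsoup4,
# pandas, openpyxl and python-mecab-ko installed, and expects NAVER_API_KEY,
# NAVER_SECRET_KEY, NAVER_CUSTOMER_ID, NAVER_SEARCH_CLIENT_ID and
# NAVER_SEARCH_CLIENT_SECRET to be set as environment variables
# (e.g. Hugging Face Space secrets).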
# =============================================================================
# [Base code]: extract the title and body text from a Naver blog post
# =============================================================================
def scrape_naver_blog(url: str) -> str:
    debug_log("scrape_naver_blog started")
    debug_log(f"Requested URL: {url}")

    # Browser-like headers (to avoid being blocked as a crawler)
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.110 Safari/537.36"
        )
    }

    try:
        # 1) Request the main blog page
        response = requests.get(url, headers=headers)
        debug_log("HTTP GET request (main page) done")

        if response.status_code != 200:
            debug_log(f"Request failed, status code: {response.status_code}")
            return f"An error occurred. Status code: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        debug_log("HTML parsing (main page) done")

        # 2) Find the iframe that holds the actual post
        iframe = soup.select_one("iframe#mainFrame")
        if not iframe:
            debug_log("Could not find the iframe#mainFrame tag.")
            return "Could not find the content iframe."

        iframe_src = iframe.get("src")
        if not iframe_src:
            debug_log("The iframe has no src attribute.")
            return "Could not find the src of the content iframe."

        # 3) Resolve a relative iframe src to an absolute URL
        parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
        debug_log(f"iframe request URL: {parsed_iframe_url}")

        # 4) Request the iframe page
        iframe_response = requests.get(parsed_iframe_url, headers=headers)
        debug_log("HTTP GET request (iframe page) done")

        if iframe_response.status_code != 200:
            debug_log(f"iframe request failed, status code: {iframe_response.status_code}")
            return f"An error occurred in the iframe. Status code: {iframe_response.status_code}"

        iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
        debug_log("HTML parsing (iframe page) done")

        # Extract the title
        title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
        title = title_div.get_text(strip=True) if title_div else "Title not found."
        debug_log(f"Extracted title: {title}")

        # Extract the body
        content_div = iframe_soup.select_one('.se-main-container')
        if content_div:
            content = content_div.get_text("\n", strip=True)
        else:
            content = "Body not found."
        debug_log("Body extraction done")

        # Combine the results
        result = f"[Title]\n{title}\n\n[Body]\n{content}"
        debug_log("Title and body combined, ready to return")
        return result

    except Exception as e:
        debug_log(f"Error: {str(e)}")
        return f"An error occurred while scraping: {str(e)}"

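# Usage sketch (not executed here; assumes a public post URL like the placeholder
# shown in the UI below):
#   print(scrape_naver_blog("https://blog.naver.com/ssboost/222983068507")[:200])
# which should print "[Title]" followed by the start of the post body.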
# =============================================================================
# [Reference code 1]: morphological analysis with Mecab
# =============================================================================
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def analyze_text(text: str):
    logger.debug("Original text: %s", text)

    # 1. Keep Korean characters only (whitespace, Latin letters, symbols, etc. are removed)
    filtered_text = re.sub(r'[^가-힣]', '', text)
    logger.debug("Filtered text (Korean only, whitespace removed): %s", filtered_text)

    if not filtered_text:
        logger.debug("No valid Korean text.")
        return pd.DataFrame(columns=["Word", "Frequency"]), ""

    # 2. POS tagging with Mecab (keep only nouns and compound nouns)
    mecab_instance = mecab.MeCab()  # create the analyzer instance
    tokens = mecab_instance.pos(filtered_text)
    logger.debug("POS tagging result: %s", tokens)

    freq = {}
    for word, pos in tokens:
        if word and word.strip():
            if pos.startswith("NN"):
                freq[word] = freq.get(word, 0) + 1
                logger.debug("Word: %s, POS: %s, current count: %d", word, pos, freq[word])

    # 3. Sort frequencies in descending order
    sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    logger.debug("Word frequencies sorted in descending order: %s", sorted_freq)

    # 4. Build the result DataFrame
    df = pd.DataFrame(sorted_freq, columns=["Word", "Frequency"])
    logger.debug("Result DataFrame created, shape: %s", df.shape)

    # 5. Write the result to a temporary Excel file
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    df.to_excel(temp_file.name, index=False, engine='openpyxl')
    temp_file.close()
    logger.debug("Excel file created: %s", temp_file.name)

    return df, temp_file.name

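# Example (sketch): the filter above strips everything outside the Hangul syllable
# range, e.g. re.sub(r'[^가-힣]', '', "파이썬 Python 3.12 공부") -> "파이썬공부";
# MeCab().pos() then returns (surface, POS-tag) pairs, and only tags starting with
# "NN" (the noun tags) are counted.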
# =============================================================================
# [Reference code 2]: keyword search volume and blog post count lookups
# =============================================================================
def generate_signature(timestamp, method, uri, secret_key):
    message = f"{timestamp}.{method}.{uri}"
    digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
    return base64.b64encode(digest).decode()

def get_header(method, uri, api_key, secret_key, customer_id):
    timestamp = str(round(time.time() * 1000))
    signature = generate_signature(timestamp, method, uri, secret_key)
    return {
        "Content-Type": "application/json; charset=UTF-8",
        "X-Timestamp": timestamp,
        "X-API-KEY": api_key,
        "X-Customer": str(customer_id),
        "X-Signature": signature
    }

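# Signature sketch: for timestamp "1700000000000", method "GET" and uri "/keywordstool",
# the string that gets signed is "1700000000000.GET./keywordstool"; its HMAC-SHA256
# digest (keyed with the secret key) is Base64-encoded and sent as X-Signature.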
def fetch_related_keywords(keyword):
    API_KEY = os.environ["NAVER_API_KEY"]
    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]

    BASE_URL = "https://api.naver.com"
    uri = "/keywordstool"
    method = "GET"
    headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
    params = {
        "hintKeywords": [keyword],
        "showDetail": "1"
    }
    response = requests.get(BASE_URL + uri, params=params, headers=headers)
    data = response.json()
    if "keywordList" not in data:
        return pd.DataFrame()
    df = pd.DataFrame(data["keywordList"])
    if len(df) > 100:
        df = df.head(100)

    def parse_count(x):
        try:
            return int(str(x).replace(",", ""))
        except Exception:
            return 0

    df["PC Monthly Searches"] = df["monthlyPcQcCnt"].apply(parse_count)
    df["Mobile Monthly Searches"] = df["monthlyMobileQcCnt"].apply(parse_count)
    df["Total Monthly Searches"] = df["PC Monthly Searches"] + df["Mobile Monthly Searches"]
    df.rename(columns={"relKeyword": "Keyword"}, inplace=True)
    result_df = df[["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches"]]
    return result_df

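# Note (assumption about the response shape): /keywordstool is expected to return
# something like {"keywordList": [{"relKeyword": "...", "monthlyPcQcCnt": "1,200",
# "monthlyMobileQcCnt": "< 10", ...}, ...]}; non-numeric counts such as "< 10"
# fall through parse_count's except branch and become 0.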
def fetch_blog_count(keyword):
    client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
    client_secret = os.environ["NAVER_SEARCH_CLIENT_SECRET"]
    url = "https://openapi.naver.com/v1/search/blog.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret
    }
    params = {"query": keyword, "display": 1}
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        return data.get("total", 0)
    else:
        return 0

def create_excel_file(df):
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        excel_path = tmp.name
    df.to_excel(excel_path, index=False)
    return excel_path

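# Example (sketch): a successful blog.json response carries the match count in its
# "total" field, e.g. {"total": 123456, "start": 1, "display": 1, "items": [...]},
# so fetch_blog_count(keyword) returns that count, or 0 on any non-200 response.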
def process_keyword(keywords: str, include_related: bool):
    """
    Split the input into one keyword per line, look up search volume for each
    keyword via the Naver Search Ad API, optionally append related keywords for
    the first keyword, then look up the blog post count for every keyword and
    return the result as a DataFrame together with an Excel file.
    """
    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
    result_dfs = []

    for idx, kw in enumerate(input_keywords):
        df_kw = fetch_related_keywords(kw)
        if df_kw.empty:
            continue
        row_kw = df_kw[df_kw["Keyword"] == kw]
        if not row_kw.empty:
            result_dfs.append(row_kw)
        else:
            result_dfs.append(df_kw.head(1))
        if include_related and idx == 0:
            df_related = df_kw[df_kw["Keyword"] != kw]
            if not df_related.empty:
                result_dfs.append(df_related)

    if result_dfs:
        result_df = pd.concat(result_dfs, ignore_index=True)
        result_df.drop_duplicates(subset=["Keyword"], inplace=True)
    else:
        result_df = pd.DataFrame(columns=["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches"])

    result_df["Blog Post Count"] = result_df["Keyword"].apply(fetch_blog_count)
    result_df.sort_values(by="Total Monthly Searches", ascending=False, inplace=True)

    return result_df, create_excel_file(result_df)

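# Example (sketch, hypothetical input): process_keyword("파이썬\n판다스", include_related=False)
# queries each line as a keyword and returns a DataFrame with the columns
# ["Keyword", "PC Monthly Searches", "Mobile Monthly Searches", "Total Monthly Searches",
#  "Blog Post Count"], sorted by total searches, plus the path of the generated Excel file.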
# =============================================================================
# Combined pipeline: run morphological analysis on the blog text, then attach
# each keyword's search volume and blog post count and return the final result.
# =============================================================================
def process_blog_content(text: str):
    debug_log("process_blog_content started")
    # 1. Run morphological analysis (reference code 1)
    df_morph, morph_excel = analyze_text(text)
    debug_log("Morphological analysis done")

    if df_morph.empty:
        debug_log("Morphological analysis returned no rows")
        return df_morph, ""

    # 2. Collect the analyzed words, one per line, for the keyword lookup
    keywords = "\n".join(df_morph["Word"].tolist())
    debug_log(f"Extracted word list: {keywords}")

    # 3. Look up search volumes and blog post counts (reference code 2)
    df_keyword, keyword_excel = process_keyword(keywords, include_related=False)
    debug_log("Keyword lookup done")

    # 4. Merge the morphology result with the keyword data on the word column
    df_merged = pd.merge(df_morph, df_keyword, left_on="Word", right_on="Keyword", how="left")
    debug_log("Merge done")
    df_merged.drop(columns=["Keyword"], inplace=True)

    # 5. Write the merged result to an Excel file
    merged_excel = create_excel_file(df_merged)
    debug_log(f"Merged Excel file created: {merged_excel}")

    return df_merged, merged_excel

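# Note: because the merge above is a left join on the word, any word for which the
# keyword lookup returned no row keeps NaN in the search-volume and blog-count columns.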
# =============================================================================
# Gradio interface (Hugging Face Gradio environment)
# =============================================================================
with gr.Blocks() as demo:
    gr.Markdown("# Blog Post Morphological Analysis and Keyword Lookup")

    with gr.Tab("Blog content input and scraping"):
        with gr.Row():
            blog_url = gr.Textbox(label="Naver blog link", placeholder="e.g. https://blog.naver.com/ssboost/222983068507")
            fetch_button = gr.Button("Fetch blog content")
        blog_content = gr.Textbox(label="Blog content (title and body)", lines=10, placeholder="Fetch the blog content or paste it here.")
        # Clicking "Fetch blog content" runs the scraper and fills blog_content
        fetch_button.click(fn=scrape_naver_blog, inputs=blog_url, outputs=blog_content)

    with gr.Tab("Run morphological analysis"):
        with gr.Row():
            analysis_button = gr.Button("Analyze")
        # interactive=True so the result table can be edited in place
        output_table = gr.Dataframe(label="Analysis result (morphemes and keyword data)", interactive=True)
        output_file = gr.File(label="Download Excel")
        # Clicking "Analyze" runs process_blog_content
        analysis_button.click(fn=process_blog_content, inputs=blog_content, outputs=[output_table, output_file])

if __name__ == "__main__":
    debug_log("Starting the Gradio app")
    demo.launch()
    debug_log("Gradio app stopped")