Kims12 committed on
Commit
771fcfc
·
verified ·
1 Parent(s): f8570dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -214
app.py CHANGED
@@ -17,10 +17,11 @@ import base64
17
  def debug_log(message: str):
18
  print(f"[DEBUG] {message}")
19
 
20
- # --- 네이버 블로그 스크래핑 ---
21
  def scrape_naver_blog(url: str) -> str:
22
  debug_log("scrape_naver_blog ํ•จ์ˆ˜ ์‹œ์ž‘")
23
  debug_log(f"์š”์ฒญ๋ฐ›์€ URL: {url}")
 
24
  headers = {
25
  "User-Agent": (
26
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -28,14 +29,20 @@ def scrape_naver_blog(url: str) -> str:
28
  "Chrome/96.0.4664.110 Safari/537.36"
29
  )
30
  }
 
31
  try:
 
32
  response = requests.get(url, headers=headers)
33
  debug_log("HTTP GET ์š”์ฒญ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
34
  if response.status_code != 200:
35
  debug_log(f"์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ์ฝ”๋“œ: {response.status_code}")
36
  return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {response.status_code}"
 
 
37
  soup = BeautifulSoup(response.text, "html.parser")
38
  debug_log("HTML ํŒŒ์‹ฑ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
 
 
39
  iframe = soup.select_one("iframe#mainFrame")
40
  if not iframe:
41
  debug_log("iframe#mainFrame ํƒœ๊ทธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
@@ -44,8 +51,12 @@ def scrape_naver_blog(url: str) -> str:
44
  if not iframe_src:
45
  debug_log("iframe src๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
46
  return "๋ณธ๋ฌธ iframe์˜ src๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
 
 
47
  parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
48
  debug_log(f"iframe ํŽ˜์ด์ง€ ์š”์ฒญ URL: {parsed_iframe_url}")
 
 
49
  iframe_response = requests.get(parsed_iframe_url, headers=headers)
50
  debug_log("HTTP GET ์š”์ฒญ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
51
  if iframe_response.status_code != 200:
@@ -53,6 +64,8 @@ def scrape_naver_blog(url: str) -> str:
53
  return f"iframe์—์„œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {iframe_response.status_code}"
54
  iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
55
  debug_log("HTML ํŒŒ์‹ฑ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
 
 
56
  title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
57
  title = title_div.get_text(strip=True) if title_div else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
58
  debug_log(f"์ถ”์ถœ๋œ ์ œ๋ชฉ: {title}")
@@ -62,42 +75,58 @@ def scrape_naver_blog(url: str) -> str:
62
  else:
63
  content = "๋ณธ๋ฌธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
64
  debug_log("๋ณธ๋ฌธ ์ถ”์ถœ ์™„๋ฃŒ")
 
65
  result = f"[์ œ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
66
- debug_log("์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ ํ•ฉ์นจ ์™„๋ฃŒ")
67
  return result
 
68
  except Exception as e:
69
  debug_log(f"์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
70
  return f"์Šคํฌ๋ž˜ํ•‘ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
71
 
72
- # --- 형태소 분석 (참조코드-1) ---
73
  def analyze_text(text: str):
74
  logging.basicConfig(level=logging.DEBUG)
75
  logger = logging.getLogger(__name__)
76
  logger.debug("์›๋ณธ ํ…์ŠคํŠธ: %s", text)
 
 
77
  filtered_text = re.sub(r'[^๊ฐ€-ํžฃ]', '', text)
78
- logger.debug("ํ•„ํ„ฐ๋ง๋œ ํ…์ŠคํŠธ: %s", filtered_text)
 
79
  if not filtered_text:
80
  logger.debug("์œ ํšจํ•œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๊ฐ€ ์—†์Œ.")
81
  return pd.DataFrame(columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"]), ""
 
 
82
  mecab_instance = mecab.MeCab()
83
  tokens = mecab_instance.pos(filtered_text)
84
  logger.debug("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ: %s", tokens)
 
85
  freq = {}
86
  for word, pos in tokens:
87
- if word and word.strip() and pos.startswith("NN"):
88
- freq[word] = freq.get(word, 0) + 1
89
- logger.debug("๋‹จ์–ด: %s, ํ’ˆ์‚ฌ: %s, ๋นˆ๋„: %d", word, pos, freq[word])
 
 
 
90
  sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
91
- logger.debug("์ •๋ ฌ๋œ ๋‹จ์–ด ๋นˆ๋„: %s", sorted_freq)
 
 
92
  df = pd.DataFrame(sorted_freq, columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"])
93
- logger.debug("ํ˜•ํƒœ์†Œ ๋ถ„์„ DataFrame ์ƒ์„ฑ๋จ, shape: %s", df.shape)
 
 
94
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
95
  df.to_excel(temp_file.name, index=False, engine='openpyxl')
96
  temp_file.close()
97
  logger.debug("Excel ํŒŒ์ผ ์ƒ์„ฑ๋จ: %s", temp_file.name)
 
98
  return df, temp_file.name
99
 
100
- # --- 네이버 검색 및 광고 API 관련 (참조코드-2) ---
101
  def generate_signature(timestamp, method, uri, secret_key):
102
  message = f"{timestamp}.{method}.{uri}"
103
  digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
@@ -119,6 +148,7 @@ def fetch_related_keywords(keyword):
119
  API_KEY = os.environ["NAVER_API_KEY"]
120
  SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
121
  CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
 
122
  BASE_URL = "https://api.naver.com"
123
  uri = "/keywordstool"
124
  method = "GET"
@@ -134,11 +164,13 @@ def fetch_related_keywords(keyword):
134
  df = pd.DataFrame(data["keywordList"])
135
  if len(df) > 100:
136
  df = df.head(100)
 
137
  def parse_count(x):
138
  try:
139
  return int(str(x).replace(",", ""))
140
  except:
141
  return 0
 
142
  df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
143
  df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
144
  df["ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] + df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"]
@@ -177,6 +209,7 @@ def process_keyword(keywords: str, include_related: bool):
177
  debug_log(f"process_keyword ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ๋“ค: {keywords}, ์—ฐ๊ด€๊ฒ€์ƒ‰์–ด ํฌํ•จ: {include_related}")
178
  input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
179
  result_dfs = []
 
180
  for idx, kw in enumerate(input_keywords):
181
  df_kw = fetch_related_keywords(kw)
182
  if df_kw.empty:
@@ -190,241 +223,73 @@ def process_keyword(keywords: str, include_related: bool):
190
  df_related = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] != kw]
191
  if not df_related.empty:
192
  result_dfs.append(df_related)
 
193
  if result_dfs:
194
  result_df = pd.concat(result_dfs, ignore_index=True)
195
  result_df.drop_duplicates(subset=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
196
  else:
197
  result_df = pd.DataFrame(columns=["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"])
 
198
  result_df["๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"] = result_df["์ •๋ณดํ‚ค์›Œ๋“œ"].apply(fetch_blog_count)
199
  result_df.sort_values(by="ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", ascending=False, inplace=True)
200
  debug_log("process_keyword ์™„๋ฃŒ")
201
  return result_df, create_excel_file(result_df)
202
 
203
- # --- ํ˜•ํƒœ์†Œ ๋ถ„์„๊ณผ ๊ฒ€์ƒ‰๋Ÿ‰/๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ๋ณ‘ํ•ฉ ---
204
- def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
 
 
 
 
 
 
 
205
  debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์‹œ์ž‘")
206
  df_freq, _ = analyze_text(text)
207
  if df_freq.empty:
208
  debug_log("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ๊ฐ€ ๋นˆ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์ž…๋‹ˆ๋‹ค.")
209
  return df_freq, ""
210
- if remove_freq1:
211
- before_shape = df_freq.shape
212
- df_freq = df_freq[df_freq["๋นˆ๋„์ˆ˜"] != 1]
213
- debug_log(f"๋นˆ๋„์ˆ˜ 1 ์ œ๊ฑฐ ์ ์šฉ๋จ. {before_shape} -> {df_freq.shape}")
214
  keywords = "\n".join(df_freq["๋‹จ์–ด"].tolist())
215
  debug_log(f"๋ถ„์„๋œ ํ‚ค์›Œ๋“œ: {keywords}")
 
 
216
  df_keyword_info, _ = process_keyword(keywords, include_related=False)
217
  debug_log("๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ ์™„๋ฃŒ")
 
 
218
  merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋‹จ์–ด", right_on="์ •๋ณดํ‚ค์›Œ๋“œ", how="left")
219
  merged_df.drop(columns=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
 
 
220
  merged_excel_path = create_excel_file(merged_df)
221
  debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์™„๋ฃŒ")
222
  return merged_df, merged_excel_path
223
 
224
- # --- ์ง์ ‘ ํ‚ค์›Œ๋“œ ๋ถ„์„ (๋‹จ๋… ๋ถ„์„) ---
225
- def direct_keyword_analysis(text: str, keyword_input: str):
226
- debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
227
- keywords = re.split(r'[\n,]+', keyword_input)
228
- keywords = [kw.strip() for kw in keywords if kw.strip()]
229
- debug_log(f"์ž…๋ ฅ๋œ ํ‚ค์›Œ๋“œ ๋ชฉ๋ก: {keywords}")
230
- results = []
231
- for kw in keywords:
232
- count = text.count(kw)
233
- results.append((kw, count))
234
- debug_log(f"ํ‚ค์›Œ๋“œ '{kw}'์˜ ๋นˆ๋„์ˆ˜: {count}")
235
- df = pd.DataFrame(results, columns=["ํ‚ค์›Œ๋“œ", "๋นˆ๋„์ˆ˜"])
236
- excel_path = create_excel_file(df)
237
- debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
238
- return df, excel_path
239
-
240
- # --- ํ†ตํ•ฉ ๋ถ„์„ (ํ˜•ํƒœ์†Œ ๋ถ„์„ + ์ง์ ‘ ํ‚ค์›Œ๋“œ ๋ถ„์„) ---
241
- def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
242
- debug_log("combined_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
243
- merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
244
- if "์ง์ ‘์ž…๋ ฅ" not in merged_df.columns:
245
- merged_df["์ง์ ‘์ž…๋ ฅ"] = ""
246
- direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
247
- direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
248
- debug_log(f"์ž…๋ ฅ๋œ ์ง์ ‘ ํ‚ค์›Œ๋“œ: {direct_keywords}")
249
- for dk in direct_keywords:
250
- if dk in merged_df["๋‹จ์–ด"].values:
251
- merged_df.loc[merged_df["๋‹จ์–ด"] == dk, "์ง์ ‘์ž…๋ ฅ"] = "์ง์ ‘์ž…๋ ฅ"
252
- else:
253
- freq = blog_text.count(dk)
254
- df_direct, _ = process_keyword(dk, include_related=False)
255
- if (not df_direct.empty) and (dk in df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"].values):
256
- row = df_direct[df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"] == dk].iloc[0]
257
- pc = row.get("PC์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
258
- mobile = row.get("๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
259
- total = row.get("ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
260
- blog_count = row.get("๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜", None)
261
- else:
262
- pc = mobile = total = blog_count = None
263
- new_row = {
264
- "๋‹จ์–ด": dk,
265
- "๋นˆ๋„์ˆ˜": freq,
266
- "PC์›”๊ฒ€์ƒ‰๋Ÿ‰": pc,
267
- "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰": mobile,
268
- "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰": total,
269
- "๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜": blog_count,
270
- "์ง์ ‘์ž…๋ ฅ": "์ง์ ‘์ž…๋ ฅ"
271
- }
272
- merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
273
- merged_df = merged_df.sort_values(by="๋นˆ๋„์ˆ˜", ascending=False).reset_index(drop=True)
274
- combined_excel = create_excel_file(merged_df)
275
- debug_log("combined_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
276
- return merged_df, combined_excel
277
-
278
- # --- ๋ถ„์„ ํ•ธ๋“ค๋Ÿฌ ---
279
- def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
280
- debug_log("analysis_handler ํ•จ์ˆ˜ ์‹œ์ž‘")
281
- if direct_keyword_only:
282
- # "์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„" ์„ ํƒ ์‹œ ๋‹จ๋… ๋ถ„์„ ์ˆ˜ํ–‰
283
- return direct_keyword_analysis(blog_text, direct_keyword_input)
284
- else:
285
- # ๊ธฐ๋ณธ ํ†ตํ•ฉ ๋ถ„์„ ์ˆ˜ํ–‰
286
- return combined_analysis(blog_text, remove_freq1, direct_keyword_input)
287
-
288
- # --- ์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰ ---
289
- def fetch_blog_content(url: str):
290
- debug_log("fetch_blog_content ํ•จ์ˆ˜ ์‹œ์ž‘")
291
- content = scrape_naver_blog(url)
292
- debug_log("fetch_blog_content ํ•จ์ˆ˜ ์™„๋ฃŒ")
293
- return content
294
-
295
- # --- Custom CSS ---
296
- custom_css = """
297
- /* ์ „์ฒด ์ปจํ…Œ์ด๋„ˆ ์Šคํƒ€์ผ */
298
- .gradio-container {
299
- max-width: 960px;
300
- margin: auto;
301
- font-family: 'Helvetica Neue', Arial, sans-serif;
302
- background: #f5f7fa;
303
- padding: 2rem;
304
- }
305
- /* ํ—ค๋” ์Šคํƒ€์ผ */
306
- .custom-header {
307
- text-align: center;
308
- font-size: 2.5rem;
309
- font-weight: bold;
310
- margin-bottom: 1.5rem;
311
- color: #333;
312
- }
313
- /* ๊ทธ๋ฃน ๋ฐ•์Šค ์Šคํƒ€์ผ */
314
- .custom-group {
315
- background: #ffffff;
316
- border-radius: 8px;
317
- padding: 1.5rem;
318
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
319
- margin-bottom: 1.5rem;
320
- }
321
- /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
322
- .custom-button {
323
- background-color: #007bff;
324
- color: #fff;
325
- border: none;
326
- border-radius: 4px;
327
- padding: 0.6rem 1.2rem;
328
- font-size: 1rem;
329
- cursor: pointer;
330
- transition: background-color 0.3s;
331
- }
332
- .custom-button:hover {
333
- background-color: #0056b3;
334
- }
335
- /* ์ฒดํฌ๋ฐ•์Šค ์Šคํƒ€์ผ */
336
- .custom-checkbox {
337
- margin-right: 1rem;
338
- font-size: 1rem;
339
- font-weight: bold;
340
- }
341
- /* ๊ฒฐ๊ณผ ํ…Œ์ด๋ธ” ๋ฐ ๋‹ค์šด๋กœ๋“œ ๋ฒ„ํŠผ */
342
- .custom-result {
343
- margin-top: 1.5rem;
344
- }
345
- /* ๊ฐ€์šด๋ฐ ์ •๋ ฌ */
346
- .centered {
347
- display: flex;
348
- justify-content: center;
349
- align-items: center;
350
- }
351
- /* ์‚ฌ์šฉ์„ค๋ช… ์Šคํƒ€์ผ */
352
- .usage-instructions {
353
- font-size: 1.1rem;
354
- line-height: 1.6;
355
- color: #555;
356
- background: #fff;
357
- padding: 1.5rem;
358
- border-radius: 8px;
359
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
360
- margin-top: 2rem;
361
- }
362
- .usage-instructions h2 {
363
- font-size: 1.8rem;
364
- font-weight: bold;
365
- margin-bottom: 1rem;
366
- color: #333;
367
- }
368
- .usage-instructions ul {
369
- list-style: disc;
370
- margin-left: 2rem;
371
- }
372
- """
373
-
374
- # --- Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ ---
375
- with gr.Blocks(title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์„œ๋น„์Šค", css=custom_css) as demo:
376
- gr.HTML("<div class='custom-header'>๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์„œ๋น„์Šค ๐Ÿš€</div>")
377
- # ๋ธ”๋กœ๊ทธ ๋งํฌ์™€ ์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰ ๋ฒ„ํŠผ์„ ํ•œ ๊ทธ๋ฃน ๋‚ด์— ๋ฐฐ์น˜ (๋ฒ„ํŠผ์€ ๊ฐ€์šด๋ฐ ์ •๋ ฌ)
378
- with gr.Group(elem_classes="custom-group"):
379
  with gr.Row():
380
  blog_url_input = gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ", placeholder="์˜ˆ: https://blog.naver.com/ssboost/222983068507", lines=1)
381
- with gr.Row(elem_classes="centered"):
382
- scrape_button = gr.Button("์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰", elem_classes="custom-button")
383
- with gr.Group(elem_classes="custom-group"):
384
- blog_content_box = gr.Textbox(label="๋ธ”๋กœ๊ทธ ๋‚ด์šฉ (์ˆ˜์ • ๊ฐ€๋Šฅ)", lines=10, placeholder="์Šคํฌ๋ž˜ํ•‘๋œ ๋ธ”๋กœ๊ทธ ๋‚ด์šฉ์ด ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
385
- with gr.Group(elem_classes="custom-group"):
 
 
386
  with gr.Row():
387
- remove_freq_checkbox = gr.Checkbox(label="๋นˆ๋„์ˆ˜1 ์ œ๊ฑฐ", value=True, elem_classes="custom-checkbox")
388
  with gr.Row():
389
- direct_keyword_only_checkbox = gr.Checkbox(label="์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„", value=False, elem_classes="custom-checkbox")
390
  with gr.Row():
391
- direct_keyword_box = gr.Textbox(label="์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ (์—”ํ„ฐ ๋˜๋Š” ','๋กœ ๊ตฌ๋ถ„)", lines=2, placeholder="์˜ˆ: ํ‚ค์›Œ๋“œ1, ํ‚ค์›Œ๋“œ2\nํ‚ค์›Œ๋“œ3")
392
- with gr.Group(elem_classes="custom-group"):
393
- with gr.Row(elem_classes="centered"):
394
- analyze_button = gr.Button("๋ถ„์„ ์‹คํ–‰", elem_classes="custom-button")
395
- with gr.Group(elem_classes="custom-group custom-result"):
396
- result_df = gr.Dataframe(label="ํ†ตํ•ฉ ๋ถ„์„ ๊ฒฐ๊ณผ (๋‹จ์–ด, ๋นˆ๋„์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰, ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜, ์ง์ ‘์ž…๋ ฅ)", interactive=True)
397
- with gr.Group(elem_classes="custom-group"):
398
- excel_file = gr.File(label="Excel ๋‹ค์šด๋กœ๋“œ")
399
- # ์‚ฌ์šฉ์„ค๋ช… HTML ๋ธ”๋ก (์•„๋ž˜์— ๋ฐฐ์น˜)
400
- with gr.Group(elem_classes="custom-group"):
401
- usage_html = gr.HTML("""
402
- <div class="usage-instructions">
403
- <h2>์‚ฌ์šฉ ์„ค๋ช… ๐Ÿ“–</h2>
404
- <ul>
405
- <li>๐Ÿ”— <strong>๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ</strong>: ๋ถ„์„ํ•  ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์˜ URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.</li>
406
- <li>โœ‚๏ธ <strong>์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰</strong>: ๋งํฌ ์ž…๋ ฅ ํ›„ ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ์ด ์ž๋™์œผ๋กœ ๋ถˆ๋Ÿฌ์™€์ง‘๋‹ˆ๋‹ค.</li>
407
- <li>๐Ÿ“ <strong>๋ธ”๋กœ๊ทธ ๋‚ด์šฉ (์ˆ˜์ • ๊ฐ€๋Šฅ)</strong>: ๋ถˆ๋Ÿฌ์˜จ ๋ธ”๋กœ๊ทธ ๋‚ด์šฉ์ด ํ‘œ์‹œ๋˜๋ฉฐ, ํ•„์š”์— ๋”ฐ๋ผ ์ง์ ‘ ์ˆ˜์ •ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.</li>
408
- <li>โš™๏ธ <strong>์˜ต์…˜ ์„ค์ •</strong>:
409
- <ul>
410
- <li><em>๋นˆ๋„์ˆ˜1 ์ œ๊ฑฐ</em>: ๊ธฐ๋ณธ ์„ ํƒ๋˜์–ด ์žˆ์œผ๋ฉฐ, ๋นˆ๋„์ˆ˜๊ฐ€ 1์ธ ๋‹จ์–ด๋Š” ๊ฒฐ๊ณผ์—์„œ ์ œ์™ธํ•ฉ๋‹ˆ๋‹ค.</li>
411
- <li><em>์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ๋งŒ ๋ถ„์„</em>: ์ด ์˜ต์…˜์„ ์„ ํƒํ•˜๋ฉด, ๋ธ”๋กœ๊ทธ ๋ณธ๋ฌธ์—์„œ ์ง์ ‘ ์ž…๋ ฅํ•œ ํ‚ค์›Œ๋“œ๋งŒ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.</li>
412
- </ul>
413
- </li>
414
- <li>๐Ÿ”ค <strong>์ง์ ‘ ํ‚ค์›Œ๋“œ ์ž…๋ ฅ</strong>: ์—”ํ„ฐ ๋˜๋Š” ์‰ผํ‘œ(,)๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ๋ถ„์„ํ•  ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.</li>
415
- <li>๐Ÿš€ <strong>๋ถ„์„ ์‹คํ–‰</strong>: ์„ค์ •ํ•œ ์˜ต์…˜์— ๋”ฐ๋ผ ํ˜•ํƒœ์†Œ ๋ถ„์„ ๋ฐ ํ‚ค์›Œ๋“œ ๋ถ„์„์ด ์ˆ˜ํ–‰๋˜์–ด ๊ฒฐ๊ณผ๊ฐ€ ํ‘œ์™€ Excel ํŒŒ์ผ๋กœ ์ถœ๋ ฅ๋ฉ๋‹ˆ๋‹ค.</li>
416
- <li>๐Ÿ“ฅ <strong>Excel ๋‹ค์šด๋กœ๋“œ</strong>: ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ Excel ํŒŒ์ผ๋กœ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.</li>
417
- </ul>
418
- <p><strong>Tip:</strong> ๋ถ„์„ ๊ฒฐ๊ณผ๋Š” ์‹ค์‹œ๊ฐ„์œผ๋กœ ์—…๋ฐ์ดํŠธ๋˜๋ฉฐ, ํ•„์š”์‹œ ์ˆ˜์ • ํ›„ ๋‹ค์‹œ ๋ถ„์„ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ฆ๊ฑฐ์šด ๋ถ„์„ ๋˜์„ธ์š”! ๐Ÿ˜Š</p>
419
- </div>
420
- """)
421
- # ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
422
- scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
423
- analyze_button.click(fn=analysis_handler,
424
- inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
425
- outputs=[result_df, excel_file])
426
 
427
  if __name__ == "__main__":
428
  debug_log("Gradio ์•ฑ ์‹คํ–‰ ์‹œ์ž‘")
429
  demo.launch()
430
- debug_log("Gradio ์•ฑ ์‹คํ–‰ ์ข…๋ฃŒ")
 
17
  def debug_log(message: str):
18
  print(f"[DEBUG] {message}")
19
 
20
+ # [기본코드] - 네이버 블로그 스크래핑 기능
21
  def scrape_naver_blog(url: str) -> str:
22
  debug_log("scrape_naver_blog ํ•จ์ˆ˜ ์‹œ์ž‘")
23
  debug_log(f"์š”์ฒญ๋ฐ›์€ URL: {url}")
24
+
25
  headers = {
26
  "User-Agent": (
27
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
 
29
  "Chrome/96.0.4664.110 Safari/537.36"
30
  )
31
  }
32
+
33
  try:
34
+ # 1) ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ '๋ฉ”์ธ' ํŽ˜์ด์ง€ ์š”์ฒญ
35
  response = requests.get(url, headers=headers)
36
  debug_log("HTTP GET ์š”์ฒญ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
37
  if response.status_code != 200:
38
  debug_log(f"์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ์ฝ”๋“œ: {response.status_code}")
39
  return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {response.status_code}"
40
+
41
+ # 2) ๋ฉ”์ธ ํŽ˜์ด์ง€ ํŒŒ์‹ฑ
42
  soup = BeautifulSoup(response.text, "html.parser")
43
  debug_log("HTML ํŒŒ์‹ฑ(๋ฉ”์ธ ํŽ˜์ด์ง€) ์™„๋ฃŒ")
44
+
45
+ # 3) iframe ํƒœ๊ทธ ์ฐพ๊ธฐ
46
  iframe = soup.select_one("iframe#mainFrame")
47
  if not iframe:
48
  debug_log("iframe#mainFrame ํƒœ๊ทธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
 
51
  if not iframe_src:
52
  debug_log("iframe src๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
53
  return "๋ณธ๋ฌธ iframe์˜ src๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
54
+
55
+ # 4) iframe src ๋ณด์ • (์ ˆ๋Œ€๊ฒฝ๋กœ ์ฒ˜๋ฆฌ)
56
  parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
57
  debug_log(f"iframe ํŽ˜์ด์ง€ ์š”์ฒญ URL: {parsed_iframe_url}")
58
+
59
+ # 5) iframe ํŽ˜์ด์ง€ ์š”์ฒญ ๋ฐ ํŒŒ์‹ฑ
60
  iframe_response = requests.get(parsed_iframe_url, headers=headers)
61
  debug_log("HTTP GET ์š”์ฒญ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
62
  if iframe_response.status_code != 200:
 
64
  return f"iframe์—์„œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {iframe_response.status_code}"
65
  iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
66
  debug_log("HTML ํŒŒ์‹ฑ(iframe ํŽ˜์ด์ง€) ์™„๋ฃŒ")
67
+
68
+ # 6) ์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ ์ถ”์ถœ
69
  title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
70
  title = title_div.get_text(strip=True) if title_div else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
71
  debug_log(f"์ถ”์ถœ๋œ ์ œ๋ชฉ: {title}")
 
75
  else:
76
  content = "๋ณธ๋ฌธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
77
  debug_log("๋ณธ๋ฌธ ์ถ”์ถœ ์™„๋ฃŒ")
78
+
79
  result = f"[์ œ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
80
+ debug_log("์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ์„ ํ•ฉ์ณ ๋ฐ˜ํ™˜ ์ค€๋น„ ์™„๋ฃŒ")
81
  return result
82
+
83
  except Exception as e:
84
  debug_log(f"์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
85
  return f"์Šคํฌ๋ž˜ํ•‘ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
86
 
87
+ # [참조코드-1] 형태소 분석 기능
88
  def analyze_text(text: str):
89
  logging.basicConfig(level=logging.DEBUG)
90
  logger = logging.getLogger(__name__)
91
  logger.debug("์›๋ณธ ํ…์ŠคํŠธ: %s", text)
92
+
93
+ # 1. ํ•œ๊ตญ์–ด๋งŒ ๋‚จ๊ธฐ๊ธฐ (๊ณต๋ฐฑ, ์˜์–ด, ๊ธฐํ˜ธ ๋“ฑ ์ œ๊ฑฐ)
94
  filtered_text = re.sub(r'[^๊ฐ€-ํžฃ]', '', text)
95
+ logger.debug("ํ•„ํ„ฐ๋ง๋œ ํ…์ŠคํŠธ (ํ•œ๊ตญ์–ด๋งŒ, ๊ณต๋ฐฑ ์ œ๊ฑฐ): %s", filtered_text)
96
+
97
  if not filtered_text:
98
  logger.debug("์œ ํšจํ•œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๊ฐ€ ์—†์Œ.")
99
  return pd.DataFrame(columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"]), ""
100
+
101
+ # 2. Mecab์„ ์ด์šฉํ•œ ํ˜•ํƒœ์†Œ ๋ถ„์„ (๋ช…์‚ฌ์™€ ๋ณตํ•ฉ๋ช…์‚ฌ๋งŒ ์ถ”์ถœ)
102
  mecab_instance = mecab.MeCab()
103
  tokens = mecab_instance.pos(filtered_text)
104
  logger.debug("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ: %s", tokens)
105
+
106
  freq = {}
107
  for word, pos in tokens:
108
+ if word and word.strip():
109
+ if pos.startswith("NN"):
110
+ freq[word] = freq.get(word, 0) + 1
111
+ logger.debug("๋‹จ์–ด: %s, ํ’ˆ์‚ฌ: %s, ํ˜„์žฌ ๋นˆ๋„: %d", word, pos, freq[word])
112
+
113
+ # 3. ๋นˆ๋„์ˆ˜๋ฅผ ๋‚ด๋ฆผ์ฐจ์ˆœ ์ •๋ ฌ
114
  sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
115
+ logger.debug("๋‚ด๋ฆผ์ฐจ์ˆœ ์ •๋ ฌ๋œ ๋‹จ์–ด ๋นˆ๋„: %s", sorted_freq)
116
+
117
+ # 4. ๊ฒฐ๊ณผ DataFrame ์ƒ์„ฑ
118
  df = pd.DataFrame(sorted_freq, columns=["๋‹จ์–ด", "๋นˆ๋„์ˆ˜"])
119
+ logger.debug("๊ฒฐ๊ณผ DataFrame ์ƒ์„ฑ๋จ, shape: %s", df.shape)
120
+
121
+ # 5. Excel ํŒŒ์ผ ์ƒ์„ฑ (์ž„์‹œ ํŒŒ์ผ)
122
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
123
  df.to_excel(temp_file.name, index=False, engine='openpyxl')
124
  temp_file.close()
125
  logger.debug("Excel ํŒŒ์ผ ์ƒ์„ฑ๋จ: %s", temp_file.name)
126
+
127
  return df, temp_file.name
128
 
129
+ # [참조코드-2] 네이버 광고 API 및 검색량/블로그문서수 조회 기능
130
  def generate_signature(timestamp, method, uri, secret_key):
131
  message = f"{timestamp}.{method}.{uri}"
132
  digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
 
148
  API_KEY = os.environ["NAVER_API_KEY"]
149
  SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
150
  CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
151
+
152
  BASE_URL = "https://api.naver.com"
153
  uri = "/keywordstool"
154
  method = "GET"
 
164
  df = pd.DataFrame(data["keywordList"])
165
  if len(df) > 100:
166
  df = df.head(100)
167
+
168
  def parse_count(x):
169
  try:
170
  return int(str(x).replace(",", ""))
171
  except:
172
  return 0
173
+
174
  df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
175
  df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
176
  df["ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] + df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"]
 
209
  debug_log(f"process_keyword ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ๋“ค: {keywords}, ์—ฐ๊ด€๊ฒ€์ƒ‰์–ด ํฌํ•จ: {include_related}")
210
  input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
211
  result_dfs = []
212
+
213
  for idx, kw in enumerate(input_keywords):
214
  df_kw = fetch_related_keywords(kw)
215
  if df_kw.empty:
 
223
  df_related = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] != kw]
224
  if not df_related.empty:
225
  result_dfs.append(df_related)
226
+
227
  if result_dfs:
228
  result_df = pd.concat(result_dfs, ignore_index=True)
229
  result_df.drop_duplicates(subset=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
230
  else:
231
  result_df = pd.DataFrame(columns=["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"])
232
+
233
  result_df["๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"] = result_df["์ •๋ณดํ‚ค์›Œ๋“œ"].apply(fetch_blog_count)
234
  result_df.sort_values(by="ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", ascending=False, inplace=True)
235
  debug_log("process_keyword ์™„๋ฃŒ")
236
  return result_df, create_excel_file(result_df)
237
 
238
+ # 새로운 기능: '블로그내용가져오기' 실행 시 블로그 링크로부터 제목/본문 스크래핑
239
+ def fetch_blog_content(url: str):
240
+ debug_log("fetch_blog_content ํ•จ์ˆ˜ ์‹œ์ž‘")
241
+ content = scrape_naver_blog(url)
242
+ debug_log("fetch_blog_content ํ•จ์ˆ˜ ์™„๋ฃŒ")
243
+ return content
244
+
245
+ # 새로운 기능: 형태소 분석 및 검색량, 블로그문서수 추가
246
+ def morphological_analysis_and_enrich(text: str):
247
  debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์‹œ์ž‘")
248
  df_freq, _ = analyze_text(text)
249
  if df_freq.empty:
250
  debug_log("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ๊ฐ€ ๋นˆ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์ž…๋‹ˆ๋‹ค.")
251
  return df_freq, ""
252
+
253
+ # ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ์—์„œ ํ‚ค์›Œ๋“œ ์ถ”์ถœ (๊ฐ ๋‹จ์–ด๋ฅผ ์—”ํ„ฐ๋กœ ๊ตฌ๋ถ„)
 
 
254
  keywords = "\n".join(df_freq["๋‹จ์–ด"].tolist())
255
  debug_log(f"๋ถ„์„๋œ ํ‚ค์›Œ๋“œ: {keywords}")
256
+
257
+ # [์ฐธ์กฐ์ฝ”๋“œ-2]๋ฅผ ํ™œ์šฉํ•˜์—ฌ ๊ฐ ํ‚ค์›Œ๋“œ์˜ ๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ (์—ฐ๊ด€๊ฒ€์ƒ‰์–ด ๋ฏธํฌํ•จ)
258
  df_keyword_info, _ = process_keyword(keywords, include_related=False)
259
  debug_log("๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ ์™„๋ฃŒ")
260
+
261
+ # ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ์™€ ๊ฒ€์ƒ‰๋Ÿ‰ ์ •๋ณด๋ฅผ ๋ณ‘ํ•ฉ (ํ‚ค์›Œ๋“œ ๊ธฐ์ค€)
262
  merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋‹จ์–ด", right_on="์ •๋ณดํ‚ค์›Œ๋“œ", how="left")
263
  merged_df.drop(columns=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
264
+
265
+ # ๋ณ‘ํ•ฉ ๊ฒฐ๊ณผ Excel ํŒŒ์ผ ์ƒ์„ฑ
266
  merged_excel_path = create_excel_file(merged_df)
267
  debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์™„๋ฃŒ")
268
  return merged_df, merged_excel_path
269
 
270
+ # Gradio 인터페이스 구성 (Hugging Face Spaces 환경에 적합)
271
+ with gr.Blocks(title="๋ธ”๋กœ๊ทธ๊ธ€ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์ŠคํŽ˜์ด์Šค", css=".gradio-container { max-width: 960px; margin: auto; }") as demo:
272
+ gr.Markdown("# ๋ธ”๋กœ๊ทธ๊ธ€ ํ˜•ํƒœ์†Œ ๋ถ„์„ ์ŠคํŽ˜์ด์Šค")
273
+
274
+ with gr.Tab("๋ธ”๋กœ๊ทธ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  with gr.Row():
276
  blog_url_input = gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ", placeholder="์˜ˆ: https://blog.naver.com/ssboost/222983068507", lines=1)
277
+ fetch_button = gr.Button("๋ธ”๋กœ๊ทธ๋‚ด์šฉ๊ฐ€์ ธ์˜ค๊ธฐ")
278
+ blog_content = gr.Textbox(label="๋ธ”๋กœ๊ทธ ๋‚ด์šฉ", lines=10, placeholder="๋ธ”๋กœ๊ทธ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ค๊ฑฐ๋‚˜ ์ง์ ‘ ์ž…๋ ฅํ•˜์„ธ์š”.")
279
+ fetch_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content)
280
+
281
+ with gr.Tab("ํ˜•ํƒœ์†Œ ๋ถ„์„"):
282
+ with gr.Row():
283
+ analysis_input = gr.Textbox(label="๋ถ„์„ํ•  ํ…์ŠคํŠธ", lines=10, placeholder="๋ถ„์„ํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๊ฑฐ๋‚˜ '๋ธ”๋กœ๊ทธ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ'์—์„œ ๊ฐ€์ ธ์˜จ ๋‚ด์šฉ์„ ์ˆ˜์ •ํ•˜์„ธ์š”.")
284
  with gr.Row():
285
+ analyze_button = gr.Button("ํ˜•ํƒœ์†Œ๋ถ„์„")
286
  with gr.Row():
287
+ analysis_result = gr.Dataframe(label="๋ถ„์„ ๊ฒฐ๊ณผ (๋‹จ์–ด, ๋นˆ๋„์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰, ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ๋“ฑ)")
288
  with gr.Row():
289
+ analysis_excel = gr.File(label="Excel ๋‹ค์šด๋กœ๋“œ")
290
+ analyze_button.click(fn=morphological_analysis_and_enrich, inputs=analysis_input, outputs=[analysis_result, analysis_excel])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  if __name__ == "__main__":
293
  debug_log("Gradio ์•ฑ ์‹คํ–‰ ์‹œ์ž‘")
294
  demo.launch()
295
+ debug_log("Gradio ์•ฑ ์‹คํ–‰ ์ข…๋ฃŒ")