Kims12 committed on
Commit 2c541cf · verified · 1 Parent(s): 8ac20cc

Update app.py

Files changed (1)
  1. app.py +69 -86
app.py CHANGED
@@ -126,7 +126,7 @@ def analyze_text(text: str):
 
     return df, temp_file.name
 
-# [Reference code-2] Naver Ads API: signature generation and header construction
+# [Reference code-2] Naver Ads API plus search-volume / blog-document-count lookup
 def generate_signature(timestamp, method, uri, secret_key):
     message = f"{timestamp}.{method}.{uri}"
     digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
@@ -143,7 +143,6 @@ def get_header(method, uri, api_key, secret_key, customer_id):
         "X-Signature": signature
     }
 
-# Existing single-keyword helper (kept for reference)
 def fetch_related_keywords(keyword):
     debug_log(f"fetch_related_keywords 호출, 키워드: {keyword}")
     API_KEY = os.environ["NAVER_API_KEY"]
@@ -180,83 +179,6 @@ def fetch_related_keywords(keyword):
     debug_log("fetch_related_keywords 완료")
     return result_df
 
-# Newly added: batch keywords into groups of ten so each group takes a single API call
-# (the groups themselves are still requested sequentially)
-def fetch_related_keywords_batch(keywords: list):
-    debug_log(f"fetch_related_keywords_batch 호출, 키워드 그룹: {keywords}")
-    API_KEY = os.environ["NAVER_API_KEY"]
-    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
-    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
-
-    BASE_URL = "https://api.naver.com"
-    uri = "/keywordstool"
-    method = "GET"
-    headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
-    params = {
-        "hintKeywords": keywords,  # passed as a list (up to 10 keywords)
-        "showDetail": "1"
-    }
-    response = requests.get(BASE_URL + uri, params=params, headers=headers)
-    data = response.json()
-    if "keywordList" not in data:
-        return pd.DataFrame()
-    df = pd.DataFrame(data["keywordList"])
-    if len(df) > 100:
-        df = df.head(100)
-
-    def parse_count(x):
-        try:
-            return int(str(x).replace(",", ""))
-        except:
-            return 0
-
-    df["PC월검색량"] = df["monthlyPcQcCnt"].apply(parse_count)
-    df["모바일월검색량"] = df["monthlyMobileQcCnt"].apply(parse_count)
-    df["토탈월검색량"] = df["PC월검색량"] + df["모바일월검색량"]
-    df.rename(columns={"relKeyword": "정보키워드"}, inplace=True)
-    result_df = df[["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"]]
-    debug_log("fetch_related_keywords_batch 완료")
-    return result_df
-
-# process_keyword reworked to handle the keywords group by group (each group sequentially)
-def process_keyword(keywords: str, include_related: bool):
-    debug_log(f"process_keyword 호출, 키워드들: {keywords}, 연관검색어 포함: {include_related}")
-    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
-    groups = [input_keywords[i:i+10] for i in range(0, len(input_keywords), 10)]
-    result_dfs = []
-
-    # Process each group sequentially (no concurrent calls)
-    for idx, group in enumerate(groups):
-        debug_log(f"그룹 {idx+1} 처리 시작: {group}")
-        df_batch = fetch_related_keywords_batch(group)
-        if df_batch.empty:
-            continue
-        # Extract the result row for each keyword in the group
-        for kw in group:
-            row_kw = df_batch[df_batch["정보키워드"] == kw]
-            if not row_kw.empty:
-                result_dfs.append(row_kw)
-            else:
-                result_dfs.append(df_batch.head(1))
-        # Apply the related-keywords option to the first group only (excluding the first keyword)
-        if include_related and idx == 0:
-            first_keyword = group[0]
-            df_related = df_batch[df_batch["정보키워드"] != first_keyword]
-            if not df_related.empty:
-                result_dfs.append(df_related)
-        debug_log(f"그룹 {idx+1} 처리 완료")
-
-    if result_dfs:
-        result_df = pd.concat(result_dfs, ignore_index=True)
-        result_df.drop_duplicates(subset=["정보키워드"], inplace=True)
-    else:
-        result_df = pd.DataFrame(columns=["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"])
-
-    result_df["블로그문서수"] = result_df["정보키워드"].apply(fetch_blog_count)
-    result_df.sort_values(by="토탈월검색량", ascending=False, inplace=True)
-    debug_log("process_keyword 완료")
-    return result_df, create_excel_file(result_df)
-
 def fetch_blog_count(keyword):
     debug_log(f"fetch_blog_count 호출, 키워드: {keyword}")
     client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
@@ -283,6 +205,36 @@ def create_excel_file(df):
     debug_log(f"Excel 파일 생성됨: {excel_path}")
     return excel_path
 
+def process_keyword(keywords: str, include_related: bool):
+    debug_log(f"process_keyword 호출, 키워드들: {keywords}, 연관검색어 포함: {include_related}")
+    input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
+    result_dfs = []
+
+    for idx, kw in enumerate(input_keywords):
+        df_kw = fetch_related_keywords(kw)
+        if df_kw.empty:
+            continue
+        row_kw = df_kw[df_kw["정보키워드"] == kw]
+        if not row_kw.empty:
+            result_dfs.append(row_kw)
+        else:
+            result_dfs.append(df_kw.head(1))
+        if include_related and idx == 0:
+            df_related = df_kw[df_kw["정보키워드"] != kw]
+            if not df_related.empty:
+                result_dfs.append(df_related)
+
+    if result_dfs:
+        result_df = pd.concat(result_dfs, ignore_index=True)
+        result_df.drop_duplicates(subset=["정보키워드"], inplace=True)
+    else:
+        result_df = pd.DataFrame(columns=["정보키워드", "PC월검색량", "모바일월검색량", "토탈월검색량"])
+
+    result_df["블로그문서수"] = result_df["정보키워드"].apply(fetch_blog_count)
+    result_df.sort_values(by="토탈월검색량", ascending=False, inplace=True)
+    debug_log("process_keyword 완료")
+    return result_df, create_excel_file(result_df)
+
 # Morphological analysis via [Reference code-1] and [Reference code-2], enriched with search volume and blog document counts (includes the remove-frequency-1 option)
 def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
     debug_log("morphological_analysis_and_enrich 함수 시작")
@@ -313,7 +265,32 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
     debug_log("morphological_analysis_and_enrich 함수 완료")
     return merged_df, merged_excel_path
 
-# Newly added feature: scrape the entered blog link and print the result into the editable text box
+# Newly added features 1-3: count how often directly entered keywords (several keywords separated by Enter or ',') appear in the blog body
+def direct_keyword_analysis(text: str, keyword_input: str):
+    debug_log("direct_keyword_analysis 함수 시작")
+    # Split on newlines or commas to build the keyword list
+    keywords = re.split(r'[\n,]+', keyword_input)
+    keywords = [kw.strip() for kw in keywords if kw.strip()]
+    debug_log(f"입력된 키워드 목록: {keywords}")
+    results = []
+    for kw in keywords:
+        count = text.count(kw)
+        results.append((kw, count))
+        debug_log(f"키워드 '{kw}'의 빈도수: {count}")
+    df = pd.DataFrame(results, columns=["키워드", "빈도수"])
+    excel_path = create_excel_file(df)
+    debug_log("direct_keyword_analysis 함수 완료")
+    return df, excel_path
+
+# On the analyze button, run the morphological analysis and the direct keyword analysis together on the editable blog body
+def analyze_combined(blog_text: str, remove_freq1: bool, keyword_input: str):
+    debug_log("analyze_combined 함수 시작")
+    morph_df, morph_excel = morphological_analysis_and_enrich(blog_text, remove_freq1)
+    direct_df, direct_excel = direct_keyword_analysis(blog_text, keyword_input)
+    debug_log("analyze_combined 함수 완료")
+    return morph_df, morph_excel, direct_df, direct_excel
+
+# Scrape step: fetch the content via the blog link and print it into the editable text box
 def fetch_blog_content(url: str):
     debug_log("fetch_blog_content 함수 시작")
     content = scrape_naver_blog(url)
@@ -325,23 +302,29 @@ with gr.Blocks(title="네이버 블로그 형태소 분석 스페이스", css=".
     gr.Markdown("# 네이버 블로그 형태소 분석 스페이스")
     with gr.Row():
         blog_url_input = gr.Textbox(label="네이버 블로그 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
-    with gr.Row():
         scrape_button = gr.Button("스크래핑 실행")
     with gr.Row():
         blog_content_box = gr.Textbox(label="블로그 내용 (수정 가능)", lines=10, placeholder="스크래핑된 블로그 내용이 여기에 표시됩니다.")
     with gr.Row():
         remove_freq_checkbox = gr.Checkbox(label="빈도수1 제거", value=False)
+    with gr.Row():
+        keyword_input_box = gr.Textbox(label="직접 키워드 입력 (엔터 또는 ','로 구분)", lines=2, placeholder="예: 키워드1, 키워드2\n키워드3")
     with gr.Row():
         analyze_button = gr.Button("분석 실행")
+    gr.Markdown("### 형태소 분석 결과")
     with gr.Row():
-        analysis_result = gr.Dataframe(label="분석 결과 (단어, 빈도수, 검색량, 블로그문서수 등)")
+        morph_result_df = gr.Dataframe(label="형태소 분석 결과 (단어, 빈도수, 검색량, 블로그문서수 등)")
+        morph_excel_file = gr.File(label="형태소 분석 Excel 다운로드")
+    gr.Markdown("### 직접 키워드 분석 결과")
     with gr.Row():
-        analysis_excel = gr.File(label="Excel 다운로드")
+        direct_result_df = gr.Dataframe(label="직접 키워드 분석 결과 (키워드, 빈도수)")
+        direct_excel_file = gr.File(label="직접 키워드 분석 Excel 다운로드")
 
-    # On scrape, scrape the blog body from the URL and print it into the editable text box
+    # Scrape: fetch the blog content from the URL into the editable text box
     scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
-    # On analyze, run morphological analysis and search-volume/blog-document-count lookup on the edited blog content
-    analyze_button.click(fn=morphological_analysis_and_enrich, inputs=[blog_content_box, remove_freq_checkbox], outputs=[analysis_result, analysis_excel])
+    # Analyze: run both analyses on the edited blog content, using the remove-frequency-1 option and the directly entered keywords
+    analyze_button.click(fn=analyze_combined, inputs=[blog_content_box, remove_freq_checkbox, keyword_input_box],
+                         outputs=[morph_result_df, morph_excel_file, direct_result_df, direct_excel_file])
 
 if __name__ == "__main__":
     debug_log("Gradio 앱 실행 시작")
 