Kims12 committed on
Commit
82bbb8a
·
verified ·
1 Parent(s): b4650b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -9
app.py CHANGED
@@ -221,15 +221,28 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
221
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
222
  return merged_df, merged_excel_path
223
 
224
- # --- 톡합 뢄석 (ν˜•νƒœμ†Œ 뢄석 + 직접 μž…λ ₯ ν‚€μ›Œλ“œ) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
226
  debug_log("combined_analysis ν•¨μˆ˜ μ‹œμž‘")
227
- # ν˜•νƒœμ†Œ 뢄석 및 κ²€μƒ‰λŸ‰/λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ 병합 κ²°κ³Ό
228
  merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
229
- # 결과에 'μ§μ ‘μž…λ ₯' 컬럼 μΆ”κ°€ (μ΄ˆκΈ°κ°’: 빈 λ¬Έμžμ—΄)
230
  if "μ§μ ‘μž…λ ₯" not in merged_df.columns:
231
  merged_df["μ§μ ‘μž…λ ₯"] = ""
232
- # 직접 μž…λ ₯ν•œ ν‚€μ›Œλ“œ λͺ©λ‘ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)
233
  direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
234
  direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
235
  debug_log(f"μž…λ ₯된 직접 ν‚€μ›Œλ“œ: {direct_keywords}")
@@ -262,6 +275,16 @@ def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input:
262
  debug_log("combined_analysis ν•¨μˆ˜ μ™„λ£Œ")
263
  return merged_df, combined_excel
264
 
 
 
 
 
 
 
 
 
 
 
265
  # --- μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ ---
266
  def fetch_blog_content(url: str):
267
  debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")
@@ -270,18 +293,29 @@ def fetch_blog_content(url: str):
270
  return content
271
 
272
  # --- Gradio μΈν„°νŽ˜μ΄μŠ€ ꡬ성 ---
273
- with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀", css=".gradio-container { max-width: 960px; margin: auto; }") as demo:
 
 
 
 
274
  gr.Markdown("# 넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀")
275
- with gr.Row():
 
276
  blog_url_input = gr.Textbox(label="넀이버 λΈ”λ‘œκ·Έ 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
277
- scrape_button = gr.Button("μŠ€ν¬λž˜ν•‘ μ‹€ν–‰")
 
278
  with gr.Row():
279
  blog_content_box = gr.Textbox(label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)", lines=10, placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€.")
280
  with gr.Row():
281
- remove_freq_checkbox = gr.Checkbox(label="λΉˆλ„μˆ˜1 제거", value=False)
 
 
 
 
282
  direct_keyword_box = gr.Textbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)", lines=2, placeholder="예: ν‚€μ›Œλ“œ1, ν‚€μ›Œλ“œ2\nν‚€μ›Œλ“œ3")
283
  with gr.Row():
284
  analyze_button = gr.Button("뢄석 μ‹€ν–‰")
 
285
  with gr.Row():
286
  result_df = gr.Dataframe(label="톡합 뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜, μ§μ ‘μž…λ ₯)", interactive=True)
287
  with gr.Row():
@@ -289,7 +323,7 @@ with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀", css=".
289
 
290
  # 이벀트 μ—°κ²°
291
  scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
292
- analyze_button.click(fn=combined_analysis, inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box],
293
  outputs=[result_df, excel_file])
294
 
295
  if __name__ == "__main__":
 
221
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
222
  return merged_df, merged_excel_path
223
 
224
+ # --- 직접 키워드 분석 (단독 분석) ---
225
def direct_keyword_analysis(text: str, keyword_input: str):
    """Count how often each directly entered keyword occurs in ``text``.

    ``keyword_input`` is split on runs of newlines and/or commas; each piece
    is stripped of surrounding whitespace and empty pieces are dropped.
    Every remaining keyword is counted with ``str.count``, i.e. as
    non-overlapping, case-sensitive substring matches. Duplicate keywords
    are kept and produce duplicate rows.

    Args:
        text: Text to search. ``None`` is treated as an empty string.
        keyword_input: Raw keyword list separated by newlines or commas.
            ``None`` is treated as an empty list.

    Returns:
        A ``(df, excel_path)`` tuple. ``df`` is a two-column DataFrame
        (keyword, count) with one row per entered keyword in input order;
        ``excel_path`` is whatever ``create_excel_file(df)`` returns.
    """
    # NOTE(review): the non-ASCII literals in this block look mis-encoded
    # (possibly UTF-8 Korean decoded with a Greek code page). They are kept
    # verbatim here -- confirm against the original file.
    debug_log("direct_keyword_analysis ν•¨μˆ˜ μ‹œμž‘")

    # A UI text field may hand over None instead of ""; without these guards
    # re.split() / str.count() would raise.
    if text is None:
        text = ""
    if keyword_input is None:
        keyword_input = ""

    stripped = (piece.strip() for piece in re.split(r'[\n,]+', keyword_input))
    keywords = [kw for kw in stripped if kw]
    debug_log(f"μž…λ ₯된 ν‚€μ›Œλ“œ λͺ©λ‘: {keywords}")

    # Kept as an explicit loop because each keyword is logged as it is counted.
    results = []
    for kw in keywords:
        count = text.count(kw)
        results.append((kw, count))
        debug_log(f"ν‚€μ›Œλ“œ '{kw}'의 λΉˆλ„μˆ˜: {count}")

    df = pd.DataFrame(results, columns=["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜"])
    excel_path = create_excel_file(df)
    debug_log("direct_keyword_analysis ν•¨μˆ˜ μ™„λ£Œ")
    return df, excel_path
239
+
240
+ # --- 통합 분석 (형태소 분석 + 직접 키워드 분석) ---
241
  def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
242
  debug_log("combined_analysis ν•¨μˆ˜ μ‹œμž‘")
 
243
  merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
 
244
  if "μ§μ ‘μž…λ ₯" not in merged_df.columns:
245
  merged_df["μ§μ ‘μž…λ ₯"] = ""
 
246
  direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
247
  direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
248
  debug_log(f"μž…λ ₯된 직접 ν‚€μ›Œλ“œ: {direct_keywords}")
 
275
  debug_log("combined_analysis ν•¨μˆ˜ μ™„λ£Œ")
276
  return merged_df, combined_excel
277
 
278
+ # --- 분석 핸들러 ---
279
def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
    """Route an analysis request to the selected analysis function.

    Args:
        blog_text: Text to analyse.
        remove_freq1: Forwarded to ``combined_analysis``; unused when
            ``direct_keyword_only`` is truthy.
        direct_keyword_input: Raw directly entered keywords, forwarded to
            whichever analysis runs.
        direct_keyword_only: When truthy, run only
            ``direct_keyword_analysis``; otherwise run ``combined_analysis``.

    Returns:
        The two-item result of the chosen analysis function, unchanged.
    """
    debug_log("analysis_handler ν•¨μˆ˜ μ‹œμž‘")
    if not direct_keyword_only:
        # Integrated analysis (morphological analysis + direct keywords).
        return combined_analysis(blog_text, remove_freq1, direct_keyword_input)
    # Direct keyword analysis only.
    return direct_keyword_analysis(blog_text, direct_keyword_input)
287
+
288
  # --- 스크래핑 실행 ---
289
  def fetch_blog_content(url: str):
290
  debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")
 
293
  return content
294
 
295
  # --- Gradio 인터페이스 구성 ---
296
+ custom_css = """
297
+ .gradio-container { max-width: 960px; margin: auto; }
298
+ .centered-button-row { justify-content: center; }
299
+ """
300
+ with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀", css=custom_css) as demo:
301
  gr.Markdown("# 넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀")
302
+ # λΈ”λ‘œκ·Έ 링크와 μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ λ²„νŠΌμ„ ν•œ κ·Έλ£Ή 내에 배치 (λ²„νŠΌμ€ κ°€μš΄λ° μ •λ ¬)
303
+ with gr.Group():
304
  blog_url_input = gr.Textbox(label="넀이버 λΈ”λ‘œκ·Έ 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
305
+ with gr.Row(elem_classes="centered-button-row"):
306
+ scrape_button = gr.Button("μŠ€ν¬λž˜ν•‘ μ‹€ν–‰")
307
  with gr.Row():
308
  blog_content_box = gr.Textbox(label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)", lines=10, placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€.")
309
  with gr.Row():
310
+ remove_freq_checkbox = gr.Checkbox(label="λΉˆλ„μˆ˜1 제거", value=True)
311
+ # "λΉˆλ„μˆ˜1 제거" μ•„λž˜μ— "직접 ν‚€μ›Œλ“œ μž…λ ₯만 뢄석" 선택 ν•­λͺ© μΆ”κ°€ (κΈ°λ³Έ 미선택)
312
+ with gr.Row():
313
+ direct_keyword_only_checkbox = gr.Checkbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯만 뢄석", value=False)
314
+ with gr.Row():
315
  direct_keyword_box = gr.Textbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)", lines=2, placeholder="예: ν‚€μ›Œλ“œ1, ν‚€μ›Œλ“œ2\nν‚€μ›Œλ“œ3")
316
  with gr.Row():
317
  analyze_button = gr.Button("뢄석 μ‹€ν–‰")
318
+ # κ²°κ³Ό ν…Œμ΄λΈ”μ€ ν™”λ©΄ 전체 폭을 μ‚¬μš©ν•˜κ³ , Excel λ‹€μš΄λ‘œλ“œ λ²„νŠΌμ€ κ·Έ μ•„λž˜ 별도 행에 배치
319
  with gr.Row():
320
  result_df = gr.Dataframe(label="톡합 뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜, μ§μ ‘μž…λ ₯)", interactive=True)
321
  with gr.Row():
 
323
 
324
  # 이벤트 연결
325
  scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
326
+ analyze_button.click(fn=analysis_handler, inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
327
  outputs=[result_df, excel_file])
328
 
329
  if __name__ == "__main__":