Kims12 committed on
Commit
fdac880
·
verified ·
1 Parent(s): 989a45c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -7
app.py CHANGED
@@ -265,7 +265,32 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
265
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
266
  return merged_df, merged_excel_path
267
 
268
- # 새롭게 추가된 기능: 입력한 블로그 링크로부터 스크래핑하여 수정 가능한 텍스트 박스에 출력
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  def fetch_blog_content(url: str):
270
  debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")
271
  content = scrape_naver_blog(url)
@@ -277,23 +302,31 @@ with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀", css=".
277
  gr.Markdown("# 넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀")
278
  with gr.Row():
279
  blog_url_input = gr.Textbox(label="넀이버 λΈ”λ‘œκ·Έ 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
280
- with gr.Row():
281
  scrape_button = gr.Button("μŠ€ν¬λž˜ν•‘ μ‹€ν–‰")
282
  with gr.Row():
283
  blog_content_box = gr.Textbox(label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)", lines=10, placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€.")
284
  with gr.Row():
285
  remove_freq_checkbox = gr.Checkbox(label="λΉˆλ„μˆ˜1 제거", value=False)
 
 
286
  with gr.Row():
287
  analyze_button = gr.Button("뢄석 μ‹€ν–‰")
 
 
288
  with gr.Row():
289
- analysis_result = gr.Dataframe(label="뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ λ“±)")
 
 
 
290
  with gr.Row():
291
- analysis_excel = gr.File(label="Excel λ‹€μš΄λ‘œλ“œ")
 
292
 
293
- # μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ μ‹œ URLλ‘œλΆ€ν„° λΈ”λ‘œκ·Έ λ³Έλ¬Έ μŠ€ν¬λž˜ν•‘ ν›„ μˆ˜μ • κ°€λŠ₯ν•œ ν…μŠ€νŠΈ λ°•μŠ€μ— 좜λ ₯
294
  scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
295
- # 분석 실행 시 수정된 블로그 내용을 대상으로 형태소 분석 및 검색량/블로그문서수 조회 진행
296
- analyze_button.click(fn=morphological_analysis_and_enrich, inputs=[blog_content_box, remove_freq_checkbox], outputs=[analysis_result, analysis_excel])
 
297
 
298
  if __name__ == "__main__":
299
  debug_log("Gradio μ•± μ‹€ν–‰ μ‹œμž‘")
 
265
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
266
  return merged_df, merged_excel_path
267
 
268
+ # 새롭게 추가된 기능 1,2,3: 직접 입력한 키워드(엔터 또는 ','로 구분된 다수의 키워드)가 블로그 본문 내 등장 빈도수를 체크
269
+ def direct_keyword_analysis(text: str, keyword_input: str):
270
+ debug_log("direct_keyword_analysis ν•¨μˆ˜ μ‹œμž‘")
271
+ # 엔터 또는 쉼표로 분리하여 키워드 목록 생성
272
+ keywords = re.split(r'[\n,]+', keyword_input)
273
+ keywords = [kw.strip() for kw in keywords if kw.strip()]
274
+ debug_log(f"μž…λ ₯된 ν‚€μ›Œλ“œ λͺ©λ‘: {keywords}")
275
+ results = []
276
+ for kw in keywords:
277
+ count = text.count(kw)
278
+ results.append((kw, count))
279
+ debug_log(f"ν‚€μ›Œλ“œ '{kw}'의 λΉˆλ„μˆ˜: {count}")
280
+ df = pd.DataFrame(results, columns=["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜"])
281
+ excel_path = create_excel_file(df)
282
+ debug_log("direct_keyword_analysis ν•¨μˆ˜ μ™„λ£Œ")
283
+ return df, excel_path
284
+
285
+ # 분석 실행 버튼 클릭 시, 수정 가능한 블로그 본문을 대상으로 형태소 분석과 직접 키워드 분석을 함께 진행
286
+ def analyze_combined(blog_text: str, remove_freq1: bool, keyword_input: str):
287
+ debug_log("analyze_combined ν•¨μˆ˜ μ‹œμž‘")
288
+ morph_df, morph_excel = morphological_analysis_and_enrich(blog_text, remove_freq1)
289
+ direct_df, direct_excel = direct_keyword_analysis(blog_text, keyword_input)
290
+ debug_log("analyze_combined ν•¨μˆ˜ μ™„λ£Œ")
291
+ return morph_df, morph_excel, direct_df, direct_excel
292
+
293
+ # μŠ€ν¬λž˜ν•‘ μ‹€ν–‰: λΈ”λ‘œκ·Έ 링크λ₯Ό 톡해 λ‚΄μš©μ„ 가져와 μˆ˜μ • κ°€λŠ₯ν•œ ν…μŠ€νŠΈ λ°•μŠ€μ— 좜λ ₯
294
  def fetch_blog_content(url: str):
295
  debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")
296
  content = scrape_naver_blog(url)
 
302
  gr.Markdown("# 넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 슀페이슀")
303
  with gr.Row():
304
  blog_url_input = gr.Textbox(label="넀이버 λΈ”λ‘œκ·Έ 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
 
305
  scrape_button = gr.Button("μŠ€ν¬λž˜ν•‘ μ‹€ν–‰")
306
  with gr.Row():
307
  blog_content_box = gr.Textbox(label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)", lines=10, placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€.")
308
  with gr.Row():
309
  remove_freq_checkbox = gr.Checkbox(label="λΉˆλ„μˆ˜1 제거", value=False)
310
+ with gr.Row():
311
+ keyword_input_box = gr.Textbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)", lines=2, placeholder="예: ν‚€μ›Œλ“œ1, ν‚€μ›Œλ“œ2\nν‚€μ›Œλ“œ3")
312
  with gr.Row():
313
  analyze_button = gr.Button("뢄석 μ‹€ν–‰")
314
+
315
+ gr.Markdown("### ν˜•νƒœμ†Œ 뢄석 κ²°κ³Ό")
316
  with gr.Row():
317
+ morph_result_df = gr.Dataframe(label="ν˜•νƒœμ†Œ 뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ λ“±)")
318
+ morph_excel_file = gr.File(label="ν˜•νƒœμ†Œ 뢄석 Excel λ‹€μš΄λ‘œλ“œ")
319
+
320
+ gr.Markdown("### 직접 ν‚€μ›Œλ“œ 뢄석 κ²°κ³Ό")
321
  with gr.Row():
322
+ direct_result_df = gr.Dataframe(label="직접 ν‚€μ›Œλ“œ 뢄석 κ²°κ³Ό (ν‚€μ›Œλ“œ, λΉˆλ„μˆ˜)")
323
+ direct_excel_file = gr.File(label="직접 ν‚€μ›Œλ“œ 뢄석 Excel λ‹€μš΄λ‘œλ“œ")
324
 
325
+ # μŠ€ν¬λž˜ν•‘ μ‹€ν–‰: URL을 μž…λ ₯ν•˜λ©΄ λΈ”λ‘œκ·Έ λ‚΄μš©μ„ μˆ˜μ • κ°€λŠ₯ν•œ ν…μŠ€νŠΈ λ°•μŠ€μ— μ±„μ›Œμ€Œ
326
  scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
327
+ # 분석 실행: 수정된 블로그 내용과 빈도수1 제거 옵션, 직접 입력 키워드를 대상으로 두 분석을 함께 진행
328
+ analyze_button.click(fn=analyze_combined, inputs=[blog_content_box, remove_freq_checkbox, keyword_input_box],
329
+ outputs=[morph_result_df, morph_excel_file, direct_result_df, direct_excel_file])
330
 
331
  if __name__ == "__main__":
332
  debug_log("Gradio μ•± μ‹€ν–‰ μ‹œμž‘")