Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -265,7 +265,32 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
|
265 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
266 |
return merged_df, merged_excel_path
|
267 |
|
268 |
-
# μλ‘κ² μΆκ°λ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
def fetch_blog_content(url: str):
|
270 |
debug_log("fetch_blog_content ν¨μ μμ")
|
271 |
content = scrape_naver_blog(url)
|
@@ -277,23 +302,31 @@ with gr.Blocks(title="λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€", css=".
|
|
277 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
278 |
with gr.Row():
|
279 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
280 |
-
with gr.Row():
|
281 |
scrape_button = gr.Button("μ€ν¬λν μ€ν")
|
282 |
with gr.Row():
|
283 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
284 |
with gr.Row():
|
285 |
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=False)
|
|
|
|
|
286 |
with gr.Row():
|
287 |
analyze_button = gr.Button("λΆμ μ€ν")
|
|
|
|
|
288 |
with gr.Row():
|
289 |
-
|
|
|
|
|
|
|
290 |
with gr.Row():
|
291 |
-
|
|
|
292 |
|
293 |
-
# μ€ν¬λν
|
294 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
295 |
-
# λΆμ
|
296 |
-
analyze_button.click(fn=
|
|
|
297 |
|
298 |
if __name__ == "__main__":
|
299 |
debug_log("Gradio μ± μ€ν μμ")
|
|
|
265 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
266 |
return merged_df, merged_excel_path
|
267 |
|
268 |
+
# Newly added features 1-3: check how often directly entered keywords (multiple keywords separated by Enter or ',') appear in the blog body text
|
269 |
+
def direct_keyword_analysis(text: str, keyword_input: str):
    """Count how often each user-supplied keyword occurs in ``text``.

    ``keyword_input`` is split on newlines and commas; each piece is
    stripped of surrounding whitespace and empty pieces are discarded.
    Every remaining keyword is counted with ``str.count``, i.e. as a
    case-sensitive, non-overlapping substring match.

    Args:
        text: Text to search. ``None`` is treated as an empty string.
        keyword_input: Raw keyword text, separated by newlines and/or
            commas. ``None`` is treated as an empty string.

    Returns:
        A ``(df, excel_path)`` tuple. ``df`` is a DataFrame with the
        columns "키워드" (keyword) and "빈도수" (frequency), one row per
        keyword in input order. ``excel_path`` is whatever
        ``create_excel_file(df)`` returns.
    """
    # NOTE(review): the Korean literals below were restored from a copy in
    # which UTF-8 had been mis-decoded; confirm against the original app.py.
    debug_log("direct_keyword_analysis 함수 시작")

    # Guard against a cleared/unset input so .count() and re.split() do
    # not raise on None.
    text = text or ""
    raw_keywords = re.split(r'[\n,]+', keyword_input or "")

    # Drop blanks produced by leading/trailing/repeated separators.
    keywords = [kw.strip() for kw in raw_keywords if kw.strip()]
    debug_log(f"입력된 키워드 목록: {keywords}")

    results = []
    for kw in keywords:
        count = text.count(kw)
        results.append((kw, count))
        debug_log(f"키워드 '{kw}'의 빈도수: {count}")

    df = pd.DataFrame(results, columns=["키워드", "빈도수"])
    excel_path = create_excel_file(df)
    debug_log("direct_keyword_analysis 함수 완료")
    return df, excel_path
|
284 |
+
|
285 |
+
# When the "run analysis" button is clicked, run the morphological analysis and the direct keyword analysis together on the editable blog body text
|
286 |
+
def analyze_combined(blog_text: str, remove_freq1: bool, keyword_input: str):
    """Run the morphological and direct-keyword analyses on one text.

    Args:
        blog_text: Text passed unchanged to both analyses.
        remove_freq1: Forwarded to ``morphological_analysis_and_enrich``.
        keyword_input: Forwarded to ``direct_keyword_analysis``.

    Returns:
        A 4-tuple ``(morph_df, morph_excel, direct_df, direct_excel)``:
        the two values returned by ``morphological_analysis_and_enrich``
        followed by the two returned by ``direct_keyword_analysis``.
    """
    # NOTE(review): the Korean log messages were restored from a copy in
    # which UTF-8 had been mis-decoded; confirm against the original app.py.
    debug_log("analyze_combined 함수 시작")
    morph_df, morph_excel = morphological_analysis_and_enrich(blog_text, remove_freq1)
    direct_df, direct_excel = direct_keyword_analysis(blog_text, keyword_input)
    debug_log("analyze_combined 함수 완료")
    return morph_df, morph_excel, direct_df, direct_excel
|
292 |
+
|
293 |
+
# Run scraping: fetch the content via the blog link and output it to the editable text box
|
294 |
def fetch_blog_content(url: str):
|
295 |
debug_log("fetch_blog_content ν¨μ μμ")
|
296 |
content = scrape_naver_blog(url)
|
|
|
302 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
303 |
with gr.Row():
|
304 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
|
|
305 |
scrape_button = gr.Button("μ€ν¬λν μ€ν")
|
306 |
with gr.Row():
|
307 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
308 |
with gr.Row():
|
309 |
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=False)
|
310 |
+
with gr.Row():
|
311 |
+
keyword_input_box = gr.Textbox(label="μ§μ ν€μλ μ
λ ₯ (μν° λλ ','λ‘ κ΅¬λΆ)", lines=2, placeholder="μ: ν€μλ1, ν€μλ2\nν€μλ3")
|
312 |
with gr.Row():
|
313 |
analyze_button = gr.Button("λΆμ μ€ν")
|
314 |
+
|
315 |
+
gr.Markdown("### ννμ λΆμ κ²°κ³Ό")
|
316 |
with gr.Row():
|
317 |
+
morph_result_df = gr.Dataframe(label="ννμ λΆμ κ²°κ³Ό (λ¨μ΄, λΉλμ, κ²μλ, λΈλ‘κ·Έλ¬Έμμ λ±)")
|
318 |
+
morph_excel_file = gr.File(label="ννμ λΆμ Excel λ€μ΄λ‘λ")
|
319 |
+
|
320 |
+
gr.Markdown("### μ§μ ν€μλ λΆμ κ²°κ³Ό")
|
321 |
with gr.Row():
|
322 |
+
direct_result_df = gr.Dataframe(label="μ§μ ν€μλ λΆμ κ²°κ³Ό (ν€μλ, λΉλμ)")
|
323 |
+
direct_excel_file = gr.File(label="μ§μ ν€μλ λΆμ Excel λ€μ΄λ‘λ")
|
324 |
|
325 |
+
# Run scraping: when a URL is entered, fill the editable text box with the blog content
|
326 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
327 |
+
# Run analysis: run both analyses together on the edited blog content, using the "remove frequency-1" option and the directly entered keywords
|
328 |
+
analyze_button.click(fn=analyze_combined, inputs=[blog_content_box, remove_freq_checkbox, keyword_input_box],
|
329 |
+
outputs=[morph_result_df, morph_excel_file, direct_result_df, direct_excel_file])
|
330 |
|
331 |
if __name__ == "__main__":
|
332 |
debug_log("Gradio μ± μ€ν μμ")
|