Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -221,15 +221,28 @@ def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
|
|
221 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
222 |
return merged_df, merged_excel_path
|
223 |
|
224 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
|
226 |
debug_log("combined_analysis ν¨μ μμ")
|
227 |
-
# ννμ λΆμ λ° κ²μλ/λΈλ‘κ·Έλ¬Έμμ λ³ν© κ²°κ³Ό
|
228 |
merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
|
229 |
-
# κ²°κ³Όμ 'μ§μ μ
λ ₯' μ»¬λΌ μΆκ° (μ΄κΈ°κ°: λΉ λ¬Έμμ΄)
|
230 |
if "μ§μ μ
λ ₯" not in merged_df.columns:
|
231 |
merged_df["μ§μ μ
λ ₯"] = ""
|
232 |
-
# μ§μ μ
λ ₯ν ν€μλ λͺ©λ‘ (μν° λλ ','λ‘ κ΅¬λΆ)
|
233 |
direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
|
234 |
direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
|
235 |
debug_log(f"μ
λ ₯λ μ§μ ν€μλ: {direct_keywords}")
|
@@ -262,6 +275,16 @@ def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input:
|
|
262 |
debug_log("combined_analysis ν¨μ μλ£")
|
263 |
return merged_df, combined_excel
|
264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
# --- μ€ν¬λν μ€ν ---
|
266 |
def fetch_blog_content(url: str):
|
267 |
debug_log("fetch_blog_content ν¨μ μμ")
|
@@ -270,18 +293,29 @@ def fetch_blog_content(url: str):
|
|
270 |
return content
|
271 |
|
272 |
# --- Gradio μΈν°νμ΄μ€ κ΅¬μ± ---
|
273 |
-
|
|
|
|
|
|
|
|
|
274 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
275 |
-
|
|
|
276 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
277 |
-
|
|
|
278 |
with gr.Row():
|
279 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
280 |
with gr.Row():
|
281 |
-
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=
|
|
|
|
|
|
|
|
|
282 |
direct_keyword_box = gr.Textbox(label="μ§μ ν€μλ μ
λ ₯ (μν° λλ ','λ‘ κ΅¬λΆ)", lines=2, placeholder="μ: ν€μλ1, ν€μλ2\nν€μλ3")
|
283 |
with gr.Row():
|
284 |
analyze_button = gr.Button("λΆμ μ€ν")
|
|
|
285 |
with gr.Row():
|
286 |
result_df = gr.Dataframe(label="ν΅ν© λΆμ κ²°κ³Ό (λ¨μ΄, λΉλμ, κ²μλ, λΈλ‘κ·Έλ¬Έμμ, μ§μ μ
λ ₯)", interactive=True)
|
287 |
with gr.Row():
|
@@ -289,7 +323,7 @@ with gr.Blocks(title="λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€", css=".
|
|
289 |
|
290 |
# μ΄λ²€νΈ μ°κ²°
|
291 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
292 |
-
analyze_button.click(fn=
|
293 |
outputs=[result_df, excel_file])
|
294 |
|
295 |
if __name__ == "__main__":
|
|
|
221 |
debug_log("morphological_analysis_and_enrich ν¨μ μλ£")
|
222 |
return merged_df, merged_excel_path
|
223 |
|
224 |
+
# --- μ§μ ν€μλ λΆμ (λ¨λ
λΆμ) ---
|
225 |
+
def direct_keyword_analysis(text: str, keyword_input: str):
|
226 |
+
debug_log("direct_keyword_analysis ν¨μ μμ")
|
227 |
+
keywords = re.split(r'[\n,]+', keyword_input)
|
228 |
+
keywords = [kw.strip() for kw in keywords if kw.strip()]
|
229 |
+
debug_log(f"μ
λ ₯λ ν€μλ λͺ©λ‘: {keywords}")
|
230 |
+
results = []
|
231 |
+
for kw in keywords:
|
232 |
+
count = text.count(kw)
|
233 |
+
results.append((kw, count))
|
234 |
+
debug_log(f"ν€μλ '{kw}'μ λΉλμ: {count}")
|
235 |
+
df = pd.DataFrame(results, columns=["ν€μλ", "λΉλμ"])
|
236 |
+
excel_path = create_excel_file(df)
|
237 |
+
debug_log("direct_keyword_analysis ν¨μ μλ£")
|
238 |
+
return df, excel_path
|
239 |
+
|
240 |
+
# --- ν΅ν© λΆμ (ννμ λΆμ + μ§μ ν€μλ λΆμ) ---
|
241 |
def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
|
242 |
debug_log("combined_analysis ν¨μ μμ")
|
|
|
243 |
merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
|
|
|
244 |
if "μ§μ μ
λ ₯" not in merged_df.columns:
|
245 |
merged_df["μ§μ μ
λ ₯"] = ""
|
|
|
246 |
direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
|
247 |
direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
|
248 |
debug_log(f"μ
λ ₯λ μ§μ ν€μλ: {direct_keywords}")
|
|
|
275 |
debug_log("combined_analysis ν¨μ μλ£")
|
276 |
return merged_df, combined_excel
|
277 |
|
278 |
+
# --- λΆμ νΈλ€λ¬ ---
|
279 |
+
def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
|
280 |
+
debug_log("analysis_handler ν¨μ μμ")
|
281 |
+
if direct_keyword_only:
|
282 |
+
# μ§μ ν€μλ λΆμλ§ μν
|
283 |
+
return direct_keyword_analysis(blog_text, direct_keyword_input)
|
284 |
+
else:
|
285 |
+
# ν΅ν© λΆμ (ννμ λΆμ + μ§μ ν€μλ λΆμ)
|
286 |
+
return combined_analysis(blog_text, remove_freq1, direct_keyword_input)
|
287 |
+
|
288 |
# --- μ€ν¬λν μ€ν ---
|
289 |
def fetch_blog_content(url: str):
|
290 |
debug_log("fetch_blog_content ν¨μ μμ")
|
|
|
293 |
return content
|
294 |
|
295 |
# --- Gradio μΈν°νμ΄μ€ κ΅¬μ± ---
|
296 |
+
custom_css = """
|
297 |
+
.gradio-container { max-width: 960px; margin: auto; }
|
298 |
+
.centered-button-row { justify-content: center; }
|
299 |
+
"""
|
300 |
+
with gr.Blocks(title="λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€", css=custom_css) as demo:
|
301 |
gr.Markdown("# λ€μ΄λ² λΈλ‘κ·Έ ννμ λΆμ μ€νμ΄μ€")
|
302 |
+
# λΈλ‘κ·Έ λ§ν¬μ μ€ν¬λν μ€ν λ²νΌμ ν κ·Έλ£Ή λ΄μ λ°°μΉ (λ²νΌμ κ°μ΄λ° μ λ ¬)
|
303 |
+
with gr.Group():
|
304 |
blog_url_input = gr.Textbox(label="λ€μ΄λ² λΈλ‘κ·Έ λ§ν¬", placeholder="μ: https://blog.naver.com/ssboost/222983068507", lines=1)
|
305 |
+
with gr.Row(elem_classes="centered-button-row"):
|
306 |
+
scrape_button = gr.Button("μ€ν¬λν μ€ν")
|
307 |
with gr.Row():
|
308 |
blog_content_box = gr.Textbox(label="λΈλ‘κ·Έ λ΄μ© (μμ κ°λ₯)", lines=10, placeholder="μ€ν¬λνλ λΈλ‘κ·Έ λ΄μ©μ΄ μ¬κΈ°μ νμλ©λλ€.")
|
309 |
with gr.Row():
|
310 |
+
remove_freq_checkbox = gr.Checkbox(label="λΉλμ1 μ κ±°", value=True)
|
311 |
+
# "λΉλμ1 μ κ±°" μλμ "μ§μ ν€μλ μ
λ ₯λ§ λΆμ" μ ν νλͺ© μΆκ° (κΈ°λ³Έ λ―Έμ ν)
|
312 |
+
with gr.Row():
|
313 |
+
direct_keyword_only_checkbox = gr.Checkbox(label="μ§μ ν€μλ μ
λ ₯λ§ λΆμ", value=False)
|
314 |
+
with gr.Row():
|
315 |
direct_keyword_box = gr.Textbox(label="μ§μ ν€μλ μ
λ ₯ (μν° λλ ','λ‘ κ΅¬λΆ)", lines=2, placeholder="μ: ν€μλ1, ν€μλ2\nν€μλ3")
|
316 |
with gr.Row():
|
317 |
analyze_button = gr.Button("λΆμ μ€ν")
|
318 |
+
# κ²°κ³Ό ν
μ΄λΈμ νλ©΄ μ 체 νμ μ¬μ©νκ³ , Excel λ€μ΄λ‘λ λ²νΌμ κ·Έ μλ λ³λ νμ λ°°μΉ
|
319 |
with gr.Row():
|
320 |
result_df = gr.Dataframe(label="ν΅ν© λΆμ κ²°κ³Ό (λ¨μ΄, λΉλμ, κ²μλ, λΈλ‘κ·Έλ¬Έμμ, μ§μ μ
λ ₯)", interactive=True)
|
321 |
with gr.Row():
|
|
|
323 |
|
324 |
# μ΄λ²€νΈ μ°κ²°
|
325 |
scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
|
326 |
+
analyze_button.click(fn=analysis_handler, inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
|
327 |
outputs=[result_df, excel_file])
|
328 |
|
329 |
if __name__ == "__main__":
|