|
import re
import tempfile
from collections import Counter
from io import BytesIO

import gradio as gr
import pandas as pd
|
|
|
# Matches every character that is NOT an ASCII letter/digit, a Hangul
# syllable (U+AC00-U+D7A3) or whitespace; such characters are deleted
# before a product name is split into keywords.
_NON_KEYWORD_CHARS = re.compile(r"[^0-9a-zA-Z가-힣\s]")


def _error_result(error_message):
    """Package an error message as a ``(text file path, DataFrame)`` pair.

    The message is written UTF-8 encoded to a temporary ``.txt`` file that is
    kept on disk (``delete=False``) so the path stays valid after returning.
    The DataFrame has a single "에러" column holding the same message.
    """
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="wb"
    ) as temp_error:
        temp_error.write(error_message.encode("utf-8"))
    return temp_error.name, pd.DataFrame({"에러": [error_message]})


def _count_keywords(cells):
    """Return ``(keyword, count)`` pairs: highest count first, ties by keyword."""
    counts = Counter()
    for cell in cells:
        # Non-string cells (e.g. numbers) are counted by their text form.
        cleaned = _NON_KEYWORD_CHARS.sub("", str(cell))
        # set(): a keyword repeated inside one cell counts once for that cell.
        counts.update(set(cleaned.split()))
    return sorted(counts.items(), key=lambda item: (-item[1], item[0]))


def process_excel(file_bytes):
    """Count product-name keywords in an uploaded Excel workbook.

    Product names are read from column D, starting at cell D4. Special
    characters are removed from each cell and the remainder is split on
    whitespace into keywords. A keyword that appears several times in one
    cell is counted once for that cell; counts are accumulated over all cells.

    Args:
        file_bytes: Raw bytes of the uploaded ``.xlsx`` file.

    Returns:
        A ``(path, DataFrame)`` tuple. On success ``path`` points to a
        temporary ``.xlsx`` file and the DataFrame holds the "키워드" /
        "빈도" columns sorted by descending count. On failure ``path`` points
        to a temporary ``.txt`` file containing the error message and the
        DataFrame holds that message in a single "에러" column. Exceptions
        from reading or writing the workbook are reported this way rather
        than raised.
    """
    try:
        df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
    except Exception as e:  # UI boundary: report any read failure as a result
        return _error_result(
            "엑셀 파일을 읽는 중 오류가 발생하였습니다: " + str(e)
        )

    # D4 is row index 3 / column index 3, so at least 4 rows and 4 columns
    # must exist before it can be addressed.
    if df.shape[1] < 4 or df.shape[0] < 4:
        return _error_result("엑셀 파일의 형식이 올바르지 않습니다.")

    product_names = df.iloc[3:, 3].dropna()
    result_df = pd.DataFrame(
        _count_keywords(product_names), columns=["키워드", "빈도"]
    )

    sheet_name = "Sheet1"
    output = BytesIO()
    try:
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            # Data starts on the second row; the header cells are written by
            # hand below (presumably to avoid pandas' header formatting).
            result_df.to_excel(
                writer,
                sheet_name=sheet_name,
                index=False,
                startrow=1,
                header=False,
            )
            worksheet = writer.sheets[sheet_name]
            worksheet.cell(row=1, column=1, value="키워드")
            worksheet.cell(row=1, column=2, value="빈도")
    except Exception as e:  # UI boundary: report any write failure as a result
        return _error_result(
            "엑셀 파일을 생성하는 중 오류가 발생하였습니다: " + str(e)
        )

    # The workbook is only persisted once it has been built successfully, so
    # a failed export never leaves a half-written .xlsx behind.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".xlsx", mode="wb"
    ) as temp_excel:
        temp_excel.write(output.getvalue())

    return temp_excel.name, result_df
|
|
|
|
|
# Gradio UI: one Excel upload in; a downloadable result file and a keyword
# frequency table out.
iface = gr.Interface(
    fn=process_excel,
    # type="binary" delivers the upload as raw bytes, which process_excel
    # wraps in a BytesIO before parsing.
    inputs=gr.File(label="엑셀 파일 업로드", type="binary"),
    outputs=[
        gr.File(label="결과 엑셀 파일"),
        gr.DataFrame(label="키워드 분석 표"),
    ],
    title="엑셀 상품명 키워드 추출 및 빈도 분석",
    description=(
        "엑셀 파일의 D4셀부터 D열에 있는 상품명 데이터를 분석하여, "
        "특수문자를 제거한 후 공백 기준으로 키워드를 추출합니다. "
        "한 셀 내에서 중복된 키워드는 한 번만 카운트하며, 최종적으로 키워드의 빈도를 "
        "내림차순으로 정리한 결과를 엑셀 파일과 표(데이터프레임)로 출력합니다."
    ),
)

# Starts the web server as soon as the module is executed.
iface.launch()