File size: 4,673 Bytes
da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 1f57f2c da3ce18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import gradio as gr
import pandas as pd
import re
from io import BytesIO
import tempfile
def process_excel(file_bytes):
"""
μ
λ‘λλ μμ
νμΌμμ D4μ
λΆν° Dμ΄μ μνλͺ
μ μΆμΆνμ¬,
κ° μ
μμ νΉμλ¬Έμλ₯Ό μ κ±°ν ν 곡백 κΈ°μ€μΌλ‘ ν€μλλ₯Ό μΆμΆν©λλ€.
ν μ
λ΄μμ μ€λ³΅λ ν€μλλ ν λ²λ§ μΉ΄μ΄νΈνκ³ , μ 체 μ
μ λν΄
ν€μλμ λΉλλ₯Ό κ³μ°νμ¬ μμ
νμΌκ³Ό ν(λ°μ΄ν°νλ μ)λ‘ κ²°κ³Όλ₯Ό μΆλ ₯ν©λλ€.
μλ¬ λ°μ μ, μλ¬ λ©μμ§λ₯Ό ν
μ€νΈ νμΌκ³Ό λ°μ΄ν°νλ μ ννλ‘ λ°νν©λλ€.
"""
# μμ
νμΌ μ½κΈ°
try:
df = pd.read_excel(BytesIO(file_bytes), header=None, engine="openpyxl")
except Exception as e:
error_message = "μμ
νμΌμ μ½λ μ€ μ€λ₯κ° λ°μνμμ΅λλ€: " + str(e)
temp_error = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb")
temp_error.write(error_message.encode("utf-8"))
temp_error.close()
error_df = pd.DataFrame({"μλ¬": [error_message]})
return temp_error.name, error_df
# μμ
νμΌ νμ μ²΄ν¬ (μ΅μ 4μ΄, μ΅μ 4ν)
if df.shape[1] < 4 or df.shape[0] < 4:
error_message = "μμ
νμΌμ νμμ΄ μ¬λ°λ₯΄μ§ μμ΅λλ€."
temp_error = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb")
temp_error.write(error_message.encode("utf-8"))
temp_error.close()
error_df = pd.DataFrame({"μλ¬": [error_message]})
return temp_error.name, error_df
# Dμ΄(4λ²μ§Έ μ΄, μΈλ±μ€ 3)μμ 4ν(μΈλ±μ€ 3)λΆν° λ°μ΄ν°λ₯Ό κ°μ Έμ΄
product_names_series = df.iloc[3:, 3]
product_names_series = product_names_series.dropna()
keyword_counts = {}
for cell in product_names_series:
# λ¬Έμμ΄μ΄ μλ κ²½μ° λ¬Έμμ΄λ‘ λ³ν
if not isinstance(cell, str):
cell = str(cell)
# νΉμλ¬Έμ μ κ±° (μ«μ, μλ¬Έ, νκΈ, 곡백μ μ μΈν λ¬Έμ μ κ±°)
cleaned = re.sub(r'[^0-9a-zA-Zκ°-ν£\s]', '', cell)
# 곡백 κΈ°μ€μΌλ‘ ν€μλ λΆλ¦¬
keywords = cleaned.split()
# νλμ μ
λ΄ μ€λ³΅ ν€μλλ μ κ±°
unique_keywords = set(keywords)
for keyword in unique_keywords:
keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
# ν€μλλ₯Ό λΉλ λ΄λ¦Όμ°¨μ(λΉλκ° κ°μΌλ©΄ ν€μλ μ€λ¦μ°¨μ)μΌλ‘ μ λ ¬
sorted_keywords = sorted(keyword_counts.items(), key=lambda x: (-x[1], x[0]))
# κ²°κ³Όλ₯Ό DataFrameμΌλ‘ μμ± (A1: "ν€μλ", B1: "λΉλ")
result_df = pd.DataFrame(sorted_keywords, columns=["ν€μλ", "λΉλ"])
# μμ
νμΌ μμ± (A1, B1 μ
μ ν€λ μΆκ°)
output = BytesIO()
try:
with pd.ExcelWriter(output, engine="openpyxl") as writer:
result_df.to_excel(writer, index=False, startrow=1, header=False)
worksheet = writer.sheets["Sheet1"]
worksheet.cell(row=1, column=1, value="ν€μλ")
worksheet.cell(row=1, column=2, value="λΉλ")
output.seek(0)
except Exception as e:
error_message = "μμ
νμΌμ μμ±νλ μ€ μ€λ₯κ° λ°μνμμ΅λλ€: " + str(e)
temp_error = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="wb")
temp_error.write(error_message.encode("utf-8"))
temp_error.close()
error_df = pd.DataFrame({"μλ¬": [error_message]})
return temp_error.name, error_df
# μμ μμ
νμΌ μμ± ν κ²½λ‘ λ°ν
temp_excel = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode="wb")
temp_excel.write(output.getvalue())
temp_excel.close()
return temp_excel.name, result_df
# κ·ΈλΌλμ€ μΈν°νμ΄μ€ μμ± (μμ
νμΌκ³Ό κ²°κ³Ό ν λ κ°μ§ ννλ‘ μΆλ ₯)
iface = gr.Interface(
fn=process_excel,
inputs=gr.File(label="μμ
νμΌ μ
λ‘λ", type="binary"),
outputs=[
gr.File(label="κ²°κ³Ό μμ
νμΌ"),
gr.DataFrame(label="ν€μλ λΆμ ν")
],
title="μμ
μνλͺ
ν€μλ μΆμΆ λ° λΉλ λΆμ",
description=(
"μμ
νμΌμ D4μ
λΆν° Dμ΄μ μλ μνλͺ
λ°μ΄ν°λ₯Ό λΆμνμ¬, "
"νΉμλ¬Έμλ₯Ό μ κ±°ν ν 곡백 κΈ°μ€μΌλ‘ ν€μλλ₯Ό μΆμΆν©λλ€. "
"ν μ
λ΄μμ μ€λ³΅λ ν€μλλ ν λ²λ§ μΉ΄μ΄νΈνλ©°, μ΅μ’
μ μΌλ‘ ν€μλμ λΉλλ₯Ό "
"λ΄λ¦Όμ°¨μμΌλ‘ μ 리ν κ²°κ³Όλ₯Ό μμ
νμΌκ³Ό ν(λ°μ΄ν°νλ μ)λ‘ μΆλ ₯ν©λλ€."
)
)
iface.launch() |