# Page header captured together with the file (not part of the program):
# openfree's picture
# Update app.py
# 5ffc072 verified
# raw
# history blame
# 5.22 kB
import os
from io import BytesIO

import gradio as gr
import pandas as pd
def convert_file(input_file, conversion_type):
# ํŒŒ์ผ์ด ์—…๋กœ๋“œ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
if input_file is None:
return None, "ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
# ํŒŒ์ผ ๋‚ด์šฉ ์ฝ๊ธฐ
try:
# ํŒŒ์ผ ๊ฐ์ฒด์—์„œ ์ฝ๊ธฐ ์‹œ๋„
file_bytes = input_file.read()
file_name = input_file.name
except AttributeError:
# AttributeError๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด input_file์„ ํŒŒ์ผ ๊ฒฝ๋กœ๋กœ ์ฒ˜๋ฆฌ
file_name = input_file
with open(file_name, "rb") as f:
file_bytes = f.read()
file_extension = file_name.lower().split('.')[-1]
df = None
output_file = None
converted_format = None
try:
# ๋ณ€ํ™˜: CSV์—์„œ Parquet์œผ๋กœ
if conversion_type == "CSV to Parquet":
if file_extension != "csv":
return None, "CSV์—์„œ Parquet์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋ฉด CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
# ๋‹ค์–‘ํ•œ ์ธ์ฝ”๋”ฉ์„ ์‹œ๋„ (chardet ์—†์ด)
encodings_to_try = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
encoding = None
for enc in encodings_to_try:
try:
df = pd.read_csv(BytesIO(file_bytes), encoding=enc)
encoding = enc
break
except UnicodeDecodeError:
continue
except Exception as e:
return None, f"CSV ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
if df is None:
return None, "์ผ๋ฐ˜์ ์ธ ์ธ์ฝ”๋”ฉ์œผ๋กœ CSV๋ฅผ ์ฝ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ์ด ํŠน์ดํ•œ ์ธ์ฝ”๋”ฉ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
output_file = "output.parquet"
df.to_parquet(output_file, index=False)
converted_format = "Parquet"
# ๋ณ€ํ™˜: Parquet์—์„œ CSV๋กœ
elif conversion_type == "Parquet to CSV":
if file_extension != "parquet":
return None, "Parquet์—์„œ CSV๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋ฉด Parquet ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
df = pd.read_parquet(BytesIO(file_bytes))
output_file = "output.csv"
df.to_csv(output_file, index=False, encoding='utf-8')
converted_format = "CSV"
else:
return None, "์ž˜๋ชป๋œ ๋ณ€ํ™˜ ์œ ํ˜•์ด ์„ ํƒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
# ์ƒ์œ„ 10๊ฐœ ํ–‰์˜ ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ
preview = df.head(10).to_string(index=False)
info_message = (
f"์ž…๋ ฅ ํŒŒ์ผ: {file_name}\n"
f"๋ณ€ํ™˜๋œ ํŒŒ์ผ ํ˜•์‹: {converted_format}\n"
)
if conversion_type == "CSV to Parquet" and encoding:
info_message += f"์‚ฌ์šฉ๋œ ์ธ์ฝ”๋”ฉ: {encoding}\n"
info_message += f"\n๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ƒ์œ„ 10๊ฐœ ํ–‰):\n{preview}"
return output_file, info_message
except Exception as e:
return None, f"๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
# ๋ชจ๋˜ํ•˜๊ณ  ์„ธ๋ จ๋œ ์Šคํƒ€์ผ์„ ์œ„ํ•œ ์‚ฌ์šฉ์ž ์ •์˜ CSS
custom_css = """
body {
background-color: #f4f4f4;
font-family: 'Helvetica Neue', Arial, sans-serif;
}
.gradio-container {
max-width: 900px;
margin: 40px auto;
padding: 20px;
background-color: #ffffff;
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
h1, h2 {
color: #333333;
}
.gradio-input, .gradio-output {
margin-bottom: 20px;
}
.gradio-button {
background-color: #4CAF50 !important;
color: white !important;
border: none !important;
padding: 10px 20px !important;
font-size: 16px !important;
border-radius: 6px !important;
cursor: pointer;
}
.gradio-button:hover {
background-color: #45a049 !important;
}
"""
with gr.Blocks(css=custom_css, title="CSV <-> Parquet ๋ณ€ํ™˜๊ธฐ") as demo:
gr.Markdown("# CSV <-> Parquet ๋ณ€ํ™˜๊ธฐ")
gr.Markdown("CSV ๋˜๋Š” Parquet ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๊ณ  ๋ณ€ํ™˜ ์œ ํ˜•์„ ์„ ํƒํ•˜์„ธ์š”. ์•ฑ์€ ํŒŒ์ผ์„ ๋ฐ˜๋Œ€ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๊ณ  ์ƒ์œ„ 10๊ฐœ ํ–‰์˜ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
with gr.Row():
with gr.Column(scale=1):
input_file = gr.File(label="CSV ๋˜๋Š” Parquet ํŒŒ์ผ ์—…๋กœ๋“œ")
with gr.Column(scale=1):
conversion_type = gr.Radio(
choices=["CSV to Parquet", "Parquet to CSV"],
label="๋ณ€ํ™˜ ์œ ํ˜•",
value="CSV to Parquet" # ๊ธฐ๋ณธ๊ฐ’ ์„ค์ •
)
convert_button = gr.Button("๋ณ€ํ™˜", elem_classes=["gradio-button"])
with gr.Row():
output_file = gr.File(label="๋ณ€ํ™˜๋œ ํŒŒ์ผ")
preview = gr.Textbox(label="๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ƒ์œ„ 10๊ฐœ ํ–‰)", lines=15)
convert_button.click(fn=convert_file, inputs=[input_file, conversion_type], outputs=[output_file, preview])
gr.Markdown("""
### ์ฐธ๊ณ :
- ์ด ๋ณ€ํ™˜๊ธฐ๋Š” ์ผ๋ฐ˜์ ์ธ CSV ์ธ์ฝ”๋”ฉ(UTF-8, Latin-1, ISO-8859-1, CP1252)์„ ์‹œ๋„ํ•ฉ๋‹ˆ๋‹ค
- Parquet ํŒŒ์ผ์€ CSV๋ณด๋‹ค ๋ฐ์ดํ„ฐ ํƒ€์ž…์„ ๋” ์ž˜ ๋ณด์กดํ•ฉ๋‹ˆ๋‹ค
- ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋Š” ๋ฐ์ดํ„ฐ์˜ ์ฒ˜์Œ 10ํ–‰๋งŒ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค
""")
if __name__ == "__main__":
demo.launch()