import os
import tempfile
from io import BytesIO

import gradio as gr
import pandas as pd
def convert_file(input_file, conversion_type): |
|
|
|
if input_file is None: |
|
return None, "ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์." |
|
|
|
|
|
try: |
|
|
|
file_bytes = input_file.read() |
|
file_name = input_file.name |
|
except AttributeError: |
|
|
|
file_name = input_file |
|
with open(file_name, "rb") as f: |
|
file_bytes = f.read() |
|
|
|
file_extension = file_name.lower().split('.')[-1] |
|
df = None |
|
output_file = None |
|
converted_format = None |
|
|
|
try: |
|
|
|
if conversion_type == "CSV to Parquet": |
|
if file_extension != "csv": |
|
return None, "CSV์์ Parquet์ผ๋ก ๋ณํํ๋ ค๋ฉด CSV ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์." |
|
|
|
|
|
encodings_to_try = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252'] |
|
encoding = None |
|
|
|
for enc in encodings_to_try: |
|
try: |
|
df = pd.read_csv(BytesIO(file_bytes), encoding=enc) |
|
encoding = enc |
|
break |
|
except UnicodeDecodeError: |
|
continue |
|
except Exception as e: |
|
return None, f"CSV ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}" |
|
|
|
if df is None: |
|
return None, "์ผ๋ฐ์ ์ธ ์ธ์ฝ๋ฉ์ผ๋ก CSV๋ฅผ ์ฝ์ง ๋ชปํ์ต๋๋ค. ํ์ผ์ด ํน์ดํ ์ธ์ฝ๋ฉ์ ์ฌ์ฉํ ์ ์์ต๋๋ค." |
|
|
|
output_file = "output.parquet" |
|
df.to_parquet(output_file, index=False) |
|
converted_format = "Parquet" |
|
|
|
|
|
elif conversion_type == "Parquet to CSV": |
|
if file_extension != "parquet": |
|
return None, "Parquet์์ CSV๋ก ๋ณํํ๋ ค๋ฉด Parquet ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์." |
|
|
|
df = pd.read_parquet(BytesIO(file_bytes)) |
|
output_file = "output.csv" |
|
df.to_csv(output_file, index=False, encoding='utf-8') |
|
converted_format = "CSV" |
|
else: |
|
return None, "์๋ชป๋ ๋ณํ ์ ํ์ด ์ ํ๋์์ต๋๋ค." |
|
|
|
|
|
preview = df.head(10).to_string(index=False) |
|
info_message = ( |
|
f"์
๋ ฅ ํ์ผ: {file_name}\n" |
|
f"๋ณํ๋ ํ์ผ ํ์: {converted_format}\n" |
|
) |
|
if conversion_type == "CSV to Parquet" and encoding: |
|
info_message += f"์ฌ์ฉ๋ ์ธ์ฝ๋ฉ: {encoding}\n" |
|
|
|
info_message += f"\n๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 10๊ฐ ํ):\n{preview}" |
|
|
|
return output_file, info_message |
|
|
|
except Exception as e: |
|
return None, f"๋ณํ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}" |
|
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...): page background, a centered card
# for the app container, and a green call-to-action button with hover state.
custom_css = """
body {
    background-color: #f4f4f4;
    font-family: 'Helvetica Neue', Arial, sans-serif;
}

.gradio-container {
    max-width: 900px;
    margin: 40px auto;
    padding: 20px;
    background-color: #ffffff;
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}

h1, h2 {
    color: #333333;
}

.gradio-input, .gradio-output {
    margin-bottom: 20px;
}

.gradio-button {
    background-color: #4CAF50 !important;
    color: white !important;
    border: none !important;
    padding: 10px 20px !important;
    font-size: 16px !important;
    border-radius: 6px !important;
    cursor: pointer;
}

.gradio-button:hover {
    background-color: #45a049 !important;
}
"""
with gr.Blocks(css=custom_css, title="CSV <-> Parquet ๋ณํ๊ธฐ") as demo: |
|
gr.Markdown("# CSV <-> Parquet ๋ณํ๊ธฐ") |
|
gr.Markdown("CSV ๋๋ Parquet ํ์ผ์ ์
๋ก๋ํ๊ณ ๋ณํ ์ ํ์ ์ ํํ์ธ์. ์ฑ์ ํ์ผ์ ๋ฐ๋ ํ์์ผ๋ก ๋ณํํ๊ณ ์์ 10๊ฐ ํ์ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ํ์ํฉ๋๋ค.") |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
input_file = gr.File(label="CSV ๋๋ Parquet ํ์ผ ์
๋ก๋") |
|
with gr.Column(scale=1): |
|
conversion_type = gr.Radio( |
|
choices=["CSV to Parquet", "Parquet to CSV"], |
|
label="๋ณํ ์ ํ", |
|
value="CSV to Parquet" |
|
) |
|
|
|
convert_button = gr.Button("๋ณํ", elem_classes=["gradio-button"]) |
|
|
|
with gr.Row(): |
|
output_file = gr.File(label="๋ณํ๋ ํ์ผ") |
|
preview = gr.Textbox(label="๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 10๊ฐ ํ)", lines=15) |
|
|
|
convert_button.click(fn=convert_file, inputs=[input_file, conversion_type], outputs=[output_file, preview]) |
|
|
|
gr.Markdown(""" |
|
### ์ฐธ๊ณ : |
|
- ์ด ๋ณํ๊ธฐ๋ ์ผ๋ฐ์ ์ธ CSV ์ธ์ฝ๋ฉ(UTF-8, Latin-1, ISO-8859-1, CP1252)์ ์๋ํฉ๋๋ค |
|
- Parquet ํ์ผ์ CSV๋ณด๋ค ๋ฐ์ดํฐ ํ์
์ ๋ ์ ๋ณด์กดํฉ๋๋ค |
|
- ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ ๋ฐ์ดํฐ์ ์ฒ์ 10ํ๋ง ํ์ํฉ๋๋ค |
|
""") |
|
|
|
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()