File size: 5,221 Bytes
d2b9031 49e25d2 ff86828 d2b9031 6a1564b 5ffc072 6a1564b 5ffc072 fa41b98 5ffc072 4f2568a 5ffc072 4f2568a 5ffc072 4f2568a 6a1564b fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 fa41b98 5ffc072 7773ef1 5ffc072 31c7995 5ffc072 31c7995 5ffc072 31c7995 fa41b98 5ffc072 fa41b98 31c7995 5ffc072 31c7995 5ffc072 31c7995 fa41b98 5ffc072 fa41b98 72dd3ca fa41b98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
import tempfile
from io import BytesIO

import gradio as gr
import pandas as pd
def convert_file(input_file, conversion_type):
# ํ์ผ์ด ์
๋ก๋๋์๋์ง ํ์ธ
if input_file is None:
return None, "ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์."
# ํ์ผ ๋ด์ฉ ์ฝ๊ธฐ
try:
# ํ์ผ ๊ฐ์ฒด์์ ์ฝ๊ธฐ ์๋
file_bytes = input_file.read()
file_name = input_file.name
except AttributeError:
# AttributeError๊ฐ ๋ฐ์ํ๋ฉด input_file์ ํ์ผ ๊ฒฝ๋ก๋ก ์ฒ๋ฆฌ
file_name = input_file
with open(file_name, "rb") as f:
file_bytes = f.read()
file_extension = file_name.lower().split('.')[-1]
df = None
output_file = None
converted_format = None
try:
# ๋ณํ: CSV์์ Parquet์ผ๋ก
if conversion_type == "CSV to Parquet":
if file_extension != "csv":
return None, "CSV์์ Parquet์ผ๋ก ๋ณํํ๋ ค๋ฉด CSV ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์."
# ๋ค์ํ ์ธ์ฝ๋ฉ์ ์๋ (chardet ์์ด)
encodings_to_try = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
encoding = None
for enc in encodings_to_try:
try:
df = pd.read_csv(BytesIO(file_bytes), encoding=enc)
encoding = enc
break
except UnicodeDecodeError:
continue
except Exception as e:
return None, f"CSV ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}"
if df is None:
return None, "์ผ๋ฐ์ ์ธ ์ธ์ฝ๋ฉ์ผ๋ก CSV๋ฅผ ์ฝ์ง ๋ชปํ์ต๋๋ค. ํ์ผ์ด ํน์ดํ ์ธ์ฝ๋ฉ์ ์ฌ์ฉํ ์ ์์ต๋๋ค."
output_file = "output.parquet"
df.to_parquet(output_file, index=False)
converted_format = "Parquet"
# ๋ณํ: Parquet์์ CSV๋ก
elif conversion_type == "Parquet to CSV":
if file_extension != "parquet":
return None, "Parquet์์ CSV๋ก ๋ณํํ๋ ค๋ฉด Parquet ํ์ผ์ ์
๋ก๋ํด ์ฃผ์ธ์."
df = pd.read_parquet(BytesIO(file_bytes))
output_file = "output.csv"
df.to_csv(output_file, index=False, encoding='utf-8')
converted_format = "CSV"
else:
return None, "์๋ชป๋ ๋ณํ ์ ํ์ด ์ ํ๋์์ต๋๋ค."
# ์์ 10๊ฐ ํ์ ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์์ฑ
preview = df.head(10).to_string(index=False)
info_message = (
f"์
๋ ฅ ํ์ผ: {file_name}\n"
f"๋ณํ๋ ํ์ผ ํ์: {converted_format}\n"
)
if conversion_type == "CSV to Parquet" and encoding:
info_message += f"์ฌ์ฉ๋ ์ธ์ฝ๋ฉ: {encoding}\n"
info_message += f"\n๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 10๊ฐ ํ):\n{preview}"
return output_file, info_message
except Exception as e:
return None, f"๋ณํ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
# Custom CSS for a modern, polished look: page background and font, a centred
# card-style container, heading colour, spacing for input/output areas, and a
# green button (with hover state) targeted through the "gradio-button" class.
custom_css = """
body {
background-color: #f4f4f4;
font-family: 'Helvetica Neue', Arial, sans-serif;
}
.gradio-container {
max-width: 900px;
margin: 40px auto;
padding: 20px;
background-color: #ffffff;
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
h1, h2 {
color: #333333;
}
.gradio-input, .gradio-output {
margin-bottom: 20px;
}
.gradio-button {
background-color: #4CAF50 !important;
color: white !important;
border: none !important;
padding: 10px 20px !important;
font-size: 16px !important;
border-radius: 6px !important;
cursor: pointer;
}
.gradio-button:hover {
background-color: #45a049 !important;
}
"""
with gr.Blocks(css=custom_css, title="CSV <-> Parquet ๋ณํ๊ธฐ") as demo:
gr.Markdown("# CSV <-> Parquet ๋ณํ๊ธฐ")
gr.Markdown("CSV ๋๋ Parquet ํ์ผ์ ์
๋ก๋ํ๊ณ ๋ณํ ์ ํ์ ์ ํํ์ธ์. ์ฑ์ ํ์ผ์ ๋ฐ๋ ํ์์ผ๋ก ๋ณํํ๊ณ ์์ 10๊ฐ ํ์ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ํ์ํฉ๋๋ค.")
with gr.Row():
with gr.Column(scale=1):
input_file = gr.File(label="CSV ๋๋ Parquet ํ์ผ ์
๋ก๋")
with gr.Column(scale=1):
conversion_type = gr.Radio(
choices=["CSV to Parquet", "Parquet to CSV"],
label="๋ณํ ์ ํ",
value="CSV to Parquet" # ๊ธฐ๋ณธ๊ฐ ์ค์
)
convert_button = gr.Button("๋ณํ", elem_classes=["gradio-button"])
with gr.Row():
output_file = gr.File(label="๋ณํ๋ ํ์ผ")
preview = gr.Textbox(label="๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 10๊ฐ ํ)", lines=15)
convert_button.click(fn=convert_file, inputs=[input_file, conversion_type], outputs=[output_file, preview])
gr.Markdown("""
### ์ฐธ๊ณ :
- ์ด ๋ณํ๊ธฐ๋ ์ผ๋ฐ์ ์ธ CSV ์ธ์ฝ๋ฉ(UTF-8, Latin-1, ISO-8859-1, CP1252)์ ์๋ํฉ๋๋ค
- Parquet ํ์ผ์ CSV๋ณด๋ค ๋ฐ์ดํฐ ํ์
์ ๋ ์ ๋ณด์กดํฉ๋๋ค
- ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ ๋ฐ์ดํฐ์ ์ฒ์ 10ํ๋ง ํ์ํฉ๋๋ค
""")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()