File size: 5,221 Bytes
d2b9031
49e25d2
ff86828
d2b9031
6a1564b
5ffc072
6a1564b
5ffc072
fa41b98
5ffc072
4f2568a
5ffc072
4f2568a
 
 
5ffc072
4f2568a
 
 
 
6a1564b
 
 
 
fa41b98
 
5ffc072
fa41b98
 
5ffc072
 
 
 
 
fa41b98
5ffc072
 
 
 
 
 
 
 
 
fa41b98
5ffc072
 
fa41b98
 
 
 
 
5ffc072
fa41b98
 
5ffc072
fa41b98
 
 
 
 
 
5ffc072
fa41b98
5ffc072
fa41b98
 
5ffc072
 
fa41b98
5ffc072
 
fa41b98
5ffc072
fa41b98
 
 
 
5ffc072
7773ef1
5ffc072
31c7995
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ffc072
 
 
31c7995
 
 
5ffc072
31c7995
fa41b98
 
5ffc072
 
fa41b98
31c7995
5ffc072
31c7995
 
5ffc072
 
31c7995
 
fa41b98
 
5ffc072
 
 
 
fa41b98
72dd3ca
fa41b98
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import shutil
import tempfile
from io import BytesIO

import gradio as gr
import pandas as pd

def convert_file(input_file, conversion_type):
    # ํŒŒ์ผ์ด ์—…๋กœ๋“œ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
    if input_file is None:
        return None, "ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
    
    # ํŒŒ์ผ ๋‚ด์šฉ ์ฝ๊ธฐ
    try:
        # ํŒŒ์ผ ๊ฐ์ฒด์—์„œ ์ฝ๊ธฐ ์‹œ๋„
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        # AttributeError๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด input_file์„ ํŒŒ์ผ ๊ฒฝ๋กœ๋กœ ์ฒ˜๋ฆฌ
        file_name = input_file
        with open(file_name, "rb") as f:
            file_bytes = f.read()
    
    file_extension = file_name.lower().split('.')[-1]
    df = None
    output_file = None
    converted_format = None
    
    try:
        # ๋ณ€ํ™˜: CSV์—์„œ Parquet์œผ๋กœ
        if conversion_type == "CSV to Parquet":
            if file_extension != "csv":
                return None, "CSV์—์„œ Parquet์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋ฉด CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
            
            # ๋‹ค์–‘ํ•œ ์ธ์ฝ”๋”ฉ์„ ์‹œ๋„ (chardet ์—†์ด)
            encodings_to_try = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
            encoding = None
            
            for enc in encodings_to_try:
                try:
                    df = pd.read_csv(BytesIO(file_bytes), encoding=enc)
                    encoding = enc
                    break
                except UnicodeDecodeError:
                    continue
                except Exception as e:
                    return None, f"CSV ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
            
            if df is None:
                return None, "์ผ๋ฐ˜์ ์ธ ์ธ์ฝ”๋”ฉ์œผ๋กœ CSV๋ฅผ ์ฝ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ์ด ํŠน์ดํ•œ ์ธ์ฝ”๋”ฉ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
            
            output_file = "output.parquet"
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"
            
        # ๋ณ€ํ™˜: Parquet์—์„œ CSV๋กœ
        elif conversion_type == "Parquet to CSV":
            if file_extension != "parquet":
                return None, "Parquet์—์„œ CSV๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋ฉด Parquet ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”."
            
            df = pd.read_parquet(BytesIO(file_bytes))
            output_file = "output.csv"
            df.to_csv(output_file, index=False, encoding='utf-8')
            converted_format = "CSV"
        else:
            return None, "์ž˜๋ชป๋œ ๋ณ€ํ™˜ ์œ ํ˜•์ด ์„ ํƒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        
        # ์ƒ์œ„ 10๊ฐœ ํ–‰์˜ ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ
        preview = df.head(10).to_string(index=False)
        info_message = (
            f"์ž…๋ ฅ ํŒŒ์ผ: {file_name}\n"
            f"๋ณ€ํ™˜๋œ ํŒŒ์ผ ํ˜•์‹: {converted_format}\n"
        )
        if conversion_type == "CSV to Parquet" and encoding:
            info_message += f"์‚ฌ์šฉ๋œ ์ธ์ฝ”๋”ฉ: {encoding}\n"
        
        info_message += f"\n๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ƒ์œ„ 10๊ฐœ ํ–‰):\n{preview}"
        
        return output_file, info_message
    
    except Exception as e:
        return None, f"๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"

# ๋ชจ๋˜ํ•˜๊ณ  ์„ธ๋ จ๋œ ์Šคํƒ€์ผ์„ ์œ„ํ•œ ์‚ฌ์šฉ์ž ์ •์˜ CSS
custom_css = """
body {
    background-color: #f4f4f4;
    font-family: 'Helvetica Neue', Arial, sans-serif;
}
.gradio-container {
    max-width: 900px;
    margin: 40px auto;
    padding: 20px;
    background-color: #ffffff;
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
h1, h2 {
    color: #333333;
}
.gradio-input, .gradio-output {
    margin-bottom: 20px;
}
.gradio-button {
    background-color: #4CAF50 !important;
    color: white !important;
    border: none !important;
    padding: 10px 20px !important;
    font-size: 16px !important;
    border-radius: 6px !important;
    cursor: pointer;
}
.gradio-button:hover {
    background-color: #45a049 !important;
}
"""

with gr.Blocks(css=custom_css, title="CSV <-> Parquet ๋ณ€ํ™˜๊ธฐ") as demo:
    gr.Markdown("# CSV <-> Parquet ๋ณ€ํ™˜๊ธฐ")
    gr.Markdown("CSV ๋˜๋Š” Parquet ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๊ณ  ๋ณ€ํ™˜ ์œ ํ˜•์„ ์„ ํƒํ•˜์„ธ์š”. ์•ฑ์€ ํŒŒ์ผ์„ ๋ฐ˜๋Œ€ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๊ณ  ์ƒ์œ„ 10๊ฐœ ํ–‰์˜ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
    
    with gr.Row():
        with gr.Column(scale=1):
            input_file = gr.File(label="CSV ๋˜๋Š” Parquet ํŒŒ์ผ ์—…๋กœ๋“œ")
        with gr.Column(scale=1):
            conversion_type = gr.Radio(
                choices=["CSV to Parquet", "Parquet to CSV"], 
                label="๋ณ€ํ™˜ ์œ ํ˜•",
                value="CSV to Parquet"  # ๊ธฐ๋ณธ๊ฐ’ ์„ค์ •
            )
    
    convert_button = gr.Button("๋ณ€ํ™˜", elem_classes=["gradio-button"])
    
    with gr.Row():
        output_file = gr.File(label="๋ณ€ํ™˜๋œ ํŒŒ์ผ")
        preview = gr.Textbox(label="๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ƒ์œ„ 10๊ฐœ ํ–‰)", lines=15)
    
    convert_button.click(fn=convert_file, inputs=[input_file, conversion_type], outputs=[output_file, preview])
    
    gr.Markdown("""
    ### ์ฐธ๊ณ :
    - ์ด ๋ณ€ํ™˜๊ธฐ๋Š” ์ผ๋ฐ˜์ ์ธ CSV ์ธ์ฝ”๋”ฉ(UTF-8, Latin-1, ISO-8859-1, CP1252)์„ ์‹œ๋„ํ•ฉ๋‹ˆ๋‹ค
    - Parquet ํŒŒ์ผ์€ CSV๋ณด๋‹ค ๋ฐ์ดํ„ฐ ํƒ€์ž…์„ ๋” ์ž˜ ๋ณด์กดํ•ฉ๋‹ˆ๋‹ค
    - ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋Š” ๋ฐ์ดํ„ฐ์˜ ์ฒ˜์Œ 10ํ–‰๋งŒ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค
    """)

if __name__ == "__main__":
    demo.launch()