File size: 1,467 Bytes
d518747
 
 
 
 
 
 
 
b53bd23
d518747
 
 
b53bd23
 
 
 
 
 
 
d518747
 
b53bd23
d518747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b53bd23
 
d518747
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from PIL import Image
import io
import base64

def image_to_parquet(files):
    """Pack uploaded images into one Parquet file and return its path.

    Every image is re-encoded as PNG, base64-encoded, and stored as one row
    with two string columns: ``name`` (the original file name) and ``data``
    (the base64 PNG payload).

    Args:
        files: Value of a multi-file ``gr.File`` input. Entries may be plain
            path strings (``type="filepath"``) or upload objects exposing
            ``.name`` (temp path) and optionally ``.orig_name``. ``None`` or
            an empty list means nothing was uploaded.

    Returns:
        Path of the written ``.parquet`` file, or ``None`` when there is
        nothing to convert (a ``gr.File`` output treats ``None`` as empty).
    """
    # Imported locally so this function does not depend on edits elsewhere.
    import os
    import tempfile

    # Clicking "Convert" with no upload hands us None; don't crash on it.
    if not files:
        return None

    image_data = []
    for file_info in files:
        # Gradio passes str paths for type="filepath"; older versions pass
        # tempfile-like wrappers carrying .name / .orig_name.
        if isinstance(file_info, (str, os.PathLike)):
            path = os.fspath(file_info)
        else:
            path = file_info.name
        display_name = getattr(file_info, "orig_name", None) or os.path.basename(path)

        # Normalise every input format to PNG bytes.
        with Image.open(path) as img:
            buffered = io.BytesIO()
            img.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        image_data.append({"name": display_name, "data": img_str})

    df = pd.DataFrame(image_data)
    table = pa.Table.from_pandas(df)

    # A gr.File output needs a path on disk, not an in-memory buffer, so the
    # table is written to a temp file that outlives this call (delete=False).
    with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as out_file:
        pq.write_table(table, out_file)

    return out_file.name

def download_parquet(file):
    """Return *file* unchanged (identity pass-through)."""
    return file

# Gradio interface: upload images, press the button, receive a Parquet file.
with gr.Blocks() as demo:
    # One row holding the multi-image upload and the read-only result slot.
    with gr.Row():
        image_input = gr.File(label="Upload Images", type="filepath", file_count="multiple", file_types=["image"])
        # Despite its name this is a File component, used as the output.
        download_button = gr.File(label="Download Parquet File", interactive=False)
        
    convert_button = gr.Button("Convert to Parquet")
    
    # On click, pass the uploads to image_to_parquet and route its return
    # value into the output File component.
    convert_button.click(fn=image_to_parquet, inputs=[image_input], outputs=[download_button])

# Runs at module level (no __main__ guard), so importing this file also launches the app.
demo.launch()