File size: 6,956 Bytes
817809c
 
85e469d
 
 
 
 
a63daec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85e469d
 
 
 
 
 
 
 
 
 
 
 
58b25bf
a63daec
85e469d
 
58b25bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85e469d
 
 
58b25bf
85e469d
 
 
 
 
 
 
 
58b25bf
85e469d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817809c
85e469d
 
 
 
 
 
58b25bf
 
 
 
 
 
a63daec
85e469d
 
 
 
58b25bf
85e469d
58b25bf
85e469d
 
 
 
 
 
 
 
 
 
58b25bf
85e469d
 
 
 
 
 
 
 
 
 
 
 
 
 
a63daec
 
85e469d
a63daec
 
85e469d
a63daec
 
85e469d
 
 
 
a63daec
 
 
 
 
 
 
 
 
 
 
 
85e469d
817809c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import gradio as gr
import pandas as pd
import numpy as np
from search import search_images_by_text, get_similar_images
import requests
from io import BytesIO

def create_collection_url(row):
    """Build the Vogue fashion-show URL for one dataset row.

    The result has the shape
    ``<base>[<city>-]<season>-<year>[-<category>]/<designer>``. The city and
    category segments are optional: each is left out when the field is
    NaN/None, falsy, or the literal text "nan".

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys "season", "year", "category", "designer" and "city".

    Returns:
        The collection URL as a string.
    """

    def _optional(value):
        # A field counts as present only if it is non-null, truthy and not
        # the string "nan"; present values are lower-cased.
        present = pd.notna(value) and value and str(value).lower() != "nan"
        return str(value).lower() if present else None

    category = _optional(row["category"])
    city = _optional(row["city"])
    if city is not None:
        # Spaces become hyphens for the city (and designer), not the category.
        city = city.replace(" ", "-")
    designer = str(row["designer"]).lower().replace(" ", "-")

    # Assemble the "<city>-<season>-<year>-<category>" show slug.
    show_parts = [str(row["season"]).lower(), str(row["year"])]
    if city:
        show_parts.insert(0, city)
    if category is not None:
        show_parts.append(category)

    return "https://www.vogue.com/fashion-shows/" + "-".join(show_parts) + f"/{designer}"

import requests
from io import BytesIO
#@st.cache_data(show_spinner="Loading FashionDB...")
def load_data_hf(timeout=60):
    """Download the small Vogue Runway dataset and its image embeddings.

    The metadata is read from a Parquet file and the embeddings from a
    ``.npy`` file, both hosted in the ``traopia/vogue_runway_small`` dataset
    on Hugging Face. A "collection" column holding the Vogue collection URL
    is added to the metadata.

    Args:
        timeout: Timeout in seconds passed to ``requests.get`` for the
            embeddings download. Without one, a stalled connection would
            block start-up indefinitely.

    Returns:
        A ``(df, embeddings)`` tuple: the metadata DataFrame and the array
        loaded from the ``.npy`` file.
        NOTE(review): presumably row i of ``embeddings`` belongs to row i of
        ``df`` -- confirm against the dataset.

    Raises:
        requests.HTTPError: If the embeddings download returns an error
            status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Load the Parquet file directly from Hugging Face
    df_url = "https://huggingface.co/datasets/traopia/vogue_runway_small/resolve/main/VogueRunway.parquet"
    df = pd.read_parquet(df_url)

    # np.load cannot read from a URL, so fetch the bytes with requests first.
    npy_url = "https://huggingface.co/datasets/traopia/vogue_runway_small/resolve/main/VogueRunway_image.npy"
    response = requests.get(npy_url, timeout=timeout)
    response.raise_for_status()  # Raise error if download fails
    embeddings = np.load(BytesIO(response.content))

    # Derive the link to each look's collection page from its metadata.
    df['collection'] = df.apply(create_collection_url, axis=1)
    return df, embeddings


from huggingface_hub import hf_hub_download
def load_data1():
    """Load the full fashion-show dataset (with embeddings) from the Hub.

    Downloads ``fashion_show_data_all_embeddings.json`` (JSON Lines) from the
    ``traopia/fashion_show_data_all_embeddings`` repository, unwraps the
    list-valued embedding and image-URL columns to their first element,
    renames columns to the names used by the rest of the app ("designer",
    "url", "collection"), drops rows without an embedding and adds a "key"
    column equal to the new positional index.

    Access requires prior authentication, e.g. ``huggingface-cli login``.

    Returns:
        A ``(df, embeddings)`` tuple where ``embeddings`` is the row-wise
        stack of the "fashion_clip_image" column, so row i of the array
        corresponds to ``df`` row i (and to ``key == i``).
    """

    def _first_if_list(value):
        # List cells are reduced to their first element; an empty list counts
        # as missing. Anything else (None, NaN, scalar) passes through as is.
        if isinstance(value, list):
            return value[0] if value else None
        return value

    path = hf_hub_download(
        repo_id="traopia/fashion_show_data_all_embeddings",
        filename="fashion_show_data_all_embeddings.json",
        # The repository is referred to as a dataset (hf://datasets/...);
        # hf_hub_download looks in model repositories unless told otherwise.
        repo_type="dataset",
    )
    df = pd.read_json(path, lines=True)

    df["fashion_clip_image"] = df["fashion_clip_image"].apply(_first_if_list)
    # Same rule as for the embedding column, so empty lists and NaN cells no
    # longer raise and plain-string URLs are not cut to one character.
    df["image_urls"] = df["image_urls"].apply(_first_if_list)
    df = df.rename(columns={"fashion_house": "designer", "image_urls": "url", "URL": "collection"})

    # Rows without an embedding cannot be searched; drop them and renumber so
    # "key" matches the row position in the stacked embedding matrix.
    df = df.dropna(subset=["fashion_clip_image"])
    df = df.reset_index(drop=True)
    df["key"] = df.index
    embeddings = np.vstack(df["fashion_clip_image"].values)

    return df, embeddings

# Load the metadata frame and embedding matrix once, at import time; the
# functions and UI code in this module read them as module-level globals.
df, embeddings = load_data_hf()

# Filter and search
def filter_and_search(fashion_house, category, season, start_year, end_year, query):
    """Filter the global dataset and optionally rank it by a text query.

    Args:
        fashion_house: Designers to keep; an empty/None value disables the
            designer filter.
        category: Categories to keep; empty/None disables the filter.
        season: Seasons to keep; empty/None disables the filter.
        start_year: First year to include (inclusive).
        end_year: Last year to include (inclusive).
        query: Free-text search string. When empty/None, the first 30
            filtered rows are returned instead of search results.

    Returns:
        A ``(image_urls, metadata)`` tuple: the "url" column of the result
        rows as a list, and the same rows as a list of dict records.
    """
    # Combine all conditions into one boolean mask. Indexing with the mask
    # already yields a new frame, so copying the whole dataset on every
    # search is unnecessary.
    mask = (df['year'] >= start_year) & (df['year'] <= end_year)
    if fashion_house:
        mask &= df['designer'].isin(fashion_house)
    if category:
        mask &= df['category'].isin(category)
    if season:
        mask &= df['season'].isin(season)
    filtered = df[mask]

    if query:
        results = search_images_by_text(query, filtered, embeddings)
    else:
        # No query: just show the first rows that pass the filters.
        results = filtered.head(30)

    image_urls = results["url"].tolist()
    metadata = results.to_dict(orient="records")
    return image_urls, metadata

# Display metadata and similar
def show_metadata(idx, metadata):
    """Render the Markdown description of one gallery item.

    Args:
        idx: Position of the item within ``metadata``.
        metadata: List of dict records, one per gallery image.

    Returns:
        A Markdown string with one line per available field (designer,
        season, year, category), followed by a "View Collection" link in its
        own paragraph when the record has a non-null "collection" value.
        Returns an empty string when nothing is available.

    Raises:
        IndexError: If ``idx`` is outside ``metadata``.
    """
    item = metadata[idx]

    field_lines = [
        f"**{field.title()}**: {item[field]}"
        for field in ("designer", "season", "year", "category")
        if field in item and pd.notna(item[field])
    ]

    sections = []
    if field_lines:
        # Two trailing spaces before the newline form a Markdown hard line
        # break; a bare "\n" would be collapsed and all fields would run
        # together on one line.
        sections.append("  \n".join(field_lines))
    if 'collection' in item and pd.notna(item['collection']):
        sections.append(f"[View Collection]({item['collection']})")

    # A blank line separates the field list from the link paragraph.
    return "\n\n".join(sections)

def find_similar(idx, metadata):
    """Look up the images most similar to one gallery item.

    Args:
        idx: Position of the reference item within ``metadata``.
        metadata: List of dict records; the selected record must carry a
            "key" entry, which is handed to ``get_similar_images``.

    Returns:
        A ``(image_urls, metadata)`` tuple for the 5 rows returned by
        ``get_similar_images``: their "url" values as a list and the rows as
        a list of dict records.
    """
    selected = metadata[idx]
    neighbours = get_similar_images(df, selected["key"], embeddings, top_k=5)
    urls = neighbours["url"].tolist()
    records = neighbours.to_dict(orient="records")
    return urls, records

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 👗 FashionDB Explorer")

    # --- Filter controls ---------------------------------------------------
    with gr.Row():
        fashion_house = gr.Dropdown(label="Fashion House", choices=sorted(df["designer"].dropna().unique()), multiselect=True)
        category = gr.Dropdown(label="Category", choices=sorted(df["category"].dropna().unique()), multiselect=True)
        season = gr.Dropdown(label="Season", choices=sorted(df["season"].dropna().unique()), multiselect=True)

        min_year = int(df['year'].min())
        max_year = int(df['year'].max())

        # Preferred defaults are 2000-2024, clamped to the years present in
        # the data so a slider never starts outside its own range.
        default_start = min(max(2000, min_year), max_year)
        default_end = max(min(2024, max_year), min_year)

        start_year = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=default_start, step=1)
        end_year = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=default_end, step=1)

    query = gr.Textbox(label="Search", placeholder="e.g., pink dress")
    search_button = gr.Button("Search")

    # --- Result areas (created in display order) ---------------------------
    result_gallery = gr.Gallery(label="Search Results", columns=5, height="auto")
    metadata_output = gr.Markdown()
    similar_gallery = gr.Gallery(label="Similar Images", columns=5, height="auto")

    # Records behind result_gallery, and the index of the clicked result.
    metadata_state = gr.State([])
    selected_idx = gr.Number(value=0, visible=False)

    # Records behind similar_gallery, and the text for a clicked similar image.
    similar_metadata_state = gr.State()
    similar_metadata_output = gr.Markdown()
    show_similar_button = gr.Button("Show Similar Images")

    # --- Event handlers ----------------------------------------------------
    def handle_search(*args):
        """Run the search and reset everything tied to the previous results."""
        imgs, meta = filter_and_search(*args)
        # Besides the new results, clear the metadata text, the similar-image
        # gallery/state/text, and put the selection back on the first result
        # so a stale index cannot point past the new result list.
        return imgs, meta, "", [], [], "", 0

    search_button.click(
        handle_search,
        inputs=[fashion_house, category, season, start_year, end_year, query],
        outputs=[
            result_gallery,
            metadata_state,
            metadata_output,
            similar_gallery,
            similar_metadata_state,
            similar_metadata_output,
            selected_idx,
        ]
    )

    def handle_click(evt: gr.SelectData, metadata):
        """Remember which search result was clicked and show its metadata."""
        idx = evt.index
        md = show_metadata(idx, metadata)
        return idx, md

    result_gallery.select(
        handle_click,
        inputs=[metadata_state],
        outputs=[selected_idx, metadata_output]
    )

    def show_similar(idx, metadata):
        """Fill the similar-image gallery for the selected search result."""
        # Nothing searched yet (or nothing selectable): show an empty gallery
        # instead of raising inside find_similar.
        if not metadata or idx is None:
            return [], []
        position = int(idx)  # the hidden Number component may deliver a float
        if not 0 <= position < len(metadata):
            return [], []
        similar_images, similar_metadata = find_similar(position, metadata)
        return similar_images, similar_metadata

    show_similar_button.click(
        show_similar,
        inputs=[selected_idx, metadata_state],
        outputs=[similar_gallery, similar_metadata_state]
    )

    def handle_similar_click(evt: gr.SelectData, metadata):
        """Show the metadata of a clicked similar image."""
        # Deliberately does not write to selected_idx: that index refers to
        # the search-result list, and overwriting it with a position from the
        # similar gallery would make "Show Similar Images" pick the wrong
        # search result.
        return show_metadata(evt.index, metadata)

    similar_gallery.select(
        handle_similar_click,
        inputs=[similar_metadata_state],
        outputs=[similar_metadata_output]
    )

demo.launch()