Commit e9b2c9e · 1 parent: c57012a
traopia committed
queries and other fizes
Files changed:
- app_fashionDB.py +116 -9
- example_queries.py +28 -0
- search_fashionDB.py +44 -12
app_fashionDB.py
CHANGED
@@ -4,16 +4,18 @@ import numpy as np
 from search_fashionDB import search_images_by_text, get_similar_images, search_images_by_image
 import requests
 from io import BytesIO
+import urllib.parse

 import requests
 from io import BytesIO
-
+from example_queries import EXAMPLE_QUERIES

 #@st.cache_data(show_spinner="Loading FashionDB...")
 def load_data_hf():
     # Load the Parquet file directly from Hugging Face
     df_url = "https://huggingface.co/datasets/traopia/FashionDB/resolve/main/data_vogue_final.parquet"
     df = pd.read_parquet(df_url)
+    df = df.drop_duplicates(subset=["URL"])
     df = df.explode("image_urls_sample")
     df = df.rename(columns={"image_urls_sample":"url", "URL":"collection"})

@@ -39,8 +41,10 @@ df, df_fh, df_designers, embeddings, embeddings_urls = load_data_hf()
 # Suppose embeddings is a numpy array (N, D) and embeddings_urls is a list of urls/keys
 embedding_map = {url: i for i, url in enumerate(embeddings_urls)}

+
 # Filter and search
-def filter_and_search(fashion_house, designer, category, season, start_year, end_year, query):
+def filter_and_search(fashion_house, designer, category, season, start_year, end_year, query,
+                      fh_country, fh_city, designer_nationality, designer_birth_year_start, designer_birth_year_end):
     filtered = df.copy()

     if fashion_house:
@@ -54,6 +58,30 @@ def filter_and_search(fashion_house, designer, category, season, start_year, end
     filtered = filtered[filtered['season'].isin(season)]
     filtered = filtered[(filtered['year'] >= start_year) & (filtered['year'] <= end_year)]

+    # Fashion house filters via df_fh (country, city)
+    if (fh_country and len(fh_country) > 0) or (fh_city and len(fh_city) > 0):
+        fh_cols = [c for c in ['fashion_house', 'country', 'city'] if c in df_fh.columns]
+        if 'fashion_house' in fh_cols:
+            merged = filtered.merge(df_fh[fh_cols], on='fashion_house', how='left')
+            if fh_country and 'country' in merged.columns:
+                merged = merged[merged['country'].isin(fh_country)]
+            if fh_city and 'city' in merged.columns:
+                merged = merged[merged['city'].isin(fh_city)]
+            filtered = merged.drop_duplicates(subset=['url'])
+
+    # Designer filters via df_designers (nationality, year_birth)
+    if (designer_nationality and len(designer_nationality) > 0) or (designer_birth_year_start is not None or designer_birth_year_end is not None):
+        des_cols = [c for c in ['designer_name', 'nationality', 'year_birth'] if c in df_designers.columns]
+        if 'designer_name' in des_cols:
+            merged = filtered.merge(df_designers[des_cols], on='designer_name', how='left')
+            if designer_nationality and 'nationality' in merged.columns:
+                merged = merged[merged['nationality'].isin(designer_nationality)]
+            if (designer_birth_year_start is not None or designer_birth_year_end is not None) and 'year_birth' in merged.columns:
+                by_start = designer_birth_year_start if designer_birth_year_start is not None else merged['year_birth'].min()
+                by_end = designer_birth_year_end if designer_birth_year_end is not None else merged['year_birth'].max()
+                merged = merged[(merged['year_birth'] >= by_start) & (merged['year_birth'] <= by_end)]
+            filtered = merged.drop_duplicates(subset=['url'])
+
     if query:
         image_urls, metadata = search_images_by_text(query, filtered, embeddings, embeddings_urls)
     else:
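Note (not part of the commit): a minimal, self-contained sketch of the join-then-filter pattern the hunk above introduces. Column names (url, fashion_house, country) follow the diff; the data values are invented.

import pandas as pd

df = pd.DataFrame({
    "url": ["img1", "img2", "img3"],
    "fashion_house": ["Chanel", "Prada", "Chanel"],
})
df_fh = pd.DataFrame({
    "fashion_house": ["Chanel", "Prada"],
    "country": ["France", "Italy"],
})

fh_country = ["France"]  # a user selection from the new dropdown

# Left-join house metadata onto the per-image rows, filter on it, then
# dedupe on "url" so a house with several metadata rows cannot duplicate images.
merged = df.merge(df_fh[["fashion_house", "country"]], on="fashion_house", how="left")
merged = merged[merged["country"].isin(fh_country)]
filtered = merged.drop_duplicates(subset=["url"])
print(filtered["url"].tolist())  # ['img1', 'img3']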
@@ -104,6 +132,20 @@ with gr.Blocks() as demo:
             start_year = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
             end_year = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)

+            # Additional filters banner for Fashion House and Designer metadata
+            with gr.Row():
+                fh_countries = sorted(df_fh['country'].dropna().unique()) if 'country' in df_fh.columns else []
+                fh_cities = sorted(df_fh['city'].dropna().unique()) if 'city' in df_fh.columns else []
+                designer_places = sorted(df_designers['nationality'].dropna().unique()) if 'nationality' in df_designers.columns else []
+                birth_year_min = int(df_designers['year_birth'].min()) if 'year_birth' in df_designers.columns else 1900
+                birth_year_max = int(df_designers['year_birth'].max()) if 'year_birth' in df_designers.columns else 2024
+
+                fh_country = gr.Dropdown(label="Country of Fashion House", choices=fh_countries, multiselect=True)
+                fh_city = gr.Dropdown(label="HQ of Fashion House", choices=fh_cities, multiselect=True)
+                designer_nationality = gr.Dropdown(label="Designer Nationality", choices=designer_places, multiselect=True)
+                designer_birth_year_start = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_min, step=1, label="Designer Birth Year Start")
+                designer_birth_year_end = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_max, step=1, label="Designer Birth Year End")
+
             query = gr.Textbox(label="Search by text", placeholder="e.g., pink dress")
             search_button = gr.Button("Search")

@@ -115,13 +157,13 @@ with gr.Blocks() as demo:
         metadata_state = gr.State([])
         selected_idx = gr.Number(value=0, visible=False)

-        def handle_search(fh, dis, cat, sea, sy, ey, q):
-            imgs, meta = filter_and_search(fh, dis, cat, sea, sy, ey, q)
+        def handle_search(fh, dis, cat, sea, sy, ey, q, fh_co, fh_ci, d_pob, d_by_start, d_by_end):
+            imgs, meta = filter_and_search(fh, dis, cat, sea, sy, ey, q, fh_co, fh_ci, d_pob, d_by_start, d_by_end)
             return imgs, meta, "", [], None

         search_button.click(
             handle_search,
-            inputs=[fashion_house, designer, category, season, start_year, end_year, query],
+            inputs=[fashion_house, designer, category, season, start_year, end_year, query, fh_country, fh_city, designer_nationality, designer_birth_year_start, designer_birth_year_end],
             outputs=[result_gallery, metadata_state, metadata_output, similar_gallery, reference_image]
         )

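Note (not part of the commit): the hunks above follow the standard Gradio wiring pattern, sketched below on a stripped-down example with invented component names. The inputs list maps positionally onto the handler's arguments and the returned values fill outputs, which is why handle_search, filter_and_search, and the inputs list all grow by the same five filter values.

import gradio as gr

with gr.Blocks() as demo:
    query = gr.Textbox(label="Search by text")
    country = gr.Dropdown(label="Country", choices=["France", "Italy"], multiselect=True)
    button = gr.Button("Search")
    out = gr.Markdown()

    # inputs map positionally onto (q, co); the returned string fills `out`.
    def handle(q, co):
        return f"query={q!r}, countries={co}"

    button.click(handle, inputs=[query, country], outputs=[out])

# demo.launch()  # launching is not needed for the sketch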
@@ -174,6 +216,14 @@ with gr.Blocks() as demo:
             start_year_img = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
             end_year_img = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)

+            # Additional banner for FH/Designer filters in image search
+            with gr.Row():
+                fh_country_img = gr.Dropdown(label="Country of Fashion House", choices=fh_countries, multiselect=True)
+                fh_city_img = gr.Dropdown(label="HQ of Fashion House", choices=fh_cities, multiselect=True)
+                designer_nationality_img = gr.Dropdown(label="Designer Nationality", choices=designer_places, multiselect=True)
+                designer_birth_year_start_img = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_min, step=1, label="Designer Birth Year Start")
+                designer_birth_year_end_img = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_max, step=1, label="Designer Birth Year End")
+
             uploaded_image = gr.Image(label="Upload an image", type="pil")
             search_by_image_button = gr.Button("Search by Image")

@@ -182,23 +232,46 @@ with gr.Blocks() as demo:
         uploaded_metadata_output = gr.Markdown()
         uploaded_reference_image = gr.Image(label="Reference Image", interactive=False)

-        def handle_search_by_image(image, fh, dis, cat, sea, sy, ey):
+        def handle_search_by_image(image, fh, dis, cat, sea, sy, ey, fh_co, fh_ci, d_pob, d_by_start, d_by_end):
             if image is None:
                 return [], "Please upload an image first.", None
             # Apply filters
             filtered_df = df.copy()
             if fh: filtered_df = filtered_df[filtered_df["fashion_house"].isin(fh)]
-            if dis: filtered_df = filtered_df[filtered_df["designer_name"].isin(
+            if dis: filtered_df = filtered_df[filtered_df["designer_name"].isin(dis)]
             if cat: filtered_df = filtered_df[filtered_df["category"].isin(cat)]
             if sea: filtered_df = filtered_df[filtered_df["season"].isin(sea)]
             filtered_df = filtered_df[(filtered_df["year"] >= sy) & (filtered_df["year"] <= ey)]

+            # FH/Designer metadata filters via joins
+            if (fh_co and len(fh_co) > 0) or (fh_ci and len(fh_ci) > 0):
+                fh_cols = [c for c in ['fashion_house', 'country', 'city'] if c in df_fh.columns]
+                if 'fashion_house' in fh_cols:
+                    merged = filtered_df.merge(df_fh[fh_cols], on='fashion_house', how='left')
+                    if fh_co and 'country' in merged.columns:
+                        merged = merged[merged['country'].isin(fh_co)]
+                    if fh_ci and 'city' in merged.columns:
+                        merged = merged[merged['city'].isin(fh_ci)]
+                    filtered_df = merged.drop_duplicates(subset=['url'])
+
+            if (d_pob and len(d_pob) > 0) or (d_by_start is not None or d_by_end is not None):
+                des_cols = [c for c in ['designer_name', 'nationality', 'year_birth'] if c in df_designers.columns]
+                if 'designer_name' in des_cols:
+                    merged = filtered_df.merge(df_designers[des_cols], on='designer_name', how='left')
+                    if d_pob and 'nationality' in merged.columns:
+                        merged = merged[merged['nationality'].isin(d_pob)]
+                    if (d_by_start is not None or d_by_end is not None) and 'year_birth' in merged.columns:
+                        by_start = d_by_start if d_by_start is not None else merged['year_birth'].min()
+                        by_end = d_by_end if d_by_end is not None else merged['year_birth'].max()
+                        merged = merged[(merged['year_birth'] >= by_start) & (merged['year_birth'] <= by_end)]
+                    filtered_df = merged.drop_duplicates(subset=['url'])
+
             images, metadata = search_images_by_image(image, filtered_df, embeddings, embeddings_urls)
             return images, metadata, ""

         search_by_image_button.click(
             handle_search_by_image,
-            inputs=[uploaded_image, fashion_house_img, designer_img, category_img, season_img, start_year_img, end_year_img],
+            inputs=[uploaded_image, fashion_house_img, designer_img, category_img, season_img, start_year_img, end_year_img, fh_country_img, fh_city_img, designer_nationality_img, designer_birth_year_start_img, designer_birth_year_end_img],
             outputs=[uploaded_result_gallery, uploaded_metadata_state, uploaded_metadata_output]
         )

@@ -257,12 +330,46 @@ with gr.Blocks() as demo:
         )

     with gr.Tab("Query on FashionDB"):
-
+
+        # Front-page SPARQL query UI and examples
+        with gr.Accordion("Query FashionDB (SPARQL)", open=True):
         gr.Markdown(
             "### 🔗 Query FashionDB SPARQL Endpoint\n"
             "[Click here to open the SPARQL endpoint](https://fashionwiki.wikibase.cloud/query/)",
             elem_id="sparql-link"
         )
+            with gr.Row():
+                example_dropdown = gr.Dropdown(label="Example SPARQL Queries", choices=list(EXAMPLE_QUERIES.keys()))
+            query_text = gr.Textbox(label="SPARQL Query", lines=10)
+            open_link_md = gr.Markdown()
+
+            def on_example_change(example_key):
+                if not example_key or example_key not in EXAMPLE_QUERIES:
+                    return "", ""
+                q = EXAMPLE_QUERIES[example_key].strip()
+                encoded = urllib.parse.quote(q)
+                link = f"[Open in SPARQL Editor](https://fashionwiki.wikibase.cloud/query/#query={encoded})"
+                return q, link
+
+            example_dropdown.change(
+                on_example_change,
+                inputs=[example_dropdown],
+                outputs=[query_text, open_link_md]
+            )
+
+            def on_query_change(q):
+                q = (q or "").strip()
+                if not q:
+                    return ""
+                encoded = urllib.parse.quote(q)
+                return f"[Open in SPARQL Editor](https://fashionwiki.wikibase.cloud/query/#query={encoded})"
+
+            query_text.change(
+                on_query_change,
+                inputs=[query_text],
+                outputs=[open_link_md]
+            )
+

         back_button = gr.Button("Back to Home")

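Note (not part of the commit): the link-building trick used by on_example_change and on_query_change above, in isolation. The Wikibase query service pre-fills its editor from the percent-encoded #query= URL fragment; the query text here is a placeholder.

import urllib.parse

q = "SELECT ?designerLabel WHERE { ?designer ?p ?o } LIMIT 5"  # any SPARQL text
encoded = urllib.parse.quote(q)  # percent-encodes spaces, newlines, quotes
print(f"https://fashionwiki.wikibase.cloud/query/#query={encoded}")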
example_queries.py
ADDED
@@ -0,0 +1,28 @@
+# Example SPARQL queries for FashionDB
+EXAMPLE_QUERIES = {
+    # "All fashion houses with country and city": (
+    #     """
+    #     PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
+    #     PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
+    #     PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
+    #     PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
+    #     PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
+    #     PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
+
+    #     SELECT ?fashion_house ?fashion_houseLabel ?countryLabel ?cityLabel WHERE {
+    #         ?fashion_house wbt:P31 wb:Q783794;  # instance of fashion house (example)
+    #             wbt:P17 ?country.  # country
+    #         OPTIONAL { ?fashion_house wbt:P131 ?city. }  # located in the administrative territorial entity
+    #         SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
+    #     }
+    #     LIMIT 50
+    #     """
+    # ),
+    "which designer were born in 1969": (
+        "PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>\nPREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>\nPREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/> \nPREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/> \nPREFIX p: <https://fashionwiki.wikibase.cloud/prop/> \nPREFIX prov: <http://www.w3.org/ns/prov#> \nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n\nSELECT ?designerLabel ?birthdate WHERE {\n ?designer wbt:P3 ?birthdate .\n FILTER (YEAR(?birthdate) = 1969)\n\n SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". }\n}"
+    ),
+
+    "Which designers studied at Central Saint Martins?": (
+        "PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>\nPREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>\nPREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/> \nPREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>\nPREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/> \nPREFIX p: <https://fashionwiki.wikibase.cloud/prop/> \nPREFIX prov: <http://www.w3.org/ns/prov#> \n\nSELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {\n # Restrict to designers who are instances of fashion designer (Q5)\n\n ?fashion_designer wbt:P2 wb:Q5.\n ?fashion_designer wbt:P9 ?educated_at.\n ?educated_at rdfs:label 'Central Saint Martins'@en . \n\n\n # Retrieve references from the statement\n OPTIONAL {\n ?statement prov:wasDerivedFrom ?reference.\n ?reference pr:P24 ?reference_URL.\n }\n\n # Retrieve labels for the fashion designer\n SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \n} \nGROUP BY ?fashion_designerLabel \nORDER BY ?fashion_designerLabel"
+    ),
+}
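Note (not part of the commit): a quick way to exercise the new module standalone, mirroring how app_fashionDB.py consumes it for the dropdown choices.

from example_queries import EXAMPLE_QUERIES

print(list(EXAMPLE_QUERIES.keys()))
print(EXAMPLE_QUERIES["Which designers studied at Central Saint Martins?"])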
search_fashionDB.py
CHANGED
@@ -80,10 +80,17 @@ def search_images_by_image(uploaded_image, df, embeddings,embeddings_urls, top_
     sims = cosine_similarity([image_emb], embeddings)[0]
     top_indices = np.argsort(sims)[::-1][:top_k]
     top_urls = [embeddings_urls[i] for i in top_indices]
-    metadata
-
+    # Build metadata in the same order as top_urls
+    df_subset = df[df["url"].isin(top_urls)].copy()
+    records = df_subset.to_dict(orient="records")
+    by_url = {}
+    for r in records:
+        u = r.get("url")
+        if u is not None and u not in by_url:
+            by_url[u] = r
+    ordered_metadata = [by_url[u] for u in top_urls if u in by_url]

-    return top_urls,
+    return top_urls, ordered_metadata



@@ -97,16 +104,41 @@ def search_images_by_text(text, df, embeddings, embeddings_urls, top_k=30):
     with torch.no_grad():
         text_emb = model.get_text_features(**inputs).cpu().numpy()

-
-
-
+    # Build URL -> index map once per call
+    url_to_index = {str(url): idx for idx, url in enumerate(embeddings_urls)}
+    # Collect indices of embeddings corresponding to filtered df URLs
+    filtered_urls = df["url"].astype(str).tolist()
+    filtered_indices = [url_to_index[u] for u in filtered_urls if u in url_to_index]
+
+    if not filtered_indices:
+        return [], []
+
+    embeddings_filtered = embeddings[filtered_indices]
     sims = cosine_similarity(text_emb, embeddings_filtered)[0]
-    sims = np.asarray(sims).flatten()
-
-
-
-
-
+    sims = np.asarray(sims).flatten()
+
+    # Rank within the filtered set
+    top_indices_local = np.argsort(sims)[::-1][:top_k]
+    # Map local ranks back to URLs in the same order, dedupe while preserving order
+    ranked_urls = [embeddings_urls[filtered_indices[i]] for i in top_indices_local]
+    seen = set()
+    top_urls = []
+    for u in ranked_urls:
+        if u not in seen:
+            seen.add(u)
+            top_urls.append(u)
+
+    # Build metadata in the same order as top_urls
+    df_subset = df[df["url"].isin(top_urls)].copy()
+    records = df_subset.to_dict(orient="records")
+    by_url = {}
+    for r in records:
+        u = r.get("url")
+        if u is not None and u not in by_url:
+            by_url[u] = r
+    ordered_metadata = [by_url[u] for u in top_urls if u in by_url]
+
+    return top_urls, ordered_metadata

 def get_similar_images(df, image_key, embeddings, embedding_map, embeddings_urls, top_k=5):
     if image_key not in embedding_map:
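Note (not part of the commit): the filtered ranking and metadata-ordering logic above, on toy data. The embedding matrix is restricted to rows whose URL survives the filters, similarities are ranked inside that subset, local ranks are mapped back to URLs, and a url-keyed map is read out in top_urls order so images and captions stay aligned. All shapes and values are invented; a 2-D vector stands in for the CLIP text embedding.

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

embeddings = np.array([[1.0, 0.0], [0.0, 1.0], [0.7, 0.7]])
embeddings_urls = ["a.jpg", "b.jpg", "c.jpg"]
filtered_urls = ["a.jpg", "c.jpg"]  # URLs that survived the metadata filters

# Restrict the embedding matrix to the filtered URLs.
url_to_index = {u: i for i, u in enumerate(embeddings_urls)}
filtered_indices = [url_to_index[u] for u in filtered_urls if u in url_to_index]

text_emb = np.array([[0.9, 0.1]])  # stand-in for a CLIP text embedding
sims = cosine_similarity(text_emb, embeddings[filtered_indices])[0]

# Rank inside the subset, then map local ranks back to global URLs.
top_local = np.argsort(sims)[::-1]
top_urls = [embeddings_urls[filtered_indices[i]] for i in top_local]

# Read a url-keyed record map back in top_urls order (records arrive from the
# DataFrame in arbitrary order).
records = [{"url": "c.jpg", "season": "FW"}, {"url": "a.jpg", "season": "SS"}]
by_url = {r["url"]: r for r in records}
ordered_metadata = [by_url[u] for u in top_urls if u in by_url]
print(top_urls, [r["season"] for r in ordered_metadata])  # ['a.jpg', 'c.jpg'] ['SS', 'FW']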