traopia committed "spacey"
Commit: 9eafc14 · Parent(s): 115de81
Files changed: src/visual_qa.py (+13 -2)
src/visual_qa.py CHANGED

@@ -6,7 +6,7 @@ import torch
 import os
 os.environ["TOKENIZERS_PARALLELISM"] = "false"

-
+from datasets import load_dataset
 import chromadb

 from datetime import datetime
@@ -72,7 +72,18 @@ model = CLIPModel.from_pretrained(model_name).to(device)
 processor = CLIPProcessor.from_pretrained(model_name)

 def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
-
+
+
+
+    # Load the dataset (no split specified, so the whole dataset)
+    dataset = load_dataset("traopia/fashion_show_data_all_embeddings.json")
+    # This returns a DatasetDict with splits as keys (usually 'train' by default).
+    # To get the whole dataset, you can access the first split like this:
+    split_name = list(dataset.keys())[0]
+    full_dataset = dataset[split_name]
+
+    # Convert to pandas DataFrame
+    df_emb = full_dataset.to_pandas()
     df_emb = df_emb.drop_duplicates(subset='image_urls')
     df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
     df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
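For reference, a minimal standalone sketch of the loading pattern this commit introduces. The cached wrapper and the helper name load_embeddings_df are suggestions, not part of the commit; it assumes the datasets and pandas packages are installed and the dataset repo id is reachable:

from functools import lru_cache

import pandas as pd
from datasets import load_dataset


@lru_cache(maxsize=1)
def load_embeddings_df() -> pd.DataFrame:
    """Load the embeddings dataset once and reuse the DataFrame across calls."""
    # With no split argument, load_dataset() returns a DatasetDict keyed by
    # split name (usually just 'train'); taking the first split yields all rows.
    dataset = load_dataset("traopia/fashion_show_data_all_embeddings.json")
    split_name = list(dataset.keys())[0]
    return dataset[split_name].to_pandas()

Because the commit loads and converts the dataset inside main_text_retrieve_images, every call repeats the DatasetDict-to-DataFrame conversion; load_dataset itself caches the download on disk, but a small in-process cache like the one above would also skip the repeated to_pandas() work.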