traopia committed on
Commit
9eafc14
·
1 Parent(s): 115de81
Files changed (1) hide show
  1. src/visual_qa.py +13 -2
src/visual_qa.py CHANGED
@@ -6,7 +6,7 @@ import torch
6
  import os
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
-
10
  import chromadb
11
 
12
  from datetime import datetime
@@ -72,7 +72,18 @@ model = CLIPModel.from_pretrained(model_name).to(device)
72
  processor = CLIPProcessor.from_pretrained(model_name)
73
 
74
  def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
75
- df_emb = pd.read_json("data/fashion_show_data_all_embeddings.json", lines=True)
 
 
 
 
 
 
 
 
 
 
 
76
  df_emb = df_emb.drop_duplicates(subset='image_urls')
77
  df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
78
  df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
 
6
  import os
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
+ from datasets import load_dataset
10
  import chromadb
11
 
12
  from datetime import datetime
 
72
  processor = CLIPProcessor.from_pretrained(model_name)
73
 
74
  def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
75
+
76
+
77
+
78
+ # Load the dataset (no split specified, so the whole dataset)
79
+ dataset = load_dataset("traopia/fashion_show_data_all_embeddings.json")
80
+ # This returns a DatasetDict with splits as keys (usually 'train' by default).
81
+ # To get the whole dataset, you can access the first split like this:
82
+ split_name = list(dataset.keys())[0]
83
+ full_dataset = dataset[split_name]
84
+
85
+ # Convert to pandas DataFrame
86
+ df_emb = full_dataset.to_pandas()
87
  df_emb = df_emb.drop_duplicates(subset='image_urls')
88
  df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
89
  df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)