Spaces:

MagnaSC
/

ImgSearch

Running on Zero

App Files Files Community

AkinyemiAra committed on Jun 23

Commit

e1286f2

verified ·

1 Parent(s): 0856eb3

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -19

app.py CHANGED Viewed

@@ -24,30 +24,49 @@ def get_embedding(image: Image.Image, device="cpu"):
     # L2 normalize the embeddings
     emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
     return emb
 def get_reference_embeddings():
     if os.path.exists(CACHE_FILE):
         with open(CACHE_FILE, "rb") as f:
-            return pickle.load(f)
-    embeddings = {}
-    # Use GPU for preprocessing reference images too for consistency
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    for img_path in DATASET_DIR.glob("*.jpg"):
-        img = Image.open(img_path).convert("RGB")
-        emb = get_embedding(img, device=device)
-        # Store on CPU to save GPU memory
-        embeddings[img_path.name] = emb.cpu()
-    with open(CACHE_FILE, "wb") as f:
-        pickle.dump(embeddings, f)
-    return embeddings
 reference_embeddings = get_reference_embeddings()
 @spaces.GPU
 def search_similar(query_img):
     query_emb = get_embedding(query_img, device="cuda")
     results = []
@@ -59,10 +78,21 @@ def search_similar(query_img):
         results.append((name, sim))
     results.sort(key=lambda x: x[1], reverse=True)
-    return [(f"dataset/{name}", f"Score: {score:.4f}") for name, score in results[:5]]
-@spaces.GPU
 def add_image(name: str, image):
     path = DATASET_DIR / f"{name}.jpg"
     image.save(path)
@@ -70,12 +100,13 @@ def add_image(name: str, image):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     emb = get_embedding(image, device=device)
-    # Store on CPU to save memory
     reference_embeddings[f"{name}.jpg"] = emb.cpu()
     with open(CACHE_FILE, "wb") as f:
         pickle.dump(reference_embeddings, f)
-    return f"Image {name} added to dataset."
 search_interface = gr.Interface(fn=search_similar,
                                 inputs=gr.Image(type="pil", label="Query Image"),
@@ -88,4 +119,4 @@ add_interface = gr.Interface(fn=add_image,
                              allow_flagging="never")
 demo = gr.TabbedInterface([search_interface, add_interface], tab_names=["Search", "Add Product"])
-demo.launch()

     # L2 normalize the embeddings
     emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
     return emb
 def get_reference_embeddings():
     """Return a ``{file name: embedding}`` dict for the ``*.jpg`` files in ``DATASET_DIR``.

     The pickle at ``CACHE_FILE`` is reused when its keys match the file
     names currently in the dataset directory exactly. Otherwise every image
     is re-embedded with ``get_embedding`` and the cache file is rewritten.

     A cache file that is missing, truncated, corrupt, or does not contain a
     dict is treated as an empty cache, so it triggers a rebuild instead of
     raising.

     Returns:
         dict: image file name -> embedding moved to CPU via ``.cpu()``.
     """
     # List the directory once so the freshness check and the rebuild loop
     # are guaranteed to look at the same set of files.
     image_paths = list(DATASET_DIR.glob("*.jpg"))
     current_images = {img_path.name for img_path in image_paths}

     cached_embeddings = {}
     try:
         with open(CACHE_FILE, "rb") as f:
             # NOTE(security): pickle.load can execute arbitrary code. This is
             # only acceptable while CACHE_FILE is written exclusively by this
             # application and never supplied by users.
             loaded = pickle.load(f)
     except FileNotFoundError:
         # No cache yet: fall through with the empty dict.
         pass
     except (pickle.UnpicklingError, EOFError) as exc:
         # An interrupted write leaves a truncated file; rebuild rather than crash.
         print(f"Cache unreadable ({exc}); rebuilding")
     else:
         if isinstance(loaded, dict):
             cached_embeddings = loaded

     cached_images = set(cached_embeddings)
     if current_images == cached_images:
         print(f"Using cached embeddings for {len(cached_embeddings)} images")
         return cached_embeddings

     # Cache is missing images or has extra ones: rebuild from scratch.
     print(f"Cache outdated. Current: {len(current_images)}, Cached: {len(cached_images)}")
     embeddings = {}
     device = "cuda" if torch.cuda.is_available() else "cpu"
     for img_path in image_paths:
         print(f"Processing {img_path.name}...")
         img = Image.open(img_path).convert("RGB")
         emb = get_embedding(img, device=device)
         # Keep the stored copy on CPU, whatever device computed it.
         embeddings[img_path.name] = emb.cpu()

     with open(CACHE_FILE, "wb") as f:
         pickle.dump(embeddings, f)
     print(f"Cache updated with {len(embeddings)} images")
     return embeddings
 reference_embeddings = get_reference_embeddings()
 @spaces.GPU
 def search_similar(query_img):
+    # Refresh embeddings to catch any new images
+    global reference_embeddings
+    reference_embeddings = get_reference_embeddings()
     query_emb = get_embedding(query_img, device="cuda")
     results = []
         results.append((name, sim))
     results.sort(key=lambda x: x[1], reverse=True)
+    # Filter out low similarity results (adjust threshold as needed)
+    SIMILARITY_THRESHOLD = 0.2  # Only show results above 20% similarity
+    filtered_results = [(name, score) for name, score in results if score > SIMILARITY_THRESHOLD]
+    if not filtered_results:
+        return [("No similar images found", "No matches above similarity threshold")]
+    # Return top 5 results
+    return [(f"dataset/{name}", f"Score: {score:.4f}") for name, score in filtered_results[:5]]
 def add_image(name: str, image):
     """Save an uploaded image into the dataset and register its embedding.

     Args:
         name: Base file name (without extension) for the new image.
             Surrounding whitespace is ignored. Empty names and names
             containing a path separator are rejected.
         image: The uploaded image, or ``None`` when nothing was uploaded.
             It must provide ``convert`` and ``save`` (a PIL image does).

     Returns:
         str: A status message, either a validation error or a confirmation
         that includes the new total number of reference images.
     """
     # Normalise once so the validated name is the one actually used for the
     # file and the cache key (the check alone would let " shoe " through).
     name = (name or "").strip()
     if not name:
         return "Please provide a valid image name."
     # A separator would let the file land outside DATASET_DIR.
     if "/" in name or "\\" in name:
         return "Please provide a valid image name."
     if image is None:
         return "Please provide an image."

     # JPEG cannot store alpha/palette modes, and the cache rebuild embeds
     # images after convert("RGB"), so do the same here for both reasons.
     image = image.convert("RGB")
     path = DATASET_DIR / f"{name}.jpg"
     image.save(path)

     device = "cuda" if torch.cuda.is_available() else "cpu"
     emb = get_embedding(image, device=device)

     # Add to the in-memory embeddings and persist the cache.
     reference_embeddings[f"{name}.jpg"] = emb.cpu()
     with open(CACHE_FILE, "wb") as f:
         pickle.dump(reference_embeddings, f)
     return f"Image '{name}' added to dataset. Total images: {len(reference_embeddings)}"
 search_interface = gr.Interface(fn=search_similar,
                                 inputs=gr.Image(type="pil", label="Query Image"),
                              allow_flagging="never")
 demo = gr.TabbedInterface([search_interface, add_interface], tab_names=["Search", "Add Product"])
+demo.launch()