Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import CLIPProcessor, CLIPModel
|
3 |
from PIL import Image
|
@@ -18,15 +25,35 @@ CACHE_FILE = "cache.pkl"
|
|
18 |
# Define supported image formats
|
19 |
IMAGE_EXTENSIONS = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.gif", "*.webp", "*.tiff", "*.tif"]
|
20 |
|
21 |
-
def get_all_image_files():
    """Collect every dataset image whose extension matches a supported format.

    Globs DATASET_DIR once per pattern in IMAGE_EXTENSIONS, trying both the
    lowercase pattern and its uppercase variant, and returns all matches.
    """
    found = []
    for pattern in IMAGE_EXTENSIONS:
        # Check the lowercase pattern first, then the UPPERCASE variant,
        # preserving the original lookup order.
        for variant in (pattern, pattern.upper()):
            found.extend(DATASET_DIR.glob(variant))
    return found
|
28 |
|
29 |
-
def get_embedding(image: Image.Image, device="cpu"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
# Use CLIP's built-in preprocessing
|
31 |
inputs = processor(images=image, return_tensors="pt").to(device)
|
32 |
model_device = model.to(device)
|
@@ -37,7 +64,20 @@ def get_embedding(image: Image.Image, device="cpu"):
|
|
37 |
return emb
|
38 |
|
39 |
@spaces.GPU
|
40 |
-
def get_reference_embeddings():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# Get all current image files
|
42 |
current_image_files = get_all_image_files()
|
43 |
current_images = set(img_path.name for img_path in current_image_files)
|
@@ -79,7 +119,20 @@ def get_reference_embeddings():
|
|
79 |
reference_embeddings = get_reference_embeddings()
|
80 |
|
81 |
@spaces.GPU
|
82 |
-
def search_similar(query_img):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
# Refresh embeddings to catch any new images
|
84 |
global reference_embeddings
|
85 |
reference_embeddings = get_reference_embeddings()
|
@@ -107,7 +160,22 @@ def search_similar(query_img):
|
|
107 |
return [(f"dataset/{name}", f"Score: {score:.4f}") for name, score in filtered_results[:5]]
|
108 |
|
109 |
@spaces.GPU
|
110 |
-
def add_image(name: str, image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
if not name.strip():
|
112 |
return "Please provide a valid image name."
|
113 |
|
|
|
1 |
+
"""
|
2 |
+
CLIP Image Search Application
|
3 |
+
|
4 |
+
A Gradio-based application for searching similar images using OpenAI's CLIP model.
|
5 |
+
Supports multiple image formats and provides a web interface for uploading and searching images.
|
6 |
+
"""
|
7 |
+
|
8 |
import gradio as gr
|
9 |
from transformers import CLIPProcessor, CLIPModel
|
10 |
from PIL import Image
|
|
|
25 |
# Define supported image formats
|
26 |
IMAGE_EXTENSIONS = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.gif", "*.webp", "*.tiff", "*.tif"]
|
27 |
|
28 |
+
def get_all_image_files() -> List[Path]:
    """
    Get all image files from the dataset directory.

    Searches for images with supported extensions in both lowercase and
    uppercase variants. Results are de-duplicated — on case-insensitive
    filesystems (macOS, Windows) the lowercase and uppercase globs can
    match the same file twice — and sorted so the ordering is
    deterministic across runs.

    Returns:
        List[Path]: Sorted list of unique Path objects for all found image files
    """
    unique_files = set()
    for ext in IMAGE_EXTENSIONS:
        unique_files.update(DATASET_DIR.glob(ext))
        unique_files.update(DATASET_DIR.glob(ext.upper()))  # Also check uppercase
    return sorted(unique_files)
|
42 |
|
43 |
+
def get_embedding(image: Image.Image, device: str = "cpu") -> torch.Tensor:
|
44 |
+
"""
|
45 |
+
Generate CLIP embedding for an image.
|
46 |
+
|
47 |
+
Args:
|
48 |
+
image (Image.Image): PIL Image object to process
|
49 |
+
device (str, optional): Device to run computation on. Defaults to "cpu".
|
50 |
+
|
51 |
+
Returns:
|
52 |
+
torch.Tensor: L2-normalized image embedding tensor
|
53 |
+
|
54 |
+
Raises:
|
55 |
+
RuntimeError: If CUDA is requested but not available
|
56 |
+
"""
|
57 |
# Use CLIP's built-in preprocessing
|
58 |
inputs = processor(images=image, return_tensors="pt").to(device)
|
59 |
model_device = model.to(device)
|
|
|
64 |
return emb
|
65 |
|
66 |
@spaces.GPU
|
67 |
+
def get_reference_embeddings() -> Dict[str, torch.Tensor]:
|
68 |
+
"""
|
69 |
+
Load or compute embeddings for all reference images in the dataset.
|
70 |
+
|
71 |
+
Checks if cached embeddings are up to date with the current dataset.
|
72 |
+
If not, recomputes embeddings for all images and updates the cache.
|
73 |
+
|
74 |
+
Returns:
|
75 |
+
Dict[str, torch.Tensor]: Dictionary mapping image filenames to their embeddings
|
76 |
+
|
77 |
+
Raises:
|
78 |
+
FileNotFoundError: If dataset directory doesn't exist
|
79 |
+
PermissionError: If unable to write cache file
|
80 |
+
"""
|
81 |
# Get all current image files
|
82 |
current_image_files = get_all_image_files()
|
83 |
current_images = set(img_path.name for img_path in current_image_files)
|
|
|
119 |
reference_embeddings = get_reference_embeddings()
|
120 |
|
121 |
@spaces.GPU
|
122 |
+
def search_similar(query_img: Image.Image) -> List[Tuple[str, str]]:
|
123 |
+
"""
|
124 |
+
Find similar images to the query image using CLIP embeddings.
|
125 |
+
|
126 |
+
Args:
|
127 |
+
query_img (Image.Image): Query image to find similar images for
|
128 |
+
|
129 |
+
Returns:
|
130 |
+
List[Tuple[str, str]]: List of tuples containing (image_path, similarity_score)
|
131 |
+
Limited to top 5 results above similarity threshold
|
132 |
+
|
133 |
+
Raises:
|
134 |
+
RuntimeError: If CUDA operations fail
|
135 |
+
"""
|
136 |
# Refresh embeddings to catch any new images
|
137 |
global reference_embeddings
|
138 |
reference_embeddings = get_reference_embeddings()
|
|
|
160 |
return [(f"dataset/{name}", f"Score: {score:.4f}") for name, score in filtered_results[:5]]
|
161 |
|
162 |
@spaces.GPU
|
163 |
+
def add_image(name: str, image: Image.Image) -> str:
|
164 |
+
"""
|
165 |
+
Add a new image to the dataset and update embeddings.
|
166 |
+
|
167 |
+
Args:
|
168 |
+
name (str): Name for the new image (without extension)
|
169 |
+
image (Image.Image): PIL Image object to add to dataset
|
170 |
+
|
171 |
+
Returns:
|
172 |
+
str: Success message with total image count
|
173 |
+
|
174 |
+
Raises:
|
175 |
+
ValueError: If name is empty or invalid
|
176 |
+
PermissionError: If unable to save image or update cache
|
177 |
+
RuntimeError: If embedding computation fails
|
178 |
+
"""
|
179 |
if not name.strip():
|
180 |
return "Please provide a valid image name."
|
181 |
|