dheena committed
Commit · e8ee9c0
Parent(s): 7c5bfda

initial commit

Browse files
- requirements.txt +18 -1
- src/model.py +65 -0
- src/segmentation.py +193 -0
- src/streamlit_app.py +37 -38
requirements.txt
CHANGED
@@ -1,3 +1,20 @@
+# Existing packages
 altair
 pandas
-streamlit
+streamlit
+
+# New packages based on imports
+faiss-cpu                                 # or faiss-gpu if running on a GPU
+torch                                     # PyTorch
+ftfy                                      # often required by CLIP
+git+https://github.com/openai/CLIP.git    # OpenAI CLIP
+openai                                    # OpenAI API client
+numpy
+Pillow                                    # PIL
+fastapi
+segmentation-models-pytorch               # note: model.py's `import segmentation` is the local src/segmentation.py, so this package is likely unnecessary
+
+# Additional utilities
+opencv-python                             # cv2
+requests
+transformers                              # Hugging Face Transformers
src/model.py
ADDED
@@ -0,0 +1,65 @@
import faiss
import torch
import clip
from openai import OpenAI
import numpy as np
from PIL import Image
from fastapi import FastAPI
from typing import List, Union
import segmentation

client = OpenAI()  # requires OPENAI_API_KEY in the environment; not yet used elsewhere in this module
device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

def get_image_features(image: Image.Image) -> np.ndarray:
    """Extract CLIP features from an image."""
    image_input = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image_input).float()
    return image_features.cpu().numpy()

# FAISS setup: inner-product index over 512-d CLIP ViT-B/32 embeddings
index = faiss.IndexFlatIP(512)
meta_data_store = []

def save_image_in_index(image_features: np.ndarray, metadata: dict):
    """Normalize features and add them to the index."""
    faiss.normalize_L2(image_features)
    index.add(image_features)
    meta_data_store.append(metadata)

def process_image_embedding(image_url: Union[str, Image.Image], labels=['clothes']) -> Image.Image:
    """Segment and crop the query image, returning a PIL image ready for CLIP."""
    search_image, search_detections = segmentation.grounded_segmentation(image=image_url, labels=labels)
    cropped_image = segmentation.cut_image(search_image, search_detections[0].mask, search_detections[0].box)

    # Convert to valid RGB
    if cropped_image.dtype != np.uint8:
        cropped_image = (cropped_image * 255).astype(np.uint8)
    if cropped_image.ndim == 2:
        cropped_image = np.stack([cropped_image] * 3, axis=-1)

    pil_image = Image.fromarray(cropped_image)
    return pil_image

def get_top_k_results(image_url: str, k: int = 10) -> List[dict]:
    """Find the top-k most similar images in the index."""
    processed_image = process_image_embedding(image_url)
    image_search_embedding = get_image_features(processed_image)
    faiss.normalize_L2(image_search_embedding)
    distances, indices = index.search(image_search_embedding.reshape(1, -1), k)

    results = []
    for i, dist in zip(indices[0], distances[0]):
        if 0 <= i < len(meta_data_store):  # FAISS returns -1 for missing neighbors
            results.append({
                'metadata': meta_data_store[i],
                'score': float(dist)
            })
    return results
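For reference, a minimal usage sketch of the module above; the URLs, metadata, and the assumption of running from src/ are illustrative, not part of the commit:

# Hypothetical usage sketch (not part of the commit); assumes it is run from src/
# so that `import model` resolves, and that OPENAI_API_KEY is set.
import model

catalog_url = "https://example.com/catalog/jacket.jpg"    # placeholder URL
query_url = "https://example.com/query/street_photo.jpg"  # placeholder URL

# Index a catalog image: segment and crop it, embed it with CLIP, store its metadata.
cropped = model.process_image_embedding(catalog_url)
features = model.get_image_features(cropped)
model.save_image_in_index(features, {"sku": "JKT-001", "url": catalog_url})

# Query: top-k most similar indexed items by inner product over L2-normalized embeddings.
for result in model.get_top_k_results(query_url, k=5):
    print(result["score"], result["metadata"])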
src/segmentation.py
ADDED
@@ -0,0 +1,193 @@
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple
import os

import cv2
import torch
import requests
import numpy as np
from PIL import Image
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline


@dataclass
class BoundingBox:
    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        return [self.xmin, self.ymin, self.xmax, self.ymax]

@dataclass
class DetectionResult:
    score: float
    label: str
    box: BoundingBox
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        return cls(score=detection_dict['score'],
                   label=detection_dict['label'],
                   box=BoundingBox(xmin=detection_dict['box']['xmin'],
                                   ymin=detection_dict['box']['ymin'],
                                   xmax=detection_dict['box']['xmax'],
                                   ymax=detection_dict['box']['ymax']))

def mask_to_polygon(mask: np.ndarray) -> List[List[int]]:
    # Find contours in the binary mask
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Find the contour with the largest area
    largest_contour = max(contours, key=cv2.contourArea)

    # Extract the vertices of the contour
    polygon = largest_contour.reshape(-1, 2).tolist()

    return polygon

def polygon_to_mask(polygon: List[Tuple[int, int]], image_shape: Tuple[int, int]) -> np.ndarray:
    """
    Convert a polygon to a segmentation mask.

    Args:
    - polygon (list): List of (x, y) coordinates representing the vertices of the polygon.
    - image_shape (tuple): Shape of the image (height, width) for the mask.

    Returns:
    - np.ndarray: Segmentation mask with the polygon filled.
    """
    # Create an empty mask
    mask = np.zeros(image_shape, dtype=np.uint8)

    # Convert polygon to an array of points
    pts = np.array(polygon, dtype=np.int32)

    # Fill the polygon with white color (255)
    cv2.fillPoly(mask, [pts], color=(255,))

    return mask

def load_image(image_str: str) -> Image.Image:
    if image_str.startswith("http"):
        image = Image.open(requests.get(image_str, stream=True).raw).convert("RGB")
    else:
        image = Image.open(image_str).convert("RGB")

    return image

def get_boxes(results: List[DetectionResult]) -> List[List[List[float]]]:
    boxes = []
    for result in results:
        xyxy = result.box.xyxy
        boxes.append(xyxy)

    return [boxes]

def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    masks = masks.cpu().float()
    masks = masks.permute(0, 2, 3, 1)
    masks = masks.mean(axis=-1)
    masks = (masks > 0).int()
    masks = masks.numpy().astype(np.uint8)
    masks = list(masks)

    if polygon_refinement:
        for idx, mask in enumerate(masks):
            shape = mask.shape
            polygon = mask_to_polygon(mask)
            mask = polygon_to_mask(polygon, shape)
            masks[idx] = mask

    return masks


def detect(
    image: Image.Image,
    labels: List[str],
    threshold: float = 0.3,
    detector_id: Optional[str] = None
) -> List[DetectionResult]:
    """
    Use Grounding DINO to detect a set of labels in an image in a zero-shot fashion.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    detector_id = detector_id if detector_id is not None else "IDEA-Research/grounding-dino-tiny"
    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)

    labels = [label if label.endswith(".") else label + "." for label in labels]

    results = object_detector(image, candidate_labels=labels, threshold=threshold)
    results = [DetectionResult.from_dict(result) for result in results]

    return results

def segment(
    image: Image.Image,
    detection_results: List[DetectionResult],
    polygon_refinement: bool = False,
    segmenter_id: Optional[str] = None
) -> List[DetectionResult]:
    """
    Use Segment Anything (SAM) to generate masks given an image + a set of bounding boxes.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmenter_id = segmenter_id if segmenter_id is not None else "facebook/sam-vit-base"

    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
    processor = AutoProcessor.from_pretrained(segmenter_id)

    boxes = get_boxes(detection_results)
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)

    outputs = segmentator(**inputs)
    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes
    )[0]

    masks = refine_masks(masks, polygon_refinement)

    for detection_result, mask in zip(detection_results, masks):
        detection_result.mask = mask

    return detection_results

def grounded_segmentation(
    image: Union[Image.Image, str],
    labels: List[str],
    threshold: float = 0.3,
    polygon_refinement: bool = False,
    detector_id: Optional[str] = None,
    segmenter_id: Optional[str] = None
) -> Tuple[Image.Image, List[DetectionResult]]:
    if isinstance(image, str):
        image = load_image(image)

    detections = detect(image, labels, threshold, detector_id)
    detections = segment(image, detections, polygon_refinement, segmenter_id)

    return image, detections


# Crop the detected object out of the image using its mask and bounding box
def cut_image(image, mask, box):
    np_image = np.array(image)
    cut = cv2.bitwise_and(np_image, np_image, mask=mask.astype(np.uint8) * 255)
    x0, y0, x1, y1 = map(int, box.xyxy)
    cropped = cut[y0:y1, x0:x1]
    # Keep the crop in RGB; it is fed back into PIL/CLIP rather than written with cv2
    return cropped
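A minimal standalone sketch of the grounded-segmentation flow; the URL and output filename are illustrative placeholders:

# Hypothetical usage sketch (not part of the commit); assumes it is run from src/.
from PIL import Image
import segmentation

url = "https://example.com/street_photo.jpg"  # placeholder URL

# Grounding DINO finds boxes for the label, SAM turns each box into a mask.
image, detections = segmentation.grounded_segmentation(image=url, labels=["clothes"])

# Crop the first detection out of the image using its mask and bounding box.
first = detections[0]
crop = segmentation.cut_image(image, first.mask, first.box)
Image.fromarray(crop).save("crop.png")  # RGB crop, placeholder filename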
src/streamlit_app.py
CHANGED
@@ -1,40 +1,39 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
+from PIL import Image
+import model
 
+
+image_url_input = st.text_input("Enter the image URL:")
+k_value_input = st.number_input("Enter k_value:", min_value=1, value=5)
+if st.button("Get Results"):
+    results = model.get_top_k_results(image_url_input, int(k_value_input))
+    st.json({"results": [{"metadata": r["metadata"], "score": r["score"]} for r in results]})
+
+
+if 'metadata_inputs' not in st.session_state:
+    st.session_state['metadata_inputs'] = {}
+
+uploaded_files = st.file_uploader("Choose images...", type=["jpg", "jpeg", "png"], accept_multiple_files=True)
+
+if uploaded_files:
+    for uploaded_file in uploaded_files:
+        file_key = uploaded_file.name
+
+        image = Image.open(uploaded_file)
+
+        st.session_state['metadata_inputs'][file_key] = st.text_input(
+            f"Metadata for {uploaded_file.name}",
+            value=st.session_state['metadata_inputs'].get(file_key, ""),
+            key=f"metadata_{file_key}"
+        )
+
+    if st.button("Upload Images"):
+        for uploaded_file in uploaded_files:
+            metadata = st.session_state['metadata_inputs'][uploaded_file.name]
+            if metadata:
+                image = Image.open(uploaded_file)
+                cropped_image = model.process_image_embedding(image)
+                feature = model.get_image_features(cropped_image)
+                model.save_image_in_index(feature, metadata)
+        st.success("Images uploaded successfully.")
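Worth noting: model.py keeps its FAISS index and metadata store in process memory, so anything indexed through the uploader lives only for the current run; restarting the app (typically launched with `streamlit run src/streamlit_app.py`) starts from an empty index.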