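"""Visual search demo for fashion product images.

Loads the OpenAI CLIP ViT-B/32 model via open_clip, precomputes embeddings for a
subset of the ceyda/fashion-products-small dataset, and serves a Gradio app that
returns the most similar items for a text query or an uploaded image.
"""
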
import torch
import open_clip
import gradio as gr
import numpy as np
from datasets import load_dataset

# Load the fashion product images dataset from the Hugging Face Hub
dataset = load_dataset("ceyda/fashion-products-small", split="train")

# Load the OpenAI CLIP ViT-B/32 weights (QuickGELU variant used by the original OpenAI model)
model = open_clip.create_model("ViT-B-32-quickgelu", pretrained="openai")

# Build the evaluation-time image preprocessing transform for this model
preprocess = open_clip.image_transform(model.visual.image_size, is_train=False)

# Load the matching text tokenizer (ViT-B-32 and ViT-B-32-quickgelu share the same tokenizer)
tokenizer = open_clip.get_tokenizer("ViT-B-32")

# Move the model to GPU if available and switch to inference mode
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Function to compute a normalized image embedding
def get_image_embedding(image):
    # Convert to RGB first so grayscale/RGBA images don't break the preprocessing pipeline
    image = preprocess(image.convert("RGB")).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image)
    return image_features / image_features.norm(dim=-1, keepdim=True)

# Function to compute a normalized text embedding
def get_text_embedding(text):
    text_inputs = tokenizer([text]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
    return text_features / text_features.norm(dim=-1, keepdim=True)

# Precompute embeddings for a subset of the dataset
image_embeddings = []
image_paths = []
for item in dataset.select(range(1000)):  # Limit to 1,000 images for speed
    image = item["image"]
    image_embeddings.append(get_image_embedding(image))
    image_paths.append(image)  # keep the PIL image itself for display in the gallery

# Stack all embeddings into a single (N, D) tensor
image_embeddings = torch.cat(image_embeddings, dim=0)
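
# Both image and text embeddings are L2-normalized, so the dot products in the
# search functions below are cosine similarities (higher = closer match).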

# Function to search for similar images based on a text query
def search_similar_image(query_text):
    if not query_text:
        return []
    text_embedding = get_text_embedding(query_text)
    similarities = (image_embeddings @ text_embedding.T).squeeze(1).cpu().numpy()

    # Get top 20 matches, highest similarity first
    best_match_idxs = np.argsort(similarities)[-20:][::-1]

    return [image_paths[i] for i in best_match_idxs]

# Function to search for similar images based on an uploaded image
def search_similar_by_image(uploaded_image):
    if uploaded_image is None:
        return []
    query_embedding = get_image_embedding(uploaded_image)
    similarities = (image_embeddings @ query_embedding.T).squeeze(1).cpu().numpy()

    # Get top 20 matches, highest similarity first
    best_match_idxs = np.argsort(similarities)[-20:][::-1]

    return [image_paths[i] for i in best_match_idxs]

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🛍️ Visual Search for Fashion Products")
    gr.Markdown("Search using **text** or **upload an image** to find similar items.")

    with gr.Row():
        query_input = gr.Textbox(label="Search by Text", placeholder="e.g., red sneakers")
        search_button = gr.Button("Search by Text")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload an Image")
        image_search_button = gr.Button("Search by Image")

    output_gallery = gr.Gallery(label="Similar Items", columns=4, height=500)

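    # Wire each button to its search function; both return a list of PIL images
    # that the gallery displays.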
    search_button.click(search_similar_image, inputs=[query_input], outputs=[output_gallery])
    image_search_button.click(search_similar_by_image, inputs=[image_input], outputs=[output_gallery])

demo.launch(share=True)