Spaces:

aymnsk
/

InSightLIP

Running

App Files Files Community

aymnsk committed on Jul 20

Commit

cb59386

verified ·

1 Parent(s): 548ae0d

Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +2 -0
README (2).md +13 -0
app (2).py +75 -0
baklava.jpg +3 -0
cat (1).jpg +3 -0
requirements (1).txt +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+baklava.jpg filter=lfs diff=lfs merge=lfs -text
+cat[[:space:]](1).jpg filter=lfs diff=lfs merge=lfs -text

README (2).md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Compare Siglip1 Siglip2
+emoji: 🚀
+colorFrom: red
+colorTo: gray
+sdk: gradio
+sdk_version: 5.16.1
+app_file: app.py
+pinned: false
+short_description: Compare SigLIP1 and SigLIP2 on zero shot classification
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app (2).py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""This space is taken and modified from https://huggingface.co/spaces/merve/compare_clip_siglip"""
+import torch
+from transformers import AutoModel, AutoProcessor
+import gradio as gr
+################################################################################
+# Load the models
+################################################################################
+# SigLIP 1: load the checkpoint on the CPU, switch it to eval mode, and load
+# the processor that belongs to the same checkpoint.
+sg1_ckpt = "google/siglip-so400m-patch14-384"
+siglip1_model = AutoModel.from_pretrained(sg1_ckpt, device_map="cpu")
+siglip1_model.eval()
+siglip1_processor = AutoProcessor.from_pretrained(sg1_ckpt)
+# SigLIP 2: same loading steps, different checkpoint.
+sg2_ckpt = "google/siglip2-so400m-patch14-384"
+siglip2_model = AutoModel.from_pretrained(sg2_ckpt, device_map="cpu")
+siglip2_model.eval()
+siglip2_processor = AutoProcessor.from_pretrained(sg2_ckpt)
+################################################################################
+# Utilities
+################################################################################
+def postprocess_siglip(sg1_probs, sg2_probs, labels):
+    """Pair every label with its score from each of the two models.
+
+    Args:
+        sg1_probs: SigLIP 1 scores; only row 0 is read, indexed once per label.
+        sg2_probs: SigLIP 2 scores, read the same way.
+        labels: Label strings, in the same order as the scores in row 0.
+
+    Returns:
+        A ``(sg1_output, sg2_output)`` tuple of ``{label: score}`` dicts whose
+        values are plain Python floats.
+
+    Raises:
+        IndexError: If row 0 holds fewer scores than there are labels.
+    """
+    def _to_scores(probs):
+        row = probs[0]
+        # float() unwraps single-element tensors, so the dicts handed to the
+        # gr.Label outputs contain ordinary numbers rather than tensor objects.
+        return {label: float(row[i]) for i, label in enumerate(labels)}
+    return _to_scores(sg1_probs), _to_scores(sg2_probs)
+def siglip_detector(image, texts):
+    """Score ``texts`` against ``image`` with both SigLIP checkpoints.
+
+    Args:
+        image: Image passed to both processors as ``images``.
+        texts: Candidate label strings passed to both processors as ``text``.
+
+    Returns:
+        ``(sg1_probs, sg2_probs)``: the element-wise sigmoid of each model's
+        ``logits_per_image``. Each score is computed independently, so the
+        scores for one image are not normalised to sum to 1.
+    """
+    # Both processors get identical arguments: PyTorch tensors, with the text
+    # padded to a fixed length of 64.
+    processor_kwargs = dict(
+        text=texts, images=image, return_tensors="pt", padding="max_length", max_length=64
+    )
+    sg1_inputs = siglip1_processor(**processor_kwargs).to("cpu")
+    sg2_inputs = siglip2_processor(**processor_kwargs).to("cpu")
+    # Gradients are never needed here; this function only runs forward passes.
+    with torch.no_grad():
+        sg1_logits = siglip1_model(**sg1_inputs).logits_per_image
+        sg2_logits = siglip2_model(**sg2_inputs).logits_per_image
+        return torch.sigmoid(sg1_logits), torch.sigmoid(sg2_logits)
+def infer(image, candidate_labels):
+    """Classify ``image`` against comma-separated labels with both models.
+
+    Args:
+        image: Image forwarded unchanged to ``siglip_detector``.
+        candidate_labels: Comma-separated label text. May be empty or ``None``.
+
+    Returns:
+        The ``(sg1_output, sg2_output)`` pair produced by
+        ``postprocess_siglip``, or two empty dicts when the text contains no
+        usable label.
+    """
+    # Trim whitespace on both sides of every entry and drop blank ones, so
+    # input such as "a cat , dog," yields ["a cat", "dog"] instead of passing
+    # "a cat " and "" to the models as labels.
+    pieces = (candidate_labels or "").split(",")
+    labels = [piece.strip() for piece in pieces if piece.strip()]
+    if not labels:
+        # Nothing to score: skip the model calls and report an empty result.
+        return {}, {}
+    sg1_probs, sg2_probs = siglip_detector(image, labels)
+    return postprocess_siglip(sg1_probs, sg2_probs, labels=labels)
+# Build the page: a title and description, then inputs on the left and one
+# result panel per model on the right.
+with gr.Blocks() as demo:
+    gr.Markdown("# Compare SigLIP 1 and SigLIP 2")
+    gr.Markdown(
+        "Compare the performance of SigLIP 1 and SigLIP 2 on zero-shot classification in this Space :point_down:"
+    )
+    with gr.Row():
+        with gr.Column():
+            # Inputs: the image, the label text, and the trigger button.
+            image_input = gr.Image(type="pil")
+            text_input = gr.Textbox(label="Input a list of labels (comma seperated)")
+            run_button = gr.Button("Run", visible=True)
+        with gr.Column():
+            # Outputs: each panel shows at most the three top classes.
+            siglip1_output = gr.Label(label="SigLIP 1 Output", num_top_classes=3)
+            siglip2_output = gr.Label(label="SigLIP 2 Output", num_top_classes=3)
+    # The example gallery and the Run button are wired to the same components.
+    demo_inputs = [image_input, text_input]
+    demo_outputs = [siglip1_output, siglip2_output]
+    # Each example is an image path plus a comma-separated label string.
+    examples = [
+        ["./baklava.jpg", "dessert on a plate, a serving of baklava, a plate and spoon"],
+        ["./cat.jpg", "a cat, two cats, three cats"],
+        ["./cat.jpg", "two sleeping cats, two cats playing, three cats laying down"],
+    ]
+    gr.Examples(examples=examples, inputs=demo_inputs, outputs=demo_outputs, fn=infer)
+    run_button.click(fn=infer, inputs=demo_inputs, outputs=demo_outputs)
+# Start the app as soon as the module is executed.
+demo.launch()

baklava.jpg ADDED Viewed

Git LFS Details

SHA256: c7b83d3f4d8e57b63c94783c3054d064073e9dbaae524d32764ea2f470b65582
Pointer size: 131 Bytes
Size of remote file: 148 kB

cat (1).jpg ADDED Viewed

Git LFS Details

SHA256: dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
Pointer size: 131 Bytes
Size of remote file: 173 kB

requirements (1).txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+torch
+git+https://github.com/huggingface/transformers@main
+sentencepiece
+pillow
+protobuf
+accelerate