aymnsk commited on
Commit
cb59386
·
verified ·
1 Parent(s): 548ae0d

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +2 -0
  2. README (2).md +13 -0
  3. app (2).py +75 -0
  4. baklava.jpg +3 -0
  5. cat (1).jpg +3 -0
  6. requirements (1).txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ baklava.jpg filter=lfs diff=lfs merge=lfs -text
37
+ cat[[:space:]](1).jpg filter=lfs diff=lfs merge=lfs -text
README (2).md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Compare Siglip1 Siglip2
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 5.16.1
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: Compare SigLIP1 and SigLIP2 on zero shot classification
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app (2).py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This space is taken and modified from https://huggingface.co/spaces/merve/compare_clip_siglip"""
2
+ import torch
3
+ from transformers import AutoModel, AutoProcessor
4
+ import gradio as gr
5
+
6
+ ################################################################################
7
+ # Load the models
8
+ ################################################################################
9
+ sg1_ckpt = "google/siglip-so400m-patch14-384"
10
+ siglip1_model = AutoModel.from_pretrained(sg1_ckpt, device_map="cpu").eval()
11
+ siglip1_processor = AutoProcessor.from_pretrained(sg1_ckpt)
12
+
13
+ sg2_ckpt = "google/siglip2-so400m-patch14-384"
14
+ siglip2_model = AutoModel.from_pretrained(sg2_ckpt, device_map="cpu").eval()
15
+ siglip2_processor = AutoProcessor.from_pretrained(sg2_ckpt)
16
+
17
+
18
+ ################################################################################
19
+ # Utilities
20
+ ################################################################################
21
def postprocess_siglip(sg1_probs, sg2_probs, labels):
    """Pair each label with its probability for both models.

    Takes the first row of each probability matrix and returns two
    ``{label: probability}`` dicts — SigLIP 1 first, then SigLIP 2 —
    in the format expected by ``gr.Label``.
    """
    sg1_scores = {name: sg1_probs[0][idx] for idx, name in enumerate(labels)}
    sg2_scores = {name: sg2_probs[0][idx] for idx, name in enumerate(labels)}
    return sg1_scores, sg2_scores
25
+
26
+
27
def siglip_detector(image, texts):
    """Score *texts* against *image* with both SigLIP checkpoints.

    Returns ``(sg1_probs, sg2_probs)``: the sigmoid-activated
    ``logits_per_image`` tensors for SigLIP 1 and SigLIP 2 respectively.
    """

    def run(model, processor):
        # Identical preprocessing for both models: every prompt is padded to
        # a fixed 64-token length so the two text towers see the same shapes.
        inputs = processor(
            text=texts, images=image, return_tensors="pt", padding="max_length", max_length=64
        ).to("cpu")
        with torch.no_grad():
            outputs = model(**inputs)
        # SigLIP uses an independent sigmoid per (image, text) pair rather
        # than a softmax over labels.
        return torch.sigmoid(outputs.logits_per_image)

    return run(siglip1_model, siglip1_processor), run(siglip2_model, siglip2_processor)
42
+
43
+
44
def infer(image, candidate_labels):
    """Run zero-shot classification on *image* with both SigLIP models.

    Args:
        image: PIL image supplied by the Gradio image widget.
        candidate_labels: comma-separated label string,
            e.g. ``"a cat, two cats, three cats"``.

    Returns:
        Two ``{label: probability}`` dicts (SigLIP 1 output, SigLIP 2 output).
    """
    # strip() instead of lstrip(" "): the original kept trailing whitespace
    # ("cat , dog" produced the label "cat ") and never removed tabs/newlines,
    # so displayed label keys could silently differ from what the user typed.
    candidate_labels = [label.strip() for label in candidate_labels.split(",")]
    sg1_probs, sg2_probs = siglip_detector(image, candidate_labels)
    return postprocess_siglip(sg1_probs, sg2_probs, labels=candidate_labels)
48
+
49
+
50
# Gradio UI: image + comma-separated labels in, one ranked label set per model out.
with gr.Blocks() as demo:
    gr.Markdown("# Compare SigLIP 1 and SigLIP 2")
    gr.Markdown(
        "Compare the performance of SigLIP 1 and SigLIP 2 on zero-shot classification in this Space :point_down:"
    )
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil")
            # Fixed user-facing typo: "seperated" -> "separated".
            text_input = gr.Textbox(label="Input a list of labels (comma separated)")
            run_button = gr.Button("Run", visible=True)
        with gr.Column():
            # Show only the 3 highest-probability labels per model.
            siglip1_output = gr.Label(label="SigLIP 1 Output", num_top_classes=3)
            siglip2_output = gr.Label(label="SigLIP 2 Output", num_top_classes=3)
    # NOTE(review): this commit uploads "cat (1).jpg" (see .gitattributes),
    # not "cat.jpg" — verify these example paths resolve in the deployed Space.
    examples = [
        ["./baklava.jpg", "dessert on a plate, a serving of baklava, a plate and spoon"],
        ["./cat.jpg", "a cat, two cats, three cats"],
        ["./cat.jpg", "two sleeping cats, two cats playing, three cats laying down"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, text_input],
        outputs=[siglip1_output, siglip2_output],
        fn=infer,
    )
    run_button.click(fn=infer, inputs=[image_input, text_input], outputs=[siglip1_output, siglip2_output])
demo.launch()
baklava.jpg ADDED

Git LFS Details

  • SHA256: c7b83d3f4d8e57b63c94783c3054d064073e9dbaae524d32764ea2f470b65582
  • Pointer size: 131 Bytes
  • Size of remote file: 148 kB
cat (1).jpg ADDED

Git LFS Details

  • SHA256: dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
requirements (1).txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ git+https://github.com/huggingface/transformers@main
4
+ sentencepiece
5
+ pillow
6
+ protobuf
7
+ accelerate