FengHou97 committed
Commit 4ebfb1d · verified · 1 Parent(s): ae1712c

Update app.py

Files changed (1)
  1. app.py +16 -4
app.py CHANGED
@@ -2,12 +2,12 @@ from turtle import title
  import gradio as gr
  from transformers import pipeline
  import numpy as np
- from PIL import Image
+ from PIL import Image
 
 
  pipes = {
      "ViT/B-16": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
-     "ViT/L-14": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
+     "ViT/L-14": pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14"),
  }
  inputs = [
      gr.Image(type='pil',
@@ -22,11 +22,23 @@ inputs = [
                 label="Prompt Template Prompt",
                 placeholder="Optional prompt template as prefix",
                 value="a photo of a {}"),
+
+     gr.Textbox(lines=1,
+                label="Prompt Template Prompt",
+                placeholder="Optional prompt template as suffix",
+                value="in {} {} {} from {} with {}."),
+
+     gr.Textbox(lines=1,
+                label="Prior Domains", placeholder="Add a domain label, one by one"),
  ]
  images="festival.jpg"
 
- def shot(image, labels_text, model_name, hypothesis_template):
+ def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_template_suffix, domains_text):
      labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
+     domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
+     hypothesis_template_suffix = hypothesis_template_suffix.format(*domains)
+     hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix
+
      res = pipes[model_name](images=image,
                              candidate_labels=labels,
                              hypothesis_template=hypothesis_template)
@@ -35,7 +47,7 @@ def shot(image, labels_text, model_name, hypothesis_template):
  iface = gr.Interface(shot,
                       inputs,
                       "label",
-                      examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}"]],
+                      examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", "clear, autumn, day, side, light occlusion"]],
                       description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
                       Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
                       To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
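
For context, here is a minimal standalone sketch (not part of the commit) of what the updated shot function computes from the two template fields and the domain labels, using the values from the examples row above. It assumes the suffix's five {} slots are meant to be filled with the comma-separated domain labels; the one remaining {} from the prefix is then filled with each candidate label inside the zero-shot pipeline:

from transformers import pipeline

# Example values taken from the demo's examples row.
hypothesis_template_prefix = "a photo of a {}"
hypothesis_template_suffix = "in {} {} {} from {} with {}."
labels_text = "lantern, firecracker, couplet"
domains_text = "clear, autumn, day, side, light occlusion"

labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]

# Fill the suffix's five "{}" slots with the five domain labels, then join
# prefix and suffix into a single template. The single remaining "{}" (from
# the prefix) is filled with each candidate label by the pipeline itself.
hypothesis_template = hypothesis_template_prefix + " " + hypothesis_template_suffix.format(*domains)
# -> "a photo of a {} in clear autumn day from side with light occlusion."

clf = pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14")
res = clf(images="festival.jpg",
          candidate_labels=labels,
          hypothesis_template=hypothesis_template)
print(res)  # list of {"score": ..., "label": ...} dicts, best match first

Note that str.format(*domains) needs at least as many domain labels as there are {} slots in the suffix; with the default suffix, five comma-separated domains are expected, and fewer raise an IndexError.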