FengHou97 committed
Commit 4ebfb1d · verified · 1 Parent(s): ae1712c

Update app.py

Files changed (1)
  1. app.py +16 -4
app.py CHANGED
@@ -2,12 +2,12 @@ from turtle import title
  import gradio as gr
  from transformers import pipeline
  import numpy as np
- from PIL import Image
+ from PIL import Image
 
 
  pipes = {
      "ViT/B-16": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
-     "ViT/L-14": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
+     "ViT/L-14": pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14"),
  }
  inputs = [
      gr.Image(type='pil',
@@ -22,11 +22,23 @@ inputs = [
                 label="Prompt Template Prompt",
                 placeholder="Optional prompt template as prefix",
                 value="a photo of a {}"),
+
+     gr.Textbox(lines=1,
+                label="Prompt Template Prompt",
+                placeholder="Optional prompt template as suffix",
+                value="in {} {} {} from {} with {}."),
+
+     gr.Textbox(lines=1,
+                label="Prior Domains", placeholder="Add a domain label, one by one"),
  ]
  images="festival.jpg"
 
- def shot(image, labels_text, model_name, hypothesis_template):
+ def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_template_suffix, domains_text):
      labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
+     domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
+     hypothesis_template_suffix = hypothesis_template_suffix.format(*domains)
+     hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix
+
      res = pipes[model_name](images=image,
                              candidate_labels=labels,
                              hypothesis_template=hypothesis_template)
@@ -35,7 +47,7 @@ def shot(image, labels_text, model_name, hypothesis_template):
  iface = gr.Interface(shot,
                       inputs,
                       "label",
-                      examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}"]],
+                      examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", "clear, autumn, day, side, light occlusion"]],
                       description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
                       Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
                       To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
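
For context, here is a minimal standalone sketch (not part of the commit) of what the updated shot function computes from the two template fields and the domain labels, using the values from the examples row above. It assumes the suffix's five {} slots are meant to be filled with the comma-separated domain labels; the one remaining {} from the prefix is then filled with each candidate label inside the zero-shot pipeline:

from transformers import pipeline

# Example values taken from the demo's examples row.
hypothesis_template_prefix = "a photo of a {}"
hypothesis_template_suffix = "in {} {} {} from {} with {}."
labels_text = "lantern, firecracker, couplet"
domains_text = "clear, autumn, day, side, light occlusion"

labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]

# Fill the suffix's five "{}" slots with the five domain labels, then join
# prefix and suffix into a single template. The single remaining "{}" (from
# the prefix) is filled with each candidate label by the pipeline itself.
hypothesis_template = hypothesis_template_prefix + " " + hypothesis_template_suffix.format(*domains)
# -> "a photo of a {} in clear autumn day from side with light occlusion."

clf = pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14")
res = clf(images="festival.jpg",
          candidate_labels=labels,
          hypothesis_template=hypothesis_template)
print(res)  # list of {"score": ..., "label": ...} dicts, best match first

Note that str.format(*domains) needs at least as many domain labels as there are {} slots in the suffix; with the default suffix, five comma-separated domains are expected, and fewer raise an IndexError.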