FengHou97 committed ae1712c (verified; 1 parent: 88dc9b2)

Update app.py

Files changed (1): app.py (+3 -3)
app.py CHANGED
@@ -12,6 +12,8 @@ pipes = {
 inputs = [
     gr.Image(type='pil',
              label="Image"),
+    gr.Textbox(lines=1,
+               label="Candidate Labels", placeholder="Add a class label, one by one"),
     gr.Radio(choices=[
         "ViT/B-16",
         "ViT/L-14",
@@ -20,8 +22,6 @@ inputs = [
              label="Prompt Template Prompt",
              placeholder="Optional prompt template as prefix",
              value="a photo of a {}"),
-    gr.Textbox(lines=1,
-               label="Candidate Labels", placeholder="Add a class label, one by one",),
 ]
 images="festival.jpg"
 
@@ -35,7 +35,7 @@ def shot(image, labels_text, model_name, hypothesis_template):
 iface = gr.Interface(shot,
                      inputs,
                      "label",
-                     examples=[["festival.jpg", "ViT/B-16", "a photo of a {}", "lantern, firecracker, couplet"]],
+                     examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}"]],
                      description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
                      Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
                      To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
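Context for the change: `gr.Interface` passes the components in `inputs` to the callback positionally, so the component order, each `examples` row, and the signature `shot(image, labels_text, model_name, hypothesis_template)` must all agree. Moving the Candidate Labels Textbox into the second slot, and reordering the example row to match, restores that alignment. Below is a minimal sketch of the surrounding app under that reading; only the `shot` signature, the `pipes` name, and the component definitions shown in the diff come from the commit. The checkpoint choice, the Radio label, and the body of `shot` are assumptions for illustration, not the app's actual code.

```python
from transformers import pipeline
import gradio as gr

# Hypothetical checkpoint mapping; the real model names are not shown in the diff.
pipes = {
    "ViT/B-16": pipeline("zero-shot-image-classification",
                         model="OFA-Sys/chinese-clip-vit-base-patch16"),
}

def shot(image, labels_text, model_name, hypothesis_template):
    # Assumed body: split the comma-separated labels typed into the Textbox.
    labels = [label.strip() for label in labels_text.split(",")]
    # transformers' zero-shot image-classification pipeline takes the candidate
    # labels plus an optional hypothesis_template used as a prefix for each label.
    results = pipes[model_name](image,
                                candidate_labels=labels,
                                hypothesis_template=hypothesis_template)
    # gr.Interface's "label" output expects a {class_name: confidence} dict.
    return {r["label"]: r["score"] for r in results}

# Component order mirrors the positional parameters of shot(), which is what
# this commit fixes: Image -> image, Textbox -> labels_text,
# Radio -> model_name, Textbox -> hypothesis_template.
inputs = [
    gr.Image(type='pil', label="Image"),
    gr.Textbox(lines=1, label="Candidate Labels",
               placeholder="Add a class label, one by one"),
    gr.Radio(choices=["ViT/B-16", "ViT/L-14"], label="Model"),  # label assumed
    gr.Textbox(lines=1, label="Prompt Template Prompt",
               placeholder="Optional prompt template as prefix",
               value="a photo of a {}"),
]

iface = gr.Interface(shot, inputs, "label",
                     examples=[["festival.jpg", "lantern, firecracker, couplet",
                                "ViT/B-16", "a photo of a {}"]])
```

With this ordering, the example row `["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}"]` maps one-to-one onto `(image, labels_text, model_name, hypothesis_template)`.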