import gradio as gr
from transformers import pipeline


# Zero-shot image classification pipelines, keyed by the model name shown in the UI.
pipes = {
    "ViT/B-16": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
    "ViT/L-14": pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14"),
}
inputs = [
    gr.Image(type="pil", label="Image"),
    gr.Radio(choices=["ViT/B-16", "ViT/L-14"], type="value", label="Model"),
    gr.Textbox(lines=1,
               label="Prompt Template",
               placeholder="Optional prompt template as prefix",
               value=""),
    # gr.Textbox(lines=1,
    #            label="Prior Domains", placeholder="Add a domain label, one by one"),
    gr.Textbox(lines=1,
               label="Candidate Labels", placeholder="Add a class label, one by one"),
]

def shot(image, model_name, hypothesis_template, labels_text):
    # Arguments arrive in the same order as the `inputs` list above.
    labels = [label.strip() for label in labels_text.split(",")]
    # Fall back to the bare label when no template is given, so an empty
    # template does not blank out every candidate text.
    res = pipes[model_name](image,
                            candidate_labels=labels,
                            hypothesis_template=hypothesis_template or "{}")
    return {d["label"]: d["score"] for d in res}
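
# A minimal sketch of the same classification call outside Gradio, for
# reference (the model id and labels are the ones used by this app; the
# bundled "festival.jpg" serves as the input image):
#
#   clf = pipeline("zero-shot-image-classification",
#                  model="openai/clip-vit-base-patch16")
#   preds = clf("festival.jpg",
#               candidate_labels=["lantern", "firecracker", "couplet"],
#               hypothesis_template="a photo of a {}")
#   # preds is a list of {"label": ..., "score": ...} dicts sorted by score.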

iface = gr.Interface(shot,
                     inputs,
                     "label",
                     examples=[["festival.jpg", "ViT/B-16", "a photo of a {}", "lantern, firecracker, couplet"]],
                     description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and the official GitHub repository. Chinese CLIP has also been merged into Hugging Face Transformers! <br><br>
                     Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
                     To try the demo, provide a picture (upload one manually, or select from the given examples) and add class labels one by one. Optionally, you can also add a prompt template as a prefix to the class labels.</p> <br>""",
                     title="Cross-Domain Recognition")

iface.launch()
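
# A minimal sketch of querying the running app programmatically with
# gradio_client (assumptions, not part of this app: gradio_client >= 1.0 is
# installed and the app is served at the default local `launch()` address):
#
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(
#       handle_file("festival.jpg"),        # Image
#       "ViT/B-16",                         # Model
#       "a photo of a {}",                  # Prompt Template
#       "lantern, firecracker, couplet",    # Candidate Labels
#       api_name="/predict",
#   )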