"""Gradio demo: compare a fine-tuned ViT classifier against zero-shot CLIP
on the Animals-10 classes. Upload an image and inspect both models' scores."""

import gradio as gr
from transformers import pipeline

# Fine-tuned ViT model for Animals-10.
vit_classifier = pipeline(
    "image-classification", model="MichaelMM2000/vit-base-animals10"
)
# Generic CLIP model used as a zero-shot baseline.
clip_detector = pipeline(
    model="openai/clip-vit-base-patch32",
    task="zero-shot-image-classification",
)

# Animals-10 class names
labels_animals10 = [
    "butterfly", "cat", "chicken", "cow", "dog",
    "elephant", "horse", "sheep", "spider", "squirrel",
]


def classify_animal(image):
    """Run both models on *image* and return their label→score mappings.

    Parameters
    ----------
    image : str
        Filepath of the uploaded image (``gr.Image(type="filepath")``).

    Returns
    -------
    dict
        Two entries — "ViT Classification" and
        "CLIP Zero-Shot Classification" — each a ``{label: score}`` dict.
    """
    vit_results = vit_classifier(image)
    vit_output = {str(r["label"]): float(r["score"]) for r in vit_results}

    # CLIP works better with a natural-language prompt per class, but the
    # pipeline echoes the full prompt back as the label. Map each prompt to
    # its bare class name so both result dicts share the same keys
    # (bugfix: the original kept "a photo of a cat" etc. as keys, making
    # the side-by-side comparison with ViT awkward).
    prompt_to_label = {f"a photo of a {label}": label for label in labels_animals10}
    clip_results = clip_detector(image, candidate_labels=list(prompt_to_label))
    clip_output = {
        prompt_to_label.get(str(r["label"]), str(r["label"])): float(r["score"])
        for r in clip_results
    }

    return {
        "ViT Classification": vit_output,
        "CLIP Zero-Shot Classification": clip_output,
    }


# Optional: you can add example images
example_images = [
    "example_images/cat.jpeg",
    "example_images/chicken1.jpeg",
    "example_images/chicken2.jpeg",
    "example_images/elefant.jpg",
    "example_images/butterfly.jpg",
]

iface = gr.Interface(
    fn=classify_animal,
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    title="Animals-10 Classification: ViT vs CLIP",
    description="Upload an animal image to compare predictions from your trained ViT model and a zero-shot CLIP model.",
    examples=example_images,
)

iface.launch(ssr_mode=False)