"""Gradio demo: classify an uploaded image with a pretrained Vision Transformer."""

import gradio as gr
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Single source of truth for the checkpoint so processor and model cannot drift.
MODEL_NAME = 'google/vit-base-patch16-224'

# Load the model and processor once at import time; every request reuses them.
processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
model = ViTForImageClassification.from_pretrained(MODEL_NAME)


def predict(image):
    """Return the label the model assigns to *image*.

    Args:
        image: The image to classify, in any form accepted by ``processor``
            (``classify_image`` passes a PIL image).

    Returns:
        The ``model.config.id2label`` entry for the highest-scoring logit.
    """
    inputs = processor(images=image, return_tensors="pt")
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class_idx = outputs.logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]


def classify_image(image):
    """Gradio callback: convert the uploaded array to a PIL image and classify it.

    Args:
        image: Array supplied by the ``gr.Image(type="numpy")`` input, or
            ``None`` when the form is submitted without an image.

    Returns:
        The predicted class label.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image before submitting.")

    # Let Pillow infer the mode from the array shape, then normalise to RGB so
    # grayscale or RGBA arrays are handled as well as plain H x W x 3 input.
    pil_image = Image.fromarray(image.astype('uint8')).convert('RGB')
    return predict(pil_image)


iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="numpy", label="Upload an Image"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="Image Classification with ViT",
    description="Upload an image to classify it using the Vision Transformer (ViT) model.",
)

if __name__ == "__main__":
    iface.launch()