"""Gradio demo that captions an uploaded image with a BLIP radiology model."""

import gradio as gr
from PIL import Image
import torch
from transformers import BlipForConditionalGeneration, AutoProcessor

# Load processor and model from Hugging Face Hub
processor = AutoProcessor.from_pretrained("daliavanilla/BLIP-Radiology-model")
model = BlipForConditionalGeneration.from_pretrained("daliavanilla/BLIP-Radiology-model")

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Define the prediction function
def generate_caption(image, max_length: int = 50) -> str:
    """Generate a text caption for ``image`` with the loaded BLIP model.

    Args:
        image: Image as a NumPy array, as delivered by ``gr.Image(type="numpy")``.
            May be ``None`` when no image is present (e.g. the user cleared
            the input while the interface is in live mode).
        max_length: Upper bound passed to ``model.generate`` for the length
            of the generated token sequence. Defaults to 50.

    Returns:
        The decoded caption with special tokens removed, or an empty string
        when ``image`` is ``None``.
    """
    # With live=True the function is re-run on every input change, including
    # clearing the image; Image.fromarray(None) would raise in that case.
    if image is None:
        return ""

    # Process the image
    pil_image = Image.fromarray(image)
    inputs = processor(images=pil_image, return_tensors="pt").to(device)

    # Generate caption
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=max_length,
    )
    # batch_decode returns one string per sequence; a single image was passed
    # to the processor, so take the first entry.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="numpy"),  # NumPy arrays are converted to PIL inside generate_caption
    outputs=gr.Textbox(),
    live=True,
)

# Launch the Gradio interface
interface.launch()