# Earlier Gradio version of the same app, kept for reference:
#
# import gradio as gr
# from transformers.utils import logging
# from transformers import BlipForConditionalGeneration, AutoProcessor
#
# logging.set_verbosity_error()
#
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
#
# def caption_image(image):
#     inputs = processor(image, return_tensors="pt")
#     out = model.generate(**inputs)
#     caption = processor.decode(out[0], skip_special_tokens=True)
#     return caption
#
# gr.Interface(caption_image, gr.Image(), "text").launch()

import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
from transformers.utils import logging

logging.set_verbosity_error()

# Load the BLIP captioning model and processor from a local snapshot.
model = BlipForConditionalGeneration.from_pretrained(
    "./models/Salesforce/blip-image-captioning-base"
)
processor = AutoProcessor.from_pretrained(
    "./models/Salesforce/blip-image-captioning-base"
)

st.title("Image Captioning")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)
    st.write("")
    st.write("Generating caption...")

    # Preprocess the image and generate a caption; no_grad disables gradient
    # tracking, which is unnecessary at inference time.
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    st.write("Caption:", caption)
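
# Optional: Streamlit reruns the whole script on every interaction, so the model
# above is reloaded each time. A minimal sketch of caching the load with
# st.cache_resource, using a hypothetical load_blip() helper and the same local
# model path as above; to use it, replace the two from_pretrained calls with:
#
# @st.cache_resource
# def load_blip():
#     model = BlipForConditionalGeneration.from_pretrained(
#         "./models/Salesforce/blip-image-captioning-base"
#     )
#     processor = AutoProcessor.from_pretrained(
#         "./models/Salesforce/blip-image-captioning-base"
#     )
#     return model, processor
#
# model, processor = load_blip()
#
# Run the app with:  streamlit run app.py   (assuming this file is saved as app.py)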