File size: 991 Bytes
5292153
 
a37e4d7
5a232f1
5292153
5a232f1
 
5292153
a37e4d7
 
5292153
a37e4d7
 
5292153
a37e4d7
 
5292153
a37e4d7
5292153
a37e4d7
 
 
 
 
 
 
 
2b4a20c
 
a37e4d7
5292153
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForQuestionAnswering

# Load the BLIP VQA checkpoint once at import time: the processor handles
# image preprocessing + question tokenization, the model generates answers.
# First run downloads weights from the HuggingFace Hub (network required).
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")

# Pinned to CPU — no CUDA check, so this runs anywhere (inference is slower
# than on GPU but the base model is small enough for interactive use).
device = torch.device("cpu")
model.to(device)

def answer_question(image: "Image.Image | None", question: str) -> str:
    """Answer a natural-language question about an image using BLIP VQA.

    Args:
        image: The uploaded image, or None when the user submitted without
            uploading one (Gradio passes None in that case).
        question: The user's question about the image.

    Returns:
        The model's decoded answer, or a short instruction message when the
        image or question is missing.
    """
    # Gradio sends None when no image was uploaded; without this guard the
    # call to image.convert() raises AttributeError and the UI shows an error.
    if image is None:
        return "Please upload an image."
    if not question or not question.strip():
        return "Please ask a question."

    # Convert to RGB so palette/RGBA/grayscale uploads all work.
    inputs = processor(image.convert("RGB"), question, return_tensors="pt").to(device)

    # No gradients needed for inference; saves memory and time.
    with torch.no_grad():
        output = model.generate(**inputs)

    return processor.decode(output[0], skip_special_tokens=True).strip()

# Gradio interface: two inputs (PIL image + question text) mapped straight
# onto answer_question's parameters, one text output for the answer.
demo = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),  # type="pil" hands the fn a PIL.Image
        gr.Textbox(label="Ask a Question About the Image")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Visual Question Answering",
    description="Ask a question about an image"
)

# Launch the local web server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()