import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

# Load the BLIP image captioning model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
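
# Optional (a sketch, assuming torch and a CUDA-capable GPU are available):
# caption generation runs noticeably faster on GPU. The model and the
# processor outputs must be on the same device, e.g.:
#   import torch
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)
#   # ...and inside describe_image: inputs = inputs.to(device)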

# Function to generate description for an image using BLIP
def describe_image(image: Image.Image):
    # Guard against "Describe Image" being clicked before an image is uploaded
    if image is None:
        return "Please upload an image first."
    try:
        inputs = processor(images=image, return_tensors="pt")
        out = model.generate(**inputs)
        description = processor.decode(out[0], skip_special_tokens=True)
        return description
    except Exception as e:
        return f"Error describing the image: {e}"

# Chatbot logic: echo the user message as a placeholder reply and append a
# BLIP-generated caption when an image is provided
def chat(user_input, chat_history, image):
    try:
        response = f"AI Response: {user_input}"
        if image is not None:
            image_description = describe_image(image)
            response += f"\n\n[Image Description]: {image_description}"
        chat_history.append(("User", user_input))
        chat_history.append(("AI", response))
        formatted_history = "\n".join([f"{role}: {msg}" for role, msg in chat_history])
        return formatted_history, chat_history
    except Exception as e:
        return f"Error: {e}", chat_history

# Build Gradio interface
with gr.Blocks(css="""
    body {
        background-color: #f7f7f7;
        color: #333;
        font-family: 'Roboto', sans-serif;
    }
    .gradio-container {
        width: 100%;
        max-width: 700px;
        margin: auto;
        background-color: #ffffff;
        padding: 30px;
        border-radius: 15px;
        box-shadow: 0px 10px 30px rgba(0, 0, 0, 0.1);
    }
    .textbox, .button {
        margin-bottom: 15px;
    }
    #chatbox {
        height: 300px;
        overflow-y: auto;
        border: 1px solid #dcdcdc;
        padding: 20px;
        border-radius: 10px;
        background-color: #f9f9f9;
        margin-bottom: 20px;
        font-size: 14px;
        line-height: 1.6;
    }
""") as demo:
    
    gr.Markdown("## 🤖 **AI Chatbot with Image Captioning (BLIP + Gradio)**")
    gr.Markdown("Upload an image to generate a caption, then chat with the bot.")

    with gr.Column():
        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2)
        submit_button = gr.Button("Send Message")
        clear_button = gr.Button("Clear Chat")
        chatbot_output = gr.Textbox(label="Chat History", lines=12, interactive=False, elem_id="chatbox")

        image_input = gr.Image(label="Upload Image", type="pil", elem_id="image-upload")
        upload_button = gr.Button("Describe Image")
        image_caption = gr.Textbox(label="Image Description", interactive=False)

    chat_history = gr.State([])

    submit_button.click(fn=chat, inputs=[user_input, chat_history, image_input], outputs=[chatbot_output, chat_history])
    clear_button.click(fn=lambda: ("", []), inputs=[], outputs=[chatbot_output, chat_history])
    upload_button.click(fn=describe_image, inputs=[image_input], outputs=[image_caption])

# Launch the app
if __name__ == "__main__":
    demo.launch()
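
# To try the app locally (assuming this file is saved as app.py and the
# dependencies are installed):
#   pip install gradio transformers torch pillow
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default; pass
# share=True to demo.launch() to get a temporary public link.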