File size: 3,547 Bytes
b488bd8
900d27d
fc655be
98d3845
900d27d
 
 
 
68d7ce5
 
900d27d
68d7ce5
36c56fe
 
 
818e378
 
 
 
68d7ce5
36c56fe
 
68d7ce5
 
900d27d
68d7ce5
 
 
b488bd8
 
 
 
 
 
 
68d7ce5
36c56fe
 
 
 
b488bd8
 
 
68d7ce5
b488bd8
68d7ce5
36c56fe
 
b488bd8
68d7ce5
36c56fe
68d7ce5
b488bd8
68d7ce5
36c56fe
818e378
68d7ce5
 
a587508
36c56fe
 
 
68d7ce5
 
36c56fe
68d7ce5
 
 
 
 
 
 
36c56fe
 
 
 
68d7ce5
 
 
36c56fe
68d7ce5
 
 
 
 
 
36c56fe
 
 
 
 
 
 
68d7ce5
 
 
 
36c56fe
 
 
 
 
 
b488bd8
 
68d7ce5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os

import gradio as gr
import requests
from PIL import Image
from transformers import BlipForQuestionAnswering, AutoProcessor

# BLIP VQA produces a short literal answer from the image; Groq's LLM then
# expands it into a detailed, context-aware reply (see qna below).
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")

# SECURITY: the previous revision hard-coded a Groq API key here. That key is
# now public (committed to source) and must be rotated. Read the replacement
# from the environment; an empty fallback keeps the module importable and
# produces an authentication error (surfaced in the chat) instead of a crash.
groq_api_key = os.environ.get("GROQ_API_KEY", "")
groq_api_url = "https://api.groq.com/openai/v1/chat/completions"

def qna(image, question, history):
    """Answer a question about *image*, keeping conversational context.

    BLIP generates a short literal answer from the image; that answer plus
    the prior Q/A history is then sent to Groq's chat-completions API for a
    detailed reply.

    Args:
        image: PIL image, or None if nothing has been uploaded yet.
        question: the user's current question string.
        history: list of (question, answer) tuples from earlier turns.

    Returns:
        A 2-tuple ``(chatbot_update, state_update)`` — the updated history
        twice, matching the two Gradio outputs wired to this handler.
    """
    def _reply(text):
        # Both outputs (chatbot widget and state) receive the same updated
        # history; building it once removes the triplicated expressions of
        # the previous revision.
        updated = history + [(question, text)]
        return updated, updated

    if image is None:
        return _reply("Please upload an image first.")

    try:
        # Short, literal answer straight from the BLIP VQA model.
        inputs = processor(image, question, return_tensors="pt")
        out = model.generate(**inputs)
        short_answer = processor.decode(out[0], skip_special_tokens=True)

        context = "\n".join(f"Q: {q}\nA: {a}" for q, a in history) if history else "No previous context."

        full_prompt = f"""Context of previous conversation:
{context}

Current Image Description: {short_answer}
Question: {question}
Please provide a detailed answer based on the image and previous context."""

        headers = {
            "Authorization": f"Bearer {groq_api_key}",
            "Content-Type": "application/json",
        }

        data = {
            "model": "llama3-8b-8192",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that answers questions about images based on the provided context and BLIP model's initial analysis."},
                {"role": "user", "content": full_prompt},
            ],
        }

        # Timeout prevents the Gradio handler (and thus the UI) from hanging
        # forever if the Groq endpoint is slow or unreachable; a timeout
        # raises requests.Timeout, which the except below reports in-chat.
        response = requests.post(groq_api_url, headers=headers, json=data, timeout=60)

        if response.status_code == 200:
            detailed_answer = response.json()['choices'][0]['message']['content'].strip()
            return _reply(detailed_answer)

        return _reply(f"Error {response.status_code}: {response.text}")

    except Exception as e:
        # Surface any failure (model, network, unexpected JSON shape) in the
        # chat itself rather than crashing the Gradio event handler.
        return _reply(f"An error occurred: {str(e)}")

def clear_history():
    """Wipe the conversation: empty chatbot display and empty stored state."""
    empty_chat, empty_state = [], []
    return empty_chat, empty_state

def init_history():
    """Begin a fresh conversation (used when a new image is uploaded)."""
    fresh_chat = []
    fresh_state = []
    return fresh_chat, fresh_state

# Gradio UI: an image uploader on top, a chatbot + question box below.
# All handlers return (chatbot, state) pairs so the visible chat and the
# stored (question, answer) history stay in sync.
with gr.Blocks() as demo:
    gr.Markdown("# Interactive Image Chatbot")
    gr.Markdown("Upload an image and ask questions about it. The chatbot will maintain context of the conversation.")
    
    with gr.Row():
        # type="pil" so the handler receives a PIL.Image, as the BLIP
        # processor expects.
        image_input = gr.Image(type="pil")
        
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot()
            question = gr.Textbox(label="Ask a question about the image", placeholder="Type your question here...")
            with gr.Row():
                clear = gr.Button("Clear Conversation")
                new_image = gr.Button("New Image")
    
    # Per-session conversation history: list of (question, answer) tuples.
    state = gr.State([])
    
    # Handle question submission (Enter in the textbox). NOTE(review): the
    # textbox is not cleared after submit — confirm whether that is intended.
    question.submit(
        qna,
        inputs=[image_input, question, state],
        outputs=[chatbot, state]
    )
    
    # Handle image upload: any change to the image (including clearing it)
    # resets the conversation, since old answers refer to the old image.
    image_input.change(
        init_history,
        outputs=[chatbot, state]
    )
    
    # Clear conversation
    clear.click(
        clear_history,
        outputs=[chatbot, state]
    )
    
    # New image button: resets the chat only; the user still has to upload
    # the replacement image (which triggers the change handler above again).
    new_image.click(
        clear_history,
        outputs=[chatbot, state]
    )

# Launch the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()