"""Interactive image chatbot.

A local BLIP visual-question-answering model produces a short literal answer
about the uploaded image; a Groq-hosted LLM then expands it into a detailed,
conversation-aware reply. Served through a Gradio Blocks chat UI.
"""

import os

import requests
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, BlipForQuestionAnswering

# BLIP VQA model supplies the short "seed" answer about the image.
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")

# SECURITY: the API key was previously hard-coded here (and is therefore
# leaked — revoke it). Read it from the environment instead.
groq_api_key = os.environ.get("GROQ_API_KEY", "")
groq_api_url = "https://api.groq.com/openai/v1/chat/completions"


def _with_reply(history, question, answer):
    """Append (question, answer) to *history* and return the (chatbot, state)
    pair — both the same list, so the UI and session state stay in sync."""
    new_history = history + [(question, answer)]
    return new_history, new_history


def qna(image, question, history):
    """Answer *question* about *image*, threading prior Q/A pairs as context.

    Returns a (chatbot_messages, state) tuple of identical lists of
    (question, answer) tuples. Any failure is surfaced as a chat message
    rather than crashing the Gradio handler.
    """
    if image is None:
        return _with_reply(history, question, "Please upload an image first.")
    try:
        # Step 1: short literal answer from the local BLIP VQA model.
        inputs = processor(image, question, return_tensors="pt")
        out = model.generate(**inputs)
        short_answer = processor.decode(out[0], skip_special_tokens=True)

        # Step 2: expand it via the Groq API, feeding earlier turns back in.
        context = (
            "\n".join(f"Q: {q}\nA: {a}" for q, a in history)
            if history
            else "No previous context."
        )
        full_prompt = f"""Context of previous conversation:
{context}

Current Image Description: {short_answer}

Question: {question}

Please provide a detailed answer based on the image and previous context."""
        headers = {
            "Authorization": f"Bearer {groq_api_key}",
            "Content-Type": "application/json",
        }
        data = {
            "model": "llama3-8b-8192",
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that answers questions "
                        "about images based on the provided context and BLIP "
                        "model's initial analysis."
                    ),
                },
                {"role": "user", "content": full_prompt},
            ],
        }
        # timeout keeps the UI handler from hanging forever on a stalled call.
        response = requests.post(
            groq_api_url, headers=headers, json=data, timeout=60
        )
        if response.status_code == 200:
            detailed_answer = (
                response.json()["choices"][0]["message"]["content"].strip()
            )
            return _with_reply(history, question, detailed_answer)
        return _with_reply(
            history, question, f"Error {response.status_code}: {response.text}"
        )
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: report, don't crash.
        return _with_reply(history, question, f"An error occurred: {str(e)}")


def clear_history():
    """Reset both the chatbot display and the stored conversation state."""
    return [], []


def init_history():
    """Start a fresh conversation (used when a new image is uploaded)."""
    return [], []


with gr.Blocks() as demo:
    gr.Markdown("# Interactive Image Chatbot")
    gr.Markdown(
        "Upload an image and ask questions about it. The chatbot will "
        "maintain context of the conversation."
    )
    with gr.Row():
        image_input = gr.Image(type="pil")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot()
            question = gr.Textbox(
                label="Ask a question about the image",
                placeholder="Type your question here...",
            )
            with gr.Row():
                clear = gr.Button("Clear Conversation")
                new_image = gr.Button("New Image")
    state = gr.State([])

    # Handle question submission
    question.submit(
        qna,
        inputs=[image_input, question, state],
        outputs=[chatbot, state],
    )
    # Handle image upload: reset the conversation for the new image
    image_input.change(
        init_history,
        outputs=[chatbot, state],
    )
    # Clear conversation
    clear.click(
        clear_history,
        outputs=[chatbot, state],
    )
    # New image button
    new_image.click(
        clear_history,
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    demo.launch()