import base64

import openai
import requests
from smolagents import Tool

# Upper bound (seconds) on the image download so a stalled server cannot
# block the tool forever.
_DOWNLOAD_TIMEOUT_SECONDS = 30

# MIME type used in the data URL when the server does not report an image type.
_DEFAULT_MIME_TYPE = "image/jpeg"


class ImageAnalysisTool(Tool):
    """Tool that downloads an image and asks an OpenAI chat model about it.

    The image is fetched from a URL, base64-encoded in memory, and sent
    together with the question as a single user message.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it."
    inputs = {
        "url": {
            "type": "string",
            "description": "URL to the image",
        },
        "question": {
            "type": "string",
            "description": "Question about the image content",
        },
    }
    output_type = "string"

    def forward(self, url: str, question: str) -> str:
        """Answer ``question`` about the image located at ``url``.

        Args:
            url: URL the image is downloaded from.
            question: Question sent to the model alongside the image.

        Returns:
            The stripped text of the first completion choice, or a string
            starting with ``"Error analyzing image: "`` if any step fails.
            Exceptions are never propagated to the caller.
        """
        try:
            # Download image. The timeout prevents an indefinite hang, and
            # raise_for_status() stops an HTTP error page from being encoded
            # and sent to the model as if it were an image.
            r = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
            r.raise_for_status()

            image_bytes = r.content
            if not image_bytes:
                raise ValueError("downloaded image is empty")

            # Use the server-reported image type when there is one; otherwise
            # keep the historical assumption that the payload is JPEG.
            mime_type = r.headers.get("Content-Type", "").split(";")[0].strip().lower()
            if not mime_type.startswith("image/"):
                mime_type = _DEFAULT_MIME_TYPE

            # Encode in memory: writing to a fixed shared path such as
            # /tmp/image_input.jpg lets concurrent calls clobber each other.
            base64_image = base64.b64encode(image_bytes).decode("utf-8")

            # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
            # confirm the pinned openai version before upgrading the package.
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": question},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                max_tokens=300,
            )

            return response["choices"][0]["message"]["content"].strip()

        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error analyzing image: {e}"

    def encode_image(self, image_path: str) -> str:
        """Return the base64 encoding of the file at ``image_path`` as text.

        ``forward`` no longer calls this helper because it encodes the
        download in memory; it is kept so existing external callers still work.

        Args:
            image_path: Path of the file to read in binary mode.

        Returns:
            The file's bytes, base64-encoded and decoded as UTF-8 text.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")