Spaces:
Sleeping
Sleeping
import base64
import mimetypes

import openai
from smolagents import Tool
class ImageAnalysisTool(Tool):
    """Tool that asks an OpenAI chat model a question about a local image.

    The image file is read from disk, base64-encoded and embedded in the
    request as a ``data:`` URL together with the question text. Any failure
    (unreadable file, API error, unexpected response shape) is reported as an
    ``"Error analyzing image: ..."`` string rather than raised.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)",
        },
        "question": {
            "type": "string",
            "description": "A question about the image content",
        },
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

    def forward(self, image_path: str, question: str) -> str:
        """Send the image and the question to the model and return its answer.

        Args:
            image_path: Path to the image file on disk.
            question: Question to ask about the image.

        Returns:
            The model's text answer, or a string starting with
            ``"Error analyzing image: "`` if anything went wrong.
        """
        try:
            # Reading the file happens inside the try so that a bad path is
            # reported the same way as an API failure.
            base64_image = self.encode_image(image_path)
            mime_type = self._guess_mime_type(image_path)
            request = {
                "model": "gpt-4-turbo",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": question},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                "max_tokens": 300,
            }
            if hasattr(openai, "OpenAI"):
                # openai>=1.0: ChatCompletion was removed; the module-level
                # client returns an object with attribute access.
                response = openai.chat.completions.create(**request)
                return response.choices[0].message.content or ""
            # Legacy openai<1.0 interface with dict-style responses.
            response = openai.ChatCompletion.create(**request)
            return response["choices"][0]["message"]["content"]
        except Exception as e:
            return f"Error analyzing image: {e}"

    def encode_image(self, image_path: str) -> str:
        """Return the contents of ``image_path`` as a base64 ASCII string.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _guess_mime_type(self, image_path: str) -> str:
        """Guess the image MIME type from the file extension.

        Falls back to ``image/jpeg`` when the extension is unknown or does
        not map to an image type.
        """
        guessed, _ = mimetypes.guess_type(image_path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"