Spaces:
Sleeping
Sleeping
import os | |
import base64 | |
import requests | |
from smolagents import Tool | |
class ImageAnalysisTool(Tool):
    """Tool that captions an image through a hosted Hugging Face model.

    The image file is read from disk, base64-encoded and posted as JSON to
    the endpoint stored in ``self.api_url``. The caption found in the reply
    is returned together with the caller's question.

    Note: the request payload contains only the encoded image; ``question``
    is not sent to the endpoint and is only echoed in the returned text.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it using Hugging Face Inference API."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)",
        },
        "question": {
            "type": "string",
            "description": "A question about the image content",
        },
    }
    output_type = "string"

    def __init__(self):
        """Read the API token from the environment and build request settings.

        Raises:
            EnvironmentError: if ``HF_API_TOKEN`` is unset or empty.
        """
        super().__init__()
        token = os.environ.get("HF_API_TOKEN")
        if not token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
        self.api_url = "https://api-inference.huggingface.co/models/microsoft/git-base-captioning"

    def _caption_from(self, result):
        """Return the first non-empty caption-like field in ``result``, else None.

        ``result`` is the decoded JSON reply: either a dict, or a non-empty
        list whose first element is a dict. Any other shape yields ``None``.
        """
        if isinstance(result, dict):
            record = result
        elif isinstance(result, list) and result and isinstance(result[0], dict):
            record = result[0]
        else:
            return None
        # Keys are tried in priority order; empty or missing values are skipped.
        for key in ("generated_text", "caption", "text"):
            value = record.get(key)
            if value:
                return value
        return None

    def forward(self, image_path: str, question: str) -> str:
        """Caption the image at ``image_path`` and format a reply.

        Args:
            image_path: Path of the image file to read in binary mode.
            question: Question text; it is interpolated into the returned
                string but is not part of the request sent to the endpoint.

        Returns:
            On success, ``"Caption: <caption>"`` followed by a line that
            repeats the caption as the answer to ``question``. On any
            failure (unreadable file, non-200 status, reply without a
            caption, or any raised exception) an ``"Error ..."`` string is
            returned instead of raising.
        """
        try:
            with open(image_path, "rb") as handle:
                encoded = base64.b64encode(handle.read()).decode("utf-8")

            # Only the encoded image is sent to the endpoint.
            reply = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": encoded},
                timeout=60,
            )
            if reply.status_code != 200:
                return f"Error analyzing image: {reply.status_code} {reply.text}"

            caption = self._caption_from(reply.json())
            if not caption:
                return "Error: No caption found in model response."

            # The caption doubles as the "answer" to the question.
            return f"Caption: {caption}\nAnswer to question '{question}': {caption}".strip()
        except Exception as exc:
            return f"Error analyzing image: {exc}"