Spaces:
Sleeping
Sleeping
File size: 2,745 Bytes
9571928 02840f8 5c5f32d 0b0ce33 02840f8 0a0ae08 0b0ce33 9571928 0b0ce33 9571928 0b0ce33 9571928 0b0ce33 9571928 0a0ae08 9571928 0a0ae08 9571928 02840f8 9571928 0a0ae08 9571928 0a0ae08 9571928 0b0ce33 9571928 0a0ae08 9571928 0a0ae08 9571928 0a0ae08 9571928 0a0ae08 9571928 02840f8 0b0ce33 5c5f32d 0a0ae08 0b0ce33 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
import base64
import requests
from smolagents import Tool
class ImageAnalysisTool(Tool):
    """Caption an image through the Hugging Face Inference API.

    The tool reads an image from disk, sends it base64-encoded to the
    configured model endpoint and returns the generated caption.

    NOTE: the ``question`` input is not sent to the model. The returned text
    contains the caption twice: once as the caption and once labelled as the
    answer to the question.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it using Hugging Face Inference API."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)"
        },
        "question": {
            "type": "string",
            "description": "A question about the image content"
        }
    }
    output_type = "string"

    def __init__(self):
        """Configure the endpoint URL and auth headers.

        Raises:
            EnvironmentError: If the ``HF_API_TOKEN`` environment variable is
                unset or empty.
        """
        super().__init__()
        api_token = os.getenv("HF_API_TOKEN")
        if not api_token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        self.api_url = "https://api-inference.huggingface.co/models/microsoft/git-base-captioning"
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }

    def _extract_caption(self, result):
        """Return the first non-empty caption-like field of *result*, or None.

        Accepts either a dict or a non-empty list whose first element is a
        dict; any other shape yields ``None``.
        """
        if isinstance(result, list):
            result = result[0] if result else None
        if not isinstance(result, dict):
            return None
        # Try common keys for caption output, in priority order.
        for key in ("generated_text", "caption", "text"):
            value = result.get(key)
            if value:
                return value
        return None

    def forward(self, image_path: str, question: str) -> str:
        """Caption the image at *image_path* and format it as an answer.

        Args:
            image_path: Path to the image file to read in binary mode.
            question: Question echoed in the output; it is not sent to the
                model.

        Returns:
            ``"Caption: ...\\nAnswer to question '...': ..."`` on success, or
            a string starting with ``"Error"`` describing the failure. This
            method does not raise: every exception is converted to an error
            string.
        """
        # The broad handler is deliberate: this is the tool boundary and the
        # caller receives failures as text rather than as exceptions.
        try:
            with open(image_path, "rb") as img_file:
                image_bytes = img_file.read()

            # Reject an empty file locally instead of spending a network
            # round trip on a request that carries no image data.
            if not image_bytes:
                return "Error analyzing image: image file is empty."

            # Prepare JSON payload - the exact structure depends on the model
            # capabilities. Here we send just the base64 image for captioning.
            payload = {
                "inputs": base64.b64encode(image_bytes).decode("utf-8")
            }
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
                timeout=60
            )
            if response.status_code != 200:
                return f"Error analyzing image: {response.status_code} {response.text}"

            caption = self._extract_caption(response.json())
            if not caption:
                return "Error: No caption found in model response."

            # Combine caption with the question to form a simple answer.
            answer = f"Caption: {caption}\nAnswer to question '{question}': {caption}"
            return answer.strip()
        except Exception as e:
            return f"Error analyzing image: {e}"
|