Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| import requests | |
| from smolagents import Tool | |
class ImageAnalysisTool(Tool):
    """Caption an image through the Hugging Face Inference API.

    The tool reads an image from disk, base64-encodes it, and posts it to the
    hosted captioning endpoint configured in ``__init__``. The returned caption
    is then formatted together with the caller's question.

    NOTE: the question is never sent to the model; the request payload holds
    only the image, and the caption is repeated as the "answer". Callers
    should treat the output as a description of the image rather than a
    targeted answer.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it using Hugging Face Inference API."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)",
        },
        "question": {
            "type": "string",
            "description": "A question about the image content",
        },
    }
    output_type = "string"

    def __init__(self):
        """Read the API token from the environment and build request settings.

        Raises:
            EnvironmentError: If ``HF_API_TOKEN`` is unset or empty.
        """
        super().__init__()
        token = os.getenv("HF_API_TOKEN")
        if not token:
            raise EnvironmentError("HF_API_TOKEN not found in environment variables.")
        self.api_url = "https://api-inference.huggingface.co/models/microsoft/git-base-captioning"
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }

    @staticmethod
    def _find_caption(result):
        """Return the first non-empty caption-like field in *result*, else None.

        *result* is the decoded JSON body. Either a dict or a non-empty list
        whose first element is a dict is inspected; any other shape yields
        None.
        """
        if isinstance(result, dict):
            record = result
        elif isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
            record = result[0]
        else:
            return None

        # Probe the keys in priority order and stop at the first truthy value.
        for key in ("generated_text", "caption", "text"):
            value = record.get(key)
            if value:
                return value
        return None

    def forward(self, image_path: str, question: str) -> str:
        """Caption the image at *image_path* and format it with *question*.

        Args:
            image_path: Path of the image file to read in binary mode.
            question: Text echoed into the formatted result; it is not part
                of the request sent to the API.

        Returns:
            On success, a two-line string containing the caption and the
            question. On any failure (unreadable file, network error, non-200
            status, undecodable or caption-less response) a string beginning
            with ``"Error"`` is returned instead of raising.
        """
        try:
            with open(image_path, "rb") as handle:
                encoded = base64.b64encode(handle.read()).decode("utf-8")

            # Only the base64-encoded image is sent to the captioning endpoint.
            reply = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": encoded},
                timeout=60,
            )
            if reply.status_code != 200:
                return f"Error analyzing image: {reply.status_code} {reply.text}"

            caption = self._find_caption(reply.json())
            if not caption:
                return "Error: No caption found in model response."

            # The caption doubles as the answer to the question.
            return f"Caption: {caption}\nAnswer to question '{question}': {caption}".strip()
        except Exception as e:
            # Tool boundary: report every failure as text rather than raising.
            return f"Error analyzing image: {e}"