Final_Assignment_Template

Sleeping

App Files Community

dlaima committed on Jun 2

Commit

9571928

verified ·

1 Parent(s): ba3b3ff

Update image_analyzer.py

Browse files

Files changed (1) hide show

image_analyzer.py +48 -36

image_analyzer.py CHANGED Viewed

@@ -1,59 +1,71 @@
 import base64
 import requests
-import openai
 from smolagents import Tool
 class ImageAnalysisTool(Tool):
     name = "image_analysis"
-    description = "Analyze the content of an image and answer a specific question about it."
     inputs = {
-        "url": {
             "type": "string",
-            "description": "URL to the image"
         },
         "question": {
             "type": "string",
-            "description": "Question about the image content"
         }
     }
     output_type = "string"
-    def forward(self, url: str, question: str) -> str:
         try:
-            # Download image
-            image_path = "/tmp/image_input.jpg"
-            r = requests.get(url)
-            with open(image_path, "wb") as f:
-                f.write(r.content)
-            # Encode & analyze
-            base64_image = self.encode_image(image_path)
-            response = openai.ChatCompletion.create(
-                model="gpt-4-turbo",
-                messages=[
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": question},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{base64_image}"
-                                }
-                            }
-                        ]
-                    }
-                ],
-                max_tokens=300
             )
-            return response["choices"][0]["message"]["content"].strip()
         except Exception as e:
             return f"Error analyzing image: {e}"
-    def encode_image(self, image_path):
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")

+import os
 import base64
 import requests
 from smolagents import Tool
 class ImageAnalysisTool(Tool):
     name = "image_analysis"
+    description = "Analyze the content of an image and answer a specific question about it using HF Inference API."
     inputs = {
+        "image_path": {
             "type": "string",
+            "description": "Path to the image file (jpg, png, etc.)"
         },
         "question": {
             "type": "string",
+            "description": "A question about the image content"
         }
     }
     output_type = "string"
+    def __init__(self):
+        super().__init__()
+        # You can replace this with any vision model capable of VQA or image captioning
+        self.api_url = "https://api-inference.huggingface.co/models/microsoft/git-base-captioning"
+        self.headers = {
+            "Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"
+        }
+    def forward(self, image_path: str, question: str) -> str:
         try:
+            with open(image_path, "rb") as img_file:
+                image_bytes = img_file.read()
+            # Prepare the payload depending on the model API.
+            # Some models accept just the image bytes and return captions,
+            # some support multimodal input with text question + image.
+            # For this example, we'll assume a captioning model and append question manually.
+            response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                data=image_bytes,
+                timeout=60
             )
+            if response.status_code == 200:
+                result = response.json()
+                caption = None
+                # The format depends on the model; check keys like 'generated_text' or 'caption'
+                if isinstance(result, dict):
+                    caption = result.get("generated_text") or result.get("caption")
+                elif isinstance(result, list) and len(result) > 0:
+                    caption = result[0].get("generated_text") if "generated_text" in result[0] else None
+                if not caption:
+                    return "Error: No caption found in model response."
+                # Simple approach: combine caption + question to produce answer prompt
+                # If you want a deeper answer, you could chain a chat model here.
+                answer = f"Caption: {caption}\nAnswer to question '{question}': {caption}"
+                return answer.strip()
+            else:
+                return f"Error analyzing image: {response.status_code} {response.text}"
         except Exception as e:
             return f"Error analyzing image: {e}"