Final_Assignment_Template

Sleeping

App Files Files Community

dlaima committed on May 30

Commit

02840f8

verified ·

1 Parent(s): 339a971

Update image_analyzer.py

Browse files

Files changed (1) hide show

image_analyzer.py +34 -17

image_analyzer.py CHANGED Viewed

@@ -1,36 +1,53 @@
-import os
 import openai
 from smolagents import Tool
-openai.api_key = os.getenv("OPENAI_API_KEY")
-class ImageAnalyzer(Tool):
-    name = "image_analyzer"
-    description = "Analyze the given image and describe or reason about its contents."
     inputs = {
         "image_path": {
             "type": "string",
-            "description": "Path to the image file (e.g., a chessboard image)."
         },
         "question": {
             "type": "string",
-            "description": "The question to answer about the image (e.g., best chess move)."
         }
     }
     output_type = "string"
     def forward(self, image_path: str, question: str) -> str:
-        with open(image_path, "rb") as image_file:
-            response = openai.chat.completions.create(
-                model="gpt-4-vision-preview",
                 messages=[
-                    {"role": "user", "content": [
-                        {"type": "text", "text": question},
-                        {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + image_file.read().encode("base64").decode()}}
-                    ]}
                 ],
-                max_tokens=500
             )
-            return response.choices[0].message.content.strip()

+import base64
 import openai
 from smolagents import Tool
+class ImageAnalysisTool(Tool):
+    name = "image_analysis"
+    description = "Analyze the content of an image and answer a specific question about it."
     inputs = {
         "image_path": {
             "type": "string",
+            "description": "Path to the image file (jpg, png, etc.)"
         },
         "question": {
             "type": "string",
+            "description": "A question about the image content"
         }
     }
     output_type = "string"
+    def __init__(self):
+        super().__init__()
     def forward(self, image_path: str, question: str) -> str:
+        base64_image = self.encode_image(image_path)
+        try:
+            response = openai.ChatCompletion.create(
+                model="gpt-4-turbo",
                 messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": question},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                                }
+                            }
+                        ]
+                    }
                 ],
+                max_tokens=300
             )
+            return response["choices"][0]["message"]["content"]
+        except Exception as e:
+            return f"Error analyzing image: {e}"
+    def encode_image(self, image_path):
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")