dlaima committed on
Commit
02840f8
·
verified ·
1 Parent(s): 339a971

Update image_analyzer.py

Browse files
Files changed (1) hide show
  1. image_analyzer.py +34 -17
image_analyzer.py CHANGED
@@ -1,36 +1,53 @@
1
- import os
2
  import openai
3
  from smolagents import Tool
4
 
5
- openai.api_key = os.getenv("OPENAI_API_KEY")
6
-
7
- class ImageAnalyzer(Tool):
8
- name = "image_analyzer"
9
- description = "Analyze the given image and describe or reason about its contents."
10
  inputs = {
11
  "image_path": {
12
  "type": "string",
13
- "description": "Path to the image file (e.g., a chessboard image)."
14
  },
15
  "question": {
16
  "type": "string",
17
- "description": "The question to answer about the image (e.g., best chess move)."
18
  }
19
  }
20
  output_type = "string"
21
 
 
 
 
22
  def forward(self, image_path: str, question: str) -> str:
23
- with open(image_path, "rb") as image_file:
24
- response = openai.chat.completions.create(
25
- model="gpt-4-vision-preview",
 
26
  messages=[
27
- {"role": "user", "content": [
28
- {"type": "text", "text": question},
29
- {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + image_file.read().encode("base64").decode()}}
30
- ]}
 
 
 
 
 
 
 
 
31
  ],
32
- max_tokens=500
33
  )
34
- return response.choices[0].message.content.strip()
 
 
 
 
 
 
 
35
 
36
 
 
1
+ import base64
2
  import openai
3
  from smolagents import Tool
4
 
5
class ImageAnalysisTool(Tool):
    """Tool that asks an OpenAI chat model a question about a local image.

    The image is read from disk, base64-encoded, embedded in a ``data:`` URL
    and sent together with the question in a single user message. Failures
    (unreadable file, API error) are reported as an ``"Error analyzing
    image: ..."`` string rather than raised, so the caller always receives
    text.
    """

    name = "image_analysis"
    description = "Analyze the content of an image and answer a specific question about it."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)"
        },
        "question": {
            "type": "string",
            "description": "A question about the image content"
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

    def forward(self, image_path: str, question: str) -> str:
        """Answer ``question`` about the image stored at ``image_path``.

        Args:
            image_path: Path to the image file on disk.
            question: The question to ask about the image.

        Returns:
            The model's answer, or a string starting with
            ``"Error analyzing image:"`` if the file cannot be read or the
            API call fails.
        """
        # Local import keeps this block self-contained; mimetypes is stdlib.
        import mimetypes

        # Reading the file is inside the error boundary too, so a bad path is
        # reported the same way as an API failure instead of raising.
        try:
            base64_image = self.encode_image(image_path)
        except OSError as e:
            return f"Error analyzing image: {e}"

        # Label the data URL with the real image type where it can be guessed
        # from the file name; fall back to the previous hard-coded JPEG.
        mime_type, _ = mimetypes.guess_type(image_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ]

        try:
            # openai>=1.0 exposes `openai.chat.completions.create`; the legacy
            # `openai.ChatCompletion.create` only works on older releases and
            # raises on 1.x. Pick whichever the installed library provides.
            chat_api = getattr(openai, "chat", None)
            if chat_api is not None:
                create = chat_api.completions.create
            else:
                create = openai.ChatCompletion.create

            response = create(
                model="gpt-4-turbo",
                messages=messages,
                max_tokens=300
            )
            # Attribute access is used because 1.x response objects are not
            # subscriptable like dicts.
            content = response.choices[0].message.content
            return content if content is not None else ""
        except Exception as e:
            # Deliberate catch-all: the tool reports failures as text.
            return f"Error analyzing image: {e}"

    def encode_image(self, image_path: str) -> str:
        """Return the contents of ``image_path`` as a base64 text string.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
51
+
52
 
53