dlaima committed on
Commit
5c5f32d
·
verified ·
1 Parent(s): 02a5e73

Update image_analyzer.py

Browse files
Files changed (1) hide show
  1. image_analyzer.py +15 -9
image_analyzer.py CHANGED
@@ -1,4 +1,5 @@
1
  import base64
 
2
  import openai
3
  from smolagents import Tool
4
 
@@ -6,23 +7,27 @@ class ImageAnalysisTool(Tool):
6
  name = "image_analysis"
7
  description = "Analyze the content of an image and answer a specific question about it."
8
  inputs = {
9
- "image_path": {
10
  "type": "string",
11
- "description": "Path to the image file (jpg, png, etc.)"
12
  },
13
  "question": {
14
  "type": "string",
15
- "description": "A question about the image content"
16
  }
17
  }
18
  output_type = "string"
19
 
20
- def __init__(self):
21
- super().__init__()
22
-
23
- def forward(self, image_path: str, question: str) -> str:
24
- base64_image = self.encode_image(image_path)
25
  try:
 
 
 
 
 
 
 
 
26
  response = openai.ChatCompletion.create(
27
  model="gpt-4-turbo",
28
  messages=[
@@ -41,7 +46,7 @@ class ImageAnalysisTool(Tool):
41
  ],
42
  max_tokens=300
43
  )
44
- return response["choices"][0]["message"]["content"]
45
  except Exception as e:
46
  return f"Error analyzing image: {e}"
47
 
@@ -50,4 +55,5 @@ class ImageAnalysisTool(Tool):
50
  return base64.b64encode(image_file.read()).decode("utf-8")
51
 
52
 
 
53
 
 
1
  import base64
2
+ import requests
3
  import openai
4
  from smolagents import Tool
5
 
 
7
  name = "image_analysis"
8
  description = "Analyze the content of an image and answer a specific question about it."
9
  inputs = {
10
+ "url": {
11
  "type": "string",
12
+ "description": "URL to the image"
13
  },
14
  "question": {
15
  "type": "string",
16
+ "description": "Question about the image content"
17
  }
18
  }
19
  output_type = "string"
20
 
21
+ def forward(self, url: str, question: str) -> str:
 
 
 
 
22
  try:
23
+ # Download image
24
+ image_path = "/tmp/image_input.jpg"
25
+ r = requests.get(url)
26
+ with open(image_path, "wb") as f:
27
+ f.write(r.content)
28
+
29
+ # Encode & analyze
30
+ base64_image = self.encode_image(image_path)
31
  response = openai.ChatCompletion.create(
32
  model="gpt-4-turbo",
33
  messages=[
 
46
  ],
47
  max_tokens=300
48
  )
49
+ return response["choices"][0]["message"]["content"].strip()
50
  except Exception as e:
51
  return f"Error analyzing image: {e}"
52
 
 
55
  return base64.b64encode(image_file.read()).decode("utf-8")
56
 
57
 
58
+
59