File size: 3,259 Bytes
6f5d1ef
86c6428
99c4fdf
86c6428
 
99c4fdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86c6428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99c4fdf
 
 
 
 
 
 
86c6428
99c4fdf
6f5d1ef
 
 
99c4fdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from smolagents import Tool, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool, WikipediaSearchTool
from openai import OpenAI
import whisper
import base64
import os

class read_file(Tool):
	"""Agent tool that returns the text content of a file on disk."""

	# Tool metadata declared as class attributes (presumably consumed by the
	# smolagents `Tool` base class -- confirm against the smolagents docs).
	name="read_file"
	description="Read a file and return the content."
	inputs={
		"file_path": {
			"type": "string",
			"description": "The path to the file to read."
		}
	}

	output_type = "string"

	def forward(self, file_path: str) -> str:
		"""
		Read the content of a file and return it as a string.

		Args:
			file_path: Path of the file to read.

		Returns:
			The file content decoded as UTF-8. If anything goes wrong (missing
			file, permission problem, undecodable bytes, ...) the error is not
			raised; a string starting with "Error reading file: " is returned
			instead.
		"""
		try:
			# Decode explicitly as UTF-8 so the result does not depend on the
			# platform/locale default encoding.
			with open(file_path, 'r', encoding='utf-8') as file:
				return file.read()
		except Exception as e:
			# Deliberately best-effort: report the failure as text.
			return f"Error reading file: {str(e)}"
		
class transcribe_audio(Tool):
	"""Agent tool that transcribes an audio file to text with Whisper."""

	# Tool metadata declared as class attributes (presumably consumed by the
	# smolagents `Tool` base class -- confirm against the smolagents docs).
	name="transcribe_audio"
	description="Transcribe an audio file and return the text."
	inputs={
		"audio_path": {
			"type": "string",
			"description": "The path to the audio file to transcribe."
		}
	}

	output_type = "string"

	def _get_model(self):
		"""Return the Whisper "small" model, loading it only on first use."""
		model = getattr(self, "_whisper_model", None)
		if model is None:
			# Loading is expensive; keep the model on the instance so later
			# calls to forward() reuse it instead of reloading it.
			model = whisper.load_model("small")
			self._whisper_model = model
		return model

	def forward(self, audio_path: str) -> str:
		"""
		Transcribe an audio file and return the recognized text.

		Args:
			audio_path: Path of the audio file to transcribe.

		Returns:
			The value of the 'text' entry of the Whisper transcription result.
			If loading the model or transcribing fails, the error is not raised;
			a string starting with "Error transcribing audio: " is returned
			instead.
		"""
		try:
			result = self._get_model().transcribe(audio_path)
			return result['text']
		except Exception as e:
			# Deliberately best-effort: report the failure as text.
			return f"Error transcribing audio: {str(e)}"


def get_data_uri(image_path: str, base64_image: str) -> str:
	"""
	Build a base64 `data:` URI for an image.

	The MIME type is derived from the (case-insensitive) file extension of
	`image_path`. Extensions that are not themselves valid `image/*` subtypes
	(e.g. `jpg`, `svg`, `tif`) are mapped to the registered subtype; any other
	extension is used as the subtype unchanged. A path without an extension
	falls back to `application/octet-stream` rather than the malformed `image/`.

	Args:
		image_path: Path of the image; only its extension is inspected.
		base64_image: The image bytes, already base64-encoded as text.

	Returns:
		A string of the form `data:<mime type>;base64,<base64_image>`.
	"""
	# File extensions whose name differs from the registered image subtype.
	subtype_aliases = {
		"jpg": "jpeg",
		"jpe": "jpeg",
		"jfif": "jpeg",
		"svg": "svg+xml",
		"tif": "tiff",
	}

	_, file_extension = os.path.splitext(image_path)
	file_extension = file_extension.lower().lstrip(".")

	if file_extension:
		subtype = subtype_aliases.get(file_extension, file_extension)
		mime_type = f"image/{subtype}"
	else:
		mime_type = "application/octet-stream"

	return f"data:{mime_type};base64,{base64_image}"

class describe_image(Tool):
	"""Agent tool that asks a vision-capable chat model to describe an image file."""

	# Tool metadata declared as class attributes (presumably consumed by the
	# smolagents `Tool` base class -- confirm against the smolagents docs).
	name="describe_image"
	description="Describe an image and return the description."
	inputs={
		"image_path": {
			"type": "string",
			"description": "The path to the image file to describe."
		}
	}

	output_type = "string"

	def forward(self, image_path: str) -> str:
		"""
		Send the image to the chat-completions API and return its description.

		The API key is read from the OPENROUTER_API_KEY environment variable and
		the endpoint from OPENROUTER_BASE_URL.

		Args:
			image_path: Path of the image file to describe.

		Returns:
			The content of the first choice of the model response. If reading
			the file or calling the API fails, the error is not raised; a string
			starting with "Error describing image: " is returned instead.

		Raises:
			ValueError: If OPENROUTER_API_KEY is not set (or is empty).
		"""
		api_key = os.getenv("OPENROUTER_API_KEY")
		if not api_key:
			# The message names the variable that is actually read above.
			raise ValueError("OpenRouter API key not provided: OPENROUTER_API_KEY environment variable not set")

		# NOTE(review): if OPENROUTER_BASE_URL is unset, None is passed as
		# base_url -- confirm which endpoint the OpenAI client then uses.
		base_url = os.getenv("OPENROUTER_BASE_URL")
		client = OpenAI(api_key=api_key, base_url=base_url)

		try:
			# Only hold the file open while reading it, not for the whole
			# duration of the network request.
			with open(image_path, 'rb') as image_file:
				base64_image = base64.b64encode(image_file.read()).decode('utf-8')

			data_uri = get_data_uri(image_path, base64_image)

			response = client.chat.completions.create(
				model="gpt-4o",
				messages=[
					{
						"role": "user",
						"content": [
							{"type": "text", "text": "Describe this image in detail. Include information about the main subject, setting, colors, and any notable elements."},
							{
								"type": "image_url",
								"image_url": {"url": data_uri}
							}
						]
					}
				],
				max_tokens=500
			)

			return response.choices[0].message.content
		except Exception as e:
			# Deliberately best-effort: report the failure as text.
			return f"Error describing image: {str(e)}"
			

def return_tools() -> list[Tool]:
	"""
	Build the agent's toolbox.

	Returns:
		A new list holding one fresh instance of each tool: the three tools
		defined in this module followed by the four imported from smolagents.
	"""
	# Tool classes in the order their instances must appear in the result.
	tool_classes = (
		read_file,
		transcribe_audio,
		describe_image,
		DuckDuckGoSearchTool,
		PythonInterpreterTool,
		VisitWebpageTool,
		WikipediaSearchTool,
	)
	toolbox = []
	for tool_class in tool_classes:
		toolbox.append(tool_class())
	return toolbox