Spaces:

ItzRoBeerT
/

GAIA_agent

Sleeping

File size: 3,259 Bytes

import base64
import mimetypes
import os

import whisper
from openai import OpenAI
from smolagents import Tool, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool, WikipediaSearchTool

class read_file(Tool):
	"""Tool that returns the text content of a file on disk."""

	name = "read_file"
	description = "Read a file and return the content."
	inputs = {
		"file_path": {
			"type": "string",
			"description": "The path to the file to read."
		}
	}

	output_type = "string"

	def forward(self, file_path: str) -> str:
		"""
		Read the content of a text file and return it as a string.

		Args:
			file_path: The path to the file to read.

		Returns:
			The file content, or a message starting with "Error reading file:"
			when the file cannot be opened or decoded.
		"""
		try:
			# Decode explicitly as UTF-8 so the result does not depend on the
			# locale of the machine the tool happens to run on.
			with open(file_path, 'r', encoding='utf-8') as file:
				return file.read()
		except Exception as e:
			# Failures are reported through the return value instead of being raised.
			return f"Error reading file: {str(e)}"
		
class transcribe_audio(Tool):
	"""Tool that transcribes an audio file to text with a Whisper model."""

	name = "transcribe_audio"
	description = "Transcribe an audio file and return the text."
	inputs = {
		"audio_path": {
			"type": "string",
			"description": "The path to the audio file to transcribe."
		}
	}

	output_type = "string"

	# Whisper model reused across calls; stays None until the first transcription.
	_model = None

	def forward(self, audio_path: str) -> str:
		"""
		Transcribe an audio file and return the recognized text.

		Args:
			audio_path: The path to the audio file to transcribe.

		Returns:
			The transcribed text, or a message starting with
			"Error transcribing audio:" when loading the model or
			transcribing fails.
		"""
		try:
			if self._model is None:
				# Load the "small" Whisper model once and keep it on the instance,
				# instead of reloading it for every transcription request.
				self._model = whisper.load_model("small")
			result = self._model.transcribe(audio_path)
			return result['text']
		except Exception as e:
			# Failures are reported through the return value instead of being raised.
			return f"Error transcribing audio: {str(e)}"


def get_data_uri(image_path: str, base64_image: str) -> str:
	"""
	Build a base64 ``data:`` URI for an image.

	Args:
		image_path: Path of the image; only its extension is used, to pick the MIME type.
		base64_image: The image bytes, already base64-encoded as text.

	Returns:
		A string of the form ``data:<mime type>;base64,<base64_image>``.
	"""
	mime_type, _ = mimetypes.guess_type(image_path)
	if mime_type is None or not mime_type.startswith("image/"):
		# Extension unknown to mimetypes (or not an image type): fall back to
		# "image/<extension>". A path without any extension gets the generic
		# binary type rather than the malformed "image/".
		extension = os.path.splitext(image_path)[1].lower().lstrip(".")
		mime_type = f"image/{extension}" if extension else "application/octet-stream"

	return f"data:{mime_type};base64,{base64_image}"

class describe_image(Tool):
	"""Tool that sends an image to a chat-completions model and returns its description."""

	name = "describe_image"
	description = "Describe an image and return the description."
	inputs = {
		"image_path": {
			"type": "string",
			"description": "The path to the image file to describe."
		}
	}

	output_type = "string"

	def forward(self, image_path: str) -> str:
		"""
		Describe the image stored at ``image_path``.

		Args:
			image_path: The path to the image file to describe.

		Returns:
			The model's description, or a message starting with
			"Error describing image:" when reading the file or calling
			the API fails.

		Raises:
			ValueError: If the OPENROUTER_API_KEY environment variable is not set.
		"""
		api_key = os.getenv("OPENROUTER_API_KEY")
		if not api_key:
			# The message names the variable that is actually read above.
			raise ValueError("OpenRouter API key not provided: OPENROUTER_API_KEY environment variable not set")

		# NOTE(review): base_url is None when OPENROUTER_BASE_URL is unset; the
		# client then presumably falls back to its own default endpoint - confirm.
		base_url = os.getenv("OPENROUTER_BASE_URL")
		client = OpenAI(api_key=api_key, base_url=base_url)

		try:
			# Only the read needs the file handle; close it before the network call.
			with open(image_path, 'rb') as image_file:
				base64_image = base64.b64encode(image_file.read()).decode('utf-8')

			data_uri = get_data_uri(image_path, base64_image)

			response = client.chat.completions.create(
				model="gpt-4o",
				messages=[
					{
						"role": "user",
						"content": [
							{"type": "text", "text": "Describe this image in detail. Include information about the main subject, setting, colors, and any notable elements."},
							{
								"type": "image_url",
								"image_url": {"url": data_uri}
							}
						]
					}
				],
				max_tokens=500
			)

			return response.choices[0].message.content
		except Exception as e:
			# Failures are reported through the return value instead of being raised.
			return f"Error describing image: {str(e)}"
			

def return_tools() -> list[Tool]:
	"""
	Build the tool set handed to the agent.

	Returns:
		Newly created tool instances: the three tools defined in this module
		first, followed by the search, Python interpreter, web page and
		Wikipedia tools imported from smolagents.
	"""
	local_tools = [read_file(), transcribe_audio(), describe_image()]
	library_tools = [
		DuckDuckGoSearchTool(),
		PythonInterpreterTool(),
		VisitWebpageTool(),
		WikipediaSearchTool(),
	]
	return local_tools + library_tools