Spaces:
Sleeping
Sleeping
File size: 3,259 Bytes
6f5d1ef 86c6428 99c4fdf 86c6428 99c4fdf 86c6428 99c4fdf 86c6428 99c4fdf 6f5d1ef 99c4fdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
from smolagents import Tool, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool, WikipediaSearchTool
from openai import OpenAI
import whisper
import base64
import os
class read_file(Tool):
    """Agent tool that returns the text content of a file.

    Any failure (missing file, permission problem, undecodable bytes) is
    reported to the caller as an ``"Error reading file: ..."`` string instead
    of being raised, so the calling agent always receives a string.
    """

    # Tool metadata read by the agent framework.
    name = "read_file"
    description = "Read a file and return the content."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The path to the file to read.",
        }
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Read the content of a file and return it as a string.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file's text, or an error message string if reading failed.
        """
        try:
            # Pin the encoding: without it, open() falls back to the platform
            # locale, so the same file can decode differently (or fail) from
            # one machine to another.
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        except Exception as e:
            # Deliberate best-effort: hand the failure back as text.
            return f"Error reading file: {str(e)}"
class transcribe_audio(Tool):
    """Agent tool that transcribes an audio file with a Whisper model.

    Failures are returned as an ``"Error transcribing audio: ..."`` string
    instead of being raised.
    """

    # Tool metadata read by the agent framework.
    name = "transcribe_audio"
    description = "Transcribe an audio file and return the text."
    inputs = {
        "audio_path": {
            "type": "string",
            "description": "The path to the audio file to transcribe.",
        }
    }
    output_type = "string"

    def forward(self, audio_path: str) -> str:
        """Transcribe ``audio_path`` and return the recognized text.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The ``'text'`` entry of the transcription result, or an error
            message string if loading the model or transcribing failed.
        """
        try:
            # Load the "small" Whisper model once and keep it on the instance;
            # reloading it on every call repeats the same work for no benefit.
            model = getattr(self, "_whisper_model", None)
            if model is None:
                model = whisper.load_model("small")
                self._whisper_model = model
            # Transcribe the audio file
            result = model.transcribe(audio_path)
            return result['text']
        except Exception as e:
            # Deliberate best-effort: hand the failure back as text.
            return f"Error transcribing audio: {str(e)}"
# File extensions whose registered image media subtype differs from the
# extension itself (e.g. ".jpg" files are "image/jpeg", not "image/jpg").
_IMAGE_SUBTYPE_ALIASES = {
    "jpg": "jpeg",
    "jpe": "jpeg",
    "jfif": "jpeg",
    "svg": "svg+xml",
    "tif": "tiff",
}


def get_data_uri(image_path: str, base64_image: str) -> str:
    """Build a ``data:`` URI for an already base64-encoded image.

    The media type is derived from the file extension of ``image_path``
    (case-insensitively). Extensions listed in ``_IMAGE_SUBTYPE_ALIASES`` are
    mapped to their registered subtype; any other extension is used verbatim
    as the subtype. A path without an extension falls back to
    ``application/octet-stream`` rather than producing the malformed type
    ``image/``.

    Args:
        image_path: Path of the image; only its extension is inspected.
        base64_image: The image bytes, base64-encoded as text.

    Returns:
        A string of the form ``data:<mime-type>;base64,<base64_image>``.
    """
    _, file_extension = os.path.splitext(image_path)
    file_extension = file_extension.lower().lstrip(".")
    if file_extension:
        subtype = _IMAGE_SUBTYPE_ALIASES.get(file_extension, file_extension)
        mime_type = f"image/{subtype}"
    else:
        # No extension to go on: use the generic binary media type.
        mime_type = "application/octet-stream"
    return f"data:{mime_type};base64,{base64_image}"
class describe_image(Tool):
    """Agent tool that asks a vision-capable chat model to describe an image.

    Credentials come from the ``OPENROUTER_API_KEY`` environment variable and
    the endpoint from ``OPENROUTER_BASE_URL``. Request and file errors are
    returned as an ``"Error describing image: ..."`` string; a missing API
    key raises ``ValueError``.
    """

    # Tool metadata read by the agent framework.
    name = "describe_image"
    description = "Describe an image and return the description."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file to describe.",
        }
    }
    output_type = "string"

    def forward(self, image_path: str) -> str:
        """Send the image to the chat-completions API and return its description.

        Args:
            image_path: Path of the image file to describe.

        Returns:
            The content of the first completion choice, or an error message
            string if reading the file or calling the API failed.

        Raises:
            ValueError: If ``OPENROUTER_API_KEY`` is not set.
        """
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            # The message names the variable that is actually read above.
            raise ValueError(
                "OpenRouter API key not provided: "
                "OPENROUTER_API_KEY environment variable not set"
            )
        base_url = os.getenv("OPENROUTER_BASE_URL")
        client = OpenAI(api_key=api_key, base_url=base_url)
        try:
            # The image is embedded in the request as a base64 data URI.
            with open(image_path, 'rb') as image_file:
                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
            data_uri = get_data_uri(image_path, base64_image)
            # NOTE(review): the model id is hard-coded; confirm that the
            # endpoint configured via OPENROUTER_BASE_URL accepts "gpt-4o".
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Describe this image in detail. Include information about the main subject, setting, colors, and any notable elements."},
                            {
                                "type": "image_url",
                                "image_url": {"url": data_uri},
                            },
                        ],
                    }
                ],
                max_tokens=500,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Deliberate best-effort: hand the failure back as text.
            return f"Error describing image: {str(e)}"
def return_tools() -> list[Tool]:
    """Instantiate and return every tool the agent is given.

    The three tools defined in this file come first, followed by the
    ready-made smolagents tools, in a fixed order.
    """
    tool_classes = (
        read_file,
        transcribe_audio,
        describe_image,
        DuckDuckGoSearchTool,
        PythonInterpreterTool,
        VisitWebpageTool,
        WikipediaSearchTool,
    )
    # Each class is constructed with no arguments, in the listed order.
    return [tool_cls() for tool_cls in tool_classes]
|