File size: 4,605 Bytes
32bb869 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# Libs
import os
import requests
import pandas as pd
import google.genai as genai
import base64
from openai import OpenAI
from smolagents import Tool
# Local
from consts import DEFAULT_API_URL
# Dynamic model ID: take the value configured in app.py when it is importable
# and non-empty; otherwise use the built-in default below.
try:
    from app import _SELECTED_MODEL_ID
except ImportError:
    # app.py (or the name inside it) is unavailable.
    _SELECTED_MODEL_ID = None
# A missing or empty ID is replaced by the default.
_SELECTED_MODEL_ID = _SELECTED_MODEL_ID or "gpt-4.1-mini"
class GetTaskFileTool(Tool):
    """Tool that downloads the file attached to a task and saves it locally."""

    name = "get_task_file_tool"
    description = """This tool downloads the file content associated with the given task_id if exists. Returns absolute file path"""
    inputs = {
        "task_id": {"type": "string", "description": "Task id"},
        "file_name": {"type": "string", "description": "File name"},
    }
    output_type = "string"

    def forward(self, task_id: str, file_name: str) -> str:
        """Fetch ``{DEFAULT_API_URL}/files/{task_id}`` and write it to disk.

        Args:
            task_id: Identifier of the task whose file should be downloaded.
            file_name: Name to store the download under. Only the final path
                component is used, so the file is always written into the
                current working directory.

        Returns:
            Absolute path of the written file.

        Raises:
            ValueError: If ``file_name`` contains no usable file name.
            requests.HTTPError: If the server answers with an error status.
        """
        # file_name arrives as a tool argument (potentially model-generated),
        # so drop any directory components: this keeps "../x" or "/abs/x"
        # from overwriting files outside the working directory.
        safe_name = os.path.basename(file_name or "")
        if safe_name in ("", ".", ".."):
            # Fail before the network round-trip when nothing can be saved.
            raise ValueError(f"Invalid file name: {file_name!r}")

        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
        response.raise_for_status()

        with open(safe_name, "wb") as file:
            file.write(response.content)
        return os.path.abspath(safe_name)
class LoadXlsxFileTool(Tool):
    """Tool that reads an Excel file through pandas."""

    name = "load_xlsx_file_tool"
    description = """This tool loads xlsx file into pandas and returns it"""
    inputs = {
        "file_path": {
            "type": "string",
            "description": "File path",
        },
    }
    output_type = "object"

    def forward(self, file_path: str) -> object:
        """Return the result of ``pd.read_excel(file_path)`` with default options.

        Args:
            file_path: Location of the Excel file to read.

        Returns:
            The object produced by ``pandas.read_excel``.
        """
        loaded = pd.read_excel(file_path)
        return loaded
class LoadTextFileTool(Tool):
    """Tool that returns the full contents of a UTF-8 text file."""

    name = "load_text_file_tool"
    description = """This tool loads any text file"""
    inputs = {
        "file_path": {"type": "string", "description": "File path"}
    }
    output_type = "string"

    # The return annotation is ``str`` to agree with ``output_type`` above and
    # with what ``file.read()`` yields in text mode.
    def forward(self, file_path: str) -> str:
        """Read ``file_path`` as UTF-8 text and return everything in it.

        Args:
            file_path: Location of the text file to read.

        Returns:
            The decoded file contents.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
class AnalyzeImageTool(Tool):
    """Tool that sends an image and a task prompt to a vision-capable model.

    Model IDs containing "gemini" are routed to the Google GenAI client; all
    other model IDs are routed to the OpenAI Responses API.
    """

    name = "analyze_image_tool"
    description = """This tool performs a custom analysis of the provided image and returns the corresponding result."""
    inputs = {
        "image_path": {"type": "string", "description": "Image path"},
        "task": {"type": "string", "description": "Task to perform on the image, be detailed and clear"},
    }
    output_type = "string"

    def __init__(self, model_id=None):
        """Create the tool.

        Args:
            model_id: Identifier of the model to query. When omitted or
                empty, "gpt-4.1-mini" is used.
        """
        super().__init__()
        self.model_id = model_id or "gpt-4.1-mini"

    def forward(self, image_path: str, task: str) -> str:
        """
        Analyze the image at `image_path` according to `task` and return the textual result.

        Any exception raised while reading the file or calling the model is
        converted into an error message string instead of propagating, so the
        caller always receives text.
        """
        header = "Image analysis result:\n\n"
        llm_instruction = (
            "You are a highly capable image analysis tool, designed to examine images and deliver detailed descriptions, "
            "insights, and relevant interpretations based on the task at hand.\n\n"
            "Approach the task methodically and provide a thorough and well-reasoned response to the following:\n\n---\nTask:\n"
            f"{task}\n\n"
        )
        try:
            if "gemini" in self.model_id:
                return header + self._analyze_with_gemini(image_path, llm_instruction)
            return header + self._analyze_with_openai(image_path, llm_instruction)
        except Exception as e:
            # Deliberate best-effort: report the failure as text.
            return f"Error analyzing image: {e}.\nPlease try again."

    def _guess_mime_type(self, image_path: str) -> str:
        """Return the image MIME type implied by the file extension.

        Falls back to "image/jpeg" when the extension is unknown or does not
        map to an image type.
        """
        # Local import keeps this block self-contained.
        import mimetypes

        guessed, _encoding = mimetypes.guess_type(image_path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    def _analyze_with_gemini(self, image_path: str, task: str) -> str:
        """Send the image and prompt to a Gemini model and return its text.

        Raises:
            ValueError: If the GOOGLEAI_API_KEY environment variable is unset.
        """
        api_key = os.getenv("GOOGLEAI_API_KEY")
        if not api_key:
            raise ValueError("Environment variable GOOGLEAI_API_KEY is not set.")
        client = genai.Client(api_key=api_key)
        with open(image_path, "rb") as f:
            image_data = f.read()
        contents = [
            # Label the bytes with their real type (PNG, WebP, ...) rather
            # than always claiming JPEG.
            {"inline_data": {"mime_type": self._guess_mime_type(image_path), "data": image_data}},
            {"text": task},
        ]
        response = client.models.generate_content(model=self.model_id, contents=contents)
        return response.candidates[0].content.parts[0].text

    def _analyze_with_openai(self, image_path: str, task: str) -> str:
        """Send the image and prompt to the OpenAI Responses API and return its text."""
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        with open(image_path, "rb") as f:
            encoded_image = base64.b64encode(f.read()).decode("utf-8")
        mime_type = self._guess_mime_type(image_path)
        payload = [
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": task},
                    {"type": "input_image", "image_url": f"data:{mime_type};base64,{encoded_image}"},
                ],
            }
        ]
        response = client.responses.create(model=self.model_id, input=payload)
        # output[0] is not guaranteed to be the assistant message (reasoning
        # models emit a reasoning item first); output_text aggregates the text
        # of all output_text items regardless of position.
        return response.output_text
|