agents-course-Final_Assignment

Paused

File size: 4,605 Bytes

32bb869

# Libs
import base64
import mimetypes
import os

import google.genai as genai
import pandas as pd
import requests
from openai import OpenAI
from smolagents import Tool

# Local
from consts import DEFAULT_API_URL

# Dynamic model ID: take the value chosen in app.py when it is importable and
# non-empty; otherwise fall back to the built-in default model.
try:
    from app import _SELECTED_MODEL_ID
except ImportError:
    _SELECTED_MODEL_ID = None
_SELECTED_MODEL_ID = _SELECTED_MODEL_ID or "gpt-4.1-mini"

class GetTaskFileTool(Tool):
    """Download the file attached to a task and store it in the working directory."""

    name = "get_task_file_tool"
    description = """This tool downloads the file content associated with the given task_id if exists. Returns absolute file path"""
    inputs = {
        "task_id": {"type": "string", "description": "Task id"},
        "file_name": {"type": "string", "description": "File name"},
    }
    output_type = "string"

    def forward(self, task_id: str, file_name: str) -> str:
        """Fetch ``{DEFAULT_API_URL}/files/{task_id}`` and write the body to disk.

        Args:
            task_id: Identifier of the task whose file should be downloaded.
            file_name: Name to save the file under. Only the final path
                component is used, so the file always lands in the current
                working directory.

        Returns:
            Absolute path of the written file.

        Raises:
            ValueError: If ``file_name`` has no usable base name.
            requests.HTTPError: If the server answers with an error status.
        """
        # The name is supplied by the agent; drop any directory part so values
        # such as "../x" or "/etc/x" cannot write outside the working directory.
        safe_name = os.path.basename(file_name) if file_name else ""
        if not safe_name or safe_name in (".", ".."):
            # Fail before the network round-trip instead of inside open().
            raise ValueError(f"Invalid file name: {file_name!r}")

        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
        response.raise_for_status()
        with open(safe_name, "wb") as file:
            file.write(response.content)
        return os.path.abspath(safe_name)

class LoadXlsxFileTool(Tool):
    """Tool that reads an Excel workbook through pandas."""

    name = "load_xlsx_file_tool"
    description = "This tool loads xlsx file into pandas and returns it"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "File path",
        },
    }
    output_type = "object"

    def forward(self, file_path: str) -> object:
        """Return the result of ``pd.read_excel`` called on ``file_path`` with default options."""
        workbook = pd.read_excel(file_path)
        return workbook

class LoadTextFileTool(Tool):
    """Tool that returns the full contents of a UTF-8 encoded text file."""

    name = "load_text_file_tool"
    description = """This tool loads any text file"""
    inputs = {
        "file_path": {"type": "string", "description": "File path"}
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Read the file at ``file_path`` as UTF-8 text.

        Args:
            file_path: Path of the file to read.

        Returns:
            The entire file content as a single string.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()

class AnalyzeImageTool(Tool):
    """Send an image together with a free-form task to a vision-capable LLM.

    Model IDs containing ``"gemini"`` are routed to the Google GenAI client;
    every other model ID is sent to the OpenAI Responses API.
    """

    name = "analyze_image_tool"
    description = """This tool performs a custom analysis of the provided image and returns the corresponding result."""
    inputs = {
        "image_path": {"type": "string", "description": "Image path"},
        "task": {"type": "string", "description": "Task to perform on the image, be detailed and clear"},
    }
    output_type = "string"

    def __init__(self, model_id=None):
        """Create the tool.

        Args:
            model_id: Model used for the analysis; defaults to ``"gpt-4.1-mini"``
                when omitted or empty.
        """
        super().__init__()
        # NOTE(review): the module-level _SELECTED_MODEL_ID is not consulted
        # here -- confirm whether callers are expected to pass it explicitly.
        self.model_id = model_id or "gpt-4.1-mini"

    def forward(self, image_path: str, task: str) -> str:
        """
        Analyze the image at `image_path` according to `task` and return the textual result.

        Any exception raised while reading the image or calling the model is
        converted into an error message string so the calling agent can retry.
        """
        header = "Image analysis result:\n\n"
        llm_instruction = (
            "You are a highly capable image analysis tool, designed to examine images and deliver detailed descriptions, "
            "insights, and relevant interpretations based on the task at hand.\n\n"
            "Approach the task methodically and provide a thorough and well-reasoned response to the following:\n\n---\nTask:\n"
            f"{task}\n\n"
        )
        try:
            if "gemini" in self.model_id:
                return header + self._analyze_with_gemini(image_path, llm_instruction)
            return header + self._analyze_with_openai(image_path, llm_instruction)
        except Exception as e:
            return f"Error analyzing image: {e}.\nPlease try again."

    @staticmethod
    def _guess_mime_type(image_path: str) -> str:
        """Return the image MIME type implied by the file name, or JPEG if unknown."""
        mime_type, _ = mimetypes.guess_type(image_path)
        if mime_type and mime_type.startswith("image/"):
            return mime_type
        # Unknown or non-image extension: keep the previously hard-coded type.
        return "image/jpeg"

    def _analyze_with_gemini(self, image_path: str, task: str) -> str:
        """Run the analysis through the Google GenAI client.

        Raises:
            ValueError: If the ``GOOGLEAI_API_KEY`` environment variable is unset.
        """
        api_key = os.getenv("GOOGLEAI_API_KEY")
        if not api_key:
            raise ValueError("Environment variable GOOGLEAI_API_KEY is not set.")
        client = genai.Client(api_key=api_key)

        with open(image_path, "rb") as f:
            image_data = f.read()

        contents = [
            # Declare the real image type instead of assuming JPEG.
            {"inline_data": {"mime_type": self._guess_mime_type(image_path), "data": image_data}},
            {"text": task},
        ]
        response = client.models.generate_content(model=self.model_id, contents=contents)
        return response.candidates[0].content.parts[0].text

    def _analyze_with_openai(self, image_path: str, task: str) -> str:
        """Run the analysis through the OpenAI Responses API.

        Raises:
            RuntimeError: If the response contains no text output.
        """
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

        with open(image_path, "rb") as f:
            encoded_image = base64.b64encode(f.read()).decode("utf-8")

        # The data URL must carry the real image type, not a fixed JPEG label.
        mime_type = self._guess_mime_type(image_path)
        payload = [
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": task},
                    {"type": "input_image", "image_url": f"data:{mime_type};base64,{encoded_image}"},
                ],
            }
        ]
        response = client.responses.create(model=self.model_id, input=payload)
        # The first output item is not guaranteed to be a text message, so use
        # the SDK's aggregated text instead of indexing output[0].content[0].
        text = response.output_text
        if not text:
            raise RuntimeError("The model returned no text output.")
        return text