# gaia_final_assignment
#
# Sleeping
#
# File size: 3,053 Bytes


import os
import base64
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
from smolagents.tools import tool

# Tools

# Module-level tool instances from smolagents, constructed once when this
# module is imported.
# NOTE(review): judging by the class names these presumably perform a
# DuckDuckGo search and fetch a web page's content — confirm against smolagents.
simple_web_search_tool = DuckDuckGoSearchTool()
visit_web_page_tool = VisitWebpageTool()

@tool
def web_search_tool(query: str) -> str:
    """
    Given a question, search the web and return a summary answer.

    Queries the DuckDuckGo Instant Answer API and returns the abstract when one
    is present, otherwise the text of the first related topic that has any.

    Args:
        query (str): The search query to look up.

    Returns:
        str: A relevant summary or result from DuckDuckGo, or a fallback message when nothing is found.

    Raises:
        RuntimeError: If the HTTP request fails or the response cannot be parsed.
    """
    # Imported locally so the tool is self-contained: the previous version
    # called `requests`, which this module never imports, so every call failed.
    import json
    import urllib.parse
    import urllib.request

    try:
        params = urllib.parse.urlencode({"q": query, "format": "json", "no_html": 1})
        url = f"https://api.duckduckgo.com/?{params}"
        # A timeout keeps an unresponsive endpoint from hanging the agent.
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.load(response)

        if abstract := data.get("AbstractText"):
            return abstract

        for topic in data.get("RelatedTopics") or []:
            # Plain results carry "Text"; category groups have no "Text" and
            # nest their results under "Topics" instead.
            for entry in (topic, *topic.get("Topics", [])):
                if text := entry.get("Text"):
                    return text

        return "No relevant information found via DuckDuckGo."
    except Exception as e:
        # Keep the RuntimeError contract, but chain the cause for debugging.
        raise RuntimeError(f"DuckDuckGo search failed: {str(e)}") from e

@tool
def image_analysis_tool(question: str, file_path: str) -> str:
    """
    Package an image file and a question about it into a prompt payload.

    The image is read from disk and base64-encoded; no analysis is performed
    here, the payload is simply handed back to the caller.

    Args:
        question (str): A question about the image.
        file_path (str): Path to the image file.

    Returns:
        str: Declared as str, but the value actually returned is a dict with a single "inputs" entry holding the base64 "image" text and the "question".

    Raises:
        RuntimeError: If reading or encoding the file fails.
    """
    # NOTE(review): the annotation says `str` while a dict is returned; left
    # as-is because callers may rely on the current shape — confirm intent.
    try:
        with open(file_path, "rb") as handle:
            raw_bytes = handle.read()

        # base64 yields ASCII bytes; decode so the payload holds plain text.
        encoded_image = base64.b64encode(raw_bytes).decode("utf-8")

        return {
            "inputs": {
                "image": encoded_image,
                "question": question,
            }
        }
    except Exception as exc:
        raise RuntimeError(f"Image analysis failed: {str(exc)}")

@tool
def audio_analysis_tool(question: str, file_path: str) -> str:
    """
    Package an audio file and a question about it into a prompt payload.

    The audio is read from disk and base64-encoded; no analysis is performed
    here, the payload is simply handed back to the caller.

    Args:
        question (str): A question about the audio.
        file_path (str): Path to the audio file.

    Returns:
        str: Declared as str, but the value actually returned is a dict with a single "inputs" entry holding the base64 "audio" text and the "question".

    Raises:
        RuntimeError: If reading or encoding the file fails.
    """
    # NOTE(review): the annotation says `str` while a dict is returned; left
    # as-is because callers may rely on the current shape — confirm intent.
    try:
        with open(file_path, "rb") as handle:
            raw_bytes = handle.read()

        # base64 yields ASCII bytes; decode so the payload holds plain text.
        encoded_audio = base64.b64encode(raw_bytes).decode("utf-8")

        return {
            "inputs": {
                "audio": encoded_audio,
                "question": question,
            }
        }
    except Exception as exc:
        raise RuntimeError(f"Audio analysis failed: {str(exc)}")