gaia_final_assignment

Sleeping

File size: 10,721 Bytes


import os
import base64
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool, GoogleSearchTool
from smolagents.tools import tool

from config import config

# Tools

# Pick the search backend once, at import time, based on whether a Serper API
# key is exported. The banners are debug output in transliterated Greek
# ("VRIKA KEY" ~ "found key", "DEN VRIKA KEY" ~ "did not find key").
if os.environ.get("SERPER_API_KEY"):
    # Key present: use the Serper-backed Google search tool.
    print("!!!!!!!!!!!!! VRIKA KEY !!!!!!!!!!!!!!!!")
    print("!!!!!!!!!!!!! VRIKA KEY !!!!!!!!!!!!!!!!")
    simple_web_search_tool = GoogleSearchTool("serper")
else:
    # Key missing or empty: fall back to DuckDuckGo.
    print("---------------DEN VRIKA KEY-----------")
    print("---------------DEN VRIKA KEY-----------")
    simple_web_search_tool = DuckDuckGoSearchTool()

# Page-visiting tool; constructed unconditionally.
visit_web_page_tool = VisitWebpageTool()

@tool
def web_search_tool(query: str) -> str:
    """
    Given a question, search the web and return a summary answer.

    Args:
        query (str): The search query to look up.

    Returns:
        str: The DuckDuckGo Instant Answer abstract when one exists, otherwise
            the text of the first related topic that carries one, otherwise a
            fixed "nothing found" message. A blank query yields that message
            without any network request.

    Raises:
        RuntimeError: If the HTTP request fails or the response cannot be
            parsed; the underlying exception is chained as the cause.
    """
    # Function-scope stdlib imports keep this tool self-contained. The module
    # never imports `requests`, so the earlier `requests.get(...)` call hit a
    # NameError (surfaced as RuntimeError) on every invocation.
    import json
    import urllib.parse
    import urllib.request

    nothing_found = "No relevant information found via DuckDuckGo."
    if not query or not str(query).strip():
        return nothing_found

    try:
        params = urllib.parse.urlencode({"q": query, "format": "json", "no_html": 1})
        url = f"https://api.duckduckgo.com/?{params}"
        # The timeout keeps a stalled connection from hanging the caller.
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.load(response)

        if abstract := data.get("AbstractText"):
            return abstract

        # Related topics are either plain entries (with "Text") or category
        # groups that nest their entries under "Topics"; indexing
        # related[0]["Text"] directly raised KeyError for the latter.
        for entry in data.get("RelatedTopics") or []:
            for topic in (entry, *entry.get("Topics", [])):
                if text := topic.get("Text"):
                    return text

        return nothing_found
    except Exception as e:
        raise RuntimeError(f"DuckDuckGo search failed: {str(e)}") from e

@tool
def image_analysis_tool(question: str, file_path: str) -> dict:
    """
    Given a question and an image file path, analyze the image to answer the question.

    Args:
        question (str): A question about the image.
        file_path (str): Path to the image file.

    Returns:
        dict: Structured prompt {"inputs": {"image": <base64 text>, "question": question}}.
            No analysis is performed here; the function only packages the
            encoded file together with the question.

    Raises:
        RuntimeError: If processing fails.
    """
    try:
        # Read the whole file and base64-encode it so it can travel as text.
        with open(file_path, "rb") as img_file:
            img_data = base64.b64encode(img_file.read()).decode("utf-8")
    except Exception as e:
        # Chain the cause so the original I/O error stays visible in tracebacks.
        raise RuntimeError(f"Image analysis failed: {str(e)}") from e

    # The return annotation is `dict` because that is what is actually
    # returned (it previously claimed `str`).
    return {
        "inputs": {
            "image": img_data,
            "question": question,
        }
    }

@tool
def audio_analysis_tool(question: str, file_path: str) -> dict:
    """
    Given a question and an audio file path, analyze the audio to answer the question.

    Args:
        question (str): A question about the audio.
        file_path (str): Path to the audio file.

    Returns:
        dict: Structured prompt {"inputs": {"audio": <base64 text>, "question": question}}.
            No analysis is performed here; the function only packages the
            encoded file together with the question.

    Raises:
        RuntimeError: If processing fails.
    """
    try:
        # Read the whole file and base64-encode it so it can travel as text.
        with open(file_path, "rb") as audio_file:
            audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
    except Exception as e:
        # Chain the cause so the original I/O error stays visible in tracebacks.
        raise RuntimeError(f"Audio analysis failed: {str(e)}") from e

    # The return annotation is `dict` because that is what is actually
    # returned (it previously claimed `str`).
    return {
        "inputs": {
            "audio": audio_data,
            "question": question,
        }
    }

@tool
def video_analysis_tool(question: str, file_path: str) -> dict:
    """
    Given a question and a video file path, analyze the video to answer the question.

    Args:
        question (str): A question about the video.
        file_path (str): Path to the video file.

    Returns:
        dict: Structured prompt {"inputs": {"video": <base64 text>, "question": question}}.
            No analysis is performed here; the function only packages the
            encoded file together with the question.

    Raises:
        RuntimeError: If processing fails.
    """
    try:
        # The entire file is read into memory before encoding, so very large
        # videos will be held in RAM twice (raw bytes + base64 text).
        with open(file_path, "rb") as video_file:
            video_data = base64.b64encode(video_file.read()).decode("utf-8")
    except Exception as e:
        # Chain the cause so the original I/O error stays visible in tracebacks.
        raise RuntimeError(f"Video analysis failed: {str(e)}") from e

    # The return annotation is `dict` because that is what is actually
    # returned (it previously claimed `str`).
    return {
        "inputs": {
            "video": video_data,
            "question": question,
        }
    }

@tool
def youtube_analysis_tool(question: str, url: str) -> dict:
    """
    Given a question and a YouTube video URL, analyze the video to answer the question.

    Args:
        question (str): A question about the YouTube video.
        url (str): The YouTube URL.

    Returns:
        dict: Structured prompt {"inputs": {"youtube_url": url, "question": question}}.
            Nothing is downloaded, validated, or analyzed here; the arguments
            are only packaged for a downstream consumer.
    """
    # Building a dict literal from the two arguments cannot fail, so the
    # former try/except wrapper was unreachable and has been dropped. The
    # return annotation is `dict` because that is what is actually returned
    # (it previously claimed `str`).
    return {
        "inputs": {
            "youtube_url": url,
            "question": question,
        }
    }

@tool
def document_analysis_tool(question: str, file_path: str) -> dict:
    """
    Given a question and a document file path, analyze the document to answer the question.

    Args:
        question (str): A question about the document.
        file_path (str): Path to the document file.

    Returns:
        dict: Structured prompt {"inputs": {...}} whose "document_type" is
            "docx" or "pptx" (with extracted text under "document_content")
            or "binary" (with the base64-encoded file under
            "document_base64"), plus the question.

    Raises:
        RuntimeError: If processing fails.
    """
    try:
        # Stdlib extension check. It replaces the `is_ext` helper, which is
        # not defined or imported in the visible module and therefore made
        # every call (including the binary/PDF path) fail with NameError.
        # NOTE(review): case-insensitive matching is assumed to be equivalent
        # to the intended `is_ext` behavior - confirm if that helper exists.
        extension = os.path.splitext(file_path)[1].lower()

        if extension == ".docx":
            # NOTE(review): `read_docx_text` is not defined or imported in the
            # visible module - confirm it is provided elsewhere.
            inputs = {
                "document_type": "docx",
                "document_content": read_docx_text(file_path),
                "question": question,
            }
        elif extension == ".pptx":
            # NOTE(review): `read_pptx_text` is not defined or imported in the
            # visible module - confirm it is provided elsewhere.
            inputs = {
                "document_type": "pptx",
                "document_content": read_pptx_text(file_path),
                "question": question,
            }
        else:
            # For PDFs or other binary files, encode to base64
            with open(file_path, "rb") as file:
                encoded_data = base64.b64encode(file.read()).decode("utf-8")
            inputs = {
                "document_type": "binary",
                "document_base64": encoded_data,
                "question": question,
            }

        # The return annotation is `dict` because that is what is actually
        # returned (it previously claimed `str`).
        return {"inputs": inputs}
    except Exception as e:
        # Chain the cause so the original error stays visible in tracebacks.
        raise RuntimeError(f"Document analysis failed: {str(e)}") from e

@tool
def arithmetic_tool(question: str, a: float, b: float) -> dict:
    """
    Given a question and two numbers, perform the calculation to answer the question.

    Args:
        question (str): A natural language arithmetic question.
        a (float): First number.
        b (float): Second number.

    Returns:
        dict: {"inputs": {"question": ..., "a": ..., "b": ...}} echoing the
            arguments. No arithmetic is carried out inside this function.

    Raises:
        RuntimeError: If input or processing fails.
    """
    try:
        # Inner payload first, then wrap it under the "inputs" key.
        payload = {"question": question, "a": a, "b": b}
        wrapped = {"inputs": payload}
    except Exception as exc:
        raise RuntimeError(f"Arithmetic processing failed: {str(exc)}")
    else:
        return wrapped

@tool
def code_generation_tool(question: str, json_data: str) -> dict:
    """
    Given a question and JSON data, generate and execute code to answer the question.

    Args:
        question (str): The question to be answered.
        json_data (str): Input JSON data as a string.

    Returns:
        dict: {"inputs": {"question": ..., "json_data": ...}} echoing the
            arguments. The JSON string is passed through untouched; no code
            is generated or run inside this function.

    Raises:
        RuntimeError: If formatting or processing fails.
    """
    try:
        # Inner payload first, then wrap it under the "inputs" key.
        payload = {"question": question, "json_data": json_data}
        wrapped = {"inputs": payload}
    except Exception as exc:
        raise RuntimeError(f"Code generation processing failed: {str(exc)}")
    else:
        return wrapped

@tool
def code_execution_tool(question: str, file_path: str) -> dict:
    """
    Given a question and a Python file, prepare code execution context to answer the question.

    Args:
        question (str): The question to be answered.
        file_path (str): Path to the Python file.

    Returns:
        dict: {"inputs": {"question": ..., "python_file": <base64 text>,
            "file_name": <base name of file_path>}}. The file is only read
            and encoded; it is not executed inside this function.

    Raises:
        RuntimeError: If encoding or file handling fails.
    """
    try:
        # Pull the raw bytes first, then turn them into base64 text.
        with open(file_path, "rb") as source_file:
            raw_bytes = source_file.read()
        encoded_source = base64.b64encode(raw_bytes).decode("utf-8")

        inputs = {
            "question": question,
            "python_file": encoded_source,
            # Only the final path component is forwarded, not the directory.
            "file_name": os.path.basename(file_path),
        }
        return {"inputs": inputs}
    except Exception as err:
        raise RuntimeError(f"Code execution processing failed: {str(err)}")

@tool
def add(a: float, b: float) -> float:
    """Add two numbers.

    Args:
        a: First number
        b: Second number
    Returns:
        The sum of a and b.
    """
    total = a + b
    return total

@tool
def subtract(a: float, b: float) -> float:
    """Subtract two numbers.

    Args:
        a: First number
        b: Second number
    Returns:
        The difference a minus b.
    """
    difference = a - b
    return difference

@tool
def multiply(a: float, b: float) -> float:
    """Multiply two numbers.

    Args:
        a: First number
        b: Second number
    Returns:
        The product of a and b.
    """
    product = a * b
    return product

@tool
def divide(a: float, b: float) -> float:
    """Divide two numbers.

    Args:
        a: First number
        b: Second number
    Returns:
        The quotient a divided by b (true division).
    Raises:
        ValueError: If b equals zero.
    """
    # Happy path first; a zero divisor falls through to the explicit error.
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")

@tool
def modulus(a: float, b: float) -> float:
    """Get the modulus of two numbers.

    Args:
        a: First number
        b: Second number
    Returns:
        The remainder of a divided by b, as computed by the % operator.
    """
    # Python's % raises ZeroDivisionError for a zero divisor; that is left
    # to propagate unchanged.
    remainder = a % b
    return remainder