File size: 3,053 Bytes
9ac9d5e
64c3879
 
9ac9d5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64c3879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9182c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

import base64
import json
import os

import requests
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
from smolagents.tools import tool

# Tools

# Instances of the built-in smolagents tool classes, created once when this
# module is imported.
simple_web_search_tool = DuckDuckGoSearchTool()
visit_web_page_tool = VisitWebpageTool()

@tool
def web_search_tool(query: str) -> str:
    """
    Given a question, search the web and return a summary answer.

    Queries the DuckDuckGo Instant Answer API and returns the abstract when
    one is available, otherwise the text of the first related topic.

    Args:
        query (str): The search query to look up.

    Returns:
        str: A relevant summary or result from DuckDuckGo, or a short
            "nothing found" message when the API has no usable text.

    Raises:
        RuntimeError: If the HTTP request fails or the response cannot be
            parsed; the underlying error is attached as the cause.
    """
    try:
        url = "https://api.duckduckgo.com/"
        params = {"q": query, "format": "json", "no_html": 1}
        # Bound the wait so an unresponsive endpoint cannot hang the caller.
        response = requests.get(url, params=params, timeout=10)
        # Fail on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()

        if abstract := data.get("AbstractText"):
            return abstract

        related = data.get("RelatedTopics")
        if not related:
            return "No relevant information found via DuckDuckGo."

        # RelatedTopics mixes plain entries ({"Text": ...}) with category
        # groups ({"Name": ..., "Topics": [...]}), so indexing [0]["Text"]
        # is not safe. Walk the entries in their original order, descending
        # into groups, and return the first non-empty text.
        stack = list(reversed(related))
        while stack:
            topic = stack.pop()
            if not isinstance(topic, dict):
                continue
            if text := topic.get("Text"):
                return text
            stack.extend(reversed(topic.get("Topics") or []))

        return "No result found."
    except Exception as e:
        raise RuntimeError(f"DuckDuckGo search failed: {e}") from e

@tool
def image_analysis_tool(question: str, file_path: str) -> str:
    """
    Given a question and an image file path, analyze the image to answer the question.

    The image is not interpreted here: the file is read, base64-encoded and
    bundled with the question into a JSON prompt for a downstream model.

    Args:
        question (str): A question about the image.
        file_path (str): Path to the image file.

    Returns:
        str: JSON text with a single "inputs" object holding "image" (the
            base64-encoded file contents) and "question".

    Raises:
        RuntimeError: If the file cannot be read or encoded; the underlying
            error is attached as the cause.
    """
    try:
        # Read the raw bytes and encode them as base64 text.
        with open(file_path, "rb") as img_file:
            img_data = base64.b64encode(img_file.read()).decode("utf-8")

        # Format the content in a typical vision+text prompt format.
        prompt = {
            "inputs": {
                "image": img_data,
                "question": question,
            }
        }

        # Serialise so the value matches the declared `str` return type
        # instead of handing back a raw dict.
        return json.dumps(prompt, ensure_ascii=False)
    except Exception as e:
        raise RuntimeError(f"Image analysis failed: {e}") from e

@tool
def audio_analysis_tool(question: str, file_path: str) -> str:
    """
    Given a question and an audio file path, analyze the audio to answer the question.

    The audio is not interpreted here: the file is read, base64-encoded and
    bundled with the question into a JSON prompt for a downstream model.

    Args:
        question (str): A question about the audio.
        file_path (str): Path to the audio file.

    Returns:
        str: JSON text with a single "inputs" object holding "audio" (the
            base64-encoded file contents) and "question".

    Raises:
        RuntimeError: If the file cannot be read or encoded; the underlying
            error is attached as the cause.
    """
    try:
        # Read the raw bytes and encode them as base64 text.
        with open(file_path, "rb") as audio_file:
            audio_data = base64.b64encode(audio_file.read()).decode("utf-8")

        # Same prompt layout as a vision+text prompt, adapted for audio.
        prompt = {
            "inputs": {
                "audio": audio_data,
                "question": question,
            }
        }

        # Serialise so the value matches the declared `str` return type
        # instead of handing back a raw dict.
        return json.dumps(prompt, ensure_ascii=False)
    except Exception as e:
        raise RuntimeError(f"Audio analysis failed: {e}") from e