File size: 5,311 Bytes
7acb2e7
5675d05
7acb2e7
 
 
 
 
5675d05
7acb2e7
5675d05
 
 
7acb2e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5675d05
 
 
 
 
 
7acb2e7
5675d05
7acb2e7
 
 
 
5675d05
 
 
 
 
7acb2e7
 
 
 
 
 
 
 
 
5675d05
 
 
7acb2e7
 
 
 
 
 
 
 
5675d05
 
7acb2e7
 
5675d05
 
7acb2e7
 
 
 
 
 
 
5675d05
 
7acb2e7
 
 
 
5675d05
7acb2e7
 
 
 
 
 
 
 
 
 
5675d05
7acb2e7
 
5675d05
 
 
 
 
 
 
 
 
 
7acb2e7
 
 
 
 
 
 
 
 
 
 
 
 
5675d05
7acb2e7
5675d05
 
 
 
 
 
 
 
7acb2e7
 
5675d05
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

# A multi-agent proposal for solving the Hugging Face Agents Course final assignment.
import os
import dotenv
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage, search_web
from smolagents import PythonInterpreterTool
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
from tools.stt import stt
from tools.image import analyze_image
from tools.mylogger import save_file_with_timestamp, mylog
import myprompts

# Load variables from a local .env file into the process environment so the
# os.environ lookups below can find the API keys.
dotenv.load_dotenv()

# Gemini, reached through Google's OpenAI-compatible API base URL.
gemini_model = OpenAIServerModel(
    api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
    api_key=os.environ["GEMINI_API_KEY"],
    model_id="gemini-2.0-flash",
)

# Qwen model behind an OpenAI-style /v1 endpoint on a private network address
# (presumably a vLLM server, going by the variable name). The API key is a
# hard-coded token rather than an environment variable.
vllm_model = OpenAIServerModel(
    api_base="http://192.168.1.39:18000/v1",
    api_key="token-abc123",
    model_id="Qwen/Qwen2.5-1.5B-Instruct",
)

# The two OpenAI models share the same endpoint and the same OPENAI_API_KEY;
# only the model id differs.
openai_41nano_model = OpenAIServerModel(
    api_base="https://api.openai.com/v1",
    api_key=os.environ["OPENAI_API_KEY"],
    model_id="gpt-4.1-nano",
)

openai_41mini_model = OpenAIServerModel(
    api_base="https://api.openai.com/v1",
    api_key=os.environ["OPENAI_API_KEY"],
    model_id="gpt-4.1-mini",
)


def check_final_answer(final_answer, agent_memory) -> bool:
    """
    Accept or reject a candidate final answer based on its length.

    The answer is converted to text with ``str()`` and passes when that text
    is at most 200 characters long; anything longer is rejected.
    ``agent_memory`` is accepted but not inspected.

    Returns:
        True when the answer is short enough, False otherwise.
    """
    max_answer_chars = 200
    return len(str(final_answer)) <= max_answer_chars


# Sub-agent for web research: it is given the search and page-fetch tools.
web_agent = CodeAgent(
    name="web_agent",
    description="Use search engine to find webpages related to a subject and get the page content",
    model=openai_41nano_model,
    tools=[search_web, fetch_webpage],
    # Extra modules the agent's generated code is allowed to import.
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    max_steps=7,
    verbosity_level=1,
)

# Sub-agent for media: YouTube transcript/metadata lookup, speech-to-text and
# image analysis tools.
audiovideo_agent = CodeAgent(
    name="audiovideo_agent",
    description="Extracts information from image, video or audio files from the web",
    model=openai_41nano_model,
    tools=[get_youtube_transcript, get_youtube_title_description, stt, analyze_image],
    # Extra modules the agent's generated code is allowed to import; unlike
    # web_agent, this list also includes "requests".
    additional_authorized_imports=["pandas", "numpy", "bs4", "requests"],
    max_steps=7,
    verbosity_level=1,
)



# Top-level agent: runs on the larger gpt-4.1-mini model, has the two
# sub-agents registered as managed agents, and gets a higher step budget
# (15 vs. 7) than they do.
manager_agent = CodeAgent(
    name="manager_agent",
    description="A manager agent that coordinates the work of other agents to answer questions.",
    model=openai_41mini_model,
    managed_agents=[web_agent, audiovideo_agent],
    tools=[PythonInterpreterTool()],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    # check_final_answer is registered as a final-answer check, so a candidate
    # answer is passed through it (it rejects answers over 200 characters).
    final_answer_checks=[check_final_answer],
    planning_interval=5,
    max_steps=15,
    verbosity_level=2,
)

class MultiAgent:
    """Callable wrapper that answers a question through ``manager_agent``.

    An instance is called with the raw question text. The question is wrapped
    in a fixed instruction prefix and followed by ``myprompts.output_format``
    before being handed to ``manager_agent.run``.
    """

    def __init__(self):
        # Use the real class name so the message stays correct if the class
        # is renamed or subclassed (it previously said "BasicAgent").
        print(f"{self.__class__.__name__} initialized.")

    def __call__(self, question: str) -> str:
        """Run the manager agent on ``question`` and return its answer as text.

        Args:
            question: The question to answer, as plain text.

        Returns:
            The agent's final answer converted to ``str``. If anything inside
            the run raises, the error is printed and an error message string
            is returned instead of propagating the exception.
        """
        mylog(self.__class__.__name__, question)

        try:
            prefix = """You are the top agent of a multi-agent system that can answer questions by coordinating the work of other agents.

            You will receive a question and you will decide which agent to use to answer it.

            You can use the web_agent to search the web for information and for fetching the content of a web page, or the audiovideo_agent to extract information from video or audio files.

            You can also use your own knowledge to answer the question.

            You need to respect the output format that is given to you.

            Finding the correct answer to the question need reasoning and plannig, read the question carrefully, think step by step and do not skip any steps.

            """

            question = prefix + "\nTHE QUESTION:\n" + question + '\n' + myprompts.output_format

            # The agent's final answer is not guaranteed to be a string (it
            # may be a number, for example), so coerce it to honour the
            # declared ``-> str`` return type.
            return str(manager_agent.run(question))
        except Exception as e:
            # Best-effort boundary: report the failure as the answer text
            # rather than crashing the caller.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error


if __name__ == "__main__":
    # Manual smoke test: send one sample question through the agent and print
    # the answer.
    #
    # Alternative sample question, currently disabled:
    #   How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
    question = """

Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(



Could you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.



File URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3

"""
    agent = MultiAgent()
    answer = agent(question)
    print(f"Answer: {answer}")