Denis Davydov committed · Commit f9a7c9b · Parent(s): 81917a3

initial

Files changed:
- .gitignore +176 -0
- agent.py +146 -0
- app.py +8 -17
- docs/additional-readings.mdx +30 -0
- docs/conclusion.mdx +11 -0
- docs/get-your-certificate.mdx +24 -0
- docs/hands-on.mdx +52 -0
- docs/introduction.mdx +23 -0
- docs/what-is-gaia.mdx +70 -0
- requirements.txt +17 -1
- test_local.py +137 -0
- tools.py +180 -0
- utils.py +124 -0
.gitignore
ADDED
@@ -0,0 +1,176 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

.DS_Store
agent.py
ADDED
@@ -0,0 +1,146 @@
from typing import TypedDict, Annotated
import os
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from tools import agent_tools
from utils import format_gaia_answer, analyze_question_type, create_execution_plan, log_agent_step

# Initialize LLM (same as unit3)
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN"),
    temperature=0.1,
    max_new_tokens=1024,
)

chat = ChatHuggingFace(llm=llm, verbose=True)
chat_with_tools = chat.bind_tools(agent_tools)

# System prompt for intelligent question answering
SYSTEM_PROMPT = """You are a highly capable AI assistant designed to answer questions accurately and helpfully.

Your approach should include:
- Multi-step reasoning and planning for complex questions
- Intelligent tool usage when needed for web search, file processing, calculations, and analysis
- Precise, factual answers based on reliable information
- Breaking down complex questions into manageable steps

IMPORTANT GUIDELINES:
1. Think step-by-step and use available tools when they can help provide better answers
2. For current information: Search the web for up-to-date facts
3. For files: Process associated files when task_id is provided
4. For visual content: Analyze images carefully when present
5. For calculations: Use computational tools for accuracy
6. Provide concise, direct answers without unnecessary prefixes
7. Focus on accuracy and helpfulness
8. Be factual and avoid speculation

Your goal is to be as helpful and accurate as possible while using the right tools for each task."""

# Generate the AgentState
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    task_id: str
    question_analysis: dict

def assistant(state: AgentState):
    """Main assistant function that processes messages and calls tools."""
    messages = state["messages"]

    # Add system prompt if not already present
    if not any(isinstance(msg, SystemMessage) for msg in messages):
        messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages

    # Get the response from the LLM
    response = chat_with_tools.invoke(messages)

    return {
        "messages": [response],
    }

def create_smart_agent():
    """Create and return the smart agent graph."""
    # Build the graph
    builder = StateGraph(AgentState)

    # Define nodes
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(agent_tools))

    # Define edges
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Add memory
    memory = MemorySaver()
    agent = builder.compile(checkpointer=memory)

    return agent

class SmartAgent:
    """High-level intelligent agent class that wraps the LangGraph agent."""

    def __init__(self):
        self.agent = create_smart_agent()
        print("🤖 Smart Agent initialized with LangGraph and tools")

    def __call__(self, question: str, task_id: str = None) -> str:
        """Process a question and return the formatted answer."""
        try:
            print(f"\n🎯 Processing question: {question[:100]}...")

            # Analyze the question
            analysis = analyze_question_type(question)
            print(f"📊 Question analysis: {analysis}")

            # Create execution plan
            plan = create_execution_plan(question, task_id)
            print(f"📋 Execution plan: {plan}")

            # Prepare the question with task_id context if available
            enhanced_question = question
            if task_id:
                enhanced_question = f"Task ID: {task_id}\n\nQuestion: {question}\n\nNote: If this question involves files, use the file_download tool with task_id '{task_id}' to access associated files."

            # Invoke the agent
            thread_id = f"task-{task_id}" if task_id else "general"
            config = {"configurable": {"thread_id": thread_id}}

            initial_state = {
                "messages": [HumanMessage(content=enhanced_question)],
                "task_id": task_id or "",
                "question_analysis": analysis
            }

            result = self.agent.invoke(initial_state, config=config)

            # Extract the final answer
            if result and 'messages' in result and result['messages']:
                final_message = result['messages'][-1]
                raw_answer = final_message.content
            else:
                raw_answer = "No response generated"

            # Format the answer for submission
            formatted_answer = format_gaia_answer(raw_answer)

            print(f"✅ Raw answer: {raw_answer}")
            print(f"🎯 Formatted answer: {formatted_answer}")

            return formatted_answer

        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg

smart_agent = SmartAgent()
app.py
CHANGED
@@ -3,22 +3,12 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+from agent import smart_agent
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -38,12 +28,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1.
+    # 1. Use Smart Agent (imported from agent.py)
     try:
-        agent = BasicAgent()
+        agent = smart_agent
+        print("Smart Agent loaded successfully.")
     except Exception as e:
-        print(f"Error initializing agent: {e}")
-        return f"Error initializing agent: {e}", None
+        print(f"Error loading smart agent: {e}")
+        return f"Error initializing smart agent: {e}", None
     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -80,7 +71,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
+            submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -193,4 +184,4 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
docs/additional-readings.mdx
ADDED
@@ -0,0 +1,30 @@
# And now? What topics should I learn?

Agentic AI is a rapidly evolving field, and understanding foundational protocols is essential for building intelligent, autonomous systems.

Two important standards you should get familiar with are:

- The **Model Context Protocol (MCP)**
- The **Agent-to-Agent Protocol (A2A)**

## 🔌 Model Context Protocol (MCP)

The **Model Context Protocol (MCP)** by Anthropic is an open standard that enables AI models to securely and seamlessly **connect with external tools, data sources, and applications**, making agents more capable and autonomous.

Think of MCP as a **universal adapter**, like a USB-C port, that allows AI models to plug into various digital environments **without needing custom integration for each one**.

MCP is quickly gaining traction across the industry, with major companies like OpenAI and Google beginning to adopt it.

📚 Learn more:
- [Anthropic's official announcement and documentation](https://www.anthropic.com/news/model-context-protocol)
- [MCP on Wikipedia](https://en.wikipedia.org/wiki/Model_Context_Protocol)
- [Blog on MCP](https://huggingface.co/blog/Kseniase/mcp)

## 🤝 Agent-to-Agent (A2A) Protocol

Google has developed the **Agent-to-Agent (A2A) protocol** as a complementary counterpart to Anthropic's Model Context Protocol (MCP).

While MCP connects agents to external tools, **A2A connects agents to each other**, paving the way for cooperative, multi-agent systems that can work together to solve complex problems.

📚 Dive deeper into A2A:
- [Google’s A2A announcement](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/)
docs/conclusion.mdx
ADDED
@@ -0,0 +1,11 @@
# Conclusion

**Congratulations on finishing the Agents Course!**

Through perseverance and dedication, you’ve built a solid foundation in the world of AI Agents.

But finishing this course is **not the end of your journey**. It’s just the beginning: don’t hesitate to explore the next section where we share curated resources to help you continue learning, including advanced topics like **MCPs** and beyond.

**Thank you** for being part of this course. **We hope you liked this course as much as we loved writing it**.

And don’t forget: **Keep Learning, Stay Awesome 🤗**
docs/get-your-certificate.mdx
ADDED
@@ -0,0 +1,24 @@
# Claim Your Certificate 🎓

If you scored **above 30%, congratulations! 👏 You're now eligible to claim your official certificate.**

Follow the steps below to receive it:

1. Visit the [certificate page](https://huggingface.co/spaces/agents-course/Unit4-Final-Certificate).
2. **Sign in** with your Hugging Face account using the button provided.
3. **Enter your full name**. This is the name that will appear on your certificate.
4. Click **“Get My Certificate”** to verify your score and download your certificate.

<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/congrats.png" alt="Congrats!" />

Once you’ve got your certificate, feel free to:
- Add it to your **LinkedIn profile** 🧑‍💼
- Share it on **X**, **Bluesky**, etc. 🎉

**Don’t forget to tag [@huggingface](https://huggingface.co/huggingface). We’d be super proud and we’d love to cheer you on! 🤗**

<Tip>

If you have any issues with submission, please open a discussion item on [the certification community tab](https://huggingface.co/spaces/agents-course/Unit4-Final-Certificate/discussions).

</Tip>
docs/hands-on.mdx
ADDED
@@ -0,0 +1,52 @@
# Hands-On

Now that you’re ready to dive deeper into the creation of your final agent, let’s see how you can submit it for review.

## The Dataset

The dataset used in this leaderboard consists of 20 questions extracted from the level 1 questions of the **validation** set from GAIA.

The chosen questions were filtered based on the number of tools and steps needed to answer them.

Based on the current state of the GAIA benchmark, we think that aiming for 30% on level 1 questions is a fair test.

<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/leaderboard%20GAIA%2024%3A04%3A2025.png" alt="GAIA current status!" />

## The process

The big question in your mind is probably: “How do I start submitting?”

For this Unit, we created an API that allows you to get the questions and send your answers for scoring.
Here is a summary of the routes (see the [live documentation](https://agents-course-unit4-scoring.hf.space/docs) for interactive details):

* **`GET /questions`**: Retrieve the full list of filtered evaluation questions.
* **`GET /random-question`**: Fetch a single random question from the list.
* **`GET /files/{task_id}`**: Download a specific file associated with a given task ID.
* **`POST /submit`**: Submit agent answers, calculate the score, and update the leaderboard.

The submit function compares each answer to the ground truth with an **EXACT MATCH**, so prompt your agent accordingly! The GAIA team shared a prompting example for your agent [here](https://huggingface.co/spaces/gaia-benchmark/leaderboard) (for the sake of this course, make sure you don't include the text "FINAL ANSWER" in your submission; just make your agent reply with the answer and nothing else).

🎨 **Make the Template Your Own!**

To demonstrate the process of interacting with the API, we've included a [basic template](https://huggingface.co/spaces/agents-course/Final_Assignment_Template) as a starting point.

Please feel free—and **actively encouraged**—to change, add to, or completely restructure it! Modify it in any way that best suits your approach and creativity.

To submit, the template computes the 3 things needed by the API:

* **Username:** Your Hugging Face username (here obtained via Gradio login), which is used to identify your submission.
* **Code Link (`agent_code`):** The URL linking to your Hugging Face Space code (`.../tree/main`) for verification purposes, so please keep your Space public.
* **Answers (`answers`):** The list of responses (`{"task_id": ..., "submitted_answer": ...}`) generated by your agent for scoring.

We therefore encourage you to start by duplicating this [template](https://huggingface.co/spaces/agents-course/Final_Assignment_Template) under your own Hugging Face profile. A minimal sketch of these API calls is shown below, after the leaderboard embed.

🏆 Check out the leaderboard [here](https://huggingface.co/spaces/agents-course/Students_leaderboard)

*A friendly note: This leaderboard is meant for fun! We know it's possible to submit scores without full verification. If we see too many high scores posted without a public link to back them up, we might need to review, adjust, or remove some entries to keep the leaderboard useful.*
The leaderboard shows the link to your Space codebase; since this leaderboard is for students only, please keep your Space public if you get a score you're proud of.

<iframe
    src="https://agents-course-students-leaderboard.hf.space"
    frameborder="0"
    width="850"
    height="450"
></iframe>
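For illustration, here is a minimal sketch of those API calls using plain `requests`, mirroring the `fetch_questions` and `submit_answers` helpers that ship in this commit's `utils.py`. The username, Space URL, and answer string are placeholders you would replace with your own values and your agent's output.

```python
import requests

API_URL = "https://agents-course-unit4-scoring.hf.space"

# 1. Retrieve the filtered level 1 questions (each item has "task_id" and "question").
questions = requests.get(f"{API_URL}/questions", timeout=15).json()

# 2. Build the answers payload in the exact shape the API expects,
#    replacing the placeholder string with your agent's answer per question.
answers = [
    {"task_id": q["task_id"], "submitted_answer": "placeholder answer"}
    for q in questions
]

# 3. Submit for exact-match scoring; username and agent_code identify you
#    and your public Space on the student leaderboard.
submission = {
    "username": "your-hf-username",  # placeholder
    "agent_code": "https://huggingface.co/spaces/your-hf-username/your-space/tree/main",  # placeholder
    "answers": answers,
}
response = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
response.raise_for_status()
print(response.json())
```

Because scoring is an exact match, it pays to post-process the model output before putting it into `submitted_answer`; that is what `format_gaia_answer` in `utils.py` further down does by stripping prefixes such as "FINAL ANSWER:" and trailing punctuation.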
docs/introduction.mdx
ADDED
@@ -0,0 +1,23 @@
# Welcome to the final Unit [[introduction]]

<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/thumbnail.jpg" alt="AI Agents Course thumbnail" width="100%"/>

Welcome to the final unit of the course! 🎉

So far, you’ve **built a strong foundation in AI Agents**, from understanding their components to creating your own. With this knowledge, you’re now ready to **build powerful agents** and stay up-to-date with the latest advancements in this fast-evolving field.

This unit is all about applying what you’ve learned. It’s your **final hands-on project**, and completing it is your ticket to earning the **course certificate**.

## What’s the challenge?

You’ll create your own agent and **evaluate its performance using a subset of the [GAIA benchmark](https://huggingface.co/spaces/gaia-benchmark/leaderboard)**.

To successfully complete the course, your agent needs to score **30% or higher** on the benchmark. Achieve that, and you’ll earn your **Certificate of Completion**, officially recognizing your expertise. 🏅

Additionally, see how you stack up against your peers! A dedicated **[Student Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)** is available for you to submit your scores and see the community's progress.

> **🚨 Heads Up: Advanced & Hands-On Unit**
>
> Please be aware that this unit shifts towards a more practical, hands-on approach. Success in this section will require **more advanced coding knowledge** and relies on you navigating tasks with **less explicit guidance** compared to earlier parts of the course.

Sounds exciting? Let’s get started! 🚀
docs/what-is-gaia.mdx
ADDED
@@ -0,0 +1,70 @@
# What is GAIA?

[GAIA](https://huggingface.co/papers/2311.12983) is a **benchmark designed to evaluate AI assistants on real-world tasks** that require a combination of core capabilities—such as reasoning, multimodal understanding, web browsing, and proficient tool use.

It was introduced in the paper _"[GAIA: A Benchmark for General AI Assistants](https://huggingface.co/papers/2311.12983)"_.

The benchmark features **466 carefully curated questions** that are **conceptually simple for humans**, yet **remarkably challenging for current AI systems**.

To illustrate the gap:
- **Humans**: ~92% success rate
- **GPT-4 with plugins**: ~15%
- **Deep Research (OpenAI)**: 67.36% on the validation set

GAIA highlights the current limitations of AI models and provides a rigorous benchmark to evaluate progress toward truly general-purpose AI assistants.

## 🌱 GAIA’s Core Principles

GAIA is carefully designed around the following pillars:

- 🔍 **Real-world difficulty**: Tasks require multi-step reasoning, multimodal understanding, and tool interaction.
- 🧾 **Human interpretability**: Despite their difficulty for AI, tasks remain conceptually simple and easy to follow for humans.
- 🛡️ **Non-gameability**: Correct answers demand full task execution, making brute-forcing ineffective.
- 🧰 **Simplicity of evaluation**: Answers are concise, factual, and unambiguous—ideal for benchmarking.

## Difficulty Levels

GAIA tasks are organized into **three levels of increasing complexity**, each testing specific skills:

- **Level 1**: Requires less than 5 steps and minimal tool usage.
- **Level 2**: Involves more complex reasoning and coordination between multiple tools and 5-10 steps.
- **Level 3**: Demands long-term planning and advanced integration of various tools.



## Example of a Hard GAIA Question

> Which of the fruits shown in the 2008 painting "Embroidery from Uzbekistan" were served as part of the October 1949 breakfast menu for the ocean liner that was later used as a floating prop for the film "The Last Voyage"? Give the items as a comma-separated list, ordering them in clockwise order based on their arrangement in the painting starting from the 12 o'clock position. Use the plural form of each fruit.

As you can see, this question challenges AI systems in several ways:

- Requires a **structured response format**
- Involves **multimodal reasoning** (e.g., analyzing images)
- Demands **multi-hop retrieval** of interdependent facts:
  - Identifying the fruits in the painting
  - Discovering which ocean liner was used in *The Last Voyage*
  - Looking up the breakfast menu from October 1949 for that ship
- Needs **correct sequencing** and high-level planning to solve in the right order

This kind of task highlights where standalone LLMs often fall short, making GAIA an ideal benchmark for **agent-based systems** that can reason, retrieve, and execute over multiple steps and modalities.



## Live Evaluation

To encourage continuous benchmarking, **GAIA provides a public leaderboard hosted on Hugging Face**, where you can test your models against **300 testing questions**.

👉 Check out the leaderboard [here](https://huggingface.co/spaces/gaia-benchmark/leaderboard)

<iframe
    src="https://gaia-benchmark-leaderboard.hf.space"
    frameborder="0"
    width="850"
    height="450"
></iframe>

Want to dive deeper into GAIA?

- 📄 [Read the full paper](https://huggingface.co/papers/2311.12983)
- 📄 [Deep Research release post by OpenAI](https://openai.com/index/introducing-deep-research/)
- 📄 [Open-source DeepResearch – Freeing our search agents](https://huggingface.co/blog/open-deep-research)
requirements.txt
CHANGED
@@ -1,2 +1,18 @@
+# Core dependencies from unit3
+langchain
+langchain-community
+langchain-huggingface
+langgraph
+huggingface_hub
+
+# Additional dependencies for GAIA
 gradio
-requests
+requests
+pillow
+PyPDF2
+duckduckgo-search
+python-dotenv
+
+# For image processing and multimodal capabilities
+transformers
+torch
test_local.py
ADDED
@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Local testing script for the GAIA agent.
Run this to test the agent before deploying to HF Spaces.
"""

import os
import sys
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Add current directory to path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from utils import fetch_random_question, analyze_question_type
from agent import smart_agent

def test_question_analysis():
    """Test the question analysis functionality."""
    print("🧪 Testing question analysis...")

    test_questions = [
        "What is the current population of Tokyo?",
        "Calculate 15 * 23 + 45",
        "Analyze the image shown in the document",
        "Extract all dates from the provided text file"
    ]

    for question in test_questions:
        analysis = analyze_question_type(question)
        print(f"Question: {question}")
        print(f"Analysis: {analysis}")
        print()

def test_tools():
    """Test individual tools."""
    print("🔧 Testing individual tools...")

    # Test calculator
    from tools import calculator_tool
    calc_result = calculator_tool.func("15 + 27")
    print(f"Calculator test: {calc_result}")

    # Test web search (if available)
    try:
        from tools import web_search_tool
        search_result = web_search_tool.func("Python programming language")
        print(f"Web search test: {search_result[:100]}...")
    except Exception as e:
        print(f"Web search test failed: {e}")

    print()

def test_agent_simple():
    """Test the agent with a simple question."""
    print("🤖 Testing Smart agent with simple question...")

    test_question = "What is 25 + 17?"
    try:
        result = smart_agent(test_question)
        print(f"Question: {test_question}")
        print(f"Answer: {result}")
        print("✅ Simple test passed!")
    except Exception as e:
        print(f"❌ Simple test failed: {e}")

    print()

def test_agent_with_api():
    """Test the agent with a real GAIA question from the API."""
    print("🌐 Testing with real GAIA question from API...")

    try:
        question_data = fetch_random_question()
        if not question_data:
            print("❌ Failed to fetch question from API")
            return

        task_id = question_data.get("task_id")
        question = question_data.get("question")

        print(f"Task ID: {task_id}")
        print(f"Question: {question}")

        # Run the agent
        answer = smart_agent(question, task_id)
        print(f"Agent Answer: {answer}")
        print("✅ API test completed!")

    except Exception as e:
        print(f"❌ API test failed: {e}")

    print()

def check_environment():
    """Check if all required environment variables are set."""
    print("🔍 Checking environment...")

    required_vars = ["HUGGINGFACE_API_TOKEN"]
    missing_vars = []

    for var in required_vars:
        if not os.getenv(var):
            missing_vars.append(var)
        else:
            print(f"✅ {var} is set")

    if missing_vars:
        print(f"❌ Missing environment variables: {missing_vars}")
        print("Please set these in your .env file or environment")
        return False

    print("✅ All required environment variables are set")
    return True

def main():
    """Run all tests."""
    print("🚀 Starting GAIA Agent Local Tests")
    print("=" * 50)

    # Check environment first
    if not check_environment():
        print("❌ Environment check failed. Please fix and try again.")
        return

    print()

    # Run tests
    # test_question_analysis()
    # test_tools()
    # test_agent_simple()
    test_agent_with_api()

if __name__ == "__main__":
    main()
tools.py
ADDED
@@ -0,0 +1,180 @@
from langchain.tools import Tool
import requests
import os
from PIL import Image
import io
import base64
from langchain_community.tools import DuckDuckGoSearchRun
from typing import Optional
import json
import PyPDF2
import tempfile

# Initialize web search tool
search_tool = DuckDuckGoSearchRun()

def web_search_tool_func(query: str) -> str:
    """Searches the web for information using DuckDuckGo."""
    try:
        results = search_tool.run(query)
        return results
    except Exception as e:
        return f"Web search failed: {str(e)}"

web_search_tool = Tool(
    name="web_search",
    func=web_search_tool_func,
    description="Searches the web for current information. Use this for factual questions, recent events, or when you need to find information not in your training data."
)

def file_download_tool_func(task_id: str) -> str:
    """Downloads a file associated with a GAIA task ID."""
    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        file_url = f"{api_url}/files/{task_id}"

        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        # Save to temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_file:
            temp_file.write(response.content)
            temp_path = temp_file.name

        # Try to determine file type and process accordingly
        content_type = response.headers.get('content-type', '').lower()

        if 'image' in content_type:
            return f"Image file downloaded to {temp_path}. Use image_analysis_tool to analyze it."
        elif 'pdf' in content_type:
            return process_pdf_file(temp_path)
        elif 'text' in content_type:
            with open(temp_path, 'r', encoding='utf-8') as f:
                content = f.read()
            os.unlink(temp_path)  # Clean up
            return f"Text file content:\n{content}"
        else:
            return f"File downloaded to {temp_path}. Content type: {content_type}"

    except Exception as e:
        return f"Failed to download file for task {task_id}: {str(e)}"

def process_pdf_file(file_path: str) -> str:
    """Process a PDF file and extract text content."""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text_content = ""

            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text_content += f"\n--- Page {page_num + 1} ---\n"
                text_content += page.extract_text()

        os.unlink(file_path)  # Clean up
        return f"PDF content extracted:\n{text_content}"
    except Exception as e:
        return f"Failed to process PDF: {str(e)}"

file_download_tool = Tool(
    name="file_download",
    func=file_download_tool_func,
    description="Downloads and processes files associated with GAIA task IDs. Can handle images, PDFs, and text files."
)

def image_analysis_tool_func(image_path_or_description: str) -> str:
    """Analyzes images for GAIA questions. For now, returns a placeholder."""
    # This is a simplified version - in a full implementation, you'd use a vision model
    try:
        if os.path.exists(image_path_or_description):
            # Try to open and get basic info about the image
            with Image.open(image_path_or_description) as img:
                width, height = img.size
                mode = img.mode
                format_info = img.format

            # Clean up the temporary file
            os.unlink(image_path_or_description)

            return f"Image analyzed: {width}x{height} pixels, mode: {mode}, format: {format_info}. Note: This is a basic analysis. For detailed image content analysis, a vision model would be needed."
        else:
            return f"Image analysis requested for: {image_path_or_description}. Note: Full image analysis requires a vision model integration."
    except Exception as e:
        return f"Image analysis failed: {str(e)}"

image_analysis_tool = Tool(
    name="image_analysis",
    func=image_analysis_tool_func,
    description="Analyzes images to extract information. Use this for questions involving visual content."
)

def calculator_tool_func(expression: str) -> str:
    """Performs mathematical calculations safely."""
    try:
        # Basic safety check - only allow certain characters
        allowed_chars = set('0123456789+-*/().= ')
        if not all(c in allowed_chars for c in expression):
            return f"Invalid characters in expression: {expression}"

        # Use eval safely for basic math
        result = eval(expression)
        return f"Calculation result: {expression} = {result}"
    except Exception as e:
        return f"Calculation failed for '{expression}': {str(e)}"

calculator_tool = Tool(
    name="calculator",
    func=calculator_tool_func,
    description="Performs mathematical calculations. Use this for numerical computations and math problems."
)

def text_processor_tool_func(text: str, operation: str = "summarize") -> str:
    """Processes text for various operations like summarization, extraction, etc."""
    try:
        if operation == "summarize":
            # Simple summarization - take first and last sentences if long
            sentences = text.split('.')
            if len(sentences) > 5:
                summary = '. '.join(sentences[:2] + sentences[-2:])
                return f"Text summary: {summary}"
            else:
                return f"Text (short enough to not need summarization): {text}"

        elif operation == "extract_numbers":
            import re
            numbers = re.findall(r'\d+(?:\.\d+)?', text)
            return f"Numbers found in text: {numbers}"

        elif operation == "extract_dates":
            import re
            # Simple date pattern matching
            date_patterns = [
                r'\d{1,2}/\d{1,2}/\d{4}',  # MM/DD/YYYY
                r'\d{4}-\d{1,2}-\d{1,2}',  # YYYY-MM-DD
                r'\b\w+ \d{1,2}, \d{4}\b'  # Month DD, YYYY
            ]
            dates = []
            for pattern in date_patterns:
                dates.extend(re.findall(pattern, text))
            return f"Dates found in text: {dates}"

        else:
            return f"Text processing operation '{operation}' not supported. Available: summarize, extract_numbers, extract_dates"

    except Exception as e:
        return f"Text processing failed: {str(e)}"

text_processor_tool = Tool(
    name="text_processor",
    func=text_processor_tool_func,
    description="Processes text for various operations like summarization, number extraction, date extraction. Specify operation as second parameter."
)

# List of all tools for easy import
agent_tools = [
    web_search_tool,
    file_download_tool,
    image_analysis_tool,
    calculator_tool,
    text_processor_tool
]
utils.py
ADDED
@@ -0,0 +1,124 @@
import requests
import json
from typing import List, Dict, Any

def fetch_questions(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> List[Dict[str, Any]]:
    """Fetch all questions from the GAIA API."""
    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return []

def fetch_random_question(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
    """Fetch a random question from the GAIA API."""
    try:
        response = requests.get(f"{api_url}/random-question", timeout=15)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error fetching random question: {e}")
        return {}

def submit_answers(username: str, agent_code: str, answers: List[Dict[str, str]],
                   api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
    """Submit answers to the GAIA API for scoring."""
    try:
        submission_data = {
            "username": username.strip(),
            "agent_code": agent_code,
            "answers": answers
        }

        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error submitting answers: {e}")
        return {"error": str(e)}

def format_gaia_answer(raw_answer: str) -> str:
    """Format the agent's raw answer for GAIA submission (exact match)."""
    # Remove common prefixes that might interfere with exact matching
    prefixes_to_remove = [
        "FINAL ANSWER:",
        "Final Answer:",
        "Answer:",
        "The answer is:",
        "The final answer is:",
    ]

    answer = raw_answer.strip()

    for prefix in prefixes_to_remove:
        if answer.startswith(prefix):
            answer = answer[len(prefix):].strip()

    # Remove trailing punctuation that might not be in ground truth
    while answer and answer[-1] in '.!?':
        answer = answer[:-1].strip()

    return answer

def analyze_question_type(question: str) -> Dict[str, bool]:
    """Analyze what capabilities a question might need."""
    question_lower = question.lower()

    analysis = {
        "needs_web_search": any(keyword in question_lower for keyword in [
            "current", "recent", "latest", "today", "now", "2024", "2023"
        ]),
        "needs_file_processing": "file" in question_lower or "document" in question_lower,
        "needs_calculation": any(keyword in question_lower for keyword in [
            "calculate", "compute", "sum", "total", "average", "percentage", "multiply", "divide"
        ]),
        "needs_image_analysis": any(keyword in question_lower for keyword in [
            "image", "picture", "photo", "visual", "shown", "displayed"
        ]),
        "needs_text_processing": any(keyword in question_lower for keyword in [
            "extract", "find in", "search for", "list", "count"
        ])
    }

    return analysis

def create_execution_plan(question: str, task_id: str = None) -> List[str]:
    """Create a step-by-step execution plan for a GAIA question."""
    analysis = analyze_question_type(question)
    plan = []

    # Always start with understanding the question
    plan.append("Analyze the question to understand what information is needed")

    # Add file processing if needed
    if task_id and analysis["needs_file_processing"]:
        plan.append(f"Download and process any files associated with task {task_id}")

    # Add web search if needed
    if analysis["needs_web_search"]:
        plan.append("Search the web for current/recent information")

    # Add image analysis if needed
    if analysis["needs_image_analysis"]:
        plan.append("Analyze any images for visual information")

    # Add calculation if needed
    if analysis["needs_calculation"]:
        plan.append("Perform necessary calculations")

    # Add text processing if needed
    if analysis["needs_text_processing"]:
        plan.append("Process and extract specific information from text")

    # Always end with synthesis
    plan.append("Synthesize all information to provide the final answer")

    return plan

def log_agent_step(step: str, result: str, step_number: int = None):
    """Log agent execution steps for debugging."""
    prefix = f"Step {step_number}: " if step_number else ""
    print(f"\n🤖 {prefix}{step}")
    print(f"📝 Result: {result[:200]}{'...' if len(result) > 200 else ''}")