Denis Davydov committed
Commit f9a7c9b · 1 Parent(s): 81917a3
.gitignore ADDED
@@ -0,0 +1,176 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+.DS_Store
agent.py ADDED
@@ -0,0 +1,146 @@
+from typing import TypedDict, Annotated
+import os
+from langgraph.graph.message import add_messages
+from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
+from langgraph.prebuilt import ToolNode
+from langgraph.graph import START, StateGraph
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.prebuilt import tools_condition
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+from tools import agent_tools
+from utils import format_gaia_answer, analyze_question_type, create_execution_plan, log_agent_step
+
+# Initialize LLM (same as unit3)
+llm = HuggingFaceEndpoint(
+    repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN"),
+    temperature=0.1,
+    max_new_tokens=1024,
+)
+
+chat = ChatHuggingFace(llm=llm, verbose=True)
+chat_with_tools = chat.bind_tools(agent_tools)
+
+# System prompt for intelligent question answering
+SYSTEM_PROMPT = """You are a highly capable AI assistant designed to answer questions accurately and helpfully.
+
+Your approach should include:
+- Multi-step reasoning and planning for complex questions
+- Intelligent tool usage when needed for web search, file processing, calculations, and analysis
+- Precise, factual answers based on reliable information
+- Breaking down complex questions into manageable steps
+
+IMPORTANT GUIDELINES:
+1. Think step-by-step and use available tools when they can help provide better answers
+2. For current information: Search the web for up-to-date facts
+3. For files: Process associated files when task_id is provided
+4. For visual content: Analyze images carefully when present
+5. For calculations: Use computational tools for accuracy
+6. Provide concise, direct answers without unnecessary prefixes
+7. Focus on accuracy and helpfulness
+8. Be factual and avoid speculation
+
+Your goal is to be as helpful and accurate as possible while using the right tools for each task."""
+
+# Define the AgentState
+class AgentState(TypedDict):
+    messages: Annotated[list[AnyMessage], add_messages]
+    task_id: str
+    question_analysis: dict
+
+def assistant(state: AgentState):
+    """Main assistant function that processes messages and calls tools."""
+    messages = state["messages"]
+
+    # Add system prompt if not already present
+    if not any(isinstance(msg, SystemMessage) for msg in messages):
+        messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
+
+    # Get the response from the LLM
+    response = chat_with_tools.invoke(messages)
+
+    return {
+        "messages": [response],
+    }
+
+def create_smart_agent():
+    """Create and return the smart agent graph."""
+    # Build the graph
+    builder = StateGraph(AgentState)
+
+    # Define nodes
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(agent_tools))
+
+    # Define edges: route to tools when the LLM requests a tool call, then loop back
+    builder.add_edge(START, "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+    )
+    builder.add_edge("tools", "assistant")
+
+    # Add memory
+    memory = MemorySaver()
+    agent = builder.compile(checkpointer=memory)
+
+    return agent
+
+class SmartAgent:
+    """High-level intelligent agent class that wraps the LangGraph agent."""
+
+    def __init__(self):
+        self.agent = create_smart_agent()
+        print("🤖 Smart Agent initialized with LangGraph and tools")
+
+    def __call__(self, question: str, task_id: str | None = None) -> str:
+        """Process a question and return the formatted answer."""
+        try:
+            print(f"\n🎯 Processing question: {question[:100]}...")
+
+            # Analyze the question
+            analysis = analyze_question_type(question)
+            print(f"📊 Question analysis: {analysis}")
+
+            # Create execution plan
+            plan = create_execution_plan(question, task_id)
+            print(f"📋 Execution plan: {plan}")
+
+            # Prepare the question with task_id context if available
+            enhanced_question = question
+            if task_id:
+                enhanced_question = f"Task ID: {task_id}\n\nQuestion: {question}\n\nNote: If this question involves files, use the file_download tool with task_id '{task_id}' to access associated files."
+
+            # Invoke the agent
+            thread_id = f"task-{task_id}" if task_id else "general"
+            config = {"configurable": {"thread_id": thread_id}}
+
+            initial_state = {
+                "messages": [HumanMessage(content=enhanced_question)],
+                "task_id": task_id or "",
+                "question_analysis": analysis
+            }
+
+            result = self.agent.invoke(initial_state, config=config)
+
+            # Extract the final answer
+            if result and 'messages' in result and result['messages']:
+                final_message = result['messages'][-1]
+                raw_answer = final_message.content
+            else:
+                raw_answer = "No response generated"
+
+            # Format the answer for submission
+            formatted_answer = format_gaia_answer(raw_answer)
+
+            print(f"✅ Raw answer: {raw_answer}")
+            print(f"🎯 Formatted answer: {formatted_answer}")
+
+            return formatted_answer
+
+        except Exception as e:
+            error_msg = f"Error processing question: {str(e)}"
+            print(f"❌ {error_msg}")
+            return error_msg
+
+smart_agent = SmartAgent()
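Since `agent.py` builds the graph and instantiates `smart_agent` at import time, the quickest local sanity check is an import and a call. A hypothetical smoke-test snippet (the questions and task id below are made up, and `HUGGINGFACE_API_TOKEN` must be exported, since the module creates the endpoint on import):

```python
# Hypothetical smoke test for agent.py; assumes HUGGINGFACE_API_TOKEN is exported.
from agent import smart_agent

# Without a task_id the run uses the shared "general" memory thread.
print(smart_agent("What is 25 * 4?"))

# With a task_id the question is enriched with file-download instructions
# and the run gets its own checkpoint thread ("task-<id>").
print(smart_agent("Summarize the attached file.", task_id="abc-123"))
```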
app.py CHANGED
@@ -3,22 +3,12 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+from agent import smart_agent
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -38,12 +28,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent ( modify this part to create your agent)
+    # 1. Use Smart Agent (imported from agent.py)
     try:
-        agent = BasicAgent()
+        agent = smart_agent
+        print("Smart Agent loaded successfully.")
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
+        print(f"Error loading smart agent: {e}")
+        return f"Error initializing smart agent: {e}", None
     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -80,7 +71,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
+            submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -193,4 +184,4 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
docs/additional-readings.mdx ADDED
@@ -0,0 +1,30 @@
+# And now? What topics should I learn?
+
+Agentic AI is a rapidly evolving field, and understanding foundational protocols is essential for building intelligent, autonomous systems.
+
+Two important standards you should get familiar with are:
+
+- The **Model Context Protocol (MCP)**
+- The **Agent-to-Agent Protocol (A2A)**
+
+## 🔌 Model Context Protocol (MCP)
+
+The **Model Context Protocol (MCP)** by Anthropic is an open standard that enables AI models to securely and seamlessly **connect with external tools, data sources, and applications**, making agents more capable and autonomous.
+
+Think of MCP as a **universal adapter**, like a USB-C port, that allows AI models to plug into various digital environments **without needing custom integration for each one**.
+
+MCP is quickly gaining traction across the industry, with major companies like OpenAI and Google beginning to adopt it.
+
+📚 Learn more:
+- [Anthropic's official announcement and documentation](https://www.anthropic.com/news/model-context-protocol)
+- [MCP on Wikipedia](https://en.wikipedia.org/wiki/Model_Context_Protocol)
+- [Blog on MCP](https://huggingface.co/blog/Kseniase/mcp)
+
+## 🤝 Agent-to-Agent (A2A) Protocol
+
+Google has developed the **Agent-to-Agent (A2A) protocol** as a complementary counterpart to Anthropic's Model Context Protocol (MCP).
+
+While MCP connects agents to external tools, **A2A connects agents to each other**, paving the way for cooperative, multi-agent systems that can work together to solve complex problems.
+
+📚 Dive deeper into A2A:
+- [Google’s A2A announcement](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/)
docs/conclusion.mdx ADDED
@@ -0,0 +1,11 @@
+# Conclusion
+
+**Congratulations on finishing the Agents Course!**
+
+Through perseverance and dedication, you’ve built a solid foundation in the world of AI Agents.
+
+But finishing this course is **not the end of your journey**. It’s just the beginning: don’t hesitate to explore the next section, where we share curated resources to help you continue learning, including advanced topics like **MCP** and beyond.
+
+**Thank you** for being part of this course. **We hope you liked this course as much as we loved writing it**.
+
+And don’t forget: **Keep Learning, Stay Awesome 🤗**
docs/get-your-certificate.mdx ADDED
@@ -0,0 +1,24 @@
+# Claim Your Certificate 🎓
+
+If you scored **above 30%, congratulations! 👏 You're now eligible to claim your official certificate.**
+
+Follow the steps below to receive it:
+
+1. Visit the [certificate page](https://huggingface.co/spaces/agents-course/Unit4-Final-Certificate).
+2. **Sign in** with your Hugging Face account using the button provided.
+3. **Enter your full name**. This is the name that will appear on your certificate.
+4. Click **“Get My Certificate”** to verify your score and download your certificate.
+
+<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/congrats.png" alt="Congrats!" />
+
+Once you’ve got your certificate, feel free to:
+- Add it to your **LinkedIn profile** 🧑‍💼
+- Share it on **X**, **Bluesky**, etc. 🎉
+
+**Don’t forget to tag [@huggingface](https://huggingface.co/huggingface). We’d be super proud and we’d love to cheer you on! 🤗**
+
+<Tip>
+
+If you have any issues with submission, please open a discussion item on [the certification community tab](https://huggingface.co/spaces/agents-course/Unit4-Final-Certificate/discussions).
+
+</Tip>
docs/hands-on.mdx ADDED
@@ -0,0 +1,52 @@
+# Hands-On
+
+Now that you’re ready to dive deeper into the creation of your final agent, let’s see how you can submit it for review.
+
+## The Dataset
+
+The dataset used in this leaderboard consists of 20 questions extracted from the level 1 questions of the **validation** set from GAIA.
+
+The chosen questions were filtered based on the number of tools and steps needed to answer them.
+
+Based on the current state of the GAIA benchmark, we think that aiming for 30% on level 1 questions is a fair test.
+
+<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/leaderboard%20GAIA%2024%3A04%3A2025.png" alt="GAIA current status!" />
+
+## The process
+
+Now the big question on your mind is probably: "How do I start submitting?"
+
+For this Unit, we created an API that will allow you to get the questions and send your answers for scoring.
+Here is a summary of the routes (see the [live documentation](https://agents-course-unit4-scoring.hf.space/docs) for interactive details):
+
+* **`GET /questions`**: Retrieve the full list of filtered evaluation questions.
+* **`GET /random-question`**: Fetch a single random question from the list.
+* **`GET /files/{task_id}`**: Download a specific file associated with a given task ID.
+* **`POST /submit`**: Submit agent answers, calculate the score, and update the leaderboard.
+
+The submit function will compare the answer to the ground truth in an **EXACT MATCH** manner, so prompt your agent well! The GAIA team shared a prompting example for your agent [here](https://huggingface.co/spaces/gaia-benchmark/leaderboard) (for the sake of this course, make sure you don't include the text "FINAL ANSWER" in your submission; just make your agent reply with the answer and nothing else).
+
+🎨 **Make the Template Your Own!**
+
+To demonstrate the process of interacting with the API, we've included a [basic template](https://huggingface.co/spaces/agents-course/Final_Assignment_Template) as a starting point.
+
+Please feel free—and **actively encouraged**—to change, add to, or completely restructure it! Modify it in any way that best suits your approach and creativity.
+
+In order to submit, this template computes the 3 things needed by the API:
+
+* **Username:** Your Hugging Face username (here obtained via Gradio login), which is used to identify your submission.
+* **Code Link (`agent_code`):** The URL linking to your Hugging Face Space code (`.../tree/main`) for verification purposes, so please keep your Space public.
+* **Answers (`answers`):** The list of responses (`{"task_id": ..., "submitted_answer": ...}`) generated by your Agent for scoring.
+
+Hence we encourage you to start by duplicating this [template](https://huggingface.co/spaces/agents-course/Final_Assignment_Template) on your own Hugging Face profile.
+
+🏆 Check out the leaderboard [here](https://huggingface.co/spaces/agents-course/Students_leaderboard)
+
+*A friendly note: This leaderboard is meant for fun! We know it's possible to submit scores without full verification. If we see too many high scores posted without a public link to back them up, we might need to review, adjust, or remove some entries to keep the leaderboard useful.*
+The leaderboard will show the link to your Space codebase; since this leaderboard is for students only, please keep your Space public if you get a score you're proud of.
+<iframe
+	src="https://agents-course-students-leaderboard.hf.space"
+	frameborder="0"
+	width="850"
+	height="450"
+></iframe>
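To make the route list in `docs/hands-on.mdx` concrete, here is a minimal sketch of the fetch-and-submit loop against the scoring API. The paths and payload field names come from that page (and match `utils.py` later in this commit); the username, Space URL, and answer values are placeholders:

```python
# Minimal sketch of the fetch/submit loop for the scoring API described above.
# The username, Space URL, and submitted answers are placeholders.
import requests

API_URL = "https://agents-course-unit4-scoring.hf.space"

# 1. Fetch the full list of evaluation questions.
questions = requests.get(f"{API_URL}/questions", timeout=15).json()

# 2. Run your agent on each question (placeholder answer shown here).
answers = [
    {"task_id": q["task_id"], "submitted_answer": "your agent's answer"}
    for q in questions
]

# 3. Submit for exact-match scoring and a leaderboard update.
payload = {
    "username": "your-hf-username",
    "agent_code": "https://huggingface.co/spaces/your-hf-username/your-space/tree/main",
    "answers": answers,
}
print(requests.post(f"{API_URL}/submit", json=payload, timeout=60).json())
```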
docs/introduction.mdx ADDED
@@ -0,0 +1,23 @@
+# Welcome to the final Unit [[introduction]]
+
+<img src="https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/thumbnail.jpg" alt="AI Agents Course thumbnail" width="100%"/>
+
+Welcome to the final unit of the course! 🎉
+
+So far, you’ve **built a strong foundation in AI Agents**, from understanding their components to creating your own. With this knowledge, you’re now ready to **build powerful agents** and stay up-to-date with the latest advancements in this fast-evolving field.
+
+This unit is all about applying what you’ve learned. It’s your **final hands-on project**, and completing it is your ticket to earning the **course certificate**.
+
+## What’s the challenge?
+
+You’ll create your own agent and **evaluate its performance using a subset of the [GAIA benchmark](https://huggingface.co/spaces/gaia-benchmark/leaderboard)**.
+
+To successfully complete the course, your agent needs to score **30% or higher** on the benchmark. Achieve that, and you’ll earn your **Certificate of Completion**, officially recognizing your expertise. 🏅
+
+Additionally, see how you stack up against your peers! A dedicated **[Student Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)** is available for you to submit your scores and see the community's progress.
+
+> **🚨 Heads Up: Advanced & Hands-On Unit**
+>
+> Please be aware that this unit shifts towards a more practical, hands-on approach. Success in this section will require **more advanced coding knowledge** and relies on you navigating tasks with **less explicit guidance** compared to earlier parts of the course.
+
+Sounds exciting? Let’s get started! 🚀
docs/what-is-gaia.mdx ADDED
@@ -0,0 +1,70 @@
+# What is GAIA?
+
+[GAIA](https://huggingface.co/papers/2311.12983) is a **benchmark designed to evaluate AI assistants on real-world tasks** that require a combination of core capabilities—such as reasoning, multimodal understanding, web browsing, and proficient tool use.
+
+It was introduced in the paper _"[GAIA: A Benchmark for General AI Assistants](https://huggingface.co/papers/2311.12983)"_.
+
+The benchmark features **466 carefully curated questions** that are **conceptually simple for humans**, yet **remarkably challenging for current AI systems**.
+
+To illustrate the gap:
+- **Humans**: ~92% success rate
+- **GPT-4 with plugins**: ~15%
+- **Deep Research (OpenAI)**: 67.36% on the validation set
+
+GAIA highlights the current limitations of AI models and provides a rigorous benchmark to evaluate progress toward truly general-purpose AI assistants.
+
+## 🌱 GAIA’s Core Principles
+
+GAIA is carefully designed around the following pillars:
+
+- 🔍 **Real-world difficulty**: Tasks require multi-step reasoning, multimodal understanding, and tool interaction.
+- 🧾 **Human interpretability**: Despite their difficulty for AI, tasks remain conceptually simple and easy to follow for humans.
+- 🛡️ **Non-gameability**: Correct answers demand full task execution, making brute-forcing ineffective.
+- 🧰 **Simplicity of evaluation**: Answers are concise, factual, and unambiguous—ideal for benchmarking.
+
+## Difficulty Levels
+
+GAIA tasks are organized into **three levels of increasing complexity**, each testing specific skills:
+
+- **Level 1**: Requires fewer than 5 steps and minimal tool usage.
+- **Level 2**: Involves 5-10 steps, with more complex reasoning and coordination between multiple tools.
+- **Level 3**: Demands long-term planning and advanced integration of various tools.
+
+![GAIA levels](https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/gaia_levels.png)
+
+## Example of a Hard GAIA Question
+
+> Which of the fruits shown in the 2008 painting "Embroidery from Uzbekistan" were served as part of the October 1949 breakfast menu for the ocean liner that was later used as a floating prop for the film "The Last Voyage"? Give the items as a comma-separated list, ordering them in clockwise order based on their arrangement in the painting starting from the 12 o'clock position. Use the plural form of each fruit.
+
+As you can see, this question challenges AI systems in several ways:
+
+- Requires a **structured response format**
+- Involves **multimodal reasoning** (e.g., analyzing images)
+- Demands **multi-hop retrieval** of interdependent facts:
+  - Identifying the fruits in the painting
+  - Discovering which ocean liner was used in *The Last Voyage*
+  - Looking up the breakfast menu from October 1949 for that ship
+- Needs **correct sequencing** and high-level planning to solve in the right order
+
+This kind of task highlights where standalone LLMs often fall short, making GAIA an ideal benchmark for **agent-based systems** that can reason, retrieve, and execute over multiple steps and modalities.
+
+![GAIA capabilities plot](https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit4/gaia_capabilities.png)
+
+## Live Evaluation
+
+To encourage continuous benchmarking, **GAIA provides a public leaderboard hosted on Hugging Face**, where you can test your models against **300 testing questions**.
+
+👉 Check out the leaderboard [here](https://huggingface.co/spaces/gaia-benchmark/leaderboard)
+
+<iframe
+	src="https://gaia-benchmark-leaderboard.hf.space"
+	frameborder="0"
+	width="850"
+	height="450"
+></iframe>
+
+Want to dive deeper into GAIA?
+
+- 📄 [Read the full paper](https://huggingface.co/papers/2311.12983)
+- 📄 [Deep Research release post by OpenAI](https://openai.com/index/introducing-deep-research/)
+- 📄 [Open-source DeepResearch – Freeing our search agents](https://huggingface.co/blog/open-deep-research)
requirements.txt CHANGED
@@ -1,2 +1,18 @@
+# Core dependencies from unit3
+langchain
+langchain-community
+langchain-huggingface
+langgraph
+huggingface_hub
+
+# Additional dependencies for GAIA
 gradio
-requests
+requests
+pillow
+PyPDF2
+duckduckgo-search
+python-dotenv
+
+# For image processing and multimodal capabilities
+transformers
+torch
test_local.py ADDED
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+Local testing script for the GAIA agent.
+Run this to test the agent before deploying to HF Spaces.
+"""
+
+import os
+import sys
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Add current directory to path for imports
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from utils import fetch_random_question, analyze_question_type
+from agent import smart_agent
+
+def test_question_analysis():
+    """Test the question analysis functionality."""
+    print("🧪 Testing question analysis...")
+
+    test_questions = [
+        "What is the current population of Tokyo?",
+        "Calculate 15 * 23 + 45",
+        "Analyze the image shown in the document",
+        "Extract all dates from the provided text file"
+    ]
+
+    for question in test_questions:
+        analysis = analyze_question_type(question)
+        print(f"Question: {question}")
+        print(f"Analysis: {analysis}")
+        print()
+
+def test_tools():
+    """Test individual tools."""
+    print("🔧 Testing individual tools...")
+
+    # Test calculator
+    from tools import calculator_tool
+    calc_result = calculator_tool.func("15 + 27")
+    print(f"Calculator test: {calc_result}")
+
+    # Test web search (if available)
+    try:
+        from tools import web_search_tool
+        search_result = web_search_tool.func("Python programming language")
+        print(f"Web search test: {search_result[:100]}...")
+    except Exception as e:
+        print(f"Web search test failed: {e}")
+
+    print()
+
+def test_agent_simple():
+    """Test the agent with a simple question."""
+    print("🤖 Testing Smart agent with simple question...")
+
+    test_question = "What is 25 + 17?"
+    try:
+        result = smart_agent(test_question)
+        print(f"Question: {test_question}")
+        print(f"Answer: {result}")
+        print("✅ Simple test passed!")
+    except Exception as e:
+        print(f"❌ Simple test failed: {e}")
+
+    print()
+
+def test_agent_with_api():
+    """Test the agent with a real GAIA question from the API."""
+    print("🌐 Testing with real GAIA question from API...")
+
+    try:
+        question_data = fetch_random_question()
+        if not question_data:
+            print("❌ Failed to fetch question from API")
+            return
+
+        task_id = question_data.get("task_id")
+        question = question_data.get("question")
+
+        print(f"Task ID: {task_id}")
+        print(f"Question: {question}")
+
+        # Run the agent
+        answer = smart_agent(question, task_id)
+        print(f"Agent Answer: {answer}")
+        print("✅ API test completed!")
+
+    except Exception as e:
+        print(f"❌ API test failed: {e}")
+
+    print()
+
+def check_environment():
+    """Check if all required environment variables are set."""
+    print("🔍 Checking environment...")
+
+    required_vars = ["HUGGINGFACE_API_TOKEN"]
+    missing_vars = []
+
+    for var in required_vars:
+        if not os.getenv(var):
+            missing_vars.append(var)
+        else:
+            print(f"✅ {var} is set")
+
+    if missing_vars:
+        print(f"❌ Missing environment variables: {missing_vars}")
+        print("Please set these in your .env file or environment")
+        return False
+
+    print("✅ All required environment variables are set")
+    return True
+
+def main():
+    """Run all tests."""
+    print("🚀 Starting GAIA Agent Local Tests")
+    print("=" * 50)
+
+    # Check environment first
+    if not check_environment():
+        print("❌ Environment check failed. Please fix and try again.")
+        return
+
+    print()
+
+    # Run tests
+    # test_question_analysis()
+    # test_tools()
+    # test_agent_simple()
+    test_agent_with_api()
+
+if __name__ == "__main__":
+    main()
tools.py ADDED
@@ -0,0 +1,180 @@
+from langchain.tools import Tool
+import requests
+import os
+from PIL import Image
+import io
+import base64
+from langchain_community.tools import DuckDuckGoSearchRun
+from typing import Optional
+import json
+import PyPDF2
+import tempfile
+
+# Initialize web search tool
+search_tool = DuckDuckGoSearchRun()
+
+def web_search_tool_func(query: str) -> str:
+    """Searches the web for information using DuckDuckGo."""
+    try:
+        results = search_tool.run(query)
+        return results
+    except Exception as e:
+        return f"Web search failed: {str(e)}"
+
+web_search_tool = Tool(
+    name="web_search",
+    func=web_search_tool_func,
+    description="Searches the web for current information. Use this for factual questions, recent events, or when you need to find information not in your training data."
+)
+
+def file_download_tool_func(task_id: str) -> str:
+    """Downloads a file associated with a GAIA task ID."""
+    try:
+        api_url = "https://agents-course-unit4-scoring.hf.space"
+        file_url = f"{api_url}/files/{task_id}"
+
+        response = requests.get(file_url, timeout=30)
+        response.raise_for_status()
+
+        # Save to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_file:
+            temp_file.write(response.content)
+            temp_path = temp_file.name
+
+        # Try to determine file type and process accordingly
+        content_type = response.headers.get('content-type', '').lower()
+
+        if 'image' in content_type:
+            return f"Image file downloaded to {temp_path}. Use image_analysis_tool to analyze it."
+        elif 'pdf' in content_type:
+            return process_pdf_file(temp_path)
+        elif 'text' in content_type:
+            with open(temp_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            os.unlink(temp_path)  # Clean up
+            return f"Text file content:\n{content}"
+        else:
+            return f"File downloaded to {temp_path}. Content type: {content_type}"
+
+    except Exception as e:
+        return f"Failed to download file for task {task_id}: {str(e)}"
+
+def process_pdf_file(file_path: str) -> str:
+    """Process a PDF file and extract text content."""
+    try:
+        with open(file_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            text_content = ""
+
+            for page_num in range(len(pdf_reader.pages)):
+                page = pdf_reader.pages[page_num]
+                text_content += f"\n--- Page {page_num + 1} ---\n"
+                text_content += page.extract_text()
+
+        os.unlink(file_path)  # Clean up
+        return f"PDF content extracted:\n{text_content}"
+    except Exception as e:
+        return f"Failed to process PDF: {str(e)}"
+
+file_download_tool = Tool(
+    name="file_download",
+    func=file_download_tool_func,
+    description="Downloads and processes files associated with GAIA task IDs. Can handle images, PDFs, and text files."
+)
+
+def image_analysis_tool_func(image_path_or_description: str) -> str:
+    """Analyzes images for GAIA questions. For now, returns a placeholder."""
+    # This is a simplified version - in a full implementation, you'd use a vision model
+    try:
+        if os.path.exists(image_path_or_description):
+            # Try to open and get basic info about the image
+            with Image.open(image_path_or_description) as img:
+                width, height = img.size
+                mode = img.mode
+                format_info = img.format
+
+            # Clean up the temporary file
+            os.unlink(image_path_or_description)
+
+            return f"Image analyzed: {width}x{height} pixels, mode: {mode}, format: {format_info}. Note: This is a basic analysis. For detailed image content analysis, a vision model would be needed."
+        else:
+            return f"Image analysis requested for: {image_path_or_description}. Note: Full image analysis requires a vision model integration."
+    except Exception as e:
+        return f"Image analysis failed: {str(e)}"
+
+image_analysis_tool = Tool(
+    name="image_analysis",
+    func=image_analysis_tool_func,
+    description="Analyzes images to extract information. Use this for questions involving visual content."
+)
+
+def calculator_tool_func(expression: str) -> str:
+    """Performs mathematical calculations safely."""
+    try:
+        # Basic safety check - only allow certain characters
+        allowed_chars = set('0123456789+-*/().= ')
+        if not all(c in allowed_chars for c in expression):
+            return f"Invalid characters in expression: {expression}"
+
+        # Evaluate with builtins disabled; with the allowlist above this limits input to basic arithmetic
+        result = eval(expression, {"__builtins__": {}}, {})
+        return f"Calculation result: {expression} = {result}"
+    except Exception as e:
+        return f"Calculation failed for '{expression}': {str(e)}"
+
+calculator_tool = Tool(
+    name="calculator",
+    func=calculator_tool_func,
+    description="Performs mathematical calculations. Use this for numerical computations and math problems."
+)
+
+def text_processor_tool_func(text: str, operation: str = "summarize") -> str:
+    """Processes text for various operations like summarization, extraction, etc."""
+    try:
+        if operation == "summarize":
+            # Simple summarization - take first and last sentences if long
+            sentences = text.split('.')
+            if len(sentences) > 5:
+                summary = '. '.join(sentences[:2] + sentences[-2:])
+                return f"Text summary: {summary}"
+            else:
+                return f"Text (short enough to not need summarization): {text}"
+
+        elif operation == "extract_numbers":
+            import re
+            numbers = re.findall(r'\d+(?:\.\d+)?', text)
+            return f"Numbers found in text: {numbers}"
+
+        elif operation == "extract_dates":
+            import re
+            # Simple date pattern matching
+            date_patterns = [
+                r'\d{1,2}/\d{1,2}/\d{4}',  # MM/DD/YYYY
+                r'\d{4}-\d{1,2}-\d{1,2}',  # YYYY-MM-DD
+                r'\b\w+ \d{1,2}, \d{4}\b'  # Month DD, YYYY
+            ]
+            dates = []
+            for pattern in date_patterns:
+                dates.extend(re.findall(pattern, text))
+            return f"Dates found in text: {dates}"
+
+        else:
+            return f"Text processing operation '{operation}' not supported. Available: summarize, extract_numbers, extract_dates"
+
+    except Exception as e:
+        return f"Text processing failed: {str(e)}"
+
+text_processor_tool = Tool(
+    name="text_processor",
+    func=text_processor_tool_func,
+    description="Processes text for various operations like summarization, number extraction, date extraction. Specify operation as second parameter."
+)
+
+# List of all tools for easy import
+agent_tools = [
+    web_search_tool,
+    file_download_tool,
+    image_analysis_tool,
+    calculator_tool,
+    text_processor_tool
+]
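These wrappers use the plain langchain `Tool` interface, so they can also be exercised outside the LangGraph loop (as `test_local.py` above does via `.func`). A small illustrative check, with made-up inputs:

```python
# Illustrative direct invocation of the Tool wrappers (inputs are made up).
from tools import calculator_tool, text_processor_tool

# Tool.run() goes through the langchain tool interface...
print(calculator_tool.run("15 * 23 + 45"))  # Calculation result: 15 * 23 + 45 = 390
# ...while .func calls the wrapped Python function directly, which also
# allows passing the second "operation" argument.
print(text_processor_tool.func("Released 12/01/2024, updated 2025-01-15.", "extract_dates"))
```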
utils.py ADDED
@@ -0,0 +1,124 @@
+import requests
+import json
+from typing import List, Dict, Any
+
+def fetch_questions(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> List[Dict[str, Any]]:
+    """Fetch all questions from the GAIA API."""
+    try:
+        response = requests.get(f"{api_url}/questions", timeout=15)
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        print(f"Error fetching questions: {e}")
+        return []
+
+def fetch_random_question(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
+    """Fetch a random question from the GAIA API."""
+    try:
+        response = requests.get(f"{api_url}/random-question", timeout=15)
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        print(f"Error fetching random question: {e}")
+        return {}
+
+def submit_answers(username: str, agent_code: str, answers: List[Dict[str, str]],
+                   api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
+    """Submit answers to the GAIA API for scoring."""
+    try:
+        submission_data = {
+            "username": username.strip(),
+            "agent_code": agent_code,
+            "answers": answers
+        }
+
+        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=60)
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        print(f"Error submitting answers: {e}")
+        return {"error": str(e)}
+
+def format_gaia_answer(raw_answer: str) -> str:
+    """Format the agent's raw answer for GAIA submission (exact match)."""
+    # Remove common prefixes that might interfere with exact matching
+    prefixes_to_remove = [
+        "FINAL ANSWER:",
+        "Final Answer:",
+        "Answer:",
+        "The answer is:",
+        "The final answer is:",
+    ]
+
+    answer = raw_answer.strip()
+
+    for prefix in prefixes_to_remove:
+        if answer.startswith(prefix):
+            answer = answer[len(prefix):].strip()
+
+    # Remove trailing punctuation that might not be in ground truth
+    while answer and answer[-1] in '.!?':
+        answer = answer[:-1].strip()
+
+    return answer
+
+def analyze_question_type(question: str) -> Dict[str, bool]:
+    """Analyze what capabilities a question might need."""
+    question_lower = question.lower()
+
+    analysis = {
+        "needs_web_search": any(keyword in question_lower for keyword in [
+            "current", "recent", "latest", "today", "now", "2024", "2023"
+        ]),
+        "needs_file_processing": "file" in question_lower or "document" in question_lower,
+        "needs_calculation": any(keyword in question_lower for keyword in [
+            "calculate", "compute", "sum", "total", "average", "percentage", "multiply", "divide"
+        ]),
+        "needs_image_analysis": any(keyword in question_lower for keyword in [
+            "image", "picture", "photo", "visual", "shown", "displayed"
+        ]),
+        "needs_text_processing": any(keyword in question_lower for keyword in [
+            "extract", "find in", "search for", "list", "count"
+        ])
+    }
+
+    return analysis
+
+def create_execution_plan(question: str, task_id: str | None = None) -> List[str]:
+    """Create a step-by-step execution plan for a GAIA question."""
+    analysis = analyze_question_type(question)
+    plan = []
+
+    # Always start with understanding the question
+    plan.append("Analyze the question to understand what information is needed")
+
+    # Add file processing if needed
+    if task_id and analysis["needs_file_processing"]:
+        plan.append(f"Download and process any files associated with task {task_id}")
+
+    # Add web search if needed
+    if analysis["needs_web_search"]:
+        plan.append("Search the web for current/recent information")
+
+    # Add image analysis if needed
+    if analysis["needs_image_analysis"]:
+        plan.append("Analyze any images for visual information")
+
+    # Add calculation if needed
+    if analysis["needs_calculation"]:
+        plan.append("Perform necessary calculations")
+
+    # Add text processing if needed
+    if analysis["needs_text_processing"]:
+        plan.append("Process and extract specific information from text")
+
+    # Always end with synthesis
+    plan.append("Synthesize all information to provide the final answer")
+
+    return plan
+
+def log_agent_step(step: str, result: str, step_number: int | None = None):
+    """Log agent execution steps for debugging."""
+    prefix = f"Step {step_number}: " if step_number else ""
+    print(f"\n🤖 {prefix}{step}")
+    print(f"📝 Result: {result[:200]}{'...' if len(result) > 200 else ''}")
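Because the grader compares answers with exact match (see `docs/hands-on.mdx` above), `format_gaia_answer` is the last line of defense against prefix and punctuation noise. A quick illustration of its behavior on made-up strings:

```python
# Illustration of format_gaia_answer's normalization (example strings are made up).
from utils import format_gaia_answer

assert format_gaia_answer("FINAL ANSWER: Paris.") == "Paris"
assert format_gaia_answer("The answer is: 42!") == "42"
# Already-clean answers pass through unchanged.
assert format_gaia_answer("apples, oranges") == "apples, oranges"
```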