Denis Davydov committed on
Commit
a5c9e62
·
1 Parent(s): f9a7c9b

enhanced web search

Browse files
Files changed (7) hide show
  1. agent.py +47 -42
  2. app.py +13 -3
  3. requirements.txt +5 -2
  4. test_agent_format.py +99 -0
  5. test_local.py +200 -99
  6. tools.py +242 -6
  7. utils.py +39 -55
agent.py CHANGED
@@ -1,52 +1,45 @@
1
  from typing import TypedDict, Annotated
2
  import os
 
3
  from langgraph.graph.message import add_messages
 
 
 
4
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
5
  from langgraph.prebuilt import ToolNode
6
  from langgraph.graph import START, StateGraph
7
  from langgraph.checkpoint.memory import MemorySaver
8
  from langgraph.prebuilt import tools_condition
9
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
10
  from tools import agent_tools
11
- from utils import format_gaia_answer, analyze_question_type, create_execution_plan, log_agent_step
12
 
13
- # Initialize LLM (same as unit3)
14
- llm = HuggingFaceEndpoint(
15
- repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
16
- huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN"),
17
  temperature=0.1,
18
- max_new_tokens=1024,
 
19
  )
20
 
21
- chat = ChatHuggingFace(llm=llm, verbose=True)
22
  chat_with_tools = chat.bind_tools(agent_tools)
23
 
24
- # System prompt for intelligent question answering
25
- SYSTEM_PROMPT = """You are a highly capable AI assistant designed to answer questions accurately and helpfully.
26
-
27
- Your approach should include:
28
- - Multi-step reasoning and planning for complex questions
29
- - Intelligent tool usage when needed for web search, file processing, calculations, and analysis
30
- - Precise, factual answers based on reliable information
31
- - Breaking down complex questions into manageable steps
32
 
33
- IMPORTANT GUIDELINES:
34
- 1. Think step-by-step and use available tools when they can help provide better answers
35
- 2. For current information: Search the web for up-to-date facts
36
- 3. For files: Process associated files when task_id is provided
37
- 4. For visual content: Analyze images carefully when present
38
- 5. For calculations: Use computational tools for accuracy
39
- 6. Provide concise, direct answers without unnecessary prefixes
40
- 7. Focus on accuracy and helpfulness
41
- 8. Be factual and avoid speculation
42
 
43
- Your goal is to be as helpful and accurate as possible while using the right tools for each task."""
44
 
45
  # Generate the AgentState
46
  class AgentState(TypedDict):
47
  messages: Annotated[list[AnyMessage], add_messages]
48
  task_id: str
49
- question_analysis: dict
50
 
51
  def assistant(state: AgentState):
52
  """Main assistant function that processes messages and calls tools."""
@@ -91,18 +84,14 @@ class SmartAgent:
91
 
92
  def __init__(self):
93
  self.agent = create_smart_agent()
94
- print("🤖 Smart Agent initialized with LangGraph and tools")
95
 
96
- def __call__(self, question: str, task_id: str = None) -> str:
97
- """Process a question and return the formatted answer."""
98
  try:
99
  print(f"\n🎯 Processing question: {question[:100]}...")
100
 
101
- # Analyze the question
102
- analysis = analyze_question_type(question)
103
- print(f"📊 Question analysis: {analysis}")
104
-
105
- # Create execution plan
106
  plan = create_execution_plan(question, task_id)
107
  print(f"📋 Execution plan: {plan}")
108
 
@@ -111,24 +100,35 @@ class SmartAgent:
111
  if task_id:
112
  enhanced_question = f"Task ID: {task_id}\n\nQuestion: {question}\n\nNote: If this question involves files, use the file_download tool with task_id '{task_id}' to access associated files."
113
 
114
- # Invoke the agent
115
  thread_id = f"task-{task_id}" if task_id else "general"
116
- config = {"configurable": {"thread_id": thread_id}}
 
 
 
117
 
118
  initial_state = {
119
  "messages": [HumanMessage(content=enhanced_question)],
120
- "task_id": task_id or "",
121
- "question_analysis": analysis
122
  }
123
 
124
  result = self.agent.invoke(initial_state, config=config)
125
 
126
- # Extract the final answer
127
  if result and 'messages' in result and result['messages']:
128
  final_message = result['messages'][-1]
129
  raw_answer = final_message.content
 
 
 
 
 
 
 
 
130
  else:
131
  raw_answer = "No response generated"
 
132
 
133
  # Format the answer for submission
134
  formatted_answer = format_gaia_answer(raw_answer)
@@ -136,11 +136,16 @@ class SmartAgent:
136
  print(f"✅ Raw answer: {raw_answer}")
137
  print(f"🎯 Formatted answer: {formatted_answer}")
138
 
139
- return formatted_answer
 
 
 
 
 
140
 
141
  except Exception as e:
142
  error_msg = f"Error processing question: {str(e)}"
143
  print(f"❌ {error_msg}")
144
- return error_msg
145
 
146
  smart_agent = SmartAgent()
 
1
  from typing import TypedDict, Annotated
2
  import os
3
+ from dotenv import load_dotenv
4
  from langgraph.graph.message import add_messages
5
+
6
+ # Load environment variables from .env file
7
+ load_dotenv()
8
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph import START, StateGraph
11
  from langgraph.checkpoint.memory import MemorySaver
12
  from langgraph.prebuilt import tools_condition
13
+ from langchain_openai import ChatOpenAI
14
  from tools import agent_tools
15
+ from utils import format_gaia_answer, create_execution_plan, log_agent_step
16
 
17
+ # Initialize OpenAI LLM with GPT-4o (most capable model)
18
+ chat = ChatOpenAI(
19
+ model="gpt-4o",
 
20
  temperature=0.1,
21
+ max_tokens=1024,
22
+ api_key=os.environ.get("OPENAI_API_KEY")
23
  )
24
 
 
25
  chat_with_tools = chat.bind_tools(agent_tools)
26
 
27
+ # System prompt for GAIA evaluation (exact format required by HF)
28
+ SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
 
 
 
 
 
 
29
 
30
+ You have access to tools that can help you:
31
+ - Search the web for current information
32
+ - Download and process files associated with task IDs
33
+ - Analyze images
34
+ - Perform calculations
35
+ - Process text
 
 
 
36
 
37
+ IMPORTANT: You must provide a specific answer in the FINAL ANSWER format. Do not say you cannot find information or provide general approaches. Use web search to find the information you need, but limit yourself to 2-3 search attempts maximum. If you cannot find perfect information, make your best determination based on what you found and provide a concrete FINAL ANSWER. Always end with a specific FINAL ANSWER, never with explanations about not finding information."""
38
 
39
  # Generate the AgentState
40
  class AgentState(TypedDict):
41
  messages: Annotated[list[AnyMessage], add_messages]
42
  task_id: str
 
43
 
44
  def assistant(state: AgentState):
45
  """Main assistant function that processes messages and calls tools."""
 
84
 
85
  def __init__(self):
86
  self.agent = create_smart_agent()
87
+ print("🤖 Smart Agent initialized with OpenAI GPT-4o and tools")
88
 
89
+ def __call__(self, question: str, task_id: str = None) -> tuple:
90
+ """Process a question and return the formatted answer and reasoning trace."""
91
  try:
92
  print(f"\n🎯 Processing question: {question[:100]}...")
93
 
94
+ # Create simple execution plan for logging
 
 
 
 
95
  plan = create_execution_plan(question, task_id)
96
  print(f"📋 Execution plan: {plan}")
97
 
 
100
  if task_id:
101
  enhanced_question = f"Task ID: {task_id}\n\nQuestion: {question}\n\nNote: If this question involves files, use the file_download tool with task_id '{task_id}' to access associated files."
102
 
103
+ # Invoke the agent - let GPT-4o decide what tools to use
104
  thread_id = f"task-{task_id}" if task_id else "general"
105
+ config = {
106
+ "configurable": {"thread_id": thread_id},
107
+ "recursion_limit": 15 # Cap graph steps per run (below LangGraph's default of 25) so tool-call loops stay bounded
108
+ }
109
 
110
  initial_state = {
111
  "messages": [HumanMessage(content=enhanced_question)],
112
+ "task_id": task_id or ""
 
113
  }
114
 
115
  result = self.agent.invoke(initial_state, config=config)
116
 
117
+ # Extract the final answer and reasoning trace
118
  if result and 'messages' in result and result['messages']:
119
  final_message = result['messages'][-1]
120
  raw_answer = final_message.content
121
+
122
+ # Build reasoning trace from all messages
123
+ reasoning_trace = []
124
+ for msg in result['messages']:
125
+ if hasattr(msg, 'content') and msg.content:
126
+ reasoning_trace.append(msg.content)
127
+
128
+ reasoning_text = "\n---\n".join(reasoning_trace)
129
  else:
130
  raw_answer = "No response generated"
131
+ reasoning_text = "No reasoning trace available"
132
 
133
  # Format the answer for submission
134
  formatted_answer = format_gaia_answer(raw_answer)
 
136
  print(f"✅ Raw answer: {raw_answer}")
137
  print(f"🎯 Formatted answer: {formatted_answer}")
138
 
139
+ # Validate the formatted answer
140
+ if not formatted_answer or formatted_answer.strip() == "":
141
+ print("⚠️ WARNING: Empty formatted answer!")
142
+ formatted_answer = "ERROR: No valid answer extracted"
143
+
144
+ return formatted_answer, reasoning_text
145
 
146
  except Exception as e:
147
  error_msg = f"Error processing question: {str(e)}"
148
  print(f"❌ {error_msg}")
149
+ return error_msg, f"Error occurred: {str(e)}"
150
 
151
  smart_agent = SmartAgent()
app.py CHANGED
@@ -71,12 +71,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
71
  print(f"Skipping item with missing task_id or question: {item}")
72
  continue
73
  try:
74
- submitted_answer = agent(question_text, task_id)
75
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
 
 
 
76
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
77
  except Exception as e:
78
  print(f"Error running agent on task {task_id}: {e}")
79
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
80
 
81
  if not answers_payload:
82
  print("Agent did not produce any answers to submit.")
 
71
  print(f"Skipping item with missing task_id or question: {item}")
72
  continue
73
  try:
74
+ submitted_answer, reasoning_trace = agent(question_text, task_id)
75
+ answers_payload.append({
76
+ "task_id": task_id,
77
+ "model_answer": submitted_answer,
78
+ "reasoning_trace": reasoning_trace
79
+ })
80
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
81
  except Exception as e:
82
  print(f"Error running agent on task {task_id}: {e}")
83
+ error_answer = f"AGENT ERROR: {e}"
84
+ answers_payload.append({
85
+ "task_id": task_id,
86
+ "model_answer": error_answer,
87
+ "reasoning_trace": f"Error occurred: {str(e)}"
88
+ })
89
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
90
 
91
  if not answers_payload:
92
  print("Agent did not produce any answers to submit.")
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  # Core dependencies from unit3
2
  langchain
3
  langchain-community
4
- langchain-huggingface
5
  langgraph
6
  huggingface_hub
7
 
@@ -10,8 +10,11 @@ gradio
10
  requests
11
  pillow
12
  PyPDF2
13
- duckduckgo-search
14
  python-dotenv
 
 
 
15
 
16
  # For image processing and multimodal capabilities
17
  transformers
 
1
  # Core dependencies from unit3
2
  langchain
3
  langchain-community
4
+ langchain-openai
5
  langgraph
6
  huggingface_hub
7
 
 
10
  requests
11
  pillow
12
  PyPDF2
13
+ ddgs
14
  python-dotenv
15
+ beautifulsoup4
16
+ faiss-cpu
17
+ langchain-text-splitters
18
 
19
  # For image processing and multimodal capabilities
20
  transformers
test_agent_format.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify the agent's answer formatting works correctly.
4
+ """
5
+
6
+ import os
7
+ from agent import smart_agent
8
+ from utils import format_gaia_answer
9
+
10
+ def test_answer_formatting():
11
+ """Test the answer formatting function with various inputs."""
12
+
13
+ test_cases = [
14
+ # Test case: (raw_answer, expected_format)
15
+ ("I think the answer is 42. FINAL ANSWER: 42", "42"),
16
+ ("Let me calculate... FINAL ANSWER: 3.14159", "3.14159"),
17
+ ("After research, FINAL ANSWER: New York", "New York"),
18
+ ("The result is FINAL ANSWER: apple, banana, cherry", "apple, banana, cherry"),
19
+ ("FINAL ANSWER: 1,234", "1234"), # Should remove commas from numbers
20
+ ("FINAL ANSWER: \"Hello World\"", "Hello World"), # Should remove quotes
21
+ ("FINAL ANSWER: approximately 100", "100"), # Should remove qualifiers
22
+ ("No clear final answer format here", "No clear final answer format here"), # Fallback
23
+ ]
24
+
25
+ print("🧪 Testing answer formatting...")
26
+ for i, (raw, expected) in enumerate(test_cases, 1):
27
+ result = format_gaia_answer(raw)
28
+ status = "✅" if result == expected else "❌"
29
+ print(f"{status} Test {i}: '{raw}' -> '{result}' (expected: '{expected}')")
30
+ if result != expected:
31
+ print(f" ⚠️ Mismatch detected!")
32
+
33
+ print("\n" + "="*50)
34
+
35
+ def test_simple_question():
36
+ """Test the agent with a simple question."""
37
+ print("🤖 Testing agent with a simple question...")
38
+
39
+ question = "What is 2 + 2?"
40
+ try:
41
+ answer, reasoning = smart_agent(question)
42
+ print(f"Question: {question}")
43
+ print(f"Answer: {answer}")
44
+ print(f"Reasoning length: {len(reasoning)} characters")
45
+ print(f"Raw reasoning preview: {reasoning[:200]}...")
46
+
47
+ # Check if answer follows expected format
48
+ if answer and answer.strip():
49
+ print("✅ Agent returned a non-empty answer")
50
+ else:
51
+ print("❌ Agent returned empty answer")
52
+
53
+ except Exception as e:
54
+ print(f"❌ Error testing agent: {e}")
55
+
56
+ print("\n" + "="*50)
57
+
58
+ def test_api_format():
59
+ """Test that our submission format matches API expectations."""
60
+ print("📡 Testing API submission format...")
61
+
62
+ # Simulate what would be sent to the API
63
+ sample_submission = {
64
+ "task_id": "test_task_1",
65
+ "model_answer": "42",
66
+ "reasoning_trace": "I calculated 2+2 and got 4, but the question asks for something else..."
67
+ }
68
+
69
+ required_fields = ["task_id", "model_answer"]
70
+ optional_fields = ["reasoning_trace"]
71
+
72
+ print("Required fields check:")
73
+ for field in required_fields:
74
+ if field in sample_submission:
75
+ print(f"✅ {field}: {sample_submission[field]}")
76
+ else:
77
+ print(f"❌ Missing required field: {field}")
78
+
79
+ print("Optional fields check:")
80
+ for field in optional_fields:
81
+ if field in sample_submission:
82
+ print(f"✅ {field}: Present ({len(str(sample_submission[field]))} chars)")
83
+ else:
84
+ print(f"ℹ️ Optional field not present: {field}")
85
+
86
+ if __name__ == "__main__":
87
+ print("🔧 GAIA Agent Format Testing")
88
+ print("="*50)
89
+
90
+ # Test 1: Answer formatting
91
+ test_answer_formatting()
92
+
93
+ # Test 2: Simple agent question
94
+ test_simple_question()
95
+
96
+ # Test 3: API format
97
+ test_api_format()
98
+
99
+ print("🏁 Testing complete!")
test_local.py CHANGED
@@ -1,137 +1,238 @@
1
  #!/usr/bin/env python3
2
  """
3
- Local testing script for the GAIA agent.
4
- Run this to test the agent before deploying to HF Spaces.
5
  """
6
 
7
- import os
8
- import sys
9
- from dotenv import load_dotenv
10
-
11
- # Load environment variables
12
- load_dotenv()
13
-
14
- # Add current directory to path for imports
15
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
16
-
17
- from utils import fetch_random_question, analyze_question_type
18
  from agent import smart_agent
19
 
20
- def test_question_analysis():
21
- """Test the question analysis functionality."""
22
- print("🧪 Testing question analysis...")
23
-
24
- test_questions = [
25
- "What is the current population of Tokyo?",
26
- "Calculate 15 * 23 + 45",
27
- "Analyze the image shown in the document",
28
- "Extract all dates from the provided text file"
29
- ]
30
-
31
- for question in test_questions:
32
- analysis = analyze_question_type(question)
33
- print(f"Question: {question}")
34
- print(f"Analysis: {analysis}")
35
- print()
36
-
37
- def test_tools():
38
- """Test individual tools."""
39
- print("🔧 Testing individual tools...")
40
 
41
- # Test calculator
42
- from tools import calculator_tool
43
- calc_result = calculator_tool.func("15 + 27")
44
- print(f"Calculator test: {calc_result}")
45
 
46
- # Test web search (if available)
47
- try:
48
- from tools import web_search_tool
49
- search_result = web_search_tool.func("Python programming language")
50
- print(f"Web search test: {search_result[:100]}...")
51
- except Exception as e:
52
- print(f"Web search test failed: {e}")
53
 
 
54
  print()
55
-
56
- def test_agent_simple():
57
- """Test the agent with a simple question."""
58
- print("🤖 Testing Smart agent with simple question...")
59
 
60
- test_question = "What is 25 + 17?"
 
61
  try:
62
- result = smart_agent(test_question)
63
- print(f"Question: {test_question}")
64
- print(f"Answer: {result}")
65
- print("✅ Simple test passed!")
 
 
 
 
66
  except Exception as e:
67
- print(f"❌ Simple test failed: {e}")
 
68
 
 
 
 
 
 
 
69
  print()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- def test_agent_with_api():
72
- """Test the agent with a real GAIA question from the API."""
73
- print("🌐 Testing with real GAIA question from API...")
 
 
74
 
 
 
75
  try:
76
  question_data = fetch_random_question()
77
  if not question_data:
78
- print("❌ Failed to fetch question from API")
79
- return
80
 
81
- task_id = question_data.get("task_id")
82
- question = question_data.get("question")
83
 
84
- print(f"Task ID: {task_id}")
85
- print(f"Question: {question}")
 
 
 
 
 
 
86
 
87
- # Run the agent
88
- answer = smart_agent(question, task_id)
89
- print(f"Agent Answer: {answer}")
90
- print("✅ API test completed!")
 
 
 
 
 
 
 
 
 
 
91
 
92
  except Exception as e:
93
- print(f"❌ API test failed: {e}")
 
94
 
 
 
 
 
 
 
95
  print()
96
-
97
- def check_environment():
98
- """Check if all required environment variables are set."""
99
- print("🔍 Checking environment...")
100
 
101
- required_vars = ["HUGGINGFACE_API_TOKEN"]
102
- missing_vars = []
 
 
 
 
103
 
104
- for var in required_vars:
105
- if not os.getenv(var):
106
- missing_vars.append(var)
107
- else:
108
- print(f"✅ {var} is set")
109
 
110
- if missing_vars:
111
- print(f"❌ Missing environment variables: {missing_vars}")
112
- print("Please set these in your .env file or environment")
 
 
113
  return False
114
 
115
- print("✅ All required environment variables are set")
116
- return True
117
-
118
- def main():
119
- """Run all tests."""
120
- print("🚀 Starting GAIA Agent Local Tests")
121
- print("=" * 50)
 
 
 
 
122
 
123
- # Check environment first
124
- if not check_environment():
125
- print("❌ Environment check failed. Please fix and try again.")
126
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  print()
129
 
130
- # Run tests
131
- # test_question_analysis()
132
- # test_tools()
133
- # test_agent_simple()
134
- test_agent_with_api()
 
 
 
 
 
 
135
 
136
  if __name__ == "__main__":
137
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ Test script for validating agent performance on GAIA questions.
4
+ Runs a predefined question by default (a random-question test is also defined) and exercises the complete pipeline without submitting.
5
  """
6
 
7
+ import time
8
+ from utils import fetch_random_question, format_gaia_answer
 
 
 
 
 
 
 
 
 
9
  from agent import smart_agent
10
 
11
+ def test_predefined_gaia_question():
12
+ """Test the agent with a predefined GAIA question to verify web search and answer format."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ print("🧪 Testing predefined GAIA question (1928 Olympics)")
15
+ print("="*60)
 
 
16
 
17
+ # Predefined question that requires web search
18
+ question = "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer."
19
+ task_id = "predefined_test"
 
 
 
 
20
 
21
+ print(f"❓ Question: {question}")
22
  print()
 
 
 
 
23
 
24
+ # Run the agent
25
+ print("🤖 Running smart agent on the predefined question...")
26
  try:
27
+ start_time = time.time()
28
+ answer, reasoning_trace = smart_agent(question, task_id)
29
+ end_time = time.time()
30
+
31
+ processing_time = end_time - start_time
32
+ print(f"✅ Agent completed in {processing_time:.2f} seconds")
33
+ print()
34
+
35
  except Exception as e:
36
+ print(f"❌ Error running agent: {e}")
37
+ return False
38
 
39
+ # Display results
40
+ print("📊 AGENT RESULTS")
41
+ print("-" * 40)
42
+ print(f"🎯 Formatted Answer: '{answer}'")
43
+ print(f"📝 Reasoning Length: {len(reasoning_trace)} characters")
44
+ print(f"⏱️ Processing Time: {processing_time:.2f}s")
45
  print()
46
+
47
+ # Show reasoning trace preview
48
+ print("🧠 REASONING TRACE PREVIEW")
49
+ print("-" * 40)
50
+ reasoning_preview = reasoning_trace[:400] + "..." if len(reasoning_trace) > 400 else reasoning_trace
51
+ print(reasoning_preview)
52
+ print()
53
+
54
+ # Validate answer format for GAIA
55
+ print("✅ GAIA FORMAT VALIDATION")
56
+ print("-" * 40)
57
+
58
+ # Check if answer is not empty
59
+ if answer and answer.strip():
60
+ print("✅ Answer is not empty")
61
+ else:
62
+ print("❌ Answer is empty or None")
63
+ return False
64
+
65
+ # Check if answer looks like IOC country code (2-3 uppercase letters)
66
+ import re
67
+ if re.match(r'^[A-Z]{2,3}$', answer.strip()):
68
+ print(f"✅ Answer '{answer}' matches IOC country code format")
69
+ else:
70
+ print(f"⚠️ Answer '{answer}' may not be in correct IOC format (should be 2-3 uppercase letters)")
71
+
72
+ # Check if web search was used (look for web_search in reasoning)
73
+ if "web_search" in reasoning_trace.lower() or "search" in reasoning_trace.lower():
74
+ print("✅ Agent appears to have used web search")
75
+ else:
76
+ print("⚠️ No clear evidence of web search usage")
77
+
78
+ # Check answer length (should be short for country code)
79
+ if len(answer.strip()) <= 5:
80
+ print("✅ Answer length is appropriate for country code")
81
+ else:
82
+ print("⚠️ Answer seems too long for a country code")
83
+
84
+ print()
85
+
86
+ # Final validation
87
+ print("🏁 FINAL VALIDATION")
88
+ print("-" * 40)
89
+
90
+ if answer and answer.strip() and len(answer.strip()) <= 5:
91
+ print("✅ PREDEFINED TEST PASSED - Answer format suitable for GAIA")
92
+ print(f"🎯 Agent produced: '{answer}' for 1928 Olympics question")
93
+ return True
94
+ else:
95
+ print("❌ PREDEFINED TEST FAILED - Answer format needs improvement")
96
+ return False
97
 
98
+ def test_random_gaia_question():
99
+ """Test the agent with a random GAIA question and validate the complete pipeline."""
100
+
101
+ print("🔧 GAIA Random Question Test")
102
+ print("="*60)
103
 
104
+ # Step 1: Fetch a random question
105
+ print("📡 Fetching random question from GAIA API...")
106
  try:
107
  question_data = fetch_random_question()
108
  if not question_data:
109
+ print("❌ Failed to fetch random question")
110
+ return False
111
 
112
+ task_id = question_data.get("task_id", "unknown")
113
+ question_text = question_data.get("question", "")
114
 
115
+ if not question_text:
116
+ print(" No question text in response")
117
+ return False
118
+
119
+ print(f"✅ Successfully fetched question")
120
+ print(f"📋 Task ID: {task_id}")
121
+ print(f"❓ Question: {question_text}")
122
+ print()
123
 
124
+ except Exception as e:
125
+ print(f"❌ Error fetching question: {e}")
126
+ return False
127
+
128
+ # Step 2: Run the agent
129
+ print("🤖 Running smart agent on the question...")
130
+ try:
131
+ start_time = time.time()
132
+ answer, reasoning_trace = smart_agent(question_text, task_id)
133
+ end_time = time.time()
134
+
135
+ processing_time = end_time - start_time
136
+ print(f"✅ Agent completed in {processing_time:.2f} seconds")
137
+ print()
138
 
139
  except Exception as e:
140
+ print(f"❌ Error running agent: {e}")
141
+ return False
142
 
143
+ # Step 3: Display results
144
+ print("📊 AGENT RESULTS")
145
+ print("-" * 40)
146
+ print(f"🎯 Formatted Answer: '{answer}'")
147
+ print(f"📝 Reasoning Length: {len(reasoning_trace)} characters")
148
+ print(f"⏱️ Processing Time: {processing_time:.2f}s")
149
  print()
 
 
 
 
150
 
151
+ # Step 4: Show reasoning trace preview
152
+ print("🧠 REASONING TRACE PREVIEW")
153
+ print("-" * 40)
154
+ reasoning_preview = reasoning_trace[:300] + "..." if len(reasoning_trace) > 300 else reasoning_trace
155
+ print(reasoning_preview)
156
+ print()
157
 
158
+ # Step 5: Validate answer format
159
+ print("✅ ANSWER VALIDATION")
160
+ print("-" * 40)
 
 
161
 
162
+ # Check if answer is not empty
163
+ if answer and answer.strip():
164
+ print(" Answer is not empty")
165
+ else:
166
+ print("❌ Answer is empty or None")
167
  return False
168
 
169
+ # Check if answer contains error messages
170
+ if "ERROR" in answer.upper() or "FAILED" in answer.upper():
171
+ print("⚠️ Answer contains error message")
172
+ else:
173
+ print(" Answer appears to be valid (no error messages)")
174
+
175
+ # Check answer length (reasonable bounds)
176
+ if len(answer) > 1000:
177
+ print("⚠️ Answer is very long (>1000 chars) - might need review")
178
+ else:
179
+ print("✅ Answer length is reasonable")
180
 
181
+ print()
182
+
183
+ # Step 6: Show submission format
184
+ print("📡 SUBMISSION FORMAT PREVIEW")
185
+ print("-" * 40)
186
+
187
+ submission_entry = {
188
+ "task_id": task_id,
189
+ "model_answer": answer,
190
+ "reasoning_trace": reasoning_trace
191
+ }
192
+
193
+ # Validate required fields
194
+ required_fields = ["task_id", "model_answer"]
195
+ all_valid = True
196
+
197
+ for field in required_fields:
198
+ if field in submission_entry and submission_entry[field]:
199
+ print(f"✅ {field}: '{submission_entry[field][:50]}{'...' if len(str(submission_entry[field])) > 50 else ''}'")
200
+ else:
201
+ print(f"❌ Missing or empty {field}")
202
+ all_valid = False
203
+
204
+ # Check optional fields
205
+ if "reasoning_trace" in submission_entry and submission_entry["reasoning_trace"]:
206
+ print(f"✅ reasoning_trace: Present ({len(submission_entry['reasoning_trace'])} chars)")
207
+ else:
208
+ print("ℹ️ reasoning_trace: Not present (optional)")
209
 
210
  print()
211
 
212
+ # Step 7: Final validation
213
+ print("🏁 FINAL VALIDATION")
214
+ print("-" * 40)
215
+
216
+ if all_valid and answer and answer.strip():
217
+ print("✅ ALL CHECKS PASSED - Agent is ready for submission!")
218
+ print("🚀 You can now run the full evaluation with confidence.")
219
+ return True
220
+ else:
221
+ print("❌ SOME CHECKS FAILED - Please review the issues above.")
222
+ return False
223
 
224
  if __name__ == "__main__":
225
+ print("🧪 Testing agent with predefined GAIA question...")
226
+ print("This test validates web search functionality and answer formatting.")
227
+ print()
228
+
229
+ # Test the predefined 1928 Olympics question
230
+ success = test_predefined_gaia_question()
231
+
232
+ print("\n" + "="*60)
233
+ if success:
234
+ print("🎉 Predefined test completed successfully! Agent produces well-defined answers.")
235
+ print("💡 You can also run test_random_gaia_question() for additional testing.")
236
+ else:
237
+ print("⚠️ Predefined test revealed issues that need to be addressed.")
238
+ print("="*60)
tools.py CHANGED
@@ -4,23 +4,106 @@ import os
4
  from PIL import Image
5
  import io
6
  import base64
7
- from langchain_community.tools import DuckDuckGoSearchRun
8
  from typing import Optional
9
  import json
10
  import PyPDF2
11
  import tempfile
 
 
 
 
 
 
 
12
 
13
- # Initialize web search tool
14
- search_tool = DuckDuckGoSearchRun()
15
 
16
  def web_search_tool_func(query: str) -> str:
17
- """Searches the web for information using DuckDuckGo."""
18
  try:
19
- results = search_tool.run(query)
20
- return results
 
 
 
 
 
 
 
 
 
 
 
21
  except Exception as e:
22
  return f"Web search failed: {str(e)}"
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  web_search_tool = Tool(
25
  name="web_search",
26
  func=web_search_tool_func,
@@ -170,9 +253,162 @@ text_processor_tool = Tool(
170
  description="Processes text for various operations like summarization, number extraction, date extraction. Specify operation as second parameter."
171
  )
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  # List of all tools for easy import
174
  agent_tools = [
175
  web_search_tool,
 
176
  file_download_tool,
177
  image_analysis_tool,
178
  calculator_tool,
 
4
  from PIL import Image
5
  import io
6
  import base64
7
+ from ddgs import DDGS
8
  from typing import Optional
9
  import json
10
  import PyPDF2
11
  import tempfile
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+ from langchain_community.vectorstores import FAISS
15
+ from langchain_openai import OpenAIEmbeddings
16
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
17
+ from langchain.schema import Document
18
+ from dotenv import load_dotenv
19
 
20
+ # Load environment variables
21
+ load_dotenv()
22
 
23
  def web_search_tool_func(query: str) -> str:
24
+ """Enhanced web search with Wikipedia priority using DDGS."""
25
  try:
26
+ # Try Wikipedia-specific search first
27
+ print(f"🔍 Performing web search for: {query}")
28
+ wiki_results = search_wikipedia(query)
29
+ if wiki_results and len(wiki_results.strip()) > 100: # Good Wikipedia result
30
+ return f"Wikipedia search results:\n{wiki_results}"
31
+
32
+ # Fall back to general web search
33
+ general_results = search_general(query)
34
+ if general_results:
35
+ return f"Web search results:\n{general_results}"
36
+ else:
37
+ return "No relevant search results found."
38
+
39
  except Exception as e:
40
  return f"Web search failed: {str(e)}"
41
 
42
def search_wikipedia(query: str) -> str:
    """Search Wikipedia (via DDGS text search) for factual information.

    Several query formulations are tried in order; the first one that yields
    at least one relevant Wikipedia hit wins.

    A hit is kept only when its URL's *host* is wikipedia.org (or a subdomain)
    and at least one meaningful query term appears in its title or snippet.

    Args:
        query: Free-text search query.

    Returns:
        Matching hits formatted as "Title/Content/Source" records joined by
        "---" separators, an empty string when nothing relevant was found, or
        a "Wikipedia search failed: ..." message if the search client itself
        could not be used.
    """
    from urllib.parse import urlparse

    # Loop-invariant: compute the relevance terms once. Very short tokens
    # ("of", "in", "a", ...) match almost any text, so they are ignored unless
    # the query consists of nothing else.
    all_terms = query.lower().split()
    terms = [term for term in all_terms if len(term) > 2] or all_terms

    # Multiple Wikipedia search strategies, most specific first.
    search_queries = [
        f"site:en.wikipedia.org {query}",  # English Wikipedia specifically
        f"{query} site:wikipedia.org",     # Alternative format
        f"{query} wikipedia",              # General Wikipedia search
    ]

    try:
        with DDGS() as ddgs:
            for search_query in search_queries:
                try:
                    results = list(ddgs.text(search_query, max_results=3))

                    wiki_results = []
                    for result in results:
                        title = result.get('title', 'No title')
                        body = result.get('body', 'No content')
                        url = result.get('href', '')

                        # Compare the parsed host rather than searching the
                        # whole URL, so e.g. "example.com/?ref=wikipedia.org"
                        # is not mistaken for Wikipedia.
                        host = (urlparse(url).hostname or '').lower()
                        if host != 'wikipedia.org' and not host.endswith('.wikipedia.org'):
                            continue

                        haystack = f"{title}\n{body}".lower()
                        if any(term in haystack for term in terms):
                            wiki_results.append(f"Title: {title}\nContent: {body}\nSource: {url}\n")

                    if wiki_results:
                        return "\n---\n".join(wiki_results)

                except Exception as e:
                    # Best effort: report the failure and try the next query.
                    print(f"Wikipedia query failed ({search_query}): {e}")
                    continue

            return ""  # No good results found

    except Exception as e:
        return f"Wikipedia search failed: {str(e)}"
79
+
80
def search_general(query: str) -> str:
    """Run a general web search (via DDGS) as a fallback.

    Hits from reliable sources are listed first: Wikipedia, Britannica, and
    .edu / .gov hosts (including country variants such as .edu.au or .gov.uk).
    Within each group the search engine's original ranking is preserved.

    Args:
        query: Free-text search query.

    Returns:
        Hits formatted as "Title/Content/Source" records joined by "---"
        separators, an empty string when there are no hits, or a
        "General search failed: ..." message on error.
    """
    from urllib.parse import urlparse

    trusted_domains = ('wikipedia.org', 'britannica.com')
    institutional_labels = {'edu', 'gov'}

    def _is_reliable(url: str) -> bool:
        # Decide on the parsed host name; a substring test on the full URL
        # would also match e.g. "reducing-costs.com" or "?q=gov".
        try:
            host = (urlparse(url).hostname or '').lower()
        except ValueError:
            return False
        if any(host == domain or host.endswith('.' + domain) for domain in trusted_domains):
            return True
        labels = host.split('.')
        tld = labels[-1]
        second_level = labels[-2] if len(labels) >= 2 else ''
        # "mit.edu" / "nasa.gov", or country-scoped "x.edu.au" / "www.gov.uk".
        return tld in institutional_labels or (
            len(tld) == 2 and second_level in institutional_labels
        )

    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=5))

        if not results:
            return ""

        # Stable partition: reliable sources first, engine order kept in both.
        reliable = []
        other = []
        for result in results:
            title = result.get('title', 'No title')
            body = result.get('body', 'No content')
            url = result.get('href', '')

            entry = f"Title: {title}\nContent: {body}\nSource: {url}\n"
            (reliable if _is_reliable(url) else other).append(entry)

        return "\n---\n".join(reliable + other)

    except Exception as e:
        return f"General search failed: {str(e)}"
106
+
107
  web_search_tool = Tool(
108
  name="web_search",
109
  func=web_search_tool_func,
 
253
  description="Processes text for various operations like summarization, number extraction, date extraction. Specify operation as second parameter."
254
  )
255
 
256
def enhanced_web_retrieval_tool_func(query: str) -> str:
    """Search, download the top pages and answer ``query`` via vector retrieval.

    Pipeline: look up candidate URLs, fetch the text of at most four of them,
    wrap each non-empty page in a ``Document`` and hand the collection to the
    vector-store search. Any exception is reported as a string.
    """
    try:
        print(f"🔍 Enhanced web retrieval for: {query}")

        # Stage 1: candidate pages.
        hits = get_search_urls(query)
        if not hits:
            return "No search results found."

        # Stage 2: download page text for the first four candidates.
        documents = []
        for hit in hits[:4]:
            page_url = hit.get('url', '')
            page_title = hit.get('title', 'No title')

            print(f"📄 Fetching content from: {page_title}")
            page_text = fetch_webpage_content(page_url)
            if not page_text:
                continue  # fetch failed or page was empty

            documents.append(
                Document(
                    page_content=page_text,
                    metadata={"source": page_url, "title": page_title},
                )
            )

        # Stage 3: semantic search over whatever could be fetched.
        if documents:
            return search_documents_with_vector_store(documents, query)
        return "Could not fetch content from any search results."

    except Exception as exc:
        return f"Enhanced web retrieval failed: {str(exc)}"
289
+
290
def get_search_urls(query: str) -> list:
    """Collect up to four unique English Wikipedia pages for ``query`` via DDGS.

    Args:
        query: Free-text search query.

    Returns:
        A list of dicts with keys 'url', 'title' and 'snippet', in the order
        the pages were found. An empty list is returned when nothing matched
        or when the search client itself failed.
    """
    from urllib.parse import urlparse

    max_pages = 4  # Limit to 4 unique Wikipedia pages

    # Wikipedia-specific search queries.
    wikipedia_queries = [
        f"site:en.wikipedia.org {query}",
        f"{query} site:en.wikipedia.org",
    ]

    search_results = []
    seen_urls = set()

    try:
        with DDGS() as ddgs:
            for wiki_query in wikipedia_queries:
                if len(search_results) >= max_pages:
                    break

                try:
                    results = list(ddgs.text(wiki_query, max_results=2))

                    for result in results:
                        url = result.get('href', '')

                        # Match on the parsed host, not on a substring of the
                        # URL, so only genuine en.wikipedia.org pages pass.
                        host = (urlparse(url).hostname or '').lower()
                        if host != 'en.wikipedia.org' or url in seen_urls:
                            continue

                        search_results.append({
                            'url': url,
                            'title': result.get('title', 'No title'),
                            'snippet': result.get('body', 'No content'),
                        })
                        seen_urls.add(url)

                        if len(search_results) >= max_pages:
                            break

                except Exception as e:
                    # Best effort: report the failure and try the next query.
                    print(f"Wikipedia query failed ({wiki_query}): {e}")
                    continue

        return search_results

    except Exception as e:
        print(f"Wikipedia search URL retrieval failed: {e}")
        return []
334
+
335
def fetch_webpage_content(url: str) -> str:
    """Download ``url`` and return its visible text, whitespace-normalised.

    The page is requested with a browser-like User-Agent and a 10 second
    timeout. <script> and <style> elements are dropped before text extraction.
    The result is capped at 20000 characters. Any failure is printed and an
    empty string is returned.
    """
    try:
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()

        # Parse the HTML and discard non-content elements.
        parsed = BeautifulSoup(page.content, 'html.parser')
        for tag in parsed(["script", "style"]):
            tag.decompose()

        raw_text = parsed.get_text()

        # Trim every line, break it on spaces, and keep the non-empty pieces.
        pieces = []
        for raw_line in raw_text.splitlines():
            for phrase in raw_line.strip().split(" "):
                phrase = phrase.strip()
                if phrase:
                    pieces.append(phrase)
        cleaned = ' '.join(pieces)

        return cleaned[:20000]  # cap the amount of text passed downstream

    except Exception as err:
        print(f"Failed to fetch content from {url}: {err}")
        return ""
365
+
366
def search_documents_with_vector_store(documents: list, query: str) -> str:
    """Index ``documents`` in a FAISS store and return the chunks closest to ``query``.

    Documents are split into chunks (chunk_size=1000, chunk_overlap=200),
    embedded with ``OpenAIEmbeddings`` and searched for the five most similar
    chunks. Each hit is rendered as "Result N from <title>", its text and its
    source URL; hits are joined by "---" separators. Any exception is
    reported as a "Vector search failed: ..." string.
    """
    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = splitter.split_documents(documents)
        if not chunks:
            return "No content to process after splitting."

        # Build the index and query it with the caller's original wording.
        store = FAISS.from_documents(chunks, OpenAIEmbeddings())
        hits = store.similarity_search(query, k=5)

        # Each hit's text is capped at 5000 characters (presumably never
        # reached with chunk_size=1000 — verify if the splitter changes).
        formatted = [
            f"Result {rank} from {hit.metadata.get('title', 'No title')}:\n"
            f"{hit.page_content[:5000]}\n"
            f"Source: {hit.metadata.get('source', 'Unknown source')}\n"
            for rank, hit in enumerate(hits, 1)
        ]
        return "\n---\n".join(formatted)

    except Exception as err:
        return f"Vector search failed: {str(err)}"
401
+
402
# Expose the Wikipedia retrieval pipeline to the agent as a named tool.
enhanced_web_retrieval_tool = Tool(
    func=enhanced_web_retrieval_tool_func,
    name="enhanced_web_retrieval",
    description=(
        "Enhanced Wikipedia-only search with vector retrieval. "
        "Fetches full content from English Wikipedia pages and uses semantic search to find relevant information. "
        "Use this for factual questions that need detailed Wikipedia content analysis."
    ),
)
407
+
408
  # List of all tools for easy import
409
  agent_tools = [
410
  web_search_tool,
411
+ enhanced_web_retrieval_tool,
412
  file_download_tool,
413
  image_analysis_tool,
414
  calculator_tool,
utils.py CHANGED
@@ -41,78 +41,62 @@ def submit_answers(username: str, agent_code: str, answers: List[Dict[str, str]]
41
 
42
  def format_gaia_answer(raw_answer: str) -> str:
43
  """Format the agent's raw answer for GAIA submission (exact match)."""
44
- # Remove common prefixes that might interfere with exact matching
45
- prefixes_to_remove = [
46
- "FINAL ANSWER:",
47
- "Final Answer:",
48
- "Answer:",
49
- "The answer is:",
50
- "The final answer is:",
51
- ]
52
 
53
- answer = raw_answer.strip()
 
 
54
 
55
- for prefix in prefixes_to_remove:
56
- if answer.startswith(prefix):
57
- answer = answer[len(prefix):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  # Remove trailing punctuation that might not be in ground truth
60
  while answer and answer[-1] in '.!?':
61
  answer = answer[:-1].strip()
62
 
63
- return answer
64
-
65
- def analyze_question_type(question: str) -> Dict[str, bool]:
66
- """Analyze what capabilities a question might need."""
67
- question_lower = question.lower()
 
 
68
 
69
- analysis = {
70
- "needs_web_search": any(keyword in question_lower for keyword in [
71
- "current", "recent", "latest", "today", "now", "2024", "2023"
72
- ]),
73
- "needs_file_processing": "file" in question_lower or "document" in question_lower,
74
- "needs_calculation": any(keyword in question_lower for keyword in [
75
- "calculate", "compute", "sum", "total", "average", "percentage", "multiply", "divide"
76
- ]),
77
- "needs_image_analysis": any(keyword in question_lower for keyword in [
78
- "image", "picture", "photo", "visual", "shown", "displayed"
79
- ]),
80
- "needs_text_processing": any(keyword in question_lower for keyword in [
81
- "extract", "find in", "search for", "list", "count"
82
- ])
83
- }
84
 
85
- return analysis
86
 
87
  def create_execution_plan(question: str, task_id: str = None) -> List[str]:
88
- """Create a step-by-step execution plan for a GAIA question."""
89
- analysis = analyze_question_type(question)
90
  plan = []
91
 
92
  # Always start with understanding the question
93
  plan.append("Analyze the question to understand what information is needed")
94
 
95
- # Add file processing if needed
96
- if task_id and analysis["needs_file_processing"]:
97
- plan.append(f"Download and process any files associated with task {task_id}")
98
-
99
- # Add web search if needed
100
- if analysis["needs_web_search"]:
101
- plan.append("Search the web for current/recent information")
102
-
103
- # Add image analysis if needed
104
- if analysis["needs_image_analysis"]:
105
- plan.append("Analyze any images for visual information")
106
-
107
- # Add calculation if needed
108
- if analysis["needs_calculation"]:
109
- plan.append("Perform necessary calculations")
110
-
111
- # Add text processing if needed
112
- if analysis["needs_text_processing"]:
113
- plan.append("Process and extract specific information from text")
114
 
115
- # Always end with synthesis
 
116
  plan.append("Synthesize all information to provide the final answer")
117
 
118
  return plan
 
41
 
42
  def format_gaia_answer(raw_answer: str) -> str:
43
  """Format the agent's raw answer for GAIA submission (exact match)."""
44
+ import re
 
 
 
 
 
 
 
45
 
46
+ # Look for FINAL ANSWER: pattern (case insensitive)
47
+ final_answer_pattern = r'FINAL ANSWER:\s*(.+?)(?:\n|$)'
48
+ match = re.search(final_answer_pattern, raw_answer, re.IGNORECASE | re.DOTALL)
49
 
50
+ if match:
51
+ answer = match.group(1).strip()
52
+ else:
53
+ # Fallback: try to extract from common patterns
54
+ fallback_patterns = [
55
+ r'(?:The\s+)?(?:final\s+)?answer\s+is:?\s*(.+?)(?:\n|$)',
56
+ r'(?:Answer|Result):\s*(.+?)(?:\n|$)',
57
+ ]
58
+
59
+ answer = raw_answer.strip()
60
+ for pattern in fallback_patterns:
61
+ match = re.search(pattern, answer, re.IGNORECASE)
62
+ if match:
63
+ answer = match.group(1).strip()
64
+ break
65
+
66
+ # Apply GAIA formatting rules
67
+ answer = answer.strip()
68
 
69
  # Remove trailing punctuation that might not be in ground truth
70
  while answer and answer[-1] in '.!?':
71
  answer = answer[:-1].strip()
72
 
73
+ # Remove quotes if they wrap the entire answer
74
+ if len(answer) >= 2 and answer[0] == answer[-1] and answer[0] in '"\'':
75
+ answer = answer[1:-1].strip()
76
+
77
+ # Additional cleanup for common issues
78
+ # Remove "approximately" or similar qualifiers
79
+ answer = re.sub(r'^(?:approximately|about|roughly|around)\s+', '', answer, flags=re.IGNORECASE)
80
 
81
+ # For numbers, ensure no commas (as per GAIA rules)
82
+ if re.match(r'^[\d,]+(?:\.\d+)?$', answer):
83
+ answer = answer.replace(',', '')
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ return answer
86
 
87
  def create_execution_plan(question: str, task_id: str = None) -> List[str]:
88
+ """Create a simple execution plan - let GPT-4o decide what tools to use."""
 
89
  plan = []
90
 
91
  # Always start with understanding the question
92
  plan.append("Analyze the question to understand what information is needed")
93
 
94
+ # Add file processing if task_id is provided
95
+ if task_id:
96
+ plan.append(f"Check for and process any files associated with task {task_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # Let the LLM decide what other tools to use
99
+ plan.append("Use appropriate tools (web search, calculations, etc.) as needed")
100
  plan.append("Synthesize all information to provide the final answer")
101
 
102
  return plan