mjschock committed
Commit ff83a02 · unverified · 1 Parent(s): 13388e5

Add .env.example for API configuration and update .gitignore to exclude .env files. Refactor app.py to implement BasicAgent class, replacing AgentRunner, and improve error handling and logging. Remove unused configuration and graph modules to streamline the codebase.

Files changed (12)
  1. .env.example +4 -0
  2. .gitignore +1 -0
  3. app.py +41 -53
  4. configuration.py +0 -33
  5. graph.py +0 -241
  6. main.py +255 -0
  7. prompts/code_agent.yaml +0 -325
  8. prompts/toolcalling_agent.yaml +0 -239
  9. requirements.txt +3 -0
  10. runner.py +0 -180
  11. test_agent.py +0 -263
  12. tools.py +0 -85
.env.example ADDED
@@ -0,0 +1,4 @@
+ # Required API configuration
+ API_BASE=https://api.anthropic.com
+ API_KEY=sk-replace-with-your-api-key
+ MODEL_ID=anthropic/claude-3-7-sonnet-20250219
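For context, the new main.py in this commit loads these values with python-dotenv and hands them to a LiteLLMModel. A minimal sketch of that wiring (copy .env.example to .env and fill in real values first; only the three variable names above come from this file):

```python
import os

from dotenv import find_dotenv, load_dotenv
from smolagents import LiteLLMModel

# Load .env (created from .env.example) into the environment.
load_dotenv(find_dotenv())

API_BASE = os.getenv("API_BASE")
API_KEY = os.getenv("API_KEY")
MODEL_ID = os.getenv("MODEL_ID")

if not all([API_BASE, API_KEY, MODEL_ID]):
    raise ValueError("Missing required environment variables: API_BASE, API_KEY, MODEL_ID")

model = LiteLLMModel(api_base=API_BASE, api_key=API_KEY, model_id=MODEL_ID)
```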
.gitignore CHANGED
@@ -1,3 +1,4 @@
+ .env
  __pycache__
  .pytest_cache
  .venv
app.py CHANGED
@@ -1,25 +1,34 @@
  import os
-
  import gradio as gr
- import pandas as pd
  import requests
- from agent import AgentRunner
+ import inspect
+ import pandas as pd
  
  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
  
-
- def run_and_submit_all(profile: gr.OAuthProfile | None):
+ # --- Basic Agent Definition ---
+ # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+ class BasicAgent:
+ def __init__(self):
+ print("BasicAgent initialized.")
+ def __call__(self, question: str) -> str:
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
+ fixed_answer = "This is a default answer."
+ print(f"Agent returning fixed answer: {fixed_answer}")
+ return fixed_answer
+
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
  """
- Fetches all questions, runs the AgentRunner on them, submits all answers,
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
  and displays the results.
  """
  # --- Determine HF Space Runtime URL and Repo URL ---
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
  
  if profile:
- username = f"{profile.username}"
+ username= f"{profile.username}"
  print(f"User logged in: {username}")
  else:
  print("User not logged in.")
@@ -31,7 +40,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
  
  # 1. Instantiate Agent ( modify this part to create your agent)
  try:
- agent = AgentRunner()
+ agent = BasicAgent()
  except Exception as e:
  print(f"Error instantiating agent: {e}")
  return f"Error initializing agent: {e}", None
@@ -46,16 +55,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
  response.raise_for_status()
  questions_data = response.json()
  if not questions_data:
- print("Fetched questions list is empty.")
- return "Fetched questions list is empty or invalid format.", None
+ print("Fetched questions list is empty.")
+ return "Fetched questions list is empty or invalid format.", None
  print(f"Fetched {len(questions_data)} questions.")
  except requests.exceptions.RequestException as e:
  print(f"Error fetching questions: {e}")
  return f"Error fetching questions: {e}", None
  except requests.exceptions.JSONDecodeError as e:
- print(f"Error decoding JSON response from questions endpoint: {e}")
- print(f"Response text: {response.text[:500]}")
- return f"Error decoding server response for questions: {e}", None
+ print(f"Error decoding JSON response from questions endpoint: {e}")
+ print(f"Response text: {response.text[:500]}")
+ return f"Error decoding server response for questions: {e}", None
  except Exception as e:
  print(f"An unexpected error occurred fetching questions: {e}")
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -72,36 +81,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
  continue
  try:
  submitted_answer = agent(question_text)
- answers_payload.append(
- {"task_id": task_id, "submitted_answer": submitted_answer}
- )
- results_log.append(
- {
- "Task ID": task_id,
- "Question": question_text,
- "Submitted Answer": submitted_answer,
- }
- )
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
  except Exception as e:
- print(f"Error running agent on task {task_id}: {e}")
- results_log.append(
- {
- "Task ID": task_id,
- "Question": question_text,
- "Submitted Answer": f"AGENT ERROR: {e}",
- }
- )
+ print(f"Error running agent on task {task_id}: {e}")
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
  
  if not answers_payload:
  print("Agent did not produce any answers to submit.")
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
  
- # 4. Prepare Submission
- submission_data = {
- "username": username.strip(),
- "agent_code": agent_code,
- "answers": answers_payload,
- }
+ # 4. Prepare Submission
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
  print(status_update)
  
@@ -171,19 +162,20 @@ with gr.Blocks() as demo:
  
  run_button = gr.Button("Run Evaluation & Submit All Answers")
  
- status_output = gr.Textbox(
- label="Run Status / Submission Result", lines=5, interactive=False
- )
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
  # Removed max_rows=10 from DataFrame constructor
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
  
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+ run_button.click(
+ fn=run_and_submit_all,
+ outputs=[status_output, results_table]
+ )
  
  if __name__ == "__main__":
- print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
  # Check for SPACE_HOST and SPACE_ID at startup for information
  space_host_startup = os.getenv("SPACE_HOST")
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
  
  if space_host_startup:
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -191,18 +183,14 @@ if __name__ == "__main__":
  else:
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
  
- if space_id_startup: # Print repo URLs if SPACE_ID is found
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
  print(f"✅ SPACE_ID found: {space_id_startup}")
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
- print(
- f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
- )
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
  else:
- print(
- "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
- )
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
  
- print("-" * (60 + len(" App Starting ")) + "\n")
+ print("-"*(60 + len(" App Starting ")) + "\n")
  
  print("Launching Gradio Interface for Basic Agent Evaluation...")
- demo.launch(debug=True, share=False)
+ demo.launch(debug=True, share=False)
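The refactor above replaces the LangGraph-backed AgentRunner with a placeholder BasicAgent that always returns a fixed string. As a quick local sanity check (a hypothetical snippet, not part of the commit), the new class can be exercised directly:

```python
# Hypothetical local check of the placeholder agent defined in the new app.py.
# Assumes app.py is importable from the working directory and gradio is installed
# (importing app builds the Gradio Blocks UI but does not launch it).
from app import BasicAgent

agent = BasicAgent()
answer = agent("What is the capital of France?")
print(answer)  # -> "This is a default answer."
```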
configuration.py DELETED
@@ -1,33 +0,0 @@
- """Define the configurable parameters for the agent."""
-
- from __future__ import annotations
-
- import os
- from dataclasses import dataclass, fields
- from typing import Optional
-
- from langchain_core.runnables import RunnableConfig
-
-
- @dataclass(kw_only=True)
- class Configuration:
- """The configuration for the agent."""
-
- # API configuration
- api_base: Optional[str] = "http://localhost:11434"
- api_key: Optional[str] = os.getenv("MODEL_API_KEY")
- model_id: Optional[str] = (
- f"ollama/{os.getenv('OLLAMA_MODEL', 'qwen2.5-coder:0.5b')}"
- )
-
- # Agent configuration
- my_configurable_param: str = "changeme"
-
- @classmethod
- def from_runnable_config(
- cls, config: Optional[RunnableConfig] = None
- ) -> Configuration:
- """Create a Configuration instance from a RunnableConfig object."""
- configurable = (config.get("configurable") or {}) if config else {}
- _fields = {f.name for f in fields(cls) if f.init}
- return cls(**{k: v for k, v in configurable.items() if k in _fields})
graph.py DELETED
@@ -1,241 +0,0 @@
1
- """Define the agent graph and its components."""
2
-
3
- import logging
4
- import os
5
- from datetime import datetime
6
- from typing import Dict, List, Optional, TypedDict, Union
7
-
8
- import yaml
9
- from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
10
- from langchain_core.runnables import RunnableConfig
11
- from langgraph.graph import END, StateGraph
12
- from langgraph.types import interrupt
13
- from smolagents import CodeAgent, LiteLLMModel
14
-
15
- from configuration import Configuration
16
- from tools import tools
17
-
18
- # Configure logging
19
- logging.basicConfig(level=logging.INFO)
20
- logger = logging.getLogger(__name__)
21
-
22
- # Enable LiteLLM debug logging only if environment variable is set
23
- import litellm
24
-
25
- if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
26
- litellm.set_verbose = True
27
- logger.setLevel(logging.DEBUG)
28
- else:
29
- litellm.set_verbose = False
30
- logger.setLevel(logging.INFO)
31
-
32
- # Configure LiteLLM to drop unsupported parameters
33
- litellm.drop_params = True
34
-
35
- # Load default prompt templates from local file
36
- current_dir = os.path.dirname(os.path.abspath(__file__))
37
- prompts_dir = os.path.join(current_dir, "prompts")
38
- yaml_path = os.path.join(prompts_dir, "code_agent.yaml")
39
-
40
- with open(yaml_path, "r") as f:
41
- prompt_templates = yaml.safe_load(f)
42
-
43
- # Initialize the model and agent using configuration
44
- config = Configuration()
45
- model = LiteLLMModel(
46
- api_base=config.api_base,
47
- api_key=config.api_key,
48
- model_id=config.model_id,
49
- )
50
-
51
- agent = CodeAgent(
52
- add_base_tools=True,
53
- max_steps=1, # Execute one step at a time
54
- model=model,
55
- prompt_templates=prompt_templates,
56
- tools=tools,
57
- verbosity_level=logging.DEBUG,
58
- )
59
-
60
-
61
- class AgentState(TypedDict):
62
- """State for the agent graph."""
63
-
64
- messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
65
- question: str
66
- answer: Optional[str]
67
- step_logs: List[Dict]
68
- is_complete: bool
69
- step_count: int
70
- # Add memory-related fields
71
- context: Dict[str, any] # For storing contextual information
72
- memory_buffer: List[Dict] # For storing important information across steps
73
- last_action: Optional[str] # Track the last action taken
74
- action_history: List[Dict] # History of actions taken
75
- error_count: int # Track error frequency
76
- success_count: int # Track successful operations
77
-
78
-
79
- class AgentNode:
80
- """Node that runs the agent."""
81
-
82
- def __init__(self, agent: CodeAgent):
83
- """Initialize the agent node with an agent."""
84
- self.agent = agent
85
-
86
- def __call__(
87
- self, state: AgentState, config: Optional[RunnableConfig] = None
88
- ) -> AgentState:
89
- """Run the agent on the current state."""
90
- # Log current state
91
- logger.info("Current state before processing:")
92
- logger.info(f"Messages: {state['messages']}")
93
- logger.info(f"Question: {state['question']}")
94
- logger.info(f"Answer: {state['answer']}")
95
-
96
- # Get configuration
97
- cfg = Configuration.from_runnable_config(config)
98
- logger.info(f"Using configuration: {cfg}")
99
-
100
- # Log execution start
101
- logger.info("Starting agent execution")
102
-
103
- try:
104
- # Run the agent
105
- result = self.agent.run(state["question"])
106
-
107
- # Update memory-related fields
108
- new_state = state.copy()
109
- new_state["messages"].append(AIMessage(content=result))
110
- new_state["answer"] = result
111
- new_state["step_count"] += 1
112
- new_state["last_action"] = "agent_response"
113
- new_state["action_history"].append(
114
- {
115
- "step": state["step_count"],
116
- "action": "agent_response",
117
- "result": result,
118
- }
119
- )
120
- new_state["success_count"] += 1
121
-
122
- # Store important information in memory buffer
123
- if result:
124
- new_state["memory_buffer"].append(
125
- {
126
- "step": state["step_count"],
127
- "content": result,
128
- "timestamp": datetime.now().isoformat(),
129
- }
130
- )
131
-
132
- except Exception as e:
133
- logger.error(f"Error during agent execution: {str(e)}")
134
- new_state = state.copy()
135
- new_state["error_count"] += 1
136
- new_state["action_history"].append(
137
- {"step": state["step_count"], "action": "error", "error": str(e)}
138
- )
139
- raise
140
-
141
- # Log updated state
142
- logger.info("Updated state after processing:")
143
- logger.info(f"Messages: {new_state['messages']}")
144
- logger.info(f"Question: {new_state['question']}")
145
- logger.info(f"Answer: {new_state['answer']}")
146
-
147
- return new_state
148
-
149
-
150
- class StepCallbackNode:
151
- """Node that handles step callbacks and user interaction."""
152
-
153
- def __init__(self, name: str):
154
- self.name = name
155
-
156
- def __call__(self, state: dict) -> dict:
157
- """Process the state and handle user interaction."""
158
- print(f"\nCurrent step: {state.get('step_count', 0)}")
159
- print(f"Question: {state.get('question', 'No question')}")
160
- print(f"Current answer: {state.get('answer', 'No answer yet')}\n")
161
-
162
- while True:
163
- choice = input(
164
- "Enter 'c' to continue, 'q' to quit, 'i' for more info, or 'r' to reject answer: "
165
- ).lower()
166
-
167
- if choice == "c":
168
- # Mark as complete to continue
169
- state["is_complete"] = True
170
- return state
171
- elif choice == "q":
172
- # Mark as complete and set answer to None to quit
173
- state["is_complete"] = True
174
- state["answer"] = None
175
- return state
176
- elif choice == "i":
177
- # Show more information but don't mark as complete
178
- print("\nAdditional Information:")
179
- print(f"Messages: {state.get('messages', [])}")
180
- print(f"Step Logs: {state.get('step_logs', [])}")
181
- print(f"Context: {state.get('context', {})}")
182
- print(f"Memory Buffer: {state.get('memory_buffer', [])}")
183
- print(f"Last Action: {state.get('last_action', None)}")
184
- print(f"Action History: {state.get('action_history', [])}")
185
- print(f"Error Count: {state.get('error_count', 0)}")
186
- print(f"Success Count: {state.get('success_count', 0)}\n")
187
- elif choice == "r":
188
- # Reject the current answer and continue execution
189
- print("\nRejecting current answer and continuing execution...")
190
- # Clear the message history to prevent confusion
191
- state["messages"] = []
192
- state["answer"] = None
193
- state["is_complete"] = False
194
- return state
195
- else:
196
- print("Invalid choice. Please enter 'c', 'q', 'i', or 'r'.")
197
-
198
-
199
- def build_agent_graph(agent: AgentNode) -> StateGraph:
200
- """Build the agent graph."""
201
- # Initialize the graph
202
- workflow = StateGraph(AgentState)
203
-
204
- # Add nodes
205
- workflow.add_node("agent", agent)
206
- workflow.add_node("callback", StepCallbackNode("callback"))
207
-
208
- # Add edges
209
- workflow.add_edge("agent", "callback")
210
-
211
- # Add conditional edges for callback
212
- def should_continue(state: AgentState) -> str:
213
- """Determine the next node based on state."""
214
- # If we have no answer, continue to agent
215
- if not state["answer"]:
216
- logger.info("No answer found, continuing to agent")
217
- return "agent"
218
-
219
- # If we have an answer and it's complete, we're done
220
- if state["is_complete"]:
221
- logger.info(f"Found complete answer: {state['answer']}")
222
- return END
223
-
224
- # Otherwise, go to callback for user input
225
- logger.info(f"Waiting for user input for answer: {state['answer']}")
226
- return "callback"
227
-
228
- workflow.add_conditional_edges(
229
- "callback",
230
- should_continue,
231
- {END: END, "agent": "agent", "callback": "callback"},
232
- )
233
-
234
- # Set entry point
235
- workflow.set_entry_point("agent")
236
-
237
- return workflow.compile()
238
-
239
-
240
- # Initialize the agent graph
241
- agent_graph = build_agent_graph(AgentNode(agent))
main.py ADDED
@@ -0,0 +1,255 @@
+ import logging
+ import os
+ import uuid # for generating thread IDs for checkpointer
+ from typing import AsyncIterator, Optional, TypedDict
+
+ from dotenv import find_dotenv, load_dotenv
+ from langgraph.checkpoint.memory import MemorySaver
+ from langgraph.graph import END, START, StateGraph
+ from smolagents import CodeAgent, LiteLLMModel
+ from smolagents.memory import ActionStep, FinalAnswerStep
+ from smolagents.monitoring import LogLevel
+
+ # Configure logging
+ logging.basicConfig(
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv(find_dotenv())
+
+ # Get required environment variables with validation
+ API_BASE = os.getenv("API_BASE")
+ API_KEY = os.getenv("API_KEY")
+ MODEL_ID = os.getenv("MODEL_ID")
+
+ if not all([API_BASE, API_KEY, MODEL_ID]):
+ raise ValueError(
+ "Missing required environment variables: API_BASE, API_KEY, MODEL_ID"
+ )
+
+
+ # Define the state types for our graph
+ class AgentState(TypedDict):
+ task: str
+ current_step: Optional[dict] # Store serializable dict instead of ActionStep
+ error: Optional[str]
+ answer_text: Optional[str]
+
+
+ # Initialize model with error handling
+ try:
+ model = LiteLLMModel(
+ api_base=API_BASE,
+ api_key=API_KEY,
+ model_id=MODEL_ID,
+ )
+ except Exception as e:
+ logger.error(f"Failed to initialize model: {str(e)}")
+ raise
+
+ # Initialize agent with error handling
+ try:
+ agent = CodeAgent(
+ add_base_tools=True,
+ additional_authorized_imports=["pandas", "numpy"],
+ max_steps=10,
+ model=model,
+ tools=[],
+ step_callbacks=None,
+ verbosity_level=LogLevel.ERROR,
+ )
+ agent.logger.console.width = 66
+ except Exception as e:
+ logger.error(f"Failed to initialize agent: {str(e)}")
+ raise
+
+
+ async def process_step(state: AgentState) -> AgentState:
+ """Process a single step of the agent's execution."""
+ try:
+ # Clear previous step results before running agent.run
+ state["current_step"] = None
+ state["answer_text"] = None
+ state["error"] = None
+
+ steps = agent.run(
+ task=state["task"],
+ additional_args=None,
+ images=None,
+ max_steps=1, # Process one step at a time
+ stream=True,
+ reset=False, # Maintain agent's internal state across process_step calls
+ )
+
+ for step in steps:
+ if isinstance(step, ActionStep):
+ # Convert ActionStep to serializable dict using the correct attributes
+ state["current_step"] = {
+ "step_number": step.step_number,
+ "model_output": step.model_output,
+ "observations": step.observations,
+ "tool_calls": [
+ {"name": tc.name, "arguments": tc.arguments}
+ for tc in (step.tool_calls or [])
+ ],
+ "action_output": step.action_output,
+ }
+ logger.info(f"Processed action step {step.step_number}")
+ elif isinstance(step, FinalAnswerStep):
+ state["answer_text"] = step.final_answer
+ logger.info("Processed final answer")
+ logger.debug(f"Final answer details: {step}")
+ logger.info(f"Extracted answer text: {state['answer_text']}")
+ # Return immediately when we get a final answer
+ return state
+ # If loop finishes without FinalAnswerStep, return current state
+ return state
+ except Exception as e:
+ state["error"] = str(e)
+ logger.error(f"Error during agent execution step: {str(e)}")
+ return state
+
+
+ def should_continue(state: AgentState) -> bool:
+ """Determine if the agent should continue processing steps."""
+ # Continue if we don't have an answer_text and no error
+ continue_execution = state.get("answer_text") is None and state.get("error") is None
+ logger.debug(
+ f"Checking should_continue: answer_text={state.get('answer_text') is not None}, error={state.get('error') is not None} -> Continue={continue_execution}"
+ )
+ return continue_execution
+
+
+ # Build the LangGraph graph once with persistence
+ memory = MemorySaver()
+ builder = StateGraph(AgentState)
+ builder.add_node("process_step", process_step)
+ builder.add_edge(START, "process_step")
+ builder.add_conditional_edges(
+ "process_step", should_continue, {True: "process_step", False: END}
+ )
+ graph = builder.compile(checkpointer=memory)
+
+
+ async def stream_execution(task: str, thread_id: str) -> AsyncIterator[AgentState]:
+ """Stream the execution of the agent."""
+ if not task:
+ raise ValueError("Task cannot be empty")
+
+ logger.info(f"Initializing agent execution for task: {task}")
+
+ # Initialize the state
+ initial_state: AgentState = {
+ "task": task,
+ "current_step": None,
+ "error": None,
+ "answer_text": None,
+ }
+
+ # Pass thread_id via the config dict so the checkpointer can persist state
+ async for state in graph.astream(
+ initial_state, {"configurable": {"thread_id": thread_id}}
+ ):
+ yield state
+ # Propagate error immediately if it occurs without an answer
+ if state.get("error") and not state.get("answer_text"):
+ logger.error(f"Propagating error from stream: {state['error']}")
+ raise Exception(state["error"])
+
+
+ async def run_with_streaming(task: str, thread_id: str) -> dict:
+ """Run the agent with streaming output and return the results."""
+ last_state = None
+ steps = []
+ error = None
+ final_answer_text = None
+
+ try:
+ logger.info(f"Starting execution run for task: {task}")
+ async for state in stream_execution(task, thread_id):
+ last_state = state
+
+ if current_step := state.get("current_step"):
+ if not steps or steps[-1]["step_number"] != current_step["step_number"]:
+ steps.append(current_step)
+ # Keep print here for direct user feedback during streaming
+ print(f"\nStep {current_step['step_number']}:")
+ print(f"Model Output: {current_step['model_output']}")
+ print(f"Observations: {current_step['observations']}")
+ if current_step.get("tool_calls"):
+ print("Tool Calls:")
+ for tc in current_step["tool_calls"]:
+ print(f" - {tc['name']}: {tc['arguments']}")
+ if current_step.get("action_output"):
+ print(f"Action Output: {current_step['action_output']}")
+
+ # After the stream is finished, process the last state
+ logger.info("Stream finished.")
+ if last_state:
+ # LangGraph streams dicts where keys are node names, values are state dicts
+ node_name = list(last_state.keys())[0]
+ actual_state = last_state.get(node_name)
+ if actual_state:
+ final_answer_text = actual_state.get("answer_text")
+ error = actual_state.get("error")
+ logger.info(
+ f"Final answer text extracted from last state: {final_answer_text}"
+ )
+ logger.info(f"Error extracted from last state: {error}")
+ # Ensure steps list is consistent with the final state if needed
+ last_step_in_state = actual_state.get("current_step")
+ if last_step_in_state and (
+ not steps
+ or steps[-1]["step_number"] != last_step_in_state["step_number"]
+ ):
+ logger.debug("Adding last step from final state to steps list.")
+ steps.append(last_step_in_state)
+ else:
+ logger.warning(
+ "Could not find actual state dictionary within last_state."
+ )
+
+ return {"steps": steps, "final_answer": final_answer_text, "error": error}
+
+ except Exception as e:
+ import traceback
+
+ logger.error(
+ f"Exception during run_with_streaming: {str(e)}\n{traceback.format_exc()}"
+ )
+ # Attempt to return based on the last known state even if exception occurred outside stream
+ final_answer_text = None
+ error_msg = str(e)
+ if last_state:
+ node_name = list(last_state.keys())[0]
+ actual_state = last_state.get(node_name)
+ if actual_state:
+ final_answer_text = actual_state.get("answer_text")
+
+ return {"steps": steps, "final_answer": final_answer_text, "error": error_msg}
+
+
+ if __name__ == "__main__":
+ import asyncio
+ import uuid
+
+ # Example Usage
+ task_to_run = "What is the capital of France?"
+ thread_id = str(uuid.uuid4()) # Generate a unique thread ID for this run
+ logger.info(
+ f"Starting agent run from __main__ for task: '{task_to_run}' with thread_id: {thread_id}"
+ )
+ result = asyncio.run(run_with_streaming(task_to_run, thread_id))
+ logger.info("Agent run finished.")
+
+ # Print final results
+ print("\n--- Execution Results ---")
+ print(f"Number of Steps: {len(result.get('steps', []))}")
+ # Optionally print step details
+ # for i, step in enumerate(result.get('steps', [])):
+ # print(f"Step {i+1} Details: {step}")
+ print(f"Final Answer: {result.get('final_answer') or 'Not found'}")
+ if err := result.get("error"):
+ print(f"Error: {err}")
prompts/code_agent.yaml DELETED
@@ -1,325 +0,0 @@
1
- system_prompt: |-
2
- You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
3
- To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
4
- To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
5
-
6
- At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
7
- Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence.
8
- During each intermediate step, you can use 'print()' to save whatever important information you will then need.
9
- These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
10
- In the end you have to return a final answer using the `final_answer` tool.
11
-
12
- Here are a few examples using notional tools:
13
- ---
14
- Task: "Generate an image of the oldest person in this document."
15
-
16
- Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
17
- Code:
18
- ```py
19
- answer = document_qa(document=document, question="Who is the oldest person mentioned?")
20
- print(answer)
21
- ```<end_code>
22
- Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
23
-
24
- Thought: I will now generate an image showcasing the oldest person.
25
- Code:
26
- ```py
27
- image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
28
- final_answer(image)
29
- ```<end_code>
30
-
31
- ---
32
- Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
33
-
34
- Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
35
- Code:
36
- ```py
37
- result = 5 + 3 + 1294.678
38
- final_answer(result)
39
- ```<end_code>
40
-
41
- ---
42
- Task:
43
- "Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.
44
- You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
45
- {'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
46
-
47
- Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
48
- Code:
49
- ```py
50
- translated_question = translator(question=question, src_lang="French", tgt_lang="English")
51
- print(f"The translated question is {translated_question}.")
52
- answer = image_qa(image=image, question=translated_question)
53
- final_answer(f"The answer is {answer}")
54
- ```<end_code>
55
-
56
- ---
57
- Task:
58
- In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
59
- What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
60
-
61
- Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
62
- Code:
63
- ```py
64
- pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
65
- print(pages)
66
- ```<end_code>
67
- Observation:
68
- No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
69
-
70
- Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
71
- Code:
72
- ```py
73
- pages = search(query="1979 interview Stanislaus Ulam")
74
- print(pages)
75
- ```<end_code>
76
- Observation:
77
- Found 6 pages:
78
- [Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
79
-
80
- [Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
81
-
82
- (truncated)
83
-
84
- Thought: I will read the first 2 pages to know more.
85
- Code:
86
- ```py
87
- for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
88
- whole_page = visit_webpage(url)
89
- print(whole_page)
90
- print("\n" + "="*80 + "\n") # Print separator between pages
91
- ```<end_code>
92
- Observation:
93
- Manhattan Project Locations:
94
- Los Alamos, NM
95
- Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
96
- (truncated)
97
-
98
- Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
99
- Code:
100
- ```py
101
- final_answer("diminished")
102
- ```<end_code>
103
-
104
- ---
105
- Task: "Which city has the highest population: Guangzhou or Shanghai?"
106
-
107
- Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
108
- Code:
109
- ```py
110
- for city in ["Guangzhou", "Shanghai"]:
111
- print(f"Population {city}:", search(f"{city} population")
112
- ```<end_code>
113
- Observation:
114
- Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
115
- Population Shanghai: '26 million (2019)'
116
-
117
- Thought: Now I know that Shanghai has the highest population.
118
- Code:
119
- ```py
120
- final_answer("Shanghai")
121
- ```<end_code>
122
-
123
- ---
124
- Task: "What is the current age of the pope, raised to the power 0.36?"
125
-
126
- Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.
127
- Code:
128
- ```py
129
- pope_age_wiki = wiki(query="current pope age")
130
- print("Pope age as per wikipedia:", pope_age_wiki)
131
- pope_age_search = web_search(query="current pope age")
132
- print("Pope age as per google search:", pope_age_search)
133
- ```<end_code>
134
- Observation:
135
- Pope age: "The pope Francis is currently 88 years old."
136
-
137
- Thought: I know that the pope is 88 years old. Let's compute the result using python code.
138
- Code:
139
- ```py
140
- pope_current_age = 88 ** 0.36
141
- final_answer(pope_current_age)
142
- ```<end_code>
143
-
144
- Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:
145
- ```python
146
- {%- for tool in tools.values() %}
147
- def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
148
- """{{ tool.description }}
149
-
150
- Args:
151
- {%- for arg_name, arg_info in tool.inputs.items() %}
152
- {{ arg_name }}: {{ arg_info.description }}
153
- {%- endfor %}
154
- """
155
- {% endfor %}
156
- ```
157
-
158
- {%- if managed_agents and managed_agents.values() | list %}
159
- You can also give tasks to team members.
160
- Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
161
- Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
162
- Here is a list of the team members that you can call:
163
- ```python
164
- {%- for agent in managed_agents.values() %}
165
- def {{ agent.name }}("Your query goes here.") -> str:
166
- """{{ agent.description }}"""
167
- {% endfor %}
168
- ```
169
- {%- endif %}
170
-
171
- Here are the rules you should always follow to solve your task:
172
- 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
173
- 2. Use only variables that you have defined!
174
- 3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'.
175
- 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
176
- 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
177
- 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
178
- 7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
179
- 8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
180
- 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
181
- 10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
182
-
183
- Now Begin!
184
- planning:
185
- initial_plan : |-
186
- You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.
187
- Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
188
-
189
- ## 1. Facts survey
190
- You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
191
- These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
192
- ### 1.1. Facts given in the task
193
- List here the specific facts given in the task that could help you (there might be nothing here).
194
-
195
- ### 1.2. Facts to look up
196
- List here any facts that we may need to look up.
197
- Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
198
-
199
- ### 1.3. Facts to derive
200
- List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
201
-
202
- Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.
203
-
204
- ## 2. Plan
205
- Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
206
- This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
207
- Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
208
- After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
209
-
210
- You can leverage these tools, behaving like regular python functions:
211
- ```python
212
- {%- for tool in tools.values() %}
213
- def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
214
- """{{ tool.description }}
215
-
216
- Args:
217
- {%- for arg_name, arg_info in tool.inputs.items() %}
218
- {{ arg_name }}: {{ arg_info.description }}
219
- {%- endfor %}
220
- """
221
- {% endfor %}
222
- ```
223
-
224
- {%- if managed_agents and managed_agents.values() | list %}
225
- You can also give tasks to team members.
226
- Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
227
- Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
228
- Here is a list of the team members that you can call:
229
- ```python
230
- {%- for agent in managed_agents.values() %}
231
- def {{ agent.name }}("Your query goes here.") -> str:
232
- """{{ agent.description }}"""
233
- {% endfor %}
234
- ```
235
- {%- endif %}
236
-
237
- ---
238
- Now begin! Here is your task:
239
- ```
240
- {{task}}
241
- ```
242
- First in part 1, write the facts survey, then in part 2, write your plan.
243
- update_plan_pre_messages: |-
244
- You are a world expert at analyzing a situation, and plan accordingly towards solving a task.
245
- You have been given the following task:
246
- ```
247
- {{task}}
248
- ```
249
-
250
- Below you will find a history of attempts made to solve this task.
251
- You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
252
- If the previous tries so far have met some success, your updated plan can build on these results.
253
- If you are stalled, you can make a completely new plan starting from scratch.
254
-
255
- Find the task and history below:
256
- update_plan_post_messages: |-
257
- Now write your updated facts below, taking into account the above history:
258
- ## 1. Updated facts survey
259
- ### 1.1. Facts given in the task
260
- ### 1.2. Facts that we have learned
261
- ### 1.3. Facts still to look up
262
- ### 1.4. Facts still to derive
263
-
264
- Then write a step-by-step high-level plan to solve the task above.
265
- ## 2. Plan
266
- ### 2. 1. ...
267
- Etc.
268
- This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
269
- Beware that you have {remaining_steps} steps remaining.
270
- Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
271
- After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
272
-
273
- You can leverage these tools, behaving like regular python functions:
274
- ```python
275
- {%- for tool in tools.values() %}
276
- def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
277
- """{{ tool.description }}
278
-
279
- Args:
280
- {%- for arg_name, arg_info in tool.inputs.items() %}
281
- {{ arg_name }}: {{ arg_info.description }}
282
- {%- endfor %}"""
283
- {% endfor %}
284
- ```
285
-
286
- {%- if managed_agents and managed_agents.values() | list %}
287
- You can also give tasks to team members.
288
- Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
289
- Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
290
- Here is a list of the team members that you can call:
291
- ```python
292
- {%- for agent in managed_agents.values() %}
293
- def {{ agent.name }}("Your query goes here.") -> str:
294
- """{{ agent.description }}"""
295
- {% endfor %}
296
- ```
297
- {%- endif %}
298
-
299
- Now write your updated facts survey below, then your new plan.
300
- managed_agent:
301
- task: |-
302
- You're a helpful agent named '{{name}}'.
303
- You have been submitted this task by your manager.
304
- ---
305
- Task:
306
- {{task}}
307
- ---
308
- You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
309
-
310
- Your final_answer WILL HAVE to contain these parts:
311
- ### 1. Task outcome (short version):
312
- ### 2. Task outcome (extremely detailed version):
313
- ### 3. Additional context (if relevant):
314
-
315
- Put all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.
316
- And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
317
- report: |-
318
- Here is the final answer from your managed agent '{{name}}':
319
- {{final_answer}}
320
- final_answer:
321
- pre_messages: |-
322
- An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
323
- post_messages: |-
324
- Based on the above, please provide an answer to the following user task:
325
- {{task}}
prompts/toolcalling_agent.yaml DELETED
@@ -1,239 +0,0 @@
1
- system_prompt: |-
2
- You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
3
- To do so, you have been given access to some tools.
4
-
5
- The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
6
- This Action/Observation can repeat N times, you should take several steps when needed.
7
-
8
- You can use the result of the previous action as input for the next action.
9
- The observation will always be a string: it can represent a file, like "image_1.jpg".
10
- Then you can use it as input for the next action. You can do it for instance as follows:
11
-
12
- Observation: "image_1.jpg"
13
-
14
- Action:
15
- {
16
- "name": "image_transformer",
17
- "arguments": {"image": "image_1.jpg"}
18
- }
19
-
20
- To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
21
- Action:
22
- {
23
- "name": "final_answer",
24
- "arguments": {"answer": "insert your final answer here"}
25
- }
26
-
27
-
28
- Here are a few examples using notional tools:
29
- ---
30
- Task: "Generate an image of the oldest person in this document."
31
-
32
- Action:
33
- {
34
- "name": "document_qa",
35
- "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
36
- }
37
- Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
38
-
39
- Action:
40
- {
41
- "name": "image_generator",
42
- "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
43
- }
44
- Observation: "image.png"
45
-
46
- Action:
47
- {
48
- "name": "final_answer",
49
- "arguments": "image.png"
50
- }
51
-
52
- ---
53
- Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
54
-
55
- Action:
56
- {
57
- "name": "python_interpreter",
58
- "arguments": {"code": "5 + 3 + 1294.678"}
59
- }
60
- Observation: 1302.678
61
-
62
- Action:
63
- {
64
- "name": "final_answer",
65
- "arguments": "1302.678"
66
- }
67
-
68
- ---
69
- Task: "Which city has the highest population , Guangzhou or Shanghai?"
70
-
71
- Action:
72
- {
73
- "name": "search",
74
- "arguments": "Population Guangzhou"
75
- }
76
- Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
77
-
78
-
79
- Action:
80
- {
81
- "name": "search",
82
- "arguments": "Population Shanghai"
83
- }
84
- Observation: '26 million (2019)'
85
-
86
- Action:
87
- {
88
- "name": "final_answer",
89
- "arguments": "Shanghai"
90
- }
91
-
92
- Above example were using notional tools that might not exist for you. You only have access to these tools:
93
- {%- for tool in tools.values() %}
94
- - {{ tool.name }}: {{ tool.description }}
95
- Takes inputs: {{tool.inputs}}
96
- Returns an output of type: {{tool.output_type}}
97
- {%- endfor %}
98
-
99
- {%- if managed_agents and managed_agents.values() | list %}
100
- You can also give tasks to team members.
101
- Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
102
- Given that this team member is a real human, you should be very verbose in your task.
103
- Here is a list of the team members that you can call:
104
- {%- for agent in managed_agents.values() %}
105
- - {{ agent.name }}: {{ agent.description }}
106
- {%- endfor %}
107
- {%- endif %}
108
-
109
- Here are the rules you should always follow to solve your task:
110
- 1. ALWAYS provide a tool call, else you will fail.
111
- 2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
112
- 3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
113
- If no tool call is needed, use final_answer tool to return your answer.
114
- 4. Never re-do a tool call that you previously did with the exact same parameters.
115
-
116
- Now Begin!
117
- planning:
118
- initial_plan : |-
119
- You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.
120
- Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
121
-
122
- ## 1. Facts survey
123
- You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
124
- These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
125
- ### 1.1. Facts given in the task
126
- List here the specific facts given in the task that could help you (there might be nothing here).
127
-
128
- ### 1.2. Facts to look up
129
- List here any facts that we may need to look up.
130
- Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
131
-
132
- ### 1.3. Facts to derive
133
- List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
134
-
135
- Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.
136
-
137
- ## 2. Plan
138
-     Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
-     This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-     Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-     After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
-     You can leverage these tools:
-     {%- for tool in tools.values() %}
-     - {{ tool.name }}: {{ tool.description }}
-         Takes inputs: {{tool.inputs}}
-         Returns an output of type: {{tool.output_type}}
-     {%- endfor %}
-
-     {%- if managed_agents and managed_agents.values() | list %}
-     You can also give tasks to team members.
-     Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-     Given that this team member is a real human, you should be very verbose in your task.
-     Here is a list of the team members that you can call:
-     {%- for agent in managed_agents.values() %}
-     - {{ agent.name }}: {{ agent.description }}
-     {%- endfor %}
-     {%- endif %}
-
-     ---
-     Now begin! Here is your task:
-     ```
-     {{task}}
-     ```
-     First in part 1, write the facts survey, then in part 2, write your plan.
-   update_plan_pre_messages: |-
-     You are a world expert at analyzing a situation, and plan accordingly towards solving a task.
-     You have been given the following task:
-     ```
-     {{task}}
-     ```
-
-     Below you will find a history of attempts made to solve this task.
-     You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
-     If the previous tries so far have met some success, your updated plan can build on these results.
-     If you are stalled, you can make a completely new plan starting from scratch.
-
-     Find the task and history below:
-   update_plan_post_messages: |-
-     Now write your updated facts below, taking into account the above history:
-     ## 1. Updated facts survey
-     ### 1.1. Facts given in the task
-     ### 1.2. Facts that we have learned
-     ### 1.3. Facts still to look up
-     ### 1.4. Facts still to derive
-
-     Then write a step-by-step high-level plan to solve the task above.
-     ## 2. Plan
-     ### 2. 1. ...
-     Etc.
-     This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-     Beware that you have {remaining_steps} steps remaining.
-     Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-     After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
-     You can leverage these tools:
-     {%- for tool in tools.values() %}
-     - {{ tool.name }}: {{ tool.description }}
-         Takes inputs: {{tool.inputs}}
-         Returns an output of type: {{tool.output_type}}
-     {%- endfor %}
-
-     {%- if managed_agents and managed_agents.values() | list %}
-     You can also give tasks to team members.
-     Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
-     Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
-     Here is a list of the team members that you can call:
-     {%- for agent in managed_agents.values() %}
-     - {{ agent.name }}: {{ agent.description }}
-     {%- endfor %}
-     {%- endif %}
-
-     Now write your new plan below.
- managed_agent:
-   task: |-
-     You're a helpful agent named '{{name}}'.
-     You have been submitted this task by your manager.
-     ---
-     Task:
-     {{task}}
-     ---
-     You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-     Your final_answer WILL HAVE to contain these parts:
-     ### 1. Task outcome (short version):
-     ### 2. Task outcome (extremely detailed version):
-     ### 3. Additional context (if relevant):
-
-     Put all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.
-     And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-   report: |-
-     Here is the final answer from your managed agent '{{name}}':
-     {{final_answer}}
- final_answer:
-   pre_messages: |-
-     An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-   post_messages: |-
-     Based on the above, please provide an answer to the following user task:
-     {{task}}
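Note: the planning, managed-agent, and final-answer prompts in this removed file are Jinja2 templates; the agent framework fills in `tools`, `managed_agents`, and `task` when building messages. A minimal sketch of how such a template renders, where the `SimpleTool` class and the example "search" entry are invented for illustration and are not part of this repo:

```python
# Minimal illustration of rendering one of the prompt templates above with Jinja2.
# SimpleTool and the example "search" tool are invented for this sketch.
from jinja2 import Template


class SimpleTool:
    def __init__(self, name, description, inputs, output_type):
        self.name = name
        self.description = description
        self.inputs = inputs
        self.output_type = output_type


template = Template(
    "You can leverage these tools:\n"
    "{%- for tool in tools.values() %}\n"
    "- {{ tool.name }}: {{ tool.description }}\n"
    "    Takes inputs: {{ tool.inputs }}\n"
    "    Returns an output of type: {{ tool.output_type }}\n"
    "{%- endfor %}"
)

tools = {
    "search": SimpleTool(
        name="search",
        description="Combined DuckDuckGo and Wikipedia search.",
        inputs={"query": {"type": "string"}},
        output_type="string",
    )
}

# Prints the tool list exactly as the agent would see it in its system prompt.
print(template.render(tools=tools))
```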
requirements.txt CHANGED
@@ -3,8 +3,11 @@ duckduckgo-search>=8.0.1
  gradio[oauth]>=5.26.0
  isort>=6.0.1
  langgraph>=0.3.34
+ litellm>=1.10.0
  pytest>=8.3.5
  pytest-cov>=6.1.1
+ python-dotenv>=1.0.0
  requests>=2.32.3
  smolagents[litellm]>=0.1.3
+ typing-extensions>=4.5.0
  wikipedia-api>=0.8.1
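Note: the new `litellm` and `python-dotenv` pins back the environment-driven model configuration. A rough sketch of how the two are typically combined; the environment variable names and the fallback model string below are assumptions for illustration only:

```python
# Illustrative sketch: load a .env file and route a completion call through LiteLLM.
# The environment variable names and the fallback model string are assumptions.
import os

import litellm
from dotenv import load_dotenv

load_dotenv()  # pulls KEY=VALUE pairs from a local .env file into os.environ

response = litellm.completion(
    model=os.getenv("MODEL_ID", "openai/gpt-4o-mini"),  # assumed variable name
    api_key=os.getenv("API_KEY"),                       # assumed variable name
    api_base=os.getenv("API_BASE"),                     # assumed variable name
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```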
runner.py DELETED
@@ -1,180 +0,0 @@
- import logging
- import os
- import re
- import uuid
-
- from langgraph.types import Command
-
- from graph import agent_graph
-
- # Configure logging
- logging.basicConfig(level=logging.INFO) # Default to INFO level
- logger = logging.getLogger(__name__)
-
- # Enable LiteLLM debug logging only if environment variable is set
- import litellm
-
- if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
-     litellm.set_verbose = True
-     logger.setLevel(logging.DEBUG)
- else:
-     litellm.set_verbose = False
-     logger.setLevel(logging.INFO)
-
-
- class AgentRunner:
-     """Runner class for the code agent."""
-
-     def __init__(self):
-         """Initialize the agent runner with graph and tools."""
-         logger.info("Initializing AgentRunner")
-         self.graph = agent_graph
-         self.last_state = None # Store the last state for testing/debugging
-         self.thread_id = str(
-             uuid.uuid4()
-         ) # Generate a unique thread_id for this runner
-         logger.info(f"Created AgentRunner with thread_id: {self.thread_id}")
-
-     def _extract_answer(self, state: dict) -> str:
-         """Extract the answer from the state."""
-         if not state:
-             return None
-
-         # First try to get answer from direct answer field
-         if "answer" in state and state["answer"]:
-             logger.info(f"Found answer in direct field: {state['answer']}")
-             return state["answer"]
-
-         # Then try to get answer from messages
-         if "messages" in state and state["messages"]:
-             for msg in reversed(state["messages"]):
-                 if hasattr(msg, "content") and msg.content:
-                     # Look for code blocks that might contain the answer
-                     if "```" in msg.content:
-                         # Extract code between ```py and ``` or ```python and ```
-                         code_match = re.search(
-                             r"```(?:py|python)?\s*\n(.*?)\n```", msg.content, re.DOTALL
-                         )
-                         if code_match:
-                             code = code_match.group(1)
-                             # Look for final_answer call
-                             final_answer_match = re.search(
-                                 r"final_answer\((.*?)\)", code
-                             )
-                             if final_answer_match:
-                                 answer = final_answer_match.group(1)
-                                 logger.info(
-                                     f"Found answer in final_answer call: {answer}"
-                                 )
-                                 return answer
-
-                     # If no code block with final_answer, use the content
-                     logger.info(f"Found answer in message: {msg.content}")
-                     return msg.content
-
-         return None
-
-     def __call__(self, input_data) -> str:
-         """Process a question through the agent graph and return the answer.
-
-         Args:
-             input_data: Either a question string or a Command object for resuming
-
-         Returns:
-             str: The agent's response
-         """
-         try:
-             config = {"configurable": {"thread_id": self.thread_id}}
-             logger.info(f"Using config: {config}")
-
-             if isinstance(input_data, str):
-                 # Initial question
-                 logger.info(f"Processing initial question: {input_data}")
-                 initial_state = {
-                     "question": input_data,
-                     "messages": [],
-                     "answer": None,
-                     "step_logs": [],
-                     "is_complete": False,
-                     "step_count": 0,
-                     # Initialize new memory fields
-                     "context": {},
-                     "memory_buffer": [],
-                     "last_action": None,
-                     "action_history": [],
-                     "error_count": 0,
-                     "success_count": 0,
-                 }
-                 logger.info(f"Initial state: {initial_state}")
-
-                 # Use stream to get results
-                 logger.info("Starting graph stream for initial question")
-                 for chunk in self.graph.stream(initial_state, config):
-                     logger.debug(f"Received chunk: {chunk}")
-                     if isinstance(chunk, dict):
-                         if "__interrupt__" in chunk:
-                             logger.info("Detected interrupt in stream")
-                             logger.info(f"Interrupt details: {chunk['__interrupt__']}")
-                             # Let the graph handle the interrupt naturally
-                             continue
-                         answer = self._extract_answer(chunk)
-                         if answer:
-                             self.last_state = chunk
-                             # If the state is complete, return the answer
-                             if chunk.get("is_complete", False):
-                                 return answer
-                         else:
-                             logger.debug(f"Skipping chunk without answer: {chunk}")
-             else:
-                 # Resuming from interrupt
-                 logger.info(f"Resuming from interrupt with input: {input_data}")
-                 for result in self.graph.stream(input_data, config):
-                     logger.debug(f"Received resume result: {result}")
-                     if isinstance(result, dict):
-                         answer = self._extract_answer(result)
-                         if answer:
-                             self.last_state = result
-                             # If the state is complete, return the answer
-                             if result.get("is_complete", False):
-                                 return answer
-                         else:
-                             logger.debug(f"Skipping result without answer: {result}")
-
-             # If we get here, we didn't find an answer
-             logger.warning("No answer generated from stream")
-             return "No answer generated"
-
-         except Exception as e:
-             logger.error(f"Error processing input: {str(e)}")
-             raise
-
-
- if __name__ == "__main__":
-     import argparse
-
-     from langgraph.types import Command
-
-     # Set up argument parser
-     parser = argparse.ArgumentParser(description="Run the agent with a question")
-     parser.add_argument("question", type=str, help="The question to ask the agent")
-     parser.add_argument(
-         "--resume",
-         type=str,
-         help="Value to resume with after an interrupt",
-         default=None,
-     )
-     args = parser.parse_args()
-
-     # Create agent runner
-     runner = AgentRunner()
-
-     if args.resume:
-         # Resume from interrupt with provided value
-         print(f"\nResuming with value: {args.resume}")
-         response = runner(Command(resume=args.resume))
-     else:
-         # Initial run with question
-         print(f"\nAsking question: {args.question}")
-         response = runner(args.question)
-
-     print(f"\nFinal response: {response}")
test_agent.py DELETED
@@ -1,263 +0,0 @@
- import logging
-
- import pytest
-
- from runner import AgentRunner
-
- # Configure test logger
- test_logger = logging.getLogger("test_agent")
- test_logger.setLevel(logging.INFO)
-
- # Suppress specific warnings
- pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")
-
- # Constants
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
-
-
- @pytest.fixture(scope="session")
- def agent():
-     """Fixture to create and return an AgentRunner instance."""
-     test_logger.info("Creating AgentRunner instance")
-     return AgentRunner()
-
-
- # @pytest.fixture(scope="session")
- # def questions_data():
- #     """Fixture to fetch questions from the API."""
- #     test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
- #     try:
- #         response = requests.get(QUESTIONS_URL, timeout=15)
- #         response.raise_for_status()
- #         data = response.json()
- #         if not data:
- #             test_logger.error("Fetched questions list is empty.")
- #             return []
- #         test_logger.info(f"Fetched {len(data)} questions.")
- #         return data
- #     except requests.exceptions.RequestException as e:
- #         test_logger.error(f"Error fetching questions: {e}")
- #         return []
- #     except requests.exceptions.JSONDecodeError as e:
- #         test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
- #         return []
- #     except Exception as e:
- #         test_logger.error(f"An unexpected error occurred fetching questions: {e}")
- #         return []
- #
- # class TestAppQuestions:
- #     """Test cases for questions from the app."""
- #
- #     def test_first_app_question(self, agent, questions_data):
- #         """Test the agent's response to the first app question."""
- #         if not questions_data:
- #             pytest.skip("No questions available from API")
- #
- #         first_question = questions_data[0]
- #         question_text = first_question.get("question")
- #         task_id = first_question.get("task_id")
- #
- #         if not question_text or not task_id:
- #             pytest.skip("First question is missing required fields")
- #
- #         test_logger.info(f"Testing with app question: {question_text}")
- #
- #         response = agent(question_text)
- #         test_logger.info(f"Agent response: {response}")
- #
- #         # Check that the response contains the expected information
- #         assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
- #         assert "studio albums" in response.lower(), "Response should mention studio albums"
- #         assert "2000" in response and "2009" in response, "Response should mention the year range"
- #
- #         # Verify that a number is mentioned (either as word or digit)
- #         import re
- #         number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
- #         has_number = bool(re.search(number_pattern, response.lower()))
- #         assert has_number, "Response should include the number of albums"
- #
- #         # Check for album names in the response
- #         known_albums = [
- #             "Corazón Libre",
- #             "Cantora",
- #             "Hermano",
- #             "Acústico",
- #             "Argentina quiere cantar"
- #         ]
- #         found_albums = [album for album in known_albums if album in response]
- #         assert len(found_albums) > 0, "Response should mention at least some of the known albums"
- #
- #         # Check for a structured response
- #         assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
- #             "Response should list albums with years"
-
-
- class TestBasicCodeAgentCapabilities:
-     """Test basic capabilities of the code agent."""
-
-     def setup_method(self):
-         """Setup method to initialize the agent before each test."""
-         test_logger.info("Creating AgentRunner instance")
-         self.agent = AgentRunner()
-
-     def test_simple_math_calculation_with_steps(self):
-         """Test that the agent can perform basic math calculations and log steps."""
-         question = "What is the result of the following operation: 5 + 3 + 1294.678?"
-         test_logger.info(f"Testing math calculation with question: {question}")
-
-         # Run the agent and get the response
-         response = self.agent(question)
-
-         # Verify the response contains the correct result
-         expected_result = str(5 + 3 + 1294.678)
-         assert (
-             expected_result in response
-         ), f"Response should contain the result {expected_result}"
-
-         # Verify step logs exist and have required fields
-         assert self.agent.last_state is not None, "Agent should store last state"
-         assert "step_logs" in self.agent.last_state, "State should contain step_logs"
-         assert (
-             len(self.agent.last_state["step_logs"]) > 0
-         ), "Should have at least one step logged"
-
-         # Verify each step has required fields
-         for step in self.agent.last_state["step_logs"]:
-             assert "step_number" in step, "Each step should have a step_number"
-             assert any(
-                 key in step for key in ["thought", "code", "observation"]
-             ), "Each step should have at least one of: thought, code, or observation"
-
-         # Verify the final answer is indicated
-         assert (
-             "final_answer" in response.lower()
-         ), "Response should indicate it's providing an answer"
-
-     def test_document_qa_and_image_generation_with_steps(self):
-         """Test that the agent can search for information and generate images, with step logging."""
-         question = (
-             "Search for information about the Mona Lisa and generate an image of it."
-         )
-         test_logger.info(
-             f"Testing document QA and image generation with question: {question}"
-         )
-
-         # Run the agent and get the response
-         response = self.agent(question)
-
-         # Verify the response contains both search and image generation
-         assert "mona lisa" in response.lower(), "Response should mention Mona Lisa"
-         assert "image" in response.lower(), "Response should mention image generation"
-
-         # Verify step logs exist and show logical progression
-         assert self.agent.last_state is not None, "Agent should store last state"
-         assert "step_logs" in self.agent.last_state, "State should contain step_logs"
-         assert (
-             len(self.agent.last_state["step_logs"]) > 1
-         ), "Should have multiple steps logged"
-
-         # Verify steps show logical progression
-         steps = self.agent.last_state["step_logs"]
-         search_steps = [step for step in steps if "search" in str(step).lower()]
-         image_steps = [step for step in steps if "image" in str(step).lower()]
-
-         assert len(search_steps) > 0, "Should have search steps"
-         assert len(image_steps) > 0, "Should have image generation steps"
-
-         # Verify each step has required fields
-         for step in steps:
-             assert "step_number" in step, "Each step should have a step_number"
-             assert any(
-                 key in step for key in ["thought", "code", "observation"]
-             ), "Each step should have at least one of: thought, code, or observation"
-
-
- def test_simple_math_calculation_with_steps():
-     """Test that the agent can perform a simple math calculation and verify intermediate steps."""
-     agent = AgentRunner()
-     question = "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-     # Process the question
-     response = agent(question)
-
-     # Verify step logs exist and have required fields
-     assert agent.last_state is not None, "Last state should be stored"
-     step_logs = agent.last_state.get("step_logs", [])
-     assert len(step_logs) > 0, "Should have recorded step logs"
-
-     for step in step_logs:
-         assert "step_number" in step, "Each step should have a step number"
-         assert any(
-             key in step for key in ["thought", "code", "observation"]
-         ), "Each step should have at least one of thought/code/observation"
-
-     # Verify final answer
-     expected_result = 1302.678
-
-     # Extract all numbers from the response
-     import re
-
-     # First check for LaTeX formatting
-     latex_match = re.search(r"\\boxed{([^}]+)}", response)
-     if latex_match:
-         # Extract number from LaTeX box
-         latex_content = latex_match.group(1)
-         numbers = re.findall(r"\d+\.?\d*", latex_content)
-     else:
-         # Extract all numbers from the response
-         numbers = re.findall(r"\d+\.?\d*", response)
-
-     assert numbers, "Response should contain at least one number"
-
-     # Check if any number matches the expected result
-     has_correct_result = any(abs(float(n) - expected_result) < 0.001 for n in numbers)
-     assert (
-         has_correct_result
-     ), f"Response should contain the result {expected_result}, got {response}"
-
-     # Verify the response indicates it's a final answer
-     assert (
-         "final_answer" in response.lower()
-     ), "Response should indicate it's using final_answer"
-
-
- def test_document_qa_and_image_generation_with_steps():
-     """Test document QA and image generation with step verification."""
-     agent = AgentRunner()
-     question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"
-
-     # Process the question
-     response = agent(question)
-
-     # Verify step logs exist and demonstrate logical progression
-     assert agent.last_state is not None, "Last state should be stored"
-     step_logs = agent.last_state.get("step_logs", [])
-     assert len(step_logs) > 0, "Should have recorded step logs"
-
-     # Check for search and image generation steps
-     has_search_step = False
-     has_image_step = False
-
-     for step in step_logs:
-         assert "step_number" in step, "Each step should have a step number"
-         assert any(
-             key in step for key in ["thought", "code", "observation"]
-         ), "Each step should have at least one of thought/code/observation"
-
-         # Look for search and image steps in thoughts or code
-         step_content = str(step.get("thought", "")) + str(step.get("code", ""))
-         if "search" in step_content.lower():
-             has_search_step = True
-         if "image" in step_content.lower() or "dalle" in step_content.lower():
-             has_image_step = True
-
-     assert has_search_step, "Should include a search step"
-     assert has_image_step, "Should include an image generation step"
-     assert (
-         "final_answer" in response.lower()
-     ), "Response should indicate it's using final_answer"
-
-
- if __name__ == "__main__":
-     pytest.main([__file__, "-s", "-v", "-x"])
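Note: the assertions in the removed tests imply a particular shape for each `step_logs` entry: a `step_number` plus at least one of `thought`, `code`, or `observation`. A minimal record that would satisfy them, with field values invented for illustration:

```python
# A step_logs entry that satisfies the structural assertions above (values invented).
example_step = {
    "step_number": 1,
    "thought": "Add the three numbers together.",
    "code": "final_answer(5 + 3 + 1294.678)",
    "observation": "1302.678",
}
assert "step_number" in example_step
assert any(key in example_step for key in ["thought", "code", "observation"])
```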
tools.py DELETED
@@ -1,85 +0,0 @@
- import logging
-
- from smolagents import DuckDuckGoSearchTool, Tool, WikipediaSearchTool
-
- logger = logging.getLogger(__name__)
-
-
- class GeneralSearchTool(Tool):
-     name = "search"
-     description = """Performs a general web search using both DuckDuckGo and Wikipedia, then returns the combined search results."""
-     inputs = {
-         "query": {"type": "string", "description": "The search query to perform."}
-     }
-     output_type = "string"
-
-     def __init__(self, max_results=10, **kwargs):
-         super().__init__()
-         self.max_results = max_results
-         self.ddg_tool = DuckDuckGoSearchTool()
-         self.wiki_tool = WikipediaSearchTool()
-
-     def forward(self, query: str) -> str:
-         # Get DuckDuckGo results
-         try:
-             ddg_results = self.ddg_tool.forward(query)
-         except Exception as e:
-             ddg_results = "No DuckDuckGo results found."
-             logger.warning(f"DuckDuckGo search failed: {str(e)}")
-
-         # Get Wikipedia results
-         try:
-             wiki_results = self.wiki_tool.forward(query)
-         except Exception as e:
-             wiki_results = "No Wikipedia results found."
-             logger.warning(f"Wikipedia search failed: {str(e)}")
-
-         # Combine and format results
-         output = []
-         if ddg_results and ddg_results != "No DuckDuckGo results found.":
-             output.append("## DuckDuckGo Search Results\n\n" + ddg_results)
-         if wiki_results and wiki_results != "No Wikipedia results found.":
-             output.append("## Wikipedia Results\n\n" + wiki_results)
-
-         if not output:
-             raise Exception("No results found! Try a less restrictive/shorter query.")
-
-         return "\n\n---\n\n".join(output)
-
-
- class MathTool(Tool):
-     name = "math"
-     description = """Performs mathematical calculations and returns the result."""
-     inputs = {
-         "expression": {
-             "type": "string",
-             "description": "The mathematical expression to evaluate.",
-         }
-     }
-     output_type = "string"
-
-     def forward(self, expression: str) -> str:
-         try:
-             # Use eval with a restricted set of builtins for safety
-             safe_dict = {
-                 "__builtins__": {
-                     "abs": abs,
-                     "round": round,
-                     "min": min,
-                     "max": max,
-                     "sum": sum,
-                 }
-             }
-             result = eval(expression, safe_dict)
-             return str(result)
-         except Exception as e:
-             raise Exception(f"Error evaluating expression: {str(e)}")
-
-
- # Export all tools
- tools = [
-     # DuckDuckGoSearchTool(),
-     GeneralSearchTool(),
-     MathTool(),
-     # WikipediaSearchTool(),
- ]
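Note: the removed tools implement the standard smolagents `Tool` interface, so they could be exercised directly through `forward()`. A quick sketch, assuming the class definitions above were still importable; the outputs shown in comments are approximate:

```python
# Illustrative only: exercising the removed tools via their forward() methods.
# Assumes GeneralSearchTool and MathTool (defined above) are in scope.
search_tool = GeneralSearchTool()
math_tool = MathTool()

print(math_tool.forward("max(5, 3) + 1294.678"))  # -> "1299.678"
print(search_tool.forward("Mona Lisa")[:200])     # first 200 chars of combined results
```

One design caveat worth noting if these tools are ever reinstated: `MathTool` still calls `eval` on arbitrary expressions, and restricting `__builtins__` narrows the attack surface without making the evaluation fully safe.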