Add .env.example for API configuration and update .gitignore to exclude .env files. Refactor app.py to implement BasicAgent class, replacing AgentRunner, and improve error handling and logging. Remove unused configuration and graph modules to streamline the codebase.
- .env.example +4 -0
- .gitignore +1 -0
- app.py +41 -53
- configuration.py +0 -33
- graph.py +0 -241
- main.py +255 -0
- prompts/code_agent.yaml +0 -325
- prompts/toolcalling_agent.yaml +0 -239
- requirements.txt +3 -0
- runner.py +0 -180
- test_agent.py +0 -263
- tools.py +0 -85
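
To try the new runner locally, the flow implied by this commit is: copy .env.example to .env, fill in the API values, install the pinned requirements, and run main.py's streaming entry point. A minimal sketch (assumptions: python-dotenv and the listed requirements are installed, and the task string here is only an example):

    # local smoke test for the runner added in this commit
    import asyncio
    import uuid

    from main import run_with_streaming  # reads API_BASE / API_KEY / MODEL_ID from .env

    result = asyncio.run(run_with_streaming("What is the capital of France?", str(uuid.uuid4())))
    print(result["final_answer"])
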
.env.example
ADDED
@@ -0,0 +1,4 @@
+# Required API configuration
+API_BASE=https://api.anthropic.com
+API_KEY=sk-replace-with-your-api-key
+MODEL_ID=anthropic/claude-3-7-sonnet-20250219
.gitignore
CHANGED
@@ -1,3 +1,4 @@
+.env
 __pycache__
 .pytest_cache
 .venv
app.py
CHANGED
@@ -1,25 +1,34 @@
 import os
-
 import gradio as gr
-import pandas as pd
 import requests
-
+import inspect
+import pandas as pd
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+# --- Basic Agent Definition ---
+# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
+
+def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")
+    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username
+        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -31,7 +40,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent =
+        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -46,16 +55,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -72,36 +81,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append(
-            )
-            results_log.append(
-                {
-                    "Task ID": task_id,
-                    "Question": question_text,
-                    "Submitted Answer": submitted_answer,
-                }
-            )
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-                {
-                    "Task ID": task_id,
-                    "Question": question_text,
-                    "Submitted Answer": f"AGENT ERROR: {e}",
-                }
-            )
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload,
-    }
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
@@ -171,19 +162,20 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
-    )
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
 
 if __name__ == "__main__":
-    print("\n" + "-"
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -191,18 +183,14 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup:
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(
-            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
-        )
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print(
-            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
-        )
+        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
-    print("-"
+    print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
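
The BasicAgent above only returns a canned string; the evaluation loop calls it as agent(question_text), so any replacement just needs to be a callable that returns a string. One possible wiring to the LiteLLM-backed runner added in main.py, shown as a sketch rather than as part of this commit (it assumes main.py imports cleanly inside the Space and its environment variables are set):

    # hypothetical drop-in replacement for BasicAgent, not in this commit
    import asyncio
    import uuid

    from main import run_with_streaming

    class LangGraphAgent:
        """Delegates each question to the main.py graph and returns its final answer."""
        def __call__(self, question: str) -> str:
            result = asyncio.run(run_with_streaming(question, str(uuid.uuid4())))
            return result.get("final_answer") or f"AGENT ERROR: {result.get('error')}"
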
configuration.py
DELETED
@@ -1,33 +0,0 @@
-"""Define the configurable parameters for the agent."""
-
-from __future__ import annotations
-
-import os
-from dataclasses import dataclass, fields
-from typing import Optional
-
-from langchain_core.runnables import RunnableConfig
-
-
-@dataclass(kw_only=True)
-class Configuration:
-    """The configuration for the agent."""
-
-    # API configuration
-    api_base: Optional[str] = "http://localhost:11434"
-    api_key: Optional[str] = os.getenv("MODEL_API_KEY")
-    model_id: Optional[str] = (
-        f"ollama/{os.getenv('OLLAMA_MODEL', 'qwen2.5-coder:0.5b')}"
-    )
-
-    # Agent configuration
-    my_configurable_param: str = "changeme"
-
-    @classmethod
-    def from_runnable_config(
-        cls, config: Optional[RunnableConfig] = None
-    ) -> Configuration:
-        """Create a Configuration instance from a RunnableConfig object."""
-        configurable = (config.get("configurable") or {}) if config else {}
-        _fields = {f.name for f in fields(cls) if f.init}
-        return cls(**{k: v for k, v in configurable.items() if k in _fields})
graph.py
DELETED
@@ -1,241 +0,0 @@
-"""Define the agent graph and its components."""
-
-import logging
-import os
-from datetime import datetime
-from typing import Dict, List, Optional, TypedDict, Union
-
-import yaml
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
-from langchain_core.runnables import RunnableConfig
-from langgraph.graph import END, StateGraph
-from langgraph.types import interrupt
-from smolagents import CodeAgent, LiteLLMModel
-
-from configuration import Configuration
-from tools import tools
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Enable LiteLLM debug logging only if environment variable is set
-import litellm
-
-if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
-    litellm.set_verbose = True
-    logger.setLevel(logging.DEBUG)
-else:
-    litellm.set_verbose = False
-    logger.setLevel(logging.INFO)
-
-# Configure LiteLLM to drop unsupported parameters
-litellm.drop_params = True
-
-# Load default prompt templates from local file
-current_dir = os.path.dirname(os.path.abspath(__file__))
-prompts_dir = os.path.join(current_dir, "prompts")
-yaml_path = os.path.join(prompts_dir, "code_agent.yaml")
-
-with open(yaml_path, "r") as f:
-    prompt_templates = yaml.safe_load(f)
-
-# Initialize the model and agent using configuration
-config = Configuration()
-model = LiteLLMModel(
-    api_base=config.api_base,
-    api_key=config.api_key,
-    model_id=config.model_id,
-)
-
-agent = CodeAgent(
-    add_base_tools=True,
-    max_steps=1,  # Execute one step at a time
-    model=model,
-    prompt_templates=prompt_templates,
-    tools=tools,
-    verbosity_level=logging.DEBUG,
-)
-
-
-class AgentState(TypedDict):
-    """State for the agent graph."""
-
-    messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
-    question: str
-    answer: Optional[str]
-    step_logs: List[Dict]
-    is_complete: bool
-    step_count: int
-    # Add memory-related fields
-    context: Dict[str, any]  # For storing contextual information
-    memory_buffer: List[Dict]  # For storing important information across steps
-    last_action: Optional[str]  # Track the last action taken
-    action_history: List[Dict]  # History of actions taken
-    error_count: int  # Track error frequency
-    success_count: int  # Track successful operations
-
-
-class AgentNode:
-    """Node that runs the agent."""
-
-    def __init__(self, agent: CodeAgent):
-        """Initialize the agent node with an agent."""
-        self.agent = agent
-
-    def __call__(
-        self, state: AgentState, config: Optional[RunnableConfig] = None
-    ) -> AgentState:
-        """Run the agent on the current state."""
-        # Log current state
-        logger.info("Current state before processing:")
-        logger.info(f"Messages: {state['messages']}")
-        logger.info(f"Question: {state['question']}")
-        logger.info(f"Answer: {state['answer']}")
-
-        # Get configuration
-        cfg = Configuration.from_runnable_config(config)
-        logger.info(f"Using configuration: {cfg}")
-
-        # Log execution start
-        logger.info("Starting agent execution")
-
-        try:
-            # Run the agent
-            result = self.agent.run(state["question"])
-
-            # Update memory-related fields
-            new_state = state.copy()
-            new_state["messages"].append(AIMessage(content=result))
-            new_state["answer"] = result
-            new_state["step_count"] += 1
-            new_state["last_action"] = "agent_response"
-            new_state["action_history"].append(
-                {
-                    "step": state["step_count"],
-                    "action": "agent_response",
-                    "result": result,
-                }
-            )
-            new_state["success_count"] += 1
-
-            # Store important information in memory buffer
-            if result:
-                new_state["memory_buffer"].append(
-                    {
-                        "step": state["step_count"],
-                        "content": result,
-                        "timestamp": datetime.now().isoformat(),
-                    }
-                )
-
-        except Exception as e:
-            logger.error(f"Error during agent execution: {str(e)}")
-            new_state = state.copy()
-            new_state["error_count"] += 1
-            new_state["action_history"].append(
-                {"step": state["step_count"], "action": "error", "error": str(e)}
-            )
-            raise
-
-        # Log updated state
-        logger.info("Updated state after processing:")
-        logger.info(f"Messages: {new_state['messages']}")
-        logger.info(f"Question: {new_state['question']}")
-        logger.info(f"Answer: {new_state['answer']}")
-
-        return new_state
-
-
-class StepCallbackNode:
-    """Node that handles step callbacks and user interaction."""
-
-    def __init__(self, name: str):
-        self.name = name
-
-    def __call__(self, state: dict) -> dict:
-        """Process the state and handle user interaction."""
-        print(f"\nCurrent step: {state.get('step_count', 0)}")
-        print(f"Question: {state.get('question', 'No question')}")
-        print(f"Current answer: {state.get('answer', 'No answer yet')}\n")
-
-        while True:
-            choice = input(
-                "Enter 'c' to continue, 'q' to quit, 'i' for more info, or 'r' to reject answer: "
-            ).lower()
-
-            if choice == "c":
-                # Mark as complete to continue
-                state["is_complete"] = True
-                return state
-            elif choice == "q":
-                # Mark as complete and set answer to None to quit
-                state["is_complete"] = True
-                state["answer"] = None
-                return state
-            elif choice == "i":
-                # Show more information but don't mark as complete
-                print("\nAdditional Information:")
-                print(f"Messages: {state.get('messages', [])}")
-                print(f"Step Logs: {state.get('step_logs', [])}")
-                print(f"Context: {state.get('context', {})}")
-                print(f"Memory Buffer: {state.get('memory_buffer', [])}")
-                print(f"Last Action: {state.get('last_action', None)}")
-                print(f"Action History: {state.get('action_history', [])}")
-                print(f"Error Count: {state.get('error_count', 0)}")
-                print(f"Success Count: {state.get('success_count', 0)}\n")
-            elif choice == "r":
-                # Reject the current answer and continue execution
-                print("\nRejecting current answer and continuing execution...")
-                # Clear the message history to prevent confusion
-                state["messages"] = []
-                state["answer"] = None
-                state["is_complete"] = False
-                return state
-            else:
-                print("Invalid choice. Please enter 'c', 'q', 'i', or 'r'.")
-
-
-def build_agent_graph(agent: AgentNode) -> StateGraph:
-    """Build the agent graph."""
-    # Initialize the graph
-    workflow = StateGraph(AgentState)
-
-    # Add nodes
-    workflow.add_node("agent", agent)
-    workflow.add_node("callback", StepCallbackNode("callback"))
-
-    # Add edges
-    workflow.add_edge("agent", "callback")
-
-    # Add conditional edges for callback
-    def should_continue(state: AgentState) -> str:
-        """Determine the next node based on state."""
-        # If we have no answer, continue to agent
-        if not state["answer"]:
-            logger.info("No answer found, continuing to agent")
-            return "agent"
-
-        # If we have an answer and it's complete, we're done
-        if state["is_complete"]:
-            logger.info(f"Found complete answer: {state['answer']}")
-            return END
-
-        # Otherwise, go to callback for user input
-        logger.info(f"Waiting for user input for answer: {state['answer']}")
-        return "callback"
-
-    workflow.add_conditional_edges(
-        "callback",
-        should_continue,
-        {END: END, "agent": "agent", "callback": "callback"},
-    )
-
-    # Set entry point
-    workflow.set_entry_point("agent")
-
-    return workflow.compile()
-
-
-# Initialize the agent graph
-agent_graph = build_agent_graph(AgentNode(agent))
main.py
ADDED
@@ -0,0 +1,255 @@
+import logging
+import os
+import uuid  # for generating thread IDs for checkpointer
+from typing import AsyncIterator, Optional, TypedDict
+
+from dotenv import find_dotenv, load_dotenv
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import END, START, StateGraph
+from smolagents import CodeAgent, LiteLLMModel
+from smolagents.memory import ActionStep, FinalAnswerStep
+from smolagents.monitoring import LogLevel
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv(find_dotenv())
+
+# Get required environment variables with validation
+API_BASE = os.getenv("API_BASE")
+API_KEY = os.getenv("API_KEY")
+MODEL_ID = os.getenv("MODEL_ID")
+
+if not all([API_BASE, API_KEY, MODEL_ID]):
+    raise ValueError(
+        "Missing required environment variables: API_BASE, API_KEY, MODEL_ID"
+    )
+
+
+# Define the state types for our graph
+class AgentState(TypedDict):
+    task: str
+    current_step: Optional[dict]  # Store serializable dict instead of ActionStep
+    error: Optional[str]
+    answer_text: Optional[str]
+
+
+# Initialize model with error handling
+try:
+    model = LiteLLMModel(
+        api_base=API_BASE,
+        api_key=API_KEY,
+        model_id=MODEL_ID,
+    )
+except Exception as e:
+    logger.error(f"Failed to initialize model: {str(e)}")
+    raise
+
+# Initialize agent with error handling
+try:
+    agent = CodeAgent(
+        add_base_tools=True,
+        additional_authorized_imports=["pandas", "numpy"],
+        max_steps=10,
+        model=model,
+        tools=[],
+        step_callbacks=None,
+        verbosity_level=LogLevel.ERROR,
+    )
+    agent.logger.console.width = 66
+except Exception as e:
+    logger.error(f"Failed to initialize agent: {str(e)}")
+    raise
+
+
+async def process_step(state: AgentState) -> AgentState:
+    """Process a single step of the agent's execution."""
+    try:
+        # Clear previous step results before running agent.run
+        state["current_step"] = None
+        state["answer_text"] = None
+        state["error"] = None
+
+        steps = agent.run(
+            task=state["task"],
+            additional_args=None,
+            images=None,
+            max_steps=1,  # Process one step at a time
+            stream=True,
+            reset=False,  # Maintain agent's internal state across process_step calls
+        )
+
+        for step in steps:
+            if isinstance(step, ActionStep):
+                # Convert ActionStep to serializable dict using the correct attributes
+                state["current_step"] = {
+                    "step_number": step.step_number,
+                    "model_output": step.model_output,
+                    "observations": step.observations,
+                    "tool_calls": [
+                        {"name": tc.name, "arguments": tc.arguments}
+                        for tc in (step.tool_calls or [])
+                    ],
+                    "action_output": step.action_output,
+                }
+                logger.info(f"Processed action step {step.step_number}")
+            elif isinstance(step, FinalAnswerStep):
+                state["answer_text"] = step.final_answer
+                logger.info("Processed final answer")
+                logger.debug(f"Final answer details: {step}")
+                logger.info(f"Extracted answer text: {state['answer_text']}")
+                # Return immediately when we get a final answer
+                return state
+        # If loop finishes without FinalAnswerStep, return current state
+        return state
+    except Exception as e:
+        state["error"] = str(e)
+        logger.error(f"Error during agent execution step: {str(e)}")
+        return state
+
+
+def should_continue(state: AgentState) -> bool:
+    """Determine if the agent should continue processing steps."""
+    # Continue if we don't have an answer_text and no error
+    continue_execution = state.get("answer_text") is None and state.get("error") is None
+    logger.debug(
+        f"Checking should_continue: answer_text={state.get('answer_text') is not None}, error={state.get('error') is not None} -> Continue={continue_execution}"
+    )
+    return continue_execution
+
+
+# Build the LangGraph graph once with persistence
+memory = MemorySaver()
+builder = StateGraph(AgentState)
+builder.add_node("process_step", process_step)
+builder.add_edge(START, "process_step")
+builder.add_conditional_edges(
+    "process_step", should_continue, {True: "process_step", False: END}
+)
+graph = builder.compile(checkpointer=memory)
+
+
+async def stream_execution(task: str, thread_id: str) -> AsyncIterator[AgentState]:
+    """Stream the execution of the agent."""
+    if not task:
+        raise ValueError("Task cannot be empty")
+
+    logger.info(f"Initializing agent execution for task: {task}")
+
+    # Initialize the state
+    initial_state: AgentState = {
+        "task": task,
+        "current_step": None,
+        "error": None,
+        "answer_text": None,
+    }
+
+    # Pass thread_id via the config dict so the checkpointer can persist state
+    async for state in graph.astream(
+        initial_state, {"configurable": {"thread_id": thread_id}}
+    ):
+        yield state
+        # Propagate error immediately if it occurs without an answer
+        if state.get("error") and not state.get("answer_text"):
+            logger.error(f"Propagating error from stream: {state['error']}")
+            raise Exception(state["error"])
+
+
+async def run_with_streaming(task: str, thread_id: str) -> dict:
+    """Run the agent with streaming output and return the results."""
+    last_state = None
+    steps = []
+    error = None
+    final_answer_text = None
+
+    try:
+        logger.info(f"Starting execution run for task: {task}")
+        async for state in stream_execution(task, thread_id):
+            last_state = state
+
+            if current_step := state.get("current_step"):
+                if not steps or steps[-1]["step_number"] != current_step["step_number"]:
+                    steps.append(current_step)
+                    # Keep print here for direct user feedback during streaming
+                    print(f"\nStep {current_step['step_number']}:")
+                    print(f"Model Output: {current_step['model_output']}")
+                    print(f"Observations: {current_step['observations']}")
+                    if current_step.get("tool_calls"):
+                        print("Tool Calls:")
+                        for tc in current_step["tool_calls"]:
+                            print(f"  - {tc['name']}: {tc['arguments']}")
+                    if current_step.get("action_output"):
+                        print(f"Action Output: {current_step['action_output']}")
+
+        # After the stream is finished, process the last state
+        logger.info("Stream finished.")
+        if last_state:
+            # LangGraph streams dicts where keys are node names, values are state dicts
+            node_name = list(last_state.keys())[0]
+            actual_state = last_state.get(node_name)
+            if actual_state:
+                final_answer_text = actual_state.get("answer_text")
+                error = actual_state.get("error")
+                logger.info(
+                    f"Final answer text extracted from last state: {final_answer_text}"
+                )
+                logger.info(f"Error extracted from last state: {error}")
+                # Ensure steps list is consistent with the final state if needed
+                last_step_in_state = actual_state.get("current_step")
+                if last_step_in_state and (
+                    not steps
+                    or steps[-1]["step_number"] != last_step_in_state["step_number"]
+                ):
+                    logger.debug("Adding last step from final state to steps list.")
+                    steps.append(last_step_in_state)
+            else:
+                logger.warning(
+                    "Could not find actual state dictionary within last_state."
+                )
+
+        return {"steps": steps, "final_answer": final_answer_text, "error": error}
+
+    except Exception as e:
+        import traceback
+
+        logger.error(
+            f"Exception during run_with_streaming: {str(e)}\n{traceback.format_exc()}"
+        )
+        # Attempt to return based on the last known state even if exception occurred outside stream
+        final_answer_text = None
+        error_msg = str(e)
+        if last_state:
+            node_name = list(last_state.keys())[0]
+            actual_state = last_state.get(node_name)
+            if actual_state:
+                final_answer_text = actual_state.get("answer_text")
+
+        return {"steps": steps, "final_answer": final_answer_text, "error": error_msg}
+
+
+if __name__ == "__main__":
+    import asyncio
+    import uuid
+
+    # Example Usage
+    task_to_run = "What is the capital of France?"
+    thread_id = str(uuid.uuid4())  # Generate a unique thread ID for this run
+    logger.info(
+        f"Starting agent run from __main__ for task: '{task_to_run}' with thread_id: {thread_id}"
+    )
+    result = asyncio.run(run_with_streaming(task_to_run, thread_id))
+    logger.info("Agent run finished.")
+
+    # Print final results
+    print("\n--- Execution Results ---")
+    print(f"Number of Steps: {len(result.get('steps', []))}")
+    # Optionally print step details
+    # for i, step in enumerate(result.get('steps', [])):
+    #     print(f"Step {i+1} Details: {step}")
+    print(f"Final Answer: {result.get('final_answer') or 'Not found'}")
+    if err := result.get("error"):
+        print(f"Error: {err}")
prompts/code_agent.yaml
DELETED
@@ -1,325 +0,0 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
-  To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
-  To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
-
-  At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
-  Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence.
-  During each intermediate step, you can use 'print()' to save whatever important information you will then need.
-  These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
-  In the end you have to return a final answer using the `final_answer` tool.
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
-  Code:
-  ```py
-  answer = document_qa(document=document, question="Who is the oldest person mentioned?")
-  print(answer)
-  ```<end_code>
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Thought: I will now generate an image showcasing the oldest person.
-  Code:
-  ```py
-  image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
-  final_answer(image)
-  ```<end_code>
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-  Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
-  Code:
-  ```py
-  result = 5 + 3 + 1294.678
-  final_answer(result)
-  ```<end_code>
-
-  ---
-  Task:
-  "Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.
-  You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
-  {'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
-
-  Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
-  Code:
-  ```py
-  translated_question = translator(question=question, src_lang="French", tgt_lang="English")
-  print(f"The translated question is {translated_question}.")
-  answer = image_qa(image=image, question=translated_question)
-  final_answer(f"The answer is {answer}")
-  ```<end_code>
-
-  ---
-  Task:
-  In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
-  What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
-
-  Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
-  Code:
-  ```py
-  pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
-  print(pages)
-  ```<end_code>
-  Observation:
-  No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
-
-  Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
-  Code:
-  ```py
-  pages = search(query="1979 interview Stanislaus Ulam")
-  print(pages)
-  ```<end_code>
-  Observation:
-  Found 6 pages:
-  [Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
-
-  [Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
-
-  (truncated)
-
-  Thought: I will read the first 2 pages to know more.
-  Code:
-  ```py
-  for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
-      whole_page = visit_webpage(url)
-      print(whole_page)
-      print("\n" + "="*80 + "\n")  # Print separator between pages
-  ```<end_code>
-  Observation:
-  Manhattan Project Locations:
-  Los Alamos, NM
-  Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
-  (truncated)
-
-  Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
-  Code:
-  ```py
-  final_answer("diminished")
-  ```<end_code>
-
-  ---
-  Task: "Which city has the highest population: Guangzhou or Shanghai?"
-
-  Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
-  Code:
-  ```py
-  for city in ["Guangzhou", "Shanghai"]:
-      print(f"Population {city}:", search(f"{city} population")
-  ```<end_code>
-  Observation:
-  Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-  Population Shanghai: '26 million (2019)'
-
-  Thought: Now I know that Shanghai has the highest population.
-  Code:
-  ```py
-  final_answer("Shanghai")
-  ```<end_code>
-
-  ---
-  Task: "What is the current age of the pope, raised to the power 0.36?"
-
-  Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.
-  Code:
-  ```py
-  pope_age_wiki = wiki(query="current pope age")
-  print("Pope age as per wikipedia:", pope_age_wiki)
-  pope_age_search = web_search(query="current pope age")
-  print("Pope age as per google search:", pope_age_search)
-  ```<end_code>
-  Observation:
-  Pope age: "The pope Francis is currently 88 years old."
-
-  Thought: I know that the pope is 88 years old. Let's compute the result using python code.
-  Code:
-  ```py
-  pope_current_age = 88 ** 0.36
-  final_answer(pope_current_age)
-  ```<end_code>
-
-  Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:
-  ```python
-  {%- for tool in tools.values() %}
-  def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
-      """{{ tool.description }}
-
-      Args:
-      {%- for arg_name, arg_info in tool.inputs.items() %}
-          {{ arg_name }}: {{ arg_info.description }}
-      {%- endfor %}
-      """
-  {% endfor %}
-  ```
-
-  {%- if managed_agents and managed_agents.values() | list %}
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
-  Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
-  Here is a list of the team members that you can call:
-  ```python
-  {%- for agent in managed_agents.values() %}
-  def {{ agent.name }}("Your query goes here.") -> str:
-      """{{ agent.description }}"""
-  {% endfor %}
-  ```
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
-  2. Use only variables that you have defined!
-  3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'.
-  4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
-  5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
-  6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
-  7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
-  8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
-  9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
-  10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
-
-  Now Begin!
-planning:
-  initial_plan : |-
-    You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.
-    Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
-
-    ## 1. Facts survey
-    You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
-    These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
-    ### 1.1. Facts given in the task
-    List here the specific facts given in the task that could help you (there might be nothing here).
-
-    ### 1.2. Facts to look up
-    List here any facts that we may need to look up.
-    Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
-
-    ### 1.3. Facts to derive
-    List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
-
-    Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.
-
-    ## 2. Plan
-    Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
-    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
-    You can leverage these tools, behaving like regular python functions:
-    ```python
-    {%- for tool in tools.values() %}
-    def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
-        """{{ tool.description }}
-
-        Args:
-        {%- for arg_name, arg_info in tool.inputs.items() %}
-            {{ arg_name }}: {{ arg_info.description }}
-        {%- endfor %}
-        """
-    {% endfor %}
-    ```
-
-    {%- if managed_agents and managed_agents.values() | list %}
-    You can also give tasks to team members.
-    Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
-    Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
-    Here is a list of the team members that you can call:
-    ```python
-    {%- for agent in managed_agents.values() %}
-    def {{ agent.name }}("Your query goes here.") -> str:
-        """{{ agent.description }}"""
-    {% endfor %}
-    ```
-    {%- endif %}
-
-    ---
-    Now begin! Here is your task:
-    ```
-    {{task}}
-    ```
-    First in part 1, write the facts survey, then in part 2, write your plan.
-  update_plan_pre_messages: |-
-    You are a world expert at analyzing a situation, and plan accordingly towards solving a task.
-    You have been given the following task:
-    ```
-    {{task}}
-    ```
-
-    Below you will find a history of attempts made to solve this task.
-    You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
-    If the previous tries so far have met some success, your updated plan can build on these results.
-    If you are stalled, you can make a completely new plan starting from scratch.
-
-    Find the task and history below:
-  update_plan_post_messages: |-
-    Now write your updated facts below, taking into account the above history:
-    ## 1. Updated facts survey
-    ### 1.1. Facts given in the task
-    ### 1.2. Facts that we have learned
-    ### 1.3. Facts still to look up
-    ### 1.4. Facts still to derive
-
-    Then write a step-by-step high-level plan to solve the task above.
-    ## 2. Plan
-    ### 2. 1. ...
-    Etc.
-    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-    Beware that you have {remaining_steps} steps remaining.
-    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
-    You can leverage these tools, behaving like regular python functions:
-    ```python
-    {%- for tool in tools.values() %}
-    def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
-        """{{ tool.description }}
-
-        Args:
-        {%- for arg_name, arg_info in tool.inputs.items() %}
-            {{ arg_name }}: {{ arg_info.description }}
-        {%- endfor %}"""
-    {% endfor %}
-    ```
-
-    {%- if managed_agents and managed_agents.values() | list %}
-    You can also give tasks to team members.
-    Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
-    Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
-    Here is a list of the team members that you can call:
-    ```python
-    {%- for agent in managed_agents.values() %}
-    def {{ agent.name }}("Your query goes here.") -> str:
-        """{{ agent.description }}"""
-    {% endfor %}
-    ```
-    {%- endif %}
-
-    Now write your updated facts survey below, then your new plan.
-managed_agent:
-  task: |-
-    You're a helpful agent named '{{name}}'.
-    You have been submitted this task by your manager.
-    ---
-    Task:
-    {{task}}
-    ---
-    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-    Your final_answer WILL HAVE to contain these parts:
-    ### 1. Task outcome (short version):
-    ### 2. Task outcome (extremely detailed version):
-    ### 3. Additional context (if relevant):
-
-    Put all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.
-    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-  report: |-
-    Here is the final answer from your managed agent '{{name}}':
-    {{final_answer}}
-final_answer:
-  pre_messages: |-
-    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-  post_messages: |-
-    Based on the above, please provide an answer to the following user task:
-    {{task}}
prompts/toolcalling_agent.yaml
DELETED
@@ -1,239 +0,0 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
-  To do so, you have been given access to some tools.
-
-  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
-  This Action/Observation can repeat N times, you should take several steps when needed.
-
-  You can use the result of the previous action as input for the next action.
-  The observation will always be a string: it can represent a file, like "image_1.jpg".
-  Then you can use it as input for the next action. You can do it for instance as follows:
-
-  Observation: "image_1.jpg"
-
-  Action:
-  {
-    "name": "image_transformer",
-    "arguments": {"image": "image_1.jpg"}
-  }
-
-  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": {"answer": "insert your final answer here"}
-  }
-
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Action:
-  {
-    "name": "document_qa",
-    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
-  }
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Action:
-  {
-    "name": "image_generator",
-    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
-  }
-  Observation: "image.png"
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "image.png"
-  }
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-  Action:
-  {
-    "name": "python_interpreter",
-    "arguments": {"code": "5 + 3 + 1294.678"}
-  }
-  Observation: 1302.678
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "1302.678"
-  }
-
-  ---
-  Task: "Which city has the highest population , Guangzhou or Shanghai?"
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Guangzhou"
-  }
-  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Shanghai"
-  }
-  Observation: '26 million (2019)'
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "Shanghai"
-  }
-
-  Above example were using notional tools that might not exist for you. You only have access to these tools:
-  {%- for tool in tools.values() %}
-  - {{ tool.name }}: {{ tool.description }}
-    Takes inputs: {{tool.inputs}}
-    Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
-
-  {%- if managed_agents and managed_agents.values() | list %}
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
-  Here is a list of the team members that you can call:
-  {%- for agent in managed_agents.values() %}
-  - {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. ALWAYS provide a tool call, else you will fail.
-  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
-  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
-  If no tool call is needed, use final_answer tool to return your answer.
-  4. Never re-do a tool call that you previously did with the exact same parameters.
-
-  Now Begin!
-planning:
-  initial_plan : |-
-    You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.
-    Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
-
-    ## 1. Facts survey
-    You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
-    These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
-    ### 1.1. Facts given in the task
-    List here the specific facts given in the task that could help you (there might be nothing here).
-
-    ### 1.2. Facts to look up
-    List here any facts that we may need to look up.
-    Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
-
-    ### 1.3. Facts to derive
-    List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
-
-    Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above.
-
-    ## 2. Plan
-    Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
-    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
-    You can leverage these tools:
-    {%- for tool in tools.values() %}
-    - {{ tool.name }}: {{ tool.description }}
-      Takes inputs: {{tool.inputs}}
-      Returns an output of type: {{tool.output_type}}
-    {%- endfor %}
-
-    {%- if managed_agents and managed_agents.values() | list %}
-    You can also give tasks to team members.
-    Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-    Given that this team member is a real human, you should be very verbose in your task.
-    Here is a list of the team members that you can call:
-    {%- for agent in managed_agents.values() %}
-    - {{ agent.name }}: {{ agent.description }}
-    {%- endfor %}
-    {%- endif %}
-
-    ---
-    Now begin! Here is your task:
-    ```
-    {{task}}
-    ```
-    First in part 1, write the facts survey, then in part 2, write your plan.
-  update_plan_pre_messages: |-
-    You are a world expert at analyzing a situation, and plan accordingly towards solving a task.
|
168 |
-
You have been given the following task:
|
169 |
-
```
|
170 |
-
{{task}}
|
171 |
-
```
|
172 |
-
|
173 |
-
Below you will find a history of attempts made to solve this task.
|
174 |
-
You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
|
175 |
-
If the previous tries so far have met some success, your updated plan can build on these results.
|
176 |
-
If you are stalled, you can make a completely new plan starting from scratch.
|
177 |
-
|
178 |
-
Find the task and history below:
|
179 |
-
update_plan_post_messages: |-
|
180 |
-
Now write your updated facts below, taking into account the above history:
|
181 |
-
## 1. Updated facts survey
|
182 |
-
### 1.1. Facts given in the task
|
183 |
-
### 1.2. Facts that we have learned
|
184 |
-
### 1.3. Facts still to look up
|
185 |
-
### 1.4. Facts still to derive
|
186 |
-
|
187 |
-
Then write a step-by-step high-level plan to solve the task above.
|
188 |
-
## 2. Plan
|
189 |
-
### 2. 1. ...
|
190 |
-
Etc.
|
191 |
-
This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
|
192 |
-
Beware that you have {remaining_steps} steps remaining.
|
193 |
-
Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
|
194 |
-
After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
|
195 |
-
|
196 |
-
You can leverage these tools:
|
197 |
-
{%- for tool in tools.values() %}
|
198 |
-
- {{ tool.name }}: {{ tool.description }}
|
199 |
-
Takes inputs: {{tool.inputs}}
|
200 |
-
Returns an output of type: {{tool.output_type}}
|
201 |
-
{%- endfor %}
|
202 |
-
|
203 |
-
{%- if managed_agents and managed_agents.values() | list %}
|
204 |
-
You can also give tasks to team members.
|
205 |
-
Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
|
206 |
-
Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
|
207 |
-
Here is a list of the team members that you can call:
|
208 |
-
{%- for agent in managed_agents.values() %}
|
209 |
-
- {{ agent.name }}: {{ agent.description }}
|
210 |
-
{%- endfor %}
|
211 |
-
{%- endif %}
|
212 |
-
|
213 |
-
Now write your new plan below.
|
214 |
-
managed_agent:
|
215 |
-
task: |-
|
216 |
-
You're a helpful agent named '{{name}}'.
|
217 |
-
You have been submitted this task by your manager.
|
218 |
-
---
|
219 |
-
Task:
|
220 |
-
{{task}}
|
221 |
-
---
|
222 |
-
You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
|
223 |
-
|
224 |
-
Your final_answer WILL HAVE to contain these parts:
|
225 |
-
### 1. Task outcome (short version):
|
226 |
-
### 2. Task outcome (extremely detailed version):
|
227 |
-
### 3. Additional context (if relevant):
|
228 |
-
|
229 |
-
Put all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.
|
230 |
-
And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
|
231 |
-
report: |-
|
232 |
-
Here is the final answer from your managed agent '{{name}}':
|
233 |
-
{{final_answer}}
|
234 |
-
final_answer:
|
235 |
-
pre_messages: |-
|
236 |
-
An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
|
237 |
-
post_messages: |-
|
238 |
-
Based on the above, please provide an answer to the following user task:
|
239 |
-
{{task}}
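
For orientation: a prompt template of this shape drives a JSON tool-calling loop of the kind smolagents ships as ToolCallingAgent. The sketch below is not part of this repository; the model id and credentials are placeholders, and it relies on the library's built-in prompt templates rather than the deleted file.

# Minimal sketch (not from this repo) of a JSON tool-calling agent in smolagents.
# Model id and credentials below are placeholders, not values taken from this project.
from smolagents import DuckDuckGoSearchTool, LiteLLMModel, ToolCallingAgent

model = LiteLLMModel(model_id="your-provider/your-model")  # placeholder model id

# The agent emits Action blobs like {"name": "web_search", "arguments": {...}}
# and must terminate with the final_answer tool, as the prompt above requires.
agent = ToolCallingAgent(tools=[DuckDuckGoSearchTool()], model=model)
print(agent.run("Which city has the highest population, Guangzhou or Shanghai?"))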
requirements.txt
CHANGED
@@ -3,8 +3,11 @@ duckduckgo-search>=8.0.1
 gradio[oauth]>=5.26.0
 isort>=6.0.1
 langgraph>=0.3.34
+litellm>=1.10.0
 pytest>=8.3.5
 pytest-cov>=6.1.1
+python-dotenv>=1.0.0
 requests>=2.32.3
 smolagents[litellm]>=0.1.3
+typing-extensions>=4.5.0
 wikipedia-api>=0.8.1
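
The new python-dotenv and litellm pins suggest configuration now comes from a local .env file at startup. A minimal sketch of that pattern follows; the environment variable name and model id are illustrative assumptions, not values taken from this diff.

# Illustrative sketch of the python-dotenv + litellm pattern implied by the new pins.
import os

import litellm
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from a local .env file into os.environ

response = litellm.completion(
    model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),  # placeholder variable/model
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)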
runner.py
DELETED
@@ -1,180 +0,0 @@
import logging
import os
import re
import uuid

from langgraph.types import Command

from graph import agent_graph

# Configure logging
logging.basicConfig(level=logging.INFO)  # Default to INFO level
logger = logging.getLogger(__name__)

# Enable LiteLLM debug logging only if environment variable is set
import litellm

if os.getenv("LITELLM_DEBUG", "false").lower() == "true":
    litellm.set_verbose = True
    logger.setLevel(logging.DEBUG)
else:
    litellm.set_verbose = False
    logger.setLevel(logging.INFO)


class AgentRunner:
    """Runner class for the code agent."""

    def __init__(self):
        """Initialize the agent runner with graph and tools."""
        logger.info("Initializing AgentRunner")
        self.graph = agent_graph
        self.last_state = None  # Store the last state for testing/debugging
        self.thread_id = str(
            uuid.uuid4()
        )  # Generate a unique thread_id for this runner
        logger.info(f"Created AgentRunner with thread_id: {self.thread_id}")

    def _extract_answer(self, state: dict) -> str:
        """Extract the answer from the state."""
        if not state:
            return None

        # First try to get answer from direct answer field
        if "answer" in state and state["answer"]:
            logger.info(f"Found answer in direct field: {state['answer']}")
            return state["answer"]

        # Then try to get answer from messages
        if "messages" in state and state["messages"]:
            for msg in reversed(state["messages"]):
                if hasattr(msg, "content") and msg.content:
                    # Look for code blocks that might contain the answer
                    if "```" in msg.content:
                        # Extract code between ```py and ``` or ```python and ```
                        code_match = re.search(
                            r"```(?:py|python)?\s*\n(.*?)\n```", msg.content, re.DOTALL
                        )
                        if code_match:
                            code = code_match.group(1)
                            # Look for final_answer call
                            final_answer_match = re.search(
                                r"final_answer\((.*?)\)", code
                            )
                            if final_answer_match:
                                answer = final_answer_match.group(1)
                                logger.info(
                                    f"Found answer in final_answer call: {answer}"
                                )
                                return answer

                    # If no code block with final_answer, use the content
                    logger.info(f"Found answer in message: {msg.content}")
                    return msg.content

        return None

    def __call__(self, input_data) -> str:
        """Process a question through the agent graph and return the answer.

        Args:
            input_data: Either a question string or a Command object for resuming

        Returns:
            str: The agent's response
        """
        try:
            config = {"configurable": {"thread_id": self.thread_id}}
            logger.info(f"Using config: {config}")

            if isinstance(input_data, str):
                # Initial question
                logger.info(f"Processing initial question: {input_data}")
                initial_state = {
                    "question": input_data,
                    "messages": [],
                    "answer": None,
                    "step_logs": [],
                    "is_complete": False,
                    "step_count": 0,
                    # Initialize new memory fields
                    "context": {},
                    "memory_buffer": [],
                    "last_action": None,
                    "action_history": [],
                    "error_count": 0,
                    "success_count": 0,
                }
                logger.info(f"Initial state: {initial_state}")

                # Use stream to get results
                logger.info("Starting graph stream for initial question")
                for chunk in self.graph.stream(initial_state, config):
                    logger.debug(f"Received chunk: {chunk}")
                    if isinstance(chunk, dict):
                        if "__interrupt__" in chunk:
                            logger.info("Detected interrupt in stream")
                            logger.info(f"Interrupt details: {chunk['__interrupt__']}")
                            # Let the graph handle the interrupt naturally
                            continue
                        answer = self._extract_answer(chunk)
                        if answer:
                            self.last_state = chunk
                            # If the state is complete, return the answer
                            if chunk.get("is_complete", False):
                                return answer
                        else:
                            logger.debug(f"Skipping chunk without answer: {chunk}")
            else:
                # Resuming from interrupt
                logger.info(f"Resuming from interrupt with input: {input_data}")
                for result in self.graph.stream(input_data, config):
                    logger.debug(f"Received resume result: {result}")
                    if isinstance(result, dict):
                        answer = self._extract_answer(result)
                        if answer:
                            self.last_state = result
                            # If the state is complete, return the answer
                            if result.get("is_complete", False):
                                return answer
                        else:
                            logger.debug(f"Skipping result without answer: {result}")

            # If we get here, we didn't find an answer
            logger.warning("No answer generated from stream")
            return "No answer generated"

        except Exception as e:
            logger.error(f"Error processing input: {str(e)}")
            raise


if __name__ == "__main__":
    import argparse

    from langgraph.types import Command

    # Set up argument parser
    parser = argparse.ArgumentParser(description="Run the agent with a question")
    parser.add_argument("question", type=str, help="The question to ask the agent")
    parser.add_argument(
        "--resume",
        type=str,
        help="Value to resume with after an interrupt",
        default=None,
    )
    args = parser.parse_args()

    # Create agent runner
    runner = AgentRunner()

    if args.resume:
        # Resume from interrupt with provided value
        print(f"\nResuming with value: {args.resume}")
        response = runner(Command(resume=args.resume))
    else:
        # Initial run with question
        print(f"\nAsking question: {args.question}")
        response = runner(args.question)

    print(f"\nFinal response: {response}")
test_agent.py
DELETED
@@ -1,263 +0,0 @@
import logging

import pytest

from runner import AgentRunner

# Configure test logger
test_logger = logging.getLogger("test_agent")
test_logger.setLevel(logging.INFO)

# Suppress specific warnings
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"


@pytest.fixture(scope="session")
def agent():
    """Fixture to create and return an AgentRunner instance."""
    test_logger.info("Creating AgentRunner instance")
    return AgentRunner()


# @pytest.fixture(scope="session")
# def questions_data():
#     """Fixture to fetch questions from the API."""
#     test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
#     try:
#         response = requests.get(QUESTIONS_URL, timeout=15)
#         response.raise_for_status()
#         data = response.json()
#         if not data:
#             test_logger.error("Fetched questions list is empty.")
#             return []
#         test_logger.info(f"Fetched {len(data)} questions.")
#         return data
#     except requests.exceptions.RequestException as e:
#         test_logger.error(f"Error fetching questions: {e}")
#         return []
#     except requests.exceptions.JSONDecodeError as e:
#         test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
#         return []
#     except Exception as e:
#         test_logger.error(f"An unexpected error occurred fetching questions: {e}")
#         return []
#
# class TestAppQuestions:
#     """Test cases for questions from the app."""
#
#     def test_first_app_question(self, agent, questions_data):
#         """Test the agent's response to the first app question."""
#         if not questions_data:
#             pytest.skip("No questions available from API")
#
#         first_question = questions_data[0]
#         question_text = first_question.get("question")
#         task_id = first_question.get("task_id")
#
#         if not question_text or not task_id:
#             pytest.skip("First question is missing required fields")
#
#         test_logger.info(f"Testing with app question: {question_text}")
#
#         response = agent(question_text)
#         test_logger.info(f"Agent response: {response}")
#
#         # Check that the response contains the expected information
#         assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
#         assert "studio albums" in response.lower(), "Response should mention studio albums"
#         assert "2000" in response and "2009" in response, "Response should mention the year range"
#
#         # Verify that a number is mentioned (either as word or digit)
#         import re
#         number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
#         has_number = bool(re.search(number_pattern, response.lower()))
#         assert has_number, "Response should include the number of albums"
#
#         # Check for album names in the response
#         known_albums = [
#             "Corazón Libre",
#             "Cantora",
#             "Hermano",
#             "Acústico",
#             "Argentina quiere cantar"
#         ]
#         found_albums = [album for album in known_albums if album in response]
#         assert len(found_albums) > 0, "Response should mention at least some of the known albums"
#
#         # Check for a structured response
#         assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
#             "Response should list albums with years"


class TestBasicCodeAgentCapabilities:
    """Test basic capabilities of the code agent."""

    def setup_method(self):
        """Setup method to initialize the agent before each test."""
        test_logger.info("Creating AgentRunner instance")
        self.agent = AgentRunner()

    def test_simple_math_calculation_with_steps(self):
        """Test that the agent can perform basic math calculations and log steps."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
        test_logger.info(f"Testing math calculation with question: {question}")

        # Run the agent and get the response
        response = self.agent(question)

        # Verify the response contains the correct result
        expected_result = str(5 + 3 + 1294.678)
        assert (
            expected_result in response
        ), f"Response should contain the result {expected_result}"

        # Verify step logs exist and have required fields
        assert self.agent.last_state is not None, "Agent should store last state"
        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
        assert (
            len(self.agent.last_state["step_logs"]) > 0
        ), "Should have at least one step logged"

        # Verify each step has required fields
        for step in self.agent.last_state["step_logs"]:
            assert "step_number" in step, "Each step should have a step_number"
            assert any(
                key in step for key in ["thought", "code", "observation"]
            ), "Each step should have at least one of: thought, code, or observation"

        # Verify the final answer is indicated
        assert (
            "final_answer" in response.lower()
        ), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
        """Test that the agent can search for information and generate images, with step logging."""
        question = (
            "Search for information about the Mona Lisa and generate an image of it."
        )
        test_logger.info(
            f"Testing document QA and image generation with question: {question}"
        )

        # Run the agent and get the response
        response = self.agent(question)

        # Verify the response contains both search and image generation
        assert "mona lisa" in response.lower(), "Response should mention Mona Lisa"
        assert "image" in response.lower(), "Response should mention image generation"

        # Verify step logs exist and show logical progression
        assert self.agent.last_state is not None, "Agent should store last state"
        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
        assert (
            len(self.agent.last_state["step_logs"]) > 1
        ), "Should have multiple steps logged"

        # Verify steps show logical progression
        steps = self.agent.last_state["step_logs"]
        search_steps = [step for step in steps if "search" in str(step).lower()]
        image_steps = [step for step in steps if "image" in str(step).lower()]

        assert len(search_steps) > 0, "Should have search steps"
        assert len(image_steps) > 0, "Should have image generation steps"

        # Verify each step has required fields
        for step in steps:
            assert "step_number" in step, "Each step should have a step_number"
            assert any(
                key in step for key in ["thought", "code", "observation"]
            ), "Each step should have at least one of: thought, code, or observation"


def test_simple_math_calculation_with_steps():
    """Test that the agent can perform a simple math calculation and verify intermediate steps."""
    agent = AgentRunner()
    question = "What is the result of the following operation: 5 + 3 + 1294.678?"

    # Process the question
    response = agent(question)

    # Verify step logs exist and have required fields
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"

    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

    # Verify final answer
    expected_result = 1302.678

    # Extract all numbers from the response
    import re

    # First check for LaTeX formatting
    latex_match = re.search(r"\\boxed{([^}]+)}", response)
    if latex_match:
        # Extract number from LaTeX box
        latex_content = latex_match.group(1)
        numbers = re.findall(r"\d+\.?\d*", latex_content)
    else:
        # Extract all numbers from the response
        numbers = re.findall(r"\d+\.?\d*", response)

    assert numbers, "Response should contain at least one number"

    # Check if any number matches the expected result
    has_correct_result = any(abs(float(n) - expected_result) < 0.001 for n in numbers)
    assert (
        has_correct_result
    ), f"Response should contain the result {expected_result}, got {response}"

    # Verify the response indicates it's a final answer
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"


def test_document_qa_and_image_generation_with_steps():
    """Test document QA and image generation with step verification."""
    agent = AgentRunner()
    question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"

    # Process the question
    response = agent(question)

    # Verify step logs exist and demonstrate logical progression
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"

    # Check for search and image generation steps
    has_search_step = False
    has_image_step = False

    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

        # Look for search and image steps in thoughts or code
        step_content = str(step.get("thought", "")) + str(step.get("code", ""))
        if "search" in step_content.lower():
            has_search_step = True
        if "image" in step_content.lower() or "dalle" in step_content.lower():
            has_image_step = True

    assert has_search_step, "Should include a search step"
    assert has_image_step, "Should include an image generation step"
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"


if __name__ == "__main__":
    pytest.main([__file__, "-s", "-v", "-x"])
tools.py
DELETED
@@ -1,85 +0,0 @@
import logging

from smolagents import DuckDuckGoSearchTool, Tool, WikipediaSearchTool

logger = logging.getLogger(__name__)


class GeneralSearchTool(Tool):
    name = "search"
    description = """Performs a general web search using both DuckDuckGo and Wikipedia, then returns the combined search results."""
    inputs = {
        "query": {"type": "string", "description": "The search query to perform."}
    }
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        super().__init__()
        self.max_results = max_results
        self.ddg_tool = DuckDuckGoSearchTool()
        self.wiki_tool = WikipediaSearchTool()

    def forward(self, query: str) -> str:
        # Get DuckDuckGo results
        try:
            ddg_results = self.ddg_tool.forward(query)
        except Exception as e:
            ddg_results = "No DuckDuckGo results found."
            logger.warning(f"DuckDuckGo search failed: {str(e)}")

        # Get Wikipedia results
        try:
            wiki_results = self.wiki_tool.forward(query)
        except Exception as e:
            wiki_results = "No Wikipedia results found."
            logger.warning(f"Wikipedia search failed: {str(e)}")

        # Combine and format results
        output = []
        if ddg_results and ddg_results != "No DuckDuckGo results found.":
            output.append("## DuckDuckGo Search Results\n\n" + ddg_results)
        if wiki_results and wiki_results != "No Wikipedia results found.":
            output.append("## Wikipedia Results\n\n" + wiki_results)

        if not output:
            raise Exception("No results found! Try a less restrictive/shorter query.")

        return "\n\n---\n\n".join(output)


class MathTool(Tool):
    name = "math"
    description = """Performs mathematical calculations and returns the result."""
    inputs = {
        "expression": {
            "type": "string",
            "description": "The mathematical expression to evaluate.",
        }
    }
    output_type = "string"

    def forward(self, expression: str) -> str:
        try:
            # Use eval with a restricted set of builtins for safety
            safe_dict = {
                "__builtins__": {
                    "abs": abs,
                    "round": round,
                    "min": min,
                    "max": max,
                    "sum": sum,
                }
            }
            result = eval(expression, safe_dict)
            return str(result)
        except Exception as e:
            raise Exception(f"Error evaluating expression: {str(e)}")


# Export all tools
tools = [
    # DuckDuckGoSearchTool(),
    GeneralSearchTool(),
    MathTool(),
    # WikipediaSearchTool(),
]