fixed tool calling bug; added no_think param and current_time to researcher; updated design diagram; no output guard for researcher
Files changed:
- agents.py     +8  -0
- app.py        +6  -10
- args.py       +11 -1
- design.puml   +7  -7
- design.yaml   +7  -7
- graph.py      +10 -1
- itf_agent.py  +59 -1
- test.py       +5  -3
agents.py
CHANGED
@@ -1,6 +1,8 @@
+from typing import List
 from args import Args
 from itf_agent import IAgent
 from toolbox import Toolbox
+import datetime
 
 
 class Manager(IAgent):
@@ -48,6 +50,12 @@ class Researcher(IAgent):
         ]
         super().__init__("05_researcher.txt", Args.PRIMARY_AGENT_PRESET, tools)
 
+    def query(self, messages: List[str]) -> str:
+        last_message = messages[-1]
+        current_time = datetime.datetime.now().isoformat()
+        messages[-1] = f"Current time: {current_time}\n" + last_message
+        return super().query(messages)
+
 
 class Reasoner(IAgent):
     """
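
For quick reference, a minimal standalone sketch of the timestamp prefix that the Researcher.query override above applies to the incoming message (the IAgent base class and the model call are stubbed out; the _Stub* names are illustrative only):

import datetime
from typing import List

class _StubAgent:
    """Stand-in for IAgent: simply echoes the newest message."""
    def query(self, messages: List[str]) -> str:
        return messages[-1]

class _StubResearcher(_StubAgent):
    def query(self, messages: List[str]) -> str:
        # Prepend the current ISO-8601 timestamp to the newest message,
        # mirroring the Researcher.query override in the diff above.
        current_time = datetime.datetime.now().isoformat()
        messages[-1] = f"Current time: {current_time}\n" + messages[-1]
        return super().query(messages)

print(_StubResearcher().query(["What are this week's AI headlines?"]))
# -> "Current time: <ISO timestamp>" followed by the original question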
app.py
CHANGED
@@ -4,12 +4,7 @@ import requests
 import pandas as pd
 
 from alfred import Alfred
-
-
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MOCK_SUBMISSION = True
-QUESTIONS_LIMIT = 3 # Use 0 for no limit !
+from args import Args
 
 
 class Application:
@@ -32,7 +27,7 @@ class Application:
     @staticmethod
     def _get_runtime_and_repo_urls():
         """Determine HF Space Runtime URL and Repo URL"""
-        api_url = DEFAULT_API_URL
+        api_url = Args.AppParams.DEFAULT_API_URL
         questions_url = f"{api_url}/questions"
         submit_url = f"{api_url}/submit"
         return questions_url, submit_url
@@ -88,8 +83,9 @@ class Application:
         - results_log (list): Logs with "Task ID", "Question", and "Submitted Answer".
         - answers_payload (list): Payload with "task_id" and "submitted_answer".
         """
-
-
+        questions_limit = Args.AppParams.QUESTIONS_LIMIT
+        if questions_limit > 0:
+            questions_data = questions_data[:questions_limit]
 
         results_log = []
         answers_payload = []
@@ -128,7 +124,7 @@ class Application:
         """
         print(f"Submitting {len(answers_payload)} answers to: {self.submit_url}")
         try:
-            if MOCK_SUBMISSION:
+            if Args.AppParams.MOCK_SUBMISSION:
                 app_username = self.username
                 class MockResponse:
                     status_code = 200
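
A small standalone sketch of the questions-limit behaviour referenced above (apply_questions_limit is a hypothetical helper; the Space inlines this logic inside Application):

def apply_questions_limit(questions_data, questions_limit):
    """Return at most `questions_limit` questions; 0 or less means no limit."""
    if questions_limit > 0:
        return questions_data[:questions_limit]
    return questions_data

data = [{"task_id": i} for i in range(10)]
assert len(apply_questions_limit(data, 3)) == 3    # limited to the first 3
assert len(apply_questions_limit(data, 0)) == 10   # 0 = no limit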
args.py
CHANGED
@@ -83,7 +83,7 @@ class Args:
        primary_llm_interface=LLMInterface.OPENAI
        # secondary_llm_interface=LLMInterface.HUGGINGFACE
        vlm_interface=LLMInterface.OPENAI
-       primary_model="groot" if TEST_MODE else "
+       primary_model="groot" if TEST_MODE else "qwen/qwen3-30b-a3b"
        secondary_model="groot" if TEST_MODE else "qwen2.5-7b-instruct-1m"
        vision_model="groot" if TEST_MODE else "qwen/qwen2.5-vl-7b"
        api_base="http://127.0.0.1:1234/v1" # LM Studio local endpoint
@@ -102,6 +102,13 @@ class Args:
        vlm_interface, vision_model,
        temperature = None, max_tokens = 2048, repeat_penalty = None
    )
+
+    class AppParams:
+        # --- Constants ---
+        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+        MOCK_SUBMISSION = True
+        QUESTIONS_LIMIT = 3 # Use 0 for no limit !
+
     class AlfredParams:
         # Maximum number of interactions between Manager and Solver
         MAX_INTERACTIONS = 6
@@ -109,3 +116,6 @@ class Args:
         MAX_SOLVING_EFFORT = 6
         # Verification happening every few messages to see whether the manager agent got stuck
         AUDIT_INTERVAL = 3
+
+    class MiscParams:
+        NO_THINK = True
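
The constants that used to live at module level in app.py are now grouped under nested classes on Args; a simplified sketch of how they are read (values copied from the diff above, other Args members omitted):

class Args:
    class AppParams:
        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
        MOCK_SUBMISSION = True
        QUESTIONS_LIMIT = 3  # Use 0 for no limit !

    class MiscParams:
        NO_THINK = True

print(Args.AppParams.DEFAULT_API_URL)  # read by Application._get_runtime_and_repo_urls
print(Args.MiscParams.NO_THINK)        # read by IAgent.query to append "/no_think"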
design.puml
CHANGED
@@ -8,31 +8,31 @@ node START TERMINAL_NODE_COLOR[
 START
 ]
 
-node manager
+node manager TESTED_NODE_COLOR[
 manager
 ]
 
-node final_answer
+node final_answer TESTED_NODE_COLOR[
 final_answer
 ]
 
-node auditor
+node auditor TESTED_NODE_COLOR[
 auditor
 ]
 
-node solver
+node solver TESTED_NODE_COLOR[
 solver
 ]
 
-node researcher
+node researcher TESTED_NODE_COLOR[
 researcher
 ]
 
-node reasoner
+node reasoner TESTED_NODE_COLOR[
 reasoner
 ]
 
-node viewer
+node viewer IMPLEMENTED_NODE_COLOR[
 viewer
 ]
 
design.yaml
CHANGED
@@ -8,34 +8,34 @@ nodes:
   - name: manager
     connections: [solver, auditor, final_answer]
     description: Orchestrates the workflow by delegating tasks to specialized nodes and integrating their outputs
-    status:
+    status: TESTED
 
   - name: final_answer
     connections: [END]
     description: Formats and delivers the final response to the user
-    status:
+    status: TESTED
 
   - name: auditor
     connections: [manager]
     description: Reviews manager's outputs for accuracy, safety, and quality
-    status:
+    status: TESTED
 
   - name: solver
     connections: [manager, researcher, reasoner, viewer]
     description: Central problem-solving node that coordinates with specialized experts based on task requirements
-    status:
+    status: TESTED
 
   - name: researcher
     connections: [solver]
     description: Retrieves and synthesizes information from various sources to answer knowledge-based questions
-    status:
+    status: TESTED
 
   - name: reasoner
     connections: [solver]
     description: Performs logical reasoning, inference, and step-by-step problem-solving
-    status:
+    status: TESTED
 
   - name: viewer
     connections: [solver]
     description: Processes, analyzes, and generates vision related information
-    status:
+    status: IMPLEMENTED
graph.py
CHANGED
@@ -107,6 +107,14 @@ class Nodes:
         instruction = "Formulate a definitive final answer in english. Be very concise and use no redundant words !"
         state["messages"].append(instruction)
         response = Agents.manager.query(state["messages"])
+
+        # Post process the response
+        if "FINAL ANSWER:" in response:
+            response = response.split("FINAL ANSWER:", 1)[1]
+        if "</think>" in response:
+            response = response.split("</think>", 1)[1]
+        response = response.strip()
+
         state["final_response"] = response
         return state
 
@@ -140,7 +148,8 @@ class Nodes:
         """
         Retrieves and synthesizes information from various sources to answer knowledge-based questions
         """
-
+        # We do not use the output guard here as it might halucinate results if there are none.
+        response = Agents.researcher.query(state["task_progress"])
         state["task_progress"].append(response)
         return state
 
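
A standalone sketch of the final-answer post-processing added above (postprocess_final_answer is a hypothetical helper name; the final_answer node inlines these steps):

def postprocess_final_answer(response: str) -> str:
    # Keep only what follows the "FINAL ANSWER:" marker, if present.
    if "FINAL ANSWER:" in response:
        response = response.split("FINAL ANSWER:", 1)[1]
    # Drop a leading <think>...</think> block emitted by some models.
    if "</think>" in response:
        response = response.split("</think>", 1)[1]
    return response.strip()

raw = "<think>weighing the options...</think>\nFINAL ANSWER: 42"
assert postprocess_final_answer(raw) == "42"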
itf_agent.py
CHANGED
@@ -14,6 +14,7 @@ class IAgent():
         self.name = self._format_name(sys_prompt_filename)
         self.interface = agent_preset.get_interface()
         self.mock = (agent_preset.get_model_name() == "groot")
+        self.tools = tools  # <-- store tools for tool call execution
 
         # Load the system prompt from a file
         system_prompt_path = os.path.join(os.getcwd(), "system_prompts", sys_prompt_filename)
@@ -75,6 +76,43 @@ class IAgent():
         """
         return self.system_prompt
 
+    def _handle_tool_calls(self, tool_calls):
+        """
+        Executes tool calls and returns their results as a string.
+        """
+        tool_results = []
+        for call in tool_calls:
+            tool_name = None
+            tool_args = {}
+            # Qwen-style: call['function']['name'], call['function']['arguments']
+            if "function" in call:
+                tool_name = call["function"].get("name")
+                import json
+                try:
+                    tool_args = json.loads(call["function"].get("arguments", "{}"))
+                except Exception:
+                    tool_args = {}
+            # OpenAI-style: call['name'], call['args']
+            elif "name" in call and "args" in call:
+                tool_name = call["name"]
+                tool_args = call["args"]
+            tool = next((t for t in self.tools if getattr(t, "name", None) == tool_name), None)
+            if tool is not None:
+                try:
+                    # Handle "__arg1" as positional argument for single-argument tools
+                    if isinstance(tool_args, dict) and len(tool_args) == 1 and "__arg1" in tool_args:
+                        result = tool.func(tool_args["__arg1"])
+                    elif isinstance(tool_args, dict):
+                        result = tool.func(**tool_args)
+                    else:
+                        result = tool.func(tool_args)
+                    tool_results.append(f"[{tool_name}]: {result}")
+                except Exception as e:
+                    tool_results.append(f"[{tool_name} ERROR]: {str(e)}")
+            else:
+                tool_results.append(f"[{tool_name} ERROR]: Tool not found")
+        return "\n".join(tool_results)
+
     def query(self, messages: List[str]) -> str:
         """
         Asynchronously queries the agent with a given question and returns the response.
@@ -98,9 +136,29 @@ class IAgent():
             return response
 
         system_prompt = self.get_system_prompt()
+
+        # Disable thinking block for some models
+        if Args.MiscParams.NO_THINK:
+            messages[-1] += "\n/no_think"
+
         messages_with_roles = self._bake_roles(messages)
         conversation = [SystemMessage(content=system_prompt)] + messages_with_roles
-
+
+        raw_output = self.model.invoke(conversation)
+
+        # --- Unified output and tool call handling ---
+        response = ""
+
+        # 1. Handle tool calls if present
+        tool_calls = getattr(raw_output, "additional_kwargs", {}).get("tool_calls", None)
+        if tool_calls:
+            response = self._handle_tool_calls(tool_calls)
+        # 2. Otherwise, use standard LLM output if present
+        elif hasattr(raw_output, "content") and raw_output.content:
+            response = str(raw_output.content)
+        # 3. Fallback: string conversion
+        else:
+            response = str(raw_output)
 
         Args.LOGGER.log(logging.INFO, f"\nAgent '{self.name}' produced OUTPUT:\n{response}\n{separator}\n")
         return response
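
A self-contained sketch of the tool-call dispatch that _handle_tool_calls implements above, exercised with a dummy tool and no LLM; as the diff assumes, the tool object only needs name and func attributes (dispatch is a simplified stand-in with error handling trimmed):

import json
from types import SimpleNamespace

def dispatch(tool_calls, tools):
    results = []
    for call in tool_calls:
        tool_name, tool_args = None, {}
        if "function" in call:
            # {"function": {"name": ..., "arguments": "<json string>"}}
            tool_name = call["function"].get("name")
            tool_args = json.loads(call["function"].get("arguments", "{}"))
        elif "name" in call and "args" in call:
            # alternative shape: {"name": ..., "args": {...}}
            tool_name, tool_args = call["name"], call["args"]
        tool = next((t for t in tools if getattr(t, "name", None) == tool_name), None)
        if tool is None:
            results.append(f"[{tool_name} ERROR]: Tool not found")
        else:
            results.append(f"[{tool_name}]: {tool.func(**tool_args)}")
    return "\n".join(results)

add = SimpleNamespace(name="add", func=lambda a, b: a + b)
calls = [{"function": {"name": "add", "arguments": json.dumps({"a": 2, "b": 3})}}]
print(dispatch(calls, [add]))  # -> [add]: 5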
test.py
CHANGED
@@ -152,9 +152,9 @@ class TestAlfredAgent(unittest.TestCase):
         """
         nodes = Nodes()
         test_state: State = {
-            "initial_query": "What
-            "messages": ["What
-            "task_progress": ["
+            "initial_query": "What are the latest news headlines about artificial intelligence published this week?",
+            "messages": ["What are the latest news headlines about artificial intelligence published this week?"],
+            "task_progress": ["What are the latest news headlines about artificial intelligence published this week?"],
             "audit_interval": 2,
             "manager_queries": 1,
             "solver_queries": 0,
@@ -379,4 +379,6 @@ class TestAlfredAgent(unittest.TestCase):
 
 
 if __name__ == "__main__":
+    # test = TestAlfredAgent()
+    # test.test_researcher_node()
     unittest.main()