24Arys11 committed on
Commit
3d648f2
·
1 Parent(s): 4f53e02

fixed tool calling bug; added no_think param and current_time to researcher; updated design diagram; no output guard for researcher

Browse files
Files changed (8) hide show
  1. agents.py +8 -0
  2. app.py +6 -10
  3. args.py +11 -1
  4. design.puml +7 -7
  5. design.yaml +7 -7
  6. graph.py +10 -1
  7. itf_agent.py +59 -1
  8. test.py +5 -3
agents.py CHANGED
@@ -1,6 +1,8 @@
 
1
  from args import Args
2
  from itf_agent import IAgent
3
  from toolbox import Toolbox
 
4
 
5
 
6
  class Manager(IAgent):
@@ -48,6 +50,12 @@ class Researcher(IAgent):
48
  ]
49
  super().__init__("05_researcher.txt", Args.PRIMARY_AGENT_PRESET, tools)
50
 
 
 
 
 
 
 
51
 
52
  class Reasoner(IAgent):
53
  """
 
1
+ from typing import List
2
  from args import Args
3
  from itf_agent import IAgent
4
  from toolbox import Toolbox
5
+ import datetime
6
 
7
 
8
  class Manager(IAgent):
 
50
  ]
51
  super().__init__("05_researcher.txt", Args.PRIMARY_AGENT_PRESET, tools)
52
 
53
+ def query(self, messages: List[str]) -> str:
54
+ last_message = messages[-1]
55
+ current_time = datetime.datetime.now().isoformat()
56
+ messages[-1] = f"Current time: {current_time}\n" + last_message
57
+ return super().query(messages)
58
+
59
 
60
  class Reasoner(IAgent):
61
  """
app.py CHANGED
@@ -4,12 +4,7 @@ import requests
4
  import pandas as pd
5
 
6
  from alfred import Alfred
7
-
8
-
9
- # --- Constants ---
10
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
- MOCK_SUBMISSION = True
12
- QUESTIONS_LIMIT = 3 # Use 0 for no limit !
13
 
14
 
15
  class Application:
@@ -32,7 +27,7 @@ class Application:
32
  @staticmethod
33
  def _get_runtime_and_repo_urls():
34
  """Determine HF Space Runtime URL and Repo URL"""
35
- api_url = DEFAULT_API_URL
36
  questions_url = f"{api_url}/questions"
37
  submit_url = f"{api_url}/submit"
38
  return questions_url, submit_url
@@ -88,8 +83,9 @@ class Application:
88
  - results_log (list): Logs with "Task ID", "Question", and "Submitted Answer".
89
  - answers_payload (list): Payload with "task_id" and "submitted_answer".
90
  """
91
- if QUESTIONS_LIMIT > 0:
92
- questions_data = questions_data[:QUESTIONS_LIMIT]
 
93
 
94
  results_log = []
95
  answers_payload = []
@@ -128,7 +124,7 @@ class Application:
128
  """
129
  print(f"Submitting {len(answers_payload)} answers to: {self.submit_url}")
130
  try:
131
- if MOCK_SUBMISSION:
132
  app_username = self.username
133
  class MockResponse:
134
  status_code = 200
 
4
  import pandas as pd
5
 
6
  from alfred import Alfred
7
+ from args import Args
 
 
 
 
 
8
 
9
 
10
  class Application:
 
27
  @staticmethod
28
  def _get_runtime_and_repo_urls():
29
  """Determine HF Space Runtime URL and Repo URL"""
30
+ api_url = Args.AppParams.DEFAULT_API_URL
31
  questions_url = f"{api_url}/questions"
32
  submit_url = f"{api_url}/submit"
33
  return questions_url, submit_url
 
83
  - results_log (list): Logs with "Task ID", "Question", and "Submitted Answer".
84
  - answers_payload (list): Payload with "task_id" and "submitted_answer".
85
  """
86
+ questions_limit = Args.AppParams.QUESTIONS_LIMIT
87
+ if questions_limit > 0:
88
+ questions_data = questions_data[:questions_limit]
89
 
90
  results_log = []
91
  answers_payload = []
 
124
  """
125
  print(f"Submitting {len(answers_payload)} answers to: {self.submit_url}")
126
  try:
127
+ if Args.AppParams.MOCK_SUBMISSION:
128
  app_username = self.username
129
  class MockResponse:
130
  status_code = 200
args.py CHANGED
@@ -83,7 +83,7 @@ class Args:
83
  primary_llm_interface=LLMInterface.OPENAI
84
  # secondary_llm_interface=LLMInterface.HUGGINGFACE
85
  vlm_interface=LLMInterface.OPENAI
86
- primary_model="groot" if TEST_MODE else "qwen2.5-qwq-35b-eureka-cubed-abliterated-uncensored"
87
  secondary_model="groot" if TEST_MODE else "qwen2.5-7b-instruct-1m"
88
  vision_model="groot" if TEST_MODE else "qwen/qwen2.5-vl-7b"
89
  api_base="http://127.0.0.1:1234/v1" # LM Studio local endpoint
@@ -102,6 +102,13 @@ class Args:
102
  vlm_interface, vision_model,
103
  temperature = None, max_tokens = 2048, repeat_penalty = None
104
  )
 
 
 
 
 
 
 
105
  class AlfredParams:
106
  # Maximum number of interactions between Manager and Solver
107
  MAX_INTERACTIONS = 6
@@ -109,3 +116,6 @@ class Args:
109
  MAX_SOLVING_EFFORT = 6
110
  # Verification happening every few messages to see whether the manager agent got stuck
111
  AUDIT_INTERVAL = 3
 
 
 
 
83
  primary_llm_interface=LLMInterface.OPENAI
84
  # secondary_llm_interface=LLMInterface.HUGGINGFACE
85
  vlm_interface=LLMInterface.OPENAI
86
+ primary_model="groot" if TEST_MODE else "qwen/qwen3-30b-a3b"
87
  secondary_model="groot" if TEST_MODE else "qwen2.5-7b-instruct-1m"
88
  vision_model="groot" if TEST_MODE else "qwen/qwen2.5-vl-7b"
89
  api_base="http://127.0.0.1:1234/v1" # LM Studio local endpoint
 
102
  vlm_interface, vision_model,
103
  temperature = None, max_tokens = 2048, repeat_penalty = None
104
  )
105
+
106
+ class AppParams:
107
+ # --- Constants ---
108
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
109
+ MOCK_SUBMISSION = True
110
+ QUESTIONS_LIMIT = 3 # Use 0 for no limit !
111
+
112
  class AlfredParams:
113
  # Maximum number of interactions between Manager and Solver
114
  MAX_INTERACTIONS = 6
 
116
  MAX_SOLVING_EFFORT = 6
117
  # Verification happening every few messages to see whether the manager agent got stuck
118
  AUDIT_INTERVAL = 3
119
+
120
+ class MiscParams:
121
+ NO_THINK = True
design.puml CHANGED
@@ -8,31 +8,31 @@ node START TERMINAL_NODE_COLOR[
8
  START
9
  ]
10
 
11
- node manager IMPLEMENTED_NODE_COLOR[
12
  manager
13
  ]
14
 
15
- node final_answer IMPLEMENTED_NODE_COLOR[
16
  final_answer
17
  ]
18
 
19
- node auditor IMPLEMENTED_NODE_COLOR[
20
  auditor
21
  ]
22
 
23
- node solver IMPLEMENTED_NODE_COLOR[
24
  solver
25
  ]
26
 
27
- node researcher IMPLEMENTED_NODE_COLOR[
28
  researcher
29
  ]
30
 
31
- node reasoner IMPLEMENTED_NODE_COLOR[
32
  reasoner
33
  ]
34
 
35
- node viewer NOT_IMPLEMENTED_NODE_COLOR[
36
  viewer
37
  ]
38
 
 
8
  START
9
  ]
10
 
11
+ node manager TESTED_NODE_COLOR[
12
  manager
13
  ]
14
 
15
+ node final_answer TESTED_NODE_COLOR[
16
  final_answer
17
  ]
18
 
19
+ node auditor TESTED_NODE_COLOR[
20
  auditor
21
  ]
22
 
23
+ node solver TESTED_NODE_COLOR[
24
  solver
25
  ]
26
 
27
+ node researcher TESTED_NODE_COLOR[
28
  researcher
29
  ]
30
 
31
+ node reasoner TESTED_NODE_COLOR[
32
  reasoner
33
  ]
34
 
35
+ node viewer IMPLEMENTED_NODE_COLOR[
36
  viewer
37
  ]
38
 
design.yaml CHANGED
@@ -8,34 +8,34 @@ nodes:
8
  - name: manager
9
  connections: [solver, auditor, final_answer]
10
  description: Orchestrates the workflow by delegating tasks to specialized nodes and integrating their outputs
11
- status: IMPLEMENTED
12
 
13
  - name: final_answer
14
  connections: [END]
15
  description: Formats and delivers the final response to the user
16
- status: IMPLEMENTED
17
 
18
  - name: auditor
19
  connections: [manager]
20
  description: Reviews manager's outputs for accuracy, safety, and quality
21
- status: IMPLEMENTED
22
 
23
  - name: solver
24
  connections: [manager, researcher, reasoner, viewer]
25
  description: Central problem-solving node that coordinates with specialized experts based on task requirements
26
- status: IMPLEMENTED
27
 
28
  - name: researcher
29
  connections: [solver]
30
  description: Retrieves and synthesizes information from various sources to answer knowledge-based questions
31
- status: IMPLEMENTED
32
 
33
  - name: reasoner
34
  connections: [solver]
35
  description: Performs logical reasoning, inference, and step-by-step problem-solving
36
- status: IMPLEMENTED
37
 
38
  - name: viewer
39
  connections: [solver]
40
  description: Processes, analyzes, and generates vision related information
41
- status: NOT_IMPLEMENTED
 
8
  - name: manager
9
  connections: [solver, auditor, final_answer]
10
  description: Orchestrates the workflow by delegating tasks to specialized nodes and integrating their outputs
11
+ status: TESTED
12
 
13
  - name: final_answer
14
  connections: [END]
15
  description: Formats and delivers the final response to the user
16
+ status: TESTED
17
 
18
  - name: auditor
19
  connections: [manager]
20
  description: Reviews manager's outputs for accuracy, safety, and quality
21
+ status: TESTED
22
 
23
  - name: solver
24
  connections: [manager, researcher, reasoner, viewer]
25
  description: Central problem-solving node that coordinates with specialized experts based on task requirements
26
+ status: TESTED
27
 
28
  - name: researcher
29
  connections: [solver]
30
  description: Retrieves and synthesizes information from various sources to answer knowledge-based questions
31
+ status: TESTED
32
 
33
  - name: reasoner
34
  connections: [solver]
35
  description: Performs logical reasoning, inference, and step-by-step problem-solving
36
+ status: TESTED
37
 
38
  - name: viewer
39
  connections: [solver]
40
  description: Processes, analyzes, and generates vision related information
41
+ status: IMPLEMENTED
graph.py CHANGED
@@ -107,6 +107,14 @@ class Nodes:
107
  instruction = "Formulate a definitive final answer in english. Be very concise and use no redundant words !"
108
  state["messages"].append(instruction)
109
  response = Agents.manager.query(state["messages"])
 
 
 
 
 
 
 
 
110
  state["final_response"] = response
111
  return state
112
 
@@ -140,7 +148,8 @@ class Nodes:
140
  """
141
  Retrieves and synthesizes information from various sources to answer knowledge-based questions
142
  """
143
- response = Agents.guard_output(Agents.researcher, state["task_progress"])
 
144
  state["task_progress"].append(response)
145
  return state
146
 
 
107
  instruction = "Formulate a definitive final answer in english. Be very concise and use no redundant words !"
108
  state["messages"].append(instruction)
109
  response = Agents.manager.query(state["messages"])
110
+
111
+ # Post process the response
112
+ if "FINAL ANSWER:" in response:
113
+ response = response.split("FINAL ANSWER:", 1)[1]
114
+ if "</think>" in response:
115
+ response = response.split("</think>", 1)[1]
116
+ response = response.strip()
117
+
118
  state["final_response"] = response
119
  return state
120
 
 
148
  """
149
  Retrieves and synthesizes information from various sources to answer knowledge-based questions
150
  """
151
 + # We do not use the output guard here as it might hallucinate results if there are none.
152
+ response = Agents.researcher.query(state["task_progress"])
153
  state["task_progress"].append(response)
154
  return state
155
 
itf_agent.py CHANGED
@@ -14,6 +14,7 @@ class IAgent():
14
  self.name = self._format_name(sys_prompt_filename)
15
  self.interface = agent_preset.get_interface()
16
  self.mock = (agent_preset.get_model_name() == "groot")
 
17
 
18
  # Load the system prompt from a file
19
  system_prompt_path = os.path.join(os.getcwd(), "system_prompts", sys_prompt_filename)
@@ -75,6 +76,43 @@ class IAgent():
75
  """
76
  return self.system_prompt
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def query(self, messages: List[str]) -> str:
79
  """
80
  Asynchronously queries the agent with a given question and returns the response.
@@ -98,9 +136,29 @@ class IAgent():
98
  return response
99
 
100
  system_prompt = self.get_system_prompt()
 
 
 
 
 
101
  messages_with_roles = self._bake_roles(messages)
102
  conversation = [SystemMessage(content=system_prompt)] + messages_with_roles
103
- response = str(self.model.invoke(conversation).content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  Args.LOGGER.log(logging.INFO, f"\nAgent '{self.name}' produced OUTPUT:\n{response}\n{separator}\n")
106
  return response
 
14
  self.name = self._format_name(sys_prompt_filename)
15
  self.interface = agent_preset.get_interface()
16
  self.mock = (agent_preset.get_model_name() == "groot")
17
+ self.tools = tools # <-- store tools for tool call execution
18
 
19
  # Load the system prompt from a file
20
  system_prompt_path = os.path.join(os.getcwd(), "system_prompts", sys_prompt_filename)
 
76
  """
77
  return self.system_prompt
78
 
79
+ def _handle_tool_calls(self, tool_calls):
80
+ """
81
+ Executes tool calls and returns their results as a string.
82
+ """
83
+ tool_results = []
84
+ for call in tool_calls:
85
+ tool_name = None
86
+ tool_args = {}
87
+ # Qwen-style: call['function']['name'], call['function']['arguments']
88
+ if "function" in call:
89
+ tool_name = call["function"].get("name")
90
+ import json
91
+ try:
92
+ tool_args = json.loads(call["function"].get("arguments", "{}"))
93
+ except Exception:
94
+ tool_args = {}
95
+ # OpenAI-style: call['name'], call['args']
96
+ elif "name" in call and "args" in call:
97
+ tool_name = call["name"]
98
+ tool_args = call["args"]
99
+ tool = next((t for t in self.tools if getattr(t, "name", None) == tool_name), None)
100
+ if tool is not None:
101
+ try:
102
+ # Handle "__arg1" as positional argument for single-argument tools
103
+ if isinstance(tool_args, dict) and len(tool_args) == 1 and "__arg1" in tool_args:
104
+ result = tool.func(tool_args["__arg1"])
105
+ elif isinstance(tool_args, dict):
106
+ result = tool.func(**tool_args)
107
+ else:
108
+ result = tool.func(tool_args)
109
+ tool_results.append(f"[{tool_name}]: {result}")
110
+ except Exception as e:
111
+ tool_results.append(f"[{tool_name} ERROR]: {str(e)}")
112
+ else:
113
+ tool_results.append(f"[{tool_name} ERROR]: Tool not found")
114
+ return "\n".join(tool_results)
115
+
116
  def query(self, messages: List[str]) -> str:
117
  """
118
  Asynchronously queries the agent with a given question and returns the response.
 
136
  return response
137
 
138
  system_prompt = self.get_system_prompt()
139
+
140
+ # Disable thinking block for some models
141
+ if Args.MiscParams.NO_THINK:
142
+ messages[-1] += "\n/no_think"
143
+
144
  messages_with_roles = self._bake_roles(messages)
145
  conversation = [SystemMessage(content=system_prompt)] + messages_with_roles
146
+
147
+ raw_output = self.model.invoke(conversation)
148
+
149
+ # --- Unified output and tool call handling ---
150
+ response = ""
151
+
152
+ # 1. Handle tool calls if present
153
+ tool_calls = getattr(raw_output, "additional_kwargs", {}).get("tool_calls", None)
154
+ if tool_calls:
155
+ response = self._handle_tool_calls(tool_calls)
156
+ # 2. Otherwise, use standard LLM output if present
157
+ elif hasattr(raw_output, "content") and raw_output.content:
158
+ response = str(raw_output.content)
159
+ # 3. Fallback: string conversion
160
+ else:
161
+ response = str(raw_output)
162
 
163
  Args.LOGGER.log(logging.INFO, f"\nAgent '{self.name}' produced OUTPUT:\n{response}\n{separator}\n")
164
  return response
test.py CHANGED
@@ -152,9 +152,9 @@ class TestAlfredAgent(unittest.TestCase):
152
  """
153
  nodes = Nodes()
154
  test_state: State = {
155
- "initial_query": "What is the capital of France?",
156
- "messages": ["What is the capital of France?"],
157
- "task_progress": ["Research: What is the capital of France?"],
158
  "audit_interval": 2,
159
  "manager_queries": 1,
160
  "solver_queries": 0,
@@ -379,4 +379,6 @@ class TestAlfredAgent(unittest.TestCase):
379
 
380
 
381
  if __name__ == "__main__":
 
 
382
  unittest.main()
 
152
  """
153
  nodes = Nodes()
154
  test_state: State = {
155
+ "initial_query": "What are the latest news headlines about artificial intelligence published this week?",
156
+ "messages": ["What are the latest news headlines about artificial intelligence published this week?"],
157
+ "task_progress": ["What are the latest news headlines about artificial intelligence published this week?"],
158
  "audit_interval": 2,
159
  "manager_queries": 1,
160
  "solver_queries": 0,
 
379
 
380
 
381
  if __name__ == "__main__":
382
+ # test = TestAlfredAgent()
383
+ # test.test_researcher_node()
384
  unittest.main()