naman1102 committed on
Commit
84345bd
·
1 Parent(s): b9bb826

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -69
app.py CHANGED
@@ -25,16 +25,20 @@ from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools,
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
27
  # ─── 1) plan_node ───
 
 
 
 
28
  # ─── 1) plan_node ───
29
  def plan_node(state: AgentState) -> AgentState:
30
  """
31
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
32
  then decide if it's confident enough to stop or if it needs one tool.
33
  If confident: return {"final_answer":"<answer>"}
34
- Otherwise: return exactly one of
35
  {"wiki_query":"..."},
36
  {"ocr_path":"..."},
37
- {"excel_path":"...", "excel_sheet_name":"..."},
38
  {"audio_path":"..."}
39
  """
40
  prior_msgs = state.get("messages", [])
@@ -72,12 +76,8 @@ def plan_node(state: AgentState) -> AgentState:
72
  try:
73
  parsed = json.loads(llm_out)
74
  if isinstance(parsed, dict):
75
- # Build a fresh state that carries only messages + tool_counter
76
- partial: AgentState = {
77
- "messages": new_msgs,
78
- "tool_counter": state.get("tool_counter", 0),
79
- }
80
- allowed_keys = {
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
@@ -86,13 +86,12 @@ def plan_node(state: AgentState) -> AgentState:
86
  "audio_path",
87
  }
88
  for k, v in parsed.items():
89
- if k in allowed_keys:
90
  partial[k] = v
91
  return partial
92
  except json.JSONDecodeError:
93
  pass
94
 
95
- # Fallback: interpret as a final answer (no further tools)
96
  return {
97
  "messages": new_msgs,
98
  "final_answer": "Sorry, I could not parse your intent.",
@@ -109,21 +108,19 @@ def tool_node(state: AgentState) -> AgentState:
109
  """
110
  Dispatch exactly one tool based on which key was set:
111
  - wiki_query β†’ wikipedia_search_tool
112
- - ocr_path β†’ ocr_image_tool
113
  - excel_path β†’ parse_excel_tool
114
  - audio_path β†’ audio_transcriber_tool
115
- - (web_search_query path is still here but not exposed to the LLM)
116
  """
117
- tool_counter = state.get("tool_counter", 0)
118
- if tool_counter > 5:
 
119
  return {}
 
120
  tool_counter += 1
121
- state["tool_counter"] = tool_counter
122
 
123
  if state.get("wiki_query"):
124
  return wikipedia_search_tool(state)
125
- if state.get("web_search_query"):
126
- return web_search_tool(state)
127
  if state.get("ocr_path"):
128
  return ocr_image_tool(state)
129
  if state.get("excel_path"):
@@ -131,37 +128,23 @@ def tool_node(state: AgentState) -> AgentState:
131
  if state.get("audio_path"):
132
  return audio_transcriber_tool(state)
133
 
134
- return {} # nothing to do
135
 
136
 
137
  # ─── 4) merge_tool_output ───
138
  def merge_tool_output(state: AgentState) -> AgentState:
139
  """
140
- Combine previous state and tool output into one, but remove any stale tool-request keys.
141
  """
142
  prev = state.get("prev_state", {}).copy()
143
 
144
- # Drop any lingering request keys so they don't persist
145
- for dead in [
146
- "wiki_query",
147
- "web_search_query",
148
- "ocr_path",
149
- "excel_path",
150
- "excel_sheet_name",
151
- "audio_path",
152
- ]:
153
  prev.pop(dead, None)
154
 
155
  merged = {**prev, **state}
156
- # Also drop them from the merged result
157
- for dead in [
158
- "wiki_query",
159
- "web_search_query",
160
- "ocr_path",
161
- "excel_path",
162
- "excel_sheet_name",
163
- "audio_path",
164
- ]:
165
  merged.pop(dead, None)
166
 
167
  merged.pop("prev_state", None)
@@ -179,14 +162,13 @@ def inspect_node(state: AgentState) -> AgentState:
179
  β€’ Return {"final_answer":"<final>"} if done, OR
180
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
181
  """
 
182
 
183
  # 0) If we've already called tools too many times, force a final answer:
184
- if state.get("tool_counter", 0) >= 5:
185
  return {
186
  "messages": state["messages"],
187
- "final_answer": state.get(
188
- "final_answer", "ERROR: no interim_answer to finalize."
189
- ),
190
  }
191
 
192
  messages_for_llm = []
@@ -240,25 +222,14 @@ def inspect_node(state: AgentState) -> AgentState:
240
  if isinstance(parsed, dict):
241
  # If GPT gave a final_answer, we finish here
242
  if "final_answer" in parsed:
243
- return {
244
- "messages": new_msgs,
245
- "final_answer": parsed["final_answer"],
246
- }
247
 
248
- # If GPT requested exactly one valid tool, return only that key + carry tool_counter
249
- valid_keys = {
250
- "wiki_query",
251
- "ocr_path",
252
- "excel_path",
253
- "excel_sheet_name",
254
- "audio_path",
255
- }
256
  requested_keys = set(parsed.keys()) & valid_keys
257
  if len(requested_keys) == 1:
258
- clean: AgentState = {
259
- "messages": new_msgs,
260
- "tool_counter": state.get("tool_counter", 0),
261
- }
262
  for k in requested_keys:
263
  clean[k] = parsed[k]
264
  return clean
@@ -270,10 +241,7 @@ def inspect_node(state: AgentState) -> AgentState:
270
  return {"messages": new_msgs, "final_answer": ia}
271
 
272
  # If there is no interim either, we cannot proceed
273
- return {
274
- "messages": new_msgs,
275
- "final_answer": "ERROR: could not parse inspect decision.",
276
- }
277
 
278
 
279
  # ─── 6) finalize_node ───
@@ -342,9 +310,12 @@ compiled_graph = graph.compile()
342
  # ─── 8) respond_to_input ───
343
  def respond_to_input(user_input: str, task_id) -> str:
344
  """
345
- Seed state['messages'] with a SystemMessage + HumanMessage(user_input),
346
- then invoke the cyclic graph. Return the final_answer from the resulting state.
347
  """
 
 
 
348
  system_msg = SystemMessage(
349
  content=(
350
  "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
@@ -359,15 +330,10 @@ def respond_to_input(user_input: str, task_id) -> str:
359
  )
360
  human_msg = HumanMessage(content=user_input)
361
 
362
- initial_state: AgentState = {
363
- "messages": [system_msg, human_msg],
364
- "task_id": task_id,
365
- "tool_counter": 0,
366
- }
367
  final_state = compiled_graph.invoke(initial_state)
368
  return final_state.get("final_answer", "Error: No final answer generated.")
369
 
370
-
371
  class BasicAgent:
372
  def __init__(self):
373
  print("BasicAgent initialized.")
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
27
  # ─── 1) plan_node ───
28
+ # ─── 1) plan_node ───
29
+ tool_counter = 0
30
+
31
+
32
  # ─── 1) plan_node ───
33
  def plan_node(state: AgentState) -> AgentState:
34
  """
35
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
  then decide if it's confident enough to stop or if it needs one tool.
37
  If confident: return {"final_answer":"<answer>"}
38
+ Otherwise: return exactly one of:
39
  {"wiki_query":"..."},
40
  {"ocr_path":"..."},
41
+ {"excel_path":"...","excel_sheet_name":"..."},
42
  {"audio_path":"..."}
43
  """
44
  prior_msgs = state.get("messages", [])
 
76
  try:
77
  parsed = json.loads(llm_out)
78
  if isinstance(parsed, dict):
79
+ partial: AgentState = {"messages": new_msgs}
80
+ allowed = {
 
 
 
 
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
 
86
  "audio_path",
87
  }
88
  for k, v in parsed.items():
89
+ if k in allowed:
90
  partial[k] = v
91
  return partial
92
  except json.JSONDecodeError:
93
  pass
94
 
 
95
  return {
96
  "messages": new_msgs,
97
  "final_answer": "Sorry, I could not parse your intent.",
 
108
  """
109
  Dispatch exactly one tool based on which key was set:
110
  - wiki_query β†’ wikipedia_search_tool
111
+ - ocr_path β†’ ocr_image_tool
112
  - excel_path β†’ parse_excel_tool
113
  - audio_path β†’ audio_transcriber_tool
 
114
  """
115
+ global tool_counter
116
+ if tool_counter >= 5:
117
+ # If we've already run 5 tools, do nothing
118
  return {}
119
+
120
  tool_counter += 1
 
121
 
122
  if state.get("wiki_query"):
123
  return wikipedia_search_tool(state)
 
 
124
  if state.get("ocr_path"):
125
  return ocr_image_tool(state)
126
  if state.get("excel_path"):
 
128
  if state.get("audio_path"):
129
  return audio_transcriber_tool(state)
130
 
131
+ return {} # no tool key present
132
 
133
 
134
  # ─── 4) merge_tool_output ───
135
  def merge_tool_output(state: AgentState) -> AgentState:
136
  """
137
+ Combine previous state and tool output into one, but remove any stale request-keys.
138
  """
139
  prev = state.get("prev_state", {}).copy()
140
 
141
+ # Drop stale request-keys in prev
142
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
 
 
 
 
 
 
 
143
  prev.pop(dead, None)
144
 
145
  merged = {**prev, **state}
146
+ # Drop them again from merged so they don't persist into the next cycle
147
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
 
 
 
 
 
 
 
148
  merged.pop(dead, None)
149
 
150
  merged.pop("prev_state", None)
 
162
  β€’ Return {"final_answer":"<final>"} if done, OR
163
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
164
  """
165
+ global tool_counter
166
 
167
  # 0) If we've already called tools too many times, force a final answer:
168
+ if tool_counter >= 5:
169
  return {
170
  "messages": state["messages"],
171
+ "final_answer": state.get("final_answer", "ERROR: no interim_answer to finalize."),
 
 
172
  }
173
 
174
  messages_for_llm = []
 
222
  if isinstance(parsed, dict):
223
  # If GPT gave a final_answer, we finish here
224
  if "final_answer" in parsed:
225
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
 
 
 
226
 
227
+ # If GPT requested exactly one valid tool, return only that key
228
+ valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
 
 
 
 
 
 
229
  requested_keys = set(parsed.keys()) & valid_keys
230
  if len(requested_keys) == 1:
231
+ clean: AgentState = {"messages": new_msgs}
232
+ # Carry forward the global tool_counter implicitly (no need to store in state)
 
 
233
  for k in requested_keys:
234
  clean[k] = parsed[k]
235
  return clean
 
241
  return {"messages": new_msgs, "final_answer": ia}
242
 
243
  # If there is no interim either, we cannot proceed
244
+ return {"messages": new_msgs, "final_answer": "ERROR: could not parse inspect decision."}
 
 
 
245
 
246
 
247
  # ─── 6) finalize_node ───
 
310
  # ─── 8) respond_to_input ───
311
  def respond_to_input(user_input: str, task_id) -> str:
312
  """
313
+ Reset the global tool_counter, seed state['messages'], invoke the graph,
314
+ and return the final_answer.
315
  """
316
+ global tool_counter
317
+ tool_counter = 0 # Reset on every new user query
318
+
319
  system_msg = SystemMessage(
320
  content=(
321
  "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
 
330
  )
331
  human_msg = HumanMessage(content=user_input)
332
 
333
+ initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
 
 
 
 
334
  final_state = compiled_graph.invoke(initial_state)
335
  return final_state.get("final_answer", "Error: No final answer generated.")
336
 
 
337
  class BasicAgent:
338
  def __init__(self):
339
  print("BasicAgent initialized.")