naman1102 committed on
Commit
b9bb826
·
1 Parent(s): 793ee73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -26
app.py CHANGED
@@ -24,17 +24,18 @@ from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools,
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
 
27
  # ─── 1) plan_node ───
28
  def plan_node(state: AgentState) -> AgentState:
29
  """
30
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
31
  then decide if it's confident enough to stop or if it needs one tool.
32
- If confident: return {"final_answer": "<answer>"}
33
- Otherwise: return exactly one of
34
- {"wiki_query": "..."},
35
- {"ocr_path": "..."},
36
- {"excel_path": "...", "excel_sheet_name": "..."},
37
- {"audio_path": "..."}
38
  """
39
  prior_msgs = state.get("messages", [])
40
  user_input = ""
@@ -71,19 +72,18 @@ def plan_node(state: AgentState) -> AgentState:
71
  try:
72
  parsed = json.loads(llm_out)
73
  if isinstance(parsed, dict):
74
- # Build a clean partial state that only carries over messages and tool_counter
75
  partial: AgentState = {
76
  "messages": new_msgs,
77
- "tool_counter": state.get("tool_counter", 0)
78
  }
79
- # Only copy allowed keys (LMM won't know about web_search_query, so it won't appear)
80
  allowed_keys = {
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
84
  "excel_path",
85
  "excel_sheet_name",
86
- "audio_path"
87
  }
88
  for k, v in parsed.items():
89
  if k in allowed_keys:
@@ -92,10 +92,10 @@ def plan_node(state: AgentState) -> AgentState:
92
  except json.JSONDecodeError:
93
  pass
94
 
95
- # Fallback: treat as a final answer (no further tools)
96
  return {
97
  "messages": new_msgs,
98
- "final_answer": "Sorry, I could not parse your intent."
99
  }
100
 
101
 
@@ -112,7 +112,7 @@ def tool_node(state: AgentState) -> AgentState:
112
  - ocr_path β†’ ocr_image_tool
113
  - excel_path β†’ parse_excel_tool
114
  - audio_path β†’ audio_transcriber_tool
115
- - (web_search_query path kept but not exposed to LLM)
116
  """
117
  tool_counter = state.get("tool_counter", 0)
118
  if tool_counter > 5:
@@ -120,7 +120,6 @@ def tool_node(state: AgentState) -> AgentState:
120
  tool_counter += 1
121
  state["tool_counter"] = tool_counter
122
 
123
- # Only one of these keys should be present at a time
124
  if state.get("wiki_query"):
125
  return wikipedia_search_tool(state)
126
  if state.get("web_search_query"):
@@ -131,7 +130,8 @@ def tool_node(state: AgentState) -> AgentState:
131
  return parse_excel_tool(state)
132
  if state.get("audio_path"):
133
  return audio_transcriber_tool(state)
134
- return {}
 
135
 
136
 
137
  # ─── 4) merge_tool_output ───
@@ -142,10 +142,28 @@ def merge_tool_output(state: AgentState) -> AgentState:
142
  prev = state.get("prev_state", {}).copy()
143
 
144
  # Drop any lingering request keys so they don't persist
145
- for dead in ["wiki_query", "web_search_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
 
 
 
 
 
 
 
146
  prev.pop(dead, None)
147
 
148
  merged = {**prev, **state}
 
 
 
 
 
 
 
 
 
 
 
149
  merged.pop("prev_state", None)
150
  return merged
151
 
@@ -161,11 +179,14 @@ def inspect_node(state: AgentState) -> AgentState:
161
  β€’ Return {"final_answer":"<final>"} if done, OR
162
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
163
  """
 
164
  # 0) If we've already called tools too many times, force a final answer:
165
  if state.get("tool_counter", 0) >= 5:
166
  return {
167
  "messages": state["messages"],
168
- "final_answer": state.get("final_answer", "ERROR: no interim_answer to finalize.")
 
 
169
  }
170
 
171
  messages_for_llm = []
@@ -219,14 +240,25 @@ def inspect_node(state: AgentState) -> AgentState:
219
  if isinstance(parsed, dict):
220
  # If GPT gave a final_answer, we finish here
221
  if "final_answer" in parsed:
222
- return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
 
 
 
223
 
224
- # If GPT requested exactly one valid tool, we return only that key
225
- valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
 
 
 
 
 
 
226
  requested_keys = set(parsed.keys()) & valid_keys
227
  if len(requested_keys) == 1:
228
- # Build a fresh dict with only messages + that single tool key
229
- clean = {"messages": new_msgs}
 
 
230
  for k in requested_keys:
231
  clean[k] = parsed[k]
232
  return clean
@@ -238,7 +270,10 @@ def inspect_node(state: AgentState) -> AgentState:
238
  return {"messages": new_msgs, "final_answer": ia}
239
 
240
  # If there is no interim either, we cannot proceed
241
- return {"messages": new_msgs, "final_answer": "ERROR: could not parse inspect decision."}
 
 
 
242
 
243
 
244
  # ─── 6) finalize_node ───
@@ -274,7 +309,7 @@ def route_plan(plan_out: AgentState) -> str:
274
  graph.add_conditional_edges(
275
  "plan",
276
  route_plan,
277
- {"store_prev_state": "store_prev_state", "finalize": "finalize"}
278
  )
279
 
280
  # store_prev_state β†’ tools
@@ -295,7 +330,7 @@ def route_inspect(inspect_out: AgentState) -> str:
295
  graph.add_conditional_edges(
296
  "inspect",
297
  route_inspect,
298
- {"store_prev_state": "store_prev_state", "finalize": "finalize"}
299
  )
300
 
301
  # finalize β†’ END
@@ -327,7 +362,7 @@ def respond_to_input(user_input: str, task_id) -> str:
327
  initial_state: AgentState = {
328
  "messages": [system_msg, human_msg],
329
  "task_id": task_id,
330
- "tool_counter": 0
331
  }
332
  final_state = compiled_graph.invoke(initial_state)
333
  return final_state.get("final_answer", "Error: No final answer generated.")
 
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
27
+ # ─── 1) plan_node ───
28
  # ─── 1) plan_node ───
29
  def plan_node(state: AgentState) -> AgentState:
30
  """
31
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
32
  then decide if it's confident enough to stop or if it needs one tool.
33
+ If confident: return {"final_answer":"<answer>"}
34
+ Otherwise: return exactly one of
35
+ {"wiki_query":"..."},
36
+ {"ocr_path":"..."},
37
+ {"excel_path":"...", "excel_sheet_name":"..."},
38
+ {"audio_path":"..."}
39
  """
40
  prior_msgs = state.get("messages", [])
41
  user_input = ""
 
72
  try:
73
  parsed = json.loads(llm_out)
74
  if isinstance(parsed, dict):
75
+ # Build a fresh state that carries only messages + tool_counter
76
  partial: AgentState = {
77
  "messages": new_msgs,
78
+ "tool_counter": state.get("tool_counter", 0),
79
  }
 
80
  allowed_keys = {
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
84
  "excel_path",
85
  "excel_sheet_name",
86
+ "audio_path",
87
  }
88
  for k, v in parsed.items():
89
  if k in allowed_keys:
 
92
  except json.JSONDecodeError:
93
  pass
94
 
95
+ # Fallback: interpret as a final answer (no further tools)
96
  return {
97
  "messages": new_msgs,
98
+ "final_answer": "Sorry, I could not parse your intent.",
99
  }
100
 
101
 
 
112
  - ocr_path β†’ ocr_image_tool
113
  - excel_path β†’ parse_excel_tool
114
  - audio_path β†’ audio_transcriber_tool
115
+ - (web_search_query path is still here but not exposed to the LLM)
116
  """
117
  tool_counter = state.get("tool_counter", 0)
118
  if tool_counter > 5:
 
120
  tool_counter += 1
121
  state["tool_counter"] = tool_counter
122
 
 
123
  if state.get("wiki_query"):
124
  return wikipedia_search_tool(state)
125
  if state.get("web_search_query"):
 
130
  return parse_excel_tool(state)
131
  if state.get("audio_path"):
132
  return audio_transcriber_tool(state)
133
+
134
+ return {} # nothing to do
135
 
136
 
137
  # ─── 4) merge_tool_output ───
 
142
  prev = state.get("prev_state", {}).copy()
143
 
144
  # Drop any lingering request keys so they don't persist
145
+ for dead in [
146
+ "wiki_query",
147
+ "web_search_query",
148
+ "ocr_path",
149
+ "excel_path",
150
+ "excel_sheet_name",
151
+ "audio_path",
152
+ ]:
153
  prev.pop(dead, None)
154
 
155
  merged = {**prev, **state}
156
+ # Also drop them from the merged result
157
+ for dead in [
158
+ "wiki_query",
159
+ "web_search_query",
160
+ "ocr_path",
161
+ "excel_path",
162
+ "excel_sheet_name",
163
+ "audio_path",
164
+ ]:
165
+ merged.pop(dead, None)
166
+
167
  merged.pop("prev_state", None)
168
  return merged
169
 
 
179
  β€’ Return {"final_answer":"<final>"} if done, OR
180
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
181
  """
182
+
183
  # 0) If we've already called tools too many times, force a final answer:
184
  if state.get("tool_counter", 0) >= 5:
185
  return {
186
  "messages": state["messages"],
187
+ "final_answer": state.get(
188
+ "final_answer", "ERROR: no interim_answer to finalize."
189
+ ),
190
  }
191
 
192
  messages_for_llm = []
 
240
  if isinstance(parsed, dict):
241
  # If GPT gave a final_answer, we finish here
242
  if "final_answer" in parsed:
243
+ return {
244
+ "messages": new_msgs,
245
+ "final_answer": parsed["final_answer"],
246
+ }
247
 
248
+ # If GPT requested exactly one valid tool, return only that key + carry tool_counter
249
+ valid_keys = {
250
+ "wiki_query",
251
+ "ocr_path",
252
+ "excel_path",
253
+ "excel_sheet_name",
254
+ "audio_path",
255
+ }
256
  requested_keys = set(parsed.keys()) & valid_keys
257
  if len(requested_keys) == 1:
258
+ clean: AgentState = {
259
+ "messages": new_msgs,
260
+ "tool_counter": state.get("tool_counter", 0),
261
+ }
262
  for k in requested_keys:
263
  clean[k] = parsed[k]
264
  return clean
 
270
  return {"messages": new_msgs, "final_answer": ia}
271
 
272
  # If there is no interim either, we cannot proceed
273
+ return {
274
+ "messages": new_msgs,
275
+ "final_answer": "ERROR: could not parse inspect decision.",
276
+ }
277
 
278
 
279
  # ─── 6) finalize_node ───
 
309
  graph.add_conditional_edges(
310
  "plan",
311
  route_plan,
312
+ {"store_prev_state": "store_prev_state", "finalize": "finalize"},
313
  )
314
 
315
  # store_prev_state β†’ tools
 
330
  graph.add_conditional_edges(
331
  "inspect",
332
  route_inspect,
333
+ {"store_prev_state": "store_prev_state", "finalize": "finalize"},
334
  )
335
 
336
  # finalize β†’ END
 
362
  initial_state: AgentState = {
363
  "messages": [system_msg, human_msg],
364
  "task_id": task_id,
365
+ "tool_counter": 0,
366
  }
367
  final_state = compiled_graph.invoke(initial_state)
368
  return final_state.get("final_answer", "Error: No final answer generated.")