naman1102 committed on
Commit
0fed708
·
1 Parent(s): cf84beb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -68
app.py CHANGED
@@ -26,33 +26,26 @@ tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])
26
  llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
27
 
28
  # agent = create_react_agent(model=llm, tools=tool_node)
 
 
29
  def plan_node(state: AgentState) -> AgentState:
30
  """
31
- `state["messages"]` must already end in a HumanMessage containing the user’s question.
32
- We inspect that last HumanMessage and ask the LLM to set exactly one key:
33
- web_search_query
34
- • ocr_path
35
- • excel_path (and excel_sheet_name)
36
- • final_answer
37
- The LLM must return a bare Python‐dict literal containing exactly that one key.
38
  """
39
- # 1) Grab prior BaseMessage list
40
  prior_msgs = state.get("messages", [])
41
-
42
- # 2) Extract the last HumanMessage content (the user question)
43
  user_input = ""
44
  for msg in reversed(prior_msgs):
45
  if isinstance(msg, HumanMessage):
46
  user_input = msg.content
47
  break
48
 
49
- # 3) Build new_history = copy of prior_msgs (it already contains that HumanMessage)
50
- new_history = prior_msgs.copy()
51
-
52
- # 4) Append a SystemMessage explaining how to return exactly one key
53
- explanation = SystemMessage(
54
  content=(
55
- "You can set exactly one of these keys in a Python dict (and nothing else):\n"
56
  " • web_search_query: <search terms>\n"
57
  " • ocr_path: <path to an image file>\n"
58
  " • excel_path: <path to a .xlsx file>\n"
@@ -62,17 +55,17 @@ def plan_node(state: AgentState) -> AgentState:
62
  "Respond with only that Python dict literal—no extra text or explanation."
63
  )
64
  )
 
65
 
66
- # 5) Call the LLM with [ all previous BaseMessages ] + explanation
67
- prompt_messages = new_history + [explanation]
68
- llm_response = llm(prompt_messages)
69
  llm_out = llm_response.content.strip()
70
 
71
- # 6) Try to parse the LLM output as a dict
72
  try:
73
  parsed = eval(llm_out, {}, {})
74
  if isinstance(parsed, dict):
75
- partial: AgentState = {"messages": new_history}
76
  allowed = {
77
  "web_search_query",
78
  "ocr_path",
@@ -87,49 +80,55 @@ def plan_node(state: AgentState) -> AgentState:
87
  except Exception:
88
  pass
89
 
90
- # 7) Fallback if parsing failed
91
  return {
92
- "messages": new_history,
93
  "final_answer": "Sorry, I could not parse your intent."
94
  }
95
 
96
- # ─── 3) Define finalize_node (only takes state) ───
 
97
  def finalize_node(state: AgentState) -> AgentState:
98
  """
99
- By this time:
100
- - state['messages'] is a list of BaseMessage (SystemMessage/HumanMessage/AIMessage).
101
- - Possibly state['web_search_result'] or state['ocr_result'] or state['excel_result'] is set.
102
- - Or state['final_answer'] is already set (if plan_node decided no tool was needed).
103
-
104
- We append any tool results as SystemMessages, then prompt the LLM for one final answer.
105
  """
106
- # 1) Copy the existing BaseMessage list
107
- history = state.get("messages", []).copy()
108
-
109
- # 2) Append each tool result as a SystemMessage, if present
110
- if "web_search_result" in state and state["web_search_result"] is not None:
111
- history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
112
- if "ocr_result" in state and state["ocr_result"] is not None:
113
- history.append(SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}"))
114
- if "excel_result" in state and state["excel_result"] is not None:
115
- history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
116
-
117
- # 3) If plan_node already set a final_answer, just return it directly
 
 
 
 
118
  if state.get("final_answer") is not None:
119
  return {"final_answer": state["final_answer"]}
120
 
121
- # 4) Otherwise, ask the LLM to produce the final answer
122
- history.append(SystemMessage(content="Please provide the final answer now."))
123
- llm_response = llm(history)
 
 
 
 
124
  return {"final_answer": llm_response.content.strip()}
125
 
126
- # ─── 4) Wrap the low‐level tool wrappers in a ToolNode ───
 
127
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
128
 
129
- # ─── 5) Build and compile the StateGraph ───
130
  graph = StateGraph(AgentState)
131
 
132
- # 5.a) Register each node
133
  graph.add_node("plan", plan_node)
134
  graph.add_node("tools", tool_node)
135
  graph.add_node("run_tools", run_tools)
@@ -138,7 +137,7 @@ graph.add_node("finalize", finalize_node)
138
  # 5.b) Wire START → plan
139
  graph.add_edge(START, "plan")
140
 
141
- # 5.c) plan → conditional: if any tool key is set, go to "tools"; otherwise "finalize"
142
  def route_plan(plan_out: AgentState) -> str:
143
  if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
144
  return "tools"
@@ -150,24 +149,24 @@ graph.add_conditional_edges(
150
  {"tools": "tools", "finalize": "finalize"}
151
  )
152
 
153
- # 5.d) Wire tools → run_tools
154
  graph.add_edge("tools", "run_tools")
155
 
156
- # 5.e) Wire run_tools → finalize
157
  graph.add_edge("run_tools", "finalize")
158
 
159
- # 5.f) Wire finalize → END
160
  graph.add_edge("finalize", END)
161
 
162
  compiled_graph = graph.compile()
163
 
164
- # ─── 6) Define respond_to_input ───
 
165
  def respond_to_input(user_input: str) -> str:
166
  """
167
- Start with a SystemMessage + HumanMessage; then let the graph run:
168
- plan_node tools run_tools finalize_node. Return final_answer.
169
  """
170
- # 1) SystemMessage describing the tools
171
  system_msg = SystemMessage(
172
  content=(
173
  "You have access to exactly these tools:\n"
@@ -183,24 +182,15 @@ def respond_to_input(user_input: str) -> str:
183
  "Respond with only that Python dict literal—no extra text."
184
  )
185
  )
186
- # 2) HumanMessage wrapping the user’s question
187
  human_msg = HumanMessage(content=user_input)
188
 
189
- # 3) Build initial_state so that "messages" = [system_msg, human_msg]
190
  initial_state: AgentState = {"messages": [system_msg, human_msg]}
191
-
192
- # 4) Invoke the graph (no second argument needed)
193
  final_state = compiled_graph.invoke(initial_state)
194
-
195
- # 5) Return the "final_answer" or a fallback
196
  return final_state.get("final_answer", "Error: No final answer generated.")
197
 
198
- # ─── 7) BasicAgent wrapper ───
199
- class BasicAgent:
200
- def __init__(self):
201
- print("BasicAgent initialized.")
202
- def __call__(self, question: str) -> str:
203
- return respond_to_input(question)
204
  class BasicAgent:
205
  def __init__(self):
206
  print("BasicAgent initialized.")
 
26
  llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
27
 
28
  # agent = create_react_agent(model=llm, tools=tool_node)
29
+
30
+ # ─── 2) Revised plan_node ───
31
  def plan_node(state: AgentState) -> AgentState:
32
  """
33
+ Look at the last HumanMessage in state['messages'] to get user_input.
34
+ Then call llm with exactly [SystemMessage, HumanMessage(user_input)] so
35
+ we never feed in a list lacking an AIMessage internally.
 
 
 
 
36
  """
37
+ # 1) Find the last HumanMessage from prior history
38
  prior_msgs = state.get("messages", [])
 
 
39
  user_input = ""
40
  for msg in reversed(prior_msgs):
41
  if isinstance(msg, HumanMessage):
42
  user_input = msg.content
43
  break
44
 
45
+ # 2) Build a fresh SystemMessage explaining exactly one dict key
46
+ system_msg = SystemMessage(
 
 
 
47
  content=(
48
+ "You can set exactly one of these keys in a Python dict and nothing else:\n"
49
  " • web_search_query: <search terms>\n"
50
  " • ocr_path: <path to an image file>\n"
51
  " • excel_path: <path to a .xlsx file>\n"
 
55
  "Respond with only that Python dict literal—no extra text or explanation."
56
  )
57
  )
58
+ human_msg = HumanMessage(content=user_input)
59
 
60
+ # 3) Call the LLM with a brand‐new list [system_msg, human_msg]
61
+ llm_response = llm([system_msg, human_msg])
 
62
  llm_out = llm_response.content.strip()
63
 
64
+ # 4) Try to parse as a Python dict
65
  try:
66
  parsed = eval(llm_out, {}, {})
67
  if isinstance(parsed, dict):
68
+ partial: AgentState = {"messages": prior_msgs.copy()}
69
  allowed = {
70
  "web_search_query",
71
  "ocr_path",
 
80
  except Exception:
81
  pass
82
 
83
+ # 5) Fallback
84
  return {
85
+ "messages": prior_msgs.copy(),
86
  "final_answer": "Sorry, I could not parse your intent."
87
  }
88
 
89
+
90
+ # ─── 3) Revised finalize_node ───
91
  def finalize_node(state: AgentState) -> AgentState:
92
  """
93
+ Collect any tool results from state and then ask the LLM for a final answer.
94
+ We build a fresh list of SystemMessages for tool results (no reuse of prior AIMessage).
 
 
 
 
95
  """
96
+ # 1) Create a list of SystemMessages for each available tool result
97
+ messages_for_llm = []
98
+ if state.get("web_search_result") is not None:
99
+ messages_for_llm.append(
100
+ SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}")
101
+ )
102
+ if state.get("ocr_result") is not None:
103
+ messages_for_llm.append(
104
+ SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}")
105
+ )
106
+ if state.get("excel_result") is not None:
107
+ messages_for_llm.append(
108
+ SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}")
109
+ )
110
+
111
+ # 2) If plan_node already set final_answer, return it without calling LLM again
112
  if state.get("final_answer") is not None:
113
  return {"final_answer": state["final_answer"]}
114
 
115
+ # 3) Otherwise, append our “please give final answer” SystemMessage
116
+ messages_for_llm.append(
117
+ SystemMessage(content="Please provide the final answer now.")
118
+ )
119
+
120
+ # 4) Call the LLM with our fresh list of SystemMessages
121
+ llm_response = llm(messages_for_llm)
122
  return {"final_answer": llm_response.content.strip()}
123
 
124
+
125
+ # ─── 4) Wrap tools in a ToolNode ───
126
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
127
 
128
+ # ─── 5) Build the graph ───
129
  graph = StateGraph(AgentState)
130
 
131
+ # 5.a) Register nodes
132
  graph.add_node("plan", plan_node)
133
  graph.add_node("tools", tool_node)
134
  graph.add_node("run_tools", run_tools)
 
137
  # 5.b) Wire START → plan
138
  graph.add_edge(START, "plan")
139
 
140
+ # 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
141
  def route_plan(plan_out: AgentState) -> str:
142
  if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
143
  return "tools"
 
149
  {"tools": "tools", "finalize": "finalize"}
150
  )
151
 
152
+ # 5.d) tools → run_tools
153
  graph.add_edge("tools", "run_tools")
154
 
155
+ # 5.e) run_tools → finalize
156
  graph.add_edge("run_tools", "finalize")
157
 
158
+ # 5.f) finalize → END
159
  graph.add_edge("finalize", END)
160
 
161
  compiled_graph = graph.compile()
162
 
163
+
164
+ # ─── 6) respond_to_input ───
165
  def respond_to_input(user_input: str) -> str:
166
  """
167
+ Seed state['messages'] with a SystemMessage (tools description) + HumanMessage(user_input).
168
+ Then invoke the graph; return the final_answer from the resulting state.
169
  """
 
170
  system_msg = SystemMessage(
171
  content=(
172
  "You have access to exactly these tools:\n"
 
182
  "Respond with only that Python dict literal—no extra text."
183
  )
184
  )
 
185
  human_msg = HumanMessage(content=user_input)
186
 
 
187
  initial_state: AgentState = {"messages": [system_msg, human_msg]}
 
 
188
  final_state = compiled_graph.invoke(initial_state)
 
 
189
  return final_state.get("final_answer", "Error: No final answer generated.")
190
 
191
+
192
+
193
+
 
 
 
194
  class BasicAgent:
195
  def __init__(self):
196
  print("BasicAgent initialized.")