naman1102 committed on
Commit
cf84beb
·
1 Parent(s): 3a03273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -61
app.py CHANGED
@@ -26,49 +26,49 @@ tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])
26
  llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
27
 
28
  # agent = create_react_agent(model=llm, tools=tool_node)
29
-
30
- # ─── Revised plan_node with NO extra arguments ───
31
  def plan_node(state: AgentState) -> AgentState:
32
  """
33
- Assumes that `state["messages"]` already ends with a HumanMessage of the user’s question.
34
- We look at that last HumanMessage, append it to our new history, and ask the LLM
35
- to set exactly one key in a Python dict: web_search_query, ocr_path,
36
- excel_path (+ excel_sheet_name), or final_answer.
 
 
 
37
  """
38
- # 1) Grab all prior BaseMessage objects (SystemMessage/HumanMessage/AIMessage) from state
39
  prior_msgs = state.get("messages", [])
40
 
41
- # 2) Find the very last HumanMessage (the user_input). We assume the last message is one.
42
- # If there is no HumanMessage, we treat user_input as empty.
43
  user_input = ""
44
  for msg in reversed(prior_msgs):
45
  if isinstance(msg, HumanMessage):
46
  user_input = msg.content
47
  break
48
 
49
- # 3) Build our new chat history by re‐using prior_msgs. It already includes that HumanMessage.
50
  new_history = prior_msgs.copy()
51
 
52
- # 4) Add a SystemMessage that instructs the LLM how to choose exactly one key
53
  explanation = SystemMessage(
54
  content=(
55
- "You can set exactly one of the following keys in a Python dict, and nothing else:\n"
56
- " • web_search_query: <search terms> \n"
57
- " • ocr_path: <path to an image file> \n"
58
- " • excel_path: <path to a .xlsx file> \n"
59
- " • excel_sheet_name: <sheet name> \n"
60
  "Or, if no tool is needed, set final_answer: <your answer>.\n"
61
  "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
62
  "Respond with only that Python dict literal—no extra text or explanation."
63
  )
64
  )
65
 
66
- # 5) Compose the prompt as a list of BaseMessage, then call the LLM
67
  prompt_messages = new_history + [explanation]
68
  llm_response = llm(prompt_messages)
69
  llm_out = llm_response.content.strip()
70
 
71
- # 6) Parse the LLM’s output as a dict
72
  try:
73
  parsed = eval(llm_out, {}, {})
74
  if isinstance(parsed, dict):
@@ -93,18 +93,20 @@ def plan_node(state: AgentState) -> AgentState:
93
  "final_answer": "Sorry, I could not parse your intent."
94
  }
95
 
96
-
97
- # ─── Revised finalize_node with NO extra arguments ───
98
  def finalize_node(state: AgentState) -> AgentState:
99
  """
100
- Assumes that `state['messages']` is a list of BaseMessage, possibly ending in an AIMessage
101
- (or plan_node may have set final_answer directly). We append any tool results
102
- as SystemMessages, then prompt the LLM for one final answer.
 
 
 
103
  """
104
  # 1) Copy the existing BaseMessage list
105
  history = state.get("messages", []).copy()
106
 
107
- # 2) If any tool-result fields exist, append them as SystemMessages
108
  if "web_search_result" in state and state["web_search_result"] is not None:
109
  history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
110
  if "ocr_result" in state and state["ocr_result"] is not None:
@@ -112,39 +114,32 @@ def finalize_node(state: AgentState) -> AgentState:
112
  if "excel_result" in state and state["excel_result"] is not None:
113
  history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
114
 
115
- # 3) If plan_node already set final_answer, just return it:
116
  if state.get("final_answer") is not None:
117
  return {"final_answer": state["final_answer"]}
118
 
119
- # 4) Otherwise, ask the LLM to give the final answer now
120
  history.append(SystemMessage(content="Please provide the final answer now."))
121
  llm_response = llm(history)
122
  return {"final_answer": llm_response.content.strip()}
123
 
 
124
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
125
 
126
- # ─── 5) Build the StateGraph ───
127
  graph = StateGraph(AgentState)
128
 
129
- # 5.a) Register nodes
130
  graph.add_node("plan", plan_node)
131
  graph.add_node("tools", tool_node)
132
  graph.add_node("run_tools", run_tools)
133
  graph.add_node("finalize", finalize_node)
134
 
135
- # 5.b) START → plan
136
  graph.add_edge(START, "plan")
137
 
138
-
139
-
140
-
141
-
142
-
143
  def route_plan(plan_out: AgentState) -> str:
144
- """
145
- plan_out is exactly what plan_node returned (a partial AgentState).
146
- If it set any of the tool-request keys, route to 'tools'; otherwise 'finalize'.
147
- """
148
  if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
149
  return "tools"
150
  return "finalize"
@@ -155,57 +150,57 @@ graph.add_conditional_edges(
155
  {"tools": "tools", "finalize": "finalize"}
156
  )
157
 
158
-
159
-
160
  graph.add_edge("tools", "run_tools")
161
 
162
- # 5.e) run_tools → finalize
163
  graph.add_edge("run_tools", "finalize")
164
 
165
- # 5.f) finalize → END
166
  graph.add_edge("finalize", END)
167
 
168
  compiled_graph = graph.compile()
169
 
 
170
  def respond_to_input(user_input: str) -> str:
171
  """
172
- Initialize with a SystemMessage (tools description) and the user’s question as a HumanMessage.
173
- Then run through plan → tools → run_tools → finalize. Return the "final_answer" from final_state.
174
  """
175
- # 1) Create a SystemMessage that tells the agent about its tools
176
  system_msg = SystemMessage(
177
  content=(
178
  "You have access to exactly these tools:\n"
179
- " 1) web_search(query:str) → Returns the top search results for the query.\n"
180
- " 2) parse_excel(path:str, sheet_name:str) → Reads an Excel file and returns its contents.\n"
181
- " 3) ocr_image(path:str) → Runs OCR on an image and returns any detected text.\n\n"
182
- "If you need a tool, set exactly one of these keys in a Pythondict:\n"
183
  " • web_search_query: <search terms>\n"
184
  " • ocr_path: <path to image>\n"
185
  " • excel_path: <path to xlsx>\n"
186
  " • excel_sheet_name: <sheet name>\n"
187
  "Otherwise, set final_answer: <your answer>.\n"
188
- "Respond with that Python dict literal—no extra text or explanation."
189
  )
190
  )
191
-
192
- # 2) Wrap the user_input in a HumanMessage
193
  human_msg = HumanMessage(content=user_input)
194
 
195
- # 3) Build the initial state so that "messages" contains both messages
196
- initial_state: AgentState = {
197
- "messages": [system_msg, human_msg],
198
- "user_input": user_input
199
- }
200
 
201
- # 4) Invoke the compiled graph (no second argument needed)
202
  final_state = compiled_graph.invoke(initial_state)
203
 
204
- # 5) Return the final answer (or a fallback if missing)
205
  return final_state.get("final_answer", "Error: No final answer generated.")
206
 
207
-
208
-
 
 
 
 
209
  class BasicAgent:
210
  def __init__(self):
211
  print("BasicAgent initialized.")
 
26
  llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
27
 
28
  # agent = create_react_agent(model=llm, tools=tool_node)
 
 
29
  def plan_node(state: AgentState) -> AgentState:
30
  """
31
+ `state["messages"]` must already end in a HumanMessage containing the user’s question.
32
+ We inspect that last HumanMessage and ask the LLM to set exactly one key:
33
+ • web_search_query
34
+ • ocr_path
35
+ • excel_path (and excel_sheet_name)
36
+ • final_answer
37
+ The LLM must return a bare Python dict literal containing exactly that one key.
38
  """
39
+ # 1) Grab prior BaseMessage list
40
  prior_msgs = state.get("messages", [])
41
 
42
+ # 2) Extract the last HumanMessage content (the user question)
 
43
  user_input = ""
44
  for msg in reversed(prior_msgs):
45
  if isinstance(msg, HumanMessage):
46
  user_input = msg.content
47
  break
48
 
49
+ # 3) Build new_history = copy of prior_msgs (it already contains that HumanMessage)
50
  new_history = prior_msgs.copy()
51
 
52
+ # 4) Append a SystemMessage explaining how to return exactly one key
53
  explanation = SystemMessage(
54
  content=(
55
+ "You can set exactly one of these keys in a Python dict (and nothing else):\n"
56
+ " • web_search_query: <search terms>\n"
57
+ " • ocr_path: <path to an image file>\n"
58
+ " • excel_path: <path to a .xlsx file>\n"
59
+ " • excel_sheet_name: <sheet name>\n"
60
  "Or, if no tool is needed, set final_answer: <your answer>.\n"
61
  "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
62
  "Respond with only that Python dict literal—no extra text or explanation."
63
  )
64
  )
65
 
66
+ # 5) Call the LLM with [ all previous BaseMessages ] + explanation
67
  prompt_messages = new_history + [explanation]
68
  llm_response = llm(prompt_messages)
69
  llm_out = llm_response.content.strip()
70
 
71
+ # 6) Try to parse the LLM output as a dict
72
  try:
73
  parsed = eval(llm_out, {}, {})
74
  if isinstance(parsed, dict):
 
93
  "final_answer": "Sorry, I could not parse your intent."
94
  }
95
 
96
+ # ─── 3) Define finalize_node (only takes state) ───
 
97
  def finalize_node(state: AgentState) -> AgentState:
98
  """
99
+ By this time:
100
+ - state['messages'] is a list of BaseMessage (SystemMessage/HumanMessage/AIMessage).
101
+ - Possibly state['web_search_result'] or state['ocr_result'] or state['excel_result'] is set.
102
+ - Or state['final_answer'] is already set (if plan_node decided no tool was needed).
103
+
104
+ We append any tool results as SystemMessages, then prompt the LLM for one final answer.
105
  """
106
  # 1) Copy the existing BaseMessage list
107
  history = state.get("messages", []).copy()
108
 
109
+ # 2) Append each tool result as a SystemMessage, if present
110
  if "web_search_result" in state and state["web_search_result"] is not None:
111
  history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
112
  if "ocr_result" in state and state["ocr_result"] is not None:
 
114
  if "excel_result" in state and state["excel_result"] is not None:
115
  history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
116
 
117
+ # 3) If plan_node already set a final_answer, just return it directly
118
  if state.get("final_answer") is not None:
119
  return {"final_answer": state["final_answer"]}
120
 
121
+ # 4) Otherwise, ask the LLM to produce the final answer
122
  history.append(SystemMessage(content="Please provide the final answer now."))
123
  llm_response = llm(history)
124
  return {"final_answer": llm_response.content.strip()}
125
 
126
+ # ─── 4) Wrap the low‐level tool wrappers in a ToolNode ───
127
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
128
 
129
+ # ─── 5) Build and compile the StateGraph ───
130
  graph = StateGraph(AgentState)
131
 
132
+ # 5.a) Register each node
133
  graph.add_node("plan", plan_node)
134
  graph.add_node("tools", tool_node)
135
  graph.add_node("run_tools", run_tools)
136
  graph.add_node("finalize", finalize_node)
137
 
138
+ # 5.b) Wire START → plan
139
  graph.add_edge(START, "plan")
140
 
141
+ # 5.c) plan → conditional: if any tool key is set, go to "tools"; otherwise "finalize"
 
 
 
 
142
  def route_plan(plan_out: AgentState) -> str:
 
 
 
 
143
  if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
144
  return "tools"
145
  return "finalize"
 
150
  {"tools": "tools", "finalize": "finalize"}
151
  )
152
 
153
+ # 5.d) Wire tools → run_tools
 
154
  graph.add_edge("tools", "run_tools")
155
 
156
+ # 5.e) Wire run_tools → finalize
157
  graph.add_edge("run_tools", "finalize")
158
 
159
+ # 5.f) Wire finalize → END
160
  graph.add_edge("finalize", END)
161
 
162
  compiled_graph = graph.compile()
163
 
164
+ # ─── 6) Define respond_to_input ───
165
  def respond_to_input(user_input: str) -> str:
166
  """
167
+ Start with a SystemMessage + HumanMessage; then let the graph run:
168
+ plan_node → tools → run_tools → finalize_node. Return final_answer.
169
  """
170
+ # 1) SystemMessage describing the tools
171
  system_msg = SystemMessage(
172
  content=(
173
  "You have access to exactly these tools:\n"
174
+ " 1) web_search(query:str) → Returns DuckDuckGo results.\n"
175
+ " 2) parse_excel(path:str, sheet_name:str) → Reads an Excel file.\n"
176
+ " 3) ocr_image(path:str) → Runs OCR on an image.\n\n"
177
+ "If you need a tool, set exactly one of these keys in a Python dict:\n"
178
  " • web_search_query: <search terms>\n"
179
  " • ocr_path: <path to image>\n"
180
  " • excel_path: <path to xlsx>\n"
181
  " • excel_sheet_name: <sheet name>\n"
182
  "Otherwise, set final_answer: <your answer>.\n"
183
+ "Respond with only that Python dict literal—no extra text."
184
  )
185
  )
186
+ # 2) HumanMessage wrapping the user’s question
 
187
  human_msg = HumanMessage(content=user_input)
188
 
189
+ # 3) Build initial_state so that "messages" = [system_msg, human_msg]
190
+ initial_state: AgentState = {"messages": [system_msg, human_msg]}
 
 
 
191
 
192
+ # 4) Invoke the graph (no second argument needed)
193
  final_state = compiled_graph.invoke(initial_state)
194
 
195
+ # 5) Return the "final_answer" or a fallback
196
  return final_state.get("final_answer", "Error: No final answer generated.")
197
 
198
+ # ─── 7) BasicAgent wrapper ───
199
+ class BasicAgent:
200
+ def __init__(self):
201
+ print("BasicAgent initialized.")
202
+ def __call__(self, question: str) -> str:
203
+ return respond_to_input(question)
204
  class BasicAgent:
205
  def __init__(self):
206
  print("BasicAgent initialized.")