naman1102 committed on
Commit
1bc8bac
·
1 Parent(s): 9a37625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -43
app.py CHANGED
@@ -27,32 +27,32 @@ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
27
 
28
  agent = create_react_agent(model=llm, tools=tool_node)
29
 
 
30
  def plan_node(state: AgentState) -> AgentState:
31
  """
32
- Decide which tool (if any) to call. The state is expected to contain:
33
- - state["messages"]: a list of BaseMessage (SystemMessage, HumanMessage, etc.)
34
- - state["user_input"]: the raw user question (string)
35
-
36
- We append a new HumanMessage(user_input) to messages, then ask the LLM
37
- (via ChatOpenAI) to return exactly one key: web_search_query, ocr_path,
38
- excel_path (with excel_sheet_name), or final_answer. The LLM must reply
39
- with a bare Python‐dict literal.
40
-
41
- We then return a new partial AgentState that always includes an updated
42
- "messages" list plus exactly one of those tool‐request keys (or final_answer).
43
  """
44
- # 1) Pull user_input out of state
45
- user_input = state.get("user_input", "")
46
- # 2) Grab prior chat history, which should already be a list of BaseMessage
47
  prior_msgs = state.get("messages", [])
48
- # 3) Append the new user message as a HumanMessage
49
- new_history = prior_msgs + [HumanMessage(content=user_input)]
50
 
51
- # 4) Build a prompt that explains how to choose exactly one key
52
- # We leave new_history as a list of BaseMessage; LLM expects that format.
 
 
 
 
 
 
 
 
 
 
53
  explanation = SystemMessage(
54
  content=(
55
- "You can set exactly one of the following keys (in a Python dict) and nothing else:\n"
56
  " • web_search_query: <search terms> \n"
57
  " • ocr_path: <path to an image file> \n"
58
  " • excel_path: <path to a .xlsx file> \n"
@@ -63,20 +63,16 @@ def plan_node(state: AgentState) -> AgentState:
63
  )
64
  )
65
 
66
- # 5) Combine the user conversation with our explanation
67
  prompt_messages = new_history + [explanation]
68
-
69
- # 6) Call the LLM. Because prompt_messages is a list of BaseMessage,
70
- # ChatOpenAI will return an AIMessage.
71
  llm_response = llm(prompt_messages)
72
  llm_out = llm_response.content.strip()
73
 
74
- # 7) Try to eval the LLM response as a Python dict
75
  try:
76
  parsed = eval(llm_out, {}, {})
77
  if isinstance(parsed, dict):
78
- # Build a new state: keep our updated messages, plus exactly one key
79
- new_state: AgentState = {"messages": new_history}
80
  allowed = {
81
  "web_search_query",
82
  "ocr_path",
@@ -86,41 +82,44 @@ def plan_node(state: AgentState) -> AgentState:
86
  }
87
  for k, v in parsed.items():
88
  if k in allowed:
89
- new_state[k] = v
90
- return new_state
91
  except Exception:
92
  pass
93
 
94
- # 8) Fallback if parsing failed: keep messages, set a generic final_answer
95
  return {
96
  "messages": new_history,
97
  "final_answer": "Sorry, I could not parse your intent."
98
  }
99
 
100
 
101
- # ─── 5) Define “finalize” node: compose the final answer using any tool results ───
102
  def finalize_node(state: AgentState) -> AgentState:
103
  """
104
- After any tool results exist in state, or if final_answer was already set,
105
- ask the LLM to produce the final answer.
 
106
  """
107
- parts = state.get("messages", [])
 
 
 
108
  if "web_search_result" in state and state["web_search_result"] is not None:
109
- parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")
110
  if "ocr_result" in state and state["ocr_result"] is not None:
111
- parts.append(f"OCR_RESULT: {state['ocr_result']}")
112
  if "excel_result" in state and state["excel_result"] is not None:
113
- parts.append(f"EXCEL_RESULT: {state['excel_result']}")
114
- # If plan already set final_answer, skip calling the LLM again
 
115
  if state.get("final_answer") is not None:
116
  return {"final_answer": state["final_answer"]}
117
 
118
- parts.append("ASSISTANT: Please provide the final answer now.")
119
- print("finalize_node content problem: ", parts)
120
- llm_out = llm(parts).content.strip()
121
- print("finalize_node passed")
122
- return {"final_answer": llm_out}
123
-
124
 
125
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
126
 
 
27
 
28
  agent = create_react_agent(model=llm, tools=tool_node)
29
 
30
+ # ─── Revised plan_node with NO extra arguments ───
31
  def plan_node(state: AgentState) -> AgentState:
32
  """
33
+ Assumes that `state["messages"]` already ends with a HumanMessage of the user’s question.
34
+ We look at that last HumanMessage, reuse the existing history as-is, and ask the LLM
35
+ to set exactly one key in a Python dict: web_search_query, ocr_path,
36
+ excel_path (+ excel_sheet_name), or final_answer.
 
 
 
 
 
 
 
37
  """
38
+ # 1) Grab all prior BaseMessage objects (SystemMessage/HumanMessage/AIMessage) from state
 
 
39
  prior_msgs = state.get("messages", [])
 
 
40
 
41
+ # 2) Find the very last HumanMessage (the user_input). We assume the last message is one.
42
+ # If there is no HumanMessage, we treat user_input as empty.
43
+ user_input = ""
44
+ for msg in reversed(prior_msgs):
45
+ if isinstance(msg, HumanMessage):
46
+ user_input = msg.content
47
+ break
48
+
49
+ # 3) Build our new chat history by re‐using prior_msgs. It already includes that HumanMessage.
50
+ new_history = prior_msgs.copy()
51
+
52
+ # 4) Add a SystemMessage that instructs the LLM how to choose exactly one key
53
  explanation = SystemMessage(
54
  content=(
55
+ "You can set exactly one of the following keys in a Python dict, and nothing else:\n"
56
  " • web_search_query: <search terms> \n"
57
  " • ocr_path: <path to an image file> \n"
58
  " • excel_path: <path to a .xlsx file> \n"
 
63
  )
64
  )
65
 
66
+ # 5) Compose the prompt as a list of BaseMessage, then call the LLM
67
  prompt_messages = new_history + [explanation]
 
 
 
68
  llm_response = llm(prompt_messages)
69
  llm_out = llm_response.content.strip()
70
 
71
+ # 6) Parse the LLM’s output as a dict
72
  try:
73
  parsed = eval(llm_out, {}, {})
74
  if isinstance(parsed, dict):
75
+ partial: AgentState = {"messages": new_history}
 
76
  allowed = {
77
  "web_search_query",
78
  "ocr_path",
 
82
  }
83
  for k, v in parsed.items():
84
  if k in allowed:
85
+ partial[k] = v
86
+ return partial
87
  except Exception:
88
  pass
89
 
90
+ # 7) Fallback if parsing failed
91
  return {
92
  "messages": new_history,
93
  "final_answer": "Sorry, I could not parse your intent."
94
  }
95
 
96
 
97
+ # ─── Revised finalize_node with NO extra arguments ───
98
  def finalize_node(state: AgentState) -> AgentState:
99
  """
100
+ Assumes that `state['messages']` is a list of BaseMessage, possibly ending in an AIMessage
101
+ (or plan_node may have set final_answer directly). We append any tool results
102
+ as SystemMessages, then prompt the LLM for one final answer (skipped if final_answer is already set).
103
  """
104
+ # 1) Copy the existing BaseMessage list
105
+ history = state.get("messages", []).copy()
106
+
107
+ # 2) If any tool-result fields exist, append them as SystemMessages
108
  if "web_search_result" in state and state["web_search_result"] is not None:
109
+ history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
110
  if "ocr_result" in state and state["ocr_result"] is not None:
111
+ history.append(SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}"))
112
  if "excel_result" in state and state["excel_result"] is not None:
113
+ history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
114
+
115
+ # 3) If plan_node already set final_answer, just return it:
116
  if state.get("final_answer") is not None:
117
  return {"final_answer": state["final_answer"]}
118
 
119
+ # 4) Otherwise, ask the LLM to give the final answer now
120
+ history.append(SystemMessage(content="Please provide the final answer now."))
121
+ llm_response = llm(history)
122
+ return {"final_answer": llm_response.content.strip()}
 
 
123
 
124
  tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
125