Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -27,32 +27,32 @@ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
|
|
27 |
|
28 |
agent = create_react_agent(model=llm, tools=tool_node)
|
29 |
|
|
|
30 |
def plan_node(state: AgentState) -> AgentState:
|
31 |
"""
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
We append a new HumanMessage(user_input) to messages, then ask the LLM
|
37 |
-
(via ChatOpenAI) to return exactly one key: web_search_query, ocr_path,
|
38 |
-
excel_path (with excel_sheet_name), or final_answer. The LLM must reply
|
39 |
-
with a bare Python‐dict literal.
|
40 |
-
|
41 |
-
We then return a new partial AgentState that always includes an updated
|
42 |
-
"messages" list plus exactly one of those tool‐request keys (or final_answer).
|
43 |
"""
|
44 |
-
# 1)
|
45 |
-
user_input = state.get("user_input", "")
|
46 |
-
# 2) Grab prior chat history, which should already be a list of BaseMessage
|
47 |
prior_msgs = state.get("messages", [])
|
48 |
-
# 3) Append the new user message as a HumanMessage
|
49 |
-
new_history = prior_msgs + [HumanMessage(content=user_input)]
|
50 |
|
51 |
-
#
|
52 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
explanation = SystemMessage(
|
54 |
content=(
|
55 |
-
"You can set exactly one of the following keys
|
56 |
" • web_search_query: <search terms> \n"
|
57 |
" • ocr_path: <path to an image file> \n"
|
58 |
" • excel_path: <path to a .xlsx file> \n"
|
@@ -63,20 +63,16 @@ def plan_node(state: AgentState) -> AgentState:
|
|
63 |
)
|
64 |
)
|
65 |
|
66 |
-
# 5)
|
67 |
prompt_messages = new_history + [explanation]
|
68 |
-
|
69 |
-
# 6) Call the LLM. Because prompt_messages is a list of BaseMessage,
|
70 |
-
# ChatOpenAI will return an AIMessage.
|
71 |
llm_response = llm(prompt_messages)
|
72 |
llm_out = llm_response.content.strip()
|
73 |
|
74 |
-
#
|
75 |
try:
|
76 |
parsed = eval(llm_out, {}, {})
|
77 |
if isinstance(parsed, dict):
|
78 |
-
|
79 |
-
new_state: AgentState = {"messages": new_history}
|
80 |
allowed = {
|
81 |
"web_search_query",
|
82 |
"ocr_path",
|
@@ -86,41 +82,44 @@ def plan_node(state: AgentState) -> AgentState:
|
|
86 |
}
|
87 |
for k, v in parsed.items():
|
88 |
if k in allowed:
|
89 |
-
|
90 |
-
return
|
91 |
except Exception:
|
92 |
pass
|
93 |
|
94 |
-
#
|
95 |
return {
|
96 |
"messages": new_history,
|
97 |
"final_answer": "Sorry, I could not parse your intent."
|
98 |
}
|
99 |
|
100 |
|
101 |
-
# ───
|
102 |
def finalize_node(state: AgentState) -> AgentState:
|
103 |
"""
|
104 |
-
|
105 |
-
|
|
|
106 |
"""
|
107 |
-
|
|
|
|
|
|
|
108 |
if "web_search_result" in state and state["web_search_result"] is not None:
|
109 |
-
|
110 |
if "ocr_result" in state and state["ocr_result"] is not None:
|
111 |
-
|
112 |
if "excel_result" in state and state["excel_result"] is not None:
|
113 |
-
|
114 |
-
|
|
|
115 |
if state.get("final_answer") is not None:
|
116 |
return {"final_answer": state["final_answer"]}
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
return {"final_answer": llm_out}
|
123 |
-
|
124 |
|
125 |
tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
|
126 |
|
|
|
27 |
|
28 |
agent = create_react_agent(model=llm, tools=tool_node)
|
29 |
|
30 |
+
# ─── Revised plan_node with NO extra arguments ───
|
31 |
def plan_node(state: AgentState) -> AgentState:
|
32 |
"""
|
33 |
+
Assumes that `state["messages"]` already ends with a HumanMessage of the user’s question.
|
34 |
+
We look at that last HumanMessage (already part of the reused history, so nothing is re-appended) and ask the LLM
|
35 |
+
to set exactly one key in a Python dict: web_search_query, ocr_path,
|
36 |
+
excel_path (+ excel_sheet_name), or final_answer.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
"""
|
38 |
+
# 1) Grab all prior BaseMessage objects (SystemMessage/HumanMessage/AIMessage) from state
|
|
|
|
|
39 |
prior_msgs = state.get("messages", [])
|
|
|
|
|
40 |
|
41 |
+
# 2) Find the very last HumanMessage (the user_input). We assume the last message is one.
|
42 |
+
# If there is no HumanMessage, we treat user_input as empty.
|
43 |
+
user_input = ""
|
44 |
+
for msg in reversed(prior_msgs):
|
45 |
+
if isinstance(msg, HumanMessage):
|
46 |
+
user_input = msg.content
|
47 |
+
break
|
48 |
+
|
49 |
+
# 3) Build our new chat history by re‐using prior_msgs. It already includes that HumanMessage.
|
50 |
+
new_history = prior_msgs.copy()
|
51 |
+
|
52 |
+
# 4) Add a SystemMessage that instructs the LLM how to choose exactly one key
|
53 |
explanation = SystemMessage(
|
54 |
content=(
|
55 |
+
"You can set exactly one of the following keys in a Python dict, and nothing else:\n"
|
56 |
" • web_search_query: <search terms> \n"
|
57 |
" • ocr_path: <path to an image file> \n"
|
58 |
" • excel_path: <path to a .xlsx file> \n"
|
|
|
63 |
)
|
64 |
)
|
65 |
|
66 |
+
# 5) Compose the prompt as a list of BaseMessage, then call the LLM
|
67 |
prompt_messages = new_history + [explanation]
|
|
|
|
|
|
|
68 |
llm_response = llm(prompt_messages)
|
69 |
llm_out = llm_response.content.strip()
|
70 |
|
71 |
+
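# 6) Parse the LLM’s output as a dict. NOTE(review): eval() on raw model output is unsafe; prefer ast.literal_eval for a bare dict literal.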
|
72 |
try:
|
73 |
parsed = eval(llm_out, {}, {})
|
74 |
if isinstance(parsed, dict):
|
75 |
+
partial: AgentState = {"messages": new_history}
|
|
|
76 |
allowed = {
|
77 |
"web_search_query",
|
78 |
"ocr_path",
|
|
|
82 |
}
|
83 |
for k, v in parsed.items():
|
84 |
if k in allowed:
|
85 |
+
partial[k] = v
|
86 |
+
return partial
|
87 |
except Exception:
|
88 |
pass
|
89 |
|
90 |
+
# 7) Fallback if parsing failed
|
91 |
return {
|
92 |
"messages": new_history,
|
93 |
"final_answer": "Sorry, I could not parse your intent."
|
94 |
}
|
95 |
|
96 |
|
97 |
+
# ─── Revised finalize_node with NO extra arguments ───
|
98 |
def finalize_node(state: AgentState) -> AgentState:
|
99 |
"""
|
100 |
+
Assumes that `state['messages']` is a list of BaseMessage, possibly ending in an AIMessage
|
101 |
+
(or plan_node may have set final_answer directly). We append any tool results
|
102 |
+
as SystemMessages, then prompt the LLM for one final answer.
|
103 |
"""
|
104 |
+
# 1) Copy the existing BaseMessage list
|
105 |
+
history = state.get("messages", []).copy()
|
106 |
+
|
107 |
+
# 2) If any tool-result fields exist, append them as SystemMessages
|
108 |
if "web_search_result" in state and state["web_search_result"] is not None:
|
109 |
+
history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
|
110 |
if "ocr_result" in state and state["ocr_result"] is not None:
|
111 |
+
history.append(SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}"))
|
112 |
if "excel_result" in state and state["excel_result"] is not None:
|
113 |
+
history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
|
114 |
+
|
115 |
+
# 3) If plan_node already set final_answer, just return it:
|
116 |
if state.get("final_answer") is not None:
|
117 |
return {"final_answer": state["final_answer"]}
|
118 |
|
119 |
+
# 4) Otherwise, ask the LLM to give the final answer now
|
120 |
+
history.append(SystemMessage(content="Please provide the final answer now."))
|
121 |
+
llm_response = llm(history)
|
122 |
+
return {"final_answer": llm_response.content.strip()}
|
|
|
|
|
123 |
|
124 |
tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
|
125 |
|