app.py
CHANGED
Original version (removed lines are prefixed with -):

@@ -20,46 +20,40 @@ from state import AgentState

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

-from tools import ocr_image_tool, parse_excel_tool, web_search_tool
tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])

llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)

agent = create_react_agent(model=llm, tools=tool_node)

-# 2) Build a two-edge graph:
def plan_node(state: AgentState, user_input: str) -> AgentState:
    """
-
    """
-    # 4.a) Grab prior chat history, append user_input:
    prior = state.get("messages", [])
    chat_history = prior + [f"USER: {user_input}"]

-    # 4.b) Send that to the LLM with a prompt explaining the new schema:
    prompt = chat_history + [
-        "ASSISTANT: You can set one of the following keys:\n"
-        " • web_search_query: <
-        " • ocr_path: <path> \n"
-        " • excel_path: <path> \n"
-        " • excel_sheet_name: <sheet> \n"
        "Or, if no tool is needed, set final_answer: <your answer>.\n"
-        "Respond with a Python-dict literal that contains exactly one of those keys.\n"
        "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
-        "
    ]
    llm_out = llm(prompt).content.strip()

-    # 4.c) Try to eval as a Python dict:
    try:
-        parsed = eval(llm_out, {}, {})
        if isinstance(parsed, dict):
-            # Only keep recognized keys, ignore anything else
            new_state: AgentState = {"messages": chat_history}
            allowed = {
                "web_search_query",

@@ -75,22 +69,19 @@ def plan_node(state: AgentState, user_input: str) -> AgentState:

    except Exception:
        pass

-    #
    return {
        "messages": chat_history,
        "final_answer": "Sorry, I could not parse your intent."
    }

# ─── 5) Define “finalize” node: compose the final answer using any tool results ───
def finalize_node(state: AgentState) -> AgentState:
    """
-    - One or more of web_search_result, ocr_result, excel_result might be filled.
-    - Or, state['final_answer'] is already set, meaning no tool was needed.
-    We ask the LLM to produce a final text answer.
    """
-    # 5.a) Build a prompt listing any tool results:
    parts = state.get("messages", [])
    if "web_search_result" in state and state["web_search_result"] is not None:
        parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")

@@ -98,36 +89,36 @@ def finalize_node(state: AgentState) -> AgentState:

        parts.append(f"OCR_RESULT: {state['ocr_result']}")
    if "excel_result" in state and state["excel_result"] is not None:
        parts.append(f"EXCEL_RESULT: {state['excel_result']}")

    parts.append("ASSISTANT: Please provide the final answer now.")
    llm_out = llm(parts).content.strip()
-
    return {"final_answer": llm_out}

-
graph = StateGraph(AgentState)

-#
graph.add_node("plan", plan_node)
graph.add_node("tools", tool_node)
graph.add_node("finalize", finalize_node)

-#
graph.add_edge(START, "plan")

-
def route_plan(state: AgentState, plan_out: AgentState) -> str:
-    # If plan_node placed a "web_search_query", "ocr_path", or "excel_path", go to tools.
-    # (Note: plan_out already replaced state["messages"])
    if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
        return "tools"
    return "finalize"

@@ -138,34 +129,27 @@ graph.add_conditional_edges(

    {"tools": "tools", "finalize": "finalize"}
)

-def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
-    """
-    When a tool-wrapper returns, it has already consumed the relevant key
-    (e.g. set web_search_query back to None) and added tool_result.
-    We just merge that into state.
-    """
-    new_state = {**state, **tool_out}
-    return new_state
-

-#
graph.add_edge("finalize", END)

compiled_graph = graph.compile()

-# ─── 7) Define respond_to_input that drives the graph ───
def respond_to_input(user_input: str) -> str:
-
    initial_state: AgentState = {"messages": []}
    final_state = compiled_graph.invoke(initial_state, user_input)
-    # final_state should have 'final_answer'
    return final_state.get("final_answer", "Error: No final answer generated.")

-
class BasicAgent:
    def __init__(self):
        print("BasicAgent initialized.")
Updated version (added lines are prefixed with +):

@@ -20,46 +20,40 @@ from state import AgentState

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools
tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])

llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)

agent = create_react_agent(model=llm, tools=tool_node)

def plan_node(state: AgentState, user_input: str) -> AgentState:
    """
+    Decide which tool (if any) to call. Append the user_input to state["messages"] and
+    return a partial AgentState that sets exactly one of:
+      - web_search_query (string)
+      - ocr_path (string)
+      - excel_path (string) + excel_sheet_name (optional)
+      - final_answer (string)
    """
    prior = state.get("messages", [])
    chat_history = prior + [f"USER: {user_input}"]

    prompt = chat_history + [
+        "ASSISTANT: You can set exactly one of the following keys in a Python dict:\n"
+        " • web_search_query: <search terms> \n"
+        " • ocr_path: <path to an image> \n"
+        " • excel_path: <path to xlsx> \n"
+        " • excel_sheet_name: <sheet name> \n"
        "Or, if no tool is needed, set final_answer: <your answer>.\n"
        "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
+        "Respond with only that Python dict literal—no additional text."
    ]
    llm_out = llm(prompt).content.strip()

    try:
+        parsed = eval(llm_out, {}, {})
        if isinstance(parsed, dict):
            new_state: AgentState = {"messages": chat_history}
            allowed = {
                "web_search_query",

@@ -75,22 +69,19 @@ def plan_node(state: AgentState, user_input: str) -> AgentState:

    except Exception:
        pass

+    # Fallback if parsing failed
    return {
        "messages": chat_history,
        "final_answer": "Sorry, I could not parse your intent."
    }
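The plan step relies on the model replying with a bare Python dict literal that carries exactly one of the allowed keys, which plan_node then evaluates and filters against the allowed set. A rough sketch of that round trip follows; the parse_plan_reply helper and the use of ast.literal_eval are illustrative assumptions on my part (the commit itself calls eval(llm_out, {}, {})), not code from the diff.

import ast

ALLOWED_KEYS = {"web_search_query", "ocr_path", "excel_path", "excel_sheet_name", "final_answer"}

def parse_plan_reply(llm_out: str) -> dict:
    # ast.literal_eval only accepts Python literals, so a reply that embeds
    # arbitrary expressions raises an error instead of executing, unlike eval.
    parsed = ast.literal_eval(llm_out.strip())
    if not isinstance(parsed, dict):
        raise ValueError("expected a dict literal")
    # Keep only the keys the planner recognizes, mirroring the allowed-set filter above.
    return {k: v for k, v in parsed.items() if k in ALLOWED_KEYS}

# Example reply and the partial state it would produce:
reply = "{'web_search_query': 'Mercedes Sosa discography'}"
print(parse_plan_reply(reply))  # {'web_search_query': 'Mercedes Sosa discography'}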
+
# ─── 5) Define “finalize” node: compose the final answer using any tool results ───
def finalize_node(state: AgentState) -> AgentState:
    """
+    After any tool results exist in state, or if final_answer was already set,
+    ask the LLM to produce the final answer.
    """
    parts = state.get("messages", [])
    if "web_search_result" in state and state["web_search_result"] is not None:
        parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")

@@ -98,36 +89,36 @@ def finalize_node(state: AgentState) -> AgentState:

        parts.append(f"OCR_RESULT: {state['ocr_result']}")
    if "excel_result" in state and state["excel_result"] is not None:
        parts.append(f"EXCEL_RESULT: {state['excel_result']}")
+    # If plan already set final_answer, skip calling the LLM again
+    if state.get("final_answer") is not None:
+        return {"final_answer": state["final_answer"]}

    parts.append("ASSISTANT: Please provide the final answer now.")
    llm_out = llm(parts).content.strip()
    return {"final_answer": llm_out}

+tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])

+# ─── 5) Build the StateGraph ───
graph = StateGraph(AgentState)

+# 5.a) Register nodes
graph.add_node("plan", plan_node)
graph.add_node("tools", tool_node)
+graph.add_node("run_tools", run_tools)
graph.add_node("finalize", finalize_node)

+# 5.b) START → plan
graph.add_edge(START, "plan")

+# 4) After plan, we branch based on whether a tool key was set:
+# If plan_node set web_search_query/ocr_path/excel_path, go to "tools"; otherwise go straight to "finalize".
def route_plan(state: AgentState, plan_out: AgentState) -> str:
    if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
        return "tools"
    return "finalize"

@@ -138,34 +129,27 @@ graph.add_conditional_edges(

    {"tools": "tools", "finalize": "finalize"}
)
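The graph.add_conditional_edges(...) call itself is only partially visible in this hunk (just the routing map and the closing parenthesis). Judging from the hunk header and route_plan, the full registration presumably reads roughly as follows; this is a reconstruction, not text from the commit:

graph.add_conditional_edges(
    "plan",        # source node: branch right after planning
    route_plan,    # router that returns either "tools" or "finalize"
    {"tools": "tools", "finalize": "finalize"},
)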
+graph.add_edge("tools", "run_tools")

+# 5.e) run_tools → finalize
+graph.add_edge("run_tools", "finalize")

+# 5.f) finalize → END
graph.add_edge("finalize", END)

compiled_graph = graph.compile()

def respond_to_input(user_input: str) -> str:
+    """
+    Initialize with an empty messages list. Then run through plan → tools → run_tools → finalize.
+    Return the "final_answer" from the final state.
+    """
    initial_state: AgentState = {"messages": []}
    final_state = compiled_graph.invoke(initial_state, user_input)
    return final_state.get("final_answer", "Error: No final answer generated.")

class BasicAgent:
    def __init__(self):
        print("BasicAgent initialized.")
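For context, a minimal sketch of driving the agent end to end. The question string is made up, and the sketch keeps respond_to_input's calling convention as committed; note that in recent LangGraph releases the second positional argument of invoke is the run config, so the user input may instead need to be carried inside the initial state.

# Illustrative driver; the question text is invented for the example.
if __name__ == "__main__":
    BasicAgent()  # prints "BasicAgent initialized."
    answer = respond_to_input("How many studio albums did Mercedes Sosa publish between 2000 and 2009?")
    print(answer)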
tools.py
CHANGED
@@ -69,3 +69,14 @@ def parse_excel_tool(state: AgentState) -> AgentState:

        "excel_sheet_name": None,
        "excel_result": text
    }
+
+
+def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
+    """
+    Merges whatever partial state the tool wrapper returned (tool_out)
+    into the main state. That is, combine previous keys with new keys:
+    new_state = { **state, **tool_out }.
+    This node should be wired as its own graph node, not as a transition function.
+    """
+    new_state = {**state, **tool_out}
+    return new_state