Spaces: Sleeping
version2
Browse files
- app.py +175 -165
- old_app_copy.py +504 -0

app.py CHANGED
@@ -22,17 +22,20 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
 
 llm = ChatOpenAI(model_name="gpt-4o-mini")
 
-# agent = create_react_agent(model=llm, tools=tool_node)
-
+# ─── 1) plan_node ───
 def plan_node(state: AgentState) -> AgentState:
     """
-    This plan_node will ask GPT to:
-    1) First write a concise *direct* answer.
-    2) Then decide whether it’s confident enough to stop (return {"final_answer": ...})
-       or if it needs to verify via one tool (return exactly one of {"wiki_query":...},
-       {"web_search_query":...}, {"ocr_path":...}, {"excel_path":...,"excel_sheet_name":...}, or {"audio_path":...}).
+    Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
+    then decide if it’s confident enough to stop or if it needs one tool.
+    If confident: return {"final_answer": "<answer>"}
+    Otherwise: return exactly one of
+        {"wiki_query": "..."},
+        {"web_search_query": "..."},
+        {"ocr_path": "..."},
+        {"excel_path": "...", "excel_sheet_name": "..."},
+        {"audio_path": "..."}
     """
     prior_msgs = state.get("messages", [])
     user_input = ""
@@ -41,38 +44,32 @@ def plan_node(state: AgentState) -> AgentState:
             user_input = msg.content
             break
 
-    # (1) Build a fresh SystemMessage that tells the LLM exactly how to self‐evaluate
     system_msg = SystemMessage(
         content=(
-            "You are an agent that must do two things in a single JSON output:\n\n"
-            " 1) Produce a concise, direct answer to the user’s question (no explanation, just the answer). \n"
-            " 2) Judge whether that answer is reliable. \n"
-            "    • If you are fully confident and do NOT need any external verification, return exactly:\n"
+            "You are an agent that must do two things in one JSON output:\n\n"
+            " 1) Provide a concise, direct answer to the user’s question (no explanation).\n"
+            " 2) Judge whether that answer is reliable:\n"
+            "    • If you are fully confident, return exactly:\n"
             "      {\"final_answer\":\"<your concise answer>\"}\n"
             "      and nothing else.\n"
-            "    • If you think you need to verify or look something up first, return exactly one of the following (and nothing else):\n"
-            "      {\"wiki_query\":\"<search terms for Wikipedia>\"}\n"
+            "    • Otherwise, return exactly one of:\n"
+            "      {\"wiki_query\":\"<Wikipedia search>\"}\n"
             "      {\"web_search_query\":\"<search terms>\"}\n"
-            "      {\"ocr_path\":\"<local image path or task_id>\"}\n"
-            "      {\"excel_path\":\"<local .xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
-            "      {\"audio_path\":\"<local audio path or task_id>\"}\n\n"
-            " You must pick exactly one key—either final_answer or exactly one tool key.\n"
-            " Do NOT wrap it in any markdown or extra text. Only output a single JSON object.\n"
-            "\n"
+            "      {\"ocr_path\":\"<image path or task_id>\"}\n"
+            "      {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
+            "      {\"audio_path\":\"<audio path or task_id>\"}\n"
+            "      and nothing else.\n"
+            "Do NOT wrap in markdown—output only a single JSON object.\n"
             f"User’s question: \"{user_input}\"\n"
         )
     )
     human_msg = HumanMessage(content=user_input)
-
-    # (2) Call the LLM with this single system/human pair
     llm_response = llm([system_msg, human_msg])
     llm_out = llm_response.content.strip()
 
-    # (3) Append the LLM output into the message history
     ai_msg = AIMessage(content=llm_out)
    new_msgs = prior_msgs.copy() + [ai_msg]
 
-    # (4) Attempt to parse that JSON
     try:
         parsed = json.loads(llm_out)
         if isinstance(parsed, dict):
@@ -93,208 +90,221 @@ def plan_node(state: AgentState) -> AgentState:
     except json.JSONDecodeError:
         pass
 
-    # (5) If parsing failed, fall back to a safe “sorry” answer
     return {
         "messages": new_msgs,
         "final_answer": "Sorry, I could not parse your intent."
     }
 
 
-# ─── 3) Revised finalize_node ───
-def finalize_node(state: AgentState) -> AgentState:
-    if state.get("final_answer") is not None:
-        return {"final_answer": state["final_answer"]}
-
-    # Re‐extract the last user question
+# ─── 2) store_prev_state ───
+def store_prev_state(state: AgentState) -> AgentState:
+    return {**state, "prev_state": state.copy()}
+
+
+# ─── 3) tools_node ───
+def tool_node(state: AgentState) -> AgentState:
+    """
+    Dispatch exactly one tool based on which key was set:
+      - wiki_query       → wikipedia_search_tool
+      - web_search_query → web_search_tool
+      - ocr_path         → ocr_image_tool
+      - excel_path       → parse_excel_tool
+      - audio_path       → audio_transcriber_tool
+    """
+    if state.get("wiki_query"):
+        return wikipedia_search_tool(state)
+    if state.get("web_search_query"):
+        return web_search_tool(state)
+    if state.get("ocr_path"):
+        return ocr_image_tool(state)
+    if state.get("excel_path"):
+        return parse_excel_tool(state)
+    if state.get("audio_path"):
+        return audio_transcriber_tool(state)
+    return {}
+
+
+# ─── 4) merge_tool_output ───
+def merge_tool_output(state: AgentState) -> AgentState:
+    """
+    Combine previous state and tool output into one:
+    """
+    prev = state.get("prev_state", {})
+    merged = {**prev, **state}
+    merged.pop("prev_state", None)
+    return merged
+
+
+# ─── 5) inspect_node ───
+def inspect_node(state: AgentState) -> AgentState:
+    """
+    After running a tool, show GPT:
+      - ORIGINAL user question
+      - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
+      - The INTERIM_ANSWER (what plan_node initially provided under 'final_answer')
+    Then ask GPT to either:
+      • Return {"final_answer": "<final>"} if done, OR
+      • Return exactly one tool key to run next (wiki_query / web_search_query / ocr_path / excel_path & excel_sheet_name / audio_path).
+    """
+    messages_for_llm = []
+
+    # 1) Re‐insert original user question
     question = ""
     for msg in reversed(state.get("messages", [])):
         if isinstance(msg, HumanMessage):
             question = msg.content
             break
+    messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
 
-    # Build one monolithic context
-    combined = f"USER_QUESTION: {question}\n"
+    # 2) Add any tool results
     if sr := state.get("web_search_result"):
-        combined += f"WEB_SEARCH_RESULT: {sr}\n"
+        messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
     if orc := state.get("ocr_result"):
-        combined += f"OCR_RESULT: {orc}\n"
+        messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
     if exr := state.get("excel_result"):
-        combined += f"EXCEL_RESULT: {exr}\n"
-    # Note: your code already stores the audio transcription under "transcript"
+        messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
     if tr := state.get("transcript"):
-        combined += f"AUDIO_TRANSCRIPT: {tr}\n"
+        messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
     if wr := state.get("wiki_result"):
-        combined += f"WIKIPEDIA_RESULT: {wr}\n"
-
-    # Here we demand a JSON response with a single key "final_answer"
-    combined += (
-        "Based on the above, respond with exactly one JSON object, and nothing else. "
-        "The JSON object must have exactly one key: \"final_answer\". "
-        "For example:\n"
-        "{\"final_answer\":\"42\"}\n"
-        "Do NOT include any explanation, markdown, or any extra whitespace outside the JSON object. "
-        "If the answer is multiple words, put them in a comma-separated string, e.g. \"red,green,blue\". "
-        "If the answer is a number, it must be digits only—e.g. \"725.00\".\n"
-        "If the answer is a list of items, put them in a comma-separated string, e.g. \"item1,item2,item3\". "
-        "If the user prompt asks you to do something, then do it "
+        messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
+
+    # 3) Add the interim answer under INTERIM_ANSWER
+    if ia := state.get("final_answer"):
+        messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {ia}"))
+
+    # 4) Prompt GPT to decide final or another tool
+    prompt = (
+        "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
+        "If you are confident it’s correct, return exactly:\n"
+        "  {\"final_answer\":\"<your final answer>\"}\n"
+        "and nothing else.\n"
+        "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
+        "  {\"wiki_query\":\"<query for Wikipedia>\"}\n"
+        "  {\"web_search_query\":\"<search terms>\"}\n"
+        "  {\"ocr_path\":\"<image path or task_id>\"}\n"
+        "  {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
+        "  {\"audio_path\":\"<audio path or task_id>\"}\n"
+        "Do NOT wrap in markdown—return only the JSON object.\n"
     )
-
-    # Debug print
-    # print("\n>>> finalize_node JSON‐strict prompt:\n" + combined + "\n<<< end prompt >>>\n")
-
-    llm_response = llm.invoke([SystemMessage(content=combined)])
+    messages_for_llm.append(SystemMessage(content=prompt))
+    llm_response = llm(messages_for_llm)
     raw = llm_response.content.strip()
-    # print(">>> finalize_node got raw response:", raw)
 
+    new_msgs = state["messages"] + [AIMessage(content=raw)]
     try:
         parsed = json.loads(raw)
-        return {"final_answer": parsed["final_answer"]}
-    except Exception as e:
-        # If the LLM did not return valid JSON, store the error so you can see it
-        # print(">>> finalize_node JSON parse error:", e, "raw was:", raw)
-        return {"final_answer": f"ERROR: invalid JSON from finalize_node: {raw}"}
-
-# ─── 4) Wrap tools in a ToolNode ───
-def tool_node(state: AgentState) -> AgentState:
-    """
-    Inspect exactly which tool‐key was set in `state` and call that function.
-    Returns only the partial state (with the tool's outputs) so that merge_tool_output can combine it.
-    """
-    # We expect exactly one of these keys to be non‐empty:
-    # "web_search_query", "ocr_path", "excel_path"/"excel_sheet_name", "audio_path"
-    # Whichever is present, call the corresponding tool and return its result.
-
-    if state.get("wiki_query"):
-        out = wikipedia_search_tool(state)
-        return out
-
-    if state.get("web_search_query"):
-        # print(f">>> tools_node dispatching web_search_tool with query: {state['web_search_query']!r}")
-        out = web_search_tool(state)
-        return out
-
-    if state.get("ocr_path"):
-        # print(f">>> tools_node dispatching ocr_image_tool with path: {state['ocr_path']!r}")
-        out = ocr_image_tool(state)
-        return out
-
-    if state.get("excel_path"):
-        # We assume plan_node always sets both excel_path and excel_sheet_name together
-        # print(f">>> tools_node dispatching parse_excel_tool with path: {state['excel_path']!r}, sheet: {state.get('excel_sheet_name')!r}")
-        out = parse_excel_tool(state)
-        return out
-
-    if state.get("audio_path"):
-        # print(f">>> tools_node dispatching audio_transcriber_tool with path: {state['audio_path']!r}")
-        out = audio_transcriber_tool(state)
-        return out
-
-    # If we somehow reach here, no recognized tool key was set:
-    # print(">>> tools_node: no valid tool key found in state!")
-    return {}
-
-
-# Add a node to store the previous state
-
-def store_prev_state(state: AgentState) -> AgentState:
-    return {**state, "prev_state": state.copy()}
-
-def merge_tool_output(state: AgentState) -> AgentState:
-    prev_state = state.get("prev_state", {})
-    merged = {**prev_state, **state}
-    merged.pop("prev_state", None)
-    return merged
+        if isinstance(parsed, dict):
+            partial: AgentState = {"messages": new_msgs}
+            allowed = {
+                "final_answer",
+                "wiki_query",
+                "web_search_query",
+                "ocr_path",
+                "excel_path",
+                "excel_sheet_name",
+                "audio_path"
+            }
+            for k, v in parsed.items():
+                if k in allowed:
+                    partial[k] = v
+            return partial
+    except json.JSONDecodeError:
+        pass
+
+    return {
+        "messages": new_msgs,
+        "final_answer": "ERROR: could not parse inspect decision."
+    }
+
+
+# ─── 6) finalize_node ───
+def finalize_node(state: AgentState) -> AgentState:
+    """
+    If state already has "final_answer", return it. Otherwise, gather all tool outputs
+    and ask GPT for a final answer. But in our cyclic design, finalize_node is only called
+    after plan_node or inspect_node returned "final_answer".
+    """
+    if fa := state.get("final_answer"):
+        return {"final_answer": fa}
+    # (In practice, we never reach here because we always pick finalize only when "final_answer" exists.)
+    return {"final_answer": "ERROR: finalize called without a final_answer."}
 
-# ─── 5) Build the graph ───
+
+# ─── 7) Build the graph and wire edges ───
 graph = StateGraph(AgentState)
 
-# 5.a) Register nodes
+# Register nodes
 graph.add_node("plan", plan_node)
 graph.add_node("store_prev_state", store_prev_state)
 graph.add_node("tools", tool_node)
 graph.add_node("merge_tool_output", merge_tool_output)
+graph.add_node("inspect", inspect_node)
 graph.add_node("finalize", finalize_node)
 
-# 5.b) Wire START → plan
+# START → plan
 graph.add_edge(START, "plan")
 
-# 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
+# plan → either finalize (if plan set final_answer) or store_prev_state (if plan wants a tool)
 def route_plan(plan_out: AgentState) -> str:
-    # print what keys are present in plan_out
-    # print(f">> route_plan sees plan_out keys: {list(plan_out.keys())}")
-
-    if (
-        plan_out.get("web_search_query")
-        or plan_out.get("ocr_path")
-        or plan_out.get("excel_path")
-        or plan_out.get("audio_path")
-        or plan_out.get("wiki_query")
-    ):
-        # print(">> route_plan ➡️ tools")
-        return "tools"
-    # print(">> route_plan ➡️ finalize")
-    return "finalize"
-
+    if plan_out.get("final_answer") is not None:
+        return "finalize"
+    return "store_prev_state"
 
 graph.add_conditional_edges(
     "plan",
     route_plan,
-    {"tools": "store_prev_state", "finalize": "finalize"}
+    {"store_prev_state": "store_prev_state", "finalize": "finalize"}
 )
 
-# 5.d) store_prev_state → tools
+# store_prev_state → tools
 graph.add_edge("store_prev_state", "tools")
 
-# 5.e) tools → merge_tool_output
+# tools → merge_tool_output
 graph.add_edge("tools", "merge_tool_output")
 
-# 5.f) merge_tool_output → finalize
-graph.add_edge("merge_tool_output", "finalize")
+# merge_tool_output → inspect
+graph.add_edge("merge_tool_output", "inspect")
+
+# inspect → either finalize (if inspect set final_answer) or store_prev_state (if inspect wants another tool)
+def route_inspect(inspect_out: AgentState) -> str:
+    if inspect_out.get("final_answer") is not None:
+        return "finalize"
+    return "store_prev_state"
+
+graph.add_conditional_edges(
+    "inspect",
+    route_inspect,
+    {"store_prev_state": "store_prev_state", "finalize": "finalize"}
+)
 
-# 5.g) finalize → END
+# finalize → END
 graph.add_edge("finalize", END)
 
 compiled_graph = graph.compile()
 
 
-# ─── 6) respond_to_input ───
-def respond_to_input(user_input: str, task_id) -> str:
+# ─── 8) respond_to_input ───
+def respond_to_input(user_input: str) -> str:
     """
-    Seed state['messages'] with a SystemMessage (tools description) + HumanMessage(user_input).
-    Then invoke the graph; return the final_answer from the resulting state.
+    Seed state['messages'] with a SystemMessage + HumanMessage(user_input),
+    then invoke the cyclic graph. Return the final_answer from the resulting state.
    """
     system_msg = SystemMessage(
         content=(
-            "You are an agent that must choose exactly one of the following actions:\n"
-            " 1) If the user's question can be answered directly by consulting Wikipedia, return exactly:\n"
-            "    {\"wiki_query\":\"<search terms for Wikipedia>\"}\n"
-            "    and nothing else. Use Wikipedia before any other tool.\n"
-            " 2) Only if Wikipedia cannot directly answer, perform a web search and return:\n"
-            "    {\"web_search_query\":\"<search terms>\"}\n"
-            "    and nothing else.\n"
-            " 3) If the user's question requires extracting text from an image, return:\n"
-            "    {\"ocr_path\":\"<local image path>\"}\n"
-            "    and nothing else.\n"
-            " 4) If the user's question requires reading a spreadsheet, return:\n"
-            "    {\"excel_path\":\"<local .xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
-            "    and nothing else.\n"
-            " 5) If the user needs an audio transcription, return:\n"
-            "    {\"audio_path\":\"<local audio file path>\"}\n"
-            "    and nothing else.\n"
-            " 6) If you already know the answer without using any tool, return exactly:\n"
-            "    {\"final_answer\":\"<your concise answer>\"}\n"
-            "    and nothing else.\n"
-            "If the user's prompt explicitly tells you to perform a specific action (for example, “translate this sentence”), then do it directly and return your result as {\"final_answer\":\"<your answer>\"} or the appropriate tool key if needed. \n"
-            "Do NOT include any additional keys, explanation, or markdown—only one JSON object with exactly one key."
+            "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
+            "Tools available:\n"
+            " • Wikipedia: set {\"wiki_query\":\"<search terms>\"}\n"
+            " • Web search: set {\"web_search_query\":\"<search terms>\"}\n"
+            " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
+            " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
+            " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
+            "If you can answer immediately, set {\"final_answer\":\"<answer>\"}. "
+            "Respond with only one JSON object and no extra formatting."
         )
     )
-
     human_msg = HumanMessage(content=user_input)
 
-    initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
+    initial_state: AgentState = {"messages": [system_msg, human_msg]}
     final_state = compiled_graph.invoke(initial_state)
     return final_state.get("final_answer", "Error: No final answer generated.")
 
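Note: AgentState is imported from state.py, which is not part of this commit. The sketch below is a hypothetical reconstruction, inferred only from the keys that plan_node, tool_node, inspect_node, merge_tool_output, and finalize_node read and write above; the real definition may differ (given the add_messages import, messages may in fact be an Annotated reducer channel).

    # Hypothetical sketch of state.py, inferred from usage; not the committed file.
    from typing import TypedDict

    class AgentState(TypedDict, total=False):
        messages: list          # SystemMessage / HumanMessage / AIMessage history
        task_id: str            # passed by the old runner so tools can fetch files
        final_answer: str       # set by plan_node or inspect_node when confident
        wiki_query: str         # dispatch keys: exactly one is set per cycle
        web_search_query: str
        ocr_path: str
        excel_path: str
        excel_sheet_name: str
        audio_path: str
        wiki_result: str        # tool outputs, folded back in by merge_tool_output
        web_search_result: str
        ocr_result: str
        excel_result: str
        transcript: str         # audio_transcriber_tool stores its output here
        prev_state: dict        # snapshot taken by store_prev_state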
old_app_copy.py ADDED
@@ -0,0 +1,504 @@
+import os
+import gradio as gr
+import requests
+import inspect
+import pandas as pd
+from langgraph.prebuilt import ToolNode
+
+
+# from typing import Any, Dict
+# from typing import TypedDict, Annotated
+
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph, START, END
+from langgraph.graph.message import add_messages
+from langchain.schema import HumanMessage, SystemMessage, AIMessage
+# Create a ToolNode that knows about your web_search function
+import json
+from state import AgentState
+
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
+
+llm = ChatOpenAI(model_name="gpt-4o-mini")
+
+# agent = create_react_agent(model=llm, tools=tool_node)
+
+def plan_node(state: AgentState) -> AgentState:
+    """
+    This plan_node will ask GPT to:
+    1) First write a concise *direct* answer.
+    2) Then decide whether it’s confident enough to stop (return {"final_answer": ...})
+       or if it needs to verify via one tool (return exactly one of {"wiki_query":...},
+       {"web_search_query":...}, {"ocr_path":...}, {"excel_path":...,"excel_sheet_name":...}, or {"audio_path":...}).
+    """
+    prior_msgs = state.get("messages", [])
+    user_input = ""
+    for msg in reversed(prior_msgs):
+        if isinstance(msg, HumanMessage):
+            user_input = msg.content
+            break
+
+    # (1) Build a fresh SystemMessage that tells the LLM exactly how to self‐evaluate
+    system_msg = SystemMessage(
+        content=(
+            "You are an agent that must do two things in a single JSON output:\n\n"
+            " 1) Produce a concise, direct answer to the user’s question (no explanation, just the answer). \n"
+            " 2) Judge whether that answer is reliable. \n"
+            "    • If you are fully confident and do NOT need any external verification, return exactly:\n"
+            "      {\"final_answer\":\"<your concise answer>\"}\n"
+            "      and nothing else.\n"
+            "    • If you think you need to verify or look something up first, return exactly one of the following (and nothing else):\n"
+            "      {\"wiki_query\":\"<search terms for Wikipedia>\"}\n"
+            "      {\"web_search_query\":\"<search terms>\"}\n"
+            "      {\"ocr_path\":\"<local image path or task_id>\"}\n"
+            "      {\"excel_path\":\"<local .xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
+            "      {\"audio_path\":\"<local audio path or task_id>\"}\n\n"
+            " You must pick exactly one key—either final_answer or exactly one tool key.\n"
+            " Do NOT wrap it in any markdown or extra text. Only output a single JSON object.\n"
+            "\n"
+            f"User’s question: \"{user_input}\"\n"
+        )
+    )
+    human_msg = HumanMessage(content=user_input)
+
+    # (2) Call the LLM with this single system/human pair
+    llm_response = llm([system_msg, human_msg])
+    llm_out = llm_response.content.strip()
+
+    # (3) Append the LLM output into the message history
+    ai_msg = AIMessage(content=llm_out)
+    new_msgs = prior_msgs.copy() + [ai_msg]
+
+    # (4) Attempt to parse that JSON
+    try:
+        parsed = json.loads(llm_out)
+        if isinstance(parsed, dict):
+            partial: AgentState = {"messages": new_msgs}
+            allowed_keys = {
+                "final_answer",
+                "wiki_query",
+                "web_search_query",
+                "ocr_path",
+                "excel_path",
+                "excel_sheet_name",
+                "audio_path"
+            }
+            for k, v in parsed.items():
+                if k in allowed_keys:
+                    partial[k] = v
+            return partial
+    except json.JSONDecodeError:
+        pass
+
+    # (5) If parsing failed, fall back to a safe “sorry” answer
+    return {
+        "messages": new_msgs,
+        "final_answer": "Sorry, I could not parse your intent."
+    }
+
+
+
+# ─── 3) Revised finalize_node ───
+def finalize_node(state: AgentState) -> AgentState:
+    if state.get("final_answer") is not None:
+        return {"final_answer": state["final_answer"]}
+
+    # Re‐extract the last user question
+    question = ""
+    for msg in reversed(state.get("messages", [])):
+        if isinstance(msg, HumanMessage):
+            question = msg.content
+            break
+
+    # Build one monolithic context
+    combined = f"USER_QUESTION: {question}\n"
+    if sr := state.get("web_search_result"):
+        combined += f"WEB_SEARCH_RESULT: {sr}\n"
+    if orc := state.get("ocr_result"):
+        combined += f"OCR_RESULT: {orc}\n"
+    if exr := state.get("excel_result"):
+        combined += f"EXCEL_RESULT: {exr}\n"
+    # Note: your code already stores the audio transcription under "transcript"
+    if tr := state.get("transcript"):
+        combined += f"AUDIO_TRANSCRIPT: {tr}\n"
+    if wr := state.get("wiki_result"):
+        combined += f"WIKIPEDIA_RESULT: {wr}\n"
+
+    # Here we demand a JSON response with a single key "final_answer"
+    combined += (
+        "Based on the above, respond with exactly one JSON object, and nothing else. "
+        "The JSON object must have exactly one key: \"final_answer\". "
+        "For example:\n"
+        "{\"final_answer\":\"42\"}\n"
+        "Do NOT include any explanation, markdown, or any extra whitespace outside the JSON object. "
+        "If the answer is multiple words, put them in a comma-separated string, e.g. \"red,green,blue\". "
+        "If the answer is a number, it must be digits only—e.g. \"725.00\".\n"
+        "If the answer is a list of items, put them in a comma-separated string, e.g. \"item1,item2,item3\". "
+        "If the user prompt asks you to do something, then do it "
+    )
+
+    # Debug print
+    # print("\n>>> finalize_node JSON‐strict prompt:\n" + combined + "\n<<< end prompt >>>\n")
+
+    llm_response = llm.invoke([SystemMessage(content=combined)])
+    raw = llm_response.content.strip()
+    # print(">>> finalize_node got raw response:", raw)
+
+    try:
+        parsed = json.loads(raw)
+        return {"final_answer": parsed["final_answer"]}
+    except Exception as e:
+        # If the LLM did not return valid JSON, store the error so you can see it
+        # print(">>> finalize_node JSON parse error:", e, "raw was:", raw)
+        return {"final_answer": f"ERROR: invalid JSON from finalize_node: {raw}"}
+
+# ─── 4) Wrap tools in a ToolNode ───
+def tool_node(state: AgentState) -> AgentState:
+    """
+    Inspect exactly which tool‐key was set in `state` and call that function.
+    Returns only the partial state (with the tool's outputs) so that merge_tool_output can combine it.
+    """
+    # We expect exactly one of these keys to be non‐empty:
+    # "web_search_query", "ocr_path", "excel_path"/"excel_sheet_name", "audio_path"
+    # Whichever is present, call the corresponding tool and return its result.
+
+    if state.get("wiki_query"):
+        out = wikipedia_search_tool(state)
+        return out
+
+    if state.get("web_search_query"):
+        # print(f">>> tools_node dispatching web_search_tool with query: {state['web_search_query']!r}")
+        out = web_search_tool(state)
+        return out
+
+    if state.get("ocr_path"):
+        # print(f">>> tools_node dispatching ocr_image_tool with path: {state['ocr_path']!r}")
+        out = ocr_image_tool(state)
+        return out
+
+    if state.get("excel_path"):
+        # We assume plan_node always sets both excel_path and excel_sheet_name together
+        # print(f">>> tools_node dispatching parse_excel_tool with path: {state['excel_path']!r}, sheet: {state.get('excel_sheet_name')!r}")
+        out = parse_excel_tool(state)
+        return out
+
+    if state.get("audio_path"):
+        # print(f">>> tools_node dispatching audio_transcriber_tool with path: {state['audio_path']!r}")
+        out = audio_transcriber_tool(state)
+        return out
+
+
+    # If we somehow reach here, no recognized tool key was set:
+    # print(">>> tools_node: no valid tool key found in state!")
+    return {}
+
+
+# Add a node to store the previous state
+
+def store_prev_state(state: AgentState) -> AgentState:
+    return {**state, "prev_state": state.copy()}
+
+def merge_tool_output(state: AgentState) -> AgentState:
+    prev_state = state.get("prev_state", {})
+    merged = {**prev_state, **state}
+    merged.pop("prev_state", None)
+    return merged
+
+# ─── 5) Build the graph ───
+graph = StateGraph(AgentState)
+
+# 5.a) Register nodes
+graph.add_node("plan", plan_node)
+graph.add_node("store_prev_state", store_prev_state)
+graph.add_node("tools", tool_node)
+graph.add_node("merge_tool_output", merge_tool_output)
+graph.add_node("finalize", finalize_node)
+
+# 5.b) Wire START → plan
+graph.add_edge(START, "plan")
+
+# 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
+def route_plan(plan_out: AgentState) -> str:
+    # print what keys are present in plan_out
+    # print(f">> route_plan sees plan_out keys: {list(plan_out.keys())}")
+
+    if (
+        plan_out.get("web_search_query")
+        or plan_out.get("ocr_path")
+        or plan_out.get("excel_path")
+        or plan_out.get("audio_path")
+        or plan_out.get("wiki_query")
+    ):
+        # print(">> route_plan ➡️ tools")
+        return "tools"
+    # print(">> route_plan ➡️ finalize")
+    return "finalize"
+
+
+graph.add_conditional_edges(
+    "plan",
+    route_plan,
+    {"tools": "store_prev_state", "finalize": "finalize"}
+)
+
+# 5.d) store_prev_state → tools
+graph.add_edge("store_prev_state", "tools")
+
+# 5.e) tools → merge_tool_output
+graph.add_edge("tools", "merge_tool_output")
+
+# 5.f) merge_tool_output → finalize
+graph.add_edge("merge_tool_output", "finalize")
+
+# 5.g) finalize → END
+graph.add_edge("finalize", END)
+
+compiled_graph = graph.compile()
+
+
+# ─── 6) respond_to_input ───
+def respond_to_input(user_input: str, task_id) -> str:
+    """
+    Seed state['messages'] with a SystemMessage (tools description) + HumanMessage(user_input).
+    Then invoke the graph; return the final_answer from the resulting state.
+    """
+    system_msg = SystemMessage(
+        content=(
+            "You are an agent that must choose exactly one of the following actions:\n"
+            " 1) If the user's question can be answered directly by consulting Wikipedia, return exactly:\n"
+            "    {\"wiki_query\":\"<search terms for Wikipedia>\"}\n"
+            "    and nothing else. Use Wikipedia before any other tool.\n"
+            " 2) Only if Wikipedia cannot directly answer, perform a web search and return:\n"
+            "    {\"web_search_query\":\"<search terms>\"}\n"
+            "    and nothing else.\n"
+            " 3) If the user's question requires extracting text from an image, return:\n"
+            "    {\"ocr_path\":\"<local image path>\"}\n"
+            "    and nothing else.\n"
+            " 4) If the user's question requires reading a spreadsheet, return:\n"
+            "    {\"excel_path\":\"<local .xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
+            "    and nothing else.\n"
+            " 5) If the user needs an audio transcription, return:\n"
+            "    {\"audio_path\":\"<local audio file path>\"}\n"
+            "    and nothing else.\n"
+            " 6) If you already know the answer without using any tool, return exactly:\n"
+            "    {\"final_answer\":\"<your concise answer>\"}\n"
+            "    and nothing else.\n"
+            "If the user's prompt explicitly tells you to perform a specific action (for example, “translate this sentence”), then do it directly and return your result as {\"final_answer\":\"<your answer>\"} or the appropriate tool key if needed. \n"
+            "Do NOT include any additional keys, explanation, or markdown—only one JSON object with exactly one key."
+        )
+    )
+
+    human_msg = HumanMessage(content=user_input)
+
+    initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
+    final_state = compiled_graph.invoke(initial_state)
+    return final_state.get("final_answer", "Error: No final answer generated.")
+
+
+
+
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
+
+    def __call__(self, question: str, task_id) -> str:
+        # print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # fixed_answer = "This is a default answer."
+        # print(f"Agent returning fixed answer: {fixed_answer}")
+        print()
+        print()
+        print()
+        print()
+
+        print(f"Agent received question: {question}")
+        print()
+        return respond_to_input(question, task_id)
+        # return fixed_answer
+
+
+
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+
+    # 1. Instantiate Agent (modify this part to create your agent)
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            submitted_answer = agent(question_text, task_id)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+
+
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+        ---
+        **Disclaimers:**
+        Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay on the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
+        """
+    )
+
+    gr.LoginButton()
+
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
+
+if __name__ == "__main__":
+    # print("LangGraph version:", langgraph.__version__)
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+    # import langgraph
+    # print("▶︎ LangGraph version:", langgraph.__version__)
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

+    print("-"*(60 + len(" App Starting ")) + "\n")
+
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)
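Note: tools.py is also not shown in this commit. From the way tool_node dispatches on a single key and merge_tool_output folds the result back into the previous state, each tool evidently takes the full state and returns only a partial state carrying its result key. A minimal hypothetical sketch of one such tool, assuming the third-party wikipedia package purely for illustration (the committed implementation may work quite differently):

    # Hypothetical sketch; not the committed tools.py.
    import wikipedia  # assumption: illustrative library choice only

    def wikipedia_search_tool(state: dict) -> dict:
        query = state.get("wiki_query", "")
        try:
            # A short summary keeps the follow-up inspect_node prompt small.
            summary = wikipedia.summary(query, sentences=3)
        except Exception as e:
            summary = f"Wikipedia lookup failed: {e}"
        # Return a partial update only; merge_tool_output combines it with prev_state.
        return {"wiki_result": summary}

Under this contract, respond_to_input("Who painted the Mona Lisa?") would plan, run at most one tool per cycle through store_prev_state → tools → merge_tool_output → inspect, and return whatever ends up under final_answer.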