naman1102 commited on
Commit
4f25f4e
·
1 Parent(s): 4ad5e89
Files changed (7) hide show
  1. app.py +145 -334
  2. old2app.py +587 -0
  3. old2state.py +22 -0
  4. old2tools.py +422 -0
  5. old_app_copy.py +2 -2
  6. state.py +22 -21
  7. tools.py +99 -199
app.py CHANGED
@@ -1,17 +1,9 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from langgraph.prebuilt import ToolNode
7
-
8
-
9
- # from typing import Any, Dict
10
- # from typing import TypedDict, Annotated
11
-
12
  from langchain_openai import ChatOpenAI
13
  from langgraph.graph import StateGraph, START, END
14
- from langgraph.graph.message import add_messages
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
@@ -20,369 +12,188 @@ from state import AgentState
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
- llm = ChatOpenAI(model_name="gpt-4.1")
26
-
27
- # ─── 1) plan_node ───
28
- # ─── 1) plan_node ───
29
- tool_counter = 0
30
 
 
31
 
32
- # ─── 1) plan_node ───
33
- def plan_node(state: AgentState) -> AgentState:
34
- """
35
- Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
- then decide if it's confident enough to stop or if it needs one tool.
37
- If confident: return {"final_answer":"<answer>"}
38
- Otherwise: return exactly one of:
39
- {"wiki_query":"..."},
40
- {"ocr_path":"..."},
41
- {"excel_path":"...","excel_sheet_name":"..."},
42
- {"audio_path":"..."}
43
- """
44
- prior_msgs = state.get("messages", [])
45
- user_input = ""
46
- for msg in reversed(prior_msgs):
47
- if isinstance(msg, HumanMessage):
48
- user_input = msg.content
49
- break
50
-
51
- system_msg = SystemMessage(
52
- content=(
53
-
54
- "You are an agent that must do two things in one JSON output:\n\n"
55
- " 1) Provide a concise, direct answer to the user's question (no explanation).\n"
56
- " 2) Judge whether that answer is reliable:\n"
57
- " • If you are fully confident, return exactly:\n"
58
- " {\"final_answer\":\"<your concise answer>\"}\n"
59
- " and nothing else.\n"
60
- " • Otherwise, return exactly one of:\n"
61
- " {\"wiki_query\":\"<Wikipedia search>\"}\n"
62
- " {\"ocr_path\":\"<image path or task_id>\"}\n"
63
- " {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
64
- " {\"audio_path\":\"<audio path or task_id>\"}\n"
65
- " and nothing else.\n"
66
- "Do NOT wrap in markdown—output only a single JSON object.\n"
67
- f"User's question: \"{user_input}\"\n"
68
- )
69
- )
70
- human_msg = HumanMessage(content=user_input)
71
- llm_response = llm([system_msg, human_msg])
72
- llm_out = llm_response.content.strip()
73
 
74
- ai_msg = AIMessage(content=llm_out)
75
- new_msgs = prior_msgs.copy() + [ai_msg]
76
 
77
- try:
78
- parsed = json.loads(llm_out)
79
- if isinstance(parsed, dict):
80
- partial: AgentState = {"messages": new_msgs}
81
- allowed = {
82
- "final_answer",
83
- "wiki_query",
84
- "ocr_path",
85
- "excel_path",
86
- "excel_sheet_name",
87
- "audio_path",
88
- }
89
- for k, v in parsed.items():
90
- if k in allowed:
91
- partial[k] = v
92
- return partial
93
- except json.JSONDecodeError:
94
- pass
95
 
96
- return {
97
- "messages": new_msgs,
98
- "final_answer": "Sorry, I could not parse your intent.",
99
- }
100
 
 
101
 
102
- # ─── 2) store_prev_state ───
103
- def store_prev_state(state: AgentState) -> AgentState:
104
- return {**state, "prev_state": state.copy()}
 
 
 
105
 
106
 
107
- # ─── 3) tools_node ───
108
- def tool_node(state: AgentState) -> AgentState:
109
- """
110
- Dispatch exactly one tool based on which key was set:
111
- - wiki_query → wikipedia_search_tool
112
- - ocr_path → ocr_image_tool
113
- - excel_path → parse_excel_tool
114
- - audio_path → audio_transcriber_tool
115
- """
116
- global tool_counter
117
- if tool_counter >= 5:
118
- # If we've already run 5 tools, do nothing
119
- return {
120
- "messages": state["messages"],
121
- "final_answer": state.get("final_answer", "No interim answer available.")
122
- }
123
-
124
- tool_counter += 1
125
-
126
- if state.get("wiki_query"):
127
- return wikipedia_search_tool(state)
128
- if state.get("ocr_path"):
129
- return ocr_image_tool(state)
130
- if state.get("excel_path"):
131
- return parse_excel_tool(state)
132
- if state.get("audio_path"):
133
- return audio_transcriber_tool(state)
134
-
135
- return {} # no tool key present
136
-
137
-
138
- # ─── 4) merge_tool_output ───
139
- def merge_tool_output(state: AgentState) -> AgentState:
140
- """
141
- Combine previous state and tool output into one, but remove any stale request-keys.
142
- """
143
- prev = state.get("prev_state", {}).copy()
144
 
145
- # Drop stale request-keys in prev
146
- for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
147
- prev.pop(dead, None)
148
 
149
- merged = {**prev, **state}
150
- # Drop them again from merged so they don't persist into the next cycle
151
- for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
152
- merged.pop(dead, None)
153
 
154
- merged.pop("prev_state", None)
155
- return merged
156
 
 
157
 
158
- # ─── 5) inspect_node ───
159
- def inspect_node(state: AgentState) -> AgentState:
160
- """
161
- After running a tool, show GPT:
162
- - ORIGINAL user question
163
- - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
164
- - The INTERIM_ANSWER (always present if plan_node ran correctly)
165
-
166
- If tool_counter ≥ 5, use LLM once more (with full context) to craft a final answer.
167
- Otherwise, ask GPT to either:
168
- • Return {"final_answer":"<final>"} if done, OR
169
- • Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
170
- """
171
 
172
- global tool_counter
173
-
174
- # If we've already run 5 tools, ask GPT for a strictly‐formatted JSON final_answer
175
- if tool_counter >= 5:
176
- messages_for_llm = []
177
-
178
- # Re‐insert the user’s question
179
- question = ""
180
- for msg in reversed(state.get("messages", [])):
181
- if isinstance(msg, HumanMessage):
182
- question = msg.content
183
- break
184
- messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
185
-
186
- # Add any tool results so far
187
- if sr := state.get("web_search_result"):
188
- messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
189
- if orc := state.get("ocr_result"):
190
- messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
191
- if exr := state.get("excel_result"):
192
- messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
193
- if tr := state.get("transcript"):
194
- messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
195
- if wr := state.get("wiki_result"):
196
- messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
197
-
198
- # Show the interim answer
199
- interim = state.get("interim_answer", "")
200
- messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
201
-
202
- # Now ask for JSON ONLY (no reasoning, no extra text)
203
- final_prompt = (
204
- "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
205
- "Using only the information above—including the USER_QUESTION, "
206
- "any TOOL_RESULT, and the INTERIM_ANSWER—produce a concise final answer. "
207
- "Return exactly one JSON object and nothing else, in this format:\n\n"
208
- "{\"final_answer\":\"<your final answer>\"}\n"
209
- "Do not include any other words or punctuation outside that JSON. if its numbers, dont show the units"
210
  )
211
- messages_for_llm.append(SystemMessage(content=final_prompt))
212
-
213
- llm_response = llm(messages_for_llm)
214
- raw = llm_response.content.strip()
215
- new_msgs = state["messages"] + [AIMessage(content=raw)]
216
-
217
- # Try to parse exactly one JSON with "final_answer"
218
- try:
219
- parsed = json.loads(raw)
220
- if isinstance(parsed, dict) and "final_answer" in parsed:
221
- return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
222
- except json.JSONDecodeError:
223
- pass
224
-
225
- # Fallback to returning the interim in case JSON parse fails
226
- return {"messages": new_msgs, "final_answer": interim}
227
- # ——————————— If tool_counter < 5, proceed as before ———————————
228
- messages_for_llm = []
229
-
230
- # (1) Re‐insert original user question
231
- question = ""
232
- for msg in reversed(state.get("messages", [])):
233
- if isinstance(msg, HumanMessage):
234
- question = msg.content
235
- break
236
- messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
237
-
238
- # (2) Add any tool results
239
- if sr := state.get("web_search_result"):
240
- messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
241
- if orc := state.get("ocr_result"):
242
- messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
243
- if exr := state.get("excel_result"):
244
- messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
245
- if tr := state.get("transcript"):
246
- messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
247
- if wr := state.get("wiki_result"):
248
- messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
249
-
250
- # (3) Always show the interim answer
251
- interim = state.get("interim_answer", "")
252
- messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
253
-
254
- # (4) Prompt GPT to decide final or another tool
255
- prompt = (
256
- "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
257
- "If you are confident it’s correct, return exactly:\n"
258
- " {\"final_answer\":\"<your final answer>\"}\n"
259
- "and nothing else.\n"
260
- "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
261
- " {\"wiki_query\":\"<query for Wikipedia>\"}\n"
262
- " {\"ocr_path\":\"<image path or task_id>\"}\n"
263
- " {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
264
- " {\"audio_path\":\"<audio path or task_id>\"}\n"
265
- "Do NOT wrap in markdown—return only the JSON object.\n"
266
  )
267
- messages_for_llm.append(SystemMessage(content=prompt))
268
- llm_response = llm(messages_for_llm)
269
- raw = llm_response.content.strip()
270
- new_msgs = state["messages"] + [AIMessage(content=raw)]
271
-
272
- # Try to parse the LLM’s JSON
273
- try:
274
- parsed = json.loads(raw)
275
- if isinstance(parsed, dict):
276
- # (a) If GPT gave a final_answer, return immediately
277
- if "final_answer" in parsed:
278
- return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
279
-
280
- # (b) If GPT requested exactly one valid tool, return only that key
281
- valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
282
- requested_keys = set(parsed.keys()) & valid_keys
283
- if len(requested_keys) == 1:
284
- clean: AgentState = {"messages": new_msgs}
285
- for k in requested_keys:
286
- clean[k] = parsed[k]
287
- return clean
288
- except json.JSONDecodeError:
289
- pass
290
-
291
- # (c) Fallback: if GPT never returned a valid tool key or a final_answer,
292
- # just finalize with the existing interim_answer
293
- return {"messages": new_msgs, "final_answer": interim}
294
-
295
-
296
- # ─── 6) finalize_node ───
297
- def finalize_node(state: AgentState) -> AgentState:
298
- """
299
- If state already has "final_answer", return it. Otherwise, it's an error.
300
- """
301
- if fa := state.get("final_answer"):
302
- return {"final_answer": fa}
303
- return {"final_answer": "ERROR: finalize called without a final_answer."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
 
305
 
306
- # ─── 7) Build the graph and wire edges ───
307
  graph = StateGraph(AgentState)
308
 
309
  # Register nodes
310
- graph.add_node("plan", plan_node)
311
- graph.add_node("store_prev_state", store_prev_state)
312
- graph.add_node("tools", tool_node)
313
- graph.add_node("merge_tool_output", merge_tool_output)
314
- graph.add_node("inspect", inspect_node)
315
- graph.add_node("finalize", finalize_node)
316
-
317
- # START → plan
318
- graph.add_edge(START, "plan")
319
-
320
- # plan → either finalize (if plan set final_answer) or store_prev_state (if plan wants a tool)
321
- def route_plan(plan_out: AgentState) -> str:
322
- if plan_out.get("final_answer") is not None:
323
- return "finalize"
324
- return "store_prev_state"
 
 
 
 
325
 
326
  graph.add_conditional_edges(
327
- "plan",
328
- route_plan,
329
- {"store_prev_state": "store_prev_state", "finalize": "finalize"},
 
 
 
 
 
 
330
  )
331
 
332
- # store_prev_state tools
333
- graph.add_edge("store_prev_state", "tools")
 
334
 
335
- # toolsmerge_tool_output
336
- graph.add_edge("tools", "merge_tool_output")
 
 
 
 
 
 
 
 
 
 
 
337
 
338
- # merge_tool_output → inspect
339
- graph.add_edge("merge_tool_output", "inspect")
340
 
341
- # inspect → either finalize (if inspect set final_answer) or store_prev_state (if inspect wants another tool)
342
- def route_inspect(inspect_out: AgentState) -> str:
343
- if inspect_out.get("final_answer") is not None:
344
- return "finalize"
345
- return "store_prev_state"
346
 
347
- graph.add_conditional_edges(
348
- "inspect",
349
- route_inspect,
350
- {"store_prev_state": "store_prev_state", "finalize": "finalize"},
351
- )
352
 
353
- # finalize → END
354
- graph.add_edge("finalize", END)
355
 
356
- compiled_graph = graph.compile()
357
 
358
 
359
- # ─── 8) respond_to_input ───
360
- def respond_to_input(user_input: str, task_id) -> str:
361
- """
362
- Reset the global tool_counter, seed state['messages'], invoke the graph,
363
- and return the final_answer.
364
- """
365
- global tool_counter
366
- tool_counter = 0 # Reset on every new user query
367
 
368
- system_msg = SystemMessage(
369
- content=(
370
- "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
371
- "Try not to use tools so many times. If you think you can answer the question without using a tool, do it Please.\n"
372
- "Tools available:\n"
373
- " • Wikipedia: set {\"wiki_query\":\"<search terms>\"}\n"
374
- " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
375
- " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
376
- " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
377
- "If you can answer immediately, set {\"final_answer\":\"<answer>\"}. "
378
- "Respond with only one JSON object and no extra formatting."
379
- )
380
- )
381
- human_msg = HumanMessage(content=user_input)
382
 
383
- initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
384
- final_state = compiled_graph.invoke(initial_state)
385
- return final_state.get("final_answer", "Error: No final answer generated.")
386
 
387
  class BasicAgent:
388
  def __init__(self):
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
 
 
 
 
 
 
5
  from langchain_openai import ChatOpenAI
6
  from langgraph.graph import StateGraph, START, END
 
7
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
8
  # Create a ToolNode that knows about your web_search function
9
  import json
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
15
 
16
+ from __future__ import annotations
 
 
 
 
17
 
18
+ import json
19
 
20
+ from typing import Any, Dict, List, Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
22
 
23
+ # ─────────────────────────── External tools ──────────────────────────────
24
+ from tools import (
25
+ wikipedia_search_tool,
26
+ ocr_image_tool,
27
+ audio_transcriber_tool,
28
+ parse_excel_tool
29
+ )
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # ─────────────────────────── Configuration ───────────────────────────────
32
+ LLM = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.0)
33
+ MAX_TOOL_CALLS = 5
 
34
 
35
+ # ─────────────────────────── Helper utilities ────────────────────────────
36
 
37
+ def safe_json(text: str) -> Optional[Dict[str, Any]]:
38
+ try:
39
+ obj = json.loads(text.strip())
40
+ return obj if isinstance(obj, dict) else None
41
+ except json.JSONDecodeError:
42
+ return None
43
 
44
 
45
+ def brief(d: Dict[str, Any]) -> str:
46
+ for k in ("wiki_result", "ocr_result", "transcript"):
47
+ if k in d:
48
+ return f"{k}: {str(d[k])[:160].replace('\n', ' ')}…"
49
+ return "(no output)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # ─────────────────────────── Agent state ───────────────────────────────
 
 
52
 
 
 
 
 
53
 
 
 
54
 
55
+ # ───────────────────────────── Nodes ⬇ ───────────────────────────────────
56
 
57
+ def tool_selector(state: AgentState) -> AgentState:
58
+ """Ask the LLM what to do next (wiki / ocr / audio / excel / final)."""
59
+ if state.tool_calls >= MAX_TOOL_CALLS:
60
+ state.add(SystemMessage(content="You have reached the maximum number of tool calls. Use the already gathered information to answer the question."))
61
+ state.next_action = "final"
62
+ return state
 
 
 
 
 
 
 
63
 
64
+ prompt = SystemMessage(
65
+ content=(
66
+ "Reply with ONE JSON only (no markdown). Choices:\n"
67
+ " {'action':'wiki','query':'…'}\n"
68
+ " {'action':'ocr'}\n"
69
+ " {'action':'audio'}\n"
70
+ " {'action':'excel'}\n"
71
+ " {'action':'final'}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
74
+ raw = LLM(state.messages + [prompt]).content.strip()
75
+ state.add(AIMessage(content=raw))
76
+ parsed = safe_json(raw)
77
+ if not parsed or "action" not in parsed:
78
+ state.next_action = "final"
79
+ return state
80
+
81
+ state.next_action = parsed["action"]
82
+ state.query = parsed.get("query")
83
+ return state
84
+
85
+ # ------------- tool adapters -------------
86
+
87
+ def wiki_tool(state: AgentState) -> AgentState:
88
+ out = wikipedia_search_tool({"wiki_query": state.query or ""})
89
+ state.tool_calls += 1
90
+ state.add(SystemMessage(content=f"WIKI_TOOL_OUT: {brief(out)}"))
91
+ state.next_action = None
92
+ return state
93
+
94
+
95
+ def ocr_tool(state: AgentState) -> AgentState:
96
+ out = ocr_image_tool({"task_id": state.task_id, "ocr_path": ""})
97
+ state.tool_calls += 1
98
+ state.add(SystemMessage(content=f"OCR_TOOL_OUT: {brief(out)}"))
99
+ state.next_action = None
100
+ return state
101
+
102
+
103
+ def audio_tool(state: AgentState) -> AgentState:
104
+ out = audio_transcriber_tool({"task_id": state.task_id, "audio_path": ""})
105
+ state.tool_calls += 1
106
+ state.add(SystemMessage(content=f"AUDIO_TOOL_OUT: {brief(out)}"))
107
+ state.next_action = None
108
+ return state
109
+
110
+ def excel_tool(state: AgentState) -> AgentState:
111
+ result = parse_excel_tool({
112
+ "task_id": state.task_id,
113
+ "excel_sheet_name": state.sheet or ""
114
+ })
115
+ out = {"excel_result": result}
116
+ state.tool_calls += 1
117
+ state.add(SystemMessage(content=f"EXCEL_TOOL_OUT: {brief(out)}"))
118
+ state.next_action = None
119
+ return state
120
+
121
+
122
+ # ------------- final answer -------------
123
+
124
+ def final_answer(state: AgentState) -> AgentState:
125
+ wrap = SystemMessage(
126
+ content="Using everything so far, reply ONLY with {'final_answer':'…'}. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
127
+ )
128
+ raw = LLM(state.messages + [wrap]).content.strip()
129
+ state.add(AIMessage(content=raw))
130
+ parsed = safe_json(raw)
131
+ state.final_answer = parsed.get("final_answer") if parsed else "Unable to parse final answer."
132
+ return state
133
 
134
+ # ─────────────────────────── Graph wiring ───────────────────────────────
135
 
 
136
  graph = StateGraph(AgentState)
137
 
138
  # Register nodes
139
+ for name, fn in [
140
+ ("tool_selector", tool_selector),
141
+ ("wiki_tool", wiki_tool),
142
+ ("ocr_tool", ocr_tool),
143
+ ("audio_tool", audio_tool),
144
+ ("final_answer", final_answer),
145
+ ]:
146
+ graph.add_node(name, fn)
147
+
148
+ # Edges
149
+ graph.add_edge(START, "tool_selector")
150
+
151
+ def dispatch(state: AgentState) -> str:
152
+ return {
153
+ "wiki": "wiki_tool",
154
+ "ocr": "ocr_tool",
155
+ "audio": "audio_tool",
156
+ "final": "final_answer",
157
+ }.get(state.next_action, "final_answer")
158
 
159
  graph.add_conditional_edges(
160
+ "tool_selector",
161
+ dispatch,
162
+ {
163
+ "wiki_tool": "wiki_tool",
164
+ "ocr_tool": "ocr_tool",
165
+ "audio_tool": "audio_tool",
166
+ "excel_tool": "excel_tool",
167
+ "final_answer": "final_answer",
168
+ },
169
  )
170
 
171
+ # tools loop back to selector
172
+ for tool_name in ("wiki_tool", "ocr_tool", "audio_tool", "excel_tool"):
173
+ graph.add_edge(tool_name, "tool_selector")
174
 
175
+ # final_answerEND
176
+ graph.add_edge("final_answer", END)
177
+
178
+ compiled_graph = graph.compile()
179
+
180
+ # ─────────────────────────── Public API ────────────────────────────────
181
+
182
+ def answer(question: str, *, task_id: Optional[str] = None) -> str:
183
+ state = AgentState(user_question=question, task_id=task_id)
184
+ state.add(SystemMessage(content="You are a helpful assistant."))
185
+ state.add(HumanMessage(content=question))
186
+ compiled_graph.invoke(state)
187
+ return state.final_answer or "No answer."
188
 
 
 
189
 
 
 
 
 
 
190
 
 
 
 
 
 
191
 
 
 
192
 
 
193
 
194
 
 
 
 
 
 
 
 
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
197
 
198
  class BasicAgent:
199
  def __init__(self):
old2app.py ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ from langgraph.prebuilt import ToolNode
7
+
8
+
9
+ # from typing import Any, Dict
10
+ # from typing import TypedDict, Annotated
11
+
12
+ from langchain_openai import ChatOpenAI
13
+ from langgraph.graph import StateGraph, START, END
14
+ from langgraph.graph.message import add_messages
15
+ from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
+ # Create a ToolNode that knows about your web_search function
17
+ import json
18
+ from old2state import AgentState
19
+
20
+ # --- Constants ---
21
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
+
23
+ from old2tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
+
25
+ llm = ChatOpenAI(model_name="gpt-4.1")
26
+
27
+ # ─── 1) plan_node ───
28
+ # ─── 1) plan_node ───
29
+ tool_counter = 0
30
+
31
+
32
+ # ─── 1) plan_node ───
33
+ def plan_node(state: AgentState) -> AgentState:
34
+ """
35
+ Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
+ then decide if it's confident enough to stop or if it needs one tool.
37
+ If confident: return {"final_answer":"<answer>"}
38
+ Otherwise: return exactly one of:
39
+ {"wiki_query":"..."},
40
+ {"ocr_path":"..."},
41
+ {"excel_path":"...","excel_sheet_name":"..."},
42
+ {"audio_path":"..."}
43
+ """
44
+ prior_msgs = state.get("messages", [])
45
+ user_input = ""
46
+ for msg in reversed(prior_msgs):
47
+ if isinstance(msg, HumanMessage):
48
+ user_input = msg.content
49
+ break
50
+
51
+ system_msg = SystemMessage(
52
+ content=(
53
+
54
+ "You are an agent that must do two things in one JSON output:\n\n"
55
+ " 1) Provide a concise, direct answer to the user's question (no explanation).\n"
56
+ " 2) Judge whether that answer is reliable:\n"
57
+ " • If you are fully confident, return exactly:\n"
58
+ " {\"final_answer\":\"<your concise answer>\"}\n"
59
+ " and nothing else.\n"
60
+ " • Otherwise, return exactly one of:\n"
61
+ " {\"wiki_query\":\"<Wikipedia search>\"}\n"
62
+ " {\"ocr_path\":\"<image path or task_id>\"}\n"
63
+ " {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
64
+ " {\"audio_path\":\"<audio path or task_id>\"}\n"
65
+ " and nothing else.\n"
66
+ "Do NOT wrap in markdown—output only a single JSON object.\n"
67
+ f"User's question: \"{user_input}\"\n"
68
+ )
69
+ )
70
+ human_msg = HumanMessage(content=user_input)
71
+ llm_response = llm([system_msg, human_msg])
72
+ llm_out = llm_response.content.strip()
73
+
74
+ ai_msg = AIMessage(content=llm_out)
75
+ new_msgs = prior_msgs.copy() + [ai_msg]
76
+
77
+ try:
78
+ parsed = json.loads(llm_out)
79
+ if isinstance(parsed, dict):
80
+ partial: AgentState = {"messages": new_msgs}
81
+ allowed = {
82
+ "final_answer",
83
+ "wiki_query",
84
+ "ocr_path",
85
+ "excel_path",
86
+ "excel_sheet_name",
87
+ "audio_path",
88
+ }
89
+ for k, v in parsed.items():
90
+ if k in allowed:
91
+ partial[k] = v
92
+ return partial
93
+ except json.JSONDecodeError:
94
+ pass
95
+
96
+ return {
97
+ "messages": new_msgs,
98
+ "final_answer": "Sorry, I could not parse your intent.",
99
+ }
100
+
101
+
102
+ # ─── 2) store_prev_state ───
103
+ def store_prev_state(state: AgentState) -> AgentState:
104
+ return {**state, "prev_state": state.copy()}
105
+
106
+
107
+ # ─── 3) tools_node ───
108
+ def tool_node(state: AgentState) -> AgentState:
109
+ """
110
+ Dispatch exactly one tool based on which key was set:
111
+ - wiki_query → wikipedia_search_tool
112
+ - ocr_path → ocr_image_tool
113
+ - excel_path → parse_excel_tool
114
+ - audio_path → audio_transcriber_tool
115
+ """
116
+ global tool_counter
117
+ if tool_counter >= 5:
118
+ # If we've already run 5 tools, do nothing
119
+ return {
120
+ "messages": state["messages"],
121
+ "final_answer": state.get("final_answer", "No interim answer available.")
122
+ }
123
+
124
+ tool_counter += 1
125
+
126
+ if state.get("wiki_query"):
127
+ return wikipedia_search_tool(state)
128
+ if state.get("ocr_path"):
129
+ return ocr_image_tool(state)
130
+ if state.get("excel_path"):
131
+ return parse_excel_tool(state)
132
+ if state.get("audio_path"):
133
+ return audio_transcriber_tool(state)
134
+
135
+ return {} # no tool key present
136
+
137
+
138
+ # ─── 4) merge_tool_output ───
139
+ def merge_tool_output(state: AgentState) -> AgentState:
140
+ """
141
+ Combine previous state and tool output into one, but remove any stale request-keys.
142
+ """
143
+ prev = state.get("prev_state", {}).copy()
144
+
145
+ # Drop stale request-keys in prev
146
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
147
+ prev.pop(dead, None)
148
+
149
+ merged = {**prev, **state}
150
+ # Drop them again from merged so they don't persist into the next cycle
151
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
152
+ merged.pop(dead, None)
153
+
154
+ merged.pop("prev_state", None)
155
+ return merged
156
+
157
+
158
+ # ─── 5) inspect_node ───
159
+ def inspect_node(state: AgentState) -> AgentState:
160
+ """
161
+ After running a tool, show GPT:
162
+ - ORIGINAL user question
163
+ - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
164
+ - The INTERIM_ANSWER (always present if plan_node ran correctly)
165
+
166
+ If tool_counter ≥ 5, use LLM once more (with full context) to craft a final answer.
167
+ Otherwise, ask GPT to either:
168
+ • Return {"final_answer":"<final>"} if done, OR
169
+ • Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
170
+ """
171
+
172
+ global tool_counter
173
+
174
+ # If we've already run 5 tools, ask GPT for a strictly‐formatted JSON final_answer
175
+ if tool_counter >= 5:
176
+ messages_for_llm = []
177
+
178
+ # Re‐insert the user’s question
179
+ question = ""
180
+ for msg in reversed(state.get("messages", [])):
181
+ if isinstance(msg, HumanMessage):
182
+ question = msg.content
183
+ break
184
+ messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
185
+
186
+ # Add any tool results so far
187
+ if sr := state.get("web_search_result"):
188
+ messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
189
+ if orc := state.get("ocr_result"):
190
+ messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
191
+ if exr := state.get("excel_result"):
192
+ messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
193
+ if tr := state.get("transcript"):
194
+ messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
195
+ if wr := state.get("wiki_result"):
196
+ messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
197
+
198
+ # Show the interim answer
199
+ interim = state.get("interim_answer", "")
200
+ messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
201
+
202
+ # Now ask for JSON ONLY (no reasoning, no extra text)
203
+ final_prompt = (
204
+ "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
205
+ "Using only the information above—including the USER_QUESTION, "
206
+ "any TOOL_RESULT, and the INTERIM_ANSWER—produce a concise final answer. "
207
+ "Return exactly one JSON object and nothing else, in this format:\n\n"
208
+ "{\"final_answer\":\"<your final answer>\"}\n"
209
+ "Do not include any other words or punctuation outside that JSON. if its numbers, dont show the units"
210
+ )
211
+ messages_for_llm.append(SystemMessage(content=final_prompt))
212
+
213
+ llm_response = llm(messages_for_llm)
214
+ raw = llm_response.content.strip()
215
+ new_msgs = state["messages"] + [AIMessage(content=raw)]
216
+
217
+ # Try to parse exactly one JSON with "final_answer"
218
+ try:
219
+ parsed = json.loads(raw)
220
+ if isinstance(parsed, dict) and "final_answer" in parsed:
221
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
222
+ except json.JSONDecodeError:
223
+ pass
224
+
225
+ # Fallback to returning the interim in case JSON parse fails
226
+ return {"messages": new_msgs, "final_answer": interim}
227
+ # ——————————— If tool_counter < 5, proceed as before ———————————
228
+ messages_for_llm = []
229
+
230
+ # (1) Re‐insert original user question
231
+ question = ""
232
+ for msg in reversed(state.get("messages", [])):
233
+ if isinstance(msg, HumanMessage):
234
+ question = msg.content
235
+ break
236
+ messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
237
+
238
+ # (2) Add any tool results
239
+ if sr := state.get("web_search_result"):
240
+ messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
241
+ if orc := state.get("ocr_result"):
242
+ messages_for_llm.append(SystemMessage(content=f"OCR_RESULT: {orc}"))
243
+ if exr := state.get("excel_result"):
244
+ messages_for_llm.append(SystemMessage(content=f"EXCEL_RESULT: {exr}"))
245
+ if tr := state.get("transcript"):
246
+ messages_for_llm.append(SystemMessage(content=f"AUDIO_TRANSCRIPT: {tr}"))
247
+ if wr := state.get("wiki_result"):
248
+ messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
249
+
250
+ # (3) Always show the interim answer
251
+ interim = state.get("interim_answer", "")
252
+ messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {interim}"))
253
+
254
+ # (4) Prompt GPT to decide final or another tool
255
+ prompt = (
256
+ "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
257
+ "If you are confident it’s correct, return exactly:\n"
258
+ " {\"final_answer\":\"<your final answer>\"}\n"
259
+ "and nothing else.\n"
260
+ "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
261
+ " {\"wiki_query\":\"<query for Wikipedia>\"}\n"
262
+ " {\"ocr_path\":\"<image path or task_id>\"}\n"
263
+ " {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
264
+ " {\"audio_path\":\"<audio path or task_id>\"}\n"
265
+ "Do NOT wrap in markdown—return only the JSON object.\n"
266
+ )
267
+ messages_for_llm.append(SystemMessage(content=prompt))
268
+ llm_response = llm(messages_for_llm)
269
+ raw = llm_response.content.strip()
270
+ new_msgs = state["messages"] + [AIMessage(content=raw)]
271
+
272
+ # Try to parse the LLM’s JSON
273
+ try:
274
+ parsed = json.loads(raw)
275
+ if isinstance(parsed, dict):
276
+ # (a) If GPT gave a final_answer, return immediately
277
+ if "final_answer" in parsed:
278
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
279
+
280
+ # (b) If GPT requested exactly one valid tool, return only that key
281
+ valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
282
+ requested_keys = set(parsed.keys()) & valid_keys
283
+ if len(requested_keys) == 1:
284
+ clean: AgentState = {"messages": new_msgs}
285
+ for k in requested_keys:
286
+ clean[k] = parsed[k]
287
+ return clean
288
+ except json.JSONDecodeError:
289
+ pass
290
+
291
+ # (c) Fallback: if GPT never returned a valid tool key or a final_answer,
292
+ # just finalize with the existing interim_answer
293
+ return {"messages": new_msgs, "final_answer": interim}
294
+
295
+
296
# ─── 6) finalize_node ───
def finalize_node(state: AgentState) -> AgentState:
    """
    Terminal node: forward the final answer already computed upstream.

    The routing functions only send control here when ``final_answer`` is
    present in the state, so a missing key indicates a graph-wiring bug.

    Returns:
        {"final_answer": <answer>} on success, or an explicit error marker.
    """
    # Explicit None check (not truthiness): the routers use `is not None`,
    # so an empty-string answer must be forwarded, not reported as an error.
    fa = state.get("final_answer")
    if fa is not None:
        return {"final_answer": fa}
    return {"final_answer": "ERROR: finalize called without a final_answer."}
304
+
305
+
306
# ─── 7) Build the graph and wire edges ───
# Overall flow: plan → (store_prev_state → tools → merge_tool_output →
# inspect) loop, repeating until plan or inspect sets final_answer, then
# finalize → END.
graph = StateGraph(AgentState)

# Register nodes
graph.add_node("plan", plan_node)
graph.add_node("store_prev_state", store_prev_state)
graph.add_node("tools", tool_node)
graph.add_node("merge_tool_output", merge_tool_output)
graph.add_node("inspect", inspect_node)
graph.add_node("finalize", finalize_node)

# START → plan
graph.add_edge(START, "plan")

# plan → either finalize (if plan set final_answer) or store_prev_state (if plan wants a tool)
def route_plan(plan_out: AgentState) -> str:
    # Any non-None final_answer (even an empty string) means "done".
    if plan_out.get("final_answer") is not None:
        return "finalize"
    return "store_prev_state"

graph.add_conditional_edges(
    "plan",
    route_plan,
    {"store_prev_state": "store_prev_state", "finalize": "finalize"},
)

# store_prev_state → tools
graph.add_edge("store_prev_state", "tools")

# tools → merge_tool_output
graph.add_edge("tools", "merge_tool_output")

# merge_tool_output → inspect
graph.add_edge("merge_tool_output", "inspect")

# inspect → either finalize (if inspect set final_answer) or store_prev_state (if inspect wants another tool)
def route_inspect(inspect_out: AgentState) -> str:
    # Same routing rule as route_plan, applied after each tool round.
    if inspect_out.get("final_answer") is not None:
        return "finalize"
    return "store_prev_state"

graph.add_conditional_edges(
    "inspect",
    route_inspect,
    {"store_prev_state": "store_prev_state", "finalize": "finalize"},
)

# finalize → END
graph.add_edge("finalize", END)

compiled_graph = graph.compile()
357
+
358
+
359
+ # ─── 8) respond_to_input ───
360
+ def respond_to_input(user_input: str, task_id) -> str:
361
+ """
362
+ Reset the global tool_counter, seed state['messages'], invoke the graph,
363
+ and return the final_answer.
364
+ """
365
+ global tool_counter
366
+ tool_counter = 0 # Reset on every new user query
367
+
368
+ system_msg = SystemMessage(
369
+ content=(
370
+ "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
371
+ "Try not to use tools so many times. If you think you can answer the question without using a tool, do it Please.\n"
372
+ "Tools available:\n"
373
+ " • Wikipedia: set {\"wiki_query\":\"<search terms>\"}\n"
374
+ " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
375
+ " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
376
+ " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
377
+ "If you can answer immediately, set {\"final_answer\":\"<answer>\"}. "
378
+ "Respond with only one JSON object and no extra formatting."
379
+ )
380
+ )
381
+ human_msg = HumanMessage(content=user_input)
382
+
383
+ initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
384
+ final_state = compiled_graph.invoke(initial_state)
385
+ return final_state.get("final_answer", "Error: No final answer generated.")
386
+
387
class BasicAgent:
    """Thin callable wrapper that forwards each question to the LangGraph agent."""

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str, task_id) -> str:
        """
        Answer one benchmark question.

        Args:
            question: The question text from the scoring server.
            task_id: Task identifier, forwarded so tools can download any
                file attached to the task.

        Returns:
            The agent's final answer string.
        """
        # Blank lines keep successive questions visually separated in logs.
        print()
        print()
        print()
        print()

        print(f"Agent received question: {question}")
        print()
        return respond_to_input(question, task_id)
404
+
405
+
406
+
407
+
408
+
409
+
410
def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile injected by gr.LoginButton; None when the
            user is not logged in.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio outputs;
        the dataframe is None when the run aborts before any questions ran.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    # Submission requires a username, so bail out early when not logged in.
    if profile:
        username= f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    # Per-question failures are recorded in results_log but do not stop the run.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # Try to surface the server-provided error detail when available.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
530
+
531
+
532
+ # --- Build Gradio Interface using Blocks ---
533
+ with gr.Blocks() as demo:
534
+ gr.Markdown("# Basic Agent Evaluation Runner")
535
+ gr.Markdown(
536
+ """
537
+ **Instructions:**
538
+
539
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
540
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
541
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
542
+
543
+ ---
544
+ **Disclaimers:**
545
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
546
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
547
+ """
548
+ )
549
+
550
+ gr.LoginButton()
551
+
552
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
553
+
554
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
555
+ # Removed max_rows=10 from DataFrame constructor
556
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
557
+
558
+ run_button.click(
559
+ fn=run_and_submit_all,
560
+ outputs=[status_output, results_table]
561
+ )
562
+
563
if __name__ == "__main__":
    # print("LangGraph version:", langgraph.__version__)
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
    # import langgraph
    # print("▶︎ LangGraph version:", langgraph.__version__)
    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        # Both variables are only set inside a Hugging Face Space runtime.
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
old2state.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing_extensions import TypedDict
from typing import Annotated
from langgraph.graph.message import add_messages

class AgentState(TypedDict, total=False):
    """Shared LangGraph state dict; total=False makes every key optional."""
    # Conversation history; add_messages appends on merge instead of replacing.
    messages: Annotated[list, add_messages]
    # --- Tool-request keys (set by planner nodes to ask for a tool run) ---
    web_search_query: str
    ocr_path: str
    excel_path: str
    excel_sheet_name: str
    # --- Tool-result keys (written back by the tool wrappers) ---
    web_search_result: str
    ocr_result: str
    excel_result: str
    # The answer that terminates the graph run.
    final_answer: str
    user_input: str
    audio_path: str
    transcript: str
    # NOTE(review): both `transcript` and `audio_transcript` exist — confirm
    # which one downstream nodes actually read.
    audio_transcript: str
    wiki_query: str
    wiki_result: str
    # Scoring-server task id, used by tools to download attached files.
    task_id: str
    tool_counter: int
old2tools.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools.py
2
+
3
+ import pandas as pd
4
+ # from langchain_community.tools import DuckDuckGoSearchRun
5
+ from pathlib import Path
6
+ # from PIL import Image
7
+ # import pytesseract
8
+ from old2state import AgentState
9
+ from langchain.schema import HumanMessage
10
+ import regex as re
11
+ import time
12
+ from duckduckgo_search import DDGS
13
+
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+
16
+
17
def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Best-effort download of the remote file attached to a task.

    Fetches GET {DEFAULT_API_URL}/files/{task_id} and saves the body under
    ./hf_files/{task_id}.{ext} (the server does not report an extension, so
    the caller supplies one).

    Args:
        task_id: Scoring-server task identifier, also used as the file stem.
        ext: Extension to save the file with (e.g. "xlsx", "png", "mp3").

    Returns:
        The local file path on success, or "" when the server has no file
        for this task or the download failed.
    """
    print("reached _download_file_for_task")
    os.makedirs("hf_files", exist_ok=True)
    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code == 200 and resp.content:
            print(f"Downloaded file from {url} to {local_path}")
            with open(local_path, "wb") as f:
                f.write(resp.content)
            return local_path
    except (requests.exceptions.RequestException, OSError) as e:
        # Deliberately best-effort — callers treat "" as "no file attached" —
        # but log the reason instead of swallowing it silently.
        print(f"_download_file_for_task: download failed for {url}: {e}")

    # 404, empty body, or download error
    return ""
41
+
42
+
43
def web_search_tool(state: AgentState) -> AgentState:
    """
    Run a DuckDuckGo text search for state["web_search_query"].

    Retries up to 5 times, 4 seconds apart, on both raised exceptions
    (network errors, timeouts) and DuckDuckGo's "202 Ratelimit" marker.

    Returns:
        {"web_search_query": None, "web_search_result": <text>} on any
        terminal outcome, or {} when no query is pending.
    """
    print("reached web_search_tool")
    query = state.get("web_search_query", "")
    if not query:
        return {}  # nothing to do

    ddg = DDGS()
    max_retries = 5
    result_text = ""

    attempt = 0
    while attempt < max_retries:
        attempt += 1
        is_last_try = attempt == max_retries
        try:
            result_text = str(ddg.text(query, max_results=5))
        except Exception as e:
            if is_last_try:
                # Out of retries: surface the failure as the result text.
                return {
                    "web_search_query": None,
                    "web_search_result": f"Error during DuckDuckGo search: {e}"
                }
            print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue

        if "202 Ratelimit" in result_text and not is_last_try:
            print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue

        # Clean result, or a still-rate-limited final attempt: keep it as-is.
        break

    return {
        "web_search_query": None,
        "web_search_result": result_text
    }
91
+
92
+
93
+
94
def ocr_image_tool(state: AgentState) -> AgentState:
    """
    Run OCR plus image captioning on the file attached to the current task.

    Expects:
        state["task_id"]: used to download the image via
            _download_file_for_task, trying .png/.jpg/.jpeg in order.
            (state["ocr_path"] is only the planner's trigger key; the file
            itself always comes from the task download.)

    Returns:
        {"ocr_path": None, "ocr_result": "<OCR text + caption or error>"}
    """
    print("reached ocr_image_tool")

    # Fix: initialize before the loop. Previously this assignment was
    # commented out, so a failed download raised NameError at the check below.
    local_img = ""
    for ext in ("png", "jpg", "jpeg"):
        candidate = _download_file_for_task(state.get("task_id"), ext)
        if candidate:
            local_img = candidate
            break

    if not local_img or not os.path.exists(local_img):
        return {
            "ocr_path": None,
            "ocr_result": "Error: No image file found (local nonexistent or download failed)."
        }

    # Read raw bytes once; both inference calls below reuse them.
    try:
        with open(local_img, "rb") as f:
            image_bytes = f.read()
    except Exception as e:
        return {
            "ocr_path": None,
            "ocr_result": f"Error reading image file: {e}"
        }

    # HF Inference API auth. NOTE(review): the error text says
    # HUGGINGFACE_API_KEY but the variable actually read is HF_TOKEN.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return {
            "ocr_path": None,
            "ocr_result": "Error: HUGGINGFACE_API_KEY not set in environment."
        }

    headers = {"Authorization": f"Bearer {hf_token}"}

    # OCR pass. NOTE(review): "google/vit-ocr" does not look like a real HF
    # model id — confirm the endpoint; any failure is captured in the result.
    ocr_text = ""
    try:
        ocr_resp = requests.post(
            "https://api-inference.huggingface.co/models/google/vit-ocr",
            headers=headers,
            files={"file": image_bytes},
            timeout=30
        )
        ocr_resp.raise_for_status()
        ocr_json = ocr_resp.json()

        # Expected JSON: "pages" → blocks with "lines" → each line has "text".
        lines = []
        for page in ocr_json.get("pages", []):
            for line in page.get("lines", []):
                lines.append(line.get("text", "").strip())
        ocr_text = "\n".join(lines).strip() or "(no visible text)"
    except Exception as e:
        ocr_text = f"Error during HF OCR: {e}"

    # Captioning pass for a brief description of the image.
    caption = ""
    try:
        cap_resp = requests.post(
            "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base",
            headers=headers,
            files={"file": image_bytes},
            timeout=30
        )
        cap_resp.raise_for_status()
        cap_json = cap_resp.json()
        # Response shape: {"generated_text": "...caption..."}
        caption = cap_json.get("generated_text", "").strip()
        if not caption:
            caption = "(no caption returned)"
    except Exception as e:
        caption = f"Error during HF captioning: {e}"

    # Combine both results; clearing ocr_path marks the request as consumed.
    combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
    print("combined: ")
    return {
        "ocr_path": None,
        "ocr_result": combined
    }
195
+
196
def parse_excel_tool(state: AgentState) -> AgentState:
    """
    Read the Excel file attached to the current task into a string.

    Expects:
        state["excel_path"]: trigger key set by the planner (path or task id).
        state["excel_sheet_name"]: optional sheet name; falls back to the
            first sheet when missing or unknown.
        state["task_id"]: used to download the .xlsx from the scoring server.

    Returns:
        {"excel_path": None, "excel_sheet_name": None,
         "excel_result": "<stringified records or Markdown table>"},
        or {} when no excel_path was requested.

    When no spreadsheet can be downloaded or parsed, falls back to scanning
    the user messages for a Markdown-style table.
    """
    print("reached parse_excel_tool")
    path_or_id = state.get("excel_path", "")
    sheet = state.get("excel_sheet_name", "")
    # Fix: check the trigger key before touching the network — previously the
    # download ran even when no Excel work was requested.
    if not path_or_id:
        return {}

    local_xlsx = _download_file_for_task(state.get("task_id"), "xlsx")

    # Preferred path: a real spreadsheet was downloaded.
    if local_xlsx and os.path.exists(local_xlsx):
        try:
            print("reached excel file found")
            xls = pd.ExcelFile(local_xlsx)
            if sheet and sheet in xls.sheet_names:
                df = pd.read_excel(xls, sheet_name=sheet)
            else:
                df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
            records = df.to_dict(orient="records")
            text = str(records)
            print("reached excel file found: ")
            print(text)
            print()
            return {
                "excel_path": None,
                "excel_sheet_name": None,
                "excel_result": text
            }
        except Exception as e:
            print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
            # Fall back to scanning for Markdown below

    # Fallback: scan any HumanMessage for a Markdown-style table.
    table_lines = []
    collecting = False
    for msg in state.get("messages", []):
        if isinstance(msg, HumanMessage):
            for line in msg.content.splitlines():
                if re.match(r"^\s*\|\s*[-A-Za-z0-9]", line):
                    collecting = True
                if collecting:
                    if not re.match(r"^\s*\|", line):
                        # First non-table line after the table ends the scan.
                        collecting = False
                        break
                    table_lines.append(line)
            if table_lines:
                break

    if not table_lines:
        return {
            "excel_path": None,
            "excel_sheet_name": None,
            "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
        }

    # Drop separator rows like "|---|---|" before returning the table block.
    clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
    table_block = "\n".join(clean_rows).strip()
    print(f"Parsed excel as excel_result: {table_block}")
    return {
        "excel_path": None,
        "excel_sheet_name": None,
        "excel_result": table_block
    }
275
+
276
+
277
+
278
+
279
+ import os
280
+
281
+
282
+
283
+
284
+
285
+ import os
286
+ import openai
287
+ from old2state import AgentState
288
+
289
def audio_transcriber_tool(state: AgentState) -> AgentState:
    """
    Transcribe the audio file attached to the current task with OpenAI Whisper.

    state["audio_path"] is the planner's trigger key; the actual file is
    always downloaded for state["task_id"], trying .mp3/.wav/.m4a in order,
    then sent to the whisper-1 model.

    Returns:
        {"audio_path": None, "transcript": <text or error message>},
        or {} when no audio was requested.
    """
    print("reached audio_transcriber_tool")
    if not state.get("audio_path", ""):
        return {}

    # Try each supported extension until the scoring server yields a file.
    local_audio = ""
    for extension in ("mp3", "wav", "m4a"):
        downloaded = _download_file_for_task(state.get("task_id"), extension)
        if downloaded:
            local_audio = downloaded
            break

    if not (local_audio and os.path.exists(local_audio)):
        return {
            "audio_path": None,
            "transcript": "Error: No audio file found (download failed)."
        }

    # Send the file to OpenAI Whisper; any failure becomes the transcript text.
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(local_audio, "rb") as audio_file:
            print("reached openai.audio.transcriptions.create")
            response = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
            print("reached response")
            text = response.text.strip()
    except Exception as e:
        text = f"Error during transcription: {e}"
    print(f"Transcripted as transcript: {text}")
    return {
        "audio_path": None,
        "transcript": text
    }
343
+ # tools.py
344
+
345
+ import re
346
+ import requests
347
+ from old2state import AgentState
348
+
349
def wikipedia_search_tool(state: AgentState) -> AgentState:
    """
    Look up state["wiki_query"] on Wikipedia and return a short summary.

    Two-step lookup: the MediaWiki search API picks the best-matching page
    title, then the REST summary endpoint fetches its plain-text extract.

    Returns:
        {"wiki_query": None, "wiki_result": <summary or error message>},
        or {} when no query was provided.
    """
    print("reached wikipedia search tool")
    query = state.get("wiki_query", "").strip()
    if not query:
        return {}

    def _result(text: str) -> AgentState:
        # Every exit clears the request key and stores the result text.
        return {"wiki_query": None, "wiki_result": text}

    try:
        # Step 1: search for page titles matching the query.
        search_resp = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query",
                "list": "search",
                "srsearch": query,
                "format": "json",
                "utf8": 1,
            },
            timeout=10,
        )
        search_resp.raise_for_status()
        hits = search_resp.json().get("query", {}).get("search", [])
        if not hits:
            return _result(f"No Wikipedia page found for '{query}'.")

        # Step 2: take the top hit's title.
        first_title = hits[0].get("title", "")
        if not first_title:
            return _result("Unexpected format from Wikipedia search.")

        # Step 3: fetch the page summary via the REST summary endpoint.
        summary_url = (
            "https://en.wikipedia.org/api/rest_v1/page/summary/"
            + requests.utils.requote_uri(first_title)
        )
        summary_resp = requests.get(summary_url, timeout=10)
        summary_resp.raise_for_status()
        payload = summary_resp.json()

        # Step 4: prefer "extract"; fall back to "description" or a stub.
        summary_text = payload.get("extract")
        if not summary_text:
            summary_text = payload.get("description", "No summary available.")

        return _result(f"Title: {first_title}\n\n{summary_text}")

    except requests.exceptions.RequestException as e:
        return _result(f"Wikipedia search error: {e}")
    except Exception as e:
        return _result(f"Unexpected error in wikipedia_search_tool: {e}")
409
+
410
+
411
+
412
+
413
+
414
def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
    """
    Merge a tool wrapper's partial output into the main agent state.

    Later keys win: any key present in tool_out overrides the value carried
    in state, exactly like {**state, **tool_out}. Wire this as its own graph
    node, not as a transition function.
    """
    merged = dict(state)
    merged.update(tool_out)
    return merged
old_app_copy.py CHANGED
@@ -15,12 +15,12 @@ from langgraph.graph.message import add_messages
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
18
- from state import AgentState
19
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
 
15
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
16
  # Create a ToolNode that knows about your web_search function
17
  import json
18
+ from old2state import AgentState
19
 
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
+ from old2tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools, audio_transcriber_tool, wikipedia_search_tool
24
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
state.py CHANGED
@@ -1,22 +1,23 @@
1
- from typing_extensions import TypedDict
2
- from typing import Annotated
3
- from langgraph.graph.message import add_messages
 
 
4
 
5
class AgentState(TypedDict, total=False):
    """Shared blackboard passed between LangGraph nodes for one run.

    total=False makes every key optional: tool nodes return partial
    dicts that the graph merges back into this state.
    """
    # Chat history; add_messages appends new messages instead of replacing.
    messages: Annotated[list, add_messages]
    # Pending tool requests (set by the planner, cleared by the tool).
    web_search_query: str
    ocr_path: str
    excel_path: str
    excel_sheet_name: str
    # Tool outputs (set by the corresponding tool node).
    web_search_result: str
    ocr_result: str
    excel_result: str
    # Final answer once the agent decides to stop.
    final_answer: str
    # Original question text from the user.
    user_input: str
    # Audio request/outputs.
    audio_path: str
    transcript: str
    audio_transcript: str
    # Wikipedia request/output.
    wiki_query: str
    wiki_result: str
    # Task id from the scoring API; used to download attached files.
    task_id: str
    # Number of tool invocations so far (loop guard).
    tool_counter: int
 
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Dict, Any, Optional
3
+ import json
4
+ from dataclasses import dataclass, field, asdict
5
+ from langchain.schema import SystemMessage, HumanMessage, AIMessage, BaseMessage
6
 
7
+
8
+ @dataclass
9
+ class AgentState:
10
+ """Single source‑of‑truth context for one user query run."""
11
+
12
+ user_question: str
13
+ task_id: Optional[str] = None
14
+ messages: List[BaseMessage] = field(default_factory=list)
15
+
16
+ next_action: Optional[str] = None # wiki | ocr | audio | final
17
+ query: Optional[str] = None # wiki search term
18
+ tool_calls: int = 0
19
+
20
+ final_answer: Optional[str] = None
21
+
22
+ def add(self, *msgs: BaseMessage):
23
+ self.messages.extend(msgs)
 
tools.py CHANGED
@@ -1,14 +1,12 @@
1
  # tools.py
2
 
3
  import pandas as pd
4
- # from langchain_community.tools import DuckDuckGoSearchRun
5
  from pathlib import Path
6
- # from PIL import Image
7
- # import pytesseract
8
- from state import AgentState
9
- from langchain.schema import HumanMessage
10
  import regex as re
11
  import time
 
12
  from duckduckgo_search import DDGS
13
 
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -39,83 +37,20 @@ def _download_file_for_task(task_id: str, ext: str) -> str:
39
  # If we get here, either 404 or download error
40
  return ""
41
 
42
-
43
- def web_search_tool(state: AgentState) -> AgentState:
44
- """
45
- Expects: state["web_search_query"] is a non‐empty string.
46
- Returns: {"web_search_query": None, "web_search_result": <string>}.
47
- Retries up to 5 times on either a DuckDuckGo “202 Ratelimit” response or any exception (e.g. timeout).
48
- """
49
- print("reached web_search_tool")
50
- query = state.get("web_search_query", "")
51
- if not query:
52
- return {} # nothing to do
53
-
54
- ddg = DDGS()
55
- max_retries = 5
56
- result_text = ""
57
-
58
- for attempt in range(1, max_retries + 1):
59
- try:
60
- result_text = str(ddg.text(query, max_results=5))
61
- except Exception as e:
62
- # Network error or timeout—retry up to max_retries
63
- if attempt < max_retries:
64
- print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
65
- time.sleep(4)
66
- continue
67
- else:
68
- # Final attempt failed
69
- return {
70
- "web_search_query": None,
71
- "web_search_result": f"Error during DuckDuckGo search: {e}"
72
- }
73
-
74
- # Check for DuckDuckGo rate‐limit indicator
75
- if "202 Ratelimit" in result_text:
76
- if attempt < max_retries:
77
- print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
78
- time.sleep(4)
79
- continue
80
- else:
81
- # Final attempt still rate‐limited
82
- break
83
-
84
- # Successful response (no exception and no rate‐limit text)
85
- break
86
-
87
- return {
88
- "web_search_query": None,
89
- "web_search_result": result_text
90
- }
91
-
92
-
93
-
94
- def ocr_image_tool(state: AgentState) -> AgentState:
95
  """
96
  Expects: state["ocr_path"] is either:
97
  • a local image path (e.g. "./hf_files/abc.png"), OR
98
  • a Task ID (e.g. "abc123"), in which case we try downloading
99
  GET {DEFAULT_API_URL}/files/{task_id} with .png/.jpg/.jpeg extensions.
100
 
101
- Returns:
102
- {
103
- "ocr_path": None,
104
- "ocr_result": "<OCR text + brief caption or an error message>"
105
- }
106
  """
107
  print("reached ocr_image_tool")
108
- path_or_id = state.get("ocr_path", "")
109
- # if not path_or_id:
110
- # return {}
111
-
112
- # 1) Determine local_img: either existing path_or_id or download by Task ID
113
- # local_img = ""
114
- # if os.path.exists(path_or_id):
115
- # local_img = path_or_id
116
- # else:
117
  for ext in ("png", "jpg", "jpeg"):
118
- candidate = _download_file_for_task(state.get("task_id"), ext)
119
  if candidate:
120
  local_img = candidate
121
  break
@@ -188,105 +123,39 @@ def ocr_image_tool(state: AgentState) -> AgentState:
188
  # 6) Combine OCR + caption
189
  combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
190
  print("combined: ")
191
- return {
192
- "ocr_path": None,
193
- "ocr_result": combined
194
- }
195
 
196
- def parse_excel_tool(state: AgentState) -> AgentState:
197
  """
198
- Expects state["excel_path"] to be either:
199
- A real local .xlsx path, or
200
- A Task ID string (e.g. "abc123"), in which case we GET /files/abc123.xlsx.
201
- Returns:
202
- {
203
- "excel_path": None,
204
- "excel_sheet_name": None,
205
- "excel_result": "<stringified records or Markdown table>"
206
- }
207
- Always attempts to download the file for the given path or task ID.
208
  """
209
- print("reached parse_excel_tool")
210
- local_xlsx = _download_file_for_task(state.get("task_id"), "xlsx")
211
- path_or_id = state.get("excel_path", "")
212
- sheet = state.get("excel_sheet_name", "")
213
- if not path_or_id:
214
- return {}
215
-
216
- # Always attempt to download the file, regardless of local existence
217
-
218
-
219
- # If we finally have a real file, read it
220
- if local_xlsx and os.path.exists(local_xlsx):
221
- try:
222
- print("reached excel file found")
223
- xls = pd.ExcelFile(local_xlsx)
224
- if sheet and sheet in xls.sheet_names:
225
- df = pd.read_excel(xls, sheet_name=sheet)
226
- else:
227
- df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
228
- records = df.to_dict(orient="records")
229
- text = str(records)
230
- print("reached excel file found: ")
231
- print(text)
232
- print()
233
- return {
234
- "excel_path": None,
235
- "excel_sheet_name": None,
236
- "excel_result": text
237
- }
238
- except Exception as e:
239
- print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
240
- # Fall back to scanning for Markdown below
241
-
242
- # Fallback: scan any HumanMessage for a Markdown‐style table
243
- messages = state.get("messages", [])
244
- table_lines = []
245
- collecting = False
246
-
247
- for msg in messages:
248
- if isinstance(msg, HumanMessage):
249
- for line in msg.content.splitlines():
250
- if re.match(r"^\s*\|\s*[-A-Za-z0-9]", line):
251
- collecting = True
252
- if collecting:
253
- if not re.match(r"^\s*\|", line):
254
- collecting = False
255
- break
256
- table_lines.append(line)
257
- if table_lines:
258
- break
259
-
260
- if not table_lines:
261
- return {
262
- "excel_path": None,
263
- "excel_sheet_name": None,
264
- "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
265
- }
266
-
267
- clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
268
- table_block = "\n".join(clean_rows).strip()
269
- print(f"Parsed excel as excel_result: {table_block}")
270
- return {
271
- "excel_path": None,
272
- "excel_sheet_name": None,
273
- "excel_result": table_block
274
- }
275
-
276
-
277
-
278
-
279
- import os
280
-
281
-
282
 
 
 
 
283
 
 
 
 
 
 
 
 
 
 
 
284
 
285
- import os
286
  import openai
287
- from state import AgentState
288
 
289
- def audio_transcriber_tool(state: AgentState) -> AgentState:
290
  """
291
  LangGraph tool for transcribing audio via OpenAI's Whisper API.
292
  Expects: state["audio_path"] to be either:
@@ -301,23 +170,21 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
301
  Always attempts to download the file for the given path or task ID.
302
  """
303
  print("reached audio_transcriber_tool")
304
- path_or_id = state.get("audio_path", "")
305
- if not path_or_id:
306
- return {}
307
 
308
  # Always attempt to download the file, regardless of local existence
309
  local_audio = ""
310
  for ext in ("mp3", "wav", "m4a"):
311
- candidate = _download_file_for_task(state.get("task_id"), ext)
312
  if candidate:
313
  local_audio = candidate
314
  break
315
 
316
  if not local_audio or not os.path.exists(local_audio):
317
- return {
318
- "audio_path": None,
319
- "transcript": "Error: No audio file found (download failed)."
320
- }
321
 
322
  # Send to OpenAI Whisper
323
  try:
@@ -336,17 +203,13 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
336
  except Exception as e:
337
  text = f"Error during transcription: {e}"
338
  print(f"Transcripted as transcript: {text}")
339
- return {
340
- "audio_path": None,
341
- "transcript": text
342
- }
343
  # tools.py
344
 
345
  import re
346
  import requests
347
- from state import AgentState
348
 
349
- def wikipedia_search_tool(state: AgentState) -> AgentState:
350
  """
351
  LangGraph wrapper for searching Wikipedia.
352
  Expects: state["wiki_query"] to be a non‐empty string.
@@ -358,7 +221,7 @@ def wikipedia_search_tool(state: AgentState) -> AgentState:
358
  If no valid wiki_query is provided, returns {}.
359
  """
360
  print("reached wikipedia search tool")
361
- query = state.get("wiki_query", "").strip()
362
  if not query:
363
  return {}
364
 
@@ -397,26 +260,63 @@ def wikipedia_search_tool(state: AgentState) -> AgentState:
397
  if not summary_text:
398
  summary_text = summary_data.get("description", "No summary available.")
399
 
400
- return {
401
- "wiki_query": None,
402
- "wiki_result": f"Title: {first_title}\n\n{summary_text}"
403
- }
404
 
405
  except requests.exceptions.RequestException as e:
406
- return {"wiki_query": None, "wiki_result": f"Wikipedia search error: {e}"}
407
  except Exception as e:
408
- return {"wiki_query": None, "wiki_result": f"Unexpected error in wikipedia_search_tool: {e}"}
409
-
410
-
411
-
412
-
413
-
414
- def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
415
- """
416
- Merges whatever partial state the tool wrapper returned (tool_out)
417
- into the main state. That is, combine previous keys with new keys:
418
- new_state = { **state, **tool_out }.
419
- This node should be wired as its own graph node, not as a transition function.
420
- """
421
- new_state = {**state, **tool_out}
422
- return new_state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # tools.py
2
 
3
  import pandas as pd
4
+
5
  from pathlib import Path
6
+
 
 
 
7
  import regex as re
8
  import time
9
+ import os
10
  from duckduckgo_search import DDGS
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
37
  # If we get here, either 404 or download error
38
  return ""
39
 
40
+ def ocr_image_tool(args: dict) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  """
42
  Expects: state["ocr_path"] is either:
43
  • a local image path (e.g. "./hf_files/abc.png"), OR
44
  • a Task ID (e.g. "abc123"), in which case we try downloading
45
  GET {DEFAULT_API_URL}/files/{task_id} with .png/.jpg/.jpeg extensions.
46
 
47
+ Returns: "OCR text + brief caption or an error message"
48
+
 
 
 
49
  """
50
  print("reached ocr_image_tool")
51
+ # path_or_id = state.get("ocr_path", "")
 
 
 
 
 
 
 
 
52
  for ext in ("png", "jpg", "jpeg"):
53
+ candidate = _download_file_for_task(args["task_id"], ext)
54
  if candidate:
55
  local_img = candidate
56
  break
 
123
  # 6) Combine OCR + caption
124
  combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
125
  print("combined: ")
126
+ return combined
127
+
 
 
128
 
129
def parse_excel_tool(args: dict) -> str:
    """Download <task_id>.xlsx and stringify its rows.

    There is no fallback to tables pasted in the prompt.

    Expected keys in `args`:
      • task_id – required (used to download the file)
      • excel_sheet_name – optional sheet to load (first sheet otherwise)
    Returns: stringified list of record dicts, or an error string.
    """
    task_id = args.get("task_id", "")
    wanted_sheet = args.get("excel_sheet_name", "")

    workbook_path = _download_file_for_task(task_id, "xlsx")
    if not workbook_path or not os.path.exists(workbook_path):
        return "Error: Excel file not found for this task."

    try:
        workbook = pd.ExcelFile(workbook_path)
        if wanted_sheet and wanted_sheet in workbook.sheet_names:
            target = wanted_sheet
        else:
            target = workbook.sheet_names[0]
        frame = pd.read_excel(workbook, sheet_name=target)
        return str(frame.to_dict(orient="records"))
    except Exception as e:
        return f"Error reading Excel file: {e}"
154
+
155
 
 
156
  import openai
 
157
 
158
+ def audio_transcriber_tool(args: dict) -> str:
159
  """
160
  LangGraph tool for transcribing audio via OpenAI's Whisper API.
161
  Expects: state["audio_path"] to be either:
 
170
  Always attempts to download the file for the given path or task ID.
171
  """
172
  print("reached audio_transcriber_tool")
173
+ # path_or_id = state.get("audio_path", "")
174
+ # if not path_or_id:
175
+ # return {}
176
 
177
  # Always attempt to download the file, regardless of local existence
178
  local_audio = ""
179
  for ext in ("mp3", "wav", "m4a"):
180
+ candidate = _download_file_for_task(args["task_id"], ext)
181
  if candidate:
182
  local_audio = candidate
183
  break
184
 
185
  if not local_audio or not os.path.exists(local_audio):
186
+ return "Error: No audio file found (download failed)."
187
+
 
 
188
 
189
  # Send to OpenAI Whisper
190
  try:
 
203
  except Exception as e:
204
  text = f"Error during transcription: {e}"
205
  print(f"Transcripted as transcript: {text}")
206
+ return text
 
 
 
207
  # tools.py
208
 
209
  import re
210
  import requests
 
211
 
212
+ def wikipedia_search_tool(args: dict) -> str:
213
  """
214
  LangGraph wrapper for searching Wikipedia.
215
  Expects: state["wiki_query"] to be a non‐empty string.
 
221
  If no valid wiki_query is provided, returns {}.
222
  """
223
  print("reached wikipedia search tool")
224
+ query = args["wiki_query"]
225
  if not query:
226
  return {}
227
 
 
260
  if not summary_text:
261
  summary_text = summary_data.get("description", "No summary available.")
262
 
263
+ return f"Title: {first_title}\n\n{summary_text}"
264
+
 
 
265
 
266
  except requests.exceptions.RequestException as e:
267
+ return f"Wikipedia search error: {e}"
268
  except Exception as e:
269
+ return f"Unexpected error in wikipedia_search_tool: {e}"
270
+
271
+
272
+
273
+
274
+
275
+ # def web_search_tool(state: AgentState) -> AgentState:
276
+ # """
277
+ # Expects: state["web_search_query"] is a non‐empty string.
278
+ # Returns: {"web_search_query": None, "web_search_result": <string>}.
279
+ # Retries up to 5 times on either a DuckDuckGo “202 Ratelimit” response or any exception (e.g. timeout).
280
+ # """
281
+ # print("reached web_search_tool")
282
+ # query = state.get("web_search_query", "")
283
+ # if not query:
284
+ # return {} # nothing to do
285
+
286
+ # ddg = DDGS()
287
+ # max_retries = 5
288
+ # result_text = ""
289
+
290
+ # for attempt in range(1, max_retries + 1):
291
+ # try:
292
+ # result_text = str(ddg.text(query, max_results=5))
293
+ # except Exception as e:
294
+ # # Network error or timeout—retry up to max_retries
295
+ # if attempt < max_retries:
296
+ # print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
297
+ # time.sleep(4)
298
+ # continue
299
+ # else:
300
+ # # Final attempt failed
301
+ # return {
302
+ # "web_search_query": None,
303
+ # "web_search_result": f"Error during DuckDuckGo search: {e}"
304
+ # }
305
+
306
+ # # Check for DuckDuckGo rate‐limit indicator
307
+ # if "202 Ratelimit" in result_text:
308
+ # if attempt < max_retries:
309
+ # print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
310
+ # time.sleep(4)
311
+ # continue
312
+ # else:
313
+ # # Final attempt still rate‐limited
314
+ # break
315
+
316
+ # # Successful response (no exception and no rate‐limit text)
317
+ # break
318
+
319
+ # return {
320
+ # "web_search_query": None,
321
+ # "web_search_result": result_text
322
+ # }