naman1102 committed on
Commit 65abbbc · 1 Parent(s): c99f0eb
Files changed (2)
  1. app.py +40 -56
  2. tools.py +11 -0
app.py CHANGED
@@ -20,46 +20,40 @@ from state import AgentState
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

-from tools import ocr_image_tool, parse_excel_tool, web_search_tool
+from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools
 tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])

 llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)

 agent = create_react_agent(model=llm, tools=tool_node)

-# 2) Build a two‐edge graph:
 def plan_node(state: AgentState, user_input: str) -> AgentState:
     """
-    Reads state['messages'] + user_input and decides:
-      If it needs to call web_search, set state['web_search_query'] to a query.
-      Else if it needs to call ocr, set state['ocr_path'] to the image path.
-      Else if it needs Excel, set state['excel_path'] and 'excel_sheet_name'.
-      Otherwise, set state['final_answer'] to a plain text answer.
-    We also append user_input to state['messages'] so the LLM sees the full history.
+    Decide which tool (if any) to call. Append the user_input to state["messages"] and
+    return a partial AgentState that sets exactly one of:
+      - web_search_query (string)
+      - ocr_path (string)
+      - excel_path (string) + excel_sheet_name (optional)
+      - final_answer (string)
     """
-    # 4.a) Grab prior chat history, append user_input:
     prior = state.get("messages", [])
     chat_history = prior + [f"USER: {user_input}"]

-    # 4.b) Send that to the LLM with a prompt explaining the new schema:
     prompt = chat_history + [
-        "ASSISTANT: You can set one of the following keys:\n"
-        " • web_search_query: <string> \n"
-        " • ocr_path: <path> \n"
-        " • excel_path: <path> \n"
-        " • excel_sheet_name: <sheet> \n"
+        "ASSISTANT: You can set exactly one of the following keys in a Python dict:\n"
+        " • web_search_query: <search terms> \n"
+        " • ocr_path: <path to an image> \n"
+        " • excel_path: <path to xlsx> \n"
+        " • excel_sheet_name: <sheet name> \n"
         "Or, if no tool is needed, set final_answer: <your answer>.\n"
-        "Respond with a Python‐dict literal that contains exactly one of those keys.\n"
         "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
-        "No additional text!"
+        "Respond with only that Python dict literal—no additional text."
     ]
     llm_out = llm(prompt).content.strip()

-    # 4.c) Try to eval as a Python dict:
     try:
-        parsed = eval(llm_out, {}, {})  # trust that user obeyed instructions
+        parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
-            # Only keep recognized keys, ignore anything else
             new_state: AgentState = {"messages": chat_history}
             allowed = {
                 "web_search_query",
@@ -75,22 +69,19 @@ def plan_node(state: AgentState, user_input: str) -> AgentState:
     except Exception:
         pass

-    # 4.d) If parsing failed, or they returned something else, set a fallback
+    # Fallback if parsing failed
     return {
         "messages": chat_history,
         "final_answer": "Sorry, I could not parse your intent."
     }

+
 # ─── 5) Define “finalize” node: compose the final answer using any tool results ───
 def finalize_node(state: AgentState) -> AgentState:
     """
-    By this point:
-      - state['messages'] contains the chat history (ending with how we requested a tool).
-      - One or more of web_search_result, ocr_result, excel_result might be filled.
-      - Or, state['final_answer'] is already set, meaning no tool was needed.
-    We ask the LLM to produce a final text answer.
+    After any tool results exist in state, or if final_answer was already set,
+    ask the LLM to produce the final answer.
     """
-    # 5.a) Build a prompt listing any tool results:
     parts = state.get("messages", [])
     if "web_search_result" in state and state["web_search_result"] is not None:
         parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")
@@ -98,36 +89,36 @@ def finalize_node(state: AgentState) -> AgentState:
         parts.append(f"OCR_RESULT: {state['ocr_result']}")
     if "excel_result" in state and state["excel_result"] is not None:
         parts.append(f"EXCEL_RESULT: {state['excel_result']}")
+    # If plan already set final_answer, skip calling the LLM again
+    if state.get("final_answer") is not None:
+        return {"final_answer": state["final_answer"]}

     parts.append("ASSISTANT: Please provide the final answer now.")
     llm_out = llm(parts).content.strip()
-
     return {"final_answer": llm_out}


+tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])

-
-
-
-
-
-
-
-
+# ─── 5) Build the StateGraph ───
 graph = StateGraph(AgentState)

-# 6.a) Register nodes in order:
+# 5.a) Register nodes
 graph.add_node("plan", plan_node)
 graph.add_node("tools", tool_node)
+graph.add_node("run_tools", run_tools)
 graph.add_node("finalize", finalize_node)

-# 6.b) START → "plan"
+# 5.b) START → plan
 graph.add_edge(START, "plan")

-# 6.c) If plan_node sets a tool‐query key, go to "tools"; otherwise go to "finalize".
+
+
+
+
+# 4) After plan, we branch based on whether a tool key was set:
+#    If plan_node set web_search_query/ocr_path/excel_path, go to "tools"; otherwise go straight to "finalize".
 def route_plan(state: AgentState, plan_out: AgentState) -> str:
-    # If plan_node placed a "web_search_query", "ocr_path", or "excel_path", go to tools.
-    # (Note: plan_out already replaced state["messages"])
     if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
         return "tools"
     return "finalize"
@@ -138,34 +129,27 @@ graph.add_conditional_edges(
     {"tools": "tools", "finalize": "finalize"}
 )

-def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
-    """
-    When a tool‐wrapper returns, it has already consumed the relevant key
-    (e.g. set web_search_query back to None) and added tool_result.
-    We just merge that into state.
-    """
-    new_state = {**state, **tool_out}
-    return new_state
-

+graph.add_edge("tools", "run_tools")

-graph.add_edge("tools", "finalize", run_tools)
+# 5.e) run_tools → finalize
+graph.add_edge("run_tools", "finalize")

-# 6.e) "finalize" → END
+# 5.f) finalize → END
 graph.add_edge("finalize", END)

 compiled_graph = graph.compile()

-# ─── 7) Define respond_to_input that drives the graph ───
 def respond_to_input(user_input: str) -> str:
-    # On first turn, messages=[], no query keys set.
+    """
+    Initialize with an empty messages list. Then run through plan → tools → run_tools → finalize.
+    Return the "final_answer" from the final state.
+    """
     initial_state: AgentState = {"messages": []}
     final_state = compiled_graph.invoke(initial_state, user_input)
-    # final_state should have 'final_answer'
     return final_state.get("final_answer", "Error: No final answer generated.")


-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
tools.py CHANGED
@@ -69,3 +69,14 @@ def parse_excel_tool(state: AgentState) -> AgentState:
         "excel_sheet_name": None,
         "excel_result": text
     }
+
+
+def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
+    """
+    Merges whatever partial state the tool wrapper returned (tool_out)
+    into the main state. That is, combine previous keys with new keys:
+    new_state = { **state, **tool_out }.
+    This node should be wired as its own graph node, not as a transition function.
+    """
+    new_state = {**state, **tool_out}
+    return new_state
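The run_tools helper added here is a shallow dict merge: any key present in tool_out overwrites the same key in state, and untouched keys carry over. Note that when it is registered with graph.add_node in app.py, LangGraph calls it with only the state, so the two-argument signature relies on the tool output being supplied some other way. A small illustration of the merge semantics alone (the literal values are made up):

state = {"messages": ["USER: hi"], "web_search_query": "Mercedes Sosa discography"}
tool_out = {"web_search_query": None, "web_search_result": "top search hit text"}
merged = {**state, **tool_out}
# merged == {"messages": ["USER: hi"],
#            "web_search_query": None,
#            "web_search_result": "top search hit text"}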