naman1102 committed
Commit a03e926 · 1 Parent(s): e89e29d

Update app.py

Files changed (1): app.py (+42 -101)
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
  import requests
  import inspect
  import pandas as pd
- from langgraph.prebuilt import ToolNode
+ from langgraph.prebuilt import ToolNode, create_react_agent
  from tools import web_search, parse_excel, ocr_image
  # import langgraph
  from typing import TypedDict, Annotated
@@ -26,116 +26,57 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

  # --- Basic Agent Definition ---
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-
  class AgentState(TypedDict):
-     # We store the full chat history as a list of strings.
-     messages: Annotated[list[str], add_messages]
-     # If the agent requests a tool, it will fill in:
-     tool_request: dict | None
-     # Whenever a tool runs, its result goes here:
-     tool_result: str | None
-
- # 2) Wrap ChatOpenAI in a function whose signature is (state, user_input) → new_state
- llm = ChatOpenAI(model_name="gpt-4.1-mini")
-
- def agent_node(state: AgentState, user_input: str) -> AgentState:
-     """
-     This function replaces raw ChatOpenAI. It must accept (state, user_input)
-     and return a new AgentState dict.
-     """
-     # 2.a) Grab prior chat history (empty list on first turn)
-     prior_msgs = state.get("messages", [])
-     # 2.b) Append the new user_input
-     chat_history = prior_msgs + [f"USER: {user_input}"]
-     # 2.c) Ask the LLM for a response
-     llm_output = llm(chat_history).content
-
-     # 2.d) Check if the LLM output is valid Python dict literal indicating a tool call.
-     #      If it is, parse it and stash in state["tool_request"]. Otherwise, no tool.
-     tool_req = None
-     try:
-         parsed = eval(llm_output)
-         if isinstance(parsed, dict) and parsed.get("tool"):
-             tool_req = parsed
-     except Exception:
-         tool_req = None
-
-     # 2.e) Construct the new state:
-     return {
-         "messages": chat_history + [f"ASSISTANT: {llm_output}"],
-         "tool_request": tool_req,
-         "tool_result": None  # will be filled by the tool_node if invoked
-     }
-
- # 3) Create a ToolNode for all three tools, then wrap it in a function
- #    whose signature is also (state, tool_request) → new_state.
- underlying_tool_node = ToolNode([ocr_image, parse_excel, web_search])
-
- def tool_node(state: AgentState, tool_request: dict) -> AgentState:
-     """
-     The graph will only call this when tool_request is a dict like
-     {"tool": "...", "path": "...", ...}
-     Use the underlying ToolNode to run it and store the result.
-     """
-     # 3.a) Run the actual ToolNode on that dict:
-     result_text = underlying_tool_node.run(tool_request)
-
-     # 3.b) Update state.messages to note the tool's output,
-     #      and clear tool_request so we don't loop.
-     return {
-         "messages": [f"TOOL ({tool_request['tool']}): {result_text}"],
-         "tool_request": None,
-         "tool_result": result_text
-     }
-
- # 4) Build and register nodes exactly as in the tutorial
+     # Keep track of the full chat history so the LLM sees it each time
+     messages: list[str]
+     # These two fields are _optional_—we won’t even use them directly in our code,
+     # but the ReAct agent will populate them when it calls a tool.
+     tool_name: str    # e.g. "ocr_image" or "web_search"
+     tool_input: str   # whatever arguments you passed into the tool
+
+ # ─── 2) Instantiate your LLM (ChatOpenAI) ───
+ llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
+
+ # ─── 3) Wrap your three tools into a single ToolNode ───
+ # ToolNode lets LangGraph know “these are the only tool functions the agent may call”
+ tool_node = ToolNode([ocr_image, parse_excel, web_search])
+
+ # ─── 4) Use create_react_agent to build a ReAct‐style agent for you ───
+ # This single “agent” node will:
+ #   • Take the entire AgentState (including messages),
+ #     Look at state["messages"], decide if it needs to call a tool,
+ #   • If so, emit {"tool": "<tool_name>", "input": "<tool_input>"},
+ #     and then feed results back into the LLM automatically,
+ #   • If not, emit a final answer as plain text.
+ agent = create_react_agent(llm, tool_node)
+
+ # ─── 5) Build a graph with exactly two edges, just like the tutorial ───
  graph = StateGraph(AgentState)
- graph.add_node("agent", agent_node)
- graph.add_node("tools", tool_node)
+ graph.add_node("agent", agent)

- # 5) Simple START → "agent" edge (no third argument needed)
+ # 5.a) Whenever user input arrives, send it into the “agent” node:
  graph.add_edge(START, "agent")

- # 6) Simple "tools" → "agent" edge (again, no third argument)
- graph.add_edge("tools", "agent")
+ # 5.b) Once “agent” produces its final text (not a tool call), go to END:
+ graph.add_edge("agent", END)

- # 7) Conditional branching out of "agent," exactly like the tutorial
- def route_agent(state: AgentState, agent_out):
-     """
-     When the LLM (agent_node) runs, it returns an AgentState where
-       - state["tool_request"] is either a dict (if a tool was asked) or None.
-       - state["tool_result"] is always None on entry to agent_node.
-
-     route_agent must look at that returned state (called agent_out)
-     and decide:
-       • If agent_out["tool_request"] is not None, go to "tools".
-       • Otherwise, terminate (go to END).
-     """
-     if agent_out.get("tool_request") is not None:
-         return "tools"
-     return "final"
-
- graph.add_conditional_edges(
-     "agent",        # source
-     route_agent,    # routing function (signature: (state, agent_out) → str key)
-     {
-         "tools": "tools",  # if route_agent(...) == "tools", transition to node "tools"
-         "final": END       # if route_agent(...) == "final", stop execution
-     }
- )
-
- # 8) Compile the graph (now graph.run(...) will work)
+ # 5.c) Compile so we can call .invoke(...) at runtime
  compiled_graph = graph.compile()

- # 9) Define respond_to_input so that Gradio (and the Hugging Face submission) can call it
+ # ─── 6) Define a simple function Gradio (and the “submit all” loop) can call ───
  def respond_to_input(user_input: str) -> str:
-     initial_state: AgentState = {"messages": [], "tool_request": None, "tool_result": None}
-     # ✔️ use .invoke() in v0.4.7
+     initial_state: AgentState = {
+         "messages": [],   # No history on the first turn
+         "tool_name": "",
+         "tool_input": ""
+     }
+     # In v0.4.7, use .invoke(...) on the compiled graph
      final_state = compiled_graph.invoke(initial_state, user_input)
-     # Return the last assistant message
-     last = final_state["messages"][-1]
-     return last.replace("ASSISTANT: ", "")
-
+     # `create_react_agent` always appends its LLM text into state["messages"].
+     # The last entry of “ASSISTANT: <answer>” is the final answer.
+     last_line = final_state["messages"][-1]
+     # If the agent prefixes with “ASSISTANT: ”, strip it off.
+     return last_line.replace("ASSISTANT: ", "", 1)


  class BasicAgent:
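A note on step 4 of the new code: `create_react_agent` from `langgraph.prebuilt` already returns a compiled graph that runs the model/tool loop internally, and its state keeps `messages` as chat message objects (managed by an `add_messages` reducer) rather than `"USER: ..."` strings. The sketch below shows the usual wiring for the same three tools; it is an illustration of the prebuilt helper, not the code committed here, and the sample question is made up.

# Sketch only: typical use of the prebuilt ReAct agent
# (assumes langchain-openai is installed and a recent langgraph).
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

from tools import web_search, parse_excel, ocr_image  # this repo's tools

llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0.0)

# create_react_agent builds and compiles the agent <-> tools loop itself,
# so no surrounding StateGraph is strictly required.
agent = create_react_agent(llm, [web_search, parse_excel, ocr_image])

# The prebuilt agent's state holds LangChain messages, not plain strings.
result = agent.invoke({"messages": [("user", "What does the attached spreadsheet contain?")]})
print(result["messages"][-1].content)  # final assistant answer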
 
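For comparison with the removed `agent_node` / `tool_node` / `route_agent` wiring, the explicit way to build that loop around a `ToolNode` is usually the prebuilt `tools_condition`, which routes to the tool node whenever the model's last message contains tool calls. A minimal sketch under the assumption that the functions in `tools.py` are LangChain-compatible tools (it uses the prebuilt `MessagesState` instead of this commit's `AgentState`):

from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, MessagesState, START
from langgraph.prebuilt import ToolNode, tools_condition

from tools import web_search, parse_excel, ocr_image

tools = [web_search, parse_excel, ocr_image]
llm_with_tools = ChatOpenAI(model="gpt-4.1-mini").bind_tools(tools)

def agent_node(state: MessagesState) -> dict:
    # Return only the new message; MessagesState appends it to the history.
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

graph = StateGraph(MessagesState)
graph.add_node("agent", agent_node)
graph.add_node("tools", ToolNode(tools))
graph.add_edge(START, "agent")
# Go to "tools" when the last AI message requested a tool, otherwise finish.
graph.add_conditional_edges("agent", tools_condition)
graph.add_edge("tools", "agent")
compiled = graph.compile()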
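One caveat for anyone reusing `respond_to_input`: `.invoke()` on a compiled LangGraph takes the initial state as its first argument and an optional config dict as the second, so the user's question normally travels inside the initial state rather than being passed as a separate argument. A hedged sketch of that shape, keeping the function name from the diff and assuming the message-based state used by the prebuilt agent:

def respond_to_input(user_input: str) -> str:
    # The question goes into the state; invoke's second argument is a config, not input.
    initial_state = {"messages": [("user", user_input)]}
    final_state = compiled_graph.invoke(initial_state)
    last = final_state["messages"][-1]
    # Message objects expose .content; fall back to str() for plain string histories.
    return getattr(last, "content", str(last))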