New_Final_Assignment

Sleeping

App Files Community

naman1102 committed on Jun 2

Commit

0fed708

1 Parent(s): cf84beb

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -68

app.py CHANGED Viewed

@@ -26,33 +26,26 @@ tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])
 llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
 # agent = create_react_agent(model=llm, tools=tool_node)
 def plan_node(state: AgentState) -> AgentState:
     """
-    `state["messages"]` must already end in a HumanMessage containing the user’s question.
-    We inspect that last HumanMessage and ask the LLM to set exactly one key:
-      • web_search_query
-      • ocr_path
-      • excel_path (and excel_sheet_name)
-      • final_answer
-    The LLM must return a bare Python‐dict literal containing exactly that one key.
     """
-    # 1) Grab prior BaseMessage list
     prior_msgs = state.get("messages", [])
-    # 2) Extract the last HumanMessage content (the user question)
     user_input = ""
     for msg in reversed(prior_msgs):
         if isinstance(msg, HumanMessage):
             user_input = msg.content
             break
-    # 3) Build new_history = copy of prior_msgs (it already contains that HumanMessage)
-    new_history = prior_msgs.copy()
-    # 4) Append a SystemMessage explaining how to return exactly one key
-    explanation = SystemMessage(
         content=(
-            "You can set exactly one of these keys in a Python dict (and nothing else):\n"
             "  • web_search_query: <search terms>\n"
             "  • ocr_path: <path to an image file>\n"
             "  • excel_path: <path to a .xlsx file>\n"
@@ -62,17 +55,17 @@ def plan_node(state: AgentState) -> AgentState:
             "Respond with only that Python dict literal—no extra text or explanation."
         )
     )
-    # 5) Call the LLM with [ all previous BaseMessages ] + explanation
-    prompt_messages = new_history + [explanation]
-    llm_response = llm(prompt_messages)
     llm_out = llm_response.content.strip()
-    # 6) Try to parse the LLM output as a dict
     try:
         parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
-            partial: AgentState = {"messages": new_history}
             allowed = {
                 "web_search_query",
                 "ocr_path",
@@ -87,49 +80,55 @@ def plan_node(state: AgentState) -> AgentState:
     except Exception:
         pass
-    # 7) Fallback if parsing failed
     return {
-        "messages": new_history,
         "final_answer": "Sorry, I could not parse your intent."
     }
-# ─── 3) Define finalize_node (only takes state) ───
 def finalize_node(state: AgentState) -> AgentState:
     """
-    By this time:
-      - state['messages'] is a list of BaseMessage (SystemMessage/HumanMessage/AIMessage).
-      - Possibly state['web_search_result'] or state['ocr_result'] or state['excel_result'] is set.
-      - Or state['final_answer'] is already set (if plan_node decided no tool was needed).
-    We append any tool results as SystemMessages, then prompt the LLM for one final answer.
     """
-    # 1) Copy the existing BaseMessage list
-    history = state.get("messages", []).copy()
-    # 2) Append each tool result as a SystemMessage, if present
-    if "web_search_result" in state and state["web_search_result"] is not None:
-        history.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}"))
-    if "ocr_result" in state and state["ocr_result"] is not None:
-        history.append(SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}"))
-    if "excel_result" in state and state["excel_result"] is not None:
-        history.append(SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}"))
-    # 3) If plan_node already set a final_answer, just return it directly
     if state.get("final_answer") is not None:
         return {"final_answer": state["final_answer"]}
-    # 4) Otherwise, ask the LLM to produce the final answer
-    history.append(SystemMessage(content="Please provide the final answer now."))
-    llm_response = llm(history)
     return {"final_answer": llm_response.content.strip()}
-# ─── 4) Wrap the low‐level tool wrappers in a ToolNode ───
 tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
-# ─── 5) Build and compile the StateGraph ───
 graph = StateGraph(AgentState)
-# 5.a) Register each node
 graph.add_node("plan", plan_node)
 graph.add_node("tools", tool_node)
 graph.add_node("run_tools", run_tools)
@@ -138,7 +137,7 @@ graph.add_node("finalize", finalize_node)
 # 5.b) Wire START → plan
 graph.add_edge(START, "plan")
-# 5.c) plan → conditional: if any tool key is set, go to "tools"; otherwise "finalize"
 def route_plan(plan_out: AgentState) -> str:
     if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
         return "tools"
@@ -150,24 +149,24 @@ graph.add_conditional_edges(
     {"tools": "tools", "finalize": "finalize"}
 )
-# 5.d) Wire tools → run_tools
 graph.add_edge("tools", "run_tools")
-# 5.e) Wire run_tools → finalize
 graph.add_edge("run_tools", "finalize")
-# 5.f) Wire finalize → END
 graph.add_edge("finalize", END)
 compiled_graph = graph.compile()
-# ─── 6) Define respond_to_input ───
 def respond_to_input(user_input: str) -> str:
     """
-    Start with a SystemMessage + HumanMessage; then let the graph run:
-    plan_node → tools → run_tools → finalize_node. Return final_answer.
     """
-    # 1) SystemMessage describing the tools
     system_msg = SystemMessage(
         content=(
             "You have access to exactly these tools:\n"
@@ -183,24 +182,15 @@ def respond_to_input(user_input: str) -> str:
             "Respond with only that Python dict literal—no extra text."
         )
     )
-    # 2) HumanMessage wrapping the user’s question
     human_msg = HumanMessage(content=user_input)
-    # 3) Build initial_state so that "messages" = [system_msg, human_msg]
     initial_state: AgentState = {"messages": [system_msg, human_msg]}
-    # 4) Invoke the graph (no second argument needed)
     final_state = compiled_graph.invoke(initial_state)
-    # 5) Return the "final_answer" or a fallback
     return final_state.get("final_answer", "Error: No final answer generated.")
-# ─── 7) BasicAgent wrapper ───
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        return respond_to_input(question)
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")

 llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)
 # agent = create_react_agent(model=llm, tools=tool_node)
+# ─── 2) Revised plan_node ───
 def plan_node(state: AgentState) -> AgentState:
     """
+    Look at the last HumanMessage in state['messages'] to get user_input.
+    Then call llm with exactly [SystemMessage, HumanMessage(user_input)] so
+    we never feed in a list lacking an AIMessage internally.
     """
+    # 1) Find the last HumanMessage from prior history
     prior_msgs = state.get("messages", [])
     user_input = ""
     for msg in reversed(prior_msgs):
         if isinstance(msg, HumanMessage):
             user_input = msg.content
             break
+    # 2) Build a fresh SystemMessage explaining exactly one dict key
+    system_msg = SystemMessage(
         content=(
+            "You can set exactly one of these keys in a Python dict and nothing else:\n"
             "  • web_search_query: <search terms>\n"
             "  • ocr_path: <path to an image file>\n"
             "  • excel_path: <path to a .xlsx file>\n"
             "Respond with only that Python dict literal—no extra text or explanation."
         )
     )
+    human_msg = HumanMessage(content=user_input)
+    # 3) Call the LLM with a brand‐new list [system_msg, human_msg]
+    llm_response = llm([system_msg, human_msg])
     llm_out = llm_response.content.strip()
+    # 4) Try to parse as a Python dict
     try:
         parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
+            partial: AgentState = {"messages": prior_msgs.copy()}
             allowed = {
                 "web_search_query",
                 "ocr_path",
     except Exception:
         pass
+    # 5) Fallback
     return {
+        "messages": prior_msgs.copy(),
         "final_answer": "Sorry, I could not parse your intent."
     }
+# ─── 3) Revised finalize_node ───
 def finalize_node(state: AgentState) -> AgentState:
     """
+    Collect any tool results from state and then ask the LLM for a final answer.
+    We build a fresh list of SystemMessages for tool results (no reuse of prior AIMessage).
     """
+    # 1) Create a list of SystemMessages for each available tool result
+    messages_for_llm = []
+    if state.get("web_search_result") is not None:
+        messages_for_llm.append(
+            SystemMessage(content=f"WEB_SEARCH_RESULT: {state['web_search_result']}")
+        )
+    if state.get("ocr_result") is not None:
+        messages_for_llm.append(
+            SystemMessage(content=f"OCR_RESULT: {state['ocr_result']}")
+        )
+    if state.get("excel_result") is not None:
+        messages_for_llm.append(
+            SystemMessage(content=f"EXCEL_RESULT: {state['excel_result']}")
+        )
+    # 2) If plan_node already set final_answer, return it without calling LLM again
     if state.get("final_answer") is not None:
         return {"final_answer": state["final_answer"]}
+    # 3) Otherwise, append our “please give final answer” SystemMessage
+    messages_for_llm.append(
+        SystemMessage(content="Please provide the final answer now.")
+    )
+    # 4) Call the LLM with our fresh list of SystemMessages
+    llm_response = llm(messages_for_llm)
     return {"final_answer": llm_response.content.strip()}
+# ─── 4) Wrap tools in a ToolNode ───
 tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])
+# ─── 5) Build the graph ───
 graph = StateGraph(AgentState)
+# 5.a) Register nodes
 graph.add_node("plan", plan_node)
 graph.add_node("tools", tool_node)
 graph.add_node("run_tools", run_tools)
 # 5.b) Wire START → plan
 graph.add_edge(START, "plan")
+# 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
 def route_plan(plan_out: AgentState) -> str:
     if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
         return "tools"
     {"tools": "tools", "finalize": "finalize"}
 )
+# 5.d) tools → run_tools
 graph.add_edge("tools", "run_tools")
+# 5.e) run_tools → finalize
 graph.add_edge("run_tools", "finalize")
+# 5.f) finalize → END
 graph.add_edge("finalize", END)
 compiled_graph = graph.compile()
+# ─── 6) respond_to_input ───
 def respond_to_input(user_input: str) -> str:
     """
+    Seed state['messages'] with a SystemMessage (tools description) + HumanMessage(user_input).
+    Then invoke the graph; return the final_answer from the resulting state.
     """
     system_msg = SystemMessage(
         content=(
             "You have access to exactly these tools:\n"
             "Respond with only that Python dict literal—no extra text."
         )
     )
     human_msg = HumanMessage(content=user_input)
     initial_state: AgentState = {"messages": [system_msg, human_msg]}
     final_state = compiled_graph.invoke(initial_state)
     return final_state.get("final_answer", "Error: No final answer generated.")
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")