naman1102 committed on
Commit 65abbbc · 1 Parent(s): c99f0eb
Files changed (2)
  1. app.py +40 -56
  2. tools.py +11 -0
app.py CHANGED
@@ -20,46 +20,40 @@ from state import AgentState
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

-from tools import ocr_image_tool, parse_excel_tool, web_search_tool
+from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools
 tool_node = ToolNode([ocr_image_tool, parse_excel_tool, web_search_tool])

 llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.0)

 agent = create_react_agent(model=llm, tools=tool_node)

-# 2) Build a two‐edge graph:
 def plan_node(state: AgentState, user_input: str) -> AgentState:
     """
-    Reads state['messages'] + user_input and decides:
-      If it needs to call web_search, set state['web_search_query'] to a query.
-      Else if it needs to call ocr, set state['ocr_path'] to the image path.
-      Else if it needs Excel, set state['excel_path'] and 'excel_sheet_name'.
-      Otherwise, set state['final_answer'] to a plain text answer.
-    We also append user_input to state['messages'] so the LLM sees the full history.
+    Decide which tool (if any) to call. Append the user_input to state["messages"] and
+    return a partial AgentState that sets exactly one of:
+      - web_search_query (string)
+      - ocr_path (string)
+      - excel_path (string) + excel_sheet_name (optional)
+      - final_answer (string)
     """
-    # 4.a) Grab prior chat history, append user_input:
     prior = state.get("messages", [])
     chat_history = prior + [f"USER: {user_input}"]

-    # 4.b) Send that to the LLM with a prompt explaining the new schema:
     prompt = chat_history + [
-        "ASSISTANT: You can set one of the following keys:\n"
-        " • web_search_query: <string> \n"
-        " • ocr_path: <path> \n"
-        " • excel_path: <path> \n"
-        " • excel_sheet_name: <sheet> \n"
+        "ASSISTANT: You can set exactly one of the following keys in a Python dict:\n"
+        " • web_search_query: <search terms> \n"
+        " • ocr_path: <path to an image> \n"
+        " • excel_path: <path to xlsx> \n"
+        " • excel_sheet_name: <sheet name> \n"
         "Or, if no tool is needed, set final_answer: <your answer>.\n"
-        "Respond with a Python‐dict literal that contains exactly one of those keys.\n"
         "Example: {'web_search_query':'Mercedes Sosa discography'}\n"
-        "No additional text!"
+        "Respond with only that Python dict literal—no additional text."
     ]
     llm_out = llm(prompt).content.strip()

-    # 4.c) Try to eval as a Python dict:
     try:
-        parsed = eval(llm_out, {}, {})  # trust that user obeyed instructions
+        parsed = eval(llm_out, {}, {})
         if isinstance(parsed, dict):
-            # Only keep recognized keys, ignore anything else
             new_state: AgentState = {"messages": chat_history}
             allowed = {
                 "web_search_query",
@@ -75,22 +69,19 @@ def plan_node(state: AgentState, user_input: str) -> AgentState:
     except Exception:
         pass

-    # 4.d) If parsing failed, or they returned something else, set a fallback
+    # Fallback if parsing failed
     return {
         "messages": chat_history,
         "final_answer": "Sorry, I could not parse your intent."
     }

+
 # ─── 5) Define “finalize” node: compose the final answer using any tool results ───
 def finalize_node(state: AgentState) -> AgentState:
     """
-    By this point:
-      - state['messages'] contains the chat history (ending with how we requested a tool).
-      - One or more of web_search_result, ocr_result, excel_result might be filled.
-      - Or, state['final_answer'] is already set, meaning no tool was needed.
-    We ask the LLM to produce a final text answer.
+    After any tool results exist in state, or if final_answer was already set,
+    ask the LLM to produce the final answer.
     """
-    # 5.a) Build a prompt listing any tool results:
     parts = state.get("messages", [])
     if "web_search_result" in state and state["web_search_result"] is not None:
         parts.append(f"WEB_SEARCH_RESULT: {state['web_search_result']}")
@@ -98,36 +89,36 @@ def finalize_node(state: AgentState) -> AgentState:
         parts.append(f"OCR_RESULT: {state['ocr_result']}")
     if "excel_result" in state and state["excel_result"] is not None:
         parts.append(f"EXCEL_RESULT: {state['excel_result']}")
+    # If plan already set final_answer, skip calling the LLM again
+    if state.get("final_answer") is not None:
+        return {"final_answer": state["final_answer"]}

     parts.append("ASSISTANT: Please provide the final answer now.")
     llm_out = llm(parts).content.strip()
-
     return {"final_answer": llm_out}


+tool_node = ToolNode([web_search_tool, ocr_image_tool, parse_excel_tool])

-
-
-
-
-
-
-
-
+# ─── 5) Build the StateGraph ───
 graph = StateGraph(AgentState)

-# 6.a) Register nodes in order:
+# 5.a) Register nodes
 graph.add_node("plan", plan_node)
 graph.add_node("tools", tool_node)
+graph.add_node("run_tools", run_tools)
 graph.add_node("finalize", finalize_node)

-# 6.b) START → "plan"
+# 5.b) START → plan
 graph.add_edge(START, "plan")

-# 6.c) If plan_node sets a tool‐query key, go to "tools"; otherwise go to "finalize".
+
+
+
+
+# 4) After plan, we branch based on whether a tool key was set:
+#    If plan_node set web_search_query/ocr_path/excel_path, go to "tools"; otherwise go straight to "finalize".
 def route_plan(state: AgentState, plan_out: AgentState) -> str:
-    # If plan_node placed a "web_search_query", "ocr_path", or "excel_path", go to tools.
-    # (Note: plan_out already replaced state["messages"])
     if plan_out.get("web_search_query") or plan_out.get("ocr_path") or plan_out.get("excel_path"):
         return "tools"
     return "finalize"
@@ -138,34 +129,27 @@ graph.add_conditional_edges(
     {"tools": "tools", "finalize": "finalize"}
 )

-def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
-    """
-    When a tool‐wrapper returns, it has already consumed the relevant key
-    (e.g. set web_search_query back to None) and added tool_result.
-    We just merge that into state.
-    """
-    new_state = {**state, **tool_out}
-    return new_state
-

+graph.add_edge("tools", "run_tools")

-graph.add_edge("tools", "finalize", run_tools)
+# 5.e) run_tools → finalize
+graph.add_edge("run_tools", "finalize")

-# 6.e) "finalize" → END
+# 5.f) finalize → END
 graph.add_edge("finalize", END)

 compiled_graph = graph.compile()

-# ─── 7) Define respond_to_input that drives the graph ───
 def respond_to_input(user_input: str) -> str:
-    # On first turn, messages=[], no query keys set.
+    """
+    Initialize with an empty messages list. Then run through plan → tools → run_tools → finalize.
+    Return the "final_answer" from the final state.
+    """
     initial_state: AgentState = {"messages": []}
     final_state = compiled_graph.invoke(initial_state, user_input)
-    # final_state should have 'final_answer'
     return final_state.get("final_answer", "Error: No final answer generated.")


-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
tools.py CHANGED
@@ -69,3 +69,14 @@ def parse_excel_tool(state: AgentState) -> AgentState:
         "excel_sheet_name": None,
         "excel_result": text
     }
+
+
+def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
+    """
+    Merges whatever partial state the tool wrapper returned (tool_out)
+    into the main state. That is, combine previous keys with new keys:
+    new_state = { **state, **tool_out }.
+    This node should be wired as its own graph node, not as a transition function.
+    """
+    new_state = {**state, **tool_out}
+    return new_state
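The run_tools helper added here is a shallow dict merge: any key present in tool_out overwrites the same key in state, and untouched keys carry over. Note that when it is registered with graph.add_node in app.py, LangGraph calls it with only the state, so the two-argument signature relies on the tool output being supplied some other way. A small illustration of the merge semantics alone (the literal values are made up):

state = {"messages": ["USER: hi"], "web_search_query": "Mercedes Sosa discography"}
tool_out = {"web_search_query": None, "web_search_result": "top search hit text"}
merged = {**state, **tool_out}
# merged == {"messages": ["USER: hi"],
#            "web_search_query": None,
#            "web_search_result": "top search hit text"}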