naman1102 committed on
Commit
84345bd
·
1 Parent(s): b9bb826

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -69
app.py CHANGED
@@ -25,16 +25,20 @@ from tools import ocr_image_tool, parse_excel_tool, web_search_tool, run_tools,
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
27
  # ─── 1) plan_node ───
 
 
 
 
28
  # ─── 1) plan_node ───
29
  def plan_node(state: AgentState) -> AgentState:
30
  """
31
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
32
  then decide if it's confident enough to stop or if it needs one tool.
33
  If confident: return {"final_answer":"<answer>"}
34
- Otherwise: return exactly one of
35
  {"wiki_query":"..."},
36
  {"ocr_path":"..."},
37
- {"excel_path":"...", "excel_sheet_name":"..."},
38
  {"audio_path":"..."}
39
  """
40
  prior_msgs = state.get("messages", [])
@@ -72,12 +76,8 @@ def plan_node(state: AgentState) -> AgentState:
72
  try:
73
  parsed = json.loads(llm_out)
74
  if isinstance(parsed, dict):
75
- # Build a fresh state that carries only messages + tool_counter
76
- partial: AgentState = {
77
- "messages": new_msgs,
78
- "tool_counter": state.get("tool_counter", 0),
79
- }
80
- allowed_keys = {
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
@@ -86,13 +86,12 @@ def plan_node(state: AgentState) -> AgentState:
86
  "audio_path",
87
  }
88
  for k, v in parsed.items():
89
- if k in allowed_keys:
90
  partial[k] = v
91
  return partial
92
  except json.JSONDecodeError:
93
  pass
94
 
95
- # Fallback: interpret as a final answer (no further tools)
96
  return {
97
  "messages": new_msgs,
98
  "final_answer": "Sorry, I could not parse your intent.",
@@ -109,21 +108,19 @@ def tool_node(state: AgentState) -> AgentState:
109
  """
110
  Dispatch exactly one tool based on which key was set:
111
  - wiki_query β†’ wikipedia_search_tool
112
- - ocr_path β†’ ocr_image_tool
113
  - excel_path β†’ parse_excel_tool
114
  - audio_path β†’ audio_transcriber_tool
115
- - (web_search_query path is still here but not exposed to the LLM)
116
  """
117
- tool_counter = state.get("tool_counter", 0)
118
- if tool_counter > 5:
 
119
  return {}
 
120
  tool_counter += 1
121
- state["tool_counter"] = tool_counter
122
 
123
  if state.get("wiki_query"):
124
  return wikipedia_search_tool(state)
125
- if state.get("web_search_query"):
126
- return web_search_tool(state)
127
  if state.get("ocr_path"):
128
  return ocr_image_tool(state)
129
  if state.get("excel_path"):
@@ -131,37 +128,23 @@ def tool_node(state: AgentState) -> AgentState:
131
  if state.get("audio_path"):
132
  return audio_transcriber_tool(state)
133
 
134
- return {} # nothing to do
135
 
136
 
137
  # ─── 4) merge_tool_output ───
138
  def merge_tool_output(state: AgentState) -> AgentState:
139
  """
140
- Combine previous state and tool output into one, but remove any stale tool-request keys.
141
  """
142
  prev = state.get("prev_state", {}).copy()
143
 
144
- # Drop any lingering request keys so they don't persist
145
- for dead in [
146
- "wiki_query",
147
- "web_search_query",
148
- "ocr_path",
149
- "excel_path",
150
- "excel_sheet_name",
151
- "audio_path",
152
- ]:
153
  prev.pop(dead, None)
154
 
155
  merged = {**prev, **state}
156
- # Also drop them from the merged result
157
- for dead in [
158
- "wiki_query",
159
- "web_search_query",
160
- "ocr_path",
161
- "excel_path",
162
- "excel_sheet_name",
163
- "audio_path",
164
- ]:
165
  merged.pop(dead, None)
166
 
167
  merged.pop("prev_state", None)
@@ -179,14 +162,13 @@ def inspect_node(state: AgentState) -> AgentState:
179
  β€’ Return {"final_answer":"<final>"} if done, OR
180
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
181
  """
 
182
 
183
  # 0) If we've already called tools too many times, force a final answer:
184
- if state.get("tool_counter", 0) >= 5:
185
  return {
186
  "messages": state["messages"],
187
- "final_answer": state.get(
188
- "final_answer", "ERROR: no interim_answer to finalize."
189
- ),
190
  }
191
 
192
  messages_for_llm = []
@@ -240,25 +222,14 @@ def inspect_node(state: AgentState) -> AgentState:
240
  if isinstance(parsed, dict):
241
  # If GPT gave a final_answer, we finish here
242
  if "final_answer" in parsed:
243
- return {
244
- "messages": new_msgs,
245
- "final_answer": parsed["final_answer"],
246
- }
247
 
248
- # If GPT requested exactly one valid tool, return only that key + carry tool_counter
249
- valid_keys = {
250
- "wiki_query",
251
- "ocr_path",
252
- "excel_path",
253
- "excel_sheet_name",
254
- "audio_path",
255
- }
256
  requested_keys = set(parsed.keys()) & valid_keys
257
  if len(requested_keys) == 1:
258
- clean: AgentState = {
259
- "messages": new_msgs,
260
- "tool_counter": state.get("tool_counter", 0),
261
- }
262
  for k in requested_keys:
263
  clean[k] = parsed[k]
264
  return clean
@@ -270,10 +241,7 @@ def inspect_node(state: AgentState) -> AgentState:
270
  return {"messages": new_msgs, "final_answer": ia}
271
 
272
  # If there is no interim either, we cannot proceed
273
- return {
274
- "messages": new_msgs,
275
- "final_answer": "ERROR: could not parse inspect decision.",
276
- }
277
 
278
 
279
  # ─── 6) finalize_node ───
@@ -342,9 +310,12 @@ compiled_graph = graph.compile()
342
  # ─── 8) respond_to_input ───
343
  def respond_to_input(user_input: str, task_id) -> str:
344
  """
345
- Seed state['messages'] with a SystemMessage + HumanMessage(user_input),
346
- then invoke the cyclic graph. Return the final_answer from the resulting state.
347
  """
 
 
 
348
  system_msg = SystemMessage(
349
  content=(
350
  "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
@@ -359,15 +330,10 @@ def respond_to_input(user_input: str, task_id) -> str:
359
  )
360
  human_msg = HumanMessage(content=user_input)
361
 
362
- initial_state: AgentState = {
363
- "messages": [system_msg, human_msg],
364
- "task_id": task_id,
365
- "tool_counter": 0,
366
- }
367
  final_state = compiled_graph.invoke(initial_state)
368
  return final_state.get("final_answer", "Error: No final answer generated.")
369
 
370
-
371
  class BasicAgent:
372
  def __init__(self):
373
  print("BasicAgent initialized.")
 
25
  llm = ChatOpenAI(model_name="gpt-4o-mini")
26
 
27
  # ─── 1) plan_node ───
28
+ # ─── 1) plan_node ───
29
+ tool_counter = 0
30
+
31
+
32
  # ─── 1) plan_node ───
33
  def plan_node(state: AgentState) -> AgentState:
34
  """
35
  Step 1: Ask GPT to draft a concise direct answer (INTERIM_ANSWER),
36
  then decide if it's confident enough to stop or if it needs one tool.
37
  If confident: return {"final_answer":"<answer>"}
38
+ Otherwise: return exactly one of:
39
  {"wiki_query":"..."},
40
  {"ocr_path":"..."},
41
+ {"excel_path":"...","excel_sheet_name":"..."},
42
  {"audio_path":"..."}
43
  """
44
  prior_msgs = state.get("messages", [])
 
76
  try:
77
  parsed = json.loads(llm_out)
78
  if isinstance(parsed, dict):
79
+ partial: AgentState = {"messages": new_msgs}
80
+ allowed = {
 
 
 
 
81
  "final_answer",
82
  "wiki_query",
83
  "ocr_path",
 
86
  "audio_path",
87
  }
88
  for k, v in parsed.items():
89
+ if k in allowed:
90
  partial[k] = v
91
  return partial
92
  except json.JSONDecodeError:
93
  pass
94
 
 
95
  return {
96
  "messages": new_msgs,
97
  "final_answer": "Sorry, I could not parse your intent.",
 
108
  """
109
  Dispatch exactly one tool based on which key was set:
110
  - wiki_query β†’ wikipedia_search_tool
111
+ - ocr_path β†’ ocr_image_tool
112
  - excel_path β†’ parse_excel_tool
113
  - audio_path β†’ audio_transcriber_tool
 
114
  """
115
+ global tool_counter
116
+ if tool_counter >= 5:
117
+ # If we've already run 5 tools, do nothing
118
  return {}
119
+
120
  tool_counter += 1
 
121
 
122
  if state.get("wiki_query"):
123
  return wikipedia_search_tool(state)
 
 
124
  if state.get("ocr_path"):
125
  return ocr_image_tool(state)
126
  if state.get("excel_path"):
 
128
  if state.get("audio_path"):
129
  return audio_transcriber_tool(state)
130
 
131
+ return {} # no tool key present
132
 
133
 
134
  # ─── 4) merge_tool_output ───
135
  def merge_tool_output(state: AgentState) -> AgentState:
136
  """
137
+ Combine previous state and tool output into one, but remove any stale request-keys.
138
  """
139
  prev = state.get("prev_state", {}).copy()
140
 
141
+ # Drop stale request-keys in prev
142
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
 
 
 
 
 
 
 
143
  prev.pop(dead, None)
144
 
145
  merged = {**prev, **state}
146
+ # Drop them again from merged so they don't persist into the next cycle
147
+ for dead in ["wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"]:
 
 
 
 
 
 
 
148
  merged.pop(dead, None)
149
 
150
  merged.pop("prev_state", None)
 
162
  β€’ Return {"final_answer":"<final>"} if done, OR
163
  β€’ Return exactly one tool key to run next (wiki_query / ocr_path / excel_path & excel_sheet_name / audio_path).
164
  """
165
+ global tool_counter
166
 
167
  # 0) If we've already called tools too many times, force a final answer:
168
+ if tool_counter >= 5:
169
  return {
170
  "messages": state["messages"],
171
+ "final_answer": state.get("final_answer", "ERROR: no interim_answer to finalize."),
 
 
172
  }
173
 
174
  messages_for_llm = []
 
222
  if isinstance(parsed, dict):
223
  # If GPT gave a final_answer, we finish here
224
  if "final_answer" in parsed:
225
+ return {"messages": new_msgs, "final_answer": parsed["final_answer"]}
 
 
 
226
 
227
+ # If GPT requested exactly one valid tool, return only that key
228
+ valid_keys = {"wiki_query", "ocr_path", "excel_path", "excel_sheet_name", "audio_path"}
 
 
 
 
 
 
229
  requested_keys = set(parsed.keys()) & valid_keys
230
  if len(requested_keys) == 1:
231
+ clean: AgentState = {"messages": new_msgs}
232
+ # Carry forward the global tool_counter implicitly (no need to store in state)
 
 
233
  for k in requested_keys:
234
  clean[k] = parsed[k]
235
  return clean
 
241
  return {"messages": new_msgs, "final_answer": ia}
242
 
243
  # If there is no interim either, we cannot proceed
244
+ return {"messages": new_msgs, "final_answer": "ERROR: could not parse inspect decision."}
 
 
 
245
 
246
 
247
  # ─── 6) finalize_node ───
 
310
  # ─── 8) respond_to_input ───
311
  def respond_to_input(user_input: str, task_id) -> str:
312
  """
313
+ Reset the global tool_counter, seed state['messages'], invoke the graph,
314
+ and return the final_answer.
315
  """
316
+ global tool_counter
317
+ tool_counter = 0 # Reset on every new user query
318
+
319
  system_msg = SystemMessage(
320
  content=(
321
  "You are an agent orchestrator. Decide whether to use a tool or answer directly.\n"
 
330
  )
331
  human_msg = HumanMessage(content=user_input)
332
 
333
+ initial_state: AgentState = {"messages": [system_msg, human_msg], "task_id": task_id}
 
 
 
 
334
  final_state = compiled_graph.invoke(initial_state)
335
  return final_state.get("final_answer", "Error: No final answer generated.")
336
 
 
337
  class BasicAgent:
338
  def __init__(self):
339
  print("BasicAgent initialized.")