Update app.py

app.py CHANGED
@@ -126,29 +126,30 @@ def tool_node(state: AgentState) -> AgentState:
 
 # ─── 4) merge_tool_output ───
 def merge_tool_output(state: AgentState) -> AgentState:
-    """
-
-    """
-    prev = state.get("prev_state", {})
-    merged = {**prev, **state}
+    prev_state = state.get("prev_state", {})
+    merged = {**prev_state, **state}
     merged.pop("prev_state", None)
+
+    # Detect which tool key was used in prev_state (it’s exactly one of these)
+    for tool_key in ("wiki_query", "web_search_query", "ocr_path", "excel_path", "audio_path"):
+        if prev_state.get(tool_key) is not None:
+            # Increment the count of tool calls
+            merged["tool_calls"] = merged.get("tool_calls", 0) + 1
+            # Record that we have used this tool_key
+            used = merged.get("used_tools", []).copy()
+            if tool_key not in used:
+                used.append(tool_key)
+            merged["used_tools"] = used
+            break
+
     return merged
 
 
 # ─── 5) inspect_node ───
 def inspect_node(state: AgentState) -> AgentState:
-    """
-    After running a tool, show GPT:
-      - ORIGINAL user question
-      - Any tool results (web_search_result, ocr_result, excel_result, transcript, wiki_result)
-      - The INTERIM_ANSWER (what plan_node initially provided under 'final_answer')
-    Then ask GPT to either:
-      • Return {"final_answer": "<final>"} if done, OR
-      • Return exactly one tool key to run next (wiki_query / web_search_query / ocr_path / excel_path & excel_sheet_name / audio_path).
-    """
     messages_for_llm = []
 
-    # 1)
+    # 1) Original question
     question = ""
     for msg in reversed(state.get("messages", [])):
         if isinstance(msg, HumanMessage):
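A note on the merge semantics: in `{**prev_state, **state}` the right-hand operand wins, so keys freshly written by the tool node always override stale values carried in `prev_state`. A minimal sketch of the new bookkeeping in isolation — the real `AgentState` TypedDict is defined outside this diff, so the shape below is an assumption:

    # Hypothetical stand-in for the AgentState fields this diff touches.
    from typing import TypedDict

    class AgentState(TypedDict, total=False):
        prev_state: dict
        tool_calls: int
        used_tools: list

    state: AgentState = {
        "prev_state": {"wiki_query": "some query", "tool_calls": 0, "used_tools": []},
    }
    prev_state = state.get("prev_state", {})
    merged = {**prev_state, **state}   # later mapping (state) wins on key clashes
    merged.pop("prev_state", None)
    merged["tool_calls"] = merged.get("tool_calls", 0) + 1
    print(merged["tool_calls"])        # 1: one completed tool round recorded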
@@ -156,7 +157,7 @@ def inspect_node(state: AgentState) -> AgentState:
             break
     messages_for_llm.append(SystemMessage(content=f"USER_QUESTION: {question}"))
 
-    # 2)
+    # 2) Any tool results so far
     if sr := state.get("web_search_result"):
         messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))
     if orc := state.get("ocr_result"):
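One caveat that applies to all of these guards, before and after this change: `if sr := state.get("web_search_result"):` tests truthiness, so a present-but-empty result (e.g. OCR returning "") is silently omitted from the prompt. If empty results should still reach the model, the explicit comparison is the safer spelling (illustrative only, not part of this commit):

    if (sr := state.get("web_search_result")) is not None:
        messages_for_llm.append(SystemMessage(content=f"WEB_SEARCH_RESULT: {sr}"))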
@@ -168,23 +169,57 @@ def inspect_node(state: AgentState) -> AgentState:
     if wr := state.get("wiki_result"):
         messages_for_llm.append(SystemMessage(content=f"WIKIPEDIA_RESULT: {wr}"))
 
-    # 3)
+    # 3) Interim answer
     if ia := state.get("final_answer"):
         messages_for_llm.append(SystemMessage(content=f"INTERIM_ANSWER: {ia}"))
 
-    # 4)
+    # 4) How many times have we called a tool?
+    used_tools = state.get("used_tools", [])
+    tool_calls = state.get("tool_calls", 0)
+
+    # If we've already tried all five tools once, or exceeded a small limit (e.g. 5),
+    # force a final answer now. We append a dummy instruction so the LLM knows:
+    if tool_calls >= 5 or len(used_tools) >= 5:
+        # The user’s interim answer and all tool results exist;
+        # we instruct GPT to treat it as final.
+        prompt = (
+            "We have already used every available tool or reached the maximum number of attempts.\n"
+            "Therefore, return exactly {\"final_answer\":\"<your best final answer>\"} and nothing else.\n"
+        )
+        messages_for_llm.append(SystemMessage(content=prompt))
+        llm_response = llm(messages_for_llm)
+        raw = llm_response.content.strip()
+        new_msgs = state["messages"] + [AIMessage(content=raw)]
+        try:
+            parsed = json.loads(raw)
+            if isinstance(parsed, dict) and "final_answer" in parsed:
+                return {"messages": new_msgs, "final_answer": parsed["final_answer"],
+                        "used_tools": used_tools, "tool_calls": tool_calls}
+        except json.JSONDecodeError:
+            pass
+        # Fallback
+        return {
+            "messages": new_msgs,
+            "final_answer": "ERROR: inspect forced final but parsing failed.",
+            "used_tools": used_tools,
+            "tool_calls": tool_calls
+        }
+
+    # 5) Otherwise, ask GPT if it wants another tool
     prompt = (
         "You have a current draft answer (INTERIM_ANSWER) and possibly some tool results above.\n"
-        "If you are confident it’s correct, return exactly:\n"
+        "If you are confident it’s now correct, return exactly:\n"
         " {\"final_answer\":\"<your final answer>\"}\n"
        "and nothing else.\n"
-        "Otherwise, return exactly one of these JSON literals to fetch another tool:\n"
+        "Otherwise, return exactly one of these JSON literals to fetch another tool, "
+        "but DO NOT return a tool you have already used:\n"
         " {\"wiki_query\":\"<query for Wikipedia>\"}\n"
         " {\"web_search_query\":\"<search terms>\"}\n"
         " {\"ocr_path\":\"<image path or task_id>\"}\n"
         " {\"excel_path\":\"<xls path>\", \"excel_sheet_name\":\"<sheet name>\"}\n"
         " {\"audio_path\":\"<audio path or task_id>\"}\n"
         "Do NOT wrap in markdown—return only the JSON object.\n"
+        f"Already used tools: {used_tools}\n"
     )
     messages_for_llm.append(SystemMessage(content=prompt))
     llm_response = llm(messages_for_llm)
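The point of the two new counters is a hard termination bound: merge_tool_output increments tool_calls once per completed tool round, so the `tool_calls >= 5 or len(used_tools) >= 5` gate fires after at most five rounds no matter what the model keeps requesting. A hand-driven sketch of that control flow (the request list is invented for illustration):

    used_tools = []
    tool_calls = 0
    requests = ["wiki_query", "web_search_query", "ocr_path",
                "excel_path", "audio_path", "wiki_query"]

    for req in requests:
        if tool_calls >= 5 or len(used_tools) >= 5:
            print("limit reached -> force final answer")
            break
        if req in used_tools:
            print(f"model tried to reuse {req} -> force final answer")
            break
        tool_calls += 1
        used_tools.append(req)
    # After five distinct tools, the sixth request never runs.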
@@ -194,26 +229,34 @@ def inspect_node(state: AgentState) -> AgentState:
     try:
         parsed = json.loads(raw)
         if isinstance(parsed, dict):
-            partial: AgentState = {
-                "messages": new_msgs,
+            # If GPT asks for a tool that’s already in used_tools, override and force finalize.
+            for key in parsed:
+                if key in ("wiki_query", "web_search_query", "ocr_path", "excel_path", "audio_path"):
+                    if key in used_tools:
+                        # GPT tried to reuse a tool → force a final answer instead
+                        return {
+                            "messages": new_msgs,
+                            "final_answer": state.get("final_answer", ""),
+                            "used_tools": used_tools,
+                            "tool_calls": tool_calls
+                        }
+            # Otherwise, it’s either final_answer or a brand‐new tool request
+            partial: AgentState = {
+                "messages": new_msgs,
+                "used_tools": used_tools,
+                "tool_calls": tool_calls
             }
             for k, v in parsed.items():
-                partial[k] = v
+                partial[k] = v
             return partial
     except json.JSONDecodeError:
         pass
 
     return {
         "messages": new_msgs,
-        "final_answer": "ERROR: could not parse inspect decision."
+        "final_answer": "ERROR: could not parse inspect decision.",
+        "used_tools": used_tools,
+        "tool_calls": tool_calls
     }
 
 
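Both parse sites assume the model really returns bare JSON. Models sometimes wrap replies in markdown fences despite the "Do NOT wrap in markdown" instruction, so a cheap pre-clean before json.loads would make the error fallback rarer. A sketch under that assumption (the helper name is made up):

    import json
    import re

    def parse_llm_json(raw: str):
        """Best effort: strip ```json fences, then parse; None if still invalid."""
        cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip())
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None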
@@ -284,9 +327,10 @@ compiled_graph = graph.compile()
 
 
 # ─── 8) respond_to_input ───
-def respond_to_input(user_input: str, task_id) -> str:
+def respond_to_input(user_input: str, task_id: str) -> str:
     """
     Seed state['messages'] with a SystemMessage + HumanMessage(user_input),
+    include the current task_id so that OCR/Audio tools can fetch files,
     then invoke the cyclic graph. Return the final_answer from the resulting state.
     """
     system_msg = SystemMessage(
@@ -298,19 +342,23 @@ def respond_to_input(user_input: str, task_id) -> str:
             " • OCR: set {\"ocr_path\":\"<image path or task_id>\"}\n"
             " • Excel: set {\"excel_path\":\"<xlsx path>\", \"excel_sheet_name\":\"<sheet>\"}\n"
             " • Audio transcription: set {\"audio_path\":\"<audio path or task_id>\"}\n"
-            "If you can answer immediately, set {\"final_answer\":\"<answer>\"}\n"
+            "If you can answer immediately, set {\"final_answer\":\"<answer>\"}.\n"
             "Respond with only one JSON object and no extra formatting."
         )
     )
     human_msg = HumanMessage(content=user_input)
 
-    initial_state: AgentState = {
+    initial_state: AgentState = {
+        "messages": [system_msg, human_msg],
+        "task_id": task_id,
+        "used_tools": [],   # track which tools have been requested
+        "tool_calls": 0     # count of how many tool invocations so far
+    }
     final_state = compiled_graph.invoke(initial_state)
     return final_state.get("final_answer", "Error: No final answer generated.")
 
 
 
-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
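With the seeded counters in place, a call now looks like this (question text and id are placeholder values, not from this repo):

    answer = respond_to_input(
        "What is the capital of Australia?",
        task_id="demo-task-123",   # placeholder; real ids come from the caller
    )
    print(answer)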