New_Final_Assignment

Sleeping

App Files Files Community

naman1102 committed on Jun 2

Commit

7dbc634

1 Parent(s): 5a43f6c

excel_fix

Browse files

Files changed (2) hide show

app.py +17 -17
tools.py +60 -17

app.py CHANGED Viewed

@@ -39,7 +39,7 @@ def plan_node(state: AgentState) -> AgentState:
     system_msg = SystemMessage(
         content=(
             "You are an agent that decides whether to call a tool or answer directly.\n"
-            "User’s question: \"" + user_input + "\"\n\n"
             "• If you can answer directly, return exactly {\"final_answer\":\"<your answer>\"}.\n"
             "• Otherwise, respond with exactly one of:\n"
             "    {\"web_search_query\":\"<search terms>\"}\n"
@@ -56,9 +56,9 @@ def plan_node(state: AgentState) -> AgentState:
     llm_out = llm_response.content.strip()
     # ── DEBUG: print raw LLM output ──
-    print("\n>>> plan_node got raw LLM output:")
-    print(llm_out)
-    print("<<< end raw output\n")
     # (3) Append the LLM output to the message history
     ai_msg = AIMessage(content=llm_out)
@@ -67,7 +67,7 @@ def plan_node(state: AgentState) -> AgentState:
     # (4) Try parsing as JSON
     try:
         parsed = json.loads(llm_out)
-        print(">>> plan_node parsed JSON:", parsed)
         if isinstance(parsed, dict):
             partial: AgentState = {"messages": new_msgs}
             allowed = {
@@ -81,13 +81,13 @@ def plan_node(state: AgentState) -> AgentState:
             for k, v in parsed.items():
                 if k in allowed:
                     partial[k] = v
-                    print(f">>> plan_node is setting {k!r} → {v!r}")
             return partial
     except json.JSONDecodeError as e:
-        print(">>> plan_node JSON parse error:", e)
     # (5) Fallback
-    print(">>> plan_node falling back to final_answer alone\n")
     return {"messages": new_msgs, "final_answer": "Sorry, I could not parse your intent."}
@@ -136,28 +136,28 @@ def tool_node(state: AgentState) -> AgentState:
     #   "web_search_query", "ocr_path", "excel_path"/"excel_sheet_name", "audio_path"
     # Whichever is present, call the corresponding tool and return its result.
     if state.get("web_search_query"):
-        print(f">>> tools_node dispatching web_search_tool with query: {state['web_search_query']!r}")
         out = web_search_tool(state)
         return out
     if state.get("ocr_path"):
-        print(f">>> tools_node dispatching ocr_image_tool with path: {state['ocr_path']!r}")
         out = ocr_image_tool(state)
         return out
     if state.get("excel_path"):
         # We assume plan_node always sets both excel_path and excel_sheet_name together
-        print(f">>> tools_node dispatching parse_excel_tool with path: {state['excel_path']!r}, sheet: {state.get('excel_sheet_name')!r}")
         out = parse_excel_tool(state)
         return out
     if state.get("audio_path"):
-        print(f">>> tools_node dispatching audio_transcriber_tool with path: {state['audio_path']!r}")
         out = audio_transcriber_tool(state)
         return out
     # If we somehow reach here, no recognized tool key was set:
-    print(">>> tools_node: no valid tool key found in state!")
     return {}
@@ -188,7 +188,7 @@ graph.add_edge(START, "plan")
 # 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
 def route_plan(plan_out: AgentState) -> str:
     # print what keys are present in plan_out
-    print(f">> route_plan sees plan_out keys: {list(plan_out.keys())}")
     if (
         plan_out.get("web_search_query")
@@ -196,9 +196,9 @@ def route_plan(plan_out: AgentState) -> str:
         or plan_out.get("excel_path")
         or plan_out.get("audio_path")
     ):
-        print(">> route_plan ➡️ tools")
         return "tools"
-    print(">> route_plan ➡️ finalize")
     return "finalize"

     system_msg = SystemMessage(
         content=(
             "You are an agent that decides whether to call a tool or answer directly.\n"
+            "User's question: \"" + user_input + "\"\n\n"
             "• If you can answer directly, return exactly {\"final_answer\":\"<your answer>\"}.\n"
             "• Otherwise, respond with exactly one of:\n"
             "    {\"web_search_query\":\"<search terms>\"}\n"
     llm_out = llm_response.content.strip()
     # ── DEBUG: print raw LLM output ──
+    # print("\n>>> plan_node got raw LLM output:")
+    # print(llm_out)
+    # print("<<< end raw output\n")
     # (3) Append the LLM output to the message history
     ai_msg = AIMessage(content=llm_out)
     # (4) Try parsing as JSON
     try:
         parsed = json.loads(llm_out)
+        # print(">>> plan_node parsed JSON:", parsed)
         if isinstance(parsed, dict):
             partial: AgentState = {"messages": new_msgs}
             allowed = {
             for k, v in parsed.items():
                 if k in allowed:
                     partial[k] = v
+                    # print(f">>> plan_node is setting {k!r} → {v!r}")
             return partial
     except json.JSONDecodeError as e:
+        # print(">>> plan_node JSON parse error:", e)
+        pass
     # (5) Fallback
+    # print(">>> plan_node falling back to final_answer alone\n")
     return {"messages": new_msgs, "final_answer": "Sorry, I could not parse your intent."}
     #   "web_search_query", "ocr_path", "excel_path"/"excel_sheet_name", "audio_path"
     # Whichever is present, call the corresponding tool and return its result.
     if state.get("web_search_query"):
+        # print(f">>> tools_node dispatching web_search_tool with query: {state['web_search_query']!r}")
         out = web_search_tool(state)
         return out
     if state.get("ocr_path"):
+        # print(f">>> tools_node dispatching ocr_image_tool with path: {state['ocr_path']!r}")
         out = ocr_image_tool(state)
         return out
     if state.get("excel_path"):
         # We assume plan_node always sets both excel_path and excel_sheet_name together
+        # print(f">>> tools_node dispatching parse_excel_tool with path: {state['excel_path']!r}, sheet: {state.get('excel_sheet_name')!r}")
         out = parse_excel_tool(state)
         return out
     if state.get("audio_path"):
+        # print(f">>> tools_node dispatching audio_transcriber_tool with path: {state['audio_path']!r}")
         out = audio_transcriber_tool(state)
         return out
     # If we somehow reach here, no recognized tool key was set:
+    # print(">>> tools_node: no valid tool key found in state!")
     return {}
 # 5.c) plan → conditional: if any tool key was set, go to "tools"; otherwise "finalize"
 def route_plan(plan_out: AgentState) -> str:
     # print what keys are present in plan_out
+    # print(f">> route_plan sees plan_out keys: {list(plan_out.keys())}")
     if (
         plan_out.get("web_search_query")
         or plan_out.get("excel_path")
         or plan_out.get("audio_path")
     ):
+        # print(">> route_plan ➡️ tools")
         return "tools"
+    # print(">> route_plan ➡️ finalize")
     return "finalize"

tools.py CHANGED Viewed

@@ -49,34 +49,77 @@ def ocr_image_tool(state: AgentState) -> AgentState:
 def parse_excel_tool(state: AgentState) -> AgentState:
     """
-    Expects: state["excel_path"] is a path to an .xlsx file,
-             and state["excel_sheet_name"] optionally names a sheet.
-    Returns: {"excel_path": None, "excel_sheet_name": None, "excel_result": <string>}.
     """
-    print("reached parse excel tool")
     path = state.get("excel_path", "")
     sheet = state.get("excel_sheet_name", "")
     if not path:
         return {}
-    try:
-        xls = pd.ExcelFile(path)
-        if sheet and sheet in xls.sheet_names:
-            df = pd.read_excel(xls, sheet_name=sheet)
-        else:
-            df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
-        records = df.to_dict(orient="records")
-        text = str(records)
-    except Exception as e:
-        text = f"Error reading Excel: {e}"
-    print(f"excel_result: {text}")
     return {
         "excel_path": None,
         "excel_sheet_name": None,
-        "excel_result": text
     }
 def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
     """
     Merges whatever partial state the tool wrapper returned (tool_out)

 def parse_excel_tool(state: AgentState) -> AgentState:
     """
+    Attempts to read an actual .xlsx file at state["excel_path"]. If the file isn’t found,
+    scans the conversation history for a Markdown‐style table and returns that instead.
+    Returns:
+      {
+        "excel_path": None,
+        "excel_sheet_name": None,
+        "excel_result": "<either CSV‐like text or extracted Markdown table>"
+      }
+    If neither a real file nor a table block is found, returns an error message.
     """
     path = state.get("excel_path", "")
     sheet = state.get("excel_sheet_name", "")
     if not path:
         return {}
+    # 1) Try reading the real file first
+    if os.path.exists(path):
+        try:
+            xls = pd.ExcelFile(path)
+            if sheet and sheet in xls.sheet_names:
+                df = pd.read_excel(xls, sheet_name=sheet)
+            else:
+                df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
+            records = df.to_dict(orient="records")
+            text = str(records)
+            return {
+                "excel_path": None,
+                "excel_sheet_name": None,
+                "excel_result": text
+            }
+        except Exception as e:
+            # If there's an I/O or parsing error, fall through to table‐extraction
+            print(f">>> parse_excel_tool: Error reading Excel file {path}: {e}")
+    # 2) Fallback: extract a Markdown table from any HumanMessage in state["messages"]
+    messages = state.get("messages", [])
+    table_lines = []
+    collecting = False
+    for msg in messages:
+        if isinstance(msg, HumanMessage):
+            for line in msg.content.splitlines():
+                # Start collecting when we see the first table header row
+                if re.match(r"^\s*\|\s*[-A-Za-z0-9]", line):
+                    collecting = True
+                if collecting:
+                    if not re.match(r"^\s*\|", line):
+                        # stop when the block ends (blank line or non‐table line)
+                        collecting = False
+                        break
+                    table_lines.append(line)
+            if table_lines:
+                break
+    if not table_lines:
+        return {
+            "excel_path": None,
+            "excel_sheet_name": None,
+            "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
+        }
+    # Remove any separator rows like "| ---- | ---- |"
+    clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
+    table_block = "\n".join(clean_rows).strip()
     return {
         "excel_path": None,
         "excel_sheet_name": None,
+        "excel_result": table_block
     }
 def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
     """
     Merges whatever partial state the tool wrapper returned (tool_out)