# tools.py
from pathlib import Path

import pandas as pd
import pytesseract
from langchain_community.tools import DuckDuckGoSearchRun
from PIL import Image

from state import AgentState


def web_search_tool(state: AgentState) -> AgentState:
    """Run a DuckDuckGo search for the pending query, if there is one.

    Expects:
        state["web_search_query"] is a non-empty string.

    Returns:
        {"web_search_query": None, "web_search_result": <result text>},
        or {} when no query is set. The query key is cleared so the graph
        does not loop on the same search forever. If the search itself
        fails, the result text is an "Error during web search: ..." message
        instead of the exception propagating, matching the other tools.
    """
    query = state.get("web_search_query", "")
    if not query:
        return {}  # nothing to do

    try:
        result_text = DuckDuckGoSearchRun().run(query)
    except Exception as e:
        # Tool boundary: report the failure as text (like the OCR and Excel
        # tools do) and still clear the query below so it is not retried.
        result_text = f"Error during web search: {e}"

    return {
        "web_search_query": None,
        "web_search_result": result_text,
    }


def ocr_image_tool(state: AgentState) -> AgentState:
    """Extract text from the image at state["ocr_path"] with Tesseract.

    Expects:
        state["ocr_path"] is a path to an image file.

    Returns:
        {"ocr_path": None, "ocr_result": <extracted text>}, or {} when no
        path is set. The result is "(no visible text)" when OCR yields only
        whitespace, or an "Error during OCR: ..." message on failure.
    """
    path = state.get("ocr_path", "")
    if not path:
        return {}

    try:
        # The context manager closes the underlying file handle once OCR
        # has finished, instead of leaving it to garbage collection.
        with Image.open(path) as img:
            text = pytesseract.image_to_string(img)
        text = text.strip() or "(no visible text)"
    except Exception as e:
        text = f"Error during OCR: {e}"

    return {
        "ocr_path": None,
        "ocr_result": text,
    }


def parse_excel_tool(state: AgentState) -> AgentState:
    """Read one sheet of an Excel workbook and return its rows as text.

    Expects:
        state["excel_path"] is a path to an .xlsx file, and
        state["excel_sheet_name"] optionally names a sheet.

    Returns:
        {"excel_path": None, "excel_sheet_name": None,
         "excel_result": <str() of the list of row dicts>}, or {} when no
        path is set. If the requested sheet is missing or not given, the
        first sheet in the workbook is used. On failure the result is an
        "Error reading Excel: ..." message.
    """
    path = state.get("excel_path", "")
    sheet = state.get("excel_sheet_name", "")
    if not path:
        return {}

    try:
        # Context manager guarantees the workbook handle is closed even if
        # parsing raises.
        with pd.ExcelFile(path) as xls:
            if sheet and sheet in xls.sheet_names:
                target_sheet = sheet
            else:
                # Unknown or unspecified sheet: fall back to the first one.
                target_sheet = xls.sheet_names[0]
            df = pd.read_excel(xls, sheet_name=target_sheet)
        records = df.to_dict(orient="records")
        text = str(records)
    except Exception as e:
        text = f"Error reading Excel: {e}"

    return {
        "excel_path": None,
        "excel_sheet_name": None,
        "excel_result": text,
    }


def run_tools(state: AgentState, tool_out: AgentState) -> AgentState:
    """Merge the partial state returned by a tool wrapper into the main state.

    Keys in tool_out override keys of the same name in state; all other
    keys of state are carried over unchanged, i.e.
    new_state = {**state, **tool_out}. Neither input is mutated.

    This node should be wired as its own graph node, not as a transition
    function.
    """
    new_state = {**state, **tool_out}
    return new_state