"""LangChain tool definitions: web search, Excel parsing, and image OCR.

Each function is registered with the ``@tool`` decorator so it can be bound
to an LLM agent. All tools return a plain string (results or an
"Error: ..." message) so the model can reason over the output directly.
"""

from pathlib import Path
from typing import Optional

import pandas as pd
import pytesseract
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
from PIL import Image


@tool
def web_search(query: str) -> str:
    """
    Search the web for information.

    Args:
        query: The query to search the web for.

    Returns:
        The search results.
    """
    print("Reached: web_search")
    ddg = DuckDuckGoSearchRun()
    return ddg.run(query)


@tool
def parse_excel(path: str, sheet_name: Optional[str] = None) -> str:
    """
    Read in an Excel file at `path`, optionally select a sheet by name
    (or default to the first sheet), then convert the DataFrame to a
    JSON-like string. Return that text so the LLM can reason over it.

    Example return value (collapsed):
        "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]"

    Args:
        path: Filesystem path to the Excel workbook.
        sheet_name: Optional sheet to read; falls back to the first sheet
            when omitted or not present in the workbook.

    Returns:
        A string of row records, or an "Error: ..." message on failure.
    """
    print("Reached: parse_excel")

    # 1. Load the Excel workbook.
    try:
        xls = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."
    except ValueError as e:
        # pd.ExcelFile raises ValueError for corrupt/unsupported formats;
        # report it in the tool's error-string convention instead of crashing.
        return f"Error: could not read Excel file at {path}: {e}"

    # 2. Choose the sheet — silently fall back to the first sheet when the
    # requested name is missing, matching the documented contract.
    if sheet_name and sheet_name in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)
    else:
        # default to first sheet
        df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])

    # 3. Convert to JSON-like row records for the LLM to consume.
    records = df.to_dict(orient="records")
    return str(records)


@tool
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.

    - Expects that Tesseract is installed on the host machine.
    - If the file is missing or unreadable, returns an error string.
    """
    print("Reached: ocr_image")
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    try:
        # Run pytesseract OCR
        text = pytesseract.image_to_string(img)
    except Exception as e:
        return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"