Spaces:
Sleeping
Sleeping
| from langchain_core.tools import tool | |
| from langchain_community.tools import DuckDuckGoSearchRun | |
| import pandas as pd | |
def web_search(query: str) -> str:
    """
    Look up `query` with the DuckDuckGo search tool and hand back its output.

    Args:
        query: The text to search the web for.

    Returns:
        Whatever `DuckDuckGoSearchRun.run` produces for the query.
    """
    # Trace line so tool invocations are visible in the console output.
    print(f"Reached: web_search: {query}")
    # A fresh search runner is created for every call, as in the original.
    return DuckDuckGoSearchRun().run(query)
def parse_excel(path: str, sheet_name: str = None) -> str:
    """
    Read one sheet of the Excel workbook at `path` and return its rows as text.

    The sheet named `sheet_name` is used when it is given and exists in the
    workbook; in every other case (no name, empty name, unknown name) the
    first sheet is used instead, without reporting an error.

    Args:
        path: Location of the Excel file.
        sheet_name: Optional name of the sheet to read.

    Returns:
        `str()` of a list of per-row dicts (a Python repr, not strict JSON),
        e.g. "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}]",
        or an "Error: ..." message when the file does not exist.
    """
    print(f"Reached: parse_excel: {path} {sheet_name}")

    # 1. Open the workbook. Only a missing file is turned into an error
    #    string; any other failure propagates to the caller as before.
    try:
        workbook = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."

    # The context manager closes the underlying file handle once the sheet
    # has been read, instead of leaving it open until garbage collection.
    with workbook:
        # 2. Choose the sheet, falling back to the first one.
        available = workbook.sheet_names
        if sheet_name and sheet_name in available:
            chosen = sheet_name
        else:
            chosen = available[0]
        df = pd.read_excel(workbook, sheet_name=chosen)

    # 3. Serialize the rows as a list of {column: value} dicts.
    records = df.to_dict(orient="records")
    return str(records)
| # tools.py | |
| from pathlib import Path | |
| from PIL import Image | |
| import pytesseract | |
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.

    - Expects that Tesseract is installed on the host machine.
    - Never raises for a missing, unreadable, or un-OCR-able file; an
      "Error: ..." string is returned instead.

    Args:
        path: Location of the image file.

    Returns:
        The recognized text with surrounding whitespace stripped,
        "(no visible text detected)" when that text is empty, or an error
        message describing which step failed.
    """
    print(f"Reached: ocr_image: {path}")

    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # Use the image as a context manager so its file handle is released on
    # every exit path, including the OCR-failure return below.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"