Spaces:
Sleeping
Sleeping
from langchain_core.tools import tool | |
from langchain_community.tools import DuckDuckGoSearchRun | |
import pandas as pd | |
def web_search(query: str) -> str:
    """
    Look up `query` on the web through DuckDuckGo.

    Args:
        query: The text to search for.

    Returns:
        Whatever the DuckDuckGo search tool's `run` call yields for the query.
    """
    print("Reached: web_search")
    # A new search tool instance is built on every call.
    return DuckDuckGoSearchRun().run(query)
def parse_excel(path: str, sheet_name: str = None) -> str:
    """
    Read one sheet of the Excel workbook at `path` and return its rows as text.

    Args:
        path: Location of the Excel file.
        sheet_name: Name of the sheet to read. When omitted, or when the
            workbook has no sheet with that name, the first sheet is read.

    Returns:
        The `str()` of a list of row dicts (column name -> cell value), e.g.
        "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]",
        so the LLM can reason over it; or an error message when no file
        exists at `path`.
    """
    print("Reached: parse_excel")

    # Guard only the open, so that a failure while parsing the sheet is not
    # misreported as a missing file.
    try:
        workbook = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."

    # The with-block closes the workbook's file handle once the sheet has
    # been read, including when reading raises.
    with workbook:
        names = workbook.sheet_names
        # Fall back to the first sheet when no (known) sheet name was given.
        chosen = sheet_name if sheet_name in names else names[0]
        frame = pd.read_excel(workbook, sheet_name=chosen)

    return str(frame.to_dict(orient="records"))
# tools.py | |
from pathlib import Path | |
from PIL import Image | |
import pytesseract | |
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.

    Expects that Tesseract is installed on the host machine.

    Args:
        path: Location of the image file.

    Returns:
        The recognised text with surrounding whitespace stripped,
        "(no visible text detected)" when that text is empty, or an
        "Error: ..." message when the file is missing, cannot be opened,
        or the OCR call raises.
    """
    print("Reached: ocr_image")
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # The opened image holds on to its file; the with-block releases it on
    # every exit path, including when OCR fails.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"