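# NOTE: residue from the web file viewer this module was copied from
# (naman1102's picture / debig / f116621 / raw / history blame / 2.2 kB);
# it is not part of the program.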
from langchain_core.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun
import pandas as pd
@tool
def web_search(query: str) -> str:
    """
    Look up `query` on the web using DuckDuckGo.

    Args:
        query: The text to search the web for.

    Returns:
        The search results.
    """
    # Trace line so tool invocations are visible in the console output.
    print("Reached: web_search")
    search_tool = DuckDuckGoSearchRun()
    results = search_tool.run(query)
    return results
@tool
def parse_excel(path: str, sheet_name: str = None) -> str:
    """
    Read the Excel file at `path` and return its rows as text so the LLM can reason over them.

    Args:
        path: Location of the Excel workbook.
        sheet_name: Name of the sheet to read. If omitted, or if the workbook
            has no sheet with that name, the first sheet is used.

    Returns:
        The string form of a list of row dictionaries keyed by column name, e.g.
        "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]".
        If the file is missing or cannot be read, an "Error: ..." string is
        returned instead of raising.
    """
    print("Reached: parse_excel")
    try:
        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(path) as xls:
            # Fall back to the first sheet when no (valid) sheet name was given.
            if sheet_name and sheet_name in xls.sheet_names:
                target_sheet = sheet_name
            else:
                target_sheet = xls.sheet_names[0]
            df = pd.read_excel(xls, sheet_name=target_sheet)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."
    except Exception as e:
        # Corrupt or unsupported files: report the failure as text so the
        # calling agent receives a usable answer rather than an exception.
        return f"Error: could not read Excel file at {path}: {e}"

    # One dict per row; str() yields a Python-literal (JSON-like) rendering.
    records = df.to_dict(orient="records")
    return str(records)
# tools.py
from pathlib import Path
from PIL import Image
import pytesseract
@tool
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.

    - Expects that Tesseract is installed on the host machine.
    - If the file is missing, cannot be opened, or OCR fails, an "Error: ..."
      string is returned instead of raising.
    - If the extracted text is empty after stripping whitespace,
      "(no visible text detected)" is returned.

    Args:
        path: Location of the image file.

    Returns:
        The stripped OCR text, the placeholder above, or an error string.
    """
    print("Reached: ocr_image")
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # Close the image (and its underlying file handle) once OCR is done,
    # whether it succeeds or fails.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"