Spaces:
Sleeping
Sleeping
File size: 2,199 Bytes
1f5cba5 1f08b63 1f5cba5 36b6dba f116621 1f5cba5 f116621 1f5cba5 f116621 1f5cba5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from typing import Optional

import pandas as pd
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
@tool
def web_search(query: str) -> str:
    """Search the web for information.

    Args:
        query: The query to search the web for.

    Returns:
        The search results, or an error message if the search could not
        be completed.
    """
    print("Reached: web_search")
    try:
        return DuckDuckGoSearchRun().run(query)
    except Exception as e:
        # Search backends fail for transient reasons (rate limiting,
        # network errors). Hand the failure back as text so the calling
        # agent can react instead of the whole run aborting.
        return f"Error: web search failed: {e}"
@tool
def parse_excel(path: str, sheet_name: Optional[str] = None) -> str:
    """Read one sheet of an Excel file and return its rows as text.

    The sheet is loaded into a DataFrame, converted to a list of per-row
    dicts, and rendered with ``str()`` so the LLM can reason over it. The
    result is therefore a JSON-like (Python literal) string, e.g. (collapsed):
    "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]"

    Args:
        path: Location of the Excel file.
        sheet_name: Name of the sheet to read. If omitted, or if the
            workbook has no sheet with this name, the first sheet is used.

    Returns:
        The stringified list of row records, or an error message when no
        file exists at `path`.
    """
    print("Reached: parse_excel")

    # 1. Load the Excel workbook
    try:
        workbook = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."

    # ExcelFile holds the underlying file open; the context manager makes
    # sure it is closed even if reading the sheet fails.
    with workbook as xls:
        # 2. Choose the sheet (unknown or missing name -> first sheet)
        if sheet_name and sheet_name in xls.sheet_names:
            chosen_sheet = sheet_name
        else:
            chosen_sheet = xls.sheet_names[0]
        df = pd.read_excel(xls, sheet_name=chosen_sheet)

    # 3. Convert to a list of row dicts and stringify
    return str(df.to_dict(orient="records"))
# tools.py
from pathlib import Path
from PIL import Image
import pytesseract
@tool
def ocr_image(path: str) -> str:
    """Run OCR on the image at `path` and return the extracted text.

    - Expects that Tesseract is installed on the host machine.
    - If the file is missing or unreadable, returns an error string.
    - If OCR finds no text, returns "(no visible text detected)".
    """
    print("Reached: ocr_image")
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # Image.open is lazy and keeps the file handle open; the context
    # manager guarantees it is released whether or not OCR succeeds.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"
|