File size: 2,199 Bytes
1f5cba5
1f08b63
1f5cba5
 
 
36b6dba
 
 
 
 
 
 
f116621
1f5cba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f116621
1f5cba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f116621
1f5cba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from langchain_core.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun
import pandas as pd
@tool
def web_search(query: str) -> str:
    """
    Search the web for information.
    Args:
        query: The query to search the web for.
    Returns:
        The search results.
    """
    # NOTE(review): the docstring above is left verbatim on purpose -- with
    # langchain's @tool it presumably doubles as the tool description that the
    # model sees; confirm before rewording it.
    print("Reached: web_search")  # trace marker showing the tool was invoked
    # A fresh DuckDuckGo runner is built for every call; its textual result is
    # handed back unchanged.
    return DuckDuckGoSearchRun().run(query)


@tool
def parse_excel(path: str, sheet_name: str = None) -> str:
    """
    Read in an Excel file at `path`, optionally select a sheet by name (or default to the first sheet),
    then convert the DataFrame to a JSON-like string. Return that text so the LLM can reason over it.

    Args:
        path: Location of the Excel workbook to read.
        sheet_name: Name of the sheet to load. When omitted, or when the
            workbook has no sheet with that name, the first sheet is used.
    Returns:
        The string form of a list holding one dict per row, or a message
        starting with "Error:" when the workbook cannot be found or opened.

    Example return value (collapsed):
      "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]"
    """
    print("Reached: parse_excel")  # trace marker showing the tool was invoked

    # 1. Load the Excel workbook. Failures are reported as text (not raised)
    #    so the calling agent can read the message, matching ocr_image.
    try:
        workbook = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."
    except Exception as e:
        # e.g. a corrupt file or an unsupported format
        return f"Error: could not open Excel file: {e}"

    # The context manager closes the workbook's underlying file handle even if
    # reading the sheet raises.
    with workbook:
        # 2. Choose the sheet: the requested one if it exists, else the first.
        if sheet_name and sheet_name in workbook.sheet_names:
            target_sheet = sheet_name
        else:
            target_sheet = workbook.sheet_names[0]
        df = pd.read_excel(workbook, sheet_name=target_sheet)

    # 3. Convert the rows to a list of dicts and return its string form.
    records = df.to_dict(orient="records")
    return str(records)



# tools.py

from pathlib import Path
from PIL import Image
import pytesseract


@tool
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.
    - Expects that Tesseract is installed on the host machine.
    - If the file is missing or unreadable, returns an error string.
    - If OCR finds no text, returns "(no visible text detected)".
    """
    print("Reached: ocr_image")  # trace marker showing the tool was invoked
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # Use the image as a context manager so the file handle PIL keeps open is
    # released on every exit path, including an OCR failure.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    # Whitespace-only output counts as "nothing found".
    return text.strip() or "(no visible text detected)"