# New_Final_Assignment
#
# Sleeping
#
# File size: 2,199 Bytes

from typing import Optional

import pandas as pd
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
@tool
def web_search(query: str) -> str:
    """
    Look up information on the web using DuckDuckGo.

    Args:
        query: The text to search the web for.

    Returns:
        The search results.
    """
    print("Reached: web_search")
    # Build a fresh search runner for this call and hand back its output directly.
    return DuckDuckGoSearchRun().run(query)


@tool
def parse_excel(path: str, sheet_name: Optional[str] = None) -> str:
    """
    Read one sheet of the Excel file at `path` and return its rows as text.

    The sheet is loaded into a DataFrame, converted to a list of per-row
    dictionaries, and returned as the string form of that list so the LLM can
    reason over it.

    Args:
        path: Location of the Excel workbook.
        sheet_name: Name of the sheet to read. If omitted, or if the workbook
            has no sheet with that name, the first sheet is read instead.

    Returns:
        The rows as a string, or a message starting with "Error:" if the
        workbook could not be found, opened, or read.

    Example return value (collapsed):
      "[{'Name': 'Alice', 'Score': 95}, {'Name': 'Bob', 'Score': 88}, ...]"
    """
    print("Reached: parse_excel")

    # 1. Load the Excel workbook
    try:
        workbook = pd.ExcelFile(path)
    except FileNotFoundError:
        return f"Error: could not find file at {path}."
    except Exception as e:
        # Corrupt files, unsupported formats, or a missing Excel engine are
        # reported as text rather than raised, so the caller always gets a
        # string back.
        return f"Error: could not open Excel file at {path}: {e}"

    # The context manager closes the workbook's file handle on every exit path.
    with workbook:
        # 2. Choose the sheet: the requested one if it exists, else the first.
        if sheet_name and sheet_name in workbook.sheet_names:
            target = sheet_name
        else:
            target = workbook.sheet_names[0]

        try:
            df = pd.read_excel(workbook, sheet_name=target)
        except Exception as e:
            return f"Error: could not read sheet {target!r}: {e}"

    # 3. Convert each row to a dict and return the list's string form
    #    (Python repr, matching the example in the docstring).
    return str(df.to_dict(orient="records"))



# tools.py

from pathlib import Path
from PIL import Image
import pytesseract


@tool
def ocr_image(path: str) -> str:
    """
    Run OCR on the image at `path` and return the extracted text.

    - Expects that Tesseract is installed on the host machine.
    - If the file is missing or unreadable, or OCR fails, returns a string
      starting with "Error:" instead of raising.
    - If OCR produces only whitespace, returns "(no visible text detected)".
    """
    print("Reached: ocr_image")
    file = Path(path)
    if not file.exists():
        return f"Error: could not find image at {path}"

    try:
        # Open image via PIL
        img = Image.open(file)
    except Exception as e:
        return f"Error: could not open image: {e}"

    # Close the image's underlying file handle whether OCR succeeds or fails.
    with img:
        try:
            # Run pytesseract OCR
            text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error: OCR failed: {e}"

    return text.strip() or "(no visible text detected)"