New_Final_Assignment

Sleeping

File size: 16,445 Bytes

1f5cba5
 
0e29657
4f25f4e
1f5cba5
305d4ff
3563dd6
c927679
4f25f4e
266fff4
9af3089
a1dc7ba
1ae0aa0
 
 
14fa0cc
0c482eb
 
 
2871b51
9450587
0c482eb
0e29657
0c482eb
 
 
0e29657
9afd718
e339dd2
0c482eb
 
 
838224c
0c482eb
 
 
8ae792e
0c482eb
 
 
 
305d4ff
0c482eb
 
 
 
 
9af3089
 
1f5cba5
ebf7d5c
 
 
 
 
 
 
 
 
 
1f5cba5
9017277
 
 
 
f74ec57
9017277
 
a14b206
9017277
 
0c482eb
9017277
133d76b
9017277
133d76b
 
9017277
133d76b
9017277
133d76b
 
9017277
0c482eb
9017277
 
133d76b
9017277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f25f4e
9af3089
 
0e29657
ebf7d5c
 
 
9af3089
ebf7d5c
 
 
 
 
 
0e29657
9af3089
 
7fb0070
4f25f4e
 
 
7fb0070
4f25f4e
 
 
 
 
 
3872131
4f25f4e
 
 
 
09b1a3d
 
9af3089
 
7fb0070
ebf7d5c
 
 
 
 
 
 
 
 
 
7fb0070
e339dd2
9af3089
7fb0070
7c5f7b3
 
 
9af3089
7c5f7b3
 
 
0c482eb
 
8ae792e
4f25f4e
 
0c482eb
7c5f7b3
7fb0070
09b1a3d
 
 
 
0c482eb
abff174
03df343
 
 
 
8ae792e
abff174
7fb0070
 
838224c
4f25f4e
a59a680
 
 
 
 
9af3089
 
a59a680
ebf7d5c
 
a1dc7ba
 
 
ebf7d5c
 
a1dc7ba
 
 
919fd15
a59a680
a1dc7ba
919fd15
 
 
 
 
 
a1dc7ba
919fd15
 
 
 
 
a1dc7ba
ce34e8f
a1dc7ba
 
 
 
 
 
 
 
 
 
 
 
 
 
919fd15
 
 
a1dc7ba
 
 
 
919fd15
 
 
a1dc7ba
 
 
 
 
 
 
 
 
 
 
 
919fd15
 
 
e5782f0
919fd15
a1dc7ba
 
e5782f0
919fd15
ce34e8f
e5782f0
919fd15
a1dc7ba
14fa0cc
1ae0aa0
14fa0cc
ebf7d5c
 
 
 
 
 
 
 
 
 
14fa0cc
8ae792e
ce34e8f
1ae0aa0
 
 
 
 
8ae792e
1ae0aa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce34e8f
1ae0aa0
4f25f4e
 
3872131
 
5b43bea
4f25f4e
9af3089
 
3872131
ebf7d5c
 
 
 
 
 
 
 
 
 
3872131
8ae792e
3872131
9af3089
 
 
 
3872131
8ae792e
3872131
 
 
8ae792e
 
3872131
 
 
 
 
 
 
 
9af3089
3872131
 
 
4f25f4e
 
ebf7d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ae792e
ebf7d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ae792e
ebf7d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ae792e
ebf7d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ae792e
ebf7d5c
 
 
 
 
e5782f0
 
 
 
ebf7d5c
e5782f0
ebf7d5c
e5782f0
 
 
919fd15
 
 
 
 
e5782f0
 
 
919fd15
 
 
 
e5782f0
 
 
 
 
 
 
 
 
 
 
 
 
919fd15
e5782f0
 
 
 
 
 
 
 
919fd15
e5782f0
919fd15
 
e5782f0

# tools.py

import pandas as pd

from pathlib import Path
import requests
import regex as re
import time
import os
from duckduckgo_search import DDGS
from langchain_core.tools import tool
from langchain_community.document_loaders import ArxivLoader
import arxiv
import fitz  # PyMuPDF
import tempfile

# Base URL of the remote API. _download_file_for_task fetches task files from
# f"{DEFAULT_API_URL}/files/{task_id}".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# Removed complex safety wrapper - keeping things simple

def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Fetch the file attached to ``task_id`` and store a local copy.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    written to ``hf_files/{task_id}.{ext}`` (the directory is created on
    demand). ``ext`` only determines the local file name; it is not part of
    the request sent to the server.

    Args:
        task_id: Identifier of the task whose file should be downloaded.
        ext: Extension (without the leading dot) given to the local copy.

    Returns:
        The local path on success. An empty string when the server did not
        answer 200 with a non-empty body, or when the HTTP request or the
        file write failed (those errors are logged, not raised).
    """
    print("reached _download_file_for_task")
    os.makedirs("hf_files", exist_ok=True)
    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code == 200 and resp.content:
            with open(local_path, "wb") as f:
                f.write(resp.content)
            # Report success only once the bytes are actually on disk.
            print(f"\n Downloaded file from {url} to {local_path} \n")
            return local_path
        print(f"No file downloaded from {url} (HTTP {resp.status_code})")
    except (requests.RequestException, OSError) as e:
        # Best-effort helper: surface the cause in the log, signal failure via "".
        print(f"Error downloading file from {url} to {local_path}: {e}")

    # Either a non-200 / empty response or a request / write error.
    return ""

@tool
def image_tool(task_id: str) -> str:
    """
    TOOL NAME: Image Analysis Tool

    Purpose: When the user asks about images, photos, or visual content, use this tool to get a description of the image.

    Input: A task_id string that identifies the specific image to analyze.

    Example usage:
    - "What is shown in this image?"
    - "Describe the contents of the picture"
    - "What objects are visible in the photo?"
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.

    # Try each allowed extension; the for/else returns an error only when no
    # attempt produced a file on disk.
    for ext in ("png", "jpg", "jpeg"):
        file_path = _download_file_for_task(task_id, ext)
        if file_path and os.path.exists(file_path):
            break
    else:
        return f"Error: Image file for task_id '{task_id}' not found."

    # Read the image bytes
    try:
        with open(file_path, "rb") as f:
            image_bytes = f.read()
    except Exception as e:
        return f"Error reading image: {str(e)}"

    # Load HF token
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN not set in environment."

    # Use a single reliable model
    model = "Salesforce/blip-image-captioning-base"
    headers = {"Authorization": f"Bearer {hf_token}"}

    try:
        # The Inference API expects the raw image bytes as the request body;
        # a multipart upload (files=...) wraps them in form-data framing that
        # the image decoder cannot read.
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=headers,
            data=image_bytes,
            timeout=30,
        )
    except Exception as e:
        return f"Error calling HuggingFace API: {e}"

    # Parse response
    if response.status_code != 200:
        return f"Error from model ({model}): {response.status_code} - {response.text}"

    try:
        result = response.json()
        # The payload is handled both as a list of dicts and as a single dict.
        if isinstance(result, list) and result:
            caption = result[0].get("generated_text", "").strip()
        elif isinstance(result, dict):
            caption = result.get("generated_text", "").strip()
        else:
            caption = ""
    except Exception as e:
        return f"Error parsing response: {e}"

    if not caption:
        return "No caption generated by model."

    return f"Image Caption:\n{caption}"




@tool
def excel_tool(task_id: str) -> str:
    """
    TOOL NAME: Excel Data Analysis Tool

    Purpose: When the user asks about data in spreadsheets, tables, or Excel files, use this tool to read and analyze the data.

    Input: A task_id string that identifies the specific Excel file to analyze.

    Example usage:
    - "What data is in this spreadsheet?"
    - "Analyze the Excel file contents"
    - "Show me the data from the table"
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    print("reached excel_tool")
    sheet = "Sheet1"  # preferred sheet; falls back to the first sheet if absent

    local_xlsx = _download_file_for_task(task_id, "xlsx")
    if not local_xlsx or not os.path.exists(local_xlsx):
        return "Error: Excel file not found for this task."

    try:
        # The context manager closes the workbook handle even when parsing fails.
        with pd.ExcelFile(local_xlsx) as xls:
            target_sheet = sheet if sheet in xls.sheet_names else xls.sheet_names[0]
            df = pd.read_excel(xls, sheet_name=target_sheet)

        # Build the row-dict representation once and reuse it for log and result.
        records = str(df.to_dict(orient="records"))
        print(f"Excel file read successfully: {records}")
        return records
    except Exception as e:
        return f"Error reading Excel file: {e}"
    

import openai
@tool
def audio_transcriber_tool(task_id: str) -> str:
    """
    TOOL NAME: Audio Transcription Tool

    Purpose: When the user asks about audio files, speech, or wants to know what was said in an audio recording, use this tool.

    Input: A task_id string that identifies the specific audio file to transcribe.

    Example usage:
    - "What is said in this audio file?"
    - "Transcribe the speech from the recording"
    - "Convert the audio to text"
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    print("reached audio_transcriber_tool")

    # A download is attempted on every call (no reuse of an existing local
    # copy). Extensions are tried lazily in order; the first non-empty path wins.
    attempts = (
        _download_file_for_task(task_id, suffix) for suffix in ("mp3", "wav", "m4a")
    )
    local_audio = next((path for path in attempts if path), "")

    if not (local_audio and os.path.exists(local_audio)):
        failure = "Error: No audio file found (download failed)."
        print(failure)
        return failure

    # Hand the file to OpenAI Whisper; any failure (including a missing API
    # key) is turned into the returned text instead of propagating.
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(local_audio, "rb") as fh:
            print("reached openai.audio.transcriptions.create")
            transcription = openai.audio.transcriptions.create(
                model="whisper-1",
                file=fh,
            )
        transcript = transcription.text.strip()
    except Exception as exc:
        transcript = f"Error during transcription: {exc}"

    print(f"Transcripted as transcript: {transcript}")
    return transcript
# tools.py

import re
import requests

@tool
def wikipedia_search_tool(wiki_query: str) -> str:
    """
    TOOL NAME: Wikipedia Search Tool

    Purpose: When the user asks about general knowledge, facts, or wants to know about a specific topic, use this tool.

    Input: A string describing the topic to search for on Wikipedia.

    Example usage:
    - "What is the capital of France?"
    - "Find information about quantum computing"
    - "What is the history of the internet?"
    If no valid wiki_query is provided, returns an empty string.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    from urllib.parse import quote

    print("reached wikipedia search tool")

    # --- Simple in-memory cache to avoid repeated look-ups in a single session
    if not hasattr(wikipedia_search_tool, "_cache"):
        wikipedia_search_tool._cache = {}

    query = wiki_query.strip()
    if not query:
        return ""

    if query in wikipedia_search_tool._cache:
        print("Returning cached Wikipedia result for query:", query)
        return wikipedia_search_tool._cache[query]

    try:
        # 1) Use the MediaWiki API to search for page titles matching the query
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "utf8": 1,
        }
        search_resp = requests.get(
            "https://en.wikipedia.org/w/api.php", params=search_params, timeout=10
        )
        search_resp.raise_for_status()
        search_data = search_resp.json()

        search_results = search_data.get("query", {}).get("search", [])
        if not search_results:
            msg = f"No Wikipedia page found for '{query}'. [END_OF_SEARCH]"
            wikipedia_search_tool._cache[query] = msg
            return msg

        # 2) Take the first search result's title
        first_title = search_results[0].get("title", "")
        if not first_title:
            msg = "Unexpected format from Wikipedia search. [END_OF_SEARCH]"
            wikipedia_search_tool._cache[query] = msg
            return msg

        # 3) Fetch the page summary for that title via the REST summary endpoint.
        #    The title is a single path segment, so every reserved character
        #    (notably "/", "?" and "#") must be percent-encoded; otherwise a
        #    title such as "AC/DC" is split into two path segments.
        title_for_url = quote(first_title.replace(" ", "_"), safe="")
        summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
        summary_resp = requests.get(summary_url, timeout=10)
        summary_resp.raise_for_status()
        summary_data = summary_resp.json()

        # 4) Extract either the "extract" field or a fallback message
        summary_text = summary_data.get("extract")
        if not summary_text:
            summary_text = summary_data.get("description", "No summary available.")

        result = f"Title: {first_title}\n\n{summary_text}\n\n[END_OF_SEARCH]"
        wikipedia_search_tool._cache[query] = result
        print("Submitted wiki successfully")
        return result

    # Errors are returned as text and deliberately not cached, so a later call
    # with the same query retries the lookup.
    except requests.exceptions.RequestException as e:
        print("Wikipedia search error: ", e)
        return f"Wikipedia search error: {e} [END_OF_SEARCH]"
    except Exception as e:
        print("Unexpected error in wikipedia_search_tool: ", e)
        return f"Unexpected error in wikipedia_search_tool: {e} [END_OF_SEARCH]"

@tool
def arxiv_search_tool(query: str) -> str:
    """
    TOOL NAME: ArXiv Academic Search Tool

    Purpose: When the user asks for academic research, scientific papers, or technical information, use this tool.

    Input: A string describing the academic topic to search for on ArXiv.

    Example usage:
    - "Find research papers about machine learning"
    - "What are recent studies on climate change?"
    - "Search for papers on quantum computing"
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    print("Reached ArXiv tool, with query = ", query)
    try:
        # Search arXiv for the single most relevant result
        search = arxiv.Search(query=query, max_results=1, sort_by=arxiv.SortCriterion.Relevance)
        result = next(search.results(), None)

        if not result:
            print("No arXiv result found")
            return "No results found. [END_OF_SEARCH]"

        # Download the PDF; the timeout keeps a stalled transfer from hanging
        # the tool call indefinitely.
        response = requests.get(result.pdf_url, timeout=30)
        response.raise_for_status()

        # Parse the PDF straight from memory. This avoids reopening a
        # NamedTemporaryFile by name (which fails on Windows) and the `with`
        # block guarantees the document is closed.
        with fitz.open(stream=response.content, filetype="pdf") as doc:
            text = "".join(page.get_text() for page in doc)

        # Collapse all whitespace runs, then cap the excerpt at 3000 characters
        text = " ".join(text.split())
        summary = text[:3000] + "..." if len(text) > 3000 else text

        return f"Title: {result.title}\n\nSummary:\n{summary}\n\n[END_OF_SEARCH]"

    except Exception as e:
        # Any failure (search, download, PDF parsing) is reported as text.
        return f"Error fetching arXiv content: {e} [END_OF_SEARCH]"


from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
# Shared chat-model client, created once at import time; analyze_code_tool
# sends its review prompt through LLM.invoke(...).
LLM = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.2)

@tool
def analyze_code_tool(task_id: str) -> str:
    """
    TOOL NAME: Code Analysis Tool

    Purpose: When the user asks about code, programming files, or wants to understand what a script does, use this tool.

    Input: A task_id string that identifies the specific code file to analyze.

    Example usage:
    - "What does this Python code do?"
    - "Analyze the code file for bugs"
    - "Explain the functions in this script"
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    print("Reached analyze_code_tool")
    if not task_id:
        # Without a task id there is nothing to download; the placeholder text
        # is what gets sent to the model.
        code_txt = "No code provided."
    else:
        path = _download_file_for_task(task_id, "py")
        if not path:
            print("Error: .py file not found for this task.")
            return "Error: .py file not found for this task."
        code_txt = Path(path).read_text(encoding="utf-8", errors="ignore")

    # Normalise line endings and drop the trailing newline before embedding.
    code_sample = "\n".join(code_txt.splitlines())

    prompt = [
        SystemMessage(content="You are a senior Python code reviewer."),
        HumanMessage(content=(
            "Please analyse the following code. "
            "Summarise what it does, list key functions/classes, "
            "and point out any obvious bugs, performance issues or style problems.\n\n"
            # The blank line after the closing fence keeps the follow-up
            # sentence from being glued onto the fence marker.
            f"```python\n{code_sample}\n```\n\n"
            "If you can then find the output of the code and return it in the output."
        ))
    ]
    return LLM.invoke(prompt).content.strip()



# ─────────────────────────── Math Tools ───────────────────────────────

@tool
def add_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Addition Tool

    Purpose: When the user asks to add numbers or perform addition calculations, use this tool.

    Input: Two numbers (a and b) to add together.

    Example usage:
    - "What is 25 + 17?"
    - "Add 3.14 and 2.86"
    - "Calculate the sum of 100 and 250"
    """
    # The docstring doubles as the tool description, so it is kept verbatim.
    # Trace line: makes each invocation visible on stdout.
    print("Reached add_tool")
    # The sum is computed inline and reported together with both operands.
    return f"Addition result: {a} + {b} = {a + b}"

@tool
def subtract_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Subtraction Tool

    Purpose: When the user asks to subtract numbers or perform subtraction calculations, use this tool.

    Input: Two numbers (a and b) where b is subtracted from a.

    Example usage:
    - "What is 50 - 23?"
    - "Subtract 15.5 from 40.2"
    - "Calculate 1000 minus 347"
    """
    # The docstring doubles as the tool description, so it is kept verbatim.
    # Trace line: makes each invocation visible on stdout.
    print("Reached subtract_tool")
    # The difference (a minus b) is computed inline and reported with both operands.
    return f"Subtraction result: {a} - {b} = {a - b}"

@tool
def multiply_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Multiplication Tool

    Purpose: When the user asks to multiply numbers or perform multiplication calculations, use this tool.

    Input: Two numbers (a and b) to multiply together.

    Example usage:
    - "What is 8 × 7?"
    - "Multiply 12.5 by 4"
    - "Calculate the product of 15 and 20"
    """
    # The docstring doubles as the tool description, so it is kept verbatim.
    # Trace line: makes each invocation visible on stdout.
    print("Reached multiply_tool")
    # The product is computed inline and reported together with both operands.
    return f"Multiplication result: {a} × {b} = {a * b}"

@tool
def divide_tool(a: float, b: float) -> str:
    """
    TOOL NAME: Division Tool

    Purpose: When the user asks to divide numbers or perform division calculations, use this tool.

    Input: Two numbers (a and b) where a is divided by b.

    Example usage:
    - "What is 100 ÷ 4?"
    - "Divide 75 by 3"
    - "Calculate 144 divided by 12"
    """
    # The docstring doubles as the tool description, so it is kept verbatim.
    # Trace line: makes each invocation visible on stdout.
    print("Reached divide_tool")
    # Happy path first; a zero divisor is reported as text rather than raised.
    if b != 0:
        return f"Division result: {a} ÷ {b} = {a / b}"
    return "Division error: Cannot divide by zero"

@tool
def web_search_tool(query: str) -> str:
    """
    TOOL NAME: Web Search Tool

    Purpose: When the user asks for current information, recent news, or topics not covered by Wikipedia, use this tool.

    Input: A string describing what to search for on the web.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    print("reached web_search_tool")

    # Per-session memo of successful searches, keyed by the stripped query.
    if not hasattr(web_search_tool, "_cache"):
        web_search_tool._cache = {}

    query = query.strip()
    if not query:
        return "No search query provided."

    if query in web_search_tool._cache:
        print("Returning cached web search result for query:", query)
        return web_search_tool._cache[query]

    ddg = DDGS()
    max_retries = 5
    result_text = ""
    rate_limited = False

    for attempt in range(1, max_retries + 1):
        try:
            result_text = str(ddg.text(query, max_results=5))
        except Exception as e:
            if attempt < max_retries:
                print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
                time.sleep(4)
                continue
            # Out of attempts: report the last error (not cached).
            return f"Error during DuckDuckGo search: {e} [END_OF_SEARCH]"

        rate_limited = "202 Ratelimit" in result_text
        if rate_limited and attempt < max_retries:
            print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
            time.sleep(4)
            continue
        break  # Either a usable result or the final rate-limited attempt

    result_text += "\n\n[END_OF_SEARCH]"

    if rate_limited:
        # Do not memoise a rate-limit response: caching it would make every
        # later call with the same query return the failure without retrying.
        print("web_search_tool: still rate limited after retries; result not cached")
        return result_text

    web_search_tool._cache[query] = result_text
    print("Submitted web search successfully")
    return result_text