File size: 6,854 Bytes
ce69239
714d637
77866f8
 
 
 
406ca82
083f54c
104dc35
65744fb
083f54c
65744fb
ccd1eae
976257a
 
 
2fc0386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e057ea
 
 
714d637
 
7e057ea
 
 
714d637
 
7e057ea
 
 
714d637
ffff64a
 
 
 
 
714d637
 
 
7e057ea
77866f8
2fc0386
7e057ea
16fdaf8
 
77866f8
16fdaf8
 
77866f8
 
 
 
16fdaf8
 
 
 
 
 
 
 
 
 
104dc35
16fdaf8
 
 
77866f8
 
16fdaf8
0c0a3c5
77866f8
 
 
65744fb
 
 
2105776
 
65744fb
2105776
976257a
 
65744fb
 
083f54c
 
 
 
 
 
 
 
65744fb
083f54c
38a896e
083f54c
 
 
 
65744fb
ccd1eae
 
 
 
 
 
 
 
 
 
 
 
7123af6
ccd1eae
 
 
 
 
 
 
 
 
 
 
 
 
976257a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406ca82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
from langchain_core.tools import tool
import datetime
import requests
import openai
import os
import tempfile
import pandas as pd
from urllib.parse import urlparse, parse_qs
from openai import OpenAI
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
from pytube import extract
from openai import OpenAI
from bs4 import BeautifulSoup
from io import BytesIO
from PyPDF2 import PdfReader

@tool
def add(a: float, b: float) -> float:
    """ Returns the sum of two numbers.
    Args:
        a (float): first addend
        b (float): second addend
    Returns:
        float: the value of a + b
    """
    total = a + b
    return total

@tool
def subtract(a: float, b: float) -> float:
    """ Subtracts the second number from the first.
    Args:
        a (float): number to subtract from (minuend)
        b (float): number to subtract (subtrahend)
    Returns:
        float: the value of a - b
    """
    # The result of subtracting two floats is a float, so the return
    # annotation is float (not int), matching the other calculator tools.
    return a - b

@tool
def multiply(a: float, b: float) -> float:
    """ Returns the product of two numbers.
    Args:
        a (float): first factor
        b (float): second factor
    Returns:
        float: the value of a * b
    """
    product = a * b
    return product
    
@tool
def divide(a: float, b: float) -> float:
    """ Returns the quotient of two numbers.
    Args:
        a (float): dividend
        b (float): divisor
    Returns:
        float: the value of a / b
    Raises:
        ValueError: if the divisor is zero
    """
    if b != 0:
        quotient = a / b
        return quotient
    # Only a zero divisor reaches this point.
    raise ValueError("Cannot divide by zero.")

@tool
def power(a: float, b: float) -> float:
    """ Raises the first number to the power of the second.
    Args:
        a (float): base
        b (float): exponent
    Returns:
        the value of a raised to b
    """
    # Two-argument pow() is the built-in spelling of the ** operator.
    return pow(a, b)

# The arithmetic tools defined above, collected in one list.
calculator_basic = [
    add,
    subtract,
    multiply,
    divide,
    power,
]


@tool
def current_date(_) -> str :
    """ Returns today's date formatted as YYYY-MM-DD. The argument is ignored. """
    now = datetime.datetime.now()
    return format(now, "%Y-%m-%d")

@tool
def day_of_week(_) -> str :
    """ Returns the name of today's weekday (e.g., Monday, Tuesday). The argument is ignored. """
    now = datetime.datetime.now()
    return format(now, "%A")

@tool
def days_until(date_str: str) -> str :
    """ Counts the days between today and a given date.
    Args:
        date_str (str): target date in YYYY-MM-DD format
    Returns:
        str: "<n> days until <date_str>", or a message starting with
            "Error parsing date:" when the input cannot be processed.
    """
    try:
        target = datetime.datetime.strptime(date_str, "%Y-%m-%d").date()
        remaining = (target - datetime.date.today()).days
        return f"{remaining} days until {date_str}"
    except Exception as err:
        # Failures are reported as text rather than raised.
        return f"Error parsing date: {err}"

# The date-related tools defined above, collected in one list.
datetime_tools = [
    current_date,
    day_of_week,
    days_until,
]


@tool
def transcribe_audio(audio_file: str, file_extension: str) -> str:
    """ Downloads an audio file and transcribes it to text
    Args:
        audio_file (str): URL of the audio file to download (.mp3, .m4a, etc.)
        file_extension (str): file extension of the audio, e.g. mp3
    Returns:
        str: The transcribed text from the audio, or a message starting with
            "transcribe_audio failed:" if the download or transcription fails.
    """
    tmp_path = None
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(audio_file, timeout=30)
        response.raise_for_status()  # treat HTTP error statuses as failures

        # Accept both "mp3" and ".mp3"; the extension is kept on the temp file
        # name, as it was on the former fixed "tmp.<ext>" file.
        extension = file_extension.replace('.', '')

        # A unique temp file avoids collisions between concurrent calls and
        # leaves nothing behind in the working directory.
        with tempfile.NamedTemporaryFile(suffix=f'.{extension}', delete=False) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(response.content)

        # transcribe audio with OpenAI Whisper
        client = OpenAI()
        with open(tmp_path, "rb") as audio_content:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_content
            )
        return transcription.text

    except Exception as e:
        return f"transcribe_audio failed: {e}"

    finally:
        # Best-effort cleanup: a leftover temp file must not mask the result.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

def _youtube_video_id(youtube_url: str) -> str:
    """ Returns the video id found in a YouTube URL; raises ValueError if there is none. """
    parsed = urlparse(youtube_url.strip())

    # Standard watch URL: https://www.youtube.com/watch?v=<id>
    from_query = parse_qs(parsed.query).get('v')
    if from_query:
        return from_query[0]

    segments = [part for part in parsed.path.split('/') if part]
    host = (parsed.hostname or '').lower()

    # Short link: https://youtu.be/<id>
    if host == 'youtu.be' and segments:
        return segments[0]

    # Path-based forms: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
        return segments[1]

    raise ValueError(f"no video id found in {youtube_url!r}")


@tool
def transcribe_youtube(youtube_url: str) -> str:
    """ Fetches the English transcript of a YouTube video
    Args:
        youtube_url (str): youtube video's url (watch?v=, youtu.be/, /shorts/,
            /embed/ and /live/ forms are accepted)
    Returns:
        str: The transcript text, one caption per line. On failure, one of
            "invalid YouTube URL", "transcript unavailable: ..." or
            "transcribe_youtube failed: ...".
    """
    try:
        video_id = _youtube_video_id(youtube_url)
    except Exception:
        return "invalid YouTube URL"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['en']).fetch()
        # keep only text; entries are dicts in older library releases and
        # snippet objects exposing a .text attribute in newer ones
        captions = [
            entry['text'] if isinstance(entry, dict) else entry.text
            for entry in transcript
        ]
        return '\n'.join(captions)

    except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable) as e:
        return f"transcript unavailable: {str(e)}"

    except Exception as e:
        return f"transcribe_youtube failed: {e}"

@tool
def query_image(query: str, image_url: str) -> str:
    """ Answers a question about an image using a Vision Language Model
    Args:
        query (str): the question about the image, e.g. how many animals are on the image?
        image_url (str): URL of the image to inspect
    Returns:
        str: The model's text answer, or a message starting with
            "query_image failed:" if the request fails.
    """
    # A single user turn carrying both the question and the image reference.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": query},
            {"type": "input_image", "image_url": image_url},
        ],
    }

    try:
        vision_client = OpenAI()
        answer = vision_client.responses.create(
            model="gpt-4o-mini",
            input=[user_turn],
        )
        return answer.output_text
    except Exception as err:
        return f"query_image failed: {err}"

@tool
def webpage_content(url: str) -> str:
    """ Fetch text from a webpage or PDF file.
    Args:
        url (str): The URL of the webpage or PDF to fetch.
    Returns:
        str: Extracted text, or a message starting with
            "webpage_content failed:" if fetching or parsing fails.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Header values may use any letter case, and some servers send PDFs
        # under a generic content type, so also check the PDF file signature.
        content_type = response.headers.get("Content-Type", "").lower()
        is_pdf = "pdf" in content_type or response.content.startswith(b"%PDF-")

        # PDF file
        if is_pdf:
            pdf_content = BytesIO(response.content)
            reader = PdfReader(pdf_content)
            # extract_text() may yield nothing for a page; treat that as empty text
            return "\n".join(page.extract_text() or "" for page in reader.pages)

        # HTML file
        soup = BeautifulSoup(response.text, "html.parser")
        body = soup.body
        # Prefer the <body> text; fall back to the whole document if there is no body
        return body.get_text(separator="\n", strip=True) if body else soup.get_text(strip=True)

    except Exception as e:
        return f"webpage_content failed: {e}"

@tool
def read_excel(file_url: str) -> str:
    """ Reads an Excel file from a URL and returns the content as CSV text.
    Args:
        file_url (str): URL to the Excel file (.xlsx, .xls)
    Returns:
        str: Content of the Excel file as CSV text (pandas' default sheet
            selection is used, i.e. the first worksheet), or a message
            starting with "read_excel failed:" if downloading or parsing fails.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        # Parse the downloaded bytes in memory; nothing is written to disk.
        excel_content = BytesIO(response.content)
        df = pd.read_excel(excel_content)

        return df.to_csv(index=False)  # CSV without the dataframe's index column

    except Exception as e:
        return f"read_excel failed: {str(e)}"