# Spaces: Sleeping
from langchain_core.tools import tool | |
import datetime | |
import requests | |
import openai | |
import os | |
import tempfile | |
from urllib.parse import urlparse | |
from openai import OpenAI | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from pytube import extract | |
def current_date(_) -> str:
    """Return today's date as a YYYY-MM-DD string.

    Args:
        _: ignored; the function takes one positional argument but never
            reads it (presumably so a tool runner can always pass an input).
    Returns:
        str: the current local date, e.g. "2024-05-17".
    """
    now = datetime.datetime.now()
    return f"{now:%Y-%m-%d}"
def day_of_week(_) -> str:
    """Return the name of the current weekday (e.g. Monday, Tuesday).

    Args:
        _: ignored; the function takes one positional argument but never
            reads it (presumably so a tool runner can always pass an input).
    Returns:
        str: the full weekday name for the current local date.
    """
    now = datetime.datetime.now()
    return f"{now:%A}"
def days_until(date_str: str) -> str:
    """Return the number of days from today until a given date.

    Args:
        date_str (str): target date in YYYY-MM-DD format. Leading and
            trailing whitespace is ignored.
    Returns:
        str: "<N> days until <date>" where N is negative for past dates,
        or a message starting with "Error parsing date:" when the input
        cannot be parsed.
    """
    try:
        # Tool inputs frequently arrive padded with spaces or a trailing
        # newline, which strptime would otherwise reject.
        cleaned = date_str.strip()
        target = datetime.datetime.strptime(cleaned, "%Y-%m-%d").date()
    except Exception as e:
        # Report the failure as text instead of raising, so the caller
        # always receives a string.
        return f"Error parsing date: {str(e)}"

    delta_days = (target - datetime.date.today()).days
    return f"{delta_days} days until {cleaned}"
# Date/time helper functions grouped into one list.
datetime_tools = [
    current_date,
    day_of_week,
    days_until,
]
def transcribe_audio(audio_file: str, file_extension: str) -> str:
    """Download an audio file and transcribe it to text with OpenAI Whisper.

    Args:
        audio_file (str): URL of the audio file (.mp3, .m4a, etc.). The value
            is fetched with ``requests.get``, so it must be a downloadable
            URL rather than a local path.
        file_extension (str): file extension of the audio, e.g. mp3. A leading
            dot and any non-alphanumeric characters are dropped.
    Returns:
        str: The transcribed text from the audio, or a message starting with
        "transcribe_audio failed:" if any step raised.
    """
    tmp_path = None
    try:
        # Download the audio; the timeout keeps a stalled server from
        # blocking the caller indefinitely.
        response = requests.get(audio_file, timeout=30)
        response.raise_for_status()  # surface HTTP errors (4xx/5xx)

        # Keep only letters/digits so the caller-supplied extension cannot
        # smuggle path separators into the temporary file name.
        extension = "".join(ch for ch in file_extension if ch.isalnum())
        suffix = f".{extension}" if extension else ""

        # A unique temporary file avoids collisions between calls. The
        # extension is kept as the suffix (presumably the API uses the file
        # name to recognise the audio format). delete=False so the file can
        # be reopened by path after this handle is closed.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(response.content)

        # Transcribe the audio with OpenAI Whisper.
        client = OpenAI()
        with open(tmp_path, "rb") as audio_content:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_content,
            )
        return transcription.text
    except Exception as e:
        # Report the failure as text instead of raising, so the caller
        # always receives a string.
        return f"transcribe_audio failed: {e}"
    finally:
        # Always remove the downloaded copy, on success or failure.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the transcription result or the original error.
                pass
# TEST
def get_youtube_transcript(page_url: str) -> str:
    """Fetch the transcript text of a YouTube video.

    Args:
        page_url (str): YouTube URL of the video
    Returns:
        str: the text of every transcript snippet joined by newlines, or a
        message starting with "get_youtube_transcript failed:" if any step
        raised.
    """
    try:
        # Resolve the video ID from the URL, then fetch its transcript.
        vid = extract.video_id(page_url)
        fetched = YouTubeTranscriptApi().fetch(vid)
        # Only the snippet text is kept; other snippet fields are dropped.
        lines = (snippet.text for snippet in fetched.snippets)
        return '\n'.join(lines)
    except Exception as err:
        return f"get_youtube_transcript failed: {err}"