from smolagents import tool
from youtube_transcript_api import YouTubeTranscriptApi
from bs4 import BeautifulSoup

from tools.fetch import fetch_webpage


# Tool: get a YouTube transcript from a video ID.
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video given its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The transcript of the YouTube video as a single string with each line separated by a newline character.
    """
    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(video_id)

    # Raw data is a list of dicts such as
    #   [{'text': 'Hey there', 'start': 0.0, 'duration': 1.54},
    #    {'text': 'how are you', 'start': 1.54, 'duration': 4.16}, ...]
    # Only the 'text' field is kept, one entry per output line.
    raw_data = fetched_transcript.to_raw_data()
    return "\n".join(item["text"] for item in raw_data)


# Tool: get a video's title and description from its URL.
@tool
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video.

    Returns:
        str: The title and description of the YouTube video.
    """
    # Fetch the raw HTML (not markdown) so the <meta> tags can be parsed.
    soup = BeautifulSoup(
        fetch_webpage(video_url, convert_to_markdown=False), "html.parser"
    )

    # The title is read from <meta name="title" content="...">. Using .get()
    # keeps the fallback text when the tag exists but has no "content"
    # attribute, instead of raising KeyError.
    metatitle = soup.find("meta", {"name": "title"})
    if metatitle is not None:
        title = metatitle.get("content", "No title found")
    else:
        title = "No title found"

    # Same approach for <meta name="description" content="...">.
    metadescription = soup.find("meta", {"name": "description"})
    if metadescription is not None:
        description = metadescription.get("content", "No description found")
    else:
        description = "No description found"

    return f"Title: {title}\nDescription: {description}"


if __name__ == "__main__":
    from dotenv import load_dotenv

    # Actually call load_dotenv so variables from a .env file are loaded.
    load_dotenv()

    # Manual smoke test of both tools.
    video_id = "1htKBjuUWec"  # Replace with your YouTube video ID
    video_url = "https://www.youtube.com/watch?v=" + video_id

    # Get the title and description
    try:
        title_description = get_youtube_title_description(video_url)
        print(title_description)
    except Exception as e:
        print(f"Error fetching title and description: {e}")

    # Get the transcript; print it only when the fetch succeeded, so a
    # failure does not lead to a NameError on an unbound `transcript`.
    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        print(transcript)