Spaces:
Runtime error
Runtime error
File size: 2,938 Bytes
7acb2e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from smolagents import tool
from youtube_transcript_api import YouTubeTranscriptApi
from bs4 import BeautifulSoup
from tools.fetch import fetch_webpage
# Tool: fetch a YouTube video's transcript (looked up by video ID) as plain text.
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video given its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The transcript of the YouTube video as a single string, with each line separated by a newline character.
    """
    # to_raw_data() yields a list of dicts shaped like
    #   [{'text': 'Hey there', 'start': 0.0, 'duration': 1.54},
    #    {'text': 'how are you', 'start': 1.54, 'duration': 4.16}, ...]
    # Only the 'text' field is kept; the timing fields are dropped.
    snippets = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    return "\n".join(snippet['text'] for snippet in snippets)
# Tool: read a YouTube video's title and description from its page's <meta> tags.
@tool
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video.

    Returns:
        str: The title and description of the YouTube video, formatted as "Title: ..." and "Description: ..." on two lines. A placeholder ("No title found" / "No description found") is used for any part that is missing from the page.
    """
    # Request the page without markdown conversion (convert_to_markdown=False)
    # and parse it, so the <meta> tags can be queried.
    soup = BeautifulSoup(fetch_webpage(video_url, convert_to_markdown=False), "html.parser")

    # The title is read from <meta name="title" content="...">.
    # Tag.get() is used instead of indexing so a matching tag that lacks a
    # "content" attribute falls back to the placeholder instead of raising
    # KeyError.
    metatitle = soup.find("meta", {"name": "title"})
    if metatitle is not None:
        title = metatitle.get("content", "No title found")
    else:
        title = "No title found"

    # Same lookup for <meta name="description" content="...">.
    metadescription = soup.find("meta", {"name": "description"})
    if metadescription is not None:
        description = metadescription.get("content", "No description found")
    else:
        description = "No description found"

    return f"Title: {title}\nDescription: {description}"
if __name__ == "__main__":
    # Manual smoke test: run this module directly to exercise both tools
    # against a sample video.
    from dotenv import load_dotenv

    # load_dotenv must be *called*; referencing the bare name is a no-op
    # expression and loads nothing from the .env file.
    load_dotenv()

    video_id = "1htKBjuUWec"  # Replace with your YouTube video ID
    video_url = "https://www.youtube.com/watch?v=" + video_id

    # Get the title and description
    try:
        title_description = get_youtube_title_description(video_url)
    except Exception as e:
        print(f"Error fetching title and description: {e}")
    else:
        print(title_description)

    # Get the transcript. The result is printed only on success: after a
    # failed fetch `transcript` is unbound, and printing it would raise
    # NameError on top of the error that was just reported.
    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        print(transcript)