File size: 2,938 Bytes
7acb2e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

from smolagents import tool
from youtube_transcript_api import YouTubeTranscriptApi
from bs4 import BeautifulSoup
from tools.fetch import fetch_webpage

@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video given its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345. If a full YouTube URL is passed anyway, the ID is extracted from it.

    Returns:
        str: The transcript of the YouTube video as a single string, with each line separated by a newline character.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import parse_qs, urlparse

    video_id = video_id.strip()
    # A bare video ID never contains "/", so a slash means a URL was passed
    # despite the instructions; recover the ID instead of failing.
    if "/" in video_id:
        parsed = urlparse(video_id)
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            # Standard form: .../watch?v=<id>
            video_id = query_ids[0]
        else:
            # Path forms such as youtu.be/<id>, /shorts/<id>, /embed/<id>
            segments = [part for part in parsed.path.split("/") if part]
            if segments:
                video_id = segments[-1]

    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(video_id)
    # raw data is a list of dicts such as
    # [{'text': 'Hey there', 'start': 0.0, 'duration': 1.54}, ...];
    # only the 'text' of each entry is kept, one entry per line.
    raw_data = fetched_transcript.to_raw_data()
    return "\n".join(item["text"] for item in raw_data)


@tool
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video.

    Returns:
        str: The title and description of the YouTube video, as a "Title: ..." line followed by a "Description: ..." line.
    """
    # Fetch the raw HTML (not markdown) so the <meta> tags can be parsed.
    html = fetch_webpage(video_url, convert_to_markdown=False)
    soup = BeautifulSoup(html, "html.parser")

    def meta_content(name: str, fallback: str) -> str:
        """Return the content of <meta name=...>, or fallback if unavailable."""
        tag = soup.find("meta", {"name": name})
        if tag is None:
            return fallback
        # .get() avoids a KeyError when the tag exists but carries no
        # "content" attribute.
        return tag.get("content", fallback)

    title = meta_content("title", "No title found")
    description = meta_content("description", "No description found")

    return f"Title: {title}\nDescription: {description}"


if __name__ == "__main__":
    # Manual smoke test: fetch metadata and transcript for one sample video.
    from dotenv import load_dotenv

    # Must be *called* to actually load variables from a .env file.
    load_dotenv()

    video_id = "1htKBjuUWec"  # Replace with your YouTube video ID
    video_url = "https://www.youtube.com/watch?v=" + video_id

    # Get the title and description
    try:
        title_description = get_youtube_title_description(video_url)
    except Exception as e:
        print(f"Error fetching title and description: {e}")
    else:
        print(title_description)

    # Get the transcript; print it only when the fetch succeeded, so a
    # failure does not trigger a NameError on an unbound variable.
    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        print(transcript)