import os
import tempfile

from langchain_core.tools import tool
from pytubefix import YouTube

from .video_sampling import extract_frames, extract_frame_captions


def download_video(url: str, output_path: str) -> str:
    """
    Downloads the video into an output path.

    The lowest-resolution stream offered for the video is selected.

    Args:
        url: The URL of the YouTube video.
        output_path: The output folder where to download the video.

    Returns:
        The path of the downloaded video file: ``output_path`` joined with
        the stream's default file name.

    Raises:
        ValueError: If no stream could be selected for the given URL.
    """
    yt = YouTube(url)
    stream = yt.streams.get_lowest_resolution()
    if stream is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"No downloadable stream found for video: {url}")
    stream.download(output_path)
    return os.path.join(output_path, stream.default_filename)


def youtube_video_frame_sampler(addr: str, dest_dir: str) -> None:
    """
    Downsamples a YouTube video into frames.

    The video is downloaded into a temporary directory, its frames are
    extracted into ``dest_dir`` via ``extract_frames``, and the temporary
    directory (including the downloaded video) is removed afterwards.

    Args:
        addr: The URL of the YouTube video.
        dest_dir: The destination directory where the frames are saved.

    Returns:
        None. The frames are written to ``dest_dir``.
    """
    # The context manager guarantees the download is deleted even when the
    # download or the frame extraction raises.
    with tempfile.TemporaryDirectory() as download_path:
        video_path = download_video(addr, download_path)
        extract_frames(video_path, dest_dir)


# NOTE(review): the docstring below is parsed by ``@tool`` and used as the
# tool description, so its wording is kept as-is. Its "Returns" text mentions
# a list of strings while the annotation is ``str`` -- confirm against
# ``extract_frame_captions``.
@tool(parse_docstring=True)
def youtube_video_to_frame_captions(addr: str) -> str:
    """
    Analyzes video frames from a YouTube video and obtains captions for each frame.
    This is useful when we need to answer questions on the images shown in the video.
    It adds computer vision capabilities to the LLM.

    Args:
        addr: The URL of the YouTube video.

    Returns:
        Frame descriptions as a list of strings.
    """
    # Captions are computed while the temporary directory still exists; the
    # downloaded video is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as download_path:
        video_path = download_video(addr, download_path)
        return extract_frame_captions(video_path)