|
import os |
|
import tempfile |
|
|
|
from langchain_core.tools import tool |
|
from pytubefix import YouTube |
|
|
|
from .video_sampling import extract_frames, extract_frame_captions |
|
|
|
|
|
def download_video(url: str, output_path: str) -> str:
    """
    Downloads the lowest-resolution stream of a YouTube video into a folder.

    Args:
        url: The URL of the YouTube video.
        output_path: The output folder where to download the video.

    Returns:
        The path of the downloaded video file inside ``output_path``.

    Raises:
        ValueError: If no stream is offered for the video by
            ``get_lowest_resolution()``.
    """
    yt = YouTube(url)
    stream = yt.streams.get_lowest_resolution()
    if stream is None:
        # Without this guard the failure surfaces as an opaque
        # AttributeError on ``None.download``.
        raise ValueError(f"No downloadable stream found for video: {url}")

    saved_path = stream.download(output_path)
    # Prefer the path reported by the download call itself; fall back to the
    # conventional location when the call does not report one.
    return saved_path or os.path.join(output_path, stream.default_filename)
|
|
|
|
|
def youtube_video_frame_sampler(addr: str, dest_dir: str) -> None:
    """
    Downsamples a YouTube video into frames and saves the frames into a
    destination directory.

    The video is first downloaded into a temporary directory, which is
    removed as soon as frame extraction has finished. This function returns
    nothing; ``dest_dir`` is handed to ``extract_frames`` as the output
    location.

    Args:
        addr: The URL of the YouTube video.
        dest_dir: The destination directory.
    """
    # The context manager guarantees the downloaded video is deleted even if
    # the download or the frame extraction raises.
    with tempfile.TemporaryDirectory() as download_path:
        video_path = download_video(addr, download_path)
        extract_frames(video_path, dest_dir)
|
|
|
|
|
@tool(parse_docstring=True)
def youtube_video_to_frame_captions(addr: str) -> str:
    """
    Analyzes video frames from a YouTube video and obtains
    captions for each frame. This is useful when we need to
    answer questions on the images shown in the video. It adds
    computer vision capabilities to the LLM.

    Args:
        addr: The URL of the YouTube video.

    Returns:
        Frame descriptions as a list of strings.
    """
    # NOTE(review): the annotation says ``str`` while the docstring says a
    # list of strings -- confirm what extract_frame_captions actually returns.
    # The docstring text is left untouched because parse_docstring=True feeds
    # it into the tool's description and argument schema.
    #
    # The context manager removes the downloaded video deterministically once
    # captioning has finished, including when an exception is raised.
    with tempfile.TemporaryDirectory() as download_path:
        video_path = download_video(addr, download_path)
        return extract_frame_captions(video_path)
|
|