"""Streamlit app that summarizes a YouTube video from its captions.

The app fetches the video's transcript with youtube-transcript-api, shows it
to the user, and lets a Gemini-backed phidata agent (with DuckDuckGo search)
answer questions about the video using that transcript.
"""

import os
import tempfile
import time
from pathlib import Path
from urllib.parse import parse_qs, urlparse

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from google.generativeai import upload_file, get_file
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter

# Load environment variables
load_dotenv()

API_KEY = os.getenv("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# Prefix that get_youtube_captions() puts in front of every failure message.
# SRT output starts with a numeric cue index, so a prefix check cannot
# misfire on a transcript that merely contains the word "Error".
CAPTION_ERROR_PREFIX = "Error:"

# URL path prefixes of the form /<prefix>/<video_id>.
_PATH_ID_PREFIXES = {"shorts", "embed", "live", "v"}

# Page configuration (must be the first Streamlit command in the script)
st.set_page_config(
    page_title="YouTube Video Summarizer",
    page_icon="🎥",
    layout="wide"
)

st.title("Phidata YouTube Video Summarizer Agent 🎥🎤🖬")
st.header("Powered by Gemini 2.0 Flash Exp")

if not API_KEY:
    # Surface the misconfiguration early instead of failing deep inside the
    # analysis step with an opaque authentication error.
    st.warning(
        "GOOGLE_API_KEY is not set. Add it to your environment or .env file "
        "before analyzing a video."
    )


@st.cache_resource
def initialize_agent():
    """Build the Gemini agent once and reuse it across Streamlit reruns."""
    return Agent(
        name="YouTube Video Summarizer",
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )


# Initialize the agent
multimodal_Agent = initialize_agent()


def _extract_video_id(youtube_url: str) -> str:
    """Return the video ID found in *youtube_url*.

    Handles ``watch?v=<id>``, ``youtu.be/<id>``, ``/shorts/<id>``,
    ``/embed/<id>`` and ``/live/<id>`` links, with or without a scheme.
    Anything else falls back to the original string-splitting rules, so a
    bare video ID is returned unchanged.
    """
    url = youtube_url.strip()
    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(url if "://" in url else f"https://{url}")
    host = (parsed.hostname or "").lower()
    segments = [segment for segment in parsed.path.split("/") if segment]

    if host in {"youtu.be", "www.youtu.be"} and segments:
        return segments[0]

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    if len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
        return segments[1]

    # Legacy fallback: plain string splitting for unrecognised inputs.
    if "shorts" in url:
        return url.split("shorts/")[-1].split("?")[0]
    return url.split("v=")[-1].split("&")[0]


@st.cache_data(show_spinner=False)
def _fetch_srt_transcript(video_id: str) -> str:
    """Download the transcript for *video_id* and format it as SRT.

    Cached because Streamlit reruns the whole script on every widget
    interaction. Failures raise, so they are never cached and a later rerun
    can retry.
    """
    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # the static get_transcript() with an instance-level fetch() — confirm
    # against the pinned version.
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return SRTFormatter().format_transcript(transcript)


# Function to get captions using youtube-transcript-api
def get_youtube_captions(youtube_url: str) -> str:
    """Retrieve a YouTube video's captions using youtube-transcript-api.

    Args:
        youtube_url: The URL of the YouTube video (regular, Shorts,
            youtu.be, embed or live link).

    Returns:
        The captions in SRT format, or a message starting with ``"Error:"``
        if the captions could not be retrieved.
    """
    video_id = _extract_video_id(youtube_url)
    if not video_id:
        return f"{CAPTION_ERROR_PREFIX} could not determine a video ID from the URL."

    try:
        return _fetch_srt_transcript(video_id)
    except Exception as e:
        # UI boundary: the caller expects a displayable string, not a raise.
        return f"Error: {str(e)}"


def _build_analysis_prompt(user_query: str, captions: str) -> str:
    """Compose the agent prompt from the user's question and the transcript."""
    return (
        "Analyze the content of the YouTube video using its transcript "
        "(SRT format) provided below.\n"
        "Respond to the following query using video insights and "
        "supplementary web research:\n"
        f"{user_query}\n\n"
        "Provide a detailed, user-friendly, and actionable response.\n\n"
        "Transcript:\n"
        f"{captions}"
    )


# YouTube video URL input
youtube_url = st.text_input(
    "Enter the YouTube video link",
    placeholder="Paste the YouTube video URL here",
    help="Provide the link to the YouTube video you want to summarize."
)

if youtube_url:
    try:
        # Keep the spinner around the slow call only, not around widgets.
        with st.spinner("Fetching and processing the YouTube video..."):
            captions = get_youtube_captions(youtube_url)

        if captions.startswith(CAPTION_ERROR_PREFIX):
            # Without captions there is nothing to analyze, so the query UI
            # is not rendered at all.
            st.error(captions)
        else:
            # Display captions
            st.subheader("Video Captions:")
            st.text(captions)

            # User query input
            user_query = st.text_area(
                "What insights are you seeking from the video?",
                placeholder="Ask anything about the video content. The AI agent will analyze and gather additional context if needed.",
                help="Provide specific questions or insights you want from the video."
            )

            if st.button("🔍 Analyze Video", key="analyze_video_button"):
                if not user_query:
                    st.warning("Please enter a question or insight to analyze the video.")
                else:
                    try:
                        with st.spinner("Analyzing video and gathering insights..."):
                            # The transcript is plain text, so it goes into
                            # the prompt directly. upload_file() expects a
                            # file path, not file contents, and the captions
                            # are not a video.
                            analysis_prompt = _build_analysis_prompt(user_query, captions)
                            response = multimodal_Agent.run(analysis_prompt)

                        # Display the result
                        st.subheader("Analysis Result")
                        st.markdown(response.content)
                    except Exception as error:
                        st.error(f"An error occurred during analysis: {error}")
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.info("Enter a YouTube video link to begin analysis.")