"""Gradio front end for two phidata agents.

* "Video Upload" tab: uploads a local video to the Gemini File API and asks a
  Gemini-backed agent (with DuckDuckGo search) a question about it.
* "YouTube Summarizer" tab: asks a Groq-backed agent with YouTube tools to
  summarize a video given its URL.
"""
import os
import tempfile
import time
from pathlib import Path

import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from google.generativeai import get_file, upload_file
from phi.agent import Agent
from phi.model.google import Gemini
from phi.model.groq import Groq
from phi.tools.duckduckgo import DuckDuckGo
from phi.tools.youtube_tools import YouTubeTools

# Load environment variables
load_dotenv()

# Google API Key
API_KEY = os.getenv("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# Groq API Key
groq_api_key = os.getenv("Groq_Api_Key")


# Initialize Multimodal Agent
def initialize_multimodal_agent():
    """Return a new Gemini-backed agent equipped with DuckDuckGo web search."""
    return Agent(
        name="Video AI Summarizer",
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )


def _resolve_video_path(video_file):
    """Return ``(path, is_temporary)`` for an uploaded video.

    ``video_file`` is either a filesystem path (what the ``gr.File`` component
    below delivers, since it is declared with ``type="filepath"``) or a
    file-like object exposing ``read()``. A path is used as-is and is NOT
    marked temporary, so the caller never deletes a file it did not create.
    A file-like object is copied into a new temporary file that the caller
    owns and must remove.
    """
    if not hasattr(video_file, "read"):
        return os.fspath(video_file), False

    # Keep the source extension when the object exposes a usable name;
    # otherwise fall back to the previous hard-coded ".mp4".
    source_name = getattr(video_file, "name", None)
    suffix = Path(source_name).suffix if isinstance(source_name, str) else ""

    temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=suffix or ".mp4")
    try:
        with temp_video:
            temp_video.write(video_file.read())
    except Exception:
        # Do not leave a partially written file behind if the copy fails.
        Path(temp_video.name).unlink(missing_ok=True)
        raise
    return temp_video.name, True


# Video Analysis Function
def analyze_video(video_file, user_query):
    """Answer ``user_query`` about an uploaded video using the multimodal agent.

    Args:
        video_file: Path to the uploaded video, or a file-like object with
            ``read()``. ``None`` or empty means nothing was uploaded.
        user_query: The question to ask about the video.

    Returns:
        The agent's response content on success, otherwise a human-readable
        message (validation prompt or error description). Exceptions are not
        propagated because the result is rendered directly in the UI.
    """
    if not user_query:
        return "Please enter a question or insight to analyze the video."
    if not video_file:
        return "Please upload a video file to analyze."

    video_path = None
    is_temporary = False
    try:
        video_path, is_temporary = _resolve_video_path(video_file)

        # Upload and process video
        processed_video = upload_file(video_path)
        while processed_video.state.name == "PROCESSING":
            time.sleep(1)
            processed_video = get_file(processed_video.name)
        if processed_video.state.name == "FAILED":
            # Surface a server-side failure instead of passing an unusable
            # file handle to the model.
            raise RuntimeError("the uploaded video could not be processed")

        analysis_prompt = f"""
        Analyze the uploaded video for content and context.
        Respond to the following query using video insights and supplementary web research:
        {user_query}
        Provide a detailed, user-friendly, and actionable response.
        """

        multimodal_agent = initialize_multimodal_agent()
        response = multimodal_agent.run(analysis_prompt, videos=[processed_video])
        return response.content
    except Exception as error:
        return f"An error occurred during analysis: {error}"
    finally:
        # Clean up only the temporary copy this function created.
        if is_temporary and video_path:
            Path(video_path).unlink(missing_ok=True)


# YouTube Summarization Function
def summarize_youtube(video_url):
    """Summarize the YouTube video at ``video_url`` with the Groq-backed agent.

    Args:
        video_url: URL of the video; ``None``, empty, or whitespace-only
            input is rejected with a prompt.

    Returns:
        The summary text on success, otherwise a validation or error message.
    """
    if not video_url or not video_url.strip():
        return "Please enter a valid YouTube video URL."

    try:
        youtube_agent = Agent(
            tools=[YouTubeTools()],
            model=Groq(id="llama-3.3-70b-versatile", api_key=groq_api_key),
            show_tool_calls=True,
            description="You are a YouTube agent. Obtain the captions of a YouTube video and answer questions.",
            markdown=True,
        )
        # run() returns the response object; print_response() only prints to
        # the console, which left the Summary box empty.
        response = youtube_agent.run(f"Summarize this video {video_url}")
        return response.content
    except Exception as e:
        return f"An error occurred: {e}"


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Phidata Video AI Summarizer Agent 🎥🎤🖬")

    with gr.Tab("🎥 Video Upload"):
        gr.Markdown("### 🎥 Analyze Uploaded Video")
        # Extensions need a leading dot to be treated as file extensions.
        video_file = gr.File(
            label="Upload a video file",
            type="filepath",
            file_types=[".mp4", ".mov", ".avi"],
        )
        user_query = gr.Textbox(
            label="What insights are you seeking from the video?",
            placeholder="Ask anything about the video content.",
        )
        analyze_button = gr.Button("🔍 Analyze Video")
        analysis_result = gr.Textbox(label="Analysis Result", interactive=False)

        analyze_button.click(
            fn=analyze_video,
            inputs=[video_file, user_query],
            outputs=analysis_result,
        )

    with gr.Tab("🌐 YouTube Summarizer"):
        gr.Markdown("### 🌐 Summarize YouTube Videos")
        video_url = gr.Textbox(label="Enter YouTube video URL:")
        summarize_button = gr.Button("Summarize Video")
        youtube_summary = gr.Textbox(label="Summary", interactive=False)

        summarize_button.click(
            fn=summarize_youtube,
            inputs=video_url,
            outputs=youtube_summary,
        )

# Launch the interface
demo.launch()