# app.py (uploaded by Manasa1)
# Commit: e525fa5 (verified), "Update app.py"
# File size: 3.88 kB
import gradio as gr
from phi.agent import Agent
from phi.model.google import Gemini
from phi.model.groq import Groq
from phi.tools.duckduckgo import DuckDuckGo
from phi.tools.youtube_tools import YouTubeTools
from google.generativeai import upload_file, get_file
import google.generativeai as genai
from dotenv import load_dotenv
import time
import tempfile
import os
from pathlib import Path
# Load variables from a .env file into the process environment.
load_dotenv()

# Google API key: the generative AI client is configured only when a key
# is actually present in the environment.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# Groq API key, read from the "Groq_Api_Key" environment variable and
# passed to the Groq model used by the YouTube summarizer.
groq_api_key = os.environ.get("Groq_Api_Key")
# Initialize Multimodal Agent
def initialize_multimodal_agent():
    """Build the agent used to analyze uploaded videos.

    Returns:
        Agent: A new agent named "Video AI Summarizer" that uses the
        ``gemini-2.0-flash-exp`` Gemini model, has a DuckDuckGo tool
        attached, and has markdown output enabled.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    search_tools = [DuckDuckGo()]
    agent_settings = {
        "name": "Video AI Summarizer",
        "model": gemini_model,
        "tools": search_tools,
        "markdown": True,
    }
    return Agent(**agent_settings)
# Video Analysis Function
def _read_video_bytes(video_file):
    """Return the raw bytes of an upload, whatever form it arrives in.

    Accepts raw ``bytes``/``bytearray``, a filesystem path (``str`` or
    ``os.PathLike``), or a file-like object exposing ``read()``.
    """
    if isinstance(video_file, (bytes, bytearray)):
        return bytes(video_file)
    if isinstance(video_file, (str, os.PathLike)):
        return Path(video_file).read_bytes()

    data = video_file.read()
    if not data:
        # Defensive fallback: if read() yields nothing but the object carries
        # a `.name` that points at a real file, read that file instead.
        stored_path = getattr(video_file, "name", None)
        if isinstance(stored_path, (str, os.PathLike)) and Path(stored_path).is_file():
            return Path(stored_path).read_bytes()
    return data


def analyze_video(video_file, user_query):
    """Answer a question about an uploaded video using the multimodal agent.

    The upload is staged in a temporary ``.mp4`` file, uploaded with
    ``upload_file``, polled with ``get_file`` until it leaves the
    ``PROCESSING`` state, and then handed to the agent together with a
    prompt built from ``user_query``. The temporary file is always removed
    before returning.

    Args:
        video_file: The uploaded video as a file-like object with ``read()``,
            a filesystem path, or raw bytes. ``None`` means nothing was
            uploaded.
        user_query: The question or insight the user wants from the video.

    Returns:
        str: The agent's response content on success. When input is missing
        or any step fails, a human-readable message is returned instead of
        raising, so the UI always has text to display.
    """
    if not user_query or not user_query.strip():
        return "Please enter a question or insight to analyze the video."
    if video_file is None:
        return "Please upload a video file to analyze."

    video_path = None
    try:
        video_bytes = _read_video_bytes(video_file)
        if not video_bytes:
            raise ValueError("the uploaded video file is empty")

        # delete=False because the upload call needs a path that outlives
        # this `with`; the `finally` clause below removes the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            video_path = temp_video.name
            temp_video.write(video_bytes)

        # Upload, then poll once per second until processing finishes.
        processed_video = upload_file(video_path)
        while processed_video.state.name == "PROCESSING":
            time.sleep(1)
            processed_video = get_file(processed_video.name)
        if processed_video.state.name == "FAILED":
            raise ValueError("video processing failed on the server")

        analysis_prompt = (
            "\n"
            "Analyze the uploaded video for content and context.\n"
            "Respond to the following query using video insights and supplementary web research:\n"
            f"{user_query}\n"
            "Provide a detailed, user-friendly, and actionable response.\n"
        )
        multimodal_agent = initialize_multimodal_agent()
        response = multimodal_agent.run(analysis_prompt, videos=[processed_video])
        return response.content
    except Exception as error:
        # UI boundary: report the failure as text rather than crashing the handler.
        return f"An error occurred during analysis: {error}"
    finally:
        # Single cleanup point for both the success and the failure path.
        if video_path is not None:
            Path(video_path).unlink(missing_ok=True)
# YouTube Summarization Function
def summarize_youtube(video_url):
    """Summarize a YouTube video with a Groq-backed agent.

    Args:
        video_url: URL of the YouTube video to summarize. ``None``, empty,
            or whitespace-only values are rejected with a message.

    Returns:
        str: The agent's summary on success. When the URL is missing or the
        agent fails, a human-readable message is returned instead of raising.
    """
    url = (video_url or "").strip()
    if not url:
        return "Please enter a valid YouTube video URL."
    try:
        youtube_agent = Agent(
            tools=[YouTubeTools()],
            model=Groq(id="llama-3.3-70b-versatile", api_key=groq_api_key),
            show_tool_calls=True,
            description="You are a YouTube agent. Obtain the captions of a YouTube video and answer questions.",
            markdown=True,
        )
        # run() hands back the response object; print_response() only writes
        # to the console and returns nothing, which left the UI box empty.
        response = youtube_agent.run(f"Summarize this video {url}")
        return response.content
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the handler.
        return f"An error occurred: {e}"
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Phidata Video AI Summarizer Agent 🎥🎤🖬")

    # Tab 1: upload a video and ask a question about it.
    with gr.Tab("🎥 Video Upload"):
        gr.Markdown("### 🎥 Analyze Uploaded Video")
        # `type` is left at the library default: the legacy value "file" is
        # rejected by Gradio 4+. File extensions are given with a leading dot,
        # which is the form the `file_types` filter expects.
        video_file = gr.File(
            label="Upload a video file",
            file_types=[".mp4", ".mov", ".avi"],
        )
        user_query = gr.Textbox(
            label="What insights are you seeking from the video?",
            placeholder="Ask anything about the video content.",
        )
        analyze_button = gr.Button("🔍 Analyze Video")
        analysis_result = gr.Textbox(label="Analysis Result", interactive=False)
        analyze_button.click(
            fn=analyze_video,
            inputs=[video_file, user_query],
            outputs=analysis_result,
        )

    # Tab 2: summarize a YouTube video from its URL.
    with gr.Tab("🌐 YouTube Summarizer"):
        gr.Markdown("### 🌐 Summarize YouTube Videos")
        video_url = gr.Textbox(label="Enter YouTube video URL:")
        summarize_button = gr.Button("Summarize Video")
        youtube_summary = gr.Textbox(label="Summary", interactive=False)
        summarize_button.click(
            fn=summarize_youtube,
            inputs=video_url,
            outputs=youtube_summary,
        )

# Launch the interface
demo.launch()