File size: 4,785 Bytes
456bd3e
b9ae70e
 
 
456bd3e
 
b9ae70e
 
 
 
 
 
68aa964
 
b9ae70e
 
456bd3e
b9ae70e
 
 
68aa964
b9ae70e
68aa964
b9ae70e
68aa964
b9ae70e
 
 
 
 
456bd3e
b9ae70e
 
 
 
 
456bd3e
b9ae70e
 
 
456bd3e
 
 
 
 
 
 
 
b9ae70e
456bd3e
b9ae70e
 
 
 
456bd3e
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ae70e
 
 
 
 
 
 
456bd3e
b9ae70e
 
 
 
 
68aa964
b9ae70e
 
 
 
 
456bd3e
 
b9ae70e
68aa964
 
b9ae70e
 
 
 
 
 
 
 
 
 
456bd3e
 
 
 
 
 
 
 
 
 
 
 
 
10b32d9
456bd3e
 
 
 
 
 
 
 
b9ae70e
456bd3e
b9ae70e
68aa964
b9ae70e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo
from phi.tools.youtube_tools import YouTubeTools  # Import YouTubeTools for YouTube summarization
from google.generativeai import upload_file, get_file
import google.generativeai as genai

import time
from pathlib import Path
import tempfile
from dotenv import load_dotenv
load_dotenv()

import os

# Read the Gemini key from the environment (populated by load_dotenv above)
# and configure the SDK only when a key is actually present.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# Page configuration. The emoji below were previously stored as mis-decoded
# UTF-8 bytes (mojibake); they are restored to the intended characters.
st.set_page_config(
    page_title="Multimodal AI Agent- Video Summarizer",
    page_icon="🎥",
    layout="wide",
)

st.title("Phidata Video AI Summarizer Agent 🎥🎤🖬")
st.header("Powered by Gemini 2.0 Flash Exp")

# Initialize the agent
@st.cache_resource
def initialize_agent():
    """Build the agent used for both uploaded-video and YouTube analysis.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned agent across script reruns instead of rebuilding it.

    Returns:
        An ``Agent`` named "Video AI Summarizer" backed by the
        ``gemini-2.0-flash-exp`` model, equipped with the DuckDuckGo and
        YouTube tools, and configured for markdown output.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    # DuckDuckGo supplies web search; YouTubeTools handles YouTube videos.
    agent_tools = [DuckDuckGo(), YouTubeTools()]
    return Agent(
        name="Video AI Summarizer",
        model=gemini_model,
        tools=agent_tools,
        markdown=True,
    )

multimodal_Agent = initialize_agent()

# Free-form question the agent should answer about the video
user_query = st.text_area(
    label="What insights are you seeking from the video?",
    placeholder="Ask anything about the video content. The AI agent will analyze and gather additional context if needed.",
    help="Provide specific questions or insights you want from the video.",
)

# Two alternative video sources: a local upload or a YouTube link
video_file = st.file_uploader(
    label="Upload a video file",
    type=["mp4", "mov", "avi"],
    help="Upload a video for AI analysis",
)

youtube_url = st.text_input(label="Or, paste a YouTube URL to summarize:")

# Unified analysis prompt
def generate_analysis_prompt(user_query, video_source):
    """Return the instruction prompt sent to the agent.

    Args:
        user_query: The question or insight request entered by the user;
            it is interpolated into the prompt as-is.
        video_source: Short description of where the video comes from
            (the callers pass "uploaded video" or "YouTube URL").

    Returns:
        A multi-line string that starts with a newline, indents every line
        by eight spaces, and ends with an eight-space trailing line.
    """
    pad = " " * 8
    prompt_lines = (
        f"Analyze the {video_source} for content and context.",
        "Respond to the following query using video insights and supplementary web research:",
        f"{user_query}",
        "Provide a detailed, user-friendly, and actionable response.",
    )
    indented = "".join(f"{pad}{line}\n" for line in prompt_lines)
    return "\n" + indented + pad

# Process uploaded video
if video_file:
    # Preview directly from the in-memory upload. Writing a temp file here
    # would create a new file on every Streamlit rerun that is only removed
    # when the analyze button is clicked.
    st.video(video_file, format=video_file.type or "video/mp4", start_time=0)

    if st.button("🔍 Analyze Uploaded Video", key="analyze_uploaded_video_button"):
        if not user_query:
            st.warning("Please enter a question or insight to analyze the video.")
        else:
            video_path = None
            try:
                with st.spinner("Processing video and gathering insights..."):
                    # Keep the upload's real extension (.mp4/.mov/.avi) so the
                    # Gemini upload can infer the matching MIME type.
                    suffix = Path(video_file.name).suffix or ".mp4"
                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
                        # getvalue() is independent of the current read position.
                        temp_video.write(video_file.getvalue())
                        video_path = temp_video.name

                    # Upload, then poll until the file leaves PROCESSING
                    processed_video = upload_file(video_path)
                    while processed_video.state.name == "PROCESSING":
                        time.sleep(1)
                        processed_video = get_file(processed_video.name)

                    # Fail loudly instead of handing an unusable file to the agent
                    if processed_video.state.name == "FAILED":
                        raise RuntimeError("Gemini could not process the uploaded video.")

                    # Use the unified prompt for analysis (Uploaded video)
                    analysis_prompt = generate_analysis_prompt(user_query, "uploaded video")

                    # AI agent processing
                    response = multimodal_Agent.run(analysis_prompt, videos=[processed_video])

                # Display the result
                st.subheader("Analysis Result")
                st.markdown(response.content)

            except Exception as error:
                # UI boundary: surface any failure to the user instead of crashing
                st.error(f"An error occurred during analysis: {error}")
            finally:
                # Clean up the temporary video file if one was created
                if video_path is not None:
                    Path(video_path).unlink(missing_ok=True)

# Process YouTube URL
elif youtube_url:
    if st.button("🔍 Summarize YouTube Video", key="summarize_youtube_button"):
        if not user_query:
            st.warning("Please enter a question or insight to analyze the YouTube video.")
        else:
            try:
                with st.spinner("Fetching video captions and summarizing..."):
                    # Use the unified prompt for summarization (YouTube URL)
                    analysis_prompt = generate_analysis_prompt(user_query, "YouTube URL")

                    # Summarize the YouTube video using the agent
                    response = multimodal_Agent.run(f"{analysis_prompt} {youtube_url}")

                # Display the result. The run response is read via its
                # `.content` attribute, as in the uploaded-video branch, and
                # model output is rendered without raw HTML enabled.
                st.subheader("Video Summary")
                st.markdown(response.content)

            except Exception as error:
                # UI boundary: surface any failure to the user instead of crashing
                st.error(f"An error occurred: {error}")

else:
    st.info("Upload a video file or paste a YouTube URL to begin analysis.")

# Customize text area height: inject a small stylesheet that fixes the
# height of Streamlit text areas at 100px.
_TEXT_AREA_CSS = """
    <style>
    .stTextArea textarea {
        height: 100px;
    }
    </style>
    """
st.markdown(_TEXT_AREA_CSS, unsafe_allow_html=True)