Spaces:

codelion
/

videoanalysis

Sleeping

File size: 4,950 Bytes

f8aaa9d
 
0f96bc2
 
f8aaa9d
 
 
 
 
0f96bc2
f8aaa9d
 
 
 
0f96bc2
f8aaa9d
 
0f96bc2
f8aaa9d
 
 
 
 
 
0f96bc2
f8aaa9d
 
 
 
 
 
 
 
 
 
0f96bc2
f8aaa9d
0f96bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8aaa9d
 
 
 
 
0f96bc2
 
 
 
f8aaa9d
0f96bc2
f8aaa9d
0f96bc2
 
 
 
 
 
f8aaa9d
 
 
 
0f96bc2
 
 
 
 
 
 
 
f8aaa9d
0f96bc2
f8aaa9d
0f96bc2
 
f8aaa9d
 
0f96bc2
 
f8aaa9d
0f96bc2
f8aaa9d
 
0f96bc2
 
 
 
 
 
 
 
f8aaa9d
 
 
0f96bc2
 
 
f8aaa9d

import os
import gradio as gr
import matplotlib.pyplot as plt
from collections import Counter
from google import genai
from google.genai import types
from google.genai.types import Part
from tenacity import retry, stop_after_attempt, wait_random_exponential

# The Gemini API key is read from the environment; abort at import time
# when it is missing or empty so the misconfiguration is immediately visible.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Identifier of the Gemini 2.0 Flash model used for every request.
MODEL_NAME = "gemini-2.0-flash-001"

# Shared Gemini API client (AI Studio), authenticated with the API key.
client = genai.Client(api_key=GOOGLE_API_KEY)

@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
def call_gemini(video_url: str, prompt: str, mime_type: str = "video/webm") -> str:
    """
    Call the Gemini model with the provided video URL and prompt.

    The call is wrapped by tenacity: it is attempted up to 3 times with a
    randomized exponential wait (capped at 60) between attempts. When every
    attempt fails, the exception raised by tenacity propagates to the caller.

    Args:
        video_url: URI of the video, passed to the model as a URI part.
        prompt: Text instruction sent alongside the video.
        mime_type: MIME type declared for the video part. Defaults to
            "video/webm", the value this function has always used.

    Returns:
        The text of the model response, or an empty string when the response
        contains no text.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            Part.from_uri(file_uri=video_url, mime_type=mime_type),
            prompt,
        ],
    )
    # response.text is None when the response carries no text parts; normalise
    # to "" so the declared str return type always holds for callers.
    return response.text or ""

def generate_chart(analysis_text: str) -> plt.Figure:
    """
    Create a bar chart of how often selected keywords occur in the analysis.

    Matching is case-insensitive and on whole words, so a keyword followed by
    punctuation ("suspicious," or "alert.") is counted. Only the exact keyword
    spelling is counted; inflected forms such as "anomalies" are not.

    Args:
        analysis_text: Free-form analysis text. ``None`` or an empty string
            produces a chart in which every count is zero.

    Returns:
        A matplotlib figure with one bar per keyword, in the fixed keyword order.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from matplotlib.figure import Figure

    # Define keywords of interest
    keywords = ["suspicious", "anomaly", "incident", "alert", "object", "movement"]

    # Extract alphabetic words so surrounding punctuation does not hide a match,
    # then tally every word once instead of rescanning the list per keyword.
    words = re.findall(r"[a-z]+", (analysis_text or "").lower())
    word_counts = Counter(words)
    counts = [word_counts[kw] for kw in keywords]  # missing keywords count as 0

    # Build the figure directly rather than through pyplot: pyplot keeps a
    # global reference to every figure it creates, which accumulates in a
    # long-running server unless each figure is explicitly closed.
    fig = Figure(figsize=(6, 4))
    ax = fig.subplots()
    ax.bar(keywords, counts, color="skyblue")
    ax.set_title("Keyword Frequency in Analysis")
    ax.set_ylabel("Count")
    ax.set_xlabel("Keyword")
    fig.tight_layout()
    return fig

def analyze_video(video_url: str, user_query: str, num_iterations: int = 3) -> tuple[str, plt.Figure]:
    """
    Perform iterative (agentic) video analysis.

    The first iteration asks the model for a security-focused summary of the
    video; each later iteration feeds the previous answer back and asks for a
    refinement. The user query, when provided, is included in every prompt.
    The loop stops early on the first failed or empty response, keeping
    whatever analysis was obtained so far plus a bracketed note.

    Args:
        video_url: URI of the video to analyze.
        user_query: Optional extra focus for the analysis; ignored when empty.
        num_iterations: Number of model calls to attempt. Defaults to 3.

    Returns:
        A tuple of (Markdown report, keyword-frequency chart).
    """
    analysis = ""

    # The base prompt does not depend on the iteration, so build it once.
    base_prompt = "You are a video analysis agent focusing on security and surveillance. Provide a detailed summary of the video, highlighting key events, suspicious activities, or anomalies."
    if user_query:
        base_prompt += f" Also, focus on the following query: {user_query}"

    for i in range(num_iterations):
        if i == 0:
            prompt = base_prompt
        else:
            prompt = (f"Based on the previous analysis: \"{analysis}\". "
                      "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                      "and details that would help a security team understand the situation better. ")
            if user_query:
                prompt += f"Remember to focus on: {user_query}"

        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # Deliberately broad: this is the UI boundary, and any failure is
            # reported inside the report instead of crashing the request.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break

        if not result:
            # An empty/None response must not overwrite the analysis gathered
            # so far (and None would break the later text processing).
            analysis += f"\n[No analysis text returned during iteration {i+1}]"
            break

        analysis = result

    # Create a Markdown report (adding headings and bullet points if desired)
    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"

    # Generate a chart visualization based on the analysis text.
    chart_fig = generate_chart(analysis)
    return markdown_report, chart_fig

def gradio_interface(video_url: str, user_query: str) -> tuple[str, object]:
    """
    Gradio interface function that takes a video URL and an optional query,
    then returns a Markdown report and a visualization chart.

    Surrounding whitespace is stripped from both inputs, so a URL pasted with
    a trailing newline still works and a whitespace-only URL is rejected.

    Args:
        video_url: URL of the video to analyze; may be empty or ``None``.
        user_query: Optional text guiding the analysis; may be empty or ``None``.

    Returns:
        A tuple of (Markdown text, chart). When no usable URL is given, the
        text is a prompt to provide one and the chart is ``None``.
    """
    url = (video_url or "").strip()
    if not url:
        return "Please provide a valid video URL.", None
    return analyze_video(url, (user_query or "").strip())

# Gradio UI: two text inputs (video URL, optional query) are passed to
# gradio_interface, whose result is shown as a Markdown report and a plot.
iface = gr.Interface(
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
        "to iteratively analyze a video for security and surveillance insights. Provide a video URL and, optionally, "
        "a query to guide the analysis. The tool returns a detailed Markdown report along with a bar chart visualization "
        "of keyword frequency."
    ),
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
        gr.Textbox(
            label="Analysis Query (optional): guide the focus of the analysis",
            placeholder="e.g., focus on unusual movements near the entrance",
        ),
    ],
    outputs=[
        gr.Markdown(label="Security & Surveillance Analysis Report"),
        gr.Plot(label="Visualization: Keyword Frequency"),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()