videoanalysis / app.py
codelion's picture
Update app.py
0f96bc2 verified
raw
history blame
4.95 kB
import os
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
from google import genai
from google.genai import types
from google.genai.types import Part
from tenacity import retry, stop_after_attempt, wait_random_exponential
# Read the Gemini API key from the environment; refuse to start without it.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Gemini API client (AI Studio), authenticated with the key read above.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Every request in this module targets the Gemini 2.0 Flash model.
MODEL_NAME = "gemini-2.0-flash-001"
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
def call_gemini(video_url: str, prompt: str, mime_type: str = "video/webm") -> str:
    """
    Call the Gemini model with the provided video URL and prompt.

    The video URL is passed as a URI part, followed by the text prompt.

    Args:
        video_url: URI of the video to analyze.
        prompt: Instruction text sent alongside the video.
        mime_type: MIME type declared for the video URI. Defaults to
            "video/webm", the value this function has always used.

    Returns:
        The text of the model response, or an empty string when the
        response carries no text.

    Raises:
        tenacity.RetryError: when all three attempts fail. The decorator does
            not set ``reraise=True``, so the underlying API error is wrapped
            rather than raised directly.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            Part.from_uri(file_uri=video_url, mime_type=mime_type),
            prompt,
        ],
    )
    # response.text may be None when the response contains no text part;
    # normalize so callers can rely on the declared ``str`` return type.
    return response.text or ""
def _count_keywords(analysis_text: str, keywords: list[str]) -> list[int]:
    """Return the whole-word occurrence count of each keyword, in keyword order."""
    # Tokenize on runs of letters instead of whitespace so punctuation and
    # Markdown emphasis ("alert.", "**suspicious**", "_anomaly_") do not hide
    # a match. A single Counter pass replaces one list scan per keyword.
    tokens = Counter(re.findall(r"[a-z]+", (analysis_text or "").lower()))
    return [tokens[kw] for kw in keywords]


def generate_chart(analysis_text: str) -> plt.Figure:
    """
    Create a simple bar chart based on the frequency of selected keywords in the analysis.

    Matching is case-insensitive and on whole words only, so "alerts" does not
    count towards "alert".

    Args:
        analysis_text: The analysis text to scan. Empty or None yields a chart
            with all-zero bars.

    Returns:
        A matplotlib figure with one bar per keyword.
    """
    # Keywords of interest, in the order they appear on the x axis.
    keywords = ["suspicious", "anomaly", "incident", "alert", "object", "movement"]
    counts = _count_keywords(analysis_text, keywords)

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(keywords, counts, color="skyblue")
    ax.set_title("Keyword Frequency in Analysis")
    ax.set_ylabel("Count")
    ax.set_xlabel("Keyword")
    # Lay out this figure explicitly; plt.tight_layout() would act on whatever
    # pyplot currently considers the "current" figure.
    fig.tight_layout()
    return fig
def analyze_video(
    video_url: str,
    user_query: str,
    num_iterations: int = 3,
) -> tuple[str, plt.Figure]:
    """
    Perform iterative (agentic) video analysis.

    The first round asks for a summary; each later round feeds the previous
    analysis back to the model and asks for refinement. The user query, if
    provided, is included in every prompt.

    Args:
        video_url: URI of the video to analyze.
        user_query: Optional focus for the analysis; an empty value is ignored.
        num_iterations: Number of analysis rounds to run. Defaults to 3.

    Returns:
        A tuple of (Markdown report, matplotlib keyword-frequency chart).
        If a round fails, the error is appended to the analysis gathered so
        far and the remaining rounds are skipped.
    """
    # The base prompt does not depend on the round, so build it once.
    base_prompt = (
        "You are a video analysis agent focusing on security and surveillance. "
        "Provide a detailed summary of the video, highlighting key events, "
        "suspicious activities, or anomalies."
    )
    if user_query:
        base_prompt += f" Also, focus on the following query: {user_query}"

    analysis = ""
    for i in range(num_iterations):
        if not analysis:
            # Nothing to refine yet (first round, or earlier rounds came back
            # empty): ask for the initial summary.
            prompt = base_prompt
        else:
            prompt = (
                f"Based on the previous analysis: \"{analysis}\". "
                "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                "and details that would help a security team understand the situation better. "
            )
            if user_query:
                prompt += f"Remember to focus on: {user_query}"
        try:
            refined = call_gemini(video_url, prompt)
        except Exception as e:
            # UI boundary: report the failure in the output instead of crashing,
            # keeping whatever analysis earlier rounds produced.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break
        # An empty (or None) response must not wipe out a good earlier analysis.
        if refined:
            analysis = refined

    # Create a Markdown report (adding headings and bullet points if desired)
    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
    # Generate a chart visualization based on the analysis text.
    chart_fig = generate_chart(analysis)
    return markdown_report, chart_fig
def gradio_interface(video_url: str, user_query: str) -> tuple[str, object]:
    """
    Gradio interface function that takes a video URL and an optional query,
    then returns a Markdown report and a visualization chart.

    Args:
        video_url: URL entered by the user. Surrounding whitespace is ignored.
        user_query: Optional analysis focus. Surrounding whitespace is ignored.

    Returns:
        A tuple of (Markdown text, chart). The chart is the figure returned by
        analyze_video, or None when no usable URL was supplied.
    """
    # Treat a missing value (None) like an empty field, and drop stray
    # whitespace so a blank-looking URL is rejected here instead of being sent
    # to the API.
    video_url = (video_url or "").strip()
    user_query = (user_query or "").strip()
    if not video_url:
        return "Please provide a valid video URL.", None
    return analyze_video(video_url, user_query)
# Build the Gradio UI: two text inputs (URL, optional query) feeding
# gradio_interface, which fills a Markdown report and a plot.
iface = gr.Interface(
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
        "to iteratively analyze a video for security and surveillance insights. "
        "Provide a video URL and, optionally, a query to guide the analysis. "
        "The tool returns a detailed Markdown report along with a bar chart visualization "
        "of keyword frequency."
    ),
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
        gr.Textbox(
            label="Analysis Query (optional): guide the focus of the analysis",
            placeholder="e.g., focus on unusual movements near the entrance",
        ),
    ],
    outputs=[
        gr.Markdown(label="Security & Surveillance Analysis Report"),
        gr.Plot(label="Visualization: Keyword Frequency"),
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()