Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
from collections import Counter | |
from google import genai | |
from google.genai import types | |
from google.genai.types import Part | |
from tenacity import retry, stop_after_attempt, wait_random_exponential | |
# --- Gemini client configuration ---
# The API key comes from the environment; refuse to start without one.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Model identifier sent with every request (Gemini 2.0 Flash).
MODEL_NAME = "gemini-2.0-flash-001"

# Shared Gemini API client, authenticated with the key read above.
client = genai.Client(api_key=GOOGLE_API_KEY)
def call_gemini(video_url: str, prompt: str, mime_type: str = "video/webm") -> str:
    """
    Send a video reference and a text prompt to the Gemini model.

    Args:
        video_url: URI of the video; it is passed to the model as a URI part.
        prompt: Text instruction sent alongside the video part.
        mime_type: MIME type declared for the video part. Defaults to
            "video/webm", the value this function previously hard-coded.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.

    Raises:
        Any exception raised by ``client.models.generate_content``; errors
        are not handled here.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            Part.from_uri(file_uri=video_url, mime_type=mime_type),
            prompt,
        ],
    )
    # The google-genai SDK types ``response.text`` as optional: it may be
    # None when the response has no text parts. Normalise to "" so the
    # declared ``str`` return type always holds for callers.
    return response.text or ""
def generate_chart(analysis_text: str) -> plt.Figure:
    """
    Build a bar chart of how often selected keywords occur in the analysis.

    Matching is case-insensitive and on whole words. Punctuation attached to
    a word (for example "suspicious," or "(alert)") does not prevent a match.

    Args:
        analysis_text: The analysis text to scan. ``None`` or an empty string
            produces a chart in which every keyword has a count of zero.

    Returns:
        A matplotlib figure with one bar per keyword.
    """
    # Keywords of interest, in the order they appear on the x-axis.
    keywords = ["suspicious", "anomaly", "incident", "alert", "object", "movement"]

    # Replace every non-letter with a space before splitting. A plain
    # whitespace split would leave tokens such as "alert." or "anomaly,"
    # that never equal a keyword, so those occurrences would go uncounted.
    lowered = (analysis_text or "").lower()
    letters_only = "".join(ch if ch.isalpha() else " " for ch in lowered)

    # Count all words in one pass, then look up each keyword
    # (a Counter returns 0 for words it has not seen).
    word_counts = Counter(letters_only.split())
    frequencies = [word_counts[kw] for kw in keywords]

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(keywords, frequencies, color="skyblue")
    ax.set_title("Keyword Frequency in Analysis")
    ax.set_ylabel("Count")
    ax.set_xlabel("Keyword")
    # Lay out this specific figure rather than pyplot's global "current
    # figure", which is not guaranteed to still be ``fig``.
    fig.tight_layout()
    return fig
def analyze_video(
    video_url: str, user_query: str, num_iterations: int = 3
) -> tuple[str, plt.Figure]:
    """
    Perform iterative (agentic) video analysis.

    The first iteration asks the model for a summary; each later iteration
    feeds the previous analysis back and asks for refined insights. The user
    query, when provided, is appended to every prompt.

    Args:
        video_url: URL of the video, forwarded unchanged to ``call_gemini``.
        user_query: Optional focus for the analysis; ignored when empty.
        num_iterations: Number of model calls to attempt. Defaults to 3.

    Returns:
        A ``(markdown_report, chart_figure)`` tuple: the report wraps the
        final analysis text, and the chart is built from that same text.
    """
    # The base prompt does not depend on the iteration, so build it once.
    base_prompt = "You are a video analysis agent focusing on security and surveillance. Provide a detailed summary of the video, highlighting key events, suspicious activities, or anomalies."
    if user_query:
        base_prompt += f" Also, focus on the following query: {user_query}"

    analysis = ""
    for i in range(num_iterations):
        if i == 0:
            prompt = base_prompt
        else:
            prompt = (f"Based on the previous analysis: \"{analysis}\". "
                      "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                      "and details that would help a security team understand the situation better. ")
            if user_query:
                prompt += f"Remember to focus on: {user_query}"
        try:
            refined = call_gemini(video_url, prompt)
        except Exception as e:
            # Deliberately broad: any failure is reported inside the report
            # instead of crashing the UI, and whatever analysis was gathered
            # so far is kept.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break
        # An empty or None reply must not wipe out the previous analysis;
        # a None value would also break the string operations below.
        analysis = refined or analysis

    # Wrap the final analysis in a Markdown report.
    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
    # Generate a chart visualization based on the analysis text.
    chart_fig = generate_chart(analysis)
    return markdown_report, chart_fig
def gradio_interface(video_url: str, user_query: str) -> tuple[str, object]:
    """
    Gradio callback: validate the inputs, then run the video analysis.

    Args:
        video_url: URL entered by the user. Surrounding whitespace is
            ignored; a missing, empty, or whitespace-only value is rejected.
        user_query: Optional query passed through to ``analyze_video``.

    Returns:
        A ``(markdown, chart)`` tuple. When no usable URL is given, the
        markdown is a short prompt asking for one and the chart is ``None``;
        otherwise it is the result of ``analyze_video``.
    """
    # Strip first so that a whitespace-only entry is treated as "no URL"
    # rather than being sent on to the model.
    url = (video_url or "").strip()
    if not url:
        return "Please provide a valid video URL.", None
    return analyze_video(url, user_query)
# Build the Gradio UI. The two text inputs are passed to gradio_interface,
# and its two return values fill the Markdown report and the plot.
_video_url_box = gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)")
_query_box = gr.Textbox(
    label="Analysis Query (optional): guide the focus of the analysis",
    placeholder="e.g., focus on unusual movements near the entrance",
)
_report_view = gr.Markdown(label="Security & Surveillance Analysis Report")
_chart_view = gr.Plot(label="Visualization: Keyword Frequency")

_app_description = (
    "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
    "to iteratively analyze a video for security and surveillance insights. Provide a video URL and, optionally, "
    "a query to guide the analysis. The tool returns a detailed Markdown report along with a bar chart visualization "
    "of keyword frequency."
)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_video_url_box, _query_box],
    outputs=[_report_view, _chart_view],
    title="AI Video Analysis and Summariser Agent",
    description=_app_description,
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()