codelion committed on
Commit
78aee58
·
verified ·
1 Parent(s): c137e5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -157
app.py CHANGED
@@ -1,198 +1,111 @@
1
  import os
 
2
  import gradio as gr
3
- import cv2
4
  from google import genai
5
- from google.genai.types import Part
6
- from tenacity import retry, stop_after_attempt, wait_random_exponential
7
 
8
# The Gemini API key is supplied through the environment; startup is aborted
# when it is missing or empty.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# One module-wide Gemini client, reused by every request.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Name of the Gemini model that every generate_content call targets.
MODEL_NAME = "gemini-2.0-flash"
18
-
19
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
def call_gemini(video_file: str, prompt: str) -> str:
    """
    Call the Gemini model with a video file and prompt.

    The video is read from disk and sent inline with the request. The call is
    retried up to three times with randomized exponential backoff (see the
    decorator) before the last exception propagates to the caller.

    Args:
        video_file (str): Path to the video file
        prompt (str): Text prompt to guide the analysis

    Returns:
        str: Response text from the Gemini API
    """
    with open(video_file, "rb") as f:
        file_bytes = f.read()

    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            # Raw bytes must be wrapped as inline data; `file_data` is meant
            # for references to already-uploaded files and `Part` itself has
            # no `mime_type` field.
            # NOTE(review): inline payloads are size-limited by the API -
            # confirm the limit for large videos.
            Part.from_bytes(data=file_bytes, mime_type="video/mp4"),
            Part(text=prompt),
        ],
    )
    return response.text
41
-
42
def safe_call_gemini(video_file: str, prompt: str) -> str:
    """
    Invoke call_gemini and convert any failure into a printable message.

    Args:
        video_file (str): Path to the video file
        prompt (str): Text prompt for the API

    Returns:
        str: The API response text, or a string beginning with
            "Gemini call failed: " when the call raised
    """
    try:
        result = call_gemini(video_file, prompt)
    except Exception as exc:
        # Never propagate: callers detect failure by inspecting the text.
        failure = f"Gemini call failed: {str(exc)}"
        print(failure)
        return failure
    return result
59
-
60
def hhmmss_to_seconds(time_str: str) -> float:
    """
    Convert a HH:MM:SS formatted string into seconds.

    "MM:SS" and plain "SS" are accepted as well; each field may be fractional.

    Args:
        time_str (str): Time string in HH:MM:SS, MM:SS or SS format

    Returns:
        float: Time in seconds

    Raises:
        ValueError: If a field is not numeric (including empty input) or the
            string has more than three colon-separated fields
    """
    fields = [float(field) for field in time_str.strip().split(":")]
    if len(fields) > 3:
        # Previously such input silently yielded only the first field.
        raise ValueError(f"Invalid time string: {time_str!r}")

    # Fold left to right: every additional field scales the running total by 60.
    seconds = 0.0
    for value in fields:
        seconds = seconds * 60 + value
    return seconds
78
-
79
def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
    """
    Extract key frames from the video based on timestamps provided by Gemini.

    Gemini is asked for "HH:MM:SS - description" lines; each parsable
    timestamp is then looked up in the video with OpenCV.

    Args:
        video_file (str): Path to the video file
        summary (str): Video summary to provide context
        user_query (str): Optional user query to focus the analysis

    Returns:
        list: List of tuples (image_array, caption); empty when the Gemini
            call fails, the video cannot be opened, or nothing could be parsed
    """
    prompt = (
        "List the key timestamps in the video and a brief description of the event at that time. "
        "Output one line per event in the format: HH:MM:SS - description. Do not include any extra text."
    )
    prompt += f" Video Summary: {summary}"
    if user_query:
        prompt += f" Focus on: {user_query}"

    key_frames_response = safe_call_gemini(video_file, prompt)
    # The failure marker is always a prefix; an empty/None response is treated
    # the same way instead of crashing on the string operations below.
    if not key_frames_response or key_frames_response.startswith("Gemini call failed"):
        return []

    key_frames = []
    for raw_line in key_frames_response.strip().split("\n"):
        # Tolerate list markers the model may prepend ("- 00:01:02 - ...").
        line = raw_line.strip().lstrip("-*\u2022 \t")
        if " - " not in line:
            continue
        timestamp, description = line.split(" - ", 1)
        key_frames.append({"timestamp": timestamp.strip(), "description": description.strip()})

    extracted_frames = []
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print("Error: Could not open the uploaded video file.")
        return extracted_frames

    try:
        for frame_obj in key_frames:
            ts = frame_obj.get("timestamp")
            description = frame_obj.get("description", "")
            try:
                seconds = hhmmss_to_seconds(ts)
            except Exception:
                # Skip lines whose timestamp is not parsable.
                continue
            cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
            ret, frame = cap.read()
            if ret:
                # OpenCV decodes to BGR; convert to RGB for display.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                extracted_frames.append((frame_rgb, f"{ts}: {description}"))
    finally:
        # Release the capture handle even if frame processing raises.
        cap.release()
    return extracted_frames
133
-
134
def analyze_video(video_file: str, user_query: str) -> "tuple[str, list]":
    """
    Analyze the video and generate a summary and key frames.

    Args:
        video_file (str): Path to the video file
        user_query (str): Optional query to guide the analysis

    Returns:
        tuple: (Markdown report, list of key frames)
    """
    summary_prompt = "Summarize this video."
    if user_query:
        summary_prompt += f" Also focus on: {user_query}"
    summary = safe_call_gemini(video_file, summary_prompt)

    report_parts = [f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"]

    # When the summary call failed, `summary` holds an error message. Do not
    # feed that message to Gemini as context for a second request.
    summary_failed = isinstance(summary, str) and summary.startswith("Gemini call failed")
    key_frames_gallery = [] if summary_failed else get_key_frames(video_file, summary, user_query)

    if not key_frames_gallery:
        report_parts.append("\n*No key frames were extracted.*\n")
    else:
        report_parts.append("\n**Key Frames Extracted:**\n")
        report_parts.extend(
            f"- **Frame {idx}:** {caption}\n"
            for idx, (_img, caption) in enumerate(key_frames_gallery, start=1)
        )
    return "".join(report_parts), key_frames_gallery
159
-
160
def gradio_interface(video_file, user_query: str) -> "tuple[str, list]":
    """
    Gradio interface function to process video and return results.

    Args:
        video_file (str): Path to the uploaded video file
        user_query (str): Optional query to guide analysis

    Returns:
        tuple: (Markdown report, gallery of key frames); for a missing or
            non-MP4 upload the report is a short message and the gallery is
            empty
    """
    # Reject missing uploads and paths that no longer exist on disk.
    if not video_file or not os.path.exists(video_file):
        return "Please upload a valid video file.", []
    # Only the file extension is checked, not the actual container format.
    if not video_file.lower().endswith('.mp4'):
        return "Please upload an MP4 video file.", []
    return analyze_video(video_file, user_query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
# Build the Gradio UI: a video upload plus an optional free-text query in,
# a Markdown report plus a gallery of key frames out.
_video_input = gr.Video(label="Upload Video File")
_query_input = gr.Textbox(
    label="Analysis Query (optional): guide the focus of the analysis",
    placeholder="e.g., focus on unusual movements near the entrance",
)
_report_output = gr.Markdown(label="Security & Surveillance Analysis Report")
_frames_output = gr.Gallery(label="Extracted Key Frames", columns=2)

_description = (
    "This tool uses Google's Gemini 2.0 Flash model to analyze an uploaded video. "
    "It returns a brief summary and extracts key frames based on that summary. "
    "Provide a video file and, optionally, a query to guide the analysis."
)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_video_input, _query_input],
    outputs=[_report_output, _frames_output],
    title="AI Video Analysis and Summariser Agent",
    description=_description,
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
 
 
1
  import os
2
+ import time
3
  import gradio as gr
 
4
  from google import genai
5
+ from google.genai import types
 
6
 
7
# Retrieve API key from environment variables; startup is aborted when it is
# missing or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable with your Google Cloud API key.")

# Initialize the Gemini API client (shared by every request in this module)
client = genai.Client(api_key=GOOGLE_API_KEY)

# Model used for video analysis. The default is an experimental, dated model
# ID, so it can be overridden through GEMINI_MODEL_NAME without a code change.
MODEL_NAME = os.environ.get("GEMINI_MODEL_NAME", "gemini-2.5-pro-exp-03-25")
15
 
16
def upload_and_process_video(
    video_file: str,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
) -> types.File:
    """
    Upload a video file to the Gemini API and wait for processing.

    Args:
        video_file (str): Path to the video file
        poll_interval (float): Seconds to wait between status checks
        timeout (float): Maximum number of seconds to wait for processing
            to finish before giving up

    Returns:
        types.File: Processed video file object

    Raises:
        Exception: If the upload fails, processing ends in the FAILED state,
            or processing does not finish within `timeout`; the original
            error is attached as the cause
    """
    def state_of(file_obj):
        # `state` may be an enum member or a plain string; compare by name.
        return getattr(file_obj.state, "name", file_obj.state)

    try:
        video_file_obj = client.files.upload(file=video_file)

        # Poll until the service reports a terminal state, but never forever:
        # a stuck upload would otherwise block the request indefinitely.
        deadline = time.monotonic() + timeout
        while state_of(video_file_obj) == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Video processing did not finish within {timeout:.0f} seconds"
                )
            print(f"Processing {video_file}...")
            time.sleep(poll_interval)
            video_file_obj = client.files.get(name=video_file_obj.name)

        if state_of(video_file_obj) == "FAILED":
            # Prefer the error reported for the file, if any, over the bare state.
            detail = getattr(video_file_obj, "error", None) or video_file_obj.state
            raise ValueError(f"Video processing failed: {detail}")

        print(f"Video processing complete: {video_file_obj.uri}")
        return video_file_obj
    except Exception as e:
        # Keep the original exception as the cause so the traceback survives.
        raise Exception(f"Error uploading video: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
def analyze_video(video_file: str, user_query: str) -> str:
    """
    Analyze the video using the Gemini API and return a summary.

    Args:
        video_file (str): Path to the video file
        user_query (str): Optional query to guide the analysis

    Returns:
        str: Markdown-formatted report. Invalid input yields a short
            instruction; any failure during upload or analysis yields a
            report containing the error details instead of raising.
    """
    # Validate input
    if not video_file or not os.path.exists(video_file):
        return "Please upload a valid video file."
    if not video_file.lower().endswith('.mp4'):
        return "Please upload an MP4 video file."

    video_file_obj = None
    try:
        # Upload and process the video
        video_file_obj = upload_and_process_video(video_file)

        # Prepare prompt; a blank or whitespace-only query adds nothing.
        prompt = "Provide a detailed summary of this video."
        focus = (user_query or "").strip()
        if focus:
            prompt += f" Focus on: {focus}"

        # Analyze video with Gemini API
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                video_file_obj,  # Pass the processed video file object
                prompt,
            ],
        )
        summary = response.text
        if not summary:
            # Avoid rendering the literal "None" as the summary.
            raise ValueError("The model returned no text for this video.")

        # Generate Markdown report
        return (
            "## Video Analysis Report\n\n"
            f"**Summary:**\n{summary}\n"
        )

    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        return (
            "## Video Analysis Report\n\n"
            f"**Error:** Unable to analyze video.\n"
            f"Details: {str(e)}\n"
        )
    finally:
        # Best-effort cleanup so uploads do not pile up on the service; a
        # cleanup failure must not mask the report.
        if video_file_obj is not None:
            try:
                client.files.delete(name=video_file_obj.name)
            except Exception as cleanup_error:
                print(f"Could not delete uploaded file: {cleanup_error}")
91
 
92
# Define the Gradio interface: a video upload plus an optional query in,
# a single Markdown report out.
iface = gr.Interface(
    fn=analyze_video,
    inputs=[
        # gr.Video takes no `type` argument (that belongs to components such
        # as gr.Audio, gr.Image and gr.File); passing it raises a TypeError on
        # current Gradio.
        # NOTE(review): the handler relies on receiving a file path string -
        # confirm against the installed Gradio version.
        gr.Video(label="Upload Video File (MP4)"),
        gr.Textbox(
            label="Analysis Query (optional)",
            placeholder="e.g., focus on main events or themes",
        ),
    ],
    outputs=gr.Markdown(label="Video Analysis Report"),
    title="AI Video Analysis Agent with Gemini",
    description=(
        "Upload an MP4 video to get a summary using Google's Gemini API. "
        "This tool analyzes the video content directly without audio or frame extraction. "
        "Optionally, provide a query to guide the analysis."
    ),
)

if __name__ == "__main__":
    # Launch with share=True to create a public link
    iface.launch(share=True)