y-video / app.py
Docfile's picture
Update app.py
378ed8f verified
raw
history blame
4 kB
import gradio as gr
import google.generativeai as genai
import cv2
import os
import shutil
# Set up your API key. The GOOGLE_API_KEY environment variable takes
# precedence so the real key never has to be committed with the source;
# the placeholder remains the fallback so existing behaviour is unchanged
# when the variable is unset or empty.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY") or "YOUR_API_KEY")
# Constants
# Directory that receives the extracted frames; it is wiped on every run.
# Overridable through the environment because the default is an absolute
# path that may not be writable on every host.
FRAME_EXTRACTION_DIRECTORY = (
    os.environ.get("FRAME_EXTRACTION_DIRECTORY") or "/content/frames"
)
# Marker placed between the video-derived prefix and the MM:SS timestamp
# in every extracted frame's file name.
FRAME_PREFIX = "_frame"
# Function to create/cleanup frame output directory
def create_frame_output_dir(output_dir):
    """Leave ``output_dir`` existing and empty.

    A path that already exists is removed together with everything in it
    and then recreated; a missing path is simply created (including any
    missing parent directories).
    """
    already_there = os.path.exists(output_dir)
    if already_there:
        # Drop stale content from a previous run before recreating.
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
# Function to extract frames from video
def extract_frame_from_video(video_file_path):
    """Save one frame per second of a video as JPEG files.

    ``FRAME_EXTRACTION_DIRECTORY`` is emptied first. Each saved frame is
    named ``<video name with dots replaced by underscores>`` +
    ``FRAME_PREFIX`` + ``MM:SS`` + ``.jpg``, where ``MM:SS`` is the second
    of the video the frame belongs to.

    Args:
        video_file_path: Path of the video file to read.

    Returns:
        The number of frames written to disk.

    Raises:
        ValueError: If the video cannot be opened or reports a frame rate
            that is not a positive number.
    """
    create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
    vidcap = cv2.VideoCapture(video_file_path)
    try:
        if not vidcap.isOpened():
            raise ValueError(f"Could not open video file: {video_file_path}")

        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN; without this guard a zero rate
        # ends in a ZeroDivisionError inside the loop below.
        if not fps > 0:
            raise ValueError(
                f"Video reports an invalid frame rate ({fps}): {video_file_path}"
            )

        output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
        frame_count = 0  # frames written so far == next whole second to capture
        count = 0  # frames read so far
        while vidcap.isOpened():
            success, frame = vidcap.read()
            if not success:
                # End of the stream (or a read error).
                break
            # Keep the first frame that falls into each new whole second.
            if int(count / fps) == frame_count:
                minutes, seconds = divmod(frame_count, 60)
                time_string = f"{minutes:02d}:{seconds:02d}"
                image_name = f"{output_file_prefix}{FRAME_PREFIX}{time_string}.jpg"
                output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
                cv2.imwrite(output_filename, frame)
                frame_count += 1
            count += 1
        return frame_count
    finally:
        # Release the capture even when reading or writing fails.
        vidcap.release()
# Class to represent a file
class File:
    """A local frame image together with its upload state.

    Every attribute is defined on construction so callers can read them
    without guarding against ``AttributeError``.
    """

    def __init__(self, file_path: str, display_name: str = None):
        """Record the path and derive the timestamp from the file name.

        Args:
            file_path: Path of the frame image on disk.
            display_name: Optional human-readable name; stored as ``None``
                when omitted or empty.
        """
        self.file_path = file_path
        self.display_name = display_name if display_name else None
        # Filled in by set_file_response() once the file has been uploaded.
        self.response = None
        self.timestamp = self.get_timestamp(file_path)

    def set_file_response(self, response):
        """Store the object returned by the upload call for this file."""
        self.response = response

    def get_timestamp(self, filename):
        """Return the text between the last ``FRAME_PREFIX`` and the first dot after it.

        Only the final path component is inspected, and the *last*
        occurrence of the marker is used, so a video or directory whose
        own name contains the marker no longer hides the timestamp.

        Args:
            filename: File name or path of an extracted frame.

        Returns:
            The timestamp string (e.g. ``"01:23"``), or ``None`` when the
            file name does not contain ``FRAME_PREFIX``.
        """
        _, marker, tail = os.path.basename(filename).rpartition(FRAME_PREFIX)
        if not marker:
            return None
        return tail.split('.')[0]
# Function to upload files to Gemini
def upload_files(files_to_upload):
    """Upload each file and attach the upload response to it.

    Files are processed in the order given. Each item has its
    ``file_path`` sent through ``genai.upload_file`` and the result stored
    via ``set_file_response``.

    Returns:
        A new list holding the same file objects, in the same order.
    """
    done = []
    for item in files_to_upload:
        item.set_file_response(genai.upload_file(path=item.file_path))
        done.append(item)
    return done
# Function to generate description using Gemini
def generate_description(
    uploaded_files,
    prompt="Describe this video.",
    model_name="models/gemini-1.5-pro-latest",
    timeout=600,
):
    """Ask a Gemini model to describe the video represented by the frames.

    The request is the prompt followed, for every file in order, by its
    timestamp and its upload response.

    Args:
        uploaded_files: Objects exposing ``timestamp`` and ``response``
            attributes, in playback order.
        prompt: Instruction placed at the start of the request.
        model_name: Name of the generative model to use.
        timeout: Request timeout passed through ``request_options``.

    Returns:
        The ``text`` of the model response.
    """
    model = genai.GenerativeModel(model_name=model_name)
    request = [prompt]
    for file in uploaded_files:
        # A frame without a parsable timestamp is still sent, just without
        # a label; a None entry is presumably not valid request content.
        if file.timestamp is not None:
            request.append(file.timestamp)
        request.append(file.response)
    response = model.generate_content(request, request_options={"timeout": timeout})
    return response.text
# Function to delete files from Gemini
def delete_files(uploaded_files):
    """Delete every given file from Gemini, in order.

    Each item's ``response.name`` is passed to ``genai.delete_file``.
    """
    remote_names = (item.response.name for item in uploaded_files)
    for remote_name in remote_names:
        genai.delete_file(remote_name)
# Gradio interface
def process_video(video_file):
    """Describe an uploaded video and return the result as display text.

    Frames are extracted from the video, uploaded, described by the model
    and then deleted from the remote side again.

    Args:
        video_file: Either a path (``str`` / ``os.PathLike``) or an object
            whose ``name`` attribute holds the path. ``None`` means that
            no video was supplied.

    Returns:
        A success message containing the description, or a message
        starting with ``"An error occurred: "``. Exceptions are reported
        in the returned text rather than raised.
    """
    if video_file is None:
        return "An error occurred: no video file was provided."

    try:
        # Accept a plain path as well as a file-like wrapper with `.name`.
        if isinstance(video_file, (str, os.PathLike)):
            video_path = os.fspath(video_file)
        else:
            video_path = video_file.name

        # Extract frames
        extract_frame_from_video(video_path)

        # Prepare files for upload, ordered by file name
        files_to_upload = [
            File(file_path=os.path.join(FRAME_EXTRACTION_DIRECTORY, frame_name))
            for frame_name in sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))
        ]
        if not files_to_upload:
            return "An error occurred: no frames could be extracted from the video."

        try:
            # Upload files to Gemini
            uploaded_files = upload_files(files_to_upload)
            # Generate description
            description = generate_description(uploaded_files)
        finally:
            # Delete whatever reached Gemini, even when the upload or the
            # generation failed part-way, so no remote files are leaked.
            delete_files(
                [
                    f
                    for f in files_to_upload
                    if getattr(f, "response", None) is not None
                ]
            )

        return f"Video processed successfully! Description:\n\n{description}"
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Create Gradio interface
iface = gr.Interface(
    fn=process_video,
    # gr.Video has no `type` parameter; it passes the uploaded video to
    # the callback as a file path.
    inputs=gr.Video(),
    outputs=gr.Textbox(),
    title="Video Description with Gemini",
    description="Upload a video to get a description using Google Gemini",
)
# Launch the interface
iface.launch()