File size: 3,999 Bytes
f74e492
378ed8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f74e492
378ed8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f74e492
378ed8f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
import google.generativeai as genai
import cv2
import os
import shutil

# Set up your API key. Prefer the GOOGLE_API_KEY environment variable so the
# secret never has to be written into this file; the placeholder below is only
# a fallback and must be replaced if the variable is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY"))

# Constants
# Directory that receives the extracted frames; it is wiped and recreated each
# time frames are extracted.
FRAME_EXTRACTION_DIRECTORY = "/content/frames"
# Marker placed between the video name and the MM:SS timestamp in frame file
# names; File.get_timestamp relies on it to recover the timestamp.
FRAME_PREFIX = "_frame"

# Function to create/cleanup frame output directory
def create_frame_output_dir(output_dir):
    """Leave ``output_dir`` as a freshly created, empty directory.

    Anything already present at that path is removed first, so frames from a
    previous run never mix with the new ones.
    """
    already_there = os.path.exists(output_dir)
    if already_there:
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

# Function to extract frames from video
def extract_frame_from_video(video_file_path):
    """Sample about one frame per second of a video and save each as a JPEG.

    ``FRAME_EXTRACTION_DIRECTORY`` is recreated empty before extraction. Every
    sampled frame is written there under the name
    ``<video basename with '.' replaced by '_'><FRAME_PREFIX><MM:SS>.jpg``.

    Args:
        video_file_path: Path of the video file to read.

    Returns:
        The number of frames that were sampled.

    Raises:
        ValueError: If the video cannot be opened or reports no usable frame
            rate (previously this surfaced as an opaque ZeroDivisionError).
    """
    create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
    vidcap = cv2.VideoCapture(video_file_path)
    try:
        if not vidcap.isOpened():
            raise ValueError(f"Could not open video file: {video_file_path}")

        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # "not fps > 0" also rejects NaN, which a plain "fps <= 0" would let through.
        if not fps > 0:
            raise ValueError(
                f"Video reports an invalid frame rate ({fps}): {video_file_path}"
            )

        output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
        frame_count = 0  # frames sampled so far == next whole second to sample
        count = 0  # frames read so far
        while vidcap.isOpened():
            success, frame = vidcap.read()
            if not success:
                break
            # Keep the first frame whose position falls into the next second.
            if int(count / fps) == frame_count:
                minutes = frame_count // 60
                seconds = frame_count % 60
                time_string = f"{minutes:02d}:{seconds:02d}"
                image_name = f"{output_file_prefix}{FRAME_PREFIX}{time_string}.jpg"
                output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
                cv2.imwrite(output_filename, frame)
                frame_count += 1
            count += 1
        return frame_count
    finally:
        # Release the capture even when reading or writing fails.
        vidcap.release()

# Class to represent a file
class File:
    """A local frame image plus the state of its upload.

    Attributes:
        file_path: Path of the image on disk.
        display_name: Optional human-readable name; ``None`` when not given.
        timestamp: Text between ``FRAME_PREFIX`` and the next ``.`` in
            ``file_path``, or ``None`` when the prefix is absent.
        response: Value stored by ``set_file_response``; ``None`` until then.
    """

    def __init__(self, file_path: str, display_name: str = None):
        self.file_path = file_path
        # Always define these attributes so reading them never raises
        # AttributeError, even before an upload or without a display name.
        self.display_name = display_name
        self.response = None
        self.timestamp = self.get_timestamp(file_path)

    def set_file_response(self, response):
        """Remember the upload response associated with this file."""
        self.response = response

    def get_timestamp(self, filename):
        """Return the timestamp embedded in ``filename``, or ``None``.

        The timestamp is whatever follows the LAST occurrence of
        ``FRAME_PREFIX`` up to the next ``.``. Using the last occurrence keeps
        this working when the video's own name also contains the prefix.
        """
        _, marker, tail = filename.rpartition(FRAME_PREFIX)
        if not marker:
            return None
        return tail.split('.')[0]

# Function to upload files to Gemini
def upload_files(files_to_upload):
    """Upload each file with ``genai.upload_file`` and record its response.

    Every item must expose ``file_path`` and ``set_file_response``. The items
    are returned in a new list, in the order they were uploaded.
    """
    completed = []
    for item in files_to_upload:
        item.set_file_response(genai.upload_file(path=item.file_path))
        completed.append(item)
    return completed

# Function to generate description using Gemini
def generate_description(uploaded_files):
    """Ask the Gemini model to describe the video represented by the frames.

    The request is the prompt followed, for every file, by its ``timestamp``
    and then its ``response``. Returns the ``text`` of the model's reply.
    """
    gemini = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
    parts = ["Describe this video."]
    for item in uploaded_files:
        parts.extend((item.timestamp, item.response))
    reply = gemini.generate_content(parts, request_options={"timeout": 600})
    return reply.text

# Function to delete files from Gemini
def delete_files(uploaded_files):
    """Delete every uploaded file via ``genai.delete_file``.

    Each item's ``response.name`` is passed as the identifier to delete.
    """
    for item in uploaded_files:
        remote_name = item.response.name
        genai.delete_file(remote_name)

# Gradio interface
def process_video(video_file):
    """Run the full pipeline for one uploaded video and return a status text.

    Steps: extract frames, upload them, request a description, and delete the
    uploads again.

    Args:
        video_file: The uploaded video, either as a path (``str`` or
            ``os.PathLike``) or as an object exposing the path as ``.name``.

    Returns:
        A success message containing the description, or
        ``"An error occurred: ..."`` when any step fails. Exceptions are
        reported in the returned text rather than raised.
    """
    try:
        if video_file is None:
            raise ValueError("No video file was provided.")

        # Gradio's Video component hands the callback a plain path string;
        # file-like upload objects expose the path as ``.name`` instead.
        if isinstance(video_file, (str, os.PathLike)):
            video_path = video_file
        else:
            video_path = video_file.name

        # Extract frames
        frame_count = extract_frame_from_video(video_path)
        if frame_count == 0:
            # Without frames the model would only receive the bare prompt.
            raise ValueError("No frames could be extracted from the video.")

        # Prepare files for upload (sorted so frames stay in time order)
        files_to_upload = [
            File(file_path=os.path.join(FRAME_EXTRACTION_DIRECTORY, name))
            for name in sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))
        ]

        # Upload files to Gemini
        uploaded_files = upload_files(files_to_upload)

        try:
            # Generate description
            description = generate_description(uploaded_files)
        finally:
            # Delete files from Gemini even when generation fails, so the
            # uploads are not left behind.
            delete_files(uploaded_files)

        return f"Video processed successfully! Description:\n\n{description}"
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Create Gradio interface
# gr.Video has no ``type`` parameter (that option belongs to components such
# as gr.File / gr.Image); passing it raises TypeError on Gradio versions whose
# components do not accept extra keyword arguments, so it is omitted here.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(),
    outputs=gr.Textbox(),
    title="Video Description with Gemini",
    description="Upload a video to get a description using Google Gemini",
)

# Launch the interface
iface.launch()