File size: 2,700 Bytes
3b421e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7efde76
f0272e1
3b421e3
f0272e1
 
7efde76
3b421e3
 
f0272e1
3b421e3
 
 
 
7efde76
3b421e3
 
 
 
7efde76
3b421e3
f0272e1
 
3b421e3
 
 
f0272e1
7efde76
3b421e3
f0272e1
3b421e3
 
f0272e1
 
 
 
 
 
 
 
 
3b421e3
f0272e1
 
 
7efde76
 
3b421e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys
import subprocess
import pkg_resources

# Mapping of importable package name -> pip install target for every
# third-party dependency this script needs. Keys are checked against the
# set of installed distributions; values are what gets passed to
# `pip install` (here they happen to be identical).
required_packages = {
    'torch': 'torch',
    'gradio': 'gradio',
    'transformers': 'transformers',
    'decord': 'decord',
    'numpy': 'numpy'
}

def install_packages(packages):
    """Install the given pip package names into the current interpreter.

    Args:
        packages: Iterable of pip-installable package names. An empty
            iterable is a no-op (no subprocess is spawned).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    packages = list(packages)
    if not packages:
        return
    # Single pip invocation for the whole batch: one subprocess instead of
    # one per package, and pip can resolve their dependencies together.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])

def check_and_install_packages():
    """Ensure every entry in ``required_packages`` is installed.

    Enumerates installed distributions with the stdlib
    ``importlib.metadata`` (the previously used ``pkg_resources`` API is
    deprecated and removed from recent setuptools), then pip-installs any
    that are missing via :func:`install_packages`.
    """
    # Local import keeps the module's top-level import block untouched.
    from importlib import metadata

    def _norm(name):
        # Approximation of PEP 503 name normalization; adequate for the
        # simple names in required_packages.
        return name.lower().replace("_", "-")

    installed = {
        _norm(dist.metadata["Name"])
        for dist in metadata.distributions()
        if dist.metadata["Name"]
    }
    missing = [
        pip_name
        for import_name, pip_name in required_packages.items()
        if _norm(import_name) not in installed
    ]

    if missing:
        print("Installing missing packages...")
        install_packages(missing)
        print("Packages installed successfully.")
    else:
        print("All required packages are already installed.")

# Run the dependency bootstrap BEFORE the third-party imports below; this
# may shell out to pip and mutate the current environment at import time.
check_and_install_packages()

# Now import the required modules
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from decord import VideoReader, cpu
import numpy as np

# Define a simple video processing function (placeholder for LLaVA-Video)
def process_video(video_path, max_frames=64):
    """Decode a video and return up to ``max_frames`` evenly spaced frames.

    Args:
        video_path: Path to a video file readable by decord.
        max_frames: Maximum number of frames to sample (default 64).

    Returns:
        A numpy array of sampled frames with leading dimension
        ``min(max_frames, total_frames)``.
        NOTE(review): assumed layout is (n, H, W, C) per decord's
        ``get_batch`` — confirm against the decord docs.

    Raises:
        ValueError: If the video contains no decodable frames.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frames = len(vr)
    if total_frames == 0:
        # Previously linspace(0, -1, ...) would produce invalid indices.
        raise ValueError(f"Video contains no frames: {video_path}")
    # Clamp the sample count so short clips are not padded with duplicate
    # frames (linspace repeats indices when total_frames < max_frames).
    num_samples = min(max_frames, total_frames)
    frame_indices = np.linspace(0, total_frames - 1, num_samples, dtype=int)
    frames = vr.get_batch(frame_indices).asnumpy()
    return frames

# Define a simple text generation function (placeholder for actual model)
def generate_response(video_frames, question):
    """Produce a text answer about the sampled frames.

    Placeholder for the real LLaVA-Video model: it simply reports how many
    frames were received and echoes the question back.
    """
    frame_count = len(video_frames)
    return "Analyzed {} frames. Your question was: {}".format(frame_count, question)

def analyze_instagram_short(video_file, question):
    """Answer *question* about an uploaded short video.

    Returns a prompt string when no video was supplied; otherwise samples
    frames from the video and feeds them to the response generator.
    """
    # Guard clause: Gradio passes None when no file was uploaded.
    if video_file is None:
        return "Please upload an Instagram short video."

    frames = process_video(video_file)
    return generate_response(frames, question)

# Create Gradio interface
# Layout: a two-widget input column (video upload + question textbox with a
# submit button) next to a single output textbox for the analysis result.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 Instagram Short Video Analyzer")
    gr.Markdown("Upload your Instagram short video and ask questions about its content!")
    
    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Upload Instagram Short Video")
            question_input = gr.Textbox(label="Ask a question about the video", placeholder="What's happening in this Instagram short?")
            submit_button = gr.Button("Analyze Short Video")
        output = gr.Textbox(label="Analysis Result")
    
    # Wire the button: analyze_instagram_short(video, question) -> result text.
    submit_button.click(
        fn=analyze_instagram_short,
        inputs=[video_input, question_input],
        outputs=output
    )

# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()