Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,15 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
4 |
|
5 |
-
# Load the processor and model
|
6 |
-
processor = AutoProcessor.from_pretrained(
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Set the device (use GPU if available)
|
10 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -12,17 +18,12 @@ model.to(device)
|
|
12 |
|
13 |
def analyze_video(video_path):
|
14 |
"""
|
15 |
-
|
16 |
-
then uses the LLaVA-Video-7B-Qwen2 model to analyze the video.
|
17 |
-
|
18 |
-
The prompt instructs the model to analyze the video and return
|
19 |
-
the moment when the crowd is most engaged.
|
20 |
"""
|
21 |
-
# Define the prompt
|
22 |
prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
|
23 |
|
24 |
-
# Process the video and prompt
|
25 |
-
# Note: The processor is expected to handle the video input (e.g., by reading frames).
|
26 |
inputs = processor(text=prompt, video=video_path, return_tensors="pt")
|
27 |
|
28 |
# Move all tensor inputs to the selected device
|
|
|
2 |
import torch
|
3 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
4 |
|
5 |
+
# Load the processor and model, trusting the remote code for custom implementations
|
6 |
+
processor = AutoProcessor.from_pretrained(
|
7 |
+
"lmms-lab/LLaVA-Video-7B-Qwen2",
|
8 |
+
trust_remote_code=True
|
9 |
+
)
|
10 |
+
model = AutoModelForCausalLM.from_pretrained(
|
11 |
+
"lmms-lab/LLaVA-Video-7B-Qwen2",
|
12 |
+
trust_remote_code=True
|
13 |
+
)
|
14 |
|
15 |
# Set the device (use GPU if available)
|
16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
18 |
|
19 |
def analyze_video(video_path):
|
20 |
"""
|
21 |
+
Analyzes a concert/event video to determine the moment when the crowd is most engaged.
|
|
|
|
|
|
|
|
|
22 |
"""
|
23 |
+
# Define the prompt instructing the model on what to do
|
24 |
prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
|
25 |
|
26 |
+
# Process the video and prompt
|
|
|
27 |
inputs = processor(text=prompt, video=video_path, return_tensors="pt")
|
28 |
|
29 |
# Move all tensor inputs to the selected device
|