saadfarhad committed on
Commit
deb3cb9
·
verified ·
1 Parent(s): 6d16e6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -2,9 +2,15 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForCausalLM
4
 
5
- # Load the processor and model from Hugging Face
6
- processor = AutoProcessor.from_pretrained("lmms-lab/LLaVA-Video-7B-Qwen2")
7
- model = AutoModelForCausalLM.from_pretrained("lmms-lab/LLaVA-Video-7B-Qwen2")
 
 
 
 
 
 
8
 
9
  # Set the device (use GPU if available)
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -12,17 +18,12 @@ model.to(device)
12
 
13
  def analyze_video(video_path):
14
  """
15
- This function accepts the path to a video file,
16
- then uses the LLaVA-Video-7B-Qwen2 model to analyze the video.
17
-
18
- The prompt instructs the model to analyze the video and return
19
- the moment when the crowd is most engaged.
20
  """
21
- # Define the prompt for the model
22
  prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
23
 
24
- # Process the video and prompt.
25
- # Note: The processor is expected to handle the video input (e.g., by reading frames).
26
  inputs = processor(text=prompt, video=video_path, return_tensors="pt")
27
 
28
  # Move all tensor inputs to the selected device
 
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForCausalLM
4
 
5
+ # Load the processor and model, trusting the remote code for custom implementations
6
+ processor = AutoProcessor.from_pretrained(
7
+ "lmms-lab/LLaVA-Video-7B-Qwen2",
8
+ trust_remote_code=True
9
+ )
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ "lmms-lab/LLaVA-Video-7B-Qwen2",
12
+ trust_remote_code=True
13
+ )
14
 
15
  # Set the device (use GPU if available)
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
18
 
19
  def analyze_video(video_path):
20
  """
21
+ Analyzes a concert/event video to determine the moment when the crowd is most engaged.
 
 
 
 
22
  """
23
+ # Define the prompt instructing the model on what to do
24
  prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
25
 
26
+ # Process the video and prompt
 
27
  inputs = processor(text=prompt, video=video_path, return_tensors="pt")
28
 
29
  # Move all tensor inputs to the selected device