saadfarhad committed · Commit 704bddb · verified · 1 Parent(s): 3610af4

Update app.py

Files changed (1):
  1. app.py +10 -9
app.py CHANGED
@@ -1,30 +1,31 @@
-from transformers import AutoProcessor, AutoModelForCausalLM
-import torch
 import gradio as gr
+import torch
+from transformers import AutoProcessor, AutoModel
 
-# Ensure you use the latest version of transformers!
-# For example, in your requirements.txt, you might include:
-# transformers>=4.31.0
-
-# Load the processor and model while trusting remote code.
+# Load the processor and model with remote code enabled.
 processor = AutoProcessor.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModel.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
 
+# Use GPU if available.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
 def analyze_video(video_path):
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
-    # The processor is expected to handle both text and video input.
+    # Process text and video.
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
     inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    # Generate a response (this assumes the remote code has added a generate method).
     outputs = model.generate(**inputs, max_new_tokens=100)
+
+    # Decode the output tokens.
     answer = processor.decode(outputs[0], skip_special_tokens=True)
     return answer
 
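
The hunk ends at the return statement of analyze_video, so the commit does not show how the function is wired into the Gradio app that `import gradio as gr` implies. As a point of reference only, a minimal sketch of that wiring might look like the following; the component choices, labels, title, and launch call are assumptions, not part of this commit.

    import gradio as gr  # already imported at the top of app.py

    # Hypothetical Gradio wiring (illustrative sketch, not shown in this hunk):
    # exposes analyze_video as a video-in / text-out web app.
    demo = gr.Interface(
        fn=analyze_video,                        # function defined in the diff above
        inputs=gr.Video(label="Concert video"),  # assumed input component
        outputs=gr.Textbox(label="Analysis"),    # assumed output component
        title="LLaVA-Video concert analyzer",    # assumed title
    )

    if __name__ == "__main__":
        demo.launch()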
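
The old file's comment about pinning transformers in requirements.txt was dropped by this commit. If the Space still needs a recent transformers release for this trust_remote_code model, a requirements.txt along these lines would cover the imports used above; the exact version pin is taken from the removed comment and is an assumption, not something the commit states.

    # requirements.txt (illustrative; version pin is an assumption)
    transformers>=4.31.0
    torch
    gradio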