saadfarhad committed on
Commit
5642ff6
·
verified ·
1 Parent(s): deb3cb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -1,42 +1,49 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoProcessor, AutoModelForCausalLM
4
 
5
- # Load the processor and model, trusting the remote code for custom implementations
 
 
 
 
 
6
  processor = AutoProcessor.from_pretrained(
7
  "lmms-lab/LLaVA-Video-7B-Qwen2",
8
  trust_remote_code=True
9
  )
10
- model = AutoModelForCausalLM.from_pretrained(
11
  "lmms-lab/LLaVA-Video-7B-Qwen2",
12
  trust_remote_code=True
13
  )
14
 
15
- # Set the device (use GPU if available)
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
  model.to(device)
18
 
19
  def analyze_video(video_path):
20
  """
21
- Analyzes a concert/event video to determine the moment when the crowd is most engaged.
 
 
22
  """
23
- # Define the prompt instructing the model on what to do
24
  prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
25
 
26
- # Process the video and prompt
 
27
  inputs = processor(text=prompt, video=video_path, return_tensors="pt")
28
 
29
- # Move all tensor inputs to the selected device
30
  inputs = {key: value.to(device) for key, value in inputs.items()}
31
 
32
- # Generate the model's response
33
  outputs = model.generate(**inputs, max_new_tokens=100)
34
 
35
- # Decode the generated tokens to a human-readable string
36
  answer = processor.decode(outputs[0], skip_special_tokens=True)
37
  return answer
38
 
39
- # Create the Gradio Interface
40
  iface = gr.Interface(
41
  fn=analyze_video,
42
  inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoProcessor
4
 
5
+ # Import the model class from the installed transformers package, not from the repository's remote code.
6
+ # Note: The import path here follows the transformers package layout. If this fails,
7
+ # check the installed transformers version to confirm the correct import path and class name.
8
+ from transformers.models.llava.modeling_llava import LlavaForCausalLM
9
+
10
+ # Load the processor and model while trusting remote code.
11
  processor = AutoProcessor.from_pretrained(
12
  "lmms-lab/LLaVA-Video-7B-Qwen2",
13
  trust_remote_code=True
14
  )
15
+ model = LlavaForCausalLM.from_pretrained(
16
  "lmms-lab/LLaVA-Video-7B-Qwen2",
17
  trust_remote_code=True
18
  )
19
 
20
+ # Set device to GPU if available.
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
  model.to(device)
23
 
24
  def analyze_video(video_path):
25
  """
26
+ This function accepts the path to a video file,
27
+ then uses the LLaVA-Video model to analyze it for the moment
28
+ when the crowd is most engaged.
29
  """
 
30
  prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
31
 
32
+ # Process the text and video input.
33
+ # (Unverified: confirm that this processor accepts a video file path through the `video` keyword.)
34
  inputs = processor(text=prompt, video=video_path, return_tensors="pt")
35
 
36
+ # Move tensors to the device.
37
  inputs = {key: value.to(device) for key, value in inputs.items()}
38
 
39
+ # Generate a response.
40
  outputs = model.generate(**inputs, max_new_tokens=100)
41
 
42
+ # Decode the generated tokens to a string.
43
  answer = processor.decode(outputs[0], skip_special_tokens=True)
44
  return answer
45
 
46
+ # Create the Gradio interface.
47
  iface = gr.Interface(
48
  fn=analyze_video,
49
  inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),