saadfarhad committed
Commit 021fd45 · verified · 1 parent: 5642ff6

Update app.py

Files changed (1)
  1. app.py +9 -28
app.py CHANGED
@@ -1,58 +1,39 @@
-import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
 import torch
-from transformers import AutoProcessor
+import gradio as gr
 
-# Import the custom model class directly from the remote code.
-# Note: The import path here is based on the repository structure. If this fails,
-# check the model repository's files to confirm the correct import path and class name.
-from transformers.models.llava.modeling_llava import LlavaForCausalLM
+# Ensure you use the latest version of transformers!
+# For example, in your requirements.txt, you might include:
+# transformers>=4.31.0
 
 # Load the processor and model while trusting remote code.
 processor = AutoProcessor.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
-model = LlavaForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
 
-# Set device to GPU if available.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
 def analyze_video(video_path):
-    """
-    This function accepts the path to a video file,
-    then uses the LLaVA-Video model to analyze it for the moment
-    when the crowd is most engaged.
-    """
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
-
-    # Process the text and video input.
-    # (Make sure that the processor handles video inputs as expected.)
+    # The processor is expected to handle both text and video input.
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
-
-    # Move tensors to the device.
-    inputs = {key: value.to(device) for key, value in inputs.items()}
-
-    # Generate a response.
+    inputs = {k: v.to(device) for k, v in inputs.items()}
     outputs = model.generate(**inputs, max_new_tokens=100)
-
-    # Decode the generated tokens to a string.
     answer = processor.decode(outputs[0], skip_special_tokens=True)
     return answer
 
-# Create the Gradio interface.
 iface = gr.Interface(
     fn=analyze_video,
     inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),
     outputs=gr.Textbox(label="Engagement Analysis"),
     title="Crowd Engagement Analyzer",
-    description=(
-        "Upload a video of a concert or event and the model will analyze "
-        "the video to identify the moment when the crowd is most engaged."
-    )
+    description="Upload a video of a concert or event and the model will analyze the moment when the crowd is most engaged."
 )
 
 if __name__ == "__main__":
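The new header comment defers the version pin to requirements.txt. For reference, a plausible pin set for a Space running this app might look like the following; this is an assumption extrapolated from the comment, not a file in this commit (accelerate is optional and only needed for the loading sketch below):

```
transformers>=4.31.0
torch
gradio
accelerate
```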
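One caveat with the loading code on both sides of the diff: `from_pretrained` loads weights in float32 by default, so a 7B-parameter checkpoint wants roughly 28 GB of memory before `model.to(device)` even runs. A minimal lower-memory sketch, assuming a CUDA GPU and the `accelerate` package (not part of this commit):

```python
import torch
from transformers import AutoModelForCausalLM

# torch_dtype and device_map are standard from_pretrained kwargs;
# device_map="auto" requires the accelerate package and places the
# weights itself, making the later model.to(device) call redundant.
model = AutoModelForCausalLM.from_pretrained(
    "lmms-lab/LLaVA-Video-7B-Qwen2",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
```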
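The added in-code comment ("The processor is expected to handle both text and video input") is itself a guess: processors loaded via `AutoProcessor` differ in whether they accept a raw file path through `video=`. If that call fails, one fallback (hypothetical for this model, using OpenCV) is to sample frames manually and pass them in as images:

```python
import cv2

def sample_frames(video_path, num_frames=8):
    """Uniformly sample num_frames RGB frames from a video file."""
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(i * total / num_frames))
        ok, frame = cap.read()
        if ok:
            # OpenCV decodes to BGR; most processors expect RGB.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames

# Hypothetical fallback call -- many multimodal processors accept frames
# through images= rather than a path through video=:
# inputs = processor(text=prompt, images=sample_frames(video_path), return_tensors="pt")
```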
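A final behavior worth noting in `analyze_video`: for decoder-only models, `generate` returns the prompt tokens followed by the newly generated tokens, so decoding `outputs[0]` directly will usually echo the prompt back into the Gradio textbox. A hedged fix, assuming the processor output carries `input_ids` (true for most, though not all, multimodal processors):

```python
# Slice off the prompt tokens before decoding so only the answer is returned.
prompt_len = inputs["input_ids"].shape[1]
answer = processor.decode(outputs[0][prompt_len:], skip_special_tokens=True)
```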