Spaces:

saadfarhad
/

Crowdanalyzer_v1

Runtime error

saadfarhad committed on Feb 9

Commit

deb3cb9

verified ·

1 Parent(s): 6d16e6e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,9 +2,15 @@ import gradio as gr
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
-# Load the processor and model from Hugging Face
-processor = AutoProcessor.from_pretrained("lmms-lab/LLaVA-Video-7B-Qwen2")
-model = AutoModelForCausalLM.from_pretrained("lmms-lab/LLaVA-Video-7B-Qwen2")
 # Set the device (use GPU if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -12,17 +18,12 @@ model.to(device)
 def analyze_video(video_path):
     """
-    This function accepts the path to a video file,
-    then uses the LLaVA-Video-7B-Qwen2 model to analyze the video.
-    The prompt instructs the model to analyze the video and return
-    the moment when the crowd is most engaged.
     """
-    # Define the prompt for the model
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
-    # Process the video and prompt.
-    # Note: The processor is expected to handle the video input (e.g., by reading frames).
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
     # Move all tensor inputs to the selected device

 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
+# Load the processor and model, trusting the remote code for custom implementations
+processor = AutoProcessor.from_pretrained(
+    "lmms-lab/LLaVA-Video-7B-Qwen2",
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    "lmms-lab/LLaVA-Video-7B-Qwen2",
+    trust_remote_code=True
+)
 # Set the device (use GPU if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 def analyze_video(video_path):
     """
+    Analyzes a concert/event video to determine the moment when the crowd is most engaged.
     """
+    # Define the prompt instructing the model on what to do
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
+    # Process the video and prompt
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
     # Move all tensor inputs to the selected device