Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
from transformers import AutoProcessor | |
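# Import the model class from the installed `transformers` package (this import
# does not go through the model repository's remote code).
# Note: The import path here is based on the library's module structure. If this fails,
# check the installed transformers version and the model repository's files to confirm
# the correct import path and class name.
# NOTE(review): confirm that `LlavaForCausalLM` is really exported by
# `transformers.models.llava.modeling_llava`; the LLaVA class documented for that
# module is `LlavaForConditionalGeneration`.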
from transformers.models.llava.modeling_llava import LlavaForCausalLM | |
# Pick the compute device up front: CUDA when a GPU is visible, CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Fetch the processor for the checkpoint; remote code from the repo is trusted.
processor = AutoProcessor.from_pretrained(
    "lmms-lab/LLaVA-Video-7B-Qwen2", trust_remote_code=True
)

# Fetch the model weights (remote code trusted as well) and place the model on
# the selected device in the same expression.
model = LlavaForCausalLM.from_pretrained(
    "lmms-lab/LLaVA-Video-7B-Qwen2", trust_remote_code=True
).to(device)
def analyze_video(video_path):
    """Ask the LLaVA-Video model when the crowd in a video is most engaged.

    Args:
        video_path: Path to the video file to analyze. ``None`` or an empty
            string is treated as "no video supplied".

    Returns:
        The decoded model output as a string, or a short instruction to
        upload a video when ``video_path`` is empty.
    """
    # A submission without a file is expected to arrive as None (or an empty
    # value); answer with a readable message instead of failing inside the
    # processor.
    if not video_path:
        return "Please upload a video to analyze."

    prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."

    # Process the text and video input.
    # NOTE(review): the raw file path is passed through the ``video=`` keyword;
    # confirm that the loaded processor accepts this keyword and a path (some
    # processors expect decoded frames instead).
    inputs = processor(text=prompt, video=video_path, return_tensors="pt")

    # Move tensors to the model's device; values without a ``.to`` method are
    # passed through unchanged rather than raising AttributeError.
    inputs = {
        key: value.to(device) if hasattr(value, "to") else value
        for key, value in inputs.items()
    }

    # Generate at most 100 new tokens.
    outputs = model.generate(**inputs, max_new_tokens=100)

    # Decode the first generated sequence to a string, dropping special tokens.
    return processor.decode(outputs[0], skip_special_tokens=True)
# Create the Gradio interface: one video input wired to analyze_video, with the
# result shown in a text box.
# The Video component is built without a `type` argument: that keyword belongs
# to components such as Image/Audio/File, and passing it to gr.Video raises a
# TypeError when the interface is constructed.
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Concert/Event Video"),
    outputs=gr.Textbox(label="Engagement Analysis"),
    title="Crowd Engagement Analyzer",
    description=(
        "Upload a video of a concert or event and the model will analyze "
        "the video to identify the moment when the crowd is most engaged."
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()