# Hugging Face Spaces app (the Space was showing "Runtime error" at the time
# this file was captured — see the import/registration logic below).
import gradio as gr
import torch
import importlib
from transformers import AutoConfig, AutoProcessor, AutoModelForCausalLM
from transformers.models.llava.configuration_llava import LlavaConfig

# Model repo used throughout this Space.
MODEL_ID = "lmms-lab/LLaVA-Video-7B-Qwen2"

# --- Diagnostic: load the configuration --------------------------------------
config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)
print("Configuration type:", type(config))
print("Configuration architectures:", config.architectures)

# Expected architecture name: "LlavaQwenForCausalLM".
arch = config.architectures[0]

# --- Optional registration of the custom model class --------------------------
# The "LlavaQwenForCausalLM" class ships with the *model repository* (via
# trust_remote_code), NOT with the transformers library itself, so it is
# normally absent from transformers.models.llava.modeling_llava.  The original
# code raised ImportError at this point, which crashed the Space at startup.
# Register the class only if it actually exists locally; otherwise rely on
# trust_remote_code, which resolves the architecture from the repo's auto_map.
module = importlib.import_module("transformers.models.llava.modeling_llava")
model_cls = getattr(module, arch, None)
if model_cls is not None:
    print("Found model class in transformers:", model_cls)
    # Tell the auto loader to use this class for LlavaConfig.
    AutoModelForCausalLM.register(LlavaConfig, model_cls)
else:
    print(
        f"Class {arch} not found in transformers.models.llava.modeling_llava; "
        "relying on trust_remote_code to load the custom class from the repo."
    )

# --- Load processor and model -------------------------------------------------
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Move the model to GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
def analyze_video(video_path):
    """Run the crowd-engagement prompt over one uploaded video.

    Parameters
    ----------
    video_path : str | None
        Filesystem path to the uploaded video. Gradio passes ``None`` when
        the user submits without uploading a file.

    Returns
    -------
    str
        The model's decoded answer (or a hint to upload a video).
    """
    # Guard: submitting with no file would otherwise crash the processor.
    if not video_path:
        return "Please upload a video first."
    prompt = (
        "Analyze this video of a concert and determine the moment "
        "when the crowd is most engaged."
    )
    # NOTE(review): the video keyword varies between processors
    # ("video" vs "videos") — confirm against this processor's signature.
    inputs = processor(text=prompt, video=video_path, return_tensors="pt")
    # Move every input tensor to the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Generate output (assumes the custom model implements `generate`).
    outputs = model.generate(**inputs, max_new_tokens=100)
    return processor.decode(outputs[0], skip_special_tokens=True)
# --- Gradio interface ---------------------------------------------------------
# NOTE(review): gr.Video accepts no ``type`` keyword (that parameter belongs to
# gr.Image); passing ``type="filepath"`` raises TypeError on modern Gradio.
# It is dropped here — gr.Video already hands the callback a filepath.
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Concert/Event Video"),
    outputs=gr.Textbox(label="Engagement Analysis"),
    title="Crowd Engagement Analyzer",
    description=(
        "Upload a video of a concert or event and the model will analyze "
        "the moment when the crowd is most engaged."
    ),
)

if __name__ == "__main__":
    iface.launch()