Spaces:

saadfarhad
/

Crowdanalyzer_v1

Runtime error

File size: 2,493 Bytes

021fd45
704bddb
9cfec01
 
 
6d16e6e
9cfec01
 
b21f7ff
 
 
9cfec01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deb3cb9
 
 
 
9cfec01
deb3cb9
 
 
6d16e6e
9cfec01
6d16e6e
 
 
 
 
9cfec01
6d16e6e
021fd45
9cfec01
6d16e6e
 
 
 
9cfec01
6d16e6e
 
 
 
 
021fd45
6d16e6e

import gradio as gr
import torch
import importlib
from transformers import AutoConfig, AutoProcessor, AutoModelForCausalLM
from transformers.models.llava.configuration_llava import LlavaConfig

# --- Model selection ---
# Single source of truth for the checkpoint id and for the module that is
# searched for the model class, so the loaders below cannot drift apart.
MODEL_ID = "lmms-lab/LLaVA-Video-7B-Qwen2"
LLAVA_MODULE = "transformers.models.llava.modeling_llava"

# --- Diagnostic: Load the configuration ---
config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)
print("Configuration type:", type(config))
print("Configuration architectures:", config.architectures)

# `architectures` may be None or empty; fail with a readable message instead
# of an opaque TypeError/IndexError from the subscript below.
if not config.architectures:
    raise ValueError(f"Configuration for {MODEL_ID} does not declare any architectures")

# Expecting the architecture name to be "LlavaQwenForCausalLM"
arch = config.architectures[0]

# --- Dynamic Import: Retrieve the model class by name ---
# Import the module that (should) contain the custom model class.
module = importlib.import_module(LLAVA_MODULE)
try:
    model_cls = getattr(module, arch)
except AttributeError as err:
    # Chain the AttributeError so the traceback still shows the failed lookup.
    raise ImportError(f"Cannot find class {arch} in module {LLAVA_MODULE}") from err
else:
    print("Successfully imported model class:", model_cls)

# --- Register the Custom Model Class ---
# This tells the auto loader that for LlavaConfig, use our dynamically imported model class.
AutoModelForCausalLM.register(LlavaConfig, model_cls)

# --- Load Processor and Model ---
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Move model to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def analyze_video(video_path):
    """Ask the video-language model when the crowd in a clip is most engaged.

    Args:
        video_path: Path of the uploaded video file. ``None`` or an empty
            string is treated as "nothing uploaded".

    Returns:
        The decoded model output as a string, or a short instruction to
        upload a video when ``video_path`` is empty.
    """
    # An empty submission must not reach the processor/model: it would fail
    # there with an error that tells the user nothing.
    if not video_path:
        return "Please upload a video to analyze."

    prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
    # Process the text and video input.
    # NOTE(review): assumes the processor accepts a file path under the
    # `video` keyword -- confirm against the checkpoint's processor class.
    inputs = processor(text=prompt, video=video_path, return_tensors="pt")
    # Every tensor has to live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Generate output (assuming the custom model implements generate)
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Only the first sequence of the output is decoded.
    answer = processor.decode(outputs[0], skip_special_tokens=True)
    return answer

# Create the Gradio Interface
# `gr.Video` takes no `type` argument (unlike gr.Image / gr.Audio / gr.File);
# passing one raises TypeError on Gradio 4+. The component hands the callback
# the uploaded file's path without it.
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Concert/Event Video"),
    outputs=gr.Textbox(label="Engagement Analysis"),
    title="Crowd Engagement Analyzer",
    description="Upload a video of a concert or event and the model will analyze the moment when the crowd is most engaged."
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()