# Captured from a Hugging Face Spaces page (header read: "Spaces: Sleeping").
import os
import tempfile

import gradio as gr
import imageio
import spaces
import torch
from decord import VideoReader, cpu
from transformers import pipeline
# Hugging Face access token read from the environment; None when the
# HUGGINGFACE_TOKEN variable is unset.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
# Model identifier handed to the transformers pipeline.
model_id = "google/gemma-3-27b-it"
# Default number of frames sampled from each video.
NUM_FRAMES = 8
# Sample N frames from a video.
def sample_video_frames(video_path, num_frames=NUM_FRAMES):
    """Return ``num_frames`` evenly spaced frames decoded from a video file.

    Args:
        video_path: Path of the video, passed to ``decord.VideoReader``.
        num_frames: How many frames to sample between the first and the last
            frame (inclusive). Videos shorter than this yield repeated frames.

    Returns:
        A list of ``imageio.core.util.Array`` objects, one per sampled frame,
        in playback order.

    Raises:
        ValueError: If the reader reports zero frames for ``video_path``.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frames = len(vr)
    if total_frames == 0:
        # Without this guard linspace(0, -1, ...) would produce negative
        # indices and fail with an unrelated-looking indexing error.
        raise ValueError(f"Video contains no frames: {video_path!r}")

    # Evenly spaced float positions over [0, total_frames - 1], truncated to
    # integer frame indices.
    positions = torch.linspace(0, total_frames - 1, steps=num_frames).tolist()
    indices = [int(position) for position in positions]

    # NOTE(review): these are imageio arrays (NumPy-based), not PIL images.
    # Confirm the downstream image-text-to-text pipeline accepts them; it may
    # require PIL images, URLs or paths instead.
    return [imageio.core.util.Array(vr[index].asnumpy()) for index in indices]
# Inference function: sample video frames, load the model, run inference.
# The decorator requests a ZeroGPU device for the duration of each call.
# NOTE(review): the model is loaded inside every call; the default ZeroGPU time
# budget may be too short for that -- consider `duration=` or loading once.
@spaces.GPU
def analyze_video(video_file):
    """Describe how well AR objects blend into the uploaded video.

    Args:
        video_file: The uploaded video, either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            through a ``.name`` attribute.

    Returns:
        The ``content`` of the last message in the conversation generated by
        the pipeline.

    Raises:
        gr.Error: If no video was provided.
    """
    if video_file is None:
        raise gr.Error("Please upload a video before running the analysis.")

    # Accept a plain path as well as a file-like wrapper carrying `.name`.
    if isinstance(video_file, (str, os.PathLike)):
        video_path = os.fspath(video_file)
    else:
        video_path = video_file.name

    # Sample image frames from the uploaded video.
    frames = sample_video_frames(video_path)

    # Build a single-turn prompt (adjust to the evaluation you need).
    system_prompt = (
        "You are a helpful AI assistant that analyzes AR effects in videos. "
        "Evaluate the realism and placement of virtual objects in the provided video frames."
    )
    user_prompt = "Based on the frames, describe how well the AR objects blend into the real environment."

    # Build the chat history: the user turn is the text prompt followed by
    # one image entry per sampled frame.
    user_content = [{"type": "text", "text": user_prompt}]
    user_content += [{"type": "image", "image": frame} for frame in frames]
    history = [
        {
            "role": "system",
            "content": [{"type": "text", "text": system_prompt}],
        },
        {
            "role": "user",
            "content": user_content,
        },
    ]

    # Create the pipeline and run inference.
    pipe = pipeline(
        "image-text-to-text",
        model=model_id,
        token=hf_token,
        torch_dtype=torch.bfloat16,
        model_kwargs={"device_map": "auto"},
    )
    result = pipe(text=history, max_new_tokens=512)
    # The generated conversation ends with the model's reply.
    return result[0]["generated_text"][-1]["content"]
# Gradio interface: one video input, one text output, served on launch.
demo = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload an AR Video (.mp4 only)"),
    outputs=gr.Textbox(label="Gemma Analysis Result"),
    title="Gemma-3-27B Video Analysis (ZeroGPU)",
    # Derive the frame count from NUM_FRAMES so the text cannot drift from
    # the actual sampling default.
    description=(
        f"Uploads a video, extracts {NUM_FRAMES} frames, "
        "and uses Gemma-3-27B to analyze AR realism."
    ),
)
demo.launch()